Example usage for com.amazonaws.services.elasticmapreduce.model StepConfig setHadoopJarStep

Introduction

In this page you can find the example usage for com.amazonaws.services.elasticmapreduce.model StepConfig setHadoopJarStep.

Prototype


public void setHadoopJarStep(HadoopJarStepConfig hadoopJarStep)

Source Link

Document

The JAR file used for the step.

Usage

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License:Apache License

private static StepConfig createDebugStep() {
    StepConfig debugStep = new StepConfig();
    debugStep.setName("Setup Hadoop Debugging");
    debugStep.setActionOnFailure("TERMINATE_JOB_FLOW");

    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig();
    hadoopJarStepConfig.setJar("s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar");
    hadoopJarStepConfig.getArgs().add("s3://us-east-1.elasticmapreduce/libs/state-pusher/0.1/fetch");
    debugStep.setHadoopJarStep(hadoopJarStepConfig);
    return debugStep;
}

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License:Apache License

public StepFuture executeJobStep(String name, File jobJar, String s3JobJarName, Class<?> mainClass,
        String... args) {//from   w  w  w  .  ja  va  2 s.  c  o m
    checkConnection(true);
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig();
    if (jobJar != null) {
        String s3JobJarUri = uploadingJobJar(jobJar, s3JobJarName);
        jarConfig.setJar(s3JobJarUri);
    }
    if (mainClass != null) {
        jarConfig.setMainClass(mainClass.getName());
    }
    jarConfig.setArgs(Arrays.asList(args));
    StepConfig stepConfig = new StepConfig();
    stepConfig.setName(name);
    stepConfig.setActionOnFailure("CONTINUE");
    stepConfig.setHadoopJarStep(jarConfig);
    _emrWebService
            .addJobFlowSteps(new AddJobFlowStepsRequest().withJobFlowId(_jobFlowId).withSteps(stepConfig));
    _emrWebService.clearDescribeJobFlowCache();
    return new StepFuture(stepConfig.getName(), getStepIndex(getJobFlowDetail(_jobFlowId), name));
}

From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java

License:Apache License

private StepConfig initHadoopStep(String jarUrl, String mainClass, List<String> jarStepArgs) {
    StepConfig stepConfig = new StepConfig();
    stepConfig.setName("custom jar: " + jarUrl);

    stepConfig.setHadoopJarStep(configureHadoopStep(jarUrl, mainClass, jarStepArgs));
    if (this.alive) {
        stepConfig.withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT);
    } else {// w w  w  .j a v a2s. co  m
        stepConfig.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);
    }
    return stepConfig;
}

From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java

License:Apache License

public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;/*from  www.ja va2 s.com*/
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();

        // find main class in jar
        String mainClass = getMainClass(localJarUrl);

        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
                null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }

        // delete old jar if needed
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }

        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));
        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }

            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);

            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);

            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);

            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);

            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }

        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60");
        }

        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {

                    String executionState = "RUNNING";

                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent,
                        // mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}

From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java

License:Apache License

public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) {
    List<String> jarStepArgs = new ArrayList<String>();
    if (!StringUtil.isEmpty(cmdLineArgs)) {
        StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            logBasic("adding args: " + token);
            jarStepArgs.add(token);//from w  w  w .ja  va  2s . c  o m
        }
    }

    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
    hadoopJarStep.setJar(stagingS3Jar);
    hadoopJarStep.setMainClass(mainClass);
    hadoopJarStep.setArgs(jarStepArgs);

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName("custom jar: " + jarUrl);
    stepConfig.setHadoopJarStep(hadoopJarStep);

    List<StepConfig> steps = new ArrayList<StepConfig>();
    steps.add(stepConfig);

    String numInstancesS = environmentSubstitute(numInstances);
    int numInsts = 2;
    try {
        numInsts = Integer.parseInt(numInstancesS);
    } catch (NumberFormatException e) {
        logError("Unable to parse number of instances to use '" + numInstancesS + "' - "
                + "using 2 instances...");
    }
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setInstanceCount(numInsts);
    instances.setMasterInstanceType(getInstanceType(masterInstanceType));
    instances.setSlaveInstanceType(getInstanceType(slaveInstanceType));
    instances.setHadoopVersion("0.20");

    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setSteps(steps);
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);
    runJobFlowRequest.setName(hadoopJobName);
    runJobFlowRequest.setInstances(instances);

    // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig();
    // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh");
    // List<String> bootstrapArgs = new ArrayList<String>();
    // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com");
    // //
    // bootstrapArgs.add(
    //   "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/");
    // bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip");
    // scriptBootstrapAction.setArgs(bootstrapArgs);
    // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig();
    // bootstrapActionConfig.setName("mdd bootstrap");
    // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction);
    // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>();
    // bootstrapActions.add(bootstrapActionConfig);
    // runJobFlowRequest.setBootstrapActions(bootstrapActions);

    return runJobFlowRequest;
}

From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java

License:Apache License

/**
 * Configure the HadoopJarStep, which is one Hadoop step of an EMR job to be submitted to AWS.
 * //from  w  w w .  j  av a 2 s. com
 * @param stepName
 *          name of step
 * @param stagingS3JarUrl
 *          URL for MapReduce jar file
 * @param args
 *          arguments for MapReduce jar
 * @return configuration data object for the step
 * 
 */
public List<StepConfig> ConfigHadoopJarStep(String stepName, String stagingS3JarUrl, String args) {

    List<String> jarStepArgs = new ArrayList<String>();
    jarStepArgs = ConfigArgs(args, " "); //$NON-NLS-1$

    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
    hadoopJarStep.setJar(stagingS3JarUrl);
    hadoopJarStep.setArgs(jarStepArgs);

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName(stepName);
    stepConfig.setHadoopJarStep(hadoopJarStep);
    if (isAlive()) { // Job flow stays in "WAITING" state if this step fails.
        stepConfig.setActionOnFailure("CANCEL_AND_WAIT"); //$NON-NLS-1$
    } else { // Job flow is terminated if this step fails.
        stepConfig.setActionOnFailure("TERMINATE_JOB_FLOW"); //$NON-NLS-1$
    }

    List<StepConfig> steps = new ArrayList<StepConfig>();
    steps.add(stepConfig);

    return steps;
}