Example usage for com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig HadoopJarStepConfig()

Introduction

This page collects example usages of the default constructor of com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig.

Prototype

public HadoopJarStepConfig() 

Document

Default constructor for a HadoopJarStepConfig object. Callers should use the setter or fluent setter (with...) methods to initialize the object after creating it.
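
Both initialization styles appear in the examples below. A minimal sketch (the S3 URI, main class, and arguments are placeholders, not taken from any example on this page):

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;

// Fluent style: chain the with...() setters off the default constructor.
HadoopJarStepConfig fluent = new HadoopJarStepConfig()
        .withJar("s3://my-bucket/my-app.jar")
        .withMainClass("com.example.Main")
        .withArgs("input", "output");

// Setter style: construct first, then set each property.
HadoopJarStepConfig viaSetters = new HadoopJarStepConfig();
viaSetters.setJar("s3://my-bucket/my-app.jar");
viaSetters.setMainClass("com.example.Main");
viaSetters.setArgs(java.util.Arrays.asList("input", "output"));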

Usage

From source file: awswc.AwsConsoleApp.java

License: Open Source License

static void runJobFlow() throws InterruptedException {
    // Configure instances to use
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    //********************************************************************//
    instances.setHadoopVersion(HADOOP_VERSION);
    instances.setEc2KeyName("ayuda-vp1");
    instances.setInstanceCount(MASTER_INSTANCE_COUNT);
    //instances.setInstanceGroups(instanceGroups)
    instances.setMasterInstanceType(InstanceType.M24xlarge.toString());
    instances.setSlaveInstanceType(InstanceType.M24xlarge.toString());
    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep1 = new HadoopJarStepConfig().withJar(S3N_WORD_COUNT_JAR_) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC");

    StepConfig stepConfig1 = new StepConfig().withName("wordcount").withHadoopJarStep(hadoopJarStep1)
            .withActionOnFailure("TERMINATE_JOB_FLOW");

    //********************************************************************//

    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep2 = new HadoopJarStepConfig().withJar(S3N_MAX_WORD_COUNT_JAR) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10");

    StepConfig stepConfig2 = new StepConfig().withName("maxwordcount").withHadoopJarStep(hadoopJarStep2)
            .withActionOnFailure("TERMINATE_JOB_FLOW");
    //********************************************************************//

    Collection<StepConfig> csc = new ArrayList<StepConfig>();
    csc.add(stepConfig1);
    csc.add(stepConfig2);

    // BootstrapActions bootstrapActions = new BootstrapActions();
    RunJobFlowRequest runFlowRequest = new RunJobFlowRequest().withName(FLOW_NAME).withInstances(instances)
            .withSteps(csc).withLogUri(BUCKET_NAME + "debug");
    /* Optional bootstrap actions, disabled here:
    runFlowRequest.withBootstrapActions(
            bootstrapActions.newRunIf("instance.isMaster=true",
                    bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.JobTracker, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningNameNode=true",
                    bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.NameNode, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningDataNode=true",
                    bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.DataNode, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningJobTracker=true",
                    bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.JobTracker, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningTaskTracker=true",
                    bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.TaskTracker, 4096).build()),
            bootstrapActions.newRunIf("instance.isSlave=true",
                    bootstrapActions.newConfigureHadoop()
                            .withKeyValue(ConfigFile.Site, "mapred.tasktracker.map.tasks.maximum", "4")));
    */

    RunJobFlowResult runJobFlowResult = emr.runJobFlow(runFlowRequest);

    String jobFlowId = runJobFlowResult.getJobFlowId();
    System.out.println("Ran job flow with id: " + jobFlowId);

    //wasFinished(runJobFlowResult);

}

From source file: datameer.awstasks.aws.emr.EmrCluster.java

License: Apache License

private static StepConfig createDebugStep() {
    StepConfig debugStep = new StepConfig();
    debugStep.setName("Setup Hadoop Debugging");
    debugStep.setActionOnFailure("TERMINATE_JOB_FLOW");

    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig();
    hadoopJarStepConfig.setJar("s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar");
    hadoopJarStepConfig.getArgs().add("s3://us-east-1.elasticmapreduce/libs/state-pusher/0.1/fetch");
    debugStep.setHadoopJarStep(hadoopJarStepConfig);
    return debugStep;
}
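
The same debugging step can also be built without hard-coding the script-runner URI by using the SDK's StepFactory, as the EmrSparkExample further down does. A minimal sketch:

import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

StepFactory stepFactory = new StepFactory();
StepConfig debugStep = new StepConfig().withName("Setup Hadoop Debugging")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());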

From source file: datameer.awstasks.aws.emr.EmrCluster.java

License: Apache License

public StepFuture executeJobStep(String name, File jobJar, String s3JobJarName, Class<?> mainClass,
        String... args) {
    checkConnection(true);
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig();
    if (jobJar != null) {
        String s3JobJarUri = uploadingJobJar(jobJar, s3JobJarName);
        jarConfig.setJar(s3JobJarUri);
    }
    if (mainClass != null) {
        jarConfig.setMainClass(mainClass.getName());
    }
    jarConfig.setArgs(Arrays.asList(args));
    StepConfig stepConfig = new StepConfig();
    stepConfig.setName(name);
    stepConfig.setActionOnFailure("CONTINUE");
    stepConfig.setHadoopJarStep(jarConfig);
    _emrWebService
            .addJobFlowSteps(new AddJobFlowStepsRequest().withJobFlowId(_jobFlowId).withSteps(stepConfig));
    _emrWebService.clearDescribeJobFlowCache();
    return new StepFuture(stepConfig.getName(), getStepIndex(getJobFlowDetail(_jobFlowId), name));
}

From source file: fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java

License: LGPL

void init() {

    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.AWSSecretKey);
    requireNonNull(this.jarLocation);
    requireNonNull(this.jarArguments);
    requireNonNull(this.slavesInstanceType);
    requireNonNull(this.hadoopVersion);
    requireNonNull(this.jobFlowName);

    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instances is lower than 1");
    }

    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }

    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim())
            .withArgs(this.jarArguments);

    // Set step config
    final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step")
            .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW");

    // Set the instance
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances)
            .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType)
            .withHadoopVersion(this.hadoopVersion);

    // Configure hadoop
    final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
            .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
            .withArgs("--site-key-value",
                    "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);

    final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop")
            .withScriptBootstrapAction(scriptBootstrapAction);

    // Enable debugging
    StepFactory stepFactory = new StepFactory();
    StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Run flow
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName);

    // Enable or not debugging
    if (this.enableDebugging) {
        this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig);
    } else {
        this.runFlowRequest.withInstances(instances).withSteps(stepConfig);
    }

    // Limit the number of task in a task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        this.runFlowRequest.withBootstrapActions(bootstrapActions);
    }

    if (this.logPathname != null && !"".equals(this.logPathname)) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }

    // Set EC2 Key name
    if (this.ec2KeyName != null) {
        this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName);
    }
}

From source file: org.deeplearning4j.legacyExamples.EmrSparkExample.java

License: Apache License

public void entryPoint(String[] args) {
    JCommander jcmdr = new JCommander(this);
    try {
        jcmdr.parse(args);
    } catch (ParameterException e) {
        jcmdr.usage();
        try {
            Thread.sleep(500);
        } catch (Exception e2) {
            // ignore
        }
        throw e;
    }

    AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard();
    builder.withRegion(region);
    builder.withCredentials(getCredentialsProvider());

    AmazonElasticMapReduce emr = builder.build();

    List<StepConfig> steps = new ArrayList<>();

    if (upload) {
        log.info("uploading uber jar");

        AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard();
        s3builder.withRegion(region);
        s3builder.withCredentials(getCredentialsProvider());
        AmazonS3 s3Client = s3builder.build();

        if (!s3Client.doesBucketExist(bucketName)) {
            s3Client.createBucket(bucketName);
        }

        File uberJarFile = new File(uberJar);

        s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile));
    }

    if (debug) {
        log.info("enable debug");

        StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce");
        StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());
        steps.add(enableDebugging);
    }

    if (execute) {
        log.info("execute spark step");

        HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig();
        sparkStepConf.withJar("command-runner.jar");
        sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className,
                getS3UberJarUrl(), "-useSparkLocal", "false");

        ActionOnFailure action = ActionOnFailure.TERMINATE_JOB_FLOW;

        if (keepAlive) {
            action = ActionOnFailure.CONTINUE;
        }

        StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action)
                .withHadoopJarStep(sparkStepConf);
        steps.add(sparkStep);
    }

    log.info("create spark cluster");

    Application sparkApp = new Application().withName("Spark");

    // The service role and job flow (EC2) role are created automatically when a
    // cluster is first launched from the AWS console, so do that once beforehand
    // or create the roles manually.

    RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps)
            .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
            .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl())
            .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount)
                    .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType)
                    .withSlaveInstanceType(instanceType));

    RunJobFlowResult result = emr.runJobFlow(request);

    log.info(result.toString());

    log.info("done");
}

From source file: org.finra.dm.dao.helper.EmrHelper.java

License: Apache License

/**
 * Builds the StepConfig for the Hadoop jar step.
 *
 * @param stepName the step name.
 * @param jarLocation the location of jar.
 * @param mainClass the main class.
 * @param scriptArguments the arguments.
 * @param isContinueOnError indicate what to do on error.
 *
 * @return the stepConfig.
 */
public StepConfig getEmrHadoopJarStepConfig(String stepName, String jarLocation, String mainClass,
        List<String> scriptArguments, Boolean isContinueOnError) {
    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (isContinueOnError != null && isContinueOnError) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments
    if (CollectionUtils.isEmpty(scriptArguments)) {
        // Build the StepConfig object and return
        return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(
                        new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass));
    } else {
        // If there are arguments, include the arguments in the StepConfig object
        return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(
                        new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass)
                                .withArgs(scriptArguments.toArray(new String[scriptArguments.size()])));
    }
}
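
A hypothetical invocation of this helper (the emrHelper instance and all argument values are illustrative only):

import java.util.Arrays;

StepConfig step = emrHelper.getEmrHadoopJarStepConfig("word-count",
        "s3://my-bucket/app.jar", "com.example.WordCount",
        Arrays.asList("s3://my-bucket/in", "s3://my-bucket/out"), Boolean.FALSE);
// Boolean.FALSE keeps the default ActionOnFailure.CANCEL_AND_WAIT.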

From source file: org.huahinframework.emanager.amazonaws.elasticmapreduce.CustomJarStepConfig.java

License: Apache License

@Override
public StepConfig[] createStepConfig(Config config) {
    List<String> args = new ArrayList<String>();
    for (String s : config.getArgs()) {
        args.add(s);
    }

    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig().withJar(config.getRun()).withArgs(args);
    if (config.getMainClass() != null && !config.getMainClass().isEmpty()) {
        hadoopJarStepConfig.setMainClass(config.getMainClass());
    }

    StepConfig stepConfig = new StepConfig().withName(config.getName()).withActionOnFailure(ACTION_ON_FAILURE)
            .withHadoopJarStep(hadoopJarStepConfig);
    StepConfig[] stepConfigs = new StepConfig[1];
    stepConfigs[0] = stepConfig;
    return stepConfigs;
}

From source file: org.huahinframework.emanager.amazonaws.elasticmapreduce.StreamingStepConfig.java

License: Apache License

@Override
public StepConfig[] createStepConfig(Config config) {
    List<String> args = new ArrayList<String>();
    for (Entry<String, String> entry : config.getArgMap().entrySet()) {
        args.add("-" + entry.getKey());
        args.add(entry.getValue());
    }

    StepConfig stepConfig = new StepConfig().withName(config.getName()).withActionOnFailure(ACTION_ON_FAILURE)
            .withHadoopJarStep(new HadoopJarStepConfig().withJar(STREAMING_JAR).withArgs(args));
    StepConfig[] stepConfigs = new StepConfig[1];
    stepConfigs[0] = stepConfig;
    return stepConfigs;
}

From source file: org.pentaho.amazon.client.impl.EmrClientImpl.java

License: Apache License

private static HadoopJarStepConfig configureHadoopStep(String stagingS3Jar, String mainClass,
        List<String> jarStepArgs) {
    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig();
    hadoopJarStepConfig.setJar(stagingS3Jar);
    hadoopJarStepConfig.setMainClass(mainClass);
    hadoopJarStepConfig.setArgs(jarStepArgs);

    return hadoopJarStepConfig;
}

From source file: org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java

License: Apache License

public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();

        // find main class in jar
        String mainClass = getMainClass(localJarUrl);

        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
                null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }

        // delete old jar if needed
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }

        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));
        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }

            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);

            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);

            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);

            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);

            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }

        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60");
        }

        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {

                    String executionState = "RUNNING";

                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent,
                        // mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}