Example usage for com.amazonaws.services.elasticmapreduce.model StepConfig StepConfig

Introduction

In this page you can find the example usage for com.amazonaws.services.elasticmapreduce.model StepConfig StepConfig.

Prototype

public StepConfig()

Source Link

Document

Default constructor for StepConfig object.

Usage

From source file:awswc.AwsConsoleApp.java

License:Open Source License

static void runJobFlow() throws InterruptedException {
    // Configure instances to use
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    //********************************************************************//
    instances.setHadoopVersion(HADOOP_VERSION);
    instances.withEc2KeyName("ayuda-vp1");
    instances.setInstanceCount(MASTER_INSTANCE_COUNT);
    //instances.setInstanceGroups(instanceGroups)
    instances.setMasterInstanceType(InstanceType.M24xlarge.toString());
    instances.setSlaveInstanceType(InstanceType.M24xlarge.toString());
    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep1 = new HadoopJarStepConfig().withJar(S3N_WORD_COUNT_JAR_) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC");

    StepConfig stepConfig1 = new StepConfig().withName("wordcount").withHadoopJarStep(hadoopJarStep1)
            .withActionOnFailure("TERMINATE_JOB_FLOW");

    //********************************************************************//

    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep2 = new HadoopJarStepConfig().withJar(S3N_MAX_WORD_COUNT_JAR) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10");

    StepConfig stepConfig2 = new StepConfig().withName("maxwordcount").withHadoopJarStep(hadoopJarStep2)
            .withActionOnFailure("TERMINATE_JOB_FLOW");
    //********************************************************************//

    Collection<StepConfig> csc = new ArrayList<StepConfig>();
    csc.add(stepConfig1);/*  ww w. ja  va 2s .c  o  m*/
    csc.add(stepConfig2);

    // BootstrapActions bootstrapActions = new BootstrapActions();
    RunJobFlowRequest runFlowRequest = new RunJobFlowRequest().withName(FLOW_NAME).withInstances(instances)
            .withSteps(csc).withLogUri(BUCKET_NAME + "debug")
    /*.withBootstrapActions(
      bootstrapActions.newRunIf(
          "instance.isMaster=true",
          bootstrapActions.newConfigureDaemons()
              .withHeapSize(Daemon.JobTracker, 4096)
              .build()),
              bootstrapActions.newRunIf(
                      "instance.isRunningNameNode=true",
                      bootstrapActions.newConfigureDaemons()
                      .withHeapSize(Daemon.NameNode, 4096).build()),
      bootstrapActions.newRunIf(
                                     "instance.isRunningDataNode=true",
                                     bootstrapActions.newConfigureDaemons()
                                     .withHeapSize(Daemon.DataNode, 4096).build()),
      bootstrapActions.newRunIf(
                                     "instance.isRunningJobTracker=true",
                                     bootstrapActions.newConfigureDaemons()
                                     .withHeapSize(Daemon.JobTracker, 4096).build()),
      bootstrapActions.newRunIf(
                                             "instance.isRunningTaskTracker=true",
                                             bootstrapActions.newConfigureDaemons()
                                             .withHeapSize(Daemon.TaskTracker, 4096).build())                                             
                                             
                                     /*,
                                             
      bootstrapActions.newRunIf(
                                             "instance.isSlave=true",
      bootstrapActions.newConfigureHadoop()
                                       .withKeyValue(ConfigFile.Site,"mapred.tasktracker.map.tasks.maximum", "4"))                                            
              )*/;

    RunJobFlowResult runJobFlowResult = emr.runJobFlow(runFlowRequest);

    String jobFlowId = runJobFlowResult.getJobFlowId();
    System.out.println("Ran job flow with id: " + jobFlowId);

    //wasFinished(runJobFlowResult);

}

From source file:com.aegeus.aws.ElasticMapReduceService.java

License:Apache License

public void addSteps() {
    StepConfig parseStep = new StepConfig().withName("Parse logs")
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);

    StepConfig persistStep = new StepConfig().withName("Persist layer")
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);

    AddJobFlowStepsRequest request = new AddJobFlowStepsRequest().withJobFlowId(clusterId).withSteps(parseStep,
            persistStep);//from  ww  w.  j ava 2  s .c  o m

    stepIds = emr.addJobFlowSteps(request).getStepIds();
}

From source file:com.clouddrive.parth.NewClass.java

public static String runCluster() throws Exception {
    long start = System.currentTimeMillis();
    String temp = "";
    // Configure the job flow
    //RunJobFlowRequest request = new RunJobFlowRequest().withName("parth");
    // if (request == null) {
    RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance());
    request.setLogUri(S3N_LOG_URI);
    // }//from  www .  ja  v a2 s .  c o m

    // Configure the Hadoop jar to use
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR);
    jarConfig.setArgs(ARGS_AS_LIST);

    try {

        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());

        StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1),
                jarConfig);

        request.setSteps(Arrays.asList(new StepConfig[] { enableDebugging, runJar }));

        // Run the job flow
        RunJobFlowResult result = emr.runJobFlow(request);

        // Check the status of the running job
        String lastState = "";

        STATUS_LOOP: while (true) {
            DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest(
                    Arrays.asList(new String[] { result.getJobFlowId() }));
            DescribeJobFlowsResult descResult = emr.describeJobFlows(desc);
            for (JobFlowDetail detail : descResult.getJobFlows()) {
                String state = detail.getExecutionStatusDetail().getState();
                if (isDone(state)) {
                    System.out.println("Job " + state + ": " + detail.toString());
                    break STATUS_LOOP;
                } else if (!lastState.equals(state)) {
                    lastState = state;
                    System.out.println("Job " + state + " at " + new Date().toString());
                }
            }
            Thread.sleep(10000);
        }
        temp = FLOW_NAME;
        long end = System.currentTimeMillis();
        System.out.println("Computation " + (end - start));
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Reponse Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }
    return temp;
}

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License:Apache License

private static StepConfig createDebugStep() {
    StepConfig debugStep = new StepConfig();
    debugStep.setName("Setup Hadoop Debugging");
    debugStep.setActionOnFailure("TERMINATE_JOB_FLOW");

    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig();
    hadoopJarStepConfig.setJar("s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar");
    hadoopJarStepConfig.getArgs().add("s3://us-east-1.elasticmapreduce/libs/state-pusher/0.1/fetch");
    debugStep.setHadoopJarStep(hadoopJarStepConfig);
    return debugStep;
}

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License:Apache License

public StepFuture executeJobStep(String name, File jobJar, String s3JobJarName, Class<?> mainClass,
        String... args) {/*from  ww w  .jav  a 2  s. c o m*/
    checkConnection(true);
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig();
    if (jobJar != null) {
        String s3JobJarUri = uploadingJobJar(jobJar, s3JobJarName);
        jarConfig.setJar(s3JobJarUri);
    }
    if (mainClass != null) {
        jarConfig.setMainClass(mainClass.getName());
    }
    jarConfig.setArgs(Arrays.asList(args));
    StepConfig stepConfig = new StepConfig();
    stepConfig.setName(name);
    stepConfig.setActionOnFailure("CONTINUE");
    stepConfig.setHadoopJarStep(jarConfig);
    _emrWebService
            .addJobFlowSteps(new AddJobFlowStepsRequest().withJobFlowId(_jobFlowId).withSteps(stepConfig));
    _emrWebService.clearDescribeJobFlowCache();
    return new StepFuture(stepConfig.getName(), getStepIndex(getJobFlowDetail(_jobFlowId), name));
}

From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java

License:LGPL

void init() {

    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.jarLocation);
    requireNonNull(this.jarArguments);
    requireNonNull(this.slavesInstanceType);
    requireNonNull(this.hadoopVersion);
    requireNonNull(this.jobFlowName);

    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instance is lower than 1");
    }/*from w  w w . ja v a 2s.  com*/

    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }

    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim())
            .withArgs(this.jarArguments);

    // Set step config
    final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step")
            .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW");

    // Set the instance
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances)
            .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType)
            .withHadoopVersion(this.hadoopVersion);

    // Configure hadoop
    final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
            .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
            .withArgs("--site-key-value",
                    "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);

    final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop")
            .withScriptBootstrapAction(scriptBootstrapAction);

    // Enable debugging
    StepFactory stepFactory = new StepFactory();
    StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Run flow
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName);

    // Enable or not debugging
    if (this.enableDebugging) {
        this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig);
    } else {
        this.runFlowRequest.withInstances(instances).withSteps(stepConfig);
    }

    // Limit the number of task in a task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        this.runFlowRequest.withBootstrapActions(bootstrapActions);
    }

    if (this.logPathname != null && !"".equals(this.logPathname)) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }

    // Set EC2 Key name
    if (this.ec2KeyName != null) {
        this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName);
    }
}

From source file:org.deeplearning4j.legacyExamples.EmrSparkExample.java

License:Apache License

public void entryPoint(String[] args) {
    JCommander jcmdr = new JCommander(this);
    try {/*from   w w  w  . j  a  va2  s .  c  o m*/
        jcmdr.parse(args);
    } catch (ParameterException e) {
        jcmdr.usage();
        try {
            Thread.sleep(500);
        } catch (Exception e2) {
        }
        throw e;
    }

    AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard();
    builder.withRegion(region);
    builder.withCredentials(getCredentialsProvider());

    AmazonElasticMapReduce emr = builder.build();

    List<StepConfig> steps = new ArrayList<>();

    if (upload) {
        log.info("uploading uber jar");

        AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard();
        s3builder.withRegion(region);
        s3builder.withCredentials(getCredentialsProvider());
        AmazonS3 s3Client = s3builder.build();

        if (!s3Client.doesBucketExist(bucketName)) {
            s3Client.createBucket(bucketName);
        }

        File uberJarFile = new File(uberJar);

        s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile));
    }

    if (debug) {
        log.info("enable debug");

        StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce");
        StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());
        steps.add(enableDebugging);
    }

    if (execute) {
        log.info("execute spark step");

        HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig();
        sparkStepConf.withJar("command-runner.jar");
        sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className,
                getS3UberJarUrl(), "-useSparkLocal", "false");

        ActionOnFailure action = ActionOnFailure.TERMINATE_JOB_FLOW;

        if (keepAlive) {
            action = ActionOnFailure.CONTINUE;
        }

        StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action)
                .withHadoopJarStep(sparkStepConf);
        steps.add(sparkStep);
    }

    log.info("create spark cluster");

    Application sparkApp = new Application().withName("Spark");

    // service and job flow role will be created automatically when
    // launching cluster in aws console, better do that first or create
    // manually

    RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps)
            .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
            .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl())
            .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount)
                    .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType)
                    .withSlaveInstanceType(instanceType));

    RunJobFlowResult result = emr.runJobFlow(request);

    log.info(result.toString());

    log.info("done");
}

From source file:org.finra.dm.dao.helper.EmrHelper.java

License:Apache License

/**
 * Builds the StepConfig for the Hadoop jar step.
 *
 * @param stepName the step name.//w  w w.ja va2s. c  om
 * @param jarLocation the location of jar.
 * @param mainClass the main class.
 * @param scriptArguments the arguments.
 * @param isContinueOnError indicate what to do on error.
 *
 * @return the stepConfig.
 */
public StepConfig getEmrHadoopJarStepConfig(String stepName, String jarLocation, String mainClass,
        List<String> scriptArguments, Boolean isContinueOnError) {
    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (isContinueOnError != null && isContinueOnError) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments
    if (CollectionUtils.isEmpty(scriptArguments)) {
        // Build the StepConfig object and return
        return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(
                        new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass));
    } else {
        // If there are arguments, include the arguments in the StepConfig object
        return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(
                        new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass)
                                .withArgs(scriptArguments.toArray(new String[scriptArguments.size()])));
    }
}

From source file:org.finra.dm.dao.impl.EmrDaoImpl.java

License:Apache License

/**
 * Create the step config list of objects for hive/pig installation.
 *
 * @param emrClusterDefinition the EMR definition name value.
 *
 * @return list of step configuration that contains all the steps for the given configuration.
 *//*from   w  w w .j  a va2 s . c om*/
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Add step to copy DM oozie wrapper workflow to HDFS.
    String wrapperWorkflowS3Location = getS3LocationForConfiguration(
            emrHelper.getEmrOozieDmWorkflowS3LocationConfiguration());

    String wrapperWorkflowHdfsLocation = configurationHelper
            .getProperty(ConfigurationValue.EMR_OOZIE_DM_WRAPPER_WORKFLOW_HDFS_LOCATION);

    List<String> s3ToHdfsCopyScriptArgsList = new ArrayList<>();

    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location + emrHelper.getS3HdfsCopyScriptName());

    // 1. Source S3 location
    // 2. Target HDFS location.
    // 3. Temp folder to use on local node.
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location);
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowHdfsLocation);
    s3ToHdfsCopyScriptArgsList.add(UUID.randomUUID().toString());

    HadoopJarStepConfig copyWrapperJarConfig = new HadoopJarStepConfig(hadoopJarForShellScript)
            .withArgs(s3ToHdfsCopyScriptArgsList);
    appSteps.add(new StepConfig().withName("Copy DM oozie wrapper").withHadoopJarStep(copyWrapperJarConfig));

    // Create install hive step and add to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create install Pig step and add to the StepConfig List
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add Oozie support if needed
    if (emrClusterDefinition.isInstallOozie() != null && emrClusterDefinition.isInstallOozie()) {
        String oozieShellArg = getS3StagingLocation()
                + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_TAR_FILE);

        List<String> argsList = new ArrayList<>();
        argsList.add(getOozieScriptLocation());
        argsList.add(oozieShellArg);

        HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
        appSteps.add(new StepConfig().withName("Oozie").withHadoopJarStep(jarConfig));
    }

    // Add the hadoop jar steps that need to be added.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                    hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                    hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());

            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}

From source file:org.finra.dm.service.helper.EmrHiveStepHelper.java

License:Apache License

@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrHiveStep emrHiveStep = (EmrHiveStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (emrHiveStep.isContinueOnError() != null && emrHiveStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }/*from  ww  w .  j  a v  a 2s.  c  o  m*/

    // If there are no arguments to hive script
    if (CollectionUtils.isEmpty(emrHiveStep.getScriptArguments())) {
        // Just build the StepConfig object and return
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(
                        new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim()));
    }
    // If there are arguments specified
    else {
        // For each argument, add "-d" option
        List<String> hiveArgs = new ArrayList<>();
        for (String hiveArg : emrHiveStep.getScriptArguments()) {
            hiveArgs.add("-d");
            hiveArgs.add(hiveArg);
        }
        // Return the StepConfig object
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(
                        emrHiveStep.getScriptLocation().trim(), hiveArgs.toArray(new String[hiveArgs.size()])));
    }
}