Example usage for com.amazonaws.services.elasticmapreduce.util StepFactory StepFactory

Introduction

On this page you can find example usage for com.amazonaws.services.elasticmapreduce.util StepFactory StepFactory.

Prototype

public StepFactory() 

Document

Creates a new StepFactory using the default Elastic Map Reduce bucket (us-east-1.elasticmapreduce) for the default (us-east-1) region.
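
For orientation, here is a minimal, self-contained sketch of the pattern shared by the examples below: construct a StepFactory and use it to build a StepConfig. The SDK also provides a StepFactory(String bucketPath) constructor for non-default regions; the class and step names in this sketch are illustrative only.

import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

public class StepFactoryExample {
    public static void main(String[] args) {
        // Uses the default us-east-1.elasticmapreduce bucket
        StepFactory stepFactory = new StepFactory();

        // A typical use: an "enable debugging" step that terminates the job flow on failure
        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

        System.out.println(enableDebugging);
    }
}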

Usage

From source file:com.clouddrive.parth.NewClass.java

public static String runCluster() throws Exception {
    long start = System.currentTimeMillis();
    String temp = "";
    // Configure the job flow
    //RunJobFlowRequest request = new RunJobFlowRequest().withName("parth");
    // if (request == null) {
    RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance());
    request.setLogUri(S3N_LOG_URI);
    // }

    // Configure the Hadoop jar to use
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR);
    jarConfig.setArgs(ARGS_AS_LIST);

    try {

        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());

        // Derive the step name from the jar path (everything after the first '/')
        StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1),
                jarConfig);

        request.setSteps(Arrays.asList(new StepConfig[] { enableDebugging, runJar }));

        // Run the job flow
        RunJobFlowResult result = emr.runJobFlow(request);

        // Check the status of the running job
        String lastState = "";

        // Poll the job flow every 10 seconds until it reaches a terminal state
        STATUS_LOOP: while (true) {
            DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest(
                    Arrays.asList(new String[] { result.getJobFlowId() }));
            DescribeJobFlowsResult descResult = emr.describeJobFlows(desc);
            for (JobFlowDetail detail : descResult.getJobFlows()) {
                String state = detail.getExecutionStatusDetail().getState();
                if (isDone(state)) {
                    System.out.println("Job " + state + ": " + detail.toString());
                    break STATUS_LOOP;
                } else if (!lastState.equals(state)) {
                    lastState = state;
                    System.out.println("Job " + state + " at " + new Date().toString());
                }
            }
            Thread.sleep(10000);
        }
        temp = FLOW_NAME;
        long end = System.currentTimeMillis();
        System.out.println("Computation " + (end - start));
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Response Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }
    return temp;
}

From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java

License:LGPL

void init() {

    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.AWSSecretKey); // presumably intended; the original checked AWSAccessKey twice
    requireNonNull(this.jarLocation);
    requireNonNull(this.jarArguments);
    requireNonNull(this.slavesInstanceType);
    requireNonNull(this.hadoopVersion);
    requireNonNull(this.jobFlowName);

    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instances is lower than 1");
    }

    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }

    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim())
            .withArgs(this.jarArguments);

    // Set step config
    final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step")
            .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW");

    // Set the instances configuration
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances)
            .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType)
            .withHadoopVersion(this.hadoopVersion);

    // Configure hadoop
    final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
            .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
            .withArgs("--site-key-value",
                    "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);

    final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop")
            .withScriptBootstrapAction(scriptBootstrapAction);

    // Enable debugging
    StepFactory stepFactory = new StepFactory();
    StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Run flow
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName);

    // Enable debugging if requested
    if (this.enableDebugging) {
        this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig);
    } else {
        this.runFlowRequest.withInstances(instances).withSteps(stepConfig);
    }

    // Limit the number of tasks in a task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        this.runFlowRequest.withBootstrapActions(bootstrapActions);
    }

    if (this.logPathname != null && !"".equals(this.logPathname)) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }

    // Set EC2 Key name
    if (this.ec2KeyName != null) {
        this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName);
    }
}

From source file:org.finra.dm.dao.impl.EmrDaoImpl.java

License:Apache License

/**
 * Create the list of step config objects for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition.
 *
 * @return the list of step configurations containing all the steps for the given definition.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Add step to copy DM oozie wrapper workflow to HDFS.
    String wrapperWorkflowS3Location = getS3LocationForConfiguration(
            emrHelper.getEmrOozieDmWorkflowS3LocationConfiguration());

    String wrapperWorkflowHdfsLocation = configurationHelper
            .getProperty(ConfigurationValue.EMR_OOZIE_DM_WRAPPER_WORKFLOW_HDFS_LOCATION);

    List<String> s3ToHdfsCopyScriptArgsList = new ArrayList<>();

    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location + emrHelper.getS3HdfsCopyScriptName());

    // 1. Source S3 location
    // 2. Target HDFS location.
    // 3. Temp folder to use on local node.
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location);
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowHdfsLocation);
    s3ToHdfsCopyScriptArgsList.add(UUID.randomUUID().toString());

    HadoopJarStepConfig copyWrapperJarConfig = new HadoopJarStepConfig(hadoopJarForShellScript)
            .withArgs(s3ToHdfsCopyScriptArgsList);
    appSteps.add(new StepConfig().withName("Copy DM oozie wrapper").withHadoopJarStep(copyWrapperJarConfig));

    // Create install hive step and add to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create install Pig step and add to the StepConfig List
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add Oozie support if needed
    if (emrClusterDefinition.isInstallOozie() != null && emrClusterDefinition.isInstallOozie()) {
        String oozieShellArg = getS3StagingLocation()
                + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_TAR_FILE);

        List<String> argsList = new ArrayList<>();
        argsList.add(getOozieScriptLocation());
        argsList.add(oozieShellArg);

        HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
        appSteps.add(new StepConfig().withName("Oozie").withHadoopJarStep(jarConfig));
    }

    // Add any configured Hadoop jar steps.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                    hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                    hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());

            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}

From source file:org.finra.dm.service.helper.EmrHiveStepHelper.java

License:Apache License

@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrHiveStep emrHiveStep = (EmrHiveStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (emrHiveStep.isContinueOnError() != null && emrHiveStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the Hive script
    if (CollectionUtils.isEmpty(emrHiveStep.getScriptArguments())) {
        // Just build the StepConfig object and return
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(
                        new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim()));
    }
    // If there are arguments specified
    else {
        // For each argument, add "-d" option
        List<String> hiveArgs = new ArrayList<>();
        for (String hiveArg : emrHiveStep.getScriptArguments()) {
            hiveArgs.add("-d");
            hiveArgs.add(hiveArg);
        }
        // Return the StepConfig object
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(
                        emrHiveStep.getScriptLocation().trim(), hiveArgs.toArray(new String[hiveArgs.size()])));
    }
}

From source file:org.finra.dm.service.helper.EmrPigStepHelper.java

License:Apache License

@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrPigStep pigStep = (EmrPigStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (pigStep.isContinueOnError() != null && pigStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the Pig script
    if (CollectionUtils.isEmpty(pigStep.getScriptArguments())) {
        // Just build the StepConfig object and return
        return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim()));
    }
    // If there are arguments specified
    else {
        return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim(),
                        pigStep.getScriptArguments().toArray(new String[pigStep.getScriptArguments().size()])));
    }
}

From source file:org.finra.herd.dao.impl.EmrDaoImpl.java

License:Apache License

/**
 * Create the list of step config objects for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition.
 *
 * @return the list of step configurations containing all the steps for the given definition.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    // Create install hive step and add to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create install Pig step and add to the StepConfig List
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add any configured Hadoop jar steps.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                    hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                    hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());

            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}

From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java

License:Apache License

/**
 * @param config
 * @throws URISyntaxException
 */
public void runJob(Config config) throws URISyntaxException {
    RunJobFlowRequest runJobFlowRequest = null;

    CreateStepConfigger csc = getCreateStepConfigger(config);
    if (csc == null) {
        log.error("Step config create error");
        return;
    }

    if (jobFlowId == null) {
        runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME)
                .withBootstrapActions(
                        new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)),
                        new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI)
                                        .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")),
                        new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path())))
                .withInstances(setupJobFlowInstancesConfig());
        if (!isEmpty(emrProperties.getLogUri())) {
            runJobFlowRequest.setLogUri(emrProperties.getLogUri());
        }

        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
        if (emrProperties.isDebug()) {
            StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME)
                    .withActionOnFailure(ACTION_ON_TERMINATE)
                    .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
            stepConfigs.add(enableDebugging);
        }

        for (StepConfig sc : csc.createStepConfig(config)) {
            stepConfigs.add(sc);
        }
        runJobFlowRequest.setSteps(stepConfigs);

        try {
            RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest);
            jobFlowId = result.getJobFlowId();
            checkDate = new Date();
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e);
        }
    } else {
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId)
                .withSteps(csc.createStepConfig(config));
        emr.addJobFlowSteps(addJobFlowStepsRequest);
    }

    running = true;
    try {
        config.setJobFlowId(jobFlowId);
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }

    int stepSize = 0;
    String stepStatus = JobUtils.STEP_STATUS_PENDING;
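    // Poll until the step is no longer pending or running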
    while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING) || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) {
        if (sleep()) {
            break;
        }

        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest()
                .withJobFlowIds(jobFlowId);
        DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest);
        if (describeJobFlowsResult.getJobFlows().size() != 1) {
            break;
        }

        JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0);
        JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances();
        masterPublicDnsName = instancesDetail.getMasterPublicDnsName();
        if (isEmpty(config.getMasterPublicDnsName())) {
            try {
                config.setMasterPublicDnsName(masterPublicDnsName);
                QueueUtils.updateQueue(config);
            } catch (IOException e) {
                e.printStackTrace();
                log.error(e);
            }
        }

        stepSize = jobFlowDetail.getSteps().size();
        for (StepDetail stepDetail : jobFlowDetail.getSteps()) {
            if (stepDetail.getStepConfig().getName().equals(config.getName())) {
                stepStatus = stepDetail.getExecutionStatusDetail().getState();
                break;
            }
        }
    }

    if (config.isDeleteOnExit()) {
        if (config.getJobType() == Config.JOB_TYPE_STREAMING) {
            S3Utils.delete(s3, config.getArgMap().get("mapper"));
            S3Utils.delete(s3, config.getArgMap().get("reducer"));
        } else {
            S3Utils.delete(s3, config.getRun());
        }
    }

    // Work around the 256-step limit on a job flow (http://goo.gl/JDtsV)
    if (stepSize >= 255) {
        instanceTerminate();
    }

    running = false;

    if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) {
        config.setStatus(Config.JOB_STATUS_COMPLETE);
    } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) {
        config.setStatus(Config.JOB_STATUS_ERROR);
    } else if (terminated) {
        config.setStatus(Config.JOB_STATUS_CANCEL);
    }

    try {
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }
}

From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.HiveStepConfig.java

License:Apache License

/**
 * Creates the step config with a new {@link StepFactory}.
 */
public HiveStepConfig() {
    this.stepFactory = new StepFactory();
}

From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.PigStepConfig.java

License:Apache License

/**
 * Creates the step config with a new {@link StepFactory}.
 */
public PigStepConfig() {
    this.stepFactory = new StepFactory();
}

From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java

License:Apache License

private StepConfig configureHiveStep(String stagingS3qUrl, String cmdLineArgs) {

    String[] cmdLineArgsArr;
    if (cmdLineArgs == null) {
        cmdLineArgsArr = new String[] { "" };
    } else {
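        // Split the arguments on whitespace and run each token through replaceDoubleS3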
        List<String> cmdArgs = Arrays.asList(cmdLineArgs.split("\\s+"));
        List<String> updatedCmdArgs = cmdArgs.stream().map(e -> replaceDoubleS3(e))
                .collect(Collectors.toList());
        cmdLineArgsArr = updatedCmdArgs.toArray(new String[updatedCmdArgs.size()]);
    }

    StepConfig hiveStepConfig = new StepConfig("Hive",
            new StepFactory().newRunHiveScriptStep(stagingS3qUrl, cmdLineArgsArr));
    if (alive) {
        hiveStepConfig.withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT);
    } else {
        hiveStepConfig.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);
    }
    return hiveStepConfig;
}