Example usage for com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig HadoopJarStepConfig(String jar)

Introduction

On this page you can find example usage of the com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig constructor HadoopJarStepConfig(String jar).

Prototype

public HadoopJarStepConfig(String jar) 

Document

Constructs a new HadoopJarStepConfig object.
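
A minimal sketch of the constructor in use, before the full examples below (the jar path, class name, and bucket names are illustrative placeholders, not values from the AWS SDK):

    // Point the step at a JAR in S3 and supply its main-method arguments.
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig("s3://my-bucket/jars/wordcount.jar")
            .withMainClass("com.example.WordCount") // optional if the JAR manifest names a main class
            .withArgs("s3://my-bucket/input", "s3://my-bucket/output");

    // Wrap it in a StepConfig before attaching it to a job flow.
    StepConfig step = new StepConfig().withName("Word count")
            .withActionOnFailure(ActionOnFailure.CONTINUE)
            .withHadoopJarStep(jarConfig);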

Usage

From source file: com.clouddrive.parth.NewClass.java

public static String runCluster() throws Exception {
    long start = System.currentTimeMillis();
    String temp = "";
    // Configure the job flow
    RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance());
    request.setLogUri(S3N_LOG_URI);

    // Configure the Hadoop jar to use
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR);
    jarConfig.setArgs(ARGS_AS_LIST);

    try {

        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());

        StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1),
                jarConfig);

        request.setSteps(Arrays.asList(new StepConfig[] { enableDebugging, runJar }));

        // Run the job flow
        RunJobFlowResult result = emr.runJobFlow(request);

        // Check the status of the running job
        String lastState = "";

        STATUS_LOOP: while (true) {
            DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest(
                    Arrays.asList(new String[] { result.getJobFlowId() }));
            DescribeJobFlowsResult descResult = emr.describeJobFlows(desc);
            for (JobFlowDetail detail : descResult.getJobFlows()) {
                String state = detail.getExecutionStatusDetail().getState();
                if (isDone(state)) {
                    System.out.println("Job " + state + ": " + detail.toString());
                    break STATUS_LOOP;
                } else if (!lastState.equals(state)) {
                    lastState = state;
                    System.out.println("Job " + state + " at " + new Date().toString());
                }
            }
            Thread.sleep(10000);
        }
        temp = FLOW_NAME;
        long end = System.currentTimeMillis();
        System.out.println("Computation " + (end - start));
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Reponse Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }
    return temp;
}
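
A note on the polling loop above: DescribeJobFlows is deprecated in the EMR API. A minimal sketch of the equivalent status check with the newer DescribeCluster call (the job flow id returned by runJobFlow doubles as the cluster id; the same emr client is assumed):

    DescribeClusterRequest desc = new DescribeClusterRequest().withClusterId(result.getJobFlowId());
    String state = emr.describeCluster(desc).getCluster().getStatus().getState();
    // e.g. STARTING, BOOTSTRAPPING, RUNNING, WAITING, TERMINATED, TERMINATED_WITH_ERRORS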

From source file: org.finra.dm.dao.impl.EmrDaoImpl.java

License: Apache License

/**
 * Creates the list of step config objects for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition.
 *
 * @return the list of step configurations containing all the steps for the given definition.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Add step to copy DM oozie wrapper workflow to HDFS.
    String wrapperWorkflowS3Location = getS3LocationForConfiguration(
            emrHelper.getEmrOozieDmWorkflowS3LocationConfiguration());

    String wrapperWorkflowHdfsLocation = configurationHelper
            .getProperty(ConfigurationValue.EMR_OOZIE_DM_WRAPPER_WORKFLOW_HDFS_LOCATION);

    List<String> s3ToHdfsCopyScriptArgsList = new ArrayList<>();

    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location + emrHelper.getS3HdfsCopyScriptName());

    // 1. Source S3 location
    // 2. Target HDFS location.
    // 3. Temp folder to use on local node.
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location);
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowHdfsLocation);
    s3ToHdfsCopyScriptArgsList.add(UUID.randomUUID().toString());

    HadoopJarStepConfig copyWrapperJarConfig = new HadoopJarStepConfig(hadoopJarForShellScript)
            .withArgs(s3ToHdfsCopyScriptArgsList);
    appSteps.add(new StepConfig().withName("Copy DM oozie wrapper").withHadoopJarStep(copyWrapperJarConfig));

    // Create install hive step and add to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create install Pig step and add to the StepConfig List
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add Oozie support if needed
    if (emrClusterDefinition.isInstallOozie() != null && emrClusterDefinition.isInstallOozie()) {
        String oozieShellArg = getS3StagingLocation()
                + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_TAR_FILE);

        List<String> argsList = new ArrayList<>();
        argsList.add(getOozieScriptLocation());
        argsList.add(oozieShellArg);

        HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
        appSteps.add(new StepConfig().withName("Oozie").withHadoopJarStep(jarConfig));
    }

    // Add the hadoop jar steps that need to be added.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                    hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                    hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());

            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}
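
The returned list is attached to a cluster request elsewhere in the DAO; a hedged sketch of one way to submit such steps to an already-running cluster (the emrClient variable and cluster id are illustrative, not part of this source):

    // "j-XXXXXXXXXXXXX" stands in for a real cluster id.
    AddJobFlowStepsRequest addRequest = new AddJobFlowStepsRequest()
            .withJobFlowId("j-XXXXXXXXXXXXX")
            .withSteps(getStepConfig(emrClusterDefinition));
    emrClient.addJobFlowSteps(addRequest);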

From source file: org.finra.dm.service.helper.EmrOozieStepHelper.java

License: Apache License

@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrOozieStep oozieStep = (EmrOozieStep) step;

    // Hadoop JAR provided by Amazon to run shell scripts
    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // The Oozie SDK cannot be used at the moment because the Oozie port 11000 would need to be opened for it.
    // As a workaround, a custom shell script runs the Oozie client to submit the Oozie job.
    // Once an Oozie SDK implementation is in place, this custom shell script can be removed.
    // Get the custom Oozie shell script
    String oozieShellScript = emrHelper.getS3StagingLocation()
            + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
            + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_RUN_SCRIPT);

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (oozieStep.isContinueOnError() != null && oozieStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // Add the arguments to the custom shell script
    List<String> argsList = new ArrayList<>();

    // Get the oozie client run shell script
    argsList.add(oozieShellScript);

    // Specify the arguments
    argsList.add(oozieStep.getWorkflowXmlLocation().trim());
    argsList.add(oozieStep.getOoziePropertiesFileLocation().trim());

    // Build the StepConfig object and return
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
    return new StepConfig().withName(oozieStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(jarConfig);
}

From source file: org.finra.dm.service.helper.EmrShellStepHelper.java

License: Apache License

@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrShellStep emrShellStep = (EmrShellStep) step;

    // Hadoop JAR provided by Amazon for running shell scripts
    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (emrShellStep.isContinueOnError() != null && emrShellStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // Add the script location
    List<String> argsList = new ArrayList<>();
    argsList.add(emrShellStep.getScriptLocation().trim());

    // Add the script arguments
    if (!CollectionUtils.isEmpty(emrShellStep.getScriptArguments())) {
        for (String argument : emrShellStep.getScriptArguments()) {
            argsList.add(argument.trim());
        }
    }

    // Return the StepConfig object
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
    return new StepConfig().withName(emrShellStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(jarConfig);
}
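
Both helpers above rely on a shell-script runner JAR configured per environment. On EMR release 4.x and later, the same pattern is usually expressed with command-runner.jar, which is resolved on the cluster itself; a minimal sketch under that assumption (the script location is a placeholder):

    // command-runner.jar needs no S3 path; the cluster resolves it locally.
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig("command-runner.jar")
            .withArgs("bash", "-c", "aws s3 cp s3://my-bucket/scripts/run.sh . && bash run.sh");
    StepConfig step = new StepConfig().withName("Shell step")
            .withActionOnFailure(ActionOnFailure.CONTINUE)
            .withHadoopJarStep(jarConfig);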