Example usage for com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig HadoopJarStepConfig(String jar)

Introduction

On this page you can find example usage of the com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig constructor HadoopJarStepConfig(String jar).

Prototype

public HadoopJarStepConfig(String jar) 

Document

Constructs a new HadoopJarStepConfig object.
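
A minimal sketch of the constructor in use, before the full examples below (the jar path, class name, and bucket names are illustrative placeholders, not values from the AWS SDK):

    // Point the step at a JAR in S3 and supply its main-method arguments.
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig("s3://my-bucket/jars/wordcount.jar")
            .withMainClass("com.example.WordCount") // optional if the JAR manifest names a main class
            .withArgs("s3://my-bucket/input", "s3://my-bucket/output");

    // Wrap it in a StepConfig before attaching it to a job flow.
    StepConfig step = new StepConfig().withName("Word count")
            .withActionOnFailure(ActionOnFailure.CONTINUE)
            .withHadoopJarStep(jarConfig);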

Usage

From source file: com.clouddrive.parth.NewClass.java

public static String runCluster() throws Exception {
    long start = System.currentTimeMillis();
    String temp = "";
    // Configure the job flow
    RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance());
    request.setLogUri(S3N_LOG_URI);

    // Configure the Hadoop jar to use
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR);
    jarConfig.setArgs(ARGS_AS_LIST);

    try {

        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());

        StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1),
                jarConfig);

        request.setSteps(Arrays.asList(new StepConfig[] { enableDebugging, runJar }));

        // Run the job flow
        RunJobFlowResult result = emr.runJobFlow(request);

        // Check the status of the running job
        String lastState = "";

        STATUS_LOOP: while (true) {
            DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest(
                    Arrays.asList(new String[] { result.getJobFlowId() }));
            DescribeJobFlowsResult descResult = emr.describeJobFlows(desc);
            for (JobFlowDetail detail : descResult.getJobFlows()) {
                String state = detail.getExecutionStatusDetail().getState();
                if (isDone(state)) {
                    System.out.println("Job " + state + ": " + detail.toString());
                    break STATUS_LOOP;
                } else if (!lastState.equals(state)) {
                    lastState = state;
                    System.out.println("Job " + state + " at " + new Date().toString());
                }
            }
            Thread.sleep(10000);
        }
        temp = FLOW_NAME;
        long end = System.currentTimeMillis();
        System.out.println("Computation " + (end - start));
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Reponse Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }
    return temp;
}
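
A note on the polling loop above: DescribeJobFlows is deprecated in the EMR API. A minimal sketch of the equivalent status check with the newer DescribeCluster call (the job flow id returned by runJobFlow doubles as the cluster id; the same emr client is assumed):

    DescribeClusterRequest desc = new DescribeClusterRequest().withClusterId(result.getJobFlowId());
    String state = emr.describeCluster(desc).getCluster().getStatus().getState();
    // e.g. STARTING, BOOTSTRAPPING, RUNNING, WAITING, TERMINATED, TERMINATED_WITH_ERRORS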

From source file: org.finra.dm.dao.impl.EmrDaoImpl.java

License: Apache License

/**
 * Creates the list of step config objects for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition.
 *
 * @return the list of step configurations containing all the steps for the given definition.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Add step to copy DM oozie wrapper workflow to HDFS.
    String wrapperWorkflowS3Location = getS3LocationForConfiguration(
            emrHelper.getEmrOozieDmWorkflowS3LocationConfiguration());

    String wrapperWorkflowHdfsLocation = configurationHelper
            .getProperty(ConfigurationValue.EMR_OOZIE_DM_WRAPPER_WORKFLOW_HDFS_LOCATION);

    List<String> s3ToHdfsCopyScriptArgsList = new ArrayList<>();

    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location + emrHelper.getS3HdfsCopyScriptName());

    // 1. Source S3 location
    // 2. Target HDFS location.
    // 3. Temp folder to use on local node.
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location);
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowHdfsLocation);
    s3ToHdfsCopyScriptArgsList.add(UUID.randomUUID().toString());

    HadoopJarStepConfig copyWrapperJarConfig = new HadoopJarStepConfig(hadoopJarForShellScript)
            .withArgs(s3ToHdfsCopyScriptArgsList);
    appSteps.add(new StepConfig().withName("Copy DM oozie wrapper").withHadoopJarStep(copyWrapperJarConfig));

    // Create install hive step and add to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create install Pig step and add to the StepConfig List
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add Oozie support if needed
    if (emrClusterDefinition.isInstallOozie() != null && emrClusterDefinition.isInstallOozie()) {
        String oozieShellArg = getS3StagingLocation()
                + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_TAR_FILE);

        List<String> argsList = new ArrayList<>();
        argsList.add(getOozieScriptLocation());
        argsList.add(oozieShellArg);

        HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
        appSteps.add(new StepConfig().withName("Oozie").withHadoopJarStep(jarConfig));
    }

    // Add the hadoop jar steps that need to be added.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                    hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                    hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());

            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}
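
The returned list is attached to a cluster request elsewhere in the DAO; a hedged sketch of one way to submit such steps to an already-running cluster (the emrClient variable and cluster id are illustrative, not part of this source):

    // "j-XXXXXXXXXXXXX" stands in for a real cluster id.
    AddJobFlowStepsRequest addRequest = new AddJobFlowStepsRequest()
            .withJobFlowId("j-XXXXXXXXXXXXX")
            .withSteps(getStepConfig(emrClusterDefinition));
    emrClient.addJobFlowSteps(addRequest);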

From source file: org.finra.dm.service.helper.EmrOozieStepHelper.java

License: Apache License

@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrOozieStep oozieStep = (EmrOozieStep) step;

    // Hadoop JAR provided by Amazon to run shell scripts
    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // The Oozie SDK cannot be used at the moment because the Oozie port 11000 would need to be opened for it.
    // As a workaround, a custom shell script runs the Oozie client to submit the Oozie job.
    // Once an Oozie SDK implementation is in place, this custom shell script can be removed.
    // Get the custom Oozie shell script
    String oozieShellScript = emrHelper.getS3StagingLocation()
            + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
            + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_RUN_SCRIPT);

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (oozieStep.isContinueOnError() != null && oozieStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // Add the arguments to the custom shell script
    List<String> argsList = new ArrayList<>();

    // Get the oozie client run shell script
    argsList.add(oozieShellScript);

    // Specify the arguments
    argsList.add(oozieStep.getWorkflowXmlLocation().trim());
    argsList.add(oozieStep.getOoziePropertiesFileLocation().trim());

    // Build the StepConfig object and return
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
    return new StepConfig().withName(oozieStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(jarConfig);
}

From source file: org.finra.dm.service.helper.EmrShellStepHelper.java

License: Apache License

@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrShellStep emrShellStep = (EmrShellStep) step;

    // Hadoop JAR provided by Amazon for running shell scripts
    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (emrShellStep.isContinueOnError() != null && emrShellStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // Add the script location
    List<String> argsList = new ArrayList<>();
    argsList.add(emrShellStep.getScriptLocation().trim());

    // Add the script arguments
    if (!CollectionUtils.isEmpty(emrShellStep.getScriptArguments())) {
        for (String argument : emrShellStep.getScriptArguments()) {
            argsList.add(argument.trim());
        }
    }

    // Return the StepConfig object
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
    return new StepConfig().withName(emrShellStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(jarConfig);
}
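
Both helpers above rely on a shell-script runner JAR configured per environment. On EMR release 4.x and later, the same pattern is usually expressed with command-runner.jar, which is resolved on the cluster itself; a minimal sketch under that assumption (the script location is a placeholder):

    // command-runner.jar needs no S3 path; the cluster resolves it locally.
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig("command-runner.jar")
            .withArgs("bash", "-c", "aws s3 cp s3://my-bucket/scripts/run.sh . && bash run.sh");
    StepConfig step = new StepConfig().withName("Shell step")
            .withActionOnFailure(ActionOnFailure.CONTINUE)
            .withHadoopJarStep(jarConfig);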