Example usage for com.amazonaws.services.elasticmapreduce.util StepFactory newInstallPigStep

List of usage examples for com.amazonaws.services.elasticmapreduce.util StepFactory newInstallPigStep

Introduction

On this page you can find example usages of com.amazonaws.services.elasticmapreduce.util.StepFactory.newInstallPigStep.

Prototype

public HadoopJarStepConfig newInstallPigStep(String... pigVersions) 

Source Link

Document

Step that installs Pig on your job flow.

Usage

From source file:org.finra.dm.dao.impl.EmrDaoImpl.java

License:Apache License

/**
 * Builds the ordered list of EMR steps for the given cluster definition.
 * <p>
 * Steps are added in this order: a step that runs the S3-to-HDFS copy script for the DM oozie wrapper workflow (always added), a Hive install step (only when
 * a Hive version is set), a Pig install step (only when a Pig version is set), an Oozie step (only when the install-Oozie flag is true), and finally one step
 * per Hadoop jar step listed in the definition.
 *
 * @param emrClusterDefinition the EMR cluster definition supplying the Hive/Pig versions, the install-Oozie flag and the Hadoop jar steps.
 *
 * @return the list of step configurations, in the order described above.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    List<StepConfig> steps = new ArrayList<>();
    StepFactory factory = new StepFactory();

    // Jar used by every shell-script based step below.
    String shellScriptJar = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Resolve the locations needed to copy the DM oozie wrapper workflow to HDFS.
    String workflowS3Location = getS3LocationForConfiguration(emrHelper.getEmrOozieDmWorkflowS3LocationConfiguration());
    String workflowHdfsLocation = configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_DM_WRAPPER_WORKFLOW_HDFS_LOCATION);
    String copyScriptLocation = workflowS3Location + emrHelper.getS3HdfsCopyScriptName();

    // The copy script location comes first, followed by:
    // 1. Source S3 location.
    // 2. Target HDFS location.
    // 3. Temp folder to use on local node.
    List<String> copyScriptArgs = new ArrayList<>();
    copyScriptArgs.add(copyScriptLocation);
    copyScriptArgs.add(workflowS3Location);
    copyScriptArgs.add(workflowHdfsLocation);
    copyScriptArgs.add(UUID.randomUUID().toString());

    HadoopJarStepConfig copyWrapperJar = new HadoopJarStepConfig(shellScriptJar).withArgs(copyScriptArgs);
    StepConfig copyWrapperStep = new StepConfig().withName("Copy DM oozie wrapper").withHadoopJarStep(copyWrapperJar);
    steps.add(copyWrapperStep);

    // Hive is installed only when a version was requested.
    String hiveVersion = emrClusterDefinition.getHiveVersion();
    if (StringUtils.isNotBlank(hiveVersion)) {
        HadoopJarStepConfig hiveInstaller = factory.newInstallHiveStep(hiveVersion);
        steps.add(new StepConfig().withName("Hive " + hiveVersion)
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(hiveInstaller));
    }

    // Pig is installed only when a version was requested.
    String pigVersion = emrClusterDefinition.getPigVersion();
    if (StringUtils.isNotBlank(pigVersion)) {
        HadoopJarStepConfig pigInstaller = factory.newInstallPigStep(pigVersion);
        steps.add(new StepConfig().withName("Pig " + pigVersion)
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(pigInstaller));
    }

    // Oozie is installed only when the flag is explicitly true (a null flag means "no").
    if (Boolean.TRUE.equals(emrClusterDefinition.isInstallOozie())) {
        String oozieTarLocation = getS3StagingLocation()
                + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_TAR_FILE);

        List<String> oozieArgs = new ArrayList<>();
        oozieArgs.add(getOozieScriptLocation());
        oozieArgs.add(oozieTarLocation);

        HadoopJarStepConfig oozieJar = new HadoopJarStepConfig(shellScriptJar).withArgs(oozieArgs);
        StepConfig oozieStep = new StepConfig().withName("Oozie").withHadoopJarStep(oozieJar);
        steps.add(oozieStep);
    }

    // Nothing more to add when the definition lists no Hadoop jar steps.
    if (CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        return steps;
    }

    // Append the Hadoop jar steps listed in the definition, preserving their order.
    for (HadoopJarStep jarStep : emrClusterDefinition.getHadoopJarSteps()) {
        steps.add(emrHelper.getEmrHadoopJarStepConfig(jarStep.getStepName(), jarStep.getJarLocation(), jarStep.getMainClass(),
                jarStep.getScriptArguments(), jarStep.isContinueOnError()));
    }

    return steps;
}

From source file:org.finra.herd.dao.impl.EmrDaoImpl.java

License:Apache License

/**
 * Builds the ordered list of EMR steps for the given cluster definition.
 * <p>
 * A Hive install step is added when a Hive version is set, then a Pig install step when a Pig version is set, and finally one step per Hadoop jar step
 * listed in the definition.
 *
 * @param emrClusterDefinition the EMR cluster definition supplying the Hive/Pig versions and the Hadoop jar steps.
 *
 * @return the list of step configurations, in the order described above; empty when the definition requests none.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    List<StepConfig> steps = new ArrayList<>();
    StepFactory factory = new StepFactory();

    // Hive is installed only when a version was requested.
    String hiveVersion = emrClusterDefinition.getHiveVersion();
    if (StringUtils.isNotBlank(hiveVersion)) {
        steps.add(new StepConfig().withName("Hive " + hiveVersion)
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(factory.newInstallHiveStep(hiveVersion)));
    }

    // Pig is installed only when a version was requested.
    String pigVersion = emrClusterDefinition.getPigVersion();
    if (StringUtils.isNotBlank(pigVersion)) {
        steps.add(new StepConfig().withName("Pig " + pigVersion)
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(factory.newInstallPigStep(pigVersion)));
    }

    // Nothing more to add when the definition lists no Hadoop jar steps.
    if (CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        return steps;
    }

    // Append the Hadoop jar steps listed in the definition, preserving their order.
    for (HadoopJarStep jarStep : emrClusterDefinition.getHadoopJarSteps()) {
        steps.add(emrHelper.getEmrHadoopJarStepConfig(jarStep.getStepName(), jarStep.getJarLocation(), jarStep.getMainClass(),
                jarStep.getScriptArguments(), jarStep.isContinueOnError()));
    }

    return steps;
}