Example usage for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest setBootstrapActions

List of usage examples for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest setBootstrapActions

Introduction

In this page you can find the example usage for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest setBootstrapActions.

Prototype


public void setBootstrapActions(java.util.Collection<BootstrapActionConfig> bootstrapActions) 

Source Link

Document

A list of bootstrap actions to run before Hadoop starts on the cluster nodes.

Usage

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License: Apache License

/**
 * Starts a new EMR job flow for this cluster and blocks until the cluster is running.
 *
 * <p>Moves the cluster state to STARTING, submits a RunJobFlow request built from the
 * configured settings, waits for the cluster to come up, and finally transitions to
 * CONNECTED on success or back to UNCONNECTED (clearing the job flow id) on any failure.
 *
 * @throws InterruptedException if interrupted while waiting for the cluster to start
 * @throws NullPointerException if no private key name is configured
 * @throws IllegalStateException if a job flow with the same name is already running
 */
public synchronized void startup() throws InterruptedException {
    checkConnection(false);
    _clusterState = ClusterState.STARTING;
    boolean startupCompleted = false;
    try {
        EmrSettings settings = getSettings();
        if (settings.getPrivateKeyName() == null) {
            throw new NullPointerException(
                    "privateKeyName must not be null please configure settings properly");
        }
        LOG.info("Starting job flow '" + getName() + "' ...");
        if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) {
            throw new IllegalStateException("Job flow with name '" + getName() + "' already running.");
        }

        // Describe the EC2 instance layout of the cluster.
        JobFlowInstancesConfig instancesConfig = new JobFlowInstancesConfig();
        // NOTE(review): this method mixes the 'settings' local with the '_settings' field —
        // presumably getSettings() returns _settings; confirm and unify.
        instancesConfig.setHadoopVersion(_settings.getHadoopVersion());
        instancesConfig.setMasterInstanceType(settings.getMasterInstanceType().getId());
        instancesConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId());
        instancesConfig.setInstanceCount(settings.getInstanceCount());
        instancesConfig.setEc2KeyName(settings.getPrivateKeyName());
        instancesConfig.setPlacement(new PlacementType());
        // Keep the cluster alive after all steps finish so further work can be submitted.
        instancesConfig.setKeepJobFlowAliveWhenNoSteps(true);

        final RunJobFlowRequest request = new RunJobFlowRequest();
        request.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath());
        request.setInstances(instancesConfig);
        request.setName(getName());
        request.setAdditionalInfo(_settings.getAdditionalStartInfo());
        request.setBootstrapActions(_settings.getBootstrapActions());
        if (settings.isDebugEnabled()) {
            request.withSteps(DEBUG_STEP);
        }

        RunJobFlowResult response = _emrWebService.runJobFlow(request);
        _jobFlowId = response.getJobFlowId();
        waitUntilClusterStarted(_jobFlowId);
        LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is "
                + _masterHost);
        startupCompleted = true;
    } finally {
        if (startupCompleted) {
            _clusterState = ClusterState.CONNECTED;
        } else {
            _clusterState = ClusterState.UNCONNECTED;
            _jobFlowId = null;
        }
    }
}

From source file:org.finra.dm.dao.impl.EmrDaoImpl.java

License: Apache License

/**
 * Create the run job flow request object for the given cluster definition.
 *
 * @param clusterName the EMR cluster name.
 * @param emrClusterDefinition the EMR cluster definition.
 *
 * @return run job flow request for the given configuration.
 */
private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) {
    // Create the request with the mandatory name and instance configuration.
    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(clusterName,
            getJobFlowInstancesConfig(emrClusterDefinition));

    // Set release label
    if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) {
        runJobFlowRequest.setReleaseLabel(emrClusterDefinition.getReleaseLabel());
    }

    // Set list of Applications
    List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications = emrClusterDefinition
            .getApplications();
    if (!CollectionUtils.isEmpty(emrClusterDefinitionApplications)) {
        runJobFlowRequest.setApplications(getApplications(emrClusterDefinitionApplications));
    }

    // Set list of Configurations
    List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations = emrClusterDefinition
            .getConfigurations();
    if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) {
        runJobFlowRequest.setConfigurations(getConfigurations(emrClusterDefinitionConfigurations));
    }

    // Set the log bucket if specified
    if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) {
        runJobFlowRequest.setLogUri(emrClusterDefinition.getLogBucket());
    }

    // Set the visible to all flag
    if (emrClusterDefinition.isVisibleToAll() != null) {
        runJobFlowRequest.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll());
    }

    // Set the IAM profile for the nodes, falling back to the configured default.
    if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) {
        runJobFlowRequest.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName());
    } else {
        runJobFlowRequest.setJobFlowRole(dmStringHelper
                .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME));
    }

    // Set the IAM profile for the service, falling back to the configured default.
    if (StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) {
        runJobFlowRequest.setServiceRole(emrClusterDefinition.getServiceIamRole());
    } else {
        runJobFlowRequest.setServiceRole(dmStringHelper
                .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME));
    }

    // Set the AMI version if specified
    if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) {
        runJobFlowRequest.setAmiVersion(emrClusterDefinition.getAmiVersion());
    }

    // Set the additionalInfo if specified
    if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) {
        runJobFlowRequest.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo());
    }

    // Set the bootstrap actions. Compute the list once instead of calling the
    // builder helper twice (once for the emptiness guard and once for the setter).
    List<BootstrapActionConfig> bootstrapActionConfigs = getBootstrapActionConfigList(emrClusterDefinition);
    if (!bootstrapActionConfigs.isEmpty()) {
        runJobFlowRequest.setBootstrapActions(bootstrapActionConfigs);
    }

    // Set the app installation steps (computed once, same reasoning as above).
    List<StepConfig> stepConfigs = getStepConfig(emrClusterDefinition);
    if (!stepConfigs.isEmpty()) {
        runJobFlowRequest.setSteps(stepConfigs);
    }

    // Set the tags (computed once, same reasoning as above).
    List<Tag> emrTags = getEmrTags(emrClusterDefinition);
    if (!emrTags.isEmpty()) {
        runJobFlowRequest.setTags(emrTags);
    }

    // Assign supported products as applicable
    if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) {
        List<String> supportedProducts = new ArrayList<>();
        supportedProducts.add(emrClusterDefinition.getSupportedProduct());
        runJobFlowRequest.setSupportedProducts(supportedProducts);
    }

    // Return the fully-configured request.
    return runJobFlowRequest;
}

From source file:org.finra.herd.dao.impl.EmrDaoImpl.java

License: Apache License

/**
 * Builds the run job flow request used to launch an EMR cluster.
 *
 * <p>Starts from the mandatory cluster name and instances configuration, then copies
 * every optional attribute from the definition onto the request, applying configured
 * defaults for the EC2 node IAM profile and the service IAM role when the definition
 * leaves them blank.
 *
 * @param clusterName the EMR cluster name
 * @param emrClusterDefinition the EMR cluster definition
 *
 * @return the run job flow request for the given configuration
 */
private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) {
    // Mandatory parts: name and instances configuration.
    RunJobFlowRequest request = new RunJobFlowRequest(clusterName,
            getJobFlowInstancesConfig(emrClusterDefinition));

    // Optional release label.
    if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) {
        request.setReleaseLabel(emrClusterDefinition.getReleaseLabel());
    }

    // Optional list of applications.
    List<EmrClusterDefinitionApplication> definitionApplications = emrClusterDefinition
            .getApplications();
    if (!CollectionUtils.isEmpty(definitionApplications)) {
        request.setApplications(getApplications(definitionApplications));
    }

    // Optional list of configurations.
    List<EmrClusterDefinitionConfiguration> definitionConfigurations = emrClusterDefinition
            .getConfigurations();
    if (!CollectionUtils.isEmpty(definitionConfigurations)) {
        request.setConfigurations(getConfigurations(definitionConfigurations));
    }

    // Optional log bucket.
    if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) {
        request.setLogUri(emrClusterDefinition.getLogBucket());
    }

    // Optional "visible to all users" flag.
    if (emrClusterDefinition.isVisibleToAll() != null) {
        request.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll());
    }

    // EC2 node IAM profile, defaulted from configuration when blank.
    if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) {
        request.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName());
    } else {
        request.setJobFlowRole(herdStringHelper
                .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME));
    }

    // Service IAM role, defaulted from configuration when blank.
    if (StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) {
        request.setServiceRole(emrClusterDefinition.getServiceIamRole());
    } else {
        request.setServiceRole(herdStringHelper
                .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME));
    }

    // Optional AMI version.
    if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) {
        request.setAmiVersion(emrClusterDefinition.getAmiVersion());
    }

    // Optional additional info.
    if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) {
        request.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo());
    }

    // Optional bootstrap actions (only set when at least one is defined).
    List<BootstrapActionConfig> bootstrapActions = getBootstrapActionConfigList(emrClusterDefinition);
    if (!bootstrapActions.isEmpty()) {
        request.setBootstrapActions(bootstrapActions);
    }

    // App installation steps.
    request.setSteps(getStepConfig(emrClusterDefinition));

    // Cluster tags.
    request.setTags(getEmrTags(emrClusterDefinition));

    // Optional single supported product.
    if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) {
        List<String> supportedProducts = new ArrayList<>();
        supportedProducts.add(emrClusterDefinition.getSupportedProduct());
        request.setSupportedProducts(supportedProducts);
    }

    // Optional security configuration.
    if (StringUtils.isNotBlank(emrClusterDefinition.getSecurityConfiguration())) {
        request.setSecurityConfiguration(emrClusterDefinition.getSecurityConfiguration());
    }

    // Optional scale down behavior.
    if (StringUtils.isNotBlank(emrClusterDefinition.getScaleDownBehavior())) {
        request.setScaleDownBehavior(emrClusterDefinition.getScaleDownBehavior());
    }

    // Kerberos attributes.
    request.setKerberosAttributes(getKerberosAttributes(emrClusterDefinition.getKerberosAttributes()));

    return request;
}

From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java

License: Apache License

/**
 * Builds the RunJobFlowRequest used to launch the EMR cluster for the given job entry.
 *
 * <p>Configures name, release label, log URI, EC2 instances, and job steps; for Hive
 * steps it also attaches the applications and any bootstrap actions. EC2 and EMR roles
 * fall back to the default role constants when the job entry leaves them blank.
 *
 * @param stagingS3FileUrl  S3 URL of the staged job artifact used to build the steps
 * @param stagingS3BucketUrl S3 URL used as the cluster log URI
 * @param stepType           the step type; Hive steps get applications/bootstrap actions
 * @param mainClass          main class forwarded to the step configuration
 * @param bootstrapActions   raw bootstrap actions string, parsed by initBootstrapActions
 * @param jobEntry           the job entry providing cluster name, release, sizing and roles
 * @return the fully-configured run job flow request
 */
@VisibleForTesting
RunJobFlowRequest initEmrCluster(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType,
        String mainClass, String bootstrapActions, AbstractAmazonJobEntry jobEntry) {

    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();

    runJobFlowRequest.setName(jobEntry.getHadoopJobName());
    runJobFlowRequest.setReleaseLabel(jobEntry.getEmrRelease());
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);

    JobFlowInstancesConfig instances = initEC2Instance(Integer.parseInt(jobEntry.getNumInstances()),
            jobEntry.getMasterInstanceType(), jobEntry.getSlaveInstanceType());
    runJobFlowRequest.setInstances(instances);

    List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry);
    if (!steps.isEmpty()) {
        runJobFlowRequest.setSteps(steps);
    }

    // Constant-first equals is null-safe should stepType ever be null.
    if (STEP_HIVE.equals(stepType)) {
        List<Application> applications = initApplications();
        if (!applications.isEmpty()) {
            runJobFlowRequest.setApplications(applications);
        }

        List<BootstrapActionConfig> stepBootstrapActions = initBootstrapActions(bootstrapActions);
        if (stepBootstrapActions != null && !stepBootstrapActions.isEmpty()) {
            runJobFlowRequest.setBootstrapActions(stepBootstrapActions);
        }
    }

    runJobFlowRequest.setJobFlowRole(roleOrDefault(jobEntry.getEc2Role(), EMR_EC2_DEFAULT_ROLE));
    // NOTE(review): EMR_EFAULT_ROLE looks like a typo of EMR_DEFAULT_ROLE; the constant is
    // declared elsewhere in this class, so renaming it must be done there first.
    runJobFlowRequest.setServiceRole(roleOrDefault(jobEntry.getEmrRole(), EMR_EFAULT_ROLE));

    runJobFlowRequest.setVisibleToAllUsers(true);

    return runJobFlowRequest;
}

/**
 * Returns the given role, or the fallback when the role is null or blank.
 */
private static String roleOrDefault(String role, String fallback) {
    return (role == null || role.trim().isEmpty()) ? fallback : role;
}

From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java

License: Apache License

/**
 * Prepare to create a EMR job flow./*from www .  j  a  va 2  s .c  o  m*/
 * 
 * @return RunJobFlowRequest The object to request an EMR job flow
 */
public RunJobFlowRequest createJobFlow() {

    // Create a RunJobFlowRequest object, set a name for the job flow.
    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setName(hadoopJobName);

    // Set a log URL.
    String logUrl = stagingDir;
    if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$
        logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$
    }
    runJobFlowRequest.setLogUri(logUrl);

    // Determine the instances for Hadoop cluster.
    String numInstancesS = environmentSubstitute(numInstances);
    int numInsts = 2;
    try {
        numInsts = Integer.parseInt(numInstancesS);
    } catch (NumberFormatException e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$
                numInstancesS));
    }
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setInstanceCount(numInsts);
    instances.setMasterInstanceType(getInstanceType(masterInstanceType));
    instances.setSlaveInstanceType(getInstanceType(slaveInstanceType));
    instances.setHadoopVersion("0.20"); //$NON-NLS-1$
    instances.setKeepJobFlowAliveWhenNoSteps(alive);
    runJobFlowRequest.setInstances(instances);

    // Set bootstrap actions.
    runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions());

    // Create an EMR step to setup Hive.
    String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$
    List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$
    runJobFlowRequest.setSteps(steps);

    return runJobFlowRequest;
}