List of usage examples for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest setInstances
public void setInstances(JobFlowInstancesConfig instances)
A specification of the number and type of Amazon EC2 instances.
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License: Apache License
public synchronized void startup() throws InterruptedException { checkConnection(false);//from w w w .j a v a 2 s. c o m _clusterState = ClusterState.STARTING; boolean successful = false; try { EmrSettings settings = getSettings(); if (settings.getPrivateKeyName() == null) { throw new NullPointerException( "privateKeyName must not be null please configure settings properly"); } LOG.info("Starting job flow '" + getName() + "' ..."); if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) { throw new IllegalStateException("Job flow with name '" + getName() + "' already running."); } boolean keepAlive = true; JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig(); jobConfig.setHadoopVersion(_settings.getHadoopVersion()); jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId()); jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId()); jobConfig.setInstanceCount(settings.getInstanceCount()); jobConfig.setEc2KeyName(settings.getPrivateKeyName()); jobConfig.setPlacement(new PlacementType()); jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive); final RunJobFlowRequest startRequest = new RunJobFlowRequest(); startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath()); startRequest.setInstances(jobConfig); startRequest.setName(getName()); startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo()); startRequest.setBootstrapActions(_settings.getBootstrapActions()); if (settings.isDebugEnabled()) { startRequest.withSteps(DEBUG_STEP); } RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest); _jobFlowId = startResponse.getJobFlowId(); waitUntilClusterStarted(_jobFlowId); LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is " + _masterHost); successful = true; } finally { if (successful) { _clusterState = ClusterState.CONNECTED; } else { _clusterState = ClusterState.UNCONNECTED; _jobFlowId = null; } } }
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License: Apache License
/**
 * Builds a {@link RunJobFlowRequest} describing the EMR cluster to launch.
 *
 * @param stagingS3FileUrl   S3 URL of the staged job artifact, passed to step creation
 * @param stagingS3BucketUrl S3 URL used as the cluster's log URI
 * @param stepType           kind of step being configured; Hive steps additionally get
 *                           applications and bootstrap actions
 * @param mainClass          main class for the job step, passed to step creation
 * @param bootstrapActions   raw bootstrap-action configuration (used for Hive steps only)
 * @param jobEntry           job definition supplying name, release label, sizing and roles
 * @return a fully populated request, marked visible to all users
 */
@VisibleForTesting
RunJobFlowRequest initEmrCluster(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType,
        String mainClass, String bootstrapActions, AbstractAmazonJobEntry jobEntry) {
    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setName(jobEntry.getHadoopJobName());
    runJobFlowRequest.setReleaseLabel(jobEntry.getEmrRelease());
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);

    JobFlowInstancesConfig instances = initEC2Instance(Integer.parseInt(jobEntry.getNumInstances()),
            jobEntry.getMasterInstanceType(), jobEntry.getSlaveInstanceType());
    runJobFlowRequest.setInstances(instances);

    List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry);
    if (!steps.isEmpty()) {
        runJobFlowRequest.setSteps(steps);
    }

    if (stepType.equals(STEP_HIVE)) {
        List<Application> applications = initApplications();
        if (!applications.isEmpty()) {
            runJobFlowRequest.setApplications(applications);
        }
        List<BootstrapActionConfig> stepBootstrapActions = initBootstrapActions(bootstrapActions);
        if (stepBootstrapActions != null && !stepBootstrapActions.isEmpty()) {
            runJobFlowRequest.setBootstrapActions(stepBootstrapActions);
        }
    }

    // Fall back to the AWS default roles when none are configured.
    // NOTE(review): EMR_EFAULT_ROLE looks like a typo of "EMR_DEFAULT_ROLE"; confirm and
    // rename at the constant's declaration site, not here.
    runJobFlowRequest.setJobFlowRole(defaultIfBlank(jobEntry.getEc2Role(), EMR_EC2_DEFAULT_ROLE));
    runJobFlowRequest.setServiceRole(defaultIfBlank(jobEntry.getEmrRole(), EMR_EFAULT_ROLE));

    runJobFlowRequest.setVisibleToAllUsers(true);
    return runJobFlowRequest;
}

/** Returns {@code value} unless it is null or blank, in which case {@code fallback}. */
private static String defaultIfBlank(String value, String fallback) {
    return (value == null || value.trim().isEmpty()) ? fallback : value;
}
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License: Apache License
public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) { List<String> jarStepArgs = new ArrayList<String>(); if (!StringUtil.isEmpty(cmdLineArgs)) { StringTokenizer st = new StringTokenizer(cmdLineArgs, " "); while (st.hasMoreTokens()) { String token = st.nextToken(); logBasic("adding args: " + token); jarStepArgs.add(token);// ww w.j a v a 2s . com } } HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(); hadoopJarStep.setJar(stagingS3Jar); hadoopJarStep.setMainClass(mainClass); hadoopJarStep.setArgs(jarStepArgs); StepConfig stepConfig = new StepConfig(); stepConfig.setName("custom jar: " + jarUrl); stepConfig.setHadoopJarStep(hadoopJarStep); List<StepConfig> steps = new ArrayList<StepConfig>(); steps.add(stepConfig); String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError("Unable to parse number of instances to use '" + numInstancesS + "' - " + "using 2 instances..."); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setSteps(steps); runJobFlowRequest.setLogUri(stagingS3BucketUrl); runJobFlowRequest.setName(hadoopJobName); runJobFlowRequest.setInstances(instances); // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig(); // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh"); // List<String> bootstrapArgs = new ArrayList<String>(); // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com"); // // // bootstrapArgs.add( // "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/"); // 
bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip"); // scriptBootstrapAction.setArgs(bootstrapArgs); // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig(); // bootstrapActionConfig.setName("mdd bootstrap"); // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction); // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>(); // bootstrapActions.add(bootstrapActionConfig); // runJobFlowRequest.setBootstrapActions(bootstrapActions); return runJobFlowRequest; }
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License: Apache License
/** * Prepare to create a EMR job flow.//from www. j a v a 2 s . c o m * * @return RunJobFlowRequest The object to request an EMR job flow */ public RunJobFlowRequest createJobFlow() { // Create a RunJobFlowRequest object, set a name for the job flow. RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setName(hadoopJobName); // Set a log URL. String logUrl = stagingDir; if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$ logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$ } runJobFlowRequest.setLogUri(logUrl); // Determine the instances for Hadoop cluster. String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$ numInstancesS)); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); //$NON-NLS-1$ instances.setKeepJobFlowAliveWhenNoSteps(alive); runJobFlowRequest.setInstances(instances); // Set bootstrap actions. runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions()); // Create an EMR step to setup Hive. String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$ List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$ runJobFlowRequest.setSteps(steps); return runJobFlowRequest; }