Example usage for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest RunJobFlowRequest

Introduction

This page collects usage examples for the default constructor of com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest.

Prototype

public RunJobFlowRequest() 

Document

Default constructor for RunJobFlowRequest object.
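
A minimal sketch of the constructor in context (assuming an AmazonElasticMapReduce client named emr; the cluster name, release label, and instance settings below are placeholders, not values from the examples on this page):

    // Create the request with the no-arg constructor, then configure it with fluent setters.
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(1)
            .withMasterInstanceType("m4.large") // placeholder instance type
            .withKeepJobFlowAliveWhenNoSteps(false);

    RunJobFlowRequest request = new RunJobFlowRequest() // default constructor
            .withName("example-cluster") // placeholder name
            .withReleaseLabel("emr-5.20.0") // placeholder release label
            .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
            .withInstances(instances);

    String clusterId = emr.runJobFlow(request).getJobFlowId();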

Usage

From source file:awswc.AwsConsoleApp.java

License:Open Source License

static void runJobFlow() throws InterruptedException {
    // Configure instances to use
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    //********************************************************************//
    instances.setHadoopVersion(HADOOP_VERSION);
    instances.withEc2KeyName("ayuda-vp1");
    instances.setInstanceCount(MASTER_INSTANCE_COUNT);
    //instances.setInstanceGroups(instanceGroups)
    instances.setMasterInstanceType(InstanceType.M24xlarge.toString());
    instances.setSlaveInstanceType(InstanceType.M24xlarge.toString());
    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep1 = new HadoopJarStepConfig().withJar(S3N_WORD_COUNT_JAR_) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC");

    StepConfig stepConfig1 = new StepConfig().withName("wordcount").withHadoopJarStep(hadoopJarStep1)
            .withActionOnFailure("TERMINATE_JOB_FLOW");

    //********************************************************************//

    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep2 = new HadoopJarStepConfig().withJar(S3N_MAX_WORD_COUNT_JAR) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10");

    StepConfig stepConfig2 = new StepConfig().withName("maxwordcount").withHadoopJarStep(hadoopJarStep2)
            .withActionOnFailure("TERMINATE_JOB_FLOW");
    //********************************************************************//

    Collection<StepConfig> csc = new ArrayList<StepConfig>();
    csc.add(stepConfig1);
    csc.add(stepConfig2);

    // BootstrapActions bootstrapActions = new BootstrapActions();
    RunJobFlowRequest runFlowRequest = new RunJobFlowRequest().withName(FLOW_NAME).withInstances(instances)
            .withSteps(csc).withLogUri(BUCKET_NAME + "debug");

    // Optional bootstrap actions, commented out in the original source:
    // .withBootstrapActions(
    //         bootstrapActions.newRunIf("instance.isMaster=true",
    //                 bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.JobTracker, 4096).build()),
    //         bootstrapActions.newRunIf("instance.isRunningNameNode=true",
    //                 bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.NameNode, 4096).build()),
    //         bootstrapActions.newRunIf("instance.isRunningDataNode=true",
    //                 bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.DataNode, 4096).build()),
    //         bootstrapActions.newRunIf("instance.isRunningJobTracker=true",
    //                 bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.JobTracker, 4096).build()),
    //         bootstrapActions.newRunIf("instance.isRunningTaskTracker=true",
    //                 bootstrapActions.newConfigureDaemons().withHeapSize(Daemon.TaskTracker, 4096).build()),
    //         bootstrapActions.newRunIf("instance.isSlave=true",
    //                 bootstrapActions.newConfigureHadoop()
    //                         .withKeyValue(ConfigFile.Site, "mapred.tasktracker.map.tasks.maximum", "4")))

    RunJobFlowResult runJobFlowResult = emr.runJobFlow(runFlowRequest);

    String jobFlowId = runJobFlowResult.getJobFlowId();
    System.out.println("Ran job flow with id: " + jobFlowId);

    //wasFinished(runJobFlowResult);

}

From source file:com.aegeus.aws.ElasticMapReduceService.java

License:Apache License

/**
 * Create a new EMR cluster (release label emr-4.1.0).
 */
public void createCluster() {
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
            .withInstanceCount((int) config.getInstanceCount()).withMasterInstanceType(config.getMasterType())
            .withSlaveInstanceType(config.getSlaveType());

    if (!Strings.isNullOrEmpty(config.getKeyName())) {
        instances.setEc2KeyName(config.getKeyName());
    }

    if (!Strings.isNullOrEmpty(config.getSubnetId())) {
        instances.setEc2SubnetId(config.getSubnetId());
    } else {
        instances.setPlacement(new PlacementType(config.getPlace()));
    }

    ScriptBootstrapActionConfig installEsConfig = new ScriptBootstrapActionConfig()
            .withPath("s3://support.elasticmapreduce/bootstrap-actions/other/elasticsearch_install.rb");

    BootstrapActionConfig installEs = new BootstrapActionConfig("Elasticsearch Install", installEsConfig);

    RunJobFlowRequest request = new RunJobFlowRequest().withName(config.getName()).withReleaseLabel("emr-4.1.0")
            .withServiceRole("Default_AWS_Role").withJobFlowRole("Default_AWS_Role")
            .withBootstrapActions(installEs).withInstances(instances);

    if (!Strings.isNullOrEmpty(config.getLogBucket())) {
        request.setLogUri(config.getLogBucket());
    }

    RunJobFlowResult result = emr.runJobFlow(request);

    clusterId = result.getJobFlowId();
}

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License:Apache License

public synchronized void startup() throws InterruptedException {
    checkConnection(false);
    _clusterState = ClusterState.STARTING;
    boolean successful = false;
    try {
        EmrSettings settings = getSettings();
        if (settings.getPrivateKeyName() == null) {
            throw new NullPointerException(
                    "privateKeyName must not be null please configure settings properly");
        }
        LOG.info("Starting job flow '" + getName() + "' ...");
        if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) {
            throw new IllegalStateException("Job flow with name '" + getName() + "' already running.");
        }
        boolean keepAlive = true;
        JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig();
        jobConfig.setHadoopVersion(_settings.getHadoopVersion());
        jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId());
        jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId());
        jobConfig.setInstanceCount(settings.getInstanceCount());
        jobConfig.setEc2KeyName(settings.getPrivateKeyName());
        jobConfig.setPlacement(new PlacementType());
        jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);

        final RunJobFlowRequest startRequest = new RunJobFlowRequest();

        startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath());
        startRequest.setInstances(jobConfig);
        startRequest.setName(getName());
        startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo());
        startRequest.setBootstrapActions(_settings.getBootstrapActions());
        if (settings.isDebugEnabled()) {
            startRequest.withSteps(DEBUG_STEP);
        }
        RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest);
        _jobFlowId = startResponse.getJobFlowId();
        waitUntilClusterStarted(_jobFlowId);
        LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is "
                + _masterHost);
        successful = true;
    } finally {
        if (successful) {
            _clusterState = ClusterState.CONNECTED;
        } else {
            _clusterState = ClusterState.UNCONNECTED;
            _jobFlowId = null;
        }
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java

License:LGPL

void init() {

    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.AWSSecretKey);
    requireNonNull(this.jarLocation);
    requireNonNull(this.jarArguments);
    requireNonNull(this.slavesInstanceType);
    requireNonNull(this.hadoopVersion);
    requireNonNull(this.jobFlowName);

    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instance is lower than 1");
    }

    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }

    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim())
            .withArgs(this.jarArguments);

    // Set step config
    final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step")
            .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW");

    // Set the instance
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances)
            .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType)
            .withHadoopVersion(this.hadoopVersion);

    // Configure hadoop
    final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
            .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
            .withArgs("--site-key-value",
                    "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);

    final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop")
            .withScriptBootstrapAction(scriptBootstrapAction);

    // Enable debugging
    StepFactory stepFactory = new StepFactory();
    StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Run flow
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName);

    // Enable or not debugging
    if (this.enableDebugging) {
        this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig);
    } else {
        this.runFlowRequest.withInstances(instances).withSteps(stepConfig);
    }

    // Limit the number of task in a task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        this.runFlowRequest.withBootstrapActions(bootstrapActions);
    }

    if (this.logPathname != null && !"".equals(this.logPathname)) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }

    // Set EC2 Key name
    if (this.ec2KeyName != null) {
        this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName);
    }
}

From source file:org.deeplearning4j.legacyExamples.EmrSparkExample.java

License:Apache License

public void entryPoint(String[] args) {
    JCommander jcmdr = new JCommander(this);
    try {
        jcmdr.parse(args);
    } catch (ParameterException e) {
        jcmdr.usage();
        try {
            Thread.sleep(500);
        } catch (Exception e2) {
        }
        throw e;
    }

    AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard();
    builder.withRegion(region);
    builder.withCredentials(getCredentialsProvider());

    AmazonElasticMapReduce emr = builder.build();

    List<StepConfig> steps = new ArrayList<>();

    if (upload) {
        log.info("uploading uber jar");

        AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard();
        s3builder.withRegion(region);
        s3builder.withCredentials(getCredentialsProvider());
        AmazonS3 s3Client = s3builder.build();

        if (!s3Client.doesBucketExist(bucketName)) {
            s3Client.createBucket(bucketName);
        }

        File uberJarFile = new File(uberJar);

        s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile));
    }

    if (debug) {
        log.info("enable debug");

        StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce");
        StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());
        steps.add(enableDebugging);
    }

    if (execute) {
        log.info("execute spark step");

        HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig();
        sparkStepConf.withJar("command-runner.jar");
        sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className,
                getS3UberJarUrl(), "-useSparkLocal", "false");

        ActionOnFailure action = ActionOnFailure.TERMINATE_JOB_FLOW;

        if (keepAlive) {
            action = ActionOnFailure.CONTINUE;
        }

        StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action)
                .withHadoopJarStep(sparkStepConf);
        steps.add(sparkStep);
    }

    log.info("create spark cluster");

    Application sparkApp = new Application().withName("Spark");

    // service and job flow role will be created automatically when
    // launching cluster in aws console, better do that first or create
    // manually

    RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps)
            .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
            .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl())
            .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount)
                    .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType)
                    .withSlaveInstanceType(instanceType));

    RunJobFlowResult result = emr.runJobFlow(request);

    log.info(result.toString());

    log.info("done");
}

From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java

License:Apache License

/**
 * @param config
 * @throws URISyntaxException
 */
public void runJob(Config config) throws URISyntaxException {
    RunJobFlowRequest runJobFlowRequest = null;

    CreateStepConfigger csc = getCreateStepConfigger(config);
    if (csc == null) {
        log.error("Step config create error");
        return;
    }

    if (jobFlowId == null) {
        runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME)
                .withBootstrapActions(
                        new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)),
                        new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI)
                                        .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")),
                        new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path())))
                .withInstances(setupJobFlowInstancesConfig());
        if (!isEmpty(emrProperties.getLogUri())) {
            runJobFlowRequest.setLogUri(emrProperties.getLogUri());
        }

        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
        if (emrProperties.isDebug()) {
            StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME)
                    .withActionOnFailure(ACTION_ON_TERMINATE)
                    .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
            stepConfigs.add(enableDebugging);
        }

        for (StepConfig sc : csc.createStepConfig(config)) {
            stepConfigs.add(sc);
        }
        runJobFlowRequest.setSteps(stepConfigs);

        try {
            RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest);
            jobFlowId = result.getJobFlowId();
            checkDate = new Date();
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e);
        }
    } else {
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId)
                .withSteps(csc.createStepConfig(config));
        emr.addJobFlowSteps(addJobFlowStepsRequest);
    }

    running = true;
    try {
        config.setJobFlowId(jobFlowId);
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }

    int stepSize = 0;
    String stepStatus = JobUtils.STEP_STATUS_PENDING;
    while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING) || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) {
        if (sleep()) {
            break;
        }

        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest()
                .withJobFlowIds(jobFlowId);
        DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest);
        if (describeJobFlowsResult.getJobFlows().size() != 1) {
            break;
        }

        JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0);
        JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances();
        masterPublicDnsName = instancesDetail.getMasterPublicDnsName();
        if (isEmpty(config.getMasterPublicDnsName())) {
            try {
                config.setMasterPublicDnsName(masterPublicDnsName);
                QueueUtils.updateQueue(config);
            } catch (IOException e) {
                e.printStackTrace();
                log.error(e);
            }
        }

        stepSize = jobFlowDetail.getSteps().size();
        for (StepDetail stepDetail : jobFlowDetail.getSteps()) {
            if (stepDetail.getStepConfig().getName().equals(config.getName())) {
                stepStatus = stepDetail.getExecutionStatusDetail().getState();
                break;
            }
        }
    }

    if (config.isDeleteOnExit()) {
        if (config.getJobType() == Config.JOB_TYPE_STREAMING) {
            S3Utils.delete(s3, config.getArgMap().get("mapper"));
            S3Utils.delete(s3, config.getArgMap().get("reducer"));
        } else {
            S3Utils.delete(s3, config.getRun());
        }
    }

    // See "Add More than 256 Steps to a Job Flow" (http://goo.gl/JDtsV)
    if (stepSize >= 255) {
        instanceTerminate();
    }

    running = false;

    if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) {
        config.setStatus(Config.JOB_STATUS_COMPLETE);
    } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) {
        config.setStatus(Config.JOB_STATUS_ERROR);
    } else if (terminated) {
        config.setStatus(Config.JOB_STATUS_CANCEL);
    }

    try {
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }
}

From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java

License:Apache License

@VisibleForTesting
RunJobFlowRequest initEmrCluster(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType,
        String mainClass, String bootstrapActions, AbstractAmazonJobEntry jobEntry) {

    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();

    runJobFlowRequest.setName(jobEntry.getHadoopJobName());
    runJobFlowRequest.setReleaseLabel(jobEntry.getEmrRelease());
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);

    JobFlowInstancesConfig instances = initEC2Instance(Integer.parseInt(jobEntry.getNumInstances()),
            jobEntry.getMasterInstanceType(), jobEntry.getSlaveInstanceType());
    runJobFlowRequest.setInstances(instances);

    List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry);
    if (steps.size() > 0) {
        runJobFlowRequest.setSteps(steps);
    }

    if (stepType.equals(STEP_HIVE)) {
        List<Application> applications = initApplications();
        if (applications.size() > 0) {
            runJobFlowRequest.setApplications(applications);
        }

        List<BootstrapActionConfig> stepBootstrapActions = initBootstrapActions(bootstrapActions);
        if (stepBootstrapActions != null && stepBootstrapActions.size() > 0) {
            runJobFlowRequest.setBootstrapActions(stepBootstrapActions);
        }
    }

    String ec2Role = jobEntry.getEc2Role();
    if (ec2Role == null || ec2Role.trim().isEmpty()) {
        runJobFlowRequest.setJobFlowRole(EMR_EC2_DEFAULT_ROLE);
    } else {
        runJobFlowRequest.setJobFlowRole(ec2Role);
    }

    String emrRole = jobEntry.getEmrRole();
    if (emrRole == null || emrRole.trim().isEmpty()) {
        runJobFlowRequest.setServiceRole(EMR_DEFAULT_ROLE);
    } else {
        runJobFlowRequest.setServiceRole(emrRole);
    }

    runJobFlowRequest.setVisibleToAllUsers(true);

    return runJobFlowRequest;
}

From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java

License:Apache License

public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) {
    List<String> jarStepArgs = new ArrayList<String>();
    if (!StringUtil.isEmpty(cmdLineArgs)) {
        StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            logBasic("adding args: " + token);
            jarStepArgs.add(token);
        }
    }

    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
    hadoopJarStep.setJar(stagingS3Jar);
    hadoopJarStep.setMainClass(mainClass);
    hadoopJarStep.setArgs(jarStepArgs);

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName("custom jar: " + jarUrl);
    stepConfig.setHadoopJarStep(hadoopJarStep);

    List<StepConfig> steps = new ArrayList<StepConfig>();
    steps.add(stepConfig);

    String numInstancesS = environmentSubstitute(numInstances);
    int numInsts = 2;
    try {
        numInsts = Integer.parseInt(numInstancesS);
    } catch (NumberFormatException e) {
        logError("Unable to parse number of instances to use '" + numInstancesS + "' - "
                + "using 2 instances...");
    }
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setInstanceCount(numInsts);
    instances.setMasterInstanceType(getInstanceType(masterInstanceType));
    instances.setSlaveInstanceType(getInstanceType(slaveInstanceType));
    instances.setHadoopVersion("0.20");

    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setSteps(steps);
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);
    runJobFlowRequest.setName(hadoopJobName);
    runJobFlowRequest.setInstances(instances);

    // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig();
    // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh");
    // List<String> bootstrapArgs = new ArrayList<String>();
    // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com");
    // //
    // bootstrapArgs.add(
    //   "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/");
    // bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip");
    // scriptBootstrapAction.setArgs(bootstrapArgs);
    // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig();
    // bootstrapActionConfig.setName("mdd bootstrap");
    // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction);
    // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>();
    // bootstrapActions.add(bootstrapActionConfig);
    // runJobFlowRequest.setBootstrapActions(bootstrapActions);

    return runJobFlowRequest;
}

From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java

License:Apache License

/**
 * Prepare to create an EMR job flow.
 * 
 * @return RunJobFlowRequest The object to request an EMR job flow
 */
public RunJobFlowRequest createJobFlow() {

    // Create a RunJobFlowRequest object, set a name for the job flow.
    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setName(hadoopJobName);

    // Set a log URL.
    String logUrl = stagingDir;
    if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$
        logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$
    }
    runJobFlowRequest.setLogUri(logUrl);

    // Determine the instances for Hadoop cluster.
    String numInstancesS = environmentSubstitute(numInstances);
    int numInsts = 2;
    try {
        numInsts = Integer.parseInt(numInstancesS);
    } catch (NumberFormatException e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$
                numInstancesS));
    }
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setInstanceCount(numInsts);
    instances.setMasterInstanceType(getInstanceType(masterInstanceType));
    instances.setSlaveInstanceType(getInstanceType(slaveInstanceType));
    instances.setHadoopVersion("0.20"); //$NON-NLS-1$
    instances.setKeepJobFlowAliveWhenNoSteps(alive);
    runJobFlowRequest.setInstances(instances);

    // Set bootstrap actions.
    runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions());

    // Create an EMR step to setup Hive.
    String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$
    List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$
    runJobFlowRequest.setSteps(steps);

    return runJobFlowRequest;
}

From source file:rollsPOC2.util.AWSHelper.java

public static String createOrFindEMRHiveCluster(String clusterName, boolean createWithKeepAlive)
        throws Exception {
    String clusterId = null;
    AmazonElasticMapReduce emr = AppServices.getEMRClient();
    ClusterSummary clusterSummary = findCluster("Treebeard", emr);
    if (clusterSummary != null) {
        clusterId = clusterSummary.getId();
        System.err.printf("Cluster found with id %s, status %s\n", clusterId,
                clusterSummary.getStatus().getState());
    }

    if (clusterSummary != null && clusterSummary.getStatus().getState().startsWith("TERMINAT")) {
        while (findCluster("Treebeard", emr).getStatus().getState().equals("TERMINATING")) {
            System.out.println("Waiting for previous cluster to terminate");
            Thread.sleep(10000l);
        }

        System.out.println("Starting cluster...");
        StepFactory stepFactory = new StepFactory();

        StepConfig enabledebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

        //          Possibly redundant with ".withApplications(new Application().withName("Hive"))"
        //          StepConfig installHive = new StepConfig()
        //             .withName("Install Hive")
        //             .withActionOnFailure("TERMINATE_JOB_FLOW")
        //             .withHadoopJarStep(stepFactory.newInstallHiveStep());

        RunJobFlowRequest request = new RunJobFlowRequest().withName("Treebeard").withReleaseLabel("emr-4.6.0")
                .withApplications(new Application().withName("Hive")).withSteps(enabledebugging)
                .withVisibleToAllUsers(true)
                .withLogUri("s3://aws-logs-800327301943-us-east-1/elasticmapreduce/")
                .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
                .withInstances(new JobFlowInstancesConfig().withEc2KeyName("bjss").withInstanceCount(2)
                        .withMasterInstanceType("m3.xlarge").withSlaveInstanceType("m1.large")
                        .withKeepJobFlowAliveWhenNoSteps(createWithKeepAlive));

        RunJobFlowResult createClusterResult = emr.runJobFlow(request);
        clusterId = createClusterResult.getJobFlowId();
        System.out.printf("Started cluster with id %s\n", clusterId);
    }

    return clusterId;
}