Example usage for com.amazonaws.services.elasticmapreduce.model RunJobFlowResult getJobFlowId

Introduction

On this page you can find example usage for com.amazonaws.services.elasticmapreduce.model RunJobFlowResult getJobFlowId.

Prototype


public String getJobFlowId() 

Document

A unique identifier for the job flow.
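Before the longer examples below, here is a minimal, self-contained sketch of the call pattern they all share. It is a sketch only: the client builder, role names, release label, and instance settings are placeholder assumptions to adjust for your own account.

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;

public class GetJobFlowIdSketch {
    public static void main(String[] args) {
        AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.defaultClient();

        // Hypothetical cluster settings; adjust before running.
        RunJobFlowRequest request = new RunJobFlowRequest().withName("example-flow")
                .withReleaseLabel("emr-4.6.0").withServiceRole("EMR_DefaultRole")
                .withJobFlowRole("EMR_EC2_DefaultRole")
                .withInstances(new JobFlowInstancesConfig().withInstanceCount(1)
                        .withMasterInstanceType("m3.xlarge")
                        .withKeepJobFlowAliveWhenNoSteps(false));

        // getJobFlowId() returns the identifier EMR assigns to the new job flow (cluster).
        RunJobFlowResult result = emr.runJobFlow(request);
        System.out.println("Job flow id: " + result.getJobFlowId());
    }
}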

Usage

From source file:awswc.AwsConsoleApp.java

License:Open Source License

static void runJobFlow() throws InterruptedException {
    // Configure instances to use
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    //********************************************************************//
    instances.setHadoopVersion(HADOOP_VERSION);
    instances.withEc2KeyName("ayuda-vp1");
    instances.setInstanceCount(MASTER_INSTANCE_COUNT);
    //instances.setInstanceGroups(instanceGroups)
    instances.setMasterInstanceType(InstanceType.M24xlarge.toString());
    instances.setSlaveInstanceType(InstanceType.M24xlarge.toString());
    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep1 = new HadoopJarStepConfig().withJar(S3N_WORD_COUNT_JAR_) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC");

    StepConfig stepConfig1 = new StepConfig().withName("wordcount").withHadoopJarStep(hadoopJarStep1)
            .withActionOnFailure("TERMINATE_JOB_FLOW");

    //********************************************************************//

    //********************************************************************//
    HadoopJarStepConfig hadoopJarStep2 = new HadoopJarStepConfig().withJar(S3N_MAX_WORD_COUNT_JAR) // This should be a full map reduce application.
            .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10");

    StepConfig stepConfig2 = new StepConfig().withName("maxwordcount").withHadoopJarStep(hadoopJarStep2)
            .withActionOnFailure("TERMINATE_JOB_FLOW");
    //********************************************************************//

    Collection<StepConfig> csc = new ArrayList<StepConfig>();
    csc.add(stepConfig1);
    csc.add(stepConfig2);

    // BootstrapActions bootstrapActions = new BootstrapActions();
    RunJobFlowRequest runFlowRequest = new RunJobFlowRequest().withName(FLOW_NAME).withInstances(instances)
            .withSteps(csc).withLogUri(BUCKET_NAME + "debug")
    /*
    .withBootstrapActions(
            bootstrapActions.newRunIf("instance.isMaster=true",
                    bootstrapActions.newConfigureDaemons()
                            .withHeapSize(Daemon.JobTracker, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningNameNode=true",
                    bootstrapActions.newConfigureDaemons()
                            .withHeapSize(Daemon.NameNode, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningDataNode=true",
                    bootstrapActions.newConfigureDaemons()
                            .withHeapSize(Daemon.DataNode, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningJobTracker=true",
                    bootstrapActions.newConfigureDaemons()
                            .withHeapSize(Daemon.JobTracker, 4096).build()),
            bootstrapActions.newRunIf("instance.isRunningTaskTracker=true",
                    bootstrapActions.newConfigureDaemons()
                            .withHeapSize(Daemon.TaskTracker, 4096).build()),
            bootstrapActions.newRunIf("instance.isSlave=true",
                    bootstrapActions.newConfigureHadoop()
                            .withKeyValue(ConfigFile.Site, "mapred.tasktracker.map.tasks.maximum", "4")))
    */;

    RunJobFlowResult runJobFlowResult = emr.runJobFlow(runFlowRequest);

    String jobFlowId = runJobFlowResult.getJobFlowId();
    System.out.println("Ran job flow with id: " + jobFlowId);

    //wasFinished(runJobFlowResult);

}

From source file:awswc.AwsConsoleApp.java

License:Open Source License

public static void wasFinished(RunJobFlowResult runJobFlowResult) throws InterruptedException {
    DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest()
            .withJobFlowIds(runJobFlowResult.getJobFlowId());
    int secondsBetweenPolling = 120;
    String state = null;
    do {
        Thread.sleep(secondsBetweenPolling * 1000);
        DescribeJobFlowsResult jobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest);
        JobFlowDetail detail = jobFlowsResult.getJobFlows().get(0);
        JobFlowExecutionStatusDetail executionStatusDetail = detail.getExecutionStatusDetail();
        state = executionStatusDetail.getState();
    } while (state != null && !state.equals("COMPLETED") && !state.equals("FAILED")
            && !state.equals("TERMINATED"));
}
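Note that DescribeJobFlows, used in the polling loop above, is deprecated in later SDK releases. The value returned by getJobFlowId() also serves as the cluster id for the newer cluster-oriented API, so the same check can be written against DescribeCluster. A minimal sketch, assuming the same static emr client as above:

public static String getClusterState(RunJobFlowResult runJobFlowResult) {
    // The job flow id doubles as the cluster id.
    DescribeClusterRequest request = new DescribeClusterRequest()
            .withClusterId(runJobFlowResult.getJobFlowId());
    // Returns states such as STARTING, RUNNING, WAITING, or TERMINATED.
    return emr.describeCluster(request).getCluster().getStatus().getState();
}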

From source file:com.aegeus.aws.ElasticMapReduceService.java

License:Apache License

/**
 * Create a new EMR Cluster over Hadoop 2.4.0
 */
public void createCluster() {
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
            .withInstanceCount((int) config.getInstanceCount()).withMasterInstanceType(config.getMasterType())
            .withSlaveInstanceType(config.getSlaveType());

    if (!Strings.isNullOrEmpty(config.getKeyName())) {
        instances.setEc2KeyName(config.getKeyName());
    }

    if (!Strings.isNullOrEmpty(config.getSubnetId())) {
        instances.setEc2SubnetId(config.getSubnetId());
    } else {
        instances.setPlacement(new PlacementType(config.getPlace()));
    }

    ScriptBootstrapActionConfig installEsConfig = new ScriptBootstrapActionConfig()
            .withPath("s3://support.elasticmapreduce/bootstrap-actions/other/elasticsearch_install.rb");

    BootstrapActionConfig installEs = new BootstrapActionConfig("Elasticsearch Install", installEsConfig);

    RunJobFlowRequest request = new RunJobFlowRequest().withName(config.getName()).withReleaseLabel("emr-4.1.0")
            .withServiceRole("Default_AWS_Role").withJobFlowRole("Default_AWS_Role")
            .withBootstrapActions(installEs).withInstances(instances);

    if (!Strings.isNullOrEmpty(config.getLogBucket())) {
        request.setLogUri(config.getLogBucket());
    }

    RunJobFlowResult result = emr.runJobFlow(request);

    clusterId = result.getJobFlowId();
}
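Because createCluster() keeps the returned id in the clusterId field, later steps can be submitted to the running cluster without starting a new one. A possible follow-up sketch; the method, step name, jar path, and arguments here are placeholders, not part of the original class:

public void addStep() {
    AddJobFlowStepsRequest addSteps = new AddJobFlowStepsRequest().withJobFlowId(clusterId)
            .withSteps(new StepConfig().withName("example-step").withActionOnFailure("CONTINUE")
                    .withHadoopJarStep(new HadoopJarStepConfig()
                            .withJar("s3://my-bucket/my-job.jar").withArgs("arg1", "arg2")));
    emr.addJobFlowSteps(addSteps);
}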

From source file:com.clouddrive.parth.NewClass.java

public static String runCluster() throws Exception {
    long start = System.currentTimeMillis();
    String temp = "";
    // Configure the job flow
    //RunJobFlowRequest request = new RunJobFlowRequest().withName("parth");
    // if (request == null) {
    RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance());
    request.setLogUri(S3N_LOG_URI);
    // }

    // Configure the Hadoop jar to use
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR);
    jarConfig.setArgs(ARGS_AS_LIST);

    try {

        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());

        StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1),
                jarConfig);

        request.setSteps(Arrays.asList(new StepConfig[] { enableDebugging, runJar }));

        // Run the job flow
        RunJobFlowResult result = emr.runJobFlow(request);

        // Check the status of the running job
        String lastState = "";

        STATUS_LOOP: while (true) {
            DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest(
                    Arrays.asList(new String[] { result.getJobFlowId() }));
            DescribeJobFlowsResult descResult = emr.describeJobFlows(desc);
            for (JobFlowDetail detail : descResult.getJobFlows()) {
                String state = detail.getExecutionStatusDetail().getState();
                if (isDone(state)) {
                    System.out.println("Job " + state + ": " + detail.toString());
                    break STATUS_LOOP;
                } else if (!lastState.equals(state)) {
                    lastState = state;
                    System.out.println("Job " + state + " at " + new Date().toString());
                }
            }
            Thread.sleep(10000);
        }
        temp = FLOW_NAME;
        long end = System.currentTimeMillis();
        System.out.println("Computation " + (end - start));
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Reponse Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }
    return temp;
}
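runCluster() relies on an isDone(...) helper that is not part of this listing. A plausible sketch, matching the terminal job flow states polled elsewhere on this page:

private static boolean isDone(String state) {
    // COMPLETED, FAILED, and TERMINATED are the terminal JobFlowExecutionState values.
    return "COMPLETED".equals(state) || "FAILED".equals(state) || "TERMINATED".equals(state);
}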

From source file:datameer.awstasks.aws.emr.EmrCluster.java

License:Apache License

public synchronized void startup() throws InterruptedException {
    checkConnection(false);
    _clusterState = ClusterState.STARTING;
    boolean successful = false;
    try {
        EmrSettings settings = getSettings();
        if (settings.getPrivateKeyName() == null) {
            throw new NullPointerException(
                    "privateKeyName must not be null please configure settings properly");
        }
        LOG.info("Starting job flow '" + getName() + "' ...");
        if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) {
            throw new IllegalStateException("Job flow with name '" + getName() + "' already running.");
        }
        boolean keepAlive = true;
        JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig();
        jobConfig.setHadoopVersion(_settings.getHadoopVersion());
        jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId());
        jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId());
        jobConfig.setInstanceCount(settings.getInstanceCount());
        jobConfig.setEc2KeyName(settings.getPrivateKeyName());
        jobConfig.setPlacement(new PlacementType());
        jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);

        final RunJobFlowRequest startRequest = new RunJobFlowRequest();

        startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath());
        startRequest.setInstances(jobConfig);
        startRequest.setName(getName());
        startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo());
        startRequest.setBootstrapActions(_settings.getBootstrapActions());
        if (settings.isDebugEnabled()) {
            startRequest.withSteps(DEBUG_STEP);
        }
        RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest);
        _jobFlowId = startResponse.getJobFlowId();
        waitUntilClusterStarted(_jobFlowId);
        LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is "
                + _masterHost);
        successful = true;
    } finally {
        if (successful) {
            _clusterState = ClusterState.CONNECTED;
        } else {
            _clusterState = ClusterState.UNCONNECTED;
            _jobFlowId = null;
        }
    }
}
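A matching shutdown for startup() might look like the sketch below, assuming _emrWebService forwards terminateJobFlows to the underlying client the same way it forwards runJobFlow; this method is not part of the original class:

public synchronized void shutdown() {
    // Terminate the flow and drop back to the unconnected state.
    _emrWebService.terminateJobFlows(new TerminateJobFlowsRequest().withJobFlowIds(_jobFlowId));
    _jobFlowId = null;
    _clusterState = ClusterState.UNCONNECTED;
}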

From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java

License:Apache License

/**
 * @param config
 * @throws URISyntaxException
 */
public void runJob(Config config) throws URISyntaxException {
    RunJobFlowRequest runJobFlowRequest = null;

    CreateStepConfigger csc = getCreateStepConfigger(config);
    if (csc == null) {
        log.error("Step config create error");
        return;
    }

    if (jobFlowId == null) {
        runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME)
                .withBootstrapActions(
                        new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)),
                        new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI)
                                        .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")),
                        new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path())))
                .withInstances(setupJobFlowInstancesConfig());
        if (!isEmpty(emrProperties.getLogUri())) {
            runJobFlowRequest.setLogUri(emrProperties.getLogUri());
        }

        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
        if (emrProperties.isDebug()) {
            StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME)
                    .withActionOnFailure(ACTION_ON_TERMINATE)
                    .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
            stepConfigs.add(enableDebugging);
        }

        for (StepConfig sc : csc.createStepConfig(config)) {
            stepConfigs.add(sc);
        }
        runJobFlowRequest.setSteps(stepConfigs);

        try {
            RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest);
            jobFlowId = result.getJobFlowId();
            checkDate = new Date();
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e);
        }
    } else {
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId)
                .withSteps(csc.createStepConfig(config));
        emr.addJobFlowSteps(addJobFlowStepsRequest);
    }

    running = true;
    try {
        config.setJobFlowId(jobFlowId);
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }

    int stepSize = 0;
    String stepStatus = JobUtils.STEP_STATUS_PENDING;
    while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING) || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) {
        if (sleep()) {
            break;
        }

        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest()
                .withJobFlowIds(jobFlowId);
        DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest);
        if (describeJobFlowsResult.getJobFlows().size() != 1) {
            break;
        }

        JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0);
        JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances();
        masterPublicDnsName = instancesDetail.getMasterPublicDnsName();
        if (isEmpty(config.getMasterPublicDnsName())) {
            try {
                config.setMasterPublicDnsName(masterPublicDnsName);
                QueueUtils.updateQueue(config);
            } catch (IOException e) {
                e.printStackTrace();
                log.error(e);
            }
        }

        stepSize = jobFlowDetail.getSteps().size();
        for (StepDetail stepDetail : jobFlowDetail.getSteps()) {
            if (stepDetail.getStepConfig().getName().equals(config.getName())) {
                stepStatus = stepDetail.getExecutionStatusDetail().getState();
                break;
            }
        }
    }

    if (config.isDeleteOnExit()) {
        if (config.getJobType() == Config.JOB_TYPE_STREAMING) {
            S3Utils.delete(s3, config.getArgMap().get("mapper"));
            S3Utils.delete(s3, config.getArgMap().get("reducer"));
        } else {
            S3Utils.delete(s3, config.getRun());
        }
    }

    // Work around the 256-step limit on a job flow (http://goo.gl/JDtsV)
    if (stepSize >= 255) {
        instanceTerminate();
    }

    running = false;

    if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) {
        config.setStatus(Config.JOB_STATUS_COMPLETE);
    } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) {
        config.setStatus(Config.JOB_STATUS_ERROR);
    } else if (terminated) {
        config.setStatus(Config.JOB_STATUS_CANCEL);
    }

    try {
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }
}
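instanceTerminate() is not shown in this listing. A plausible sketch, assuming the same emr client and jobFlowId field: terminating the flow and clearing the id lets the next runJob(Config) call start a fresh flow below the step limit.

private void instanceTerminate() {
    emr.terminateJobFlows(new TerminateJobFlowsRequest().withJobFlowIds(jobFlowId));
    jobFlowId = null;
}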

From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java

License:Apache License

public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();

        // find main class in jar
        String mainClass = getMainClass(localJarUrl);

        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
                null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }

        // delete old jar if needed
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }

        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));
        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }

            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);

            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);

            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);

            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);

            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }

        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60");
        }

        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {

                    String executionState = "RUNNING";

                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent,
                        // mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
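The blocking monitor above depends on an isRunning(...) helper that is not included here. A plausible sketch, treating anything other than a terminal job flow state as still running:

private static boolean isRunning(String state) {
    return !"COMPLETED".equals(state) && !"FAILED".equals(state) && !"TERMINATED".equals(state);
}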

From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java

License:Apache License

/**
 * Executes a Hive job on the AWS Elastic MapReduce service.
 */
public Result execute(Result result, int arg1) throws KettleException {

    // Setup a log file.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // Create and connect an AWS service.
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        // Get bucket name and S3 URL.
        String stagingBucketName = GetBucketName(stagingDir);
        String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

        // Prepare staging S3 URL for Hive script file.
        String stagingS3qUrl = "";
        if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$

            // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
            if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
            } else {
                stagingS3qUrl = qUrl;
            }

        } else {
            // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory.
            // First, check for the correct protocol.
            if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG,
                            "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                }
            }
            // pull down .q file from VFS
            FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
            File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
            tmpFile.deleteOnExit();
            FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
            IOUtils.copy(qFile.getContent().getInputStream(), tmpFileOut);
            // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
            String key = GetKeyFromS3Url(stagingDir);
            if (key == null) {
                key = qFile.getName().getBaseName();
            } else {
                key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
            }

            // delete the previous .q file in S3
            try {
                s3Client.deleteObject(stagingBucketName, key);
            } catch (Exception ex) {
                logError(Const.getStackTracker(ex));
            }

            // Put .q file in S3 Log Directory.
            s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
            stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
        }

        // AWS provides script-runner.jar (in its public bucket), which should be used as the
        // MapReduce jar for a Hive EMR job.
        jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create an EMR job flow, start a step to setup Hive and get the job flow ID.
            runJobFlowRequest = createJobFlow();
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
            hadoopJobFlowId = runJobFlowResult.getJobFlowId();
        }

        // Now EMR job flow is ready to accept a Run Hive Script step.
        // First, prepare a Job Flow ID list.
        List<String> jobFlowIds = new ArrayList<String>();
        jobFlowIds.add(hadoopJobFlowId);

        // Configure a HadoopJarStep.
        String args = "s3://elasticmapreduce/libs/hive/hive-script "
                + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);

        // Add a Run Hive Script step to the existing job flow.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);
        emrClient.addJobFlowSteps(addJobFlowStepsRequest);

        // Set a logging interval.
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 10;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                    loggingIntervalS));
        }

        // monitor and log if intended.
        if (blocking) {
            try {
                if (log.isBasic()) {

                    String executionState = "RUNNING"; //$NON-NLS-1$

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                + executionState);

                        if (parentJob.isStopped()) {
                            if (!alive) {
                                TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                emrClient.terminateJobFlows(terminateJobFlowsRequest);
                            }
                            break;
                        }

                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            logError(Const.getStackTracker(ie));
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}

From source file:rollsPOC2.util.AWSHelper.java

public static String createOrFindEMRHiveCluster(String clusterName, boolean createWithKeepAlive)
        throws Exception {
    String clusterId = null;
    AmazonElasticMapReduce emr = AppServices.getEMRClient();
    ClusterSummary clusterSummary = findCluster(clusterName, emr);
    if (clusterSummary != null) {
        clusterId = clusterSummary.getId();
        System.err.printf("Cluster found with id %s, status %s\n", clusterId,
                clusterSummary.getStatus().getState());
    }

    if (clusterSummary != null && clusterSummary.getStatus().getState().startsWith("TERMINAT")) {
        while (findCluster(clusterName, emr).getStatus().getState().equals("TERMINATING")) {
            System.out.println("Waiting for previous cluster to terminate");
            Thread.sleep(10000l);
        }

        System.out.println("Starting cluster...");
        StepFactory stepFactory = new StepFactory();

        StepConfig enabledebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

        //          Possibly redundant with ".withApplications(new Application().withName("Hive"))"
        //          StepConfig installHive = new StepConfig()
        //             .withName("Install Hive")
        //             .withActionOnFailure("TERMINATE_JOB_FLOW")
        //             .withHadoopJarStep(stepFactory.newInstallHiveStep());

        RunJobFlowRequest request = new RunJobFlowRequest().withName("Treebeard").withReleaseLabel("emr-4.6.0")
                .withApplications(new Application().withName("Hive")).withSteps(enabledebugging)
                .withVisibleToAllUsers(true)
                .withLogUri("s3://aws-logs-800327301943-us-east-1/elasticmapreduce/")
                .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
                .withInstances(new JobFlowInstancesConfig().withEc2KeyName("bjss").withInstanceCount(2)
                        .withMasterInstanceType("m3.xlarge").withSlaveInstanceType("m1.large")
                        .withKeepJobFlowAliveWhenNoSteps(createWithKeepAlive));

        RunJobFlowResult createClusterResult = emr.runJobFlow(request);
        clusterId = createClusterResult.getJobFlowId();
        System.out.printf("Started cluster with id %s\n", clusterId);
    }

    return clusterId;
}
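A hypothetical call site for the helper above; the cluster name and keep-alive flag are placeholders:

String clusterId = AWSHelper.createOrFindEMRHiveCluster("Treebeard", true);
if (clusterId != null) {
    System.out.println("Hive cluster ready: " + clusterId);
}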