Example usage for com.amazonaws.services.elasticmapreduce.model TerminateJobFlowsRequest withJobFlowIds

List of usage examples for com.amazonaws.services.elasticmapreduce.model TerminateJobFlowsRequest withJobFlowIds

Introduction

In this page you can find the example usage for com.amazonaws.services.elasticmapreduce.model TerminateJobFlowsRequest withJobFlowIds.

Prototype


public TerminateJobFlowsRequest withJobFlowIds(java.util.Collection<String> jobFlowIds) 

Source Link

Document

A list of job flows to be shut down.

Usage

From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java

License:Apache License

/**
 * Requests termination of the EMR job flow identified by {@code hadoopJobFlowId},
 * unless a shutdown has already been requested. Afterwards the current cluster
 * state is refreshed and the shutdown-requested flag is set.
 */
private void terminateJobFlows() {
    if (!requestClusterShutdown) {
        emrClient.terminateJobFlows(new TerminateJobFlowsRequest().withJobFlowIds(hadoopJobFlowId));
        currentClusterState = getActualClusterState();
        requestClusterShutdown = true;
    }
}

From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java

License:Apache License

/**
 * Executes a Hive job on the AWS Elastic MapReduce service.
 *
 * <p>Stages the Hive script (.q file) into S3 when a local path was given, creates a new EMR
 * job flow (or reuses {@code hadoopJobFlowId} if set), adds a "Run Hive Script" step to it
 * and — when {@code blocking} is set — polls the job flow state, logging progress and
 * terminating the cluster if the parent job is stopped (unless keep-alive is set).</p>
 *
 * @param result the result object to populate with success/failure information
 * @param arg1   unused; required by the job-entry interface
 * @return the same {@code result} instance, updated with the outcome
 * @throws KettleException declared by the interface; runtime errors are caught and
 *         recorded into {@code result} instead of being propagated
 */
public Result execute(Result result, int arg1) throws KettleException {

    // Set up a dedicated log file for this job entry.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // Create and connect the AWS service clients.
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        // Get bucket name and S3 URL.
        String stagingBucketName = GetBucketName(stagingDir);
        String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

        // Prepare the staging S3 URL for the Hive script file.
        String stagingS3qUrl = "";
        if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$

            // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
            if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
            } else {
                stagingS3qUrl = qUrl;
            }

        } else {
            // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory.
            // First, check for the correct protocol.
            if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG,
                            "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                }
            }
            // Pull down the .q file from VFS into a temp file.
            FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
            File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
            tmpFile.deleteOnExit();
            // FIX: close both streams — IOUtils.copy does not close its arguments, so the
            // previous code leaked the temp-file handle and the VFS input stream.
            try (java.io.InputStream qIn = qFile.getContent().getInputStream();
                    FileOutputStream tmpFileOut = new FileOutputStream(tmpFile)) {
                IOUtils.copy(qIn, tmpFileOut);
            }
            // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
            String key = GetKeyFromS3Url(stagingDir);
            if (key == null) {
                key = qFile.getName().getBaseName();
            } else {
                key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
            }

            // Delete any previous .q file in S3 (best effort — a failure is only logged).
            try {
                s3Client.deleteObject(stagingBucketName, key);
            } catch (Exception ex) {
                logError(Const.getStackTracker(ex));
            }

            // Put the .q file in the S3 Log Directory.
            s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
            stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
        }

        // AWS provides script-runner.jar (in its public bucket), which should be used as a MapReduce jar for Hive EMR
        // job.
        jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // Create an EMR job flow, start a step to set up Hive and get the job flow ID.
            runJobFlowRequest = createJobFlow();
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
            hadoopJobFlowId = runJobFlowResult.getJobFlowId();
        }

        // Now the EMR job flow is ready to accept a Run Hive Script step.
        // First, prepare a Job Flow ID list.
        List<String> jobFlowIds = new ArrayList<String>();
        jobFlowIds.add(hadoopJobFlowId);

        // Configure a HadoopJarStep.
        String args = "s3://elasticmapreduce/libs/hive/hive-script "
                + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);

        // Add a Run Hive Script step to the existing job flow.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);
        emrClient.addJobFlowSteps(addJobFlowStepsRequest);

        // Determine the polling/logging interval in seconds (defaults to 10 on a parse error).
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 10;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                    loggingIntervalS));
        }

        // Monitor the job flow and log its state if blocking execution was requested.
        if (blocking) {
            try {
                if (log.isBasic()) {

                    String executionState = "RUNNING"; //$NON-NLS-1$

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        // Stop polling if our job flow disappeared from the describe result.
                        if (!found) {
                            break;
                        }
                        logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                + executionState);

                        // If the parent job was stopped, terminate the cluster unless keep-alive is set.
                        if (parentJob.isStopped()) {
                            if (!alive) {
                                TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                emrClient.terminateJobFlows(terminateJobFlowsRequest);
                            }
                            break;
                        }

                        try {
                            if (isRunning(executionState)) {
                                // 1000L avoids int overflow for very large configured intervals.
                                Thread.sleep(logIntv * 1000L);
                            }
                        } catch (InterruptedException ie) {
                            // FIX: restore the interrupt flag so callers can observe the interruption.
                            Thread.currentThread().interrupt();
                            logError(Const.getStackTracker(ie));
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        // Dump the failed step's stdout/stderr from S3 into the Kettle log.
                        logError(readS3ObjectAsString(s3Client, stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stdout")); //$NON-NLS-1$
                        logError(readS3ObjectAsString(s3Client, stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stderr")); //$NON-NLS-1$
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        // FIX: removed t.printStackTrace() — logError(t.getMessage(), t) already records
        // the stack trace through the logging framework.
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}

/**
 * Reads the full content of an S3 object into a String, closing the content
 * stream afterwards (FIX: the previous inline code leaked the S3 object streams).
 */
private String readS3ObjectAsString(AmazonS3 s3Client, String bucketName, String key)
        throws java.io.IOException {
    S3Object s3Object = s3Client.getObject(bucketName, key);
    try (java.io.InputStream content = s3Object.getObjectContent()) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        IOUtils.copy(content, buffer);
        return buffer.toString();
    }
}