List of usage examples for com.amazonaws.services.elasticmapreduce.model AddJobFlowStepsRequest AddJobFlowStepsRequest
public AddJobFlowStepsRequest()
From source file:com.aegeus.aws.ElasticMapReduceService.java
License:Apache License
public void addSteps() { StepConfig parseStep = new StepConfig().withName("Parse logs") .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW); StepConfig persistStep = new StepConfig().withName("Persist layer") .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW); AddJobFlowStepsRequest request = new AddJobFlowStepsRequest().withJobFlowId(clusterId).withSteps(parseStep, persistStep);//from w w w . jav a 2 s. c om stepIds = emr.addJobFlowSteps(request).getStepIds(); }
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
public StepFuture executeJobStep(String name, File jobJar, String s3JobJarName, Class<?> mainClass, String... args) {//ww w. j a v a 2 s . c o m checkConnection(true); HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(); if (jobJar != null) { String s3JobJarUri = uploadingJobJar(jobJar, s3JobJarName); jarConfig.setJar(s3JobJarUri); } if (mainClass != null) { jarConfig.setMainClass(mainClass.getName()); } jarConfig.setArgs(Arrays.asList(args)); StepConfig stepConfig = new StepConfig(); stepConfig.setName(name); stepConfig.setActionOnFailure("CONTINUE"); stepConfig.setHadoopJarStep(jarConfig); _emrWebService .addJobFlowSteps(new AddJobFlowStepsRequest().withJobFlowId(_jobFlowId).withSteps(stepConfig)); _emrWebService.clearDescribeJobFlowCache(); return new StepFuture(stepConfig.getName(), getStepIndex(getJobFlowDetail(_jobFlowId), name)); }
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java
License:Apache License
/** * @param config/*w ww.j a va2s . co m*/ * @throws URISyntaxException */ public void runJob(Config config) throws URISyntaxException { RunJobFlowRequest runJobFlowRequest = null; CreateStepConfigger csc = getCreateStepConfigger(config); if (csc == null) { log.error("Step config create error"); return; } if (jobFlowId == null) { runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME) .withBootstrapActions( new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction( new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)), new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction( new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI) .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")), new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction( new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path()))) .withInstances(setupJobFlowInstancesConfig()); if (!isEmpty(emrProperties.getLogUri())) { runJobFlowRequest.setLogUri(emrProperties.getLogUri()); } List<StepConfig> stepConfigs = new ArrayList<StepConfig>(); if (emrProperties.isDebug()) { StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME) .withActionOnFailure(ACTION_ON_TERMINATE) .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()); stepConfigs.add(enableDebugging); } for (StepConfig sc : csc.createStepConfig(config)) { stepConfigs.add(sc); } runJobFlowRequest.setSteps(stepConfigs); try { RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest); jobFlowId = result.getJobFlowId(); checkDate = new Date(); } catch (Exception e) { e.printStackTrace(); log.error(e); } } else { AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId) .withSteps(csc.createStepConfig(config)); emr.addJobFlowSteps(addJobFlowStepsRequest); } running = true; try { config.setJobFlowId(jobFlowId); QueueUtils.updateQueue(config); } catch (IOException e) { e.printStackTrace(); log.error(e); } int stepSize = 0; String stepStatus = JobUtils.STEP_STATUS_PENDING; while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING) || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) { if (sleep()) { break; } DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest() .withJobFlowIds(jobFlowId); DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest); if (describeJobFlowsResult.getJobFlows().size() != 1) { break; } JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0); JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances(); masterPublicDnsName = instancesDetail.getMasterPublicDnsName(); if (isEmpty(config.getMasterPublicDnsName())) { try { config.setMasterPublicDnsName(masterPublicDnsName); QueueUtils.updateQueue(config); } catch (IOException e) { e.printStackTrace(); log.error(e); } } stepSize = jobFlowDetail.getSteps().size(); for (StepDetail stepDetail : jobFlowDetail.getSteps()) { if (stepDetail.getStepConfig().getName().equals(config.getName())) { stepStatus = stepDetail.getExecutionStatusDetail().getState(); break; } } } if (config.isDeleteOnExit()) { if (config.getJobType() == Config.JOB_TYPE_STREAMING) { S3Utils.delete(s3, config.getArgMap().get("mapper")); S3Utils.delete(s3, config.getArgMap().get("reducer")); } else { S3Utils.delete(s3, config.getRun()); } } // Add More than 256 Steps to a Job Flow(http://goo.gl/JDtsV) if (stepSize >= 255) { instanceTerminate(); } running = false; if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) { config.setStatus(Config.JOB_STATUS_COMPLETE); } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) { config.setStatus(Config.JOB_STATUS_ERROR); } else if (terminated) { config.setStatus(Config.JOB_STATUS_CANCEL); } try { QueueUtils.updateQueue(config); } catch (IOException e) { e.printStackTrace(); log.error(e); } }
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License:Apache License
@Override public void addStepToExistingJobFlow(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType, String mainClass, AbstractAmazonJobEntry jobEntry) { this.alive = jobEntry.getAlive(); this.hadoopJobFlowId = jobEntry.getHadoopJobFlowId(); setStepsFromCluster();// ww w . ja va2 s .c o m List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry); AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest(); addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId); addJobFlowStepsRequest.setSteps(steps); emrClient.addJobFlowSteps(addJobFlowStepsRequest); stepId = getSpecifiedRunningStep(); }
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException { Log4jFileAppender appender = null;/*from w w w .ja va 2 s . co m*/ String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$ try { appender = LogWriter.createFileAppender(logFileName, true, false); LogWriter.getInstance().addAppender(appender); log.setLogLevel(parentJob.getLogLevel()); } catch (Exception e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$ logFileName, e.toString())); logError(Const.getStackTracker(e)); } try { // create/connect aws service AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials); // pull down jar from vfs FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl)); File tmpFile = File.createTempFile("customEMR", "jar"); tmpFile.deleteOnExit(); FileOutputStream tmpFileOut = new FileOutputStream(tmpFile); IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut); URL localJarUrl = tmpFile.toURI().toURL(); // find main class in jar String mainClass = getMainClass(localJarUrl); // create staging bucket AmazonS3 s3Client = new AmazonS3Client(awsCredentials); FileSystemOptions opts = new FileSystemOptions(); DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator( null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey())); FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts); String stagingBucketName = stagingDirFileObject.getName().getBaseName(); if (!s3Client.doesBucketExist(stagingBucketName)) { s3Client.createBucket(stagingBucketName); } // delete old jar if needed try { s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName()); } catch (Exception ex) { logError(Const.getStackTracker(ex)); } // put jar in s3 staging bucket s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile)); // create non-vfs s3 url to jar String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName(); String stagingS3BucketUrl = "s3://" + stagingBucketName; RunJobFlowRequest runJobFlowRequest = null; RunJobFlowResult runJobFlowResult = null; if (StringUtil.isEmpty(hadoopJobFlowId)) { // create EMR job flow runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass); // start EMR job runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest); } else { List<String> jarStepArgs = new ArrayList<String>(); if (!StringUtil.isEmpty(cmdLineArgs)) { StringTokenizer st = new StringTokenizer(cmdLineArgs, " "); while (st.hasMoreTokens()) { String token = st.nextToken(); logBasic("adding args: " + token); jarStepArgs.add(token); } } HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(); hadoopJarStep.setJar(stagingS3JarUrl); hadoopJarStep.setMainClass(mainClass); hadoopJarStep.setArgs(jarStepArgs); StepConfig stepConfig = new StepConfig(); stepConfig.setName("custom jar: " + jarUrl); stepConfig.setHadoopJarStep(hadoopJarStep); List<StepConfig> steps = new ArrayList<StepConfig>(); steps.add(stepConfig); AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest(); addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId); addJobFlowStepsRequest.setSteps(steps); emrClient.addJobFlowSteps(addJobFlowStepsRequest); } String loggingIntervalS = environmentSubstitute(loggingInterval); int logIntv = 60; try { logIntv = Integer.parseInt(loggingIntervalS); } catch (NumberFormatException ex) { logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60"); } // monitor it / blocking / logging if desired if (blocking) { try { if (log.isBasic()) { String executionState = "RUNNING"; List<String> jobFlowIds = new ArrayList<String>(); String id = hadoopJobFlowId; if (StringUtil.isEmpty(hadoopJobFlowId)) { id = runJobFlowResult.getJobFlowId(); jobFlowIds.add(id); } while (isRunning(executionState)) { DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest(); describeJobFlowsRequest.setJobFlowIds(jobFlowIds); DescribeJobFlowsResult describeJobFlowsResult = emrClient .describeJobFlows(describeJobFlowsRequest); boolean found = false; for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) { if (jobFlowDetail.getJobFlowId().equals(id)) { executionState = jobFlowDetail.getExecutionStatusDetail().getState(); found = true; } } if (!found) { break; } // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent, // mapPercent, reducePercent)); logBasic(hadoopJobName + " execution status: " + executionState); try { if (isRunning(executionState)) { Thread.sleep(logIntv * 1000); } } catch (InterruptedException ie) { // Ignore } } if ("FAILED".equalsIgnoreCase(executionState)) { result.setStopped(true); result.setNrErrors(1); result.setResult(false); S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout"); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); IOUtils.copy(outObject.getObjectContent(), outStream); logError(outStream.toString()); S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr"); ByteArrayOutputStream errorStream = new ByteArrayOutputStream(); IOUtils.copy(errorObject.getObjectContent(), errorStream); logError(errorStream.toString()); } } } catch (Exception e) { logError(e.getMessage(), e); } } } catch (Throwable t) { t.printStackTrace(); result.setStopped(true); result.setNrErrors(1); result.setResult(false); logError(t.getMessage(), t); } if (appender != null) { LogWriter.getInstance().removeAppender(appender); appender.close(); ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(), parentJob.getJobname(), getName()); result.getResultFiles().put(resultFile.getFile().toString(), resultFile); } return result; }
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/** * Executes a Hive job into the AWS Elastic MapReduce service. */// w w w. j a v a 2 s . co m public Result execute(Result result, int arg1) throws KettleException { // Setup a log file. Log4jFileAppender appender = null; String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$ try { appender = LogWriter.createFileAppender(logFileName, true, false); LogWriter.getInstance().addAppender(appender); log.setLogLevel(parentJob.getLogLevel()); } catch (Exception e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$ logFileName, e.toString())); logError(Const.getStackTracker(e)); } try { // Create and connect an AWS service. AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials); AmazonS3 s3Client = new AmazonS3Client(awsCredentials); // Get bucket name and S3 URL. String stagingBucketName = GetBucketName(stagingDir); String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$ // Prepare staging S3 URL for Hive script file. String stagingS3qUrl = ""; if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$ // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path} if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$ stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$ } else { stagingS3qUrl = qUrl; } } else { // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory. // First, check for the correct protocol. if (!qUrl.startsWith("file:")) { //$NON-NLS-1$ if (log.isBasic()) { logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$ } } // pull down .q file from VSF FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl)); File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$ tmpFile.deleteOnExit(); FileOutputStream tmpFileOut = new FileOutputStream(tmpFile); IOUtils.copy(qFile.getContent().getInputStream(), tmpFileOut); // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/ String key = GetKeyFromS3Url(stagingDir); if (key == null) { key = qFile.getName().getBaseName(); } else { key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$ } // delete the previous .q file in S3 try { s3Client.deleteObject(stagingBucketName, key); } catch (Exception ex) { logError(Const.getStackTracker(ex)); } // Put .q file in S3 Log Directory. s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile)); stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$ } // AWS provides script-runner.jar (in its public bucket), which should be used as a MapReduce jar for Hive EMR // job. jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$ RunJobFlowRequest runJobFlowRequest = null; RunJobFlowResult runJobFlowResult = null; if (StringUtil.isEmpty(hadoopJobFlowId)) { // create an EMR job flow, start a step to setup Hive and get the job flow ID. runJobFlowRequest = createJobFlow(); runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest); hadoopJobFlowId = runJobFlowResult.getJobFlowId(); } // Now EMR job flow is ready to accept a Run Hive Script step. // First, prepare a Job Flow ID list. List<String> jobFlowIds = new ArrayList<String>(); jobFlowIds.add(hadoopJobFlowId); // Configure a HadoopJarStep. String args = "s3://elasticmapreduce/libs/hive/hive-script " + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f " + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$ List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args); // Add a Run Hive Script step to the existing job flow. AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest(); addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId); addJobFlowStepsRequest.setSteps(steps); emrClient.addJobFlowSteps(addJobFlowStepsRequest); // Set a logging interval. String loggingIntervalS = environmentSubstitute(loggingInterval); int logIntv = 10; try { logIntv = Integer.parseInt(loggingIntervalS); } catch (NumberFormatException ex) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$ loggingIntervalS)); } // monitor and log if intended. if (blocking) { try { if (log.isBasic()) { String executionState = "RUNNING"; //$NON-NLS-1$ while (isRunning(executionState)) { DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest(); describeJobFlowsRequest.setJobFlowIds(jobFlowIds); DescribeJobFlowsResult describeJobFlowsResult = emrClient .describeJobFlows(describeJobFlowsRequest); boolean found = false; for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) { if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) { executionState = jobFlowDetail.getExecutionStatusDetail().getState(); found = true; } } if (!found) { break; } logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$ "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId) + executionState); if (parentJob.isStopped()) { if (!alive) { TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest(); terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId); emrClient.terminateJobFlows(terminateJobFlowsRequest); } break; } try { if (isRunning(executionState)) { Thread.sleep(logIntv * 1000); } } catch (InterruptedException ie) { logError(Const.getStackTracker(ie)); } } if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$ result.setStopped(true); result.setNrErrors(1); result.setResult(false); S3Object outObject = s3Client.getObject(stagingBucketName, hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$ ByteArrayOutputStream outStream = new ByteArrayOutputStream(); IOUtils.copy(outObject.getObjectContent(), outStream); logError(outStream.toString()); S3Object errorObject = s3Client.getObject(stagingBucketName, hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$ ByteArrayOutputStream errorStream = new ByteArrayOutputStream(); IOUtils.copy(errorObject.getObjectContent(), errorStream); logError(errorStream.toString()); } } } catch (Exception e) { logError(e.getMessage(), e); } } } catch (Throwable t) { t.printStackTrace(); result.setStopped(true); result.setNrErrors(1); result.setResult(false); logError(t.getMessage(), t); } if (appender != null) { LogWriter.getInstance().removeAppender(appender); appender.close(); ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(), parentJob.getJobname(), getName()); result.getResultFiles().put(resultFile.getFile().toString(), resultFile); } return result; }