Usage examples for com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig#setJar
public void setJar(String jar)
A path to a JAR file run during the step.
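Before the project-specific examples below, here is a minimal, self-contained sketch of the typical call pattern: point setJar at a JAR in S3, wrap the HadoopJarStepConfig in a StepConfig, and submit it to an existing cluster. The job flow ID, S3 URI, main class, and arguments are placeholder values, not taken from any of the source files listed below.

import java.util.Arrays;

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;

public class SetJarExample {
    public static void main(String[] args) {
        // Placeholder values for illustration only.
        String jobFlowId = "j-XXXXXXXXXXXXX";
        String jarUri = "s3://my-bucket/jars/my-job.jar";

        HadoopJarStepConfig jarStep = new HadoopJarStepConfig();
        jarStep.setJar(jarUri); // the method documented above
        jarStep.setMainClass("com.example.MyJob");
        jarStep.setArgs(Arrays.asList("input", "output"));

        StepConfig step = new StepConfig();
        step.setName("my custom jar step");
        step.setActionOnFailure("CONTINUE");
        step.setHadoopJarStep(jarStep);

        AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.defaultClient();
        emr.addJobFlowSteps(new AddJobFlowStepsRequest().withJobFlowId(jobFlowId).withSteps(step));
    }
}

The same HadoopJarStepConfig can equally be attached to a RunJobFlowRequest to start a new cluster with the step included, as several of the examples below do.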
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
private static StepConfig createDebugStep() {
    StepConfig debugStep = new StepConfig();
    debugStep.setName("Setup Hadoop Debugging");
    debugStep.setActionOnFailure("TERMINATE_JOB_FLOW");
    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig();
    hadoopJarStepConfig.setJar("s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar");
    hadoopJarStepConfig.getArgs().add("s3://us-east-1.elasticmapreduce/libs/state-pusher/0.1/fetch");
    debugStep.setHadoopJarStep(hadoopJarStepConfig);
    return debugStep;
}
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
public StepFuture executeJobStep(String name, File jobJar, String s3JobJarName, Class<?> mainClass, String... args) {
    checkConnection(true);
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig();
    if (jobJar != null) {
        String s3JobJarUri = uploadingJobJar(jobJar, s3JobJarName);
        jarConfig.setJar(s3JobJarUri);
    }
    if (mainClass != null) {
        jarConfig.setMainClass(mainClass.getName());
    }
    jarConfig.setArgs(Arrays.asList(args));

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName(name);
    stepConfig.setActionOnFailure("CONTINUE");
    stepConfig.setHadoopJarStep(jarConfig);

    _emrWebService.addJobFlowSteps(new AddJobFlowStepsRequest().withJobFlowId(_jobFlowId).withSteps(stepConfig));
    _emrWebService.clearDescribeJobFlowCache();
    return new StepFuture(stepConfig.getName(), getStepIndex(getJobFlowDetail(_jobFlowId), name));
}
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License:Apache License
private static HadoopJarStepConfig configureHadoopStep(String stagingS3Jar, String mainClass,
        List<String> jarStepArgs) {
    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig();
    hadoopJarStepConfig.setJar(stagingS3Jar);
    hadoopJarStepConfig.setMainClass(mainClass);
    hadoopJarStepConfig.setArgs(jarStepArgs);
    return hadoopJarStepConfig;
}
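A hypothetical caller of this helper might look like the following; the step name, ActionOnFailure value, and arguments are illustrative assumptions, not taken from the Pentaho source.

// Hypothetical usage of configureHadoopStep (all values illustrative).
HadoopJarStepConfig hadoopJarStep = configureHadoopStep(
    "s3://staging-bucket/job.jar", "com.example.Main", Arrays.asList("in", "out"));
StepConfig stepConfig = new StepConfig()
    .withName("example step")
    .withActionOnFailure("CONTINUE")
    .withHadoopJarStep(hadoopJarStep);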
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
            logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();

        // find main class in jar
        String mainClass = getMainClass(localJarUrl);

        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
            null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }

        // delete old jar if needed
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }

        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));

        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }

            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);

            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);

            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);

            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);

            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }

        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using default of 60");
        }

        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING";
                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }
                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);
                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                            .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }
                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent",
                        //     setupPercent, mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();
        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
            parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) {
    List<String> jarStepArgs = new ArrayList<String>();
    if (!StringUtil.isEmpty(cmdLineArgs)) {
        StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            logBasic("adding args: " + token);
            jarStepArgs.add(token);
        }
    }

    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
    hadoopJarStep.setJar(stagingS3Jar);
    hadoopJarStep.setMainClass(mainClass);
    hadoopJarStep.setArgs(jarStepArgs);

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName("custom jar: " + jarUrl);
    stepConfig.setHadoopJarStep(hadoopJarStep);

    List<StepConfig> steps = new ArrayList<StepConfig>();
    steps.add(stepConfig);

    String numInstancesS = environmentSubstitute(numInstances);
    int numInsts = 2;
    try {
        numInsts = Integer.parseInt(numInstancesS);
    } catch (NumberFormatException e) {
        logError("Unable to parse number of instances to use '" + numInstancesS + "' - using 2 instances...");
    }

    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setInstanceCount(numInsts);
    instances.setMasterInstanceType(getInstanceType(masterInstanceType));
    instances.setSlaveInstanceType(getInstanceType(slaveInstanceType));
    instances.setHadoopVersion("0.20");

    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setSteps(steps);
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);
    runJobFlowRequest.setName(hadoopJobName);
    runJobFlowRequest.setInstances(instances);

    // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig();
    // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh");
    // List<String> bootstrapArgs = new ArrayList<String>();
    // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com");
    // bootstrapArgs.add(
    //     "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/");
    // bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip");
    // scriptBootstrapAction.setArgs(bootstrapArgs);
    // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig();
    // bootstrapActionConfig.setName("mdd bootstrap");
    // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction);
    // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>();
    // bootstrapActions.add(bootstrapActionConfig);
    // runJobFlowRequest.setBootstrapActions(bootstrapActions);

    return runJobFlowRequest;
}
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/**
 * Configure the HadoopJarStep, which is one Hadoop step of an EMR job to be submitted to AWS.
 *
 * @param stepName
 *          name of step
 * @param stagingS3JarUrl
 *          URL for MapReduce jar file
 * @param args
 *          arguments for MapReduce jar
 * @return configuration data object for the step
 */
public List<StepConfig> ConfigHadoopJarStep(String stepName, String stagingS3JarUrl, String args) {
    List<String> jarStepArgs = new ArrayList<String>();
    jarStepArgs = ConfigArgs(args, " "); //$NON-NLS-1$

    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
    hadoopJarStep.setJar(stagingS3JarUrl);
    hadoopJarStep.setArgs(jarStepArgs);

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName(stepName);
    stepConfig.setHadoopJarStep(hadoopJarStep);
    if (isAlive()) {
        // Job flow stays in "WAITING" state if this step fails.
        stepConfig.setActionOnFailure("CANCEL_AND_WAIT"); //$NON-NLS-1$
    } else {
        // Job flow is terminated if this step fails.
        stepConfig.setActionOnFailure("TERMINATE_JOB_FLOW"); //$NON-NLS-1$
    }

    List<StepConfig> steps = new ArrayList<StepConfig>();
    steps.add(stepConfig);
    return steps;
}