List of usage examples for com.amazonaws.services.elasticmapreduce.model RunJobFlowResult getJobFlowId
public String getJobFlowId()
A unique identifier for the job flow.
From source file:awswc.AwsConsoleApp.java
License:Open Source License
static void runJobFlow() throws InterruptedException { // Configure instances to use JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); //********************************************************************// instances.setHadoopVersion(HADOOP_VERSION); instances.withEc2KeyName("ayuda-vp1"); instances.setInstanceCount(MASTER_INSTANCE_COUNT); //instances.setInstanceGroups(instanceGroups) instances.setMasterInstanceType(InstanceType.M24xlarge.toString()); instances.setSlaveInstanceType(InstanceType.M24xlarge.toString()); //********************************************************************// HadoopJarStepConfig hadoopJarStep1 = new HadoopJarStepConfig().withJar(S3N_WORD_COUNT_JAR_) // This should be a full map reduce application. .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC"); StepConfig stepConfig1 = new StepConfig().withName("wordcount").withHadoopJarStep(hadoopJarStep1) .withActionOnFailure("TERMINATE_JOB_FLOW"); //********************************************************************// //********************************************************************// HadoopJarStepConfig hadoopJarStep2 = new HadoopJarStepConfig().withJar(S3N_MAX_WORD_COUNT_JAR) // This should be a full map reduce application. .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10"); StepConfig stepConfig2 = new StepConfig().withName("maxwordcount").withHadoopJarStep(hadoopJarStep2) .withActionOnFailure("TERMINATE_JOB_FLOW"); //********************************************************************// Collection<StepConfig> csc = new ArrayList<StepConfig>(); csc.add(stepConfig1);/*w ww.j a v a 2 s. 
co m*/ csc.add(stepConfig2); // BootstrapActions bootstrapActions = new BootstrapActions(); RunJobFlowRequest runFlowRequest = new RunJobFlowRequest().withName(FLOW_NAME).withInstances(instances) .withSteps(csc).withLogUri(BUCKET_NAME + "debug") /*.withBootstrapActions( bootstrapActions.newRunIf( "instance.isMaster=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.JobTracker, 4096) .build()), bootstrapActions.newRunIf( "instance.isRunningNameNode=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.NameNode, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningDataNode=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.DataNode, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningJobTracker=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.JobTracker, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningTaskTracker=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.TaskTracker, 4096).build()) /*, bootstrapActions.newRunIf( "instance.isSlave=true", bootstrapActions.newConfigureHadoop() .withKeyValue(ConfigFile.Site,"mapred.tasktracker.map.tasks.maximum", "4")) )*/; RunJobFlowResult runJobFlowResult = emr.runJobFlow(runFlowRequest); String jobFlowId = runJobFlowResult.getJobFlowId(); System.out.println("Ran job flow with id: " + jobFlowId); //wasFinished(runJobFlowResult); }
From source file:awswc.AwsConsoleApp.java
License:Open Source License
public static void wasFinished(RunJobFlowResult runJobFlowResult) throws InterruptedException { DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest() .withJobFlowIds(runJobFlowResult.getJobFlowId()); int secondsBetweenPolling = 120; String state = null;// w w w .j av a 2 s. com do { Thread.sleep(secondsBetweenPolling * 1000); DescribeJobFlowsResult jobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest); JobFlowDetail detail = jobFlowsResult.getJobFlows().get(0); JobFlowExecutionStatusDetail executionStatusDetail = detail.getExecutionStatusDetail(); state = executionStatusDetail.getState(); } while (state != null && !state.equals("COMPLETE") && !state.equals("FAILED") && !state.equals("TERMINATED")); }
From source file:com.aegeus.aws.ElasticMapReduceService.java
License:Apache License
/**
 * Creates a new EMR cluster (release emr-4.1.0) sized and typed from
 * {@code config}, bootstrapped with an Elasticsearch install script.
 * The resulting job flow id is stored in {@code clusterId}.
 */
public void createCluster() {
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
            .withInstanceCount((int) config.getInstanceCount()).withMasterInstanceType(config.getMasterType())
            .withSlaveInstanceType(config.getSlaveType());
    // FIX: the original condition was not negated, so the EC2 key name was only
    // "set" when it was null/empty and never when one was actually configured.
    if (!Strings.isNullOrEmpty(config.getKeyName())) {
        instances.setEc2KeyName(config.getKeyName());
    }
    // Prefer an explicit subnet; otherwise fall back to a placement/AZ.
    if (!Strings.isNullOrEmpty(config.getSubnetId())) {
        instances.setEc2SubnetId(config.getSubnetId());
    } else {
        instances.setPlacement(new PlacementType(config.getPlace()));
    }
    ScriptBootstrapActionConfig installEsConfig = new ScriptBootstrapActionConfig()
            .withPath("s3://support.elasticmapreduce/bootstrap-actions/other/elasticsearch_install.rb");
    BootstrapActionConfig installEs = new BootstrapActionConfig("Elasticsearch Install", installEsConfig);
    RunJobFlowRequest request = new RunJobFlowRequest().withName(config.getName()).withReleaseLabel("emr-4.1.0")
            .withServiceRole("Default_AWS_Role").withJobFlowRole("Default_AWS_Role")
            .withBootstrapActions(installEs).withInstances(instances);
    if (!Strings.isNullOrEmpty(config.getLogBucket())) {
        request.setLogUri(config.getLogBucket());
    }
    RunJobFlowResult result = emr.runJobFlow(request);
    clusterId = result.getJobFlowId();
}
From source file:com.clouddrive.parth.NewClass.java
public static String runCluster() throws Exception { long start = System.currentTimeMillis(); String temp = ""; // Configure the job flow //RunJobFlowRequest request = new RunJobFlowRequest().withName("parth"); // if (request == null) { RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance()); request.setLogUri(S3N_LOG_URI); // }/*from w w w. jav a2s .c om*/ // Configure the Hadoop jar to use HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR); jarConfig.setArgs(ARGS_AS_LIST); try { StepConfig enableDebugging = new StepConfig().withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()); StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1), jarConfig); request.setSteps(Arrays.asList(new StepConfig[] { enableDebugging, runJar })); // Run the job flow RunJobFlowResult result = emr.runJobFlow(request); // Check the status of the running job String lastState = ""; STATUS_LOOP: while (true) { DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest( Arrays.asList(new String[] { result.getJobFlowId() })); DescribeJobFlowsResult descResult = emr.describeJobFlows(desc); for (JobFlowDetail detail : descResult.getJobFlows()) { String state = detail.getExecutionStatusDetail().getState(); if (isDone(state)) { System.out.println("Job " + state + ": " + detail.toString()); break STATUS_LOOP; } else if (!lastState.equals(state)) { lastState = state; System.out.println("Job " + state + " at " + new Date().toString()); } } Thread.sleep(10000); } temp = FLOW_NAME; long end = System.currentTimeMillis(); System.out.println("Computation " + (end - start)); } catch (AmazonServiceException ase) { System.out.println("Caught Exception: " + ase.getMessage()); System.out.println("Reponse Status Code: " + ase.getStatusCode()); System.out.println("Error Code: " + ase.getErrorCode()); System.out.println("Request ID: " + 
ase.getRequestId()); } return temp; }
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
/**
 * Starts the EMR cluster described by the current settings and blocks until
 * it is up. Moves {@code _clusterState} STARTING -> CONNECTED on success, or
 * back to UNCONNECTED (clearing {@code _jobFlowId}) on any failure.
 *
 * @throws InterruptedException if waiting for cluster startup is interrupted
 * @throws NullPointerException if no private key name is configured
 * @throws IllegalStateException if a job flow with this name is already running
 */
public synchronized void startup() throws InterruptedException {
    checkConnection(false);
    _clusterState = ClusterState.STARTING;
    boolean successful = false;
    try {
        EmrSettings settings = getSettings();
        // The key pair is mandatory for later ssh access to the master.
        if (settings.getPrivateKeyName() == null) {
            throw new NullPointerException(
                    "privateKeyName must not be null please configure settings properly");
        }
        LOG.info("Starting job flow '" + getName() + "' ...");
        // Refuse to start a second flow under the same name.
        if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) {
            throw new IllegalStateException("Job flow with name '" + getName() + "' already running.");
        }
        // Keep the cluster alive between steps so it can be reused.
        boolean keepAlive = true;
        JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig();
        jobConfig.setHadoopVersion(_settings.getHadoopVersion());
        jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId());
        jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId());
        jobConfig.setInstanceCount(settings.getInstanceCount());
        jobConfig.setEc2KeyName(settings.getPrivateKeyName());
        jobConfig.setPlacement(new PlacementType());
        jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);
        final RunJobFlowRequest startRequest = new RunJobFlowRequest();
        // Logs go to the configured S3 bucket/path.
        startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath());
        startRequest.setInstances(jobConfig);
        startRequest.setName(getName());
        startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo());
        startRequest.setBootstrapActions(_settings.getBootstrapActions());
        if (settings.isDebugEnabled()) {
            startRequest.withSteps(DEBUG_STEP);
        }
        RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest);
        _jobFlowId = startResponse.getJobFlowId();
        // Block until EMR reports the cluster as started (sets _masterHost).
        waitUntilClusterStarted(_jobFlowId);
        LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is "
                + _masterHost);
        successful = true;
    } finally {
        // State transition happens in finally so every exit path (including
        // exceptions thrown above) leaves the state machine consistent.
        if (successful) {
            _clusterState = ClusterState.CONNECTED;
        } else {
            _clusterState = ClusterState.UNCONNECTED;
            _jobFlowId = null;
        }
    }
}
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java
License:Apache License
/**
 * Submits the job described by {@code config} to EMR — creating a new job
 * flow (with bootstrap actions and optional debug step) if none exists, or
 * appending steps to the running flow — then polls the step's status until it
 * leaves PENDING/RUNNING, updating the queue entry along the way. Finally
 * performs delete-on-exit S3 cleanup and records the terminal job status.
 *
 * @param config job description; mutated with job flow id, master DNS name and final status
 * @throws URISyntaxException declared for callers; not thrown directly in this body
 */
public void runJob(Config config) throws URISyntaxException {
    RunJobFlowRequest runJobFlowRequest = null;

    CreateStepConfigger csc = getCreateStepConfigger(config);
    if (csc == null) {
        log.error("Step config create error");
        return;
    }

    if (jobFlowId == null) {
        // No flow yet: create one with memory/Hadoop/Huahin bootstrap actions.
        runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME)
                .withBootstrapActions(
                        new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)),
                        new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI)
                                        .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")),
                        new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path())))
                .withInstances(setupJobFlowInstancesConfig());
        if (!isEmpty(emrProperties.getLogUri())) {
            runJobFlowRequest.setLogUri(emrProperties.getLogUri());
        }

        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
        if (emrProperties.isDebug()) {
            StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME)
                    .withActionOnFailure(ACTION_ON_TERMINATE)
                    .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
            stepConfigs.add(enableDebugging);
        }

        for (StepConfig sc : csc.createStepConfig(config)) {
            stepConfigs.add(sc);
        }
        runJobFlowRequest.setSteps(stepConfigs);

        try {
            RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest);
            jobFlowId = result.getJobFlowId();
            checkDate = new Date();
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e);
        }
    } else {
        // Flow already running: just append this job's steps to it.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId)
                .withSteps(csc.createStepConfig(config));
        emr.addJobFlowSteps(addJobFlowStepsRequest);
    }

    running = true;
    try {
        config.setJobFlowId(jobFlowId);
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }

    // Poll the flow until this config's step leaves PENDING/RUNNING.
    int stepSize = 0;
    String stepStatus = JobUtils.STEP_STATUS_PENDING;
    while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING)
            || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) {
        if (sleep()) {
            // sleep() returning true signals a requested shutdown/termination.
            break;
        }

        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest()
                .withJobFlowIds(jobFlowId);
        DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest);
        if (describeJobFlowsResult.getJobFlows().size() != 1) {
            break;
        }

        JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0);
        JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances();
        masterPublicDnsName = instancesDetail.getMasterPublicDnsName();
        // Publish the master DNS name to the queue the first time it appears.
        if (isEmpty(config.getMasterPublicDnsName())) {
            try {
                config.setMasterPublicDnsName(masterPublicDnsName);
                QueueUtils.updateQueue(config);
            } catch (IOException e) {
                e.printStackTrace();
                log.error(e);
            }
        }

        stepSize = jobFlowDetail.getSteps().size();
        // Find the step belonging to this config by name and read its state.
        for (StepDetail stepDetail : jobFlowDetail.getSteps()) {
            if (stepDetail.getStepConfig().getName().equals(config.getName())) {
                stepStatus = stepDetail.getExecutionStatusDetail().getState();
                break;
            }
        }
    }

    // Optional cleanup of the job's S3 artifacts.
    if (config.isDeleteOnExit()) {
        if (config.getJobType() == Config.JOB_TYPE_STREAMING) {
            S3Utils.delete(s3, config.getArgMap().get("mapper"));
            S3Utils.delete(s3, config.getArgMap().get("reducer"));
        } else {
            S3Utils.delete(s3, config.getRun());
        }
    }

    // EMR caps a job flow at 256 steps; recycle the instance when close to the
    // limit. (http://goo.gl/JDtsV)
    if (stepSize >= 255) {
        instanceTerminate();
    }

    running = false;
    if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) {
        config.setStatus(Config.JOB_STATUS_COMPLETE);
    } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) {
        config.setStatus(Config.JOB_STATUS_ERROR);
    } else if (terminated) {
        config.setStatus(Config.JOB_STATUS_CANCEL);
    }

    try {
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }
}
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException { Log4jFileAppender appender = null;// w w w . ja v a 2 s . c o m String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$ try { appender = LogWriter.createFileAppender(logFileName, true, false); LogWriter.getInstance().addAppender(appender); log.setLogLevel(parentJob.getLogLevel()); } catch (Exception e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$ logFileName, e.toString())); logError(Const.getStackTracker(e)); } try { // create/connect aws service AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials); // pull down jar from vfs FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl)); File tmpFile = File.createTempFile("customEMR", "jar"); tmpFile.deleteOnExit(); FileOutputStream tmpFileOut = new FileOutputStream(tmpFile); IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut); URL localJarUrl = tmpFile.toURI().toURL(); // find main class in jar String mainClass = getMainClass(localJarUrl); // create staging bucket AmazonS3 s3Client = new AmazonS3Client(awsCredentials); FileSystemOptions opts = new FileSystemOptions(); DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator( null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey())); FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts); String stagingBucketName = stagingDirFileObject.getName().getBaseName(); if (!s3Client.doesBucketExist(stagingBucketName)) { s3Client.createBucket(stagingBucketName); } // delete old jar if needed try { s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName()); } catch (Exception ex) { logError(Const.getStackTracker(ex)); } // put jar in s3 staging bucket s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile)); // create non-vfs 
s3 url to jar String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName(); String stagingS3BucketUrl = "s3://" + stagingBucketName; RunJobFlowRequest runJobFlowRequest = null; RunJobFlowResult runJobFlowResult = null; if (StringUtil.isEmpty(hadoopJobFlowId)) { // create EMR job flow runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass); // start EMR job runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest); } else { List<String> jarStepArgs = new ArrayList<String>(); if (!StringUtil.isEmpty(cmdLineArgs)) { StringTokenizer st = new StringTokenizer(cmdLineArgs, " "); while (st.hasMoreTokens()) { String token = st.nextToken(); logBasic("adding args: " + token); jarStepArgs.add(token); } } HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(); hadoopJarStep.setJar(stagingS3JarUrl); hadoopJarStep.setMainClass(mainClass); hadoopJarStep.setArgs(jarStepArgs); StepConfig stepConfig = new StepConfig(); stepConfig.setName("custom jar: " + jarUrl); stepConfig.setHadoopJarStep(hadoopJarStep); List<StepConfig> steps = new ArrayList<StepConfig>(); steps.add(stepConfig); AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest(); addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId); addJobFlowStepsRequest.setSteps(steps); emrClient.addJobFlowSteps(addJobFlowStepsRequest); } String loggingIntervalS = environmentSubstitute(loggingInterval); int logIntv = 60; try { logIntv = Integer.parseInt(loggingIntervalS); } catch (NumberFormatException ex) { logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60"); } // monitor it / blocking / logging if desired if (blocking) { try { if (log.isBasic()) { String executionState = "RUNNING"; List<String> jobFlowIds = new ArrayList<String>(); String id = hadoopJobFlowId; if (StringUtil.isEmpty(hadoopJobFlowId)) { id = runJobFlowResult.getJobFlowId(); jobFlowIds.add(id); } while (isRunning(executionState)) { 
DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest(); describeJobFlowsRequest.setJobFlowIds(jobFlowIds); DescribeJobFlowsResult describeJobFlowsResult = emrClient .describeJobFlows(describeJobFlowsRequest); boolean found = false; for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) { if (jobFlowDetail.getJobFlowId().equals(id)) { executionState = jobFlowDetail.getExecutionStatusDetail().getState(); found = true; } } if (!found) { break; } // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent, // mapPercent, reducePercent)); logBasic(hadoopJobName + " execution status: " + executionState); try { if (isRunning(executionState)) { Thread.sleep(logIntv * 1000); } } catch (InterruptedException ie) { // Ignore } } if ("FAILED".equalsIgnoreCase(executionState)) { result.setStopped(true); result.setNrErrors(1); result.setResult(false); S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout"); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); IOUtils.copy(outObject.getObjectContent(), outStream); logError(outStream.toString()); S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr"); ByteArrayOutputStream errorStream = new ByteArrayOutputStream(); IOUtils.copy(errorObject.getObjectContent(), errorStream); logError(errorStream.toString()); } } } catch (Exception e) { logError(e.getMessage(), e); } } } catch (Throwable t) { t.printStackTrace(); result.setStopped(true); result.setNrErrors(1); result.setResult(false); logError(t.getMessage(), t); } if (appender != null) { LogWriter.getInstance().removeAppender(appender); appender.close(); ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(), parentJob.getJobname(), getName()); result.getResultFiles().put(resultFile.getFile().toString(), resultFile); } return result; }
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/**
 * Executes a Hive job into the AWS Elastic MapReduce service: stages the Hive
 * script (.q file) in S3 if needed, starts a job flow (or reuses
 * {@code hadoopJobFlowId}), adds a "run hive script" step via script-runner,
 * and — when {@code blocking} — polls the flow state, dumping the step's
 * stdout/stderr from S3 on failure.
 *
 * @param result Kettle result object, updated with stop/error flags and the log file
 * @param arg1 unused (Kettle job-entry interface argument)
 * @return the same {@code result} instance
 */
public Result execute(Result result, int arg1) throws KettleException {

    // Setup a log file.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // Create and connect an AWS service.
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        // Get bucket name and S3 URL.
        String stagingBucketName = GetBucketName(stagingDir);
        String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

        // Prepare staging S3 URL for Hive script file.
        String stagingS3qUrl = "";
        if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$

            // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
            if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                // Strip embedded credentials ("...@s3/") from the VFS-style URL.
                stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
            } else {
                stagingS3qUrl = qUrl;
            }

        } else {
            // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory.
            // First, check for the correct protocol.
            if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG,
                            "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                }
            }

            // pull down .q file from VSF into a local temp file
            FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
            File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
            tmpFile.deleteOnExit();
            FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
            IOUtils.copy(qFile.getContent().getInputStream(), tmpFileOut);

            // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
            String key = GetKeyFromS3Url(stagingDir);
            if (key == null) {
                key = qFile.getName().getBaseName();
            } else {
                key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
            }

            // delete the previous .q file in S3 (best-effort; failure only logged)
            try {
                s3Client.deleteObject(stagingBucketName, key);
            } catch (Exception ex) {
                logError(Const.getStackTracker(ex));
            }

            // Put .q file in S3 Log Directory.
            s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
            stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
        }

        // AWS provides script-runner.jar (in its public bucket), which should be used as a MapReduce jar for
        // Hive EMR job. NOTE(review): this overwrites the jarUrl field as a side effect.
        jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create an EMR job flow, start a step to setup Hive and get the job flow ID.
            runJobFlowRequest = createJobFlow();
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
            hadoopJobFlowId = runJobFlowResult.getJobFlowId();
        }

        // Now EMR job flow is ready to accept a Run Hive Script step.
        // First, prepare a Job Flow ID list.
        List<String> jobFlowIds = new ArrayList<String>();
        jobFlowIds.add(hadoopJobFlowId);

        // Configure a HadoopJarStep: script-runner invokes AWS's hive-script wrapper
        // with the staged .q file and any user command-line arguments.
        String args = "s3://elasticmapreduce/libs/hive/hive-script "
                + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);

        // Add a Run Hive Script step to the existing job flow.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);
        emrClient.addJobFlowSteps(addJobFlowStepsRequest);

        // Set a logging interval (seconds between status polls; defaults to 10).
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 10;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                    loggingIntervalS));
        }

        // monitor and log if intended.
        if (blocking) {
            try {
                if (log.isBasic()) {
                    String executionState = "RUNNING"; //$NON-NLS-1$

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);
                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);

                        // Locate our flow in the response and read its state.
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }
                        if (!found) {
                            break;
                        }

                        logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                + executionState);

                        // Parent job stopped: terminate the flow unless keep-alive was requested.
                        if (parentJob.isStopped()) {
                            if (!alive) {
                                TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                emrClient.terminateJobFlows(terminateJobFlowsRequest);
                            }
                            break;
                        }

                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            logError(Const.getStackTracker(ie));
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        // Surface the failed step's stdout/stderr from S3 in the log.
                        S3Object outObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName,
                                hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:rollsPOC2.util.AWSHelper.java
public static String createOrFindEMRHiveCluster(String clusterName, boolean createWithKeepAlive) throws Exception { String clusterId = null;/*from w w w . j a va2 s . c om*/ AmazonElasticMapReduce emr = AppServices.getEMRClient(); ClusterSummary clusterSummary = findCluster("Treebeard", emr); if (clusterSummary != null) { clusterId = clusterSummary.getId(); System.err.printf("Cluster found with id %s, status %s\n", clusterId, clusterSummary.getStatus().getState()); } if (clusterSummary != null && clusterSummary.getStatus().getState().startsWith("TERMINAT")) { while (findCluster("Treebeard", emr).getStatus().getState().equals("TERMINATING")) { System.out.println("Waiting for previous cluster to terminate"); Thread.sleep(10000l); } System.out.println("Starting cluster..."); StepFactory stepFactory = new StepFactory(); StepConfig enabledebugging = new StepConfig().withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); // Possibly redundant with ".withApplications(new Application().withName("Hive"))" // StepConfig installHive = new StepConfig() // .withName("Install Hive") // .withActionOnFailure("TERMINATE_JOB_FLOW") // .withHadoopJarStep(stepFactory.newInstallHiveStep()); RunJobFlowRequest request = new RunJobFlowRequest().withName("Treebeard").withReleaseLabel("emr-4.6.0") .withApplications(new Application().withName("Hive")).withSteps(enabledebugging) .withVisibleToAllUsers(true) .withLogUri("s3://aws-logs-800327301943-us-east-1/elasticmapreduce/") .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole") .withInstances(new JobFlowInstancesConfig().withEc2KeyName("bjss").withInstanceCount(2) .withMasterInstanceType("m3.xlarge").withSlaveInstanceType("m1.large") .withKeepJobFlowAliveWhenNoSteps(createWithKeepAlive)); RunJobFlowResult createClusterResult = emr.runJobFlow(request); clusterId = createClusterResult.getJobFlowId(); System.out.printf("Started cluster with id %s\n", 
clusterId); } return clusterId; }