List of usage examples for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest setLogUri
public void setLogUri(String logUri)
The location in Amazon S3 to write the log files of the job flow.
From source file:com.aegeus.aws.ElasticMapReduceService.java
License:Apache License
/** * Create a new EMR Cluster over Hadoop 2.4.0 *///from w w w . j a va 2 s . c o m public void createCluster() { JobFlowInstancesConfig instances = new JobFlowInstancesConfig() .withInstanceCount((int) config.getInstanceCount()).withMasterInstanceType(config.getMasterType()) .withSlaveInstanceType(config.getSlaveType()); if (Strings.isNullOrEmpty(config.getKeyName())) { instances.setEc2KeyName(config.getKeyName()); } if (!Strings.isNullOrEmpty(config.getSubnetId())) { instances.setEc2SubnetId(config.getSubnetId()); } else { instances.setPlacement(new PlacementType(config.getPlace())); } ScriptBootstrapActionConfig installEsConfig = new ScriptBootstrapActionConfig() .withPath("s3://support.elasticmapreduce/bootstrap-actions/other/elasticsearch_install.rb"); BootstrapActionConfig installEs = new BootstrapActionConfig("Elasticsearch Install", installEsConfig); RunJobFlowRequest request = new RunJobFlowRequest().withName(config.getName()).withReleaseLabel("emr-4.1.0") .withServiceRole("Default_AWS_Role").withJobFlowRole("Default_AWS_Role") .withBootstrapActions(installEs).withInstances(instances); if (!Strings.isNullOrEmpty(config.getLogBucket())) { request.setLogUri(config.getLogBucket()); } RunJobFlowResult result = emr.runJobFlow(request); clusterId = result.getJobFlowId(); }
From source file:com.clouddrive.parth.NewClass.java
/**
 * Starts an EMR job flow (named {@code FLOW_NAME}) that runs a custom Hadoop
 * jar, then blocks while polling the flow every 10 seconds until it reaches a
 * terminal state (as decided by {@code isDone}).
 *
 * @return {@code FLOW_NAME} when the flow was submitted and polled to
 *         completion; the empty string if an {@link AmazonServiceException}
 *         aborted the submission/polling
 * @throws Exception propagated from {@code Thread.sleep} interruption or
 *         other unexpected failures
 */
public static String runCluster() throws Exception {
    long start = System.currentTimeMillis();
    String temp = "";
    // Configure the job flow: name, instance configuration, and S3 log location.
    RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance());
    request.setLogUri(S3N_LOG_URI);
    // Configure the Hadoop jar step (jar location in S3 plus its CLI args).
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR);
    jarConfig.setArgs(ARGS_AS_LIST);
    try {
        // First step enables EMR debugging; second runs the user jar.
        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
        // Step name: the jar path with its leading bucket segment stripped.
        StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1),
                jarConfig);
        request.setSteps(Arrays.asList(new StepConfig[] { enableDebugging, runJar }));
        // Run the job flow.
        RunJobFlowResult result = emr.runJobFlow(request);
        // Poll the flow state; print transitions, exit when a terminal state is seen.
        String lastState = "";
        STATUS_LOOP: while (true) {
            DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest(
                    Arrays.asList(new String[] { result.getJobFlowId() }));
            DescribeJobFlowsResult descResult = emr.describeJobFlows(desc);
            for (JobFlowDetail detail : descResult.getJobFlows()) {
                String state = detail.getExecutionStatusDetail().getState();
                if (isDone(state)) {
                    System.out.println("Job " + state + ": " + detail.toString());
                    break STATUS_LOOP;
                } else if (!lastState.equals(state)) {
                    // Only log when the state actually changes.
                    lastState = state;
                    System.out.println("Job " + state + " at " + new Date().toString());
                }
            }
            Thread.sleep(10000); // 10-second polling interval
        }
        temp = FLOW_NAME;
        long end = System.currentTimeMillis();
        System.out.println("Computation " + (end - start));
    } catch (AmazonServiceException ase) {
        // NOTE(review): the exception is reported but swallowed; callers only
        // see the empty-string return value. Consider rethrowing or logging
        // the full stack trace.
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Reponse Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }
    return temp;
}
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
/**
 * Starts the EMR cluster described by the current {@code EmrSettings}.
 *
 * Transitions {@code _clusterState} to STARTING, submits a RunJobFlow
 * request, waits until the cluster is up, and finally settles the state to
 * CONNECTED on success or UNCONNECTED (with {@code _jobFlowId} cleared) on
 * any failure.
 *
 * @throws InterruptedException if interrupted while waiting for the cluster
 * @throws NullPointerException if no private key name is configured
 * @throws IllegalStateException if a job flow with this name is already running
 */
public synchronized void startup() throws InterruptedException {
    checkConnection(false); // must NOT already be connected
    _clusterState = ClusterState.STARTING;
    // Track success so the finally block can settle the state machine.
    boolean successful = false;
    try {
        EmrSettings settings = getSettings();
        if (settings.getPrivateKeyName() == null) {
            throw new NullPointerException(
                    "privateKeyName must not be null please configure settings properly");
        }
        LOG.info("Starting job flow '" + getName() + "' ...");
        // Refuse to start a duplicate flow with the same name.
        if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) {
            throw new IllegalStateException("Job flow with name '" + getName() + "' already running.");
        }
        // Keep the cluster alive after steps finish so more steps can be added.
        boolean keepAlive = true;
        JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig();
        jobConfig.setHadoopVersion(_settings.getHadoopVersion());
        jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId());
        jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId());
        jobConfig.setInstanceCount(settings.getInstanceCount());
        jobConfig.setEc2KeyName(settings.getPrivateKeyName());
        jobConfig.setPlacement(new PlacementType());
        jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);
        final RunJobFlowRequest startRequest = new RunJobFlowRequest();
        // NOTE(review): no "/" is inserted between bucket and log path here —
        // getS3LogPath() presumably starts with "/"; confirm against settings.
        startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath());
        startRequest.setInstances(jobConfig);
        startRequest.setName(getName());
        startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo());
        startRequest.setBootstrapActions(_settings.getBootstrapActions());
        if (settings.isDebugEnabled()) {
            startRequest.withSteps(DEBUG_STEP);
        }
        RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest);
        _jobFlowId = startResponse.getJobFlowId();
        // Block until EMR reports the cluster as started.
        waitUntilClusterStarted(_jobFlowId);
        LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is "
                + _masterHost);
        successful = true;
    } finally {
        // Settle the state machine whether we succeeded or threw.
        if (successful) {
            _clusterState = ClusterState.CONNECTED;
        } else {
            _clusterState = ClusterState.UNCONNECTED;
            _jobFlowId = null;
        }
    }
}
From source file:org.finra.dm.dao.impl.EmrDaoImpl.java
License:Apache License
/** * Create the run job flow request object. * * @param emrClusterDefinition the EMR definition name value. * @param clusterName the EMR cluster name. * * @return run job flow request for the given configuration. */// ww w . ja va 2 s . com private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) { // Create the object RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(clusterName, getJobFlowInstancesConfig(emrClusterDefinition)); // Set release label if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) { runJobFlowRequest.setReleaseLabel(emrClusterDefinition.getReleaseLabel()); } // Set list of Applications List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications = emrClusterDefinition .getApplications(); if (!CollectionUtils.isEmpty(emrClusterDefinitionApplications)) { runJobFlowRequest.setApplications(getApplications(emrClusterDefinitionApplications)); } // Set list of Configurations List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations = emrClusterDefinition .getConfigurations(); if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) { runJobFlowRequest.setConfigurations(getConfigurations(emrClusterDefinitionConfigurations)); } // Set the log bucket if specified if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) { runJobFlowRequest.setLogUri(emrClusterDefinition.getLogBucket()); } // Set the visible to all flag if (emrClusterDefinition.isVisibleToAll() != null) { runJobFlowRequest.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll()); } // Set the IAM profile for the nodes if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) { runJobFlowRequest.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName()); } else { runJobFlowRequest.setJobFlowRole(dmStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME)); } // Set the IAM profile for the service if 
(StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) { runJobFlowRequest.setServiceRole(emrClusterDefinition.getServiceIamRole()); } else { runJobFlowRequest.setServiceRole(dmStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME)); } // Set the AMI version if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) { runJobFlowRequest.setAmiVersion(emrClusterDefinition.getAmiVersion()); } // Set the additionalInfo if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) { runJobFlowRequest.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo()); } // Set the bootstrap actions if (!getBootstrapActionConfigList(emrClusterDefinition).isEmpty()) { runJobFlowRequest.setBootstrapActions(getBootstrapActionConfigList(emrClusterDefinition)); } // Set the app installation steps if (!getStepConfig(emrClusterDefinition).isEmpty()) { runJobFlowRequest.setSteps(getStepConfig(emrClusterDefinition)); } // Set the tags if (!getEmrTags(emrClusterDefinition).isEmpty()) { runJobFlowRequest.setTags(getEmrTags(emrClusterDefinition)); } // Assign supported products as applicable if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) { List<String> supportedProducts = new ArrayList<>(); supportedProducts.add(emrClusterDefinition.getSupportedProduct()); runJobFlowRequest.setSupportedProducts(supportedProducts); } // Return the object return runJobFlowRequest; }
From source file:org.finra.herd.dao.impl.EmrDaoImpl.java
License:Apache License
/** * Create the run job flow request object. * * @param emrClusterDefinition the EMR definition name value * @param clusterName the EMR cluster name * * @return the run job flow request for the given configuration */// w w w. j av a 2 s .c o m private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) { // Create the object RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(clusterName, getJobFlowInstancesConfig(emrClusterDefinition)); // Set release label if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) { runJobFlowRequest.setReleaseLabel(emrClusterDefinition.getReleaseLabel()); } // Set list of Applications List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications = emrClusterDefinition .getApplications(); if (!CollectionUtils.isEmpty(emrClusterDefinitionApplications)) { runJobFlowRequest.setApplications(getApplications(emrClusterDefinitionApplications)); } // Set list of Configurations List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations = emrClusterDefinition .getConfigurations(); if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) { runJobFlowRequest.setConfigurations(getConfigurations(emrClusterDefinitionConfigurations)); } // Set the log bucket if specified if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) { runJobFlowRequest.setLogUri(emrClusterDefinition.getLogBucket()); } // Set the visible to all flag if (emrClusterDefinition.isVisibleToAll() != null) { runJobFlowRequest.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll()); } // Set the IAM profile for the nodes if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) { runJobFlowRequest.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName()); } else { runJobFlowRequest.setJobFlowRole(herdStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME)); } // Set the IAM profile for the 
service if (StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) { runJobFlowRequest.setServiceRole(emrClusterDefinition.getServiceIamRole()); } else { runJobFlowRequest.setServiceRole(herdStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME)); } // Set the AMI version if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) { runJobFlowRequest.setAmiVersion(emrClusterDefinition.getAmiVersion()); } // Set the additionalInfo if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) { runJobFlowRequest.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo()); } // Set the bootstrap actions List<BootstrapActionConfig> bootstrapActionConfigList = getBootstrapActionConfigList(emrClusterDefinition); if (!bootstrapActionConfigList.isEmpty()) { runJobFlowRequest.setBootstrapActions(bootstrapActionConfigList); } // Set the app installation steps runJobFlowRequest.setSteps(getStepConfig(emrClusterDefinition)); // Set the tags runJobFlowRequest.setTags(getEmrTags(emrClusterDefinition)); // Assign supported products as applicable if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) { List<String> supportedProducts = new ArrayList<>(); supportedProducts.add(emrClusterDefinition.getSupportedProduct()); runJobFlowRequest.setSupportedProducts(supportedProducts); } // Assign security configuration. if (StringUtils.isNotBlank(emrClusterDefinition.getSecurityConfiguration())) { runJobFlowRequest.setSecurityConfiguration(emrClusterDefinition.getSecurityConfiguration()); } // Assign scale down behavior. if (StringUtils.isNotBlank(emrClusterDefinition.getScaleDownBehavior())) { runJobFlowRequest.setScaleDownBehavior(emrClusterDefinition.getScaleDownBehavior()); } // Assign Kerberos attributes. runJobFlowRequest .setKerberosAttributes(getKerberosAttributes(emrClusterDefinition.getKerberosAttributes())); // Return the object return runJobFlowRequest; }
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java
License:Apache License
/**
 * Submits the job described by {@code config} to EMR and blocks until the
 * matching step finishes.
 *
 * If no job flow exists yet, a new one is created (with memory/Hadoop/Huahin
 * bootstrap actions and, optionally, a debugging step); otherwise the steps
 * are appended to the existing flow. The method then polls the flow, records
 * the master DNS name on the config, optionally deletes job artifacts from
 * S3, terminates the flow when it approaches the 256-step limit, and writes
 * the final job status back to the queue.
 *
 * @param config the job configuration to run
 * @throws URISyntaxException propagated from step-config creation
 */
public void runJob(Config config) throws URISyntaxException {
    RunJobFlowRequest runJobFlowRequest = null;
    CreateStepConfigger csc = getCreateStepConfigger(config);
    if (csc == null) {
        log.error("Step config create error");
        return;
    }
    if (jobFlowId == null) {
        // No flow yet: create one with the three standard bootstrap actions.
        runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME)
                .withBootstrapActions(
                        new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)),
                        new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI)
                                        .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")),
                        new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path())))
                .withInstances(setupJobFlowInstancesConfig());
        // Only set an S3 log URI when one is configured.
        if (!isEmpty(emrProperties.getLogUri())) {
            runJobFlowRequest.setLogUri(emrProperties.getLogUri());
        }
        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
        // Optionally prepend the EMR debugging step.
        if (emrProperties.isDebug()) {
            StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME)
                    .withActionOnFailure(ACTION_ON_TERMINATE)
                    .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
            stepConfigs.add(enableDebugging);
        }
        for (StepConfig sc : csc.createStepConfig(config)) {
            stepConfigs.add(sc);
        }
        runJobFlowRequest.setSteps(stepConfigs);
        try {
            RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest);
            jobFlowId = result.getJobFlowId();
            checkDate = new Date();
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e);
        }
    } else {
        // Flow already running: just append this job's steps to it.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId)
                .withSteps(csc.createStepConfig(config));
        emr.addJobFlowSteps(addJobFlowStepsRequest);
    }
    running = true;
    // Record the job flow id on the config and push it to the queue.
    try {
        config.setJobFlowId(jobFlowId);
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }
    int stepSize = 0;
    String stepStatus = JobUtils.STEP_STATUS_PENDING;
    // Poll while our step is still pending/running; sleep() returning true
    // signals a shutdown/termination request and exits the loop early.
    while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING)
            || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) {
        if (sleep()) {
            break;
        }
        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest()
                .withJobFlowIds(jobFlowId);
        DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest);
        if (describeJobFlowsResult.getJobFlows().size() != 1) {
            break;
        }
        JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0);
        JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances();
        masterPublicDnsName = instancesDetail.getMasterPublicDnsName();
        // Publish the master DNS name once it becomes known.
        if (isEmpty(config.getMasterPublicDnsName())) {
            try {
                config.setMasterPublicDnsName(masterPublicDnsName);
                QueueUtils.updateQueue(config);
            } catch (IOException e) {
                e.printStackTrace();
                log.error(e);
            }
        }
        stepSize = jobFlowDetail.getSteps().size();
        // Find our step (matched by name) and read its current state.
        for (StepDetail stepDetail : jobFlowDetail.getSteps()) {
            if (stepDetail.getStepConfig().getName().equals(config.getName())) {
                stepStatus = stepDetail.getExecutionStatusDetail().getState();
                break;
            }
        }
    }
    // Clean up job artifacts from S3 when requested.
    if (config.isDeleteOnExit()) {
        if (config.getJobType() == Config.JOB_TYPE_STREAMING) {
            S3Utils.delete(s3, config.getArgMap().get("mapper"));
            S3Utils.delete(s3, config.getArgMap().get("reducer"));
        } else {
            S3Utils.delete(s3, config.getRun());
        }
    }
    // Add More than 256 Steps to a Job Flow(http://goo.gl/JDtsV)
    // EMR caps a flow at 256 steps, so terminate before hitting the limit.
    if (stepSize >= 255) {
        instanceTerminate();
    }
    running = false;
    // Map the final step state onto the job's queue status.
    if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) {
        config.setStatus(Config.JOB_STATUS_COMPLETE);
    } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) {
        config.setStatus(Config.JOB_STATUS_ERROR);
    } else if (terminated) {
        config.setStatus(Config.JOB_STATUS_CANCEL);
    }
    try {
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }
}
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License:Apache License
/**
 * Builds the RunJobFlowRequest for launching an EMR cluster for the given
 * job entry: name, release label, S3 log URI, instance configuration, steps,
 * (for Hive) applications and bootstrap actions, and IAM roles with defaults.
 */
@VisibleForTesting
RunJobFlowRequest initEmrCluster(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType,
        String mainClass, String bootstrapActions, AbstractAmazonJobEntry jobEntry) {
    RunJobFlowRequest request = new RunJobFlowRequest();
    request.setName(jobEntry.getHadoopJobName());
    request.setReleaseLabel(jobEntry.getEmrRelease());
    request.setLogUri(stagingS3BucketUrl);

    // Instance fleet: count plus master/slave instance types from the job entry.
    request.setInstances(initEC2Instance(Integer.parseInt(jobEntry.getNumInstances()),
            jobEntry.getMasterInstanceType(), jobEntry.getSlaveInstanceType()));

    List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry);
    if (!steps.isEmpty()) {
        request.setSteps(steps);
    }

    // Hive jobs additionally get the Hive application and any bootstrap actions.
    if (stepType.equals(STEP_HIVE)) {
        List<Application> applications = initApplications();
        if (!applications.isEmpty()) {
            request.setApplications(applications);
        }
        List<BootstrapActionConfig> hiveBootstrapActions = initBootstrapActions(bootstrapActions);
        if (hiveBootstrapActions != null && !hiveBootstrapActions.isEmpty()) {
            request.setBootstrapActions(hiveBootstrapActions);
        }
    }

    // IAM roles: fall back to the default constants when blank.
    // (EMR_EFAULT_ROLE is the constant's declared name — likely a typo for
    // "DEFAULT", but it is defined elsewhere and cannot be renamed here.)
    String ec2Role = jobEntry.getEc2Role();
    boolean ec2RoleBlank = ec2Role == null || ec2Role.trim().isEmpty();
    request.setJobFlowRole(ec2RoleBlank ? EMR_EC2_DEFAULT_ROLE : ec2Role);

    String emrRole = jobEntry.getEmrRole();
    boolean emrRoleBlank = emrRole == null || emrRole.trim().isEmpty();
    request.setServiceRole(emrRoleBlank ? EMR_EFAULT_ROLE : emrRole);

    request.setVisibleToAllUsers(true);
    return request;
}
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) { List<String> jarStepArgs = new ArrayList<String>(); if (!StringUtil.isEmpty(cmdLineArgs)) { StringTokenizer st = new StringTokenizer(cmdLineArgs, " "); while (st.hasMoreTokens()) { String token = st.nextToken(); logBasic("adding args: " + token); jarStepArgs.add(token);/*from ww w .j av a 2 s . c o m*/ } } HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(); hadoopJarStep.setJar(stagingS3Jar); hadoopJarStep.setMainClass(mainClass); hadoopJarStep.setArgs(jarStepArgs); StepConfig stepConfig = new StepConfig(); stepConfig.setName("custom jar: " + jarUrl); stepConfig.setHadoopJarStep(hadoopJarStep); List<StepConfig> steps = new ArrayList<StepConfig>(); steps.add(stepConfig); String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError("Unable to parse number of instances to use '" + numInstancesS + "' - " + "using 2 instances..."); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setSteps(steps); runJobFlowRequest.setLogUri(stagingS3BucketUrl); runJobFlowRequest.setName(hadoopJobName); runJobFlowRequest.setInstances(instances); // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig(); // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh"); // List<String> bootstrapArgs = new ArrayList<String>(); // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com"); // // // bootstrapArgs.add( // "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/"); // 
bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip"); // scriptBootstrapAction.setArgs(bootstrapArgs); // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig(); // bootstrapActionConfig.setName("mdd bootstrap"); // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction); // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>(); // bootstrapActions.add(bootstrapActionConfig); // runJobFlowRequest.setBootstrapActions(bootstrapActions); return runJobFlowRequest; }
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/** * Prepare to create a EMR job flow./*from w w w . jav a 2 s . co m*/ * * @return RunJobFlowRequest The object to request an EMR job flow */ public RunJobFlowRequest createJobFlow() { // Create a RunJobFlowRequest object, set a name for the job flow. RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setName(hadoopJobName); // Set a log URL. String logUrl = stagingDir; if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$ logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$ } runJobFlowRequest.setLogUri(logUrl); // Determine the instances for Hadoop cluster. String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$ numInstancesS)); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); //$NON-NLS-1$ instances.setKeepJobFlowAliveWhenNoSteps(alive); runJobFlowRequest.setInstances(instances); // Set bootstrap actions. runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions()); // Create an EMR step to setup Hive. String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$ List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$ runJobFlowRequest.setSteps(steps); return runJobFlowRequest; }