List of usage examples for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest RunJobFlowRequest
public RunJobFlowRequest()
From source file:awswc.AwsConsoleApp.java
License:Open Source License
static void runJobFlow() throws InterruptedException { // Configure instances to use JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); //********************************************************************// instances.setHadoopVersion(HADOOP_VERSION); instances.withEc2KeyName("ayuda-vp1"); instances.setInstanceCount(MASTER_INSTANCE_COUNT); //instances.setInstanceGroups(instanceGroups) instances.setMasterInstanceType(InstanceType.M24xlarge.toString()); instances.setSlaveInstanceType(InstanceType.M24xlarge.toString()); //********************************************************************// HadoopJarStepConfig hadoopJarStep1 = new HadoopJarStepConfig().withJar(S3N_WORD_COUNT_JAR_) // This should be a full map reduce application. .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC"); StepConfig stepConfig1 = new StepConfig().withName("wordcount").withHadoopJarStep(hadoopJarStep1) .withActionOnFailure("TERMINATE_JOB_FLOW"); //********************************************************************// //********************************************************************// HadoopJarStepConfig hadoopJarStep2 = new HadoopJarStepConfig().withJar(S3N_MAX_WORD_COUNT_JAR) // This should be a full map reduce application. .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10"); StepConfig stepConfig2 = new StepConfig().withName("maxwordcount").withHadoopJarStep(hadoopJarStep2) .withActionOnFailure("TERMINATE_JOB_FLOW"); //********************************************************************// Collection<StepConfig> csc = new ArrayList<StepConfig>(); csc.add(stepConfig1);//www .j av a 2s . c o m csc.add(stepConfig2); // BootstrapActions bootstrapActions = new BootstrapActions(); RunJobFlowRequest runFlowRequest = new RunJobFlowRequest().withName(FLOW_NAME).withInstances(instances) .withSteps(csc).withLogUri(BUCKET_NAME + "debug") /*.withBootstrapActions( bootstrapActions.newRunIf( "instance.isMaster=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.JobTracker, 4096) .build()), bootstrapActions.newRunIf( "instance.isRunningNameNode=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.NameNode, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningDataNode=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.DataNode, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningJobTracker=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.JobTracker, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningTaskTracker=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.TaskTracker, 4096).build()) /*, bootstrapActions.newRunIf( "instance.isSlave=true", bootstrapActions.newConfigureHadoop() .withKeyValue(ConfigFile.Site,"mapred.tasktracker.map.tasks.maximum", "4")) )*/; RunJobFlowResult runJobFlowResult = emr.runJobFlow(runFlowRequest); String jobFlowId = runJobFlowResult.getJobFlowId(); System.out.println("Ran job flow with id: " + jobFlowId); //wasFinished(runJobFlowResult); }
From source file:com.aegeus.aws.ElasticMapReduceService.java
License:Apache License
/** * Create a new EMR Cluster over Hadoop 2.4.0 *//* www . j a va2 s .c o m*/ public void createCluster() { JobFlowInstancesConfig instances = new JobFlowInstancesConfig() .withInstanceCount((int) config.getInstanceCount()).withMasterInstanceType(config.getMasterType()) .withSlaveInstanceType(config.getSlaveType()); if (Strings.isNullOrEmpty(config.getKeyName())) { instances.setEc2KeyName(config.getKeyName()); } if (!Strings.isNullOrEmpty(config.getSubnetId())) { instances.setEc2SubnetId(config.getSubnetId()); } else { instances.setPlacement(new PlacementType(config.getPlace())); } ScriptBootstrapActionConfig installEsConfig = new ScriptBootstrapActionConfig() .withPath("s3://support.elasticmapreduce/bootstrap-actions/other/elasticsearch_install.rb"); BootstrapActionConfig installEs = new BootstrapActionConfig("Elasticsearch Install", installEsConfig); RunJobFlowRequest request = new RunJobFlowRequest().withName(config.getName()).withReleaseLabel("emr-4.1.0") .withServiceRole("Default_AWS_Role").withJobFlowRole("Default_AWS_Role") .withBootstrapActions(installEs).withInstances(instances); if (!Strings.isNullOrEmpty(config.getLogBucket())) { request.setLogUri(config.getLogBucket()); } RunJobFlowResult result = emr.runJobFlow(request); clusterId = result.getJobFlowId(); }
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
public synchronized void startup() throws InterruptedException { checkConnection(false);/* ww w . j a v a 2 s . c om*/ _clusterState = ClusterState.STARTING; boolean successful = false; try { EmrSettings settings = getSettings(); if (settings.getPrivateKeyName() == null) { throw new NullPointerException( "privateKeyName must not be null please configure settings properly"); } LOG.info("Starting job flow '" + getName() + "' ..."); if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) { throw new IllegalStateException("Job flow with name '" + getName() + "' already running."); } boolean keepAlive = true; JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig(); jobConfig.setHadoopVersion(_settings.getHadoopVersion()); jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId()); jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId()); jobConfig.setInstanceCount(settings.getInstanceCount()); jobConfig.setEc2KeyName(settings.getPrivateKeyName()); jobConfig.setPlacement(new PlacementType()); jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive); final RunJobFlowRequest startRequest = new RunJobFlowRequest(); startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath()); startRequest.setInstances(jobConfig); startRequest.setName(getName()); startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo()); startRequest.setBootstrapActions(_settings.getBootstrapActions()); if (settings.isDebugEnabled()) { startRequest.withSteps(DEBUG_STEP); } RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest); _jobFlowId = startResponse.getJobFlowId(); waitUntilClusterStarted(_jobFlowId); LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is " + _masterHost); successful = true; } finally { if (successful) { _clusterState = ClusterState.CONNECTED; } else { _clusterState = ClusterState.UNCONNECTED; _jobFlowId = null; } } }
From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java
License:LGPL
void init() { requireNonNull(this.AWSAccessKey); requireNonNull(this.AWSAccessKey); requireNonNull(this.jarLocation); requireNonNull(this.jarArguments); requireNonNull(this.slavesInstanceType); requireNonNull(this.hadoopVersion); requireNonNull(this.jobFlowName); if (this.nInstances < 1) { throw new IllegalArgumentException("the number of instance is lower than 1"); }// w ww . ja v a 2 s. c o m if (this.masterInstanceType == null) { this.masterInstanceType = this.slavesInstanceType; } // Set the hadoop jar step final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim()) .withArgs(this.jarArguments); // Set step config final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step") .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW"); // Set the instance final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances) .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType) .withHadoopVersion(this.hadoopVersion); // Configure hadoop final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig() .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop") .withArgs("--site-key-value", "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks); final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop") .withScriptBootstrapAction(scriptBootstrapAction); // Enable debugging StepFactory stepFactory = new StepFactory(); StepConfig enableDebugging = new StepConfig().withName("Enable Debugging") .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep()); // Run flow this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName); // Enable or not debugging if (this.enableDebugging) { this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig); } else { this.runFlowRequest.withInstances(instances).withSteps(stepConfig); } // Limit the number of task in a task tracker if (this.taskTrackerMaxMapTasks > 0) { this.runFlowRequest.withBootstrapActions(bootstrapActions); } if (this.logPathname != null && !"".equals(this.logPathname)) { this.runFlowRequest.withLogUri(this.logPathname); } // Set EC2 Key name if (this.ec2KeyName != null) { this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName); } }
From source file:org.deeplearning4j.legacyExamples.EmrSparkExample.java
License:Apache License
public void entryPoint(String[] args) { JCommander jcmdr = new JCommander(this); try {/*from w w w . jav a 2 s . c om*/ jcmdr.parse(args); } catch (ParameterException e) { jcmdr.usage(); try { Thread.sleep(500); } catch (Exception e2) { } throw e; } AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard(); builder.withRegion(region); builder.withCredentials(getCredentialsProvider()); AmazonElasticMapReduce emr = builder.build(); List<StepConfig> steps = new ArrayList<>(); if (upload) { log.info("uploading uber jar"); AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard(); s3builder.withRegion(region); s3builder.withCredentials(getCredentialsProvider()); AmazonS3 s3Client = s3builder.build(); if (!s3Client.doesBucketExist(bucketName)) { s3Client.createBucket(bucketName); } File uberJarFile = new File(uberJar); s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile)); } if (debug) { log.info("enable debug"); StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce"); StepConfig enableDebugging = new StepConfig().withName("Enable Debugging") .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW) .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); steps.add(enableDebugging); } if (execute) { log.info("execute spark step"); HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig(); sparkStepConf.withJar("command-runner.jar"); sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className, getS3UberJarUrl(), "-useSparkLocal", "false"); ActionOnFailure action = ActionOnFailure.TERMINATE_JOB_FLOW; if (keepAlive) { action = ActionOnFailure.CONTINUE; } StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action) .withHadoopJarStep(sparkStepConf); steps.add(sparkStep); } log.info("create spark cluster"); Application sparkApp = new Application().withName("Spark"); // service and job flow role will be created automatically when // launching cluster in aws console, better do that first or create // manually RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps) .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole") .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl()) .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount) .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType) .withSlaveInstanceType(instanceType)); RunJobFlowResult result = emr.runJobFlow(request); log.info(result.toString()); log.info("done"); }
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java
License:Apache License
/** * @param config/*from w w w . j a va 2 s. c o m*/ * @throws URISyntaxException */ public void runJob(Config config) throws URISyntaxException { RunJobFlowRequest runJobFlowRequest = null; CreateStepConfigger csc = getCreateStepConfigger(config); if (csc == null) { log.error("Step config create error"); return; } if (jobFlowId == null) { runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME) .withBootstrapActions( new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction( new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)), new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction( new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI) .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")), new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction( new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path()))) .withInstances(setupJobFlowInstancesConfig()); if (!isEmpty(emrProperties.getLogUri())) { runJobFlowRequest.setLogUri(emrProperties.getLogUri()); } List<StepConfig> stepConfigs = new ArrayList<StepConfig>(); if (emrProperties.isDebug()) { StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME) .withActionOnFailure(ACTION_ON_TERMINATE) .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()); stepConfigs.add(enableDebugging); } for (StepConfig sc : csc.createStepConfig(config)) { stepConfigs.add(sc); } runJobFlowRequest.setSteps(stepConfigs); try { RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest); jobFlowId = result.getJobFlowId(); checkDate = new Date(); } catch (Exception e) { e.printStackTrace(); log.error(e); } } else { AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId) .withSteps(csc.createStepConfig(config)); emr.addJobFlowSteps(addJobFlowStepsRequest); } running = true; try { config.setJobFlowId(jobFlowId); QueueUtils.updateQueue(config); } catch (IOException e) { e.printStackTrace(); log.error(e); } int stepSize = 0; String stepStatus = JobUtils.STEP_STATUS_PENDING; while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING) || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) { if (sleep()) { break; } DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest() .withJobFlowIds(jobFlowId); DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest); if (describeJobFlowsResult.getJobFlows().size() != 1) { break; } JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0); JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances(); masterPublicDnsName = instancesDetail.getMasterPublicDnsName(); if (isEmpty(config.getMasterPublicDnsName())) { try { config.setMasterPublicDnsName(masterPublicDnsName); QueueUtils.updateQueue(config); } catch (IOException e) { e.printStackTrace(); log.error(e); } } stepSize = jobFlowDetail.getSteps().size(); for (StepDetail stepDetail : jobFlowDetail.getSteps()) { if (stepDetail.getStepConfig().getName().equals(config.getName())) { stepStatus = stepDetail.getExecutionStatusDetail().getState(); break; } } } if (config.isDeleteOnExit()) { if (config.getJobType() == Config.JOB_TYPE_STREAMING) { S3Utils.delete(s3, config.getArgMap().get("mapper")); S3Utils.delete(s3, config.getArgMap().get("reducer")); } else { S3Utils.delete(s3, config.getRun()); } } // Add More than 256 Steps to a Job Flow(http://goo.gl/JDtsV) if (stepSize >= 255) { instanceTerminate(); } running = false; if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) { config.setStatus(Config.JOB_STATUS_COMPLETE); } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) { config.setStatus(Config.JOB_STATUS_ERROR); } else if (terminated) { config.setStatus(Config.JOB_STATUS_CANCEL); } try { QueueUtils.updateQueue(config); } catch (IOException e) { e.printStackTrace(); log.error(e); } }
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License:Apache License
@VisibleForTesting RunJobFlowRequest initEmrCluster(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType, String mainClass, String bootstrapActions, AbstractAmazonJobEntry jobEntry) { RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setName(jobEntry.getHadoopJobName()); runJobFlowRequest.setReleaseLabel(jobEntry.getEmrRelease()); runJobFlowRequest.setLogUri(stagingS3BucketUrl); JobFlowInstancesConfig instances = initEC2Instance(Integer.parseInt(jobEntry.getNumInstances()), jobEntry.getMasterInstanceType(), jobEntry.getSlaveInstanceType()); runJobFlowRequest.setInstances(instances); List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry); if (steps.size() > 0) { runJobFlowRequest.setSteps(steps); }/*w ww .java 2 s .c o m*/ if (stepType.equals(STEP_HIVE)) { List<Application> applications = initApplications(); if (applications.size() > 0) { runJobFlowRequest.setApplications(applications); } List<BootstrapActionConfig> stepBootstrapActions = initBootstrapActions(bootstrapActions); if (stepBootstrapActions != null && stepBootstrapActions.size() > 0) { runJobFlowRequest.setBootstrapActions(stepBootstrapActions); } } String ec2Role = jobEntry.getEc2Role(); if (ec2Role == null || ec2Role.trim().isEmpty()) { runJobFlowRequest.setJobFlowRole(EMR_EC2_DEFAULT_ROLE); } else { runJobFlowRequest.setJobFlowRole(ec2Role); } String emrRole = jobEntry.getEmrRole(); if (emrRole == null || emrRole.trim().isEmpty()) { runJobFlowRequest.setServiceRole(EMR_EFAULT_ROLE); } else { runJobFlowRequest.setServiceRole(emrRole); } runJobFlowRequest.setVisibleToAllUsers(true); return runJobFlowRequest; }
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License:Apache License
public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) { List<String> jarStepArgs = new ArrayList<String>(); if (!StringUtil.isEmpty(cmdLineArgs)) { StringTokenizer st = new StringTokenizer(cmdLineArgs, " "); while (st.hasMoreTokens()) { String token = st.nextToken(); logBasic("adding args: " + token); jarStepArgs.add(token);// w ww . jav a2 s . co m } } HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(); hadoopJarStep.setJar(stagingS3Jar); hadoopJarStep.setMainClass(mainClass); hadoopJarStep.setArgs(jarStepArgs); StepConfig stepConfig = new StepConfig(); stepConfig.setName("custom jar: " + jarUrl); stepConfig.setHadoopJarStep(hadoopJarStep); List<StepConfig> steps = new ArrayList<StepConfig>(); steps.add(stepConfig); String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError("Unable to parse number of instances to use '" + numInstancesS + "' - " + "using 2 instances..."); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setSteps(steps); runJobFlowRequest.setLogUri(stagingS3BucketUrl); runJobFlowRequest.setName(hadoopJobName); runJobFlowRequest.setInstances(instances); // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig(); // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh"); // List<String> bootstrapArgs = new ArrayList<String>(); // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com"); // // // bootstrapArgs.add( // "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/"); // bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip"); // scriptBootstrapAction.setArgs(bootstrapArgs); // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig(); // bootstrapActionConfig.setName("mdd bootstrap"); // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction); // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>(); // bootstrapActions.add(bootstrapActionConfig); // runJobFlowRequest.setBootstrapActions(bootstrapActions); return runJobFlowRequest; }
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/** * Prepare to create a EMR job flow.//from w w w . j av a 2 s . c om * * @return RunJobFlowRequest The object to request an EMR job flow */ public RunJobFlowRequest createJobFlow() { // Create a RunJobFlowRequest object, set a name for the job flow. RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setName(hadoopJobName); // Set a log URL. String logUrl = stagingDir; if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$ logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$ } runJobFlowRequest.setLogUri(logUrl); // Determine the instances for Hadoop cluster. String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$ numInstancesS)); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); //$NON-NLS-1$ instances.setKeepJobFlowAliveWhenNoSteps(alive); runJobFlowRequest.setInstances(instances); // Set bootstrap actions. runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions()); // Create an EMR step to setup Hive. String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$ List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$ runJobFlowRequest.setSteps(steps); return runJobFlowRequest; }
From source file:rollsPOC2.util.AWSHelper.java
public static String createOrFindEMRHiveCluster(String clusterName, boolean createWithKeepAlive) throws Exception { String clusterId = null;/*from w w w . j a v a 2s .c o m*/ AmazonElasticMapReduce emr = AppServices.getEMRClient(); ClusterSummary clusterSummary = findCluster("Treebeard", emr); if (clusterSummary != null) { clusterId = clusterSummary.getId(); System.err.printf("Cluster found with id %s, status %s\n", clusterId, clusterSummary.getStatus().getState()); } if (clusterSummary != null && clusterSummary.getStatus().getState().startsWith("TERMINAT")) { while (findCluster("Treebeard", emr).getStatus().getState().equals("TERMINATING")) { System.out.println("Waiting for previous cluster to terminate"); Thread.sleep(10000l); } System.out.println("Starting cluster..."); StepFactory stepFactory = new StepFactory(); StepConfig enabledebugging = new StepConfig().withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); // Possibly redundant with ".withApplications(new Application().withName("Hive"))" // StepConfig installHive = new StepConfig() // .withName("Install Hive") // .withActionOnFailure("TERMINATE_JOB_FLOW") // .withHadoopJarStep(stepFactory.newInstallHiveStep()); RunJobFlowRequest request = new RunJobFlowRequest().withName("Treebeard").withReleaseLabel("emr-4.6.0") .withApplications(new Application().withName("Hive")).withSteps(enabledebugging) .withVisibleToAllUsers(true) .withLogUri("s3://aws-logs-800327301943-us-east-1/elasticmapreduce/") .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole") .withInstances(new JobFlowInstancesConfig().withEc2KeyName("bjss").withInstanceCount(2) .withMasterInstanceType("m3.xlarge").withSlaveInstanceType("m1.large") .withKeepJobFlowAliveWhenNoSteps(createWithKeepAlive)); RunJobFlowResult createClusterResult = emr.runJobFlow(request); clusterId = createClusterResult.getJobFlowId(); System.out.printf("Started cluster with id %s\n", clusterId); } return clusterId; }