List of usage examples for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest setInstances
public void setInstances(JobFlowInstancesConfig instances)
A specification of the number and type of Amazon EC2 instances.
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License: Apache License
public synchronized void startup() throws InterruptedException { checkConnection(false);//from w w w .j a v a 2 s. c o m _clusterState = ClusterState.STARTING; boolean successful = false; try { EmrSettings settings = getSettings(); if (settings.getPrivateKeyName() == null) { throw new NullPointerException( "privateKeyName must not be null please configure settings properly"); } LOG.info("Starting job flow '" + getName() + "' ..."); if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) { throw new IllegalStateException("Job flow with name '" + getName() + "' already running."); } boolean keepAlive = true; JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig(); jobConfig.setHadoopVersion(_settings.getHadoopVersion()); jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId()); jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId()); jobConfig.setInstanceCount(settings.getInstanceCount()); jobConfig.setEc2KeyName(settings.getPrivateKeyName()); jobConfig.setPlacement(new PlacementType()); jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive); final RunJobFlowRequest startRequest = new RunJobFlowRequest(); startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath()); startRequest.setInstances(jobConfig); startRequest.setName(getName()); startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo()); startRequest.setBootstrapActions(_settings.getBootstrapActions()); if (settings.isDebugEnabled()) { startRequest.withSteps(DEBUG_STEP); } RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest); _jobFlowId = startResponse.getJobFlowId(); waitUntilClusterStarted(_jobFlowId); LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is " + _masterHost); successful = true; } finally { if (successful) { _clusterState = ClusterState.CONNECTED; } else { _clusterState = ClusterState.UNCONNECTED; _jobFlowId = null; } } }
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License: Apache License
/**
 * Builds a {@link RunJobFlowRequest} describing the EMR cluster to launch.
 *
 * @param stagingS3FileUrl   S3 URL of the staged job artifact, passed to step creation
 * @param stagingS3BucketUrl S3 URL used as the cluster's log URI
 * @param stepType           kind of step being configured; Hive steps additionally get
 *                           applications and bootstrap actions
 * @param mainClass          main class for the job step, passed to step creation
 * @param bootstrapActions   raw bootstrap-action configuration (used for Hive steps only)
 * @param jobEntry           job definition supplying name, release label, sizing and roles
 * @return a fully populated request, marked visible to all users
 */
@VisibleForTesting
RunJobFlowRequest initEmrCluster(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType,
        String mainClass, String bootstrapActions, AbstractAmazonJobEntry jobEntry) {
    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setName(jobEntry.getHadoopJobName());
    runJobFlowRequest.setReleaseLabel(jobEntry.getEmrRelease());
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);

    JobFlowInstancesConfig instances = initEC2Instance(Integer.parseInt(jobEntry.getNumInstances()),
            jobEntry.getMasterInstanceType(), jobEntry.getSlaveInstanceType());
    runJobFlowRequest.setInstances(instances);

    List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry);
    if (!steps.isEmpty()) {
        runJobFlowRequest.setSteps(steps);
    }

    if (stepType.equals(STEP_HIVE)) {
        List<Application> applications = initApplications();
        if (!applications.isEmpty()) {
            runJobFlowRequest.setApplications(applications);
        }
        List<BootstrapActionConfig> stepBootstrapActions = initBootstrapActions(bootstrapActions);
        if (stepBootstrapActions != null && !stepBootstrapActions.isEmpty()) {
            runJobFlowRequest.setBootstrapActions(stepBootstrapActions);
        }
    }

    // Fall back to the AWS default roles when none are configured.
    // NOTE(review): EMR_EFAULT_ROLE looks like a typo of "EMR_DEFAULT_ROLE"; confirm and
    // rename at the constant's declaration site, not here.
    runJobFlowRequest.setJobFlowRole(defaultIfBlank(jobEntry.getEc2Role(), EMR_EC2_DEFAULT_ROLE));
    runJobFlowRequest.setServiceRole(defaultIfBlank(jobEntry.getEmrRole(), EMR_EFAULT_ROLE));

    runJobFlowRequest.setVisibleToAllUsers(true);
    return runJobFlowRequest;
}

/** Returns {@code value} unless it is null or blank, in which case {@code fallback}. */
private static String defaultIfBlank(String value, String fallback) {
    return (value == null || value.trim().isEmpty()) ? fallback : value;
}
From source file:org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java
License: Apache License
public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) { List<String> jarStepArgs = new ArrayList<String>(); if (!StringUtil.isEmpty(cmdLineArgs)) { StringTokenizer st = new StringTokenizer(cmdLineArgs, " "); while (st.hasMoreTokens()) { String token = st.nextToken(); logBasic("adding args: " + token); jarStepArgs.add(token);// ww w.j a v a 2s . com } } HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(); hadoopJarStep.setJar(stagingS3Jar); hadoopJarStep.setMainClass(mainClass); hadoopJarStep.setArgs(jarStepArgs); StepConfig stepConfig = new StepConfig(); stepConfig.setName("custom jar: " + jarUrl); stepConfig.setHadoopJarStep(hadoopJarStep); List<StepConfig> steps = new ArrayList<StepConfig>(); steps.add(stepConfig); String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError("Unable to parse number of instances to use '" + numInstancesS + "' - " + "using 2 instances..."); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setSteps(steps); runJobFlowRequest.setLogUri(stagingS3BucketUrl); runJobFlowRequest.setName(hadoopJobName); runJobFlowRequest.setInstances(instances); // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig(); // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh"); // List<String> bootstrapArgs = new ArrayList<String>(); // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com"); // // // bootstrapArgs.add( // "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/"); // 
bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip"); // scriptBootstrapAction.setArgs(bootstrapArgs); // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig(); // bootstrapActionConfig.setName("mdd bootstrap"); // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction); // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>(); // bootstrapActions.add(bootstrapActionConfig); // runJobFlowRequest.setBootstrapActions(bootstrapActions); return runJobFlowRequest; }
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License: Apache License
/** * Prepare to create a EMR job flow.//from www. j a v a 2 s . c o m * * @return RunJobFlowRequest The object to request an EMR job flow */ public RunJobFlowRequest createJobFlow() { // Create a RunJobFlowRequest object, set a name for the job flow. RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setName(hadoopJobName); // Set a log URL. String logUrl = stagingDir; if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$ logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$ } runJobFlowRequest.setLogUri(logUrl); // Determine the instances for Hadoop cluster. String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$ numInstancesS)); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); //$NON-NLS-1$ instances.setKeepJobFlowAliveWhenNoSteps(alive); runJobFlowRequest.setInstances(instances); // Set bootstrap actions. runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions()); // Create an EMR step to setup Hive. String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$ List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$ runJobFlowRequest.setSteps(steps); return runJobFlowRequest; }