List of usage examples for com.amazonaws.services.elasticmapreduce.model RunJobFlowRequest setBootstrapActions
public void setBootstrapActions(java.util.Collection<BootstrapActionConfig> bootstrapActions)
A list of bootstrap actions to run before Hadoop starts on the cluster nodes.
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
/**
 * Starts the EMR job flow for this cluster and blocks until the cluster is running.
 *
 * Moves _clusterState from STARTING to CONNECTED on success; on any failure the
 * finally-block rolls the state back to UNCONNECTED and clears _jobFlowId.
 *
 * @throws InterruptedException if interrupted while waiting for the cluster to start
 * @throws NullPointerException if no private key name is configured in the settings
 * @throws IllegalStateException if a job flow with this cluster's name is already running
 */
public synchronized void startup() throws InterruptedException {
    // Fails fast if the cluster is already in a connected state.
    checkConnection(false);
    _clusterState = ClusterState.STARTING;
    boolean successful = false;
    try {
        EmrSettings settings = getSettings();
        if (settings.getPrivateKeyName() == null) {
            throw new NullPointerException(
                    "privateKeyName must not be null please configure settings properly");
        }
        LOG.info("Starting job flow '" + getName() + "' ...");
        // Refuse to start a second flow under the same name.
        if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) {
            throw new IllegalStateException("Job flow with name '" + getName() + "' already running.");
        }
        // Keep the cluster alive once all steps finish, so more steps can be submitted later.
        boolean keepAlive = true;
        JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig();
        // NOTE(review): this method mixes the 'settings' local with the '_settings' field —
        // presumably getSettings() returns _settings, but confirm and unify.
        jobConfig.setHadoopVersion(_settings.getHadoopVersion());
        jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId());
        jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId());
        jobConfig.setInstanceCount(settings.getInstanceCount());
        // The key pair that allows SSH access to the cluster nodes.
        jobConfig.setEc2KeyName(settings.getPrivateKeyName());
        jobConfig.setPlacement(new PlacementType());
        jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive);
        final RunJobFlowRequest startRequest = new RunJobFlowRequest();
        startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath());
        startRequest.setInstances(jobConfig);
        startRequest.setName(getName());
        startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo());
        startRequest.setBootstrapActions(_settings.getBootstrapActions());
        // Optionally install the EMR debugging step first.
        if (settings.isDebugEnabled()) {
            startRequest.withSteps(DEBUG_STEP);
        }
        RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest);
        _jobFlowId = startResponse.getJobFlowId();
        // Blocks (polling) until the flow reports a started state; sets _masterHost.
        waitUntilClusterStarted(_jobFlowId);
        LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is "
                + _masterHost);
        successful = true;
    } finally {
        if (successful) {
            _clusterState = ClusterState.CONNECTED;
        } else {
            // Roll back on any exception so the instance can be retried.
            _clusterState = ClusterState.UNCONNECTED;
            _jobFlowId = null;
        }
    }
}
From source file:org.finra.dm.dao.impl.EmrDaoImpl.java
License:Apache License
/** * Create the run job flow request object. * * @param emrClusterDefinition the EMR definition name value. * @param clusterName the EMR cluster name. * * @return run job flow request for the given configuration. *///from w w w .j a v a 2 s .c o m private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) { // Create the object RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(clusterName, getJobFlowInstancesConfig(emrClusterDefinition)); // Set release label if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) { runJobFlowRequest.setReleaseLabel(emrClusterDefinition.getReleaseLabel()); } // Set list of Applications List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications = emrClusterDefinition .getApplications(); if (!CollectionUtils.isEmpty(emrClusterDefinitionApplications)) { runJobFlowRequest.setApplications(getApplications(emrClusterDefinitionApplications)); } // Set list of Configurations List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations = emrClusterDefinition .getConfigurations(); if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) { runJobFlowRequest.setConfigurations(getConfigurations(emrClusterDefinitionConfigurations)); } // Set the log bucket if specified if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) { runJobFlowRequest.setLogUri(emrClusterDefinition.getLogBucket()); } // Set the visible to all flag if (emrClusterDefinition.isVisibleToAll() != null) { runJobFlowRequest.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll()); } // Set the IAM profile for the nodes if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) { runJobFlowRequest.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName()); } else { runJobFlowRequest.setJobFlowRole(dmStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME)); } // Set the IAM profile for the 
service if (StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) { runJobFlowRequest.setServiceRole(emrClusterDefinition.getServiceIamRole()); } else { runJobFlowRequest.setServiceRole(dmStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME)); } // Set the AMI version if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) { runJobFlowRequest.setAmiVersion(emrClusterDefinition.getAmiVersion()); } // Set the additionalInfo if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) { runJobFlowRequest.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo()); } // Set the bootstrap actions if (!getBootstrapActionConfigList(emrClusterDefinition).isEmpty()) { runJobFlowRequest.setBootstrapActions(getBootstrapActionConfigList(emrClusterDefinition)); } // Set the app installation steps if (!getStepConfig(emrClusterDefinition).isEmpty()) { runJobFlowRequest.setSteps(getStepConfig(emrClusterDefinition)); } // Set the tags if (!getEmrTags(emrClusterDefinition).isEmpty()) { runJobFlowRequest.setTags(getEmrTags(emrClusterDefinition)); } // Assign supported products as applicable if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) { List<String> supportedProducts = new ArrayList<>(); supportedProducts.add(emrClusterDefinition.getSupportedProduct()); runJobFlowRequest.setSupportedProducts(supportedProducts); } // Return the object return runJobFlowRequest; }
From source file:org.finra.herd.dao.impl.EmrDaoImpl.java
License:Apache License
/** * Create the run job flow request object. * * @param emrClusterDefinition the EMR definition name value * @param clusterName the EMR cluster name * * @return the run job flow request for the given configuration *//*from w w w.j a va 2 s . co m*/ private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) { // Create the object RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(clusterName, getJobFlowInstancesConfig(emrClusterDefinition)); // Set release label if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) { runJobFlowRequest.setReleaseLabel(emrClusterDefinition.getReleaseLabel()); } // Set list of Applications List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications = emrClusterDefinition .getApplications(); if (!CollectionUtils.isEmpty(emrClusterDefinitionApplications)) { runJobFlowRequest.setApplications(getApplications(emrClusterDefinitionApplications)); } // Set list of Configurations List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations = emrClusterDefinition .getConfigurations(); if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) { runJobFlowRequest.setConfigurations(getConfigurations(emrClusterDefinitionConfigurations)); } // Set the log bucket if specified if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) { runJobFlowRequest.setLogUri(emrClusterDefinition.getLogBucket()); } // Set the visible to all flag if (emrClusterDefinition.isVisibleToAll() != null) { runJobFlowRequest.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll()); } // Set the IAM profile for the nodes if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) { runJobFlowRequest.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName()); } else { runJobFlowRequest.setJobFlowRole(herdStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME)); } // Set the IAM profile for the 
service if (StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) { runJobFlowRequest.setServiceRole(emrClusterDefinition.getServiceIamRole()); } else { runJobFlowRequest.setServiceRole(herdStringHelper .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME)); } // Set the AMI version if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) { runJobFlowRequest.setAmiVersion(emrClusterDefinition.getAmiVersion()); } // Set the additionalInfo if specified if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) { runJobFlowRequest.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo()); } // Set the bootstrap actions List<BootstrapActionConfig> bootstrapActionConfigList = getBootstrapActionConfigList(emrClusterDefinition); if (!bootstrapActionConfigList.isEmpty()) { runJobFlowRequest.setBootstrapActions(bootstrapActionConfigList); } // Set the app installation steps runJobFlowRequest.setSteps(getStepConfig(emrClusterDefinition)); // Set the tags runJobFlowRequest.setTags(getEmrTags(emrClusterDefinition)); // Assign supported products as applicable if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) { List<String> supportedProducts = new ArrayList<>(); supportedProducts.add(emrClusterDefinition.getSupportedProduct()); runJobFlowRequest.setSupportedProducts(supportedProducts); } // Assign security configuration. if (StringUtils.isNotBlank(emrClusterDefinition.getSecurityConfiguration())) { runJobFlowRequest.setSecurityConfiguration(emrClusterDefinition.getSecurityConfiguration()); } // Assign scale down behavior. if (StringUtils.isNotBlank(emrClusterDefinition.getScaleDownBehavior())) { runJobFlowRequest.setScaleDownBehavior(emrClusterDefinition.getScaleDownBehavior()); } // Assign Kerberos attributes. runJobFlowRequest .setKerberosAttributes(getKerberosAttributes(emrClusterDefinition.getKerberosAttributes())); // Return the object return runJobFlowRequest; }
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License:Apache License
@VisibleForTesting RunJobFlowRequest initEmrCluster(String stagingS3FileUrl, String stagingS3BucketUrl, String stepType, String mainClass, String bootstrapActions, AbstractAmazonJobEntry jobEntry) { RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setName(jobEntry.getHadoopJobName()); runJobFlowRequest.setReleaseLabel(jobEntry.getEmrRelease()); runJobFlowRequest.setLogUri(stagingS3BucketUrl); JobFlowInstancesConfig instances = initEC2Instance(Integer.parseInt(jobEntry.getNumInstances()), jobEntry.getMasterInstanceType(), jobEntry.getSlaveInstanceType()); runJobFlowRequest.setInstances(instances); List<StepConfig> steps = initSteps(stagingS3FileUrl, stepType, mainClass, jobEntry); if (steps.size() > 0) { runJobFlowRequest.setSteps(steps); }// w w w . j ava2 s .co m if (stepType.equals(STEP_HIVE)) { List<Application> applications = initApplications(); if (applications.size() > 0) { runJobFlowRequest.setApplications(applications); } List<BootstrapActionConfig> stepBootstrapActions = initBootstrapActions(bootstrapActions); if (stepBootstrapActions != null && stepBootstrapActions.size() > 0) { runJobFlowRequest.setBootstrapActions(stepBootstrapActions); } } String ec2Role = jobEntry.getEc2Role(); if (ec2Role == null || ec2Role.trim().isEmpty()) { runJobFlowRequest.setJobFlowRole(EMR_EC2_DEFAULT_ROLE); } else { runJobFlowRequest.setJobFlowRole(ec2Role); } String emrRole = jobEntry.getEmrRole(); if (emrRole == null || emrRole.trim().isEmpty()) { runJobFlowRequest.setServiceRole(EMR_EFAULT_ROLE); } else { runJobFlowRequest.setServiceRole(emrRole); } runJobFlowRequest.setVisibleToAllUsers(true); return runJobFlowRequest; }
From source file:org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java
License:Apache License
/** * Prepare to create a EMR job flow./*from www . j a va 2 s .c o m*/ * * @return RunJobFlowRequest The object to request an EMR job flow */ public RunJobFlowRequest createJobFlow() { // Create a RunJobFlowRequest object, set a name for the job flow. RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(); runJobFlowRequest.setName(hadoopJobName); // Set a log URL. String logUrl = stagingDir; if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$ logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$ } runJobFlowRequest.setLogUri(logUrl); // Determine the instances for Hadoop cluster. String numInstancesS = environmentSubstitute(numInstances); int numInsts = 2; try { numInsts = Integer.parseInt(numInstancesS); } catch (NumberFormatException e) { logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$ numInstancesS)); } JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(getInstanceType(masterInstanceType)); instances.setSlaveInstanceType(getInstanceType(slaveInstanceType)); instances.setHadoopVersion("0.20"); //$NON-NLS-1$ instances.setKeepJobFlowAliveWhenNoSteps(alive); runJobFlowRequest.setInstances(instances); // Set bootstrap actions. runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions()); // Create an EMR step to setup Hive. String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$ List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$ runJobFlowRequest.setSteps(steps); return runJobFlowRequest; }