List of usage examples for the com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig constructor
JobFlowInstancesConfig()
From source file:awswc.AwsConsoleApp.java
License:Open Source License
static void runJobFlow() throws InterruptedException { // Configure instances to use JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); //********************************************************************// instances.setHadoopVersion(HADOOP_VERSION); instances.withEc2KeyName("ayuda-vp1"); instances.setInstanceCount(MASTER_INSTANCE_COUNT); //instances.setInstanceGroups(instanceGroups) instances.setMasterInstanceType(InstanceType.M24xlarge.toString()); instances.setSlaveInstanceType(InstanceType.M24xlarge.toString()); //********************************************************************// HadoopJarStepConfig hadoopJarStep1 = new HadoopJarStepConfig().withJar(S3N_WORD_COUNT_JAR_) // This should be a full map reduce application. .withArgs(BUCKET_NAME + "inWC", BUCKET_NAME + "outWC"); StepConfig stepConfig1 = new StepConfig().withName("wordcount").withHadoopJarStep(hadoopJarStep1) .withActionOnFailure("TERMINATE_JOB_FLOW"); //********************************************************************// //********************************************************************// HadoopJarStepConfig hadoopJarStep2 = new HadoopJarStepConfig().withJar(S3N_MAX_WORD_COUNT_JAR) // This should be a full map reduce application. .withArgs(BUCKET_NAME + "outWC", BUCKET_NAME + "outXWC", "hate", "10"); StepConfig stepConfig2 = new StepConfig().withName("maxwordcount").withHadoopJarStep(hadoopJarStep2) .withActionOnFailure("TERMINATE_JOB_FLOW"); //********************************************************************// Collection<StepConfig> csc = new ArrayList<StepConfig>(); csc.add(stepConfig1);// w w w . j a v a 2 s. 
c o m csc.add(stepConfig2); // BootstrapActions bootstrapActions = new BootstrapActions(); RunJobFlowRequest runFlowRequest = new RunJobFlowRequest().withName(FLOW_NAME).withInstances(instances) .withSteps(csc).withLogUri(BUCKET_NAME + "debug") /*.withBootstrapActions( bootstrapActions.newRunIf( "instance.isMaster=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.JobTracker, 4096) .build()), bootstrapActions.newRunIf( "instance.isRunningNameNode=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.NameNode, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningDataNode=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.DataNode, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningJobTracker=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.JobTracker, 4096).build()), bootstrapActions.newRunIf( "instance.isRunningTaskTracker=true", bootstrapActions.newConfigureDaemons() .withHeapSize(Daemon.TaskTracker, 4096).build()) /*, bootstrapActions.newRunIf( "instance.isSlave=true", bootstrapActions.newConfigureHadoop() .withKeyValue(ConfigFile.Site,"mapred.tasktracker.map.tasks.maximum", "4")) )*/; RunJobFlowResult runJobFlowResult = emr.runJobFlow(runFlowRequest); String jobFlowId = runJobFlowResult.getJobFlowId(); System.out.println("Ran job flow with id: " + jobFlowId); //wasFinished(runJobFlowResult); }
From source file:com.aegeus.aws.ElasticMapReduceService.java
License:Apache License
/** * Create a new EMR Cluster over Hadoop 2.4.0 *//*from w ww . j a va2 s .c o m*/ public void createCluster() { JobFlowInstancesConfig instances = new JobFlowInstancesConfig() .withInstanceCount((int) config.getInstanceCount()).withMasterInstanceType(config.getMasterType()) .withSlaveInstanceType(config.getSlaveType()); if (Strings.isNullOrEmpty(config.getKeyName())) { instances.setEc2KeyName(config.getKeyName()); } if (!Strings.isNullOrEmpty(config.getSubnetId())) { instances.setEc2SubnetId(config.getSubnetId()); } else { instances.setPlacement(new PlacementType(config.getPlace())); } ScriptBootstrapActionConfig installEsConfig = new ScriptBootstrapActionConfig() .withPath("s3://support.elasticmapreduce/bootstrap-actions/other/elasticsearch_install.rb"); BootstrapActionConfig installEs = new BootstrapActionConfig("Elasticsearch Install", installEsConfig); RunJobFlowRequest request = new RunJobFlowRequest().withName(config.getName()).withReleaseLabel("emr-4.1.0") .withServiceRole("Default_AWS_Role").withJobFlowRole("Default_AWS_Role") .withBootstrapActions(installEs).withInstances(instances); if (!Strings.isNullOrEmpty(config.getLogBucket())) { request.setLogUri(config.getLogBucket()); } RunJobFlowResult result = emr.runJobFlow(request); clusterId = result.getJobFlowId(); }
From source file:com.clouddrive.parth.NewClass.java
private static JobFlowInstancesConfig configInstance() throws Exception { // Configure instances to use JobFlowInstancesConfig instance = new JobFlowInstancesConfig(); instance.setHadoopVersion(HADOOP_VERSION); instance.setInstanceCount(INSTANCE_COUNT); instance.setMasterInstanceType(INSTANCE_TYPE); instance.setSlaveInstanceType(INSTANCE_TYPE); //instance.setKeepJobFlowAliveWhenNoSteps(true); //instance.setEc2KeyName("parth"); return instance; }
From source file:datameer.awstasks.aws.emr.EmrCluster.java
License:Apache License
public synchronized void startup() throws InterruptedException { checkConnection(false);/* www . jav a 2 s. co m*/ _clusterState = ClusterState.STARTING; boolean successful = false; try { EmrSettings settings = getSettings(); if (settings.getPrivateKeyName() == null) { throw new NullPointerException( "privateKeyName must not be null please configure settings properly"); } LOG.info("Starting job flow '" + getName() + "' ..."); if (!getRunningJobFlowDetailsByName(getName()).isEmpty()) { throw new IllegalStateException("Job flow with name '" + getName() + "' already running."); } boolean keepAlive = true; JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig(); jobConfig.setHadoopVersion(_settings.getHadoopVersion()); jobConfig.setMasterInstanceType(settings.getMasterInstanceType().getId()); jobConfig.setSlaveInstanceType(settings.getNodeInstanceType().getId()); jobConfig.setInstanceCount(settings.getInstanceCount()); jobConfig.setEc2KeyName(settings.getPrivateKeyName()); jobConfig.setPlacement(new PlacementType()); jobConfig.setKeepJobFlowAliveWhenNoSteps(keepAlive); final RunJobFlowRequest startRequest = new RunJobFlowRequest(); startRequest.setLogUri("s3n://" + settings.getS3Bucket() + settings.getS3LogPath()); startRequest.setInstances(jobConfig); startRequest.setName(getName()); startRequest.setAdditionalInfo(_settings.getAdditionalStartInfo()); startRequest.setBootstrapActions(_settings.getBootstrapActions()); if (settings.isDebugEnabled()) { startRequest.withSteps(DEBUG_STEP); } RunJobFlowResult startResponse = _emrWebService.runJobFlow(startRequest); _jobFlowId = startResponse.getJobFlowId(); waitUntilClusterStarted(_jobFlowId); LOG.info("elastic cluster '" + getName() + "/" + _jobFlowId + "' started, master-host is " + _masterHost); successful = true; } finally { if (successful) { _clusterState = ClusterState.CONNECTED; } else { _clusterState = ClusterState.UNCONNECTED; _jobFlowId = null; } } }
From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java
License:LGPL
/**
 * Validates the job configuration and builds the {@code RunJobFlowRequest}
 * (steps, instances, optional Hadoop bootstrap action, optional debugging
 * step, log URI, and EC2 key name) into {@code this.runFlowRequest}.
 *
 * @throws NullPointerException if a required setting is null
 * @throws IllegalArgumentException if the instance count is lower than 1
 */
void init() {
    // Required settings.
    // NOTE(review): requireNonNull(this.AWSAccessKey) appears twice — the
    // second call is presumably meant to check the secret-key field instead;
    // confirm against the field declarations and fix.
    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.jarLocation);
    requireNonNull(this.jarArguments);
    requireNonNull(this.slavesInstanceType);
    requireNonNull(this.hadoopVersion);
    requireNonNull(this.jobFlowName);
    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instance is lower than 1");
    }
    // Master defaults to the slaves' instance type when not set explicitly.
    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }
    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim())
            .withArgs(this.jarArguments);
    // Set step config
    final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step")
            .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW");
    // Set the instance
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances)
            .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType)
            .withHadoopVersion(this.hadoopVersion);
    // Configure hadoop: bootstrap action that caps map tasks per tasktracker.
    final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
            .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
            .withArgs("--site-key-value",
                    "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);
    final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop")
            .withScriptBootstrapAction(scriptBootstrapAction);
    // Enable debugging
    StepFactory stepFactory = new StepFactory();
    StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep());
    // Run flow
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName);
    // Enable or not debugging: the debug step must come before the job step.
    if (this.enableDebugging) {
        this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig);
    } else {
        this.runFlowRequest.withInstances(instances).withSteps(stepConfig);
    }
    // Limit the number of task in a task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        this.runFlowRequest.withBootstrapActions(bootstrapActions);
    }
    // Optional log path.
    if (this.logPathname != null && !"".equals(this.logPathname)) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }
    // Set EC2 Key name
    if (this.ec2KeyName != null) {
        this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName);
    }
}
From source file:org.deeplearning4j.legacyExamples.EmrSparkExample.java
License:Apache License
/**
 * Parses CLI arguments, then (depending on flags) uploads the uber jar to S3,
 * adds an EMR debugging step, adds a spark-submit step, and finally launches
 * an EMR Spark cluster with those steps.
 *
 * @param args command-line arguments parsed by JCommander into this object
 */
public void entryPoint(String[] args) {
    JCommander jcmdr = new JCommander(this);
    try {
        jcmdr.parse(args);
    } catch (ParameterException e) {
        // Print usage, give the console a moment to flush, then rethrow.
        jcmdr.usage();
        try {
            Thread.sleep(500);
        } catch (Exception e2) {
        }
        throw e;
    }
    AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard();
    builder.withRegion(region);
    builder.withCredentials(getCredentialsProvider());
    AmazonElasticMapReduce emr = builder.build();
    List<StepConfig> steps = new ArrayList<>();
    if (upload) {
        // Upload the uber jar to S3 (creating the bucket if needed) so the
        // cluster can fetch it.
        log.info("uploading uber jar");
        AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard();
        s3builder.withRegion(region);
        s3builder.withCredentials(getCredentialsProvider());
        AmazonS3 s3Client = s3builder.build();
        if (!s3Client.doesBucketExist(bucketName)) {
            s3Client.createBucket(bucketName);
        }
        File uberJarFile = new File(uberJar);
        s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile));
    }
    if (debug) {
        // Add the standard EMR debugging step first.
        log.info("enable debug");
        StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce");
        StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());
        steps.add(enableDebugging);
    }
    if (execute) {
        // spark-submit step run via EMR's command-runner.
        log.info("execute spark step");
        HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig();
        sparkStepConf.withJar("command-runner.jar");
        sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className,
                getS3UberJarUrl(), "-useSparkLocal", "false");
        // With keepAlive, a step failure should not terminate the cluster.
        ActionOnFailure action = ActionOnFailure.TERMINATE_JOB_FLOW;
        if (keepAlive) {
            action = ActionOnFailure.CONTINUE;
        }
        StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action)
                .withHadoopJarStep(sparkStepConf);
        steps.add(sparkStep);
    }
    log.info("create spark cluster");
    Application sparkApp = new Application().withName("Spark");
    // Service and job flow roles will be created automatically when launching
    // a cluster in the AWS console; better do that first, or create them
    // manually — otherwise the roles referenced below will not exist.
    RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps)
            .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole")
            .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl())
            .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount)
                    .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType)
                    .withSlaveInstanceType(instanceType));
    RunJobFlowResult result = emr.runJobFlow(request);
    log.info(result.toString());
    log.info("done");
}
From source file:org.finra.dm.dao.impl.EmrDaoImpl.java
License:Apache License
/** * Create the job flow instance configuration which contains all the job flow configuration details. * * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters. * * @return the job flow instance configuration. *///from ww w. j av a 2s. c om private JobFlowInstancesConfig getJobFlowInstancesConfig(EmrClusterDefinition emrClusterDefinition) { // Create a new job flow instance config object JobFlowInstancesConfig jobFlowInstancesConfig = new JobFlowInstancesConfig(); // Add the DM EMR support security group as additional group to master node. String additionalSecurityGroup = configurationHelper .getProperty(ConfigurationValue.EMR_DM_SUPPORT_SECURITY_GROUP); if (StringUtils.isNotBlank(additionalSecurityGroup)) { List<String> additionalSecurityGroups = new ArrayList<>(); additionalSecurityGroups.add(additionalSecurityGroup); jobFlowInstancesConfig.setAdditionalMasterSecurityGroups(additionalSecurityGroups); } // Fill-in the ssh key if (StringUtils.isNotBlank(emrClusterDefinition.getSshKeyPairName())) { jobFlowInstancesConfig.setEc2KeyName(emrClusterDefinition.getSshKeyPairName()); } // Fill-in subnet id if (StringUtils.isNotBlank(emrClusterDefinition.getSubnetId())) { jobFlowInstancesConfig.setEc2SubnetId(emrClusterDefinition.getSubnetId()); } // Fill in instance groups jobFlowInstancesConfig.setInstanceGroups(getInstanceGroupConfig(emrClusterDefinition)); // Check for optional parameters and then fill-in // Keep Alive Cluster flag if (emrClusterDefinition.isKeepAlive() != null) { jobFlowInstancesConfig.setKeepJobFlowAliveWhenNoSteps(emrClusterDefinition.isKeepAlive()); } // Termination protection flag if (emrClusterDefinition.isTerminationProtection() != null) { jobFlowInstancesConfig.setTerminationProtected(emrClusterDefinition.isTerminationProtection()); } // Setting the hadoop version if (StringUtils.isNotBlank(emrClusterDefinition.getHadoopVersion())) { 
jobFlowInstancesConfig.setHadoopVersion(emrClusterDefinition.getHadoopVersion()); } // Return the object return jobFlowInstancesConfig; }
From source file:org.finra.herd.dao.impl.EmrDaoImpl.java
License:Apache License
/** * Creates the job flow instance configuration containing specification of the number and type of Amazon EC2 instances. * * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters * * @return the job flow instance configuration *///w w w .j ava 2 s . co m private JobFlowInstancesConfig getJobFlowInstancesConfig(EmrClusterDefinition emrClusterDefinition) { // Create a new job flow instances configuration object. JobFlowInstancesConfig jobFlowInstancesConfig = new JobFlowInstancesConfig(); // Set up master/slave security group jobFlowInstancesConfig.setEmrManagedMasterSecurityGroup(emrClusterDefinition.getMasterSecurityGroup()); jobFlowInstancesConfig.setEmrManagedSlaveSecurityGroup(emrClusterDefinition.getSlaveSecurityGroup()); // Add additional security groups to master nodes. jobFlowInstancesConfig .setAdditionalMasterSecurityGroups(emrClusterDefinition.getAdditionalMasterSecurityGroups()); // Add additional security groups to slave nodes. jobFlowInstancesConfig .setAdditionalSlaveSecurityGroups(emrClusterDefinition.getAdditionalSlaveSecurityGroups()); // Fill-in the ssh key. if (StringUtils.isNotBlank(emrClusterDefinition.getSshKeyPairName())) { jobFlowInstancesConfig.setEc2KeyName(emrClusterDefinition.getSshKeyPairName()); } // Fill in configuration for the instance groups in a cluster. jobFlowInstancesConfig .setInstanceGroups(getInstanceGroupConfigs(emrClusterDefinition.getInstanceDefinitions())); // Fill in instance fleet configuration. jobFlowInstancesConfig.setInstanceFleets(getInstanceFleets(emrClusterDefinition.getInstanceFleets())); // Fill-in subnet id. if (StringUtils.isNotBlank(emrClusterDefinition.getSubnetId())) { // Use collection of subnet IDs when instance fleet configuration is specified. Otherwise, we expect a single EC2 subnet ID to be passed here. 
if (CollectionUtils.isNotEmpty(jobFlowInstancesConfig.getInstanceFleets())) { jobFlowInstancesConfig .setEc2SubnetIds(herdStringHelper.splitAndTrim(emrClusterDefinition.getSubnetId(), ",")); } else { jobFlowInstancesConfig.setEc2SubnetId(emrClusterDefinition.getSubnetId()); } } // Fill in optional keep alive flag. if (emrClusterDefinition.isKeepAlive() != null) { jobFlowInstancesConfig.setKeepJobFlowAliveWhenNoSteps(emrClusterDefinition.isKeepAlive()); } // Fill in optional termination protection flag. if (emrClusterDefinition.isTerminationProtection() != null) { jobFlowInstancesConfig.setTerminationProtected(emrClusterDefinition.isTerminationProtection()); } // Fill in optional Hadoop version flag. if (StringUtils.isNotBlank(emrClusterDefinition.getHadoopVersion())) { jobFlowInstancesConfig.setHadoopVersion(emrClusterDefinition.getHadoopVersion()); } // Return the object. return jobFlowInstancesConfig; }
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java
License:Apache License
/** * @return JobFlowInstancesConfig// w w w . j a va2s . c o m */ private JobFlowInstancesConfig setupJobFlowInstancesConfig() { JobFlowInstancesConfig config = new JobFlowInstancesConfig().withKeepJobFlowAliveWhenNoSteps(true) .withInstanceCount(emrProperties.getInstanceCount()) .withMasterInstanceType(emrProperties.getMasterInstanceType()); if (!isEmpty(emrProperties.getKeyPairName())) { config.setEc2KeyName(emrProperties.getKeyPairName()); } if (!isEmpty(emrProperties.getHadoopVersion())) { config.setHadoopVersion(emrProperties.getHadoopVersion()); } if (!isEmpty(emrProperties.getAvailabilityZone())) { config.setPlacement(new PlacementType().withAvailabilityZone(emrProperties.getAvailabilityZone())); } if (!isEmpty(emrProperties.getSlaveInstanceType())) { config.setSlaveInstanceType(emrProperties.getSlaveInstanceType()); } else { config.setSlaveInstanceType(emrProperties.getMasterInstanceType()); } return config; }
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License:Apache License
private JobFlowInstancesConfig initEC2Instance(Integer numInsts, String masterInstanceType, String slaveInstanceType) { JobFlowInstancesConfig instances = new JobFlowInstancesConfig(); instances.setInstanceCount(numInsts); instances.setMasterInstanceType(masterInstanceType); instances.setSlaveInstanceType(slaveInstanceType); instances.setKeepJobFlowAliveWhenNoSteps(alive); return instances; }