List of usage examples for com.amazonaws.services.elasticmapreduce.util StepFactory newEnableDebuggingStep
public HadoopJarStepConfig newEnableDebuggingStep()
From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java
License:LGPL
void init() { requireNonNull(this.AWSAccessKey); requireNonNull(this.AWSAccessKey); requireNonNull(this.jarLocation); requireNonNull(this.jarArguments); requireNonNull(this.slavesInstanceType); requireNonNull(this.hadoopVersion); requireNonNull(this.jobFlowName); if (this.nInstances < 1) { throw new IllegalArgumentException("the number of instance is lower than 1"); }//from w ww. ja va 2 s .c om if (this.masterInstanceType == null) { this.masterInstanceType = this.slavesInstanceType; } // Set the hadoop jar step final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim()) .withArgs(this.jarArguments); // Set step config final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step") .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW"); // Set the instance final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances) .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType) .withHadoopVersion(this.hadoopVersion); // Configure hadoop final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig() .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop") .withArgs("--site-key-value", "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks); final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop") .withScriptBootstrapAction(scriptBootstrapAction); // Enable debugging StepFactory stepFactory = new StepFactory(); StepConfig enableDebugging = new StepConfig().withName("Enable Debugging") .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep()); // Run flow this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName); // Enable or not debugging if (this.enableDebugging) { this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig); } else { this.runFlowRequest.withInstances(instances).withSteps(stepConfig); } // Limit the number of task in a task tracker if (this.taskTrackerMaxMapTasks > 0) { this.runFlowRequest.withBootstrapActions(bootstrapActions); } if (this.logPathname != null && !"".equals(this.logPathname)) { this.runFlowRequest.withLogUri(this.logPathname); } // Set EC2 Key name if (this.ec2KeyName != null) { this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName); } }
From source file:org.deeplearning4j.legacyExamples.EmrSparkExample.java
License:Apache License
public void entryPoint(String[] args) { JCommander jcmdr = new JCommander(this); try {//from w w w. java 2s . com jcmdr.parse(args); } catch (ParameterException e) { jcmdr.usage(); try { Thread.sleep(500); } catch (Exception e2) { } throw e; } AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard(); builder.withRegion(region); builder.withCredentials(getCredentialsProvider()); AmazonElasticMapReduce emr = builder.build(); List<StepConfig> steps = new ArrayList<>(); if (upload) { log.info("uploading uber jar"); AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard(); s3builder.withRegion(region); s3builder.withCredentials(getCredentialsProvider()); AmazonS3 s3Client = s3builder.build(); if (!s3Client.doesBucketExist(bucketName)) { s3Client.createBucket(bucketName); } File uberJarFile = new File(uberJar); s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile)); } if (debug) { log.info("enable debug"); StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce"); StepConfig enableDebugging = new StepConfig().withName("Enable Debugging") .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW) .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); steps.add(enableDebugging); } if (execute) { log.info("execute spark step"); HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig(); sparkStepConf.withJar("command-runner.jar"); sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className, getS3UberJarUrl(), "-useSparkLocal", "false"); ActionOnFailure action = ActionOnFailure.TERMINATE_JOB_FLOW; if (keepAlive) { action = ActionOnFailure.CONTINUE; } StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action) .withHadoopJarStep(sparkStepConf); steps.add(sparkStep); } log.info("create spark cluster"); Application sparkApp = new Application().withName("Spark"); // service and job flow role will be created automatically when // launching cluster in aws console, better do that first or create // manually RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps) .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole") .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl()) .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount) .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType) .withSlaveInstanceType(instanceType)); RunJobFlowResult result = emr.runJobFlow(request); log.info(result.toString()); log.info("done"); }
From source file:rollsPOC2.util.AWSHelper.java
public static String createOrFindEMRHiveCluster(String clusterName, boolean createWithKeepAlive) throws Exception { String clusterId = null;//from w ww. j av a2 s .c om AmazonElasticMapReduce emr = AppServices.getEMRClient(); ClusterSummary clusterSummary = findCluster("Treebeard", emr); if (clusterSummary != null) { clusterId = clusterSummary.getId(); System.err.printf("Cluster found with id %s, status %s\n", clusterId, clusterSummary.getStatus().getState()); } if (clusterSummary != null && clusterSummary.getStatus().getState().startsWith("TERMINAT")) { while (findCluster("Treebeard", emr).getStatus().getState().equals("TERMINATING")) { System.out.println("Waiting for previous cluster to terminate"); Thread.sleep(10000l); } System.out.println("Starting cluster..."); StepFactory stepFactory = new StepFactory(); StepConfig enabledebugging = new StepConfig().withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); // Possibly redundant with ".withApplications(new Application().withName("Hive"))" // StepConfig installHive = new StepConfig() // .withName("Install Hive") // .withActionOnFailure("TERMINATE_JOB_FLOW") // .withHadoopJarStep(stepFactory.newInstallHiveStep()); RunJobFlowRequest request = new RunJobFlowRequest().withName("Treebeard").withReleaseLabel("emr-4.6.0") .withApplications(new Application().withName("Hive")).withSteps(enabledebugging) .withVisibleToAllUsers(true) .withLogUri("s3://aws-logs-800327301943-us-east-1/elasticmapreduce/") .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole") .withInstances(new JobFlowInstancesConfig().withEc2KeyName("bjss").withInstanceCount(2) .withMasterInstanceType("m3.xlarge").withSlaveInstanceType("m1.large") .withKeepJobFlowAliveWhenNoSteps(createWithKeepAlive)); RunJobFlowResult createClusterResult = emr.runJobFlow(request); clusterId = createClusterResult.getJobFlowId(); System.out.printf("Started cluster with id %s\n", clusterId); } return clusterId; }