List of usage examples for the com.amazonaws.services.elasticmapreduce.util.StepFactory constructor
public StepFactory(String bucket)
From source file: org.deeplearning4j.legacyExamples.EmrSparkExample.java
License:Apache License
public void entryPoint(String[] args) { JCommander jcmdr = new JCommander(this); try {//from w w w. j a v a2s. co m jcmdr.parse(args); } catch (ParameterException e) { jcmdr.usage(); try { Thread.sleep(500); } catch (Exception e2) { } throw e; } AmazonElasticMapReduceClientBuilder builder = AmazonElasticMapReduceClientBuilder.standard(); builder.withRegion(region); builder.withCredentials(getCredentialsProvider()); AmazonElasticMapReduce emr = builder.build(); List<StepConfig> steps = new ArrayList<>(); if (upload) { log.info("uploading uber jar"); AmazonS3ClientBuilder s3builder = AmazonS3ClientBuilder.standard(); s3builder.withRegion(region); s3builder.withCredentials(getCredentialsProvider()); AmazonS3 s3Client = s3builder.build(); if (!s3Client.doesBucketExist(bucketName)) { s3Client.createBucket(bucketName); } File uberJarFile = new File(uberJar); s3Client.putObject(new PutObjectRequest(bucketName, uberJarFile.getName(), uberJarFile)); } if (debug) { log.info("enable debug"); StepFactory stepFactory = new StepFactory(builder.getRegion() + ".elasticmapreduce"); StepConfig enableDebugging = new StepConfig().withName("Enable Debugging") .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW) .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); steps.add(enableDebugging); } if (execute) { log.info("execute spark step"); HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig(); sparkStepConf.withJar("command-runner.jar"); sparkStepConf.withArgs("spark-submit", "--deploy-mode", "cluster", "--class", className, getS3UberJarUrl(), "-useSparkLocal", "false"); ActionOnFailure action = ActionOnFailure.TERMINATE_JOB_FLOW; if (keepAlive) { action = ActionOnFailure.CONTINUE; } StepConfig sparkStep = new StepConfig().withName("Spark Step").withActionOnFailure(action) .withHadoopJarStep(sparkStepConf); steps.add(sparkStep); } log.info("create spark cluster"); Application sparkApp = new Application().withName("Spark"); // service and job flow role will be created 
automatically when // launching cluster in aws console, better do that first or create // manually RunJobFlowRequest request = new RunJobFlowRequest().withName("Spark Cluster").withSteps(steps) .withServiceRole("EMR_DefaultRole").withJobFlowRole("EMR_EC2_DefaultRole") .withApplications(sparkApp).withReleaseLabel(emrVersion).withLogUri(getS3BucketLogsUrl()) .withInstances(new JobFlowInstancesConfig().withEc2KeyName("spark").withInstanceCount(instanceCount) .withKeepJobFlowAliveWhenNoSteps(keepAlive).withMasterInstanceType(instanceType) .withSlaveInstanceType(instanceType)); RunJobFlowResult result = emr.runJobFlow(request); log.info(result.toString()); log.info("done"); }