List of usage examples for the com.amazonaws.services.elasticmapreduce.util.StepFactory constructor
public StepFactory()
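Before the real-world excerpts below, here is a minimal, self-contained sketch of typical usage; the client construction, step names, and flow name are illustrative assumptions, not taken from the examples that follow. StepFactory's new*Step() methods return ready-made HadoopJarStepConfig objects (enable debugging, install Hive or Pig, run a script) that are then wrapped in StepConfig entries of a RunJobFlowRequest.

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

public class StepFactoryExample {
    public static void main(String[] args) {
        // StepFactory turns common EMR tasks into ready-made HadoopJarStepConfig objects.
        StepFactory stepFactory = new StepFactory();

        // Predefined step that enables the EMR debugging console for the job flow.
        StepConfig enableDebugging = new StepConfig()
                .withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

        // Predefined step that installs Hive on the cluster.
        StepConfig installHive = new StepConfig()
                .withName("Install Hive")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newInstallHiveStep());

        // The flow name is a placeholder; a real request also needs a
        // JobFlowInstancesConfig (omitted here for brevity).
        RunJobFlowRequest request = new RunJobFlowRequest()
                .withName("example-flow")
                .withSteps(enableDebugging, installHive);

        AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.defaultClient();
        emr.runJobFlow(request);
    }
}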
From source file:com.clouddrive.parth.NewClass.java
public static String runCluster() throws Exception {
    long start = System.currentTimeMillis();
    String temp = "";

    // Configure the job flow
    RunJobFlowRequest request = new RunJobFlowRequest(FLOW_NAME, configInstance());
    request.setLogUri(S3N_LOG_URI);

    // Configure the Hadoop jar to use
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(S3N_HADOOP_JAR);
    jarConfig.setArgs(ARGS_AS_LIST);

    try {
        StepConfig enableDebugging = new StepConfig().withName("Enable debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());

        StepConfig runJar = new StepConfig(S3N_HADOOP_JAR.substring(S3N_HADOOP_JAR.indexOf('/') + 1),
                jarConfig);

        request.setSteps(Arrays.asList(enableDebugging, runJar));

        // Run the job flow
        RunJobFlowResult result = emr.runJobFlow(request);

        // Check the status of the running job
        String lastState = "";
        STATUS_LOOP: while (true) {
            DescribeJobFlowsRequest desc = new DescribeJobFlowsRequest(
                    Arrays.asList(result.getJobFlowId()));
            DescribeJobFlowsResult descResult = emr.describeJobFlows(desc);
            for (JobFlowDetail detail : descResult.getJobFlows()) {
                String state = detail.getExecutionStatusDetail().getState();
                if (isDone(state)) {
                    System.out.println("Job " + state + ": " + detail.toString());
                    break STATUS_LOOP;
                } else if (!lastState.equals(state)) {
                    lastState = state;
                    System.out.println("Job " + state + " at " + new Date().toString());
                }
            }
            Thread.sleep(10000);
        }

        temp = FLOW_NAME;
        long end = System.currentTimeMillis();
        System.out.println("Computation " + (end - start));
    } catch (AmazonServiceException ase) {
        System.out.println("Caught Exception: " + ase.getMessage());
        System.out.println("Response Status Code: " + ase.getStatusCode());
        System.out.println("Error Code: " + ase.getErrorCode());
        System.out.println("Request ID: " + ase.getRequestId());
    }
    return temp;
}
From source file:fr.ens.biologie.genomique.eoulsan.util.cloud.AWSElasticMapReduceJob.java
License:LGPL
void init() {

    requireNonNull(this.AWSAccessKey);
    requireNonNull(this.AWSSecretKey); // presumably intended here; the original checked AWSAccessKey twice
    requireNonNull(this.jarLocation);
    requireNonNull(this.jarArguments);
    requireNonNull(this.slavesInstanceType);
    requireNonNull(this.hadoopVersion);
    requireNonNull(this.jobFlowName);

    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instances is lower than 1");
    }

    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }

    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(this.jarLocation.trim())
            .withArgs(this.jarArguments);

    // Set step config
    final StepConfig stepConfig = new StepConfig().withName(this.jobFlowName + "-step")
            .withHadoopJarStep(hadoopJarStep).withActionOnFailure("TERMINATE_JOB_FLOW");

    // Set the instances
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances)
            .withMasterInstanceType(this.masterInstanceType).withSlaveInstanceType(this.slavesInstanceType)
            .withHadoopVersion(this.hadoopVersion);

    // Configure hadoop
    final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
            .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
            .withArgs("--site-key-value",
                    "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);

    final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop")
            .withScriptBootstrapAction(scriptBootstrapAction);

    // Enable debugging
    StepFactory stepFactory = new StepFactory();
    StepConfig enableDebugging = new StepConfig().withName("Enable Debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW").withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Run flow
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName);

    // Enable debugging if requested
    if (this.enableDebugging) {
        this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig);
    } else {
        this.runFlowRequest.withInstances(instances).withSteps(stepConfig);
    }

    // Limit the number of tasks in a task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        this.runFlowRequest.withBootstrapActions(bootstrapActions);
    }

    if (this.logPathname != null && !"".equals(this.logPathname)) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }

    // Set EC2 key name
    if (this.ec2KeyName != null) {
        this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName);
    }
}
From source file:org.finra.dm.dao.impl.EmrDaoImpl.java
License:Apache License
/**
 * Creates the list of step configurations for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition.
 *
 * @return the list of step configurations that contains all the steps for the given definition.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Add step to copy DM oozie wrapper workflow to HDFS.
    String wrapperWorkflowS3Location = getS3LocationForConfiguration(
            emrHelper.getEmrOozieDmWorkflowS3LocationConfiguration());
    String wrapperWorkflowHdfsLocation = configurationHelper
            .getProperty(ConfigurationValue.EMR_OOZIE_DM_WRAPPER_WORKFLOW_HDFS_LOCATION);

    // Script arguments:
    // 1. Source S3 location.
    // 2. Target HDFS location.
    // 3. Temp folder to use on local node.
    List<String> s3ToHdfsCopyScriptArgsList = new ArrayList<>();
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location + emrHelper.getS3HdfsCopyScriptName());
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowS3Location);
    s3ToHdfsCopyScriptArgsList.add(wrapperWorkflowHdfsLocation);
    s3ToHdfsCopyScriptArgsList.add(UUID.randomUUID().toString());

    HadoopJarStepConfig copyWrapperJarConfig = new HadoopJarStepConfig(hadoopJarForShellScript)
            .withArgs(s3ToHdfsCopyScriptArgsList);
    appSteps.add(new StepConfig().withName("Copy DM oozie wrapper").withHadoopJarStep(copyWrapperJarConfig));

    // Create the install Hive step and add it to the StepConfig list.
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create the install Pig step and add it to the StepConfig list.
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add Oozie support if needed.
    if (emrClusterDefinition.isInstallOozie() != null && emrClusterDefinition.isInstallOozie()) {
        String oozieShellArg = getS3StagingLocation()
                + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + configurationHelper.getProperty(ConfigurationValue.EMR_OOZIE_TAR_FILE);

        List<String> argsList = new ArrayList<>();
        argsList.add(getOozieScriptLocation());
        argsList.add(oozieShellArg);

        HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
        appSteps.add(new StepConfig().withName("Oozie").withHadoopJarStep(jarConfig));
    }

    // Add the hadoop jar steps that need to be added.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                    hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                    hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());
            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}
From source file:org.finra.dm.service.helper.EmrHiveStepHelper.java
License:Apache License
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrHiveStep emrHiveStep = (EmrHiveStep) step;

    // Default ActionOnFailure is to cancel the execution and wait.
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (emrHiveStep.isContinueOnError() != null && emrHiveStep.isContinueOnError()) {
        // Override based on user input.
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the Hive script,
    // just build the StepConfig object and return.
    if (CollectionUtils.isEmpty(emrHiveStep.getScriptArguments())) {
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(
                        new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim()));
    }
    // If there are arguments specified.
    else {
        // For each argument, add the "-d" option.
        List<String> hiveArgs = new ArrayList<>();
        for (String hiveArg : emrHiveStep.getScriptArguments()) {
            hiveArgs.add("-d");
            hiveArgs.add(hiveArg);
        }

        // Return the StepConfig object.
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(
                        emrHiveStep.getScriptLocation().trim(), hiveArgs.toArray(new String[hiveArgs.size()])));
    }
}
From source file:org.finra.dm.service.helper.EmrPigStepHelper.java
License:Apache License
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrPigStep pigStep = (EmrPigStep) step;

    // Default ActionOnFailure is to cancel the execution and wait.
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (pigStep.isContinueOnError() != null && pigStep.isContinueOnError()) {
        // Override based on user input.
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the Pig script,
    // just build the StepConfig object and return.
    if (CollectionUtils.isEmpty(pigStep.getScriptArguments())) {
        return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim()));
    }
    // If there are arguments specified.
    else {
        return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim(),
                        pigStep.getScriptArguments().toArray(new String[pigStep.getScriptArguments().size()])));
    }
}
From source file:org.finra.herd.dao.impl.EmrDaoImpl.java
License:Apache License
/**
 * Creates the list of step configurations for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition.
 *
 * @return the list of step configurations that contains all the steps for the given definition.
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    // Create the install Hive step and add it to the StepConfig list.
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create the install Pig step and add it to the StepConfig list.
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add the hadoop jar steps that need to be added.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                    hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                    hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());
            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.ElasticMapReduceManager.java
License:Apache License
/**
 * @param config
 * @throws URISyntaxException
 */
public void runJob(Config config) throws URISyntaxException {
    RunJobFlowRequest runJobFlowRequest = null;
    CreateStepConfigger csc = getCreateStepConfigger(config);
    if (csc == null) {
        log.error("Step config create error");
        return;
    }

    if (jobFlowId == null) {
        runJobFlowRequest = new RunJobFlowRequest().withName(MAP_REDUCE_NAME)
                .withBootstrapActions(
                        new BootstrapActionConfig().withName(MEMORY_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(MEMORY_BOOTSTRAP_URI)),
                        new BootstrapActionConfig().withName(HADOOP_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(HADOOP_BOOTSTRAP_URI)
                                        .withArgs("--mapred-key-value", "mapred.task.timeout=3600000")),
                        new BootstrapActionConfig().withName(HUAHIN_BOOTSTRAP_NAME).withScriptBootstrapAction(
                                new ScriptBootstrapActionConfig().withPath(emrProperties.getConfigureS3Path())))
                .withInstances(setupJobFlowInstancesConfig());
        if (!isEmpty(emrProperties.getLogUri())) {
            runJobFlowRequest.setLogUri(emrProperties.getLogUri());
        }

        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
        if (emrProperties.isDebug()) {
            StepConfig enableDebugging = new StepConfig().withName(EMR_DEBUGGIN_NAME)
                    .withActionOnFailure(ACTION_ON_TERMINATE)
                    .withHadoopJarStep(new StepFactory().newEnableDebuggingStep());
            stepConfigs.add(enableDebugging);
        }

        for (StepConfig sc : csc.createStepConfig(config)) {
            stepConfigs.add(sc);
        }
        runJobFlowRequest.setSteps(stepConfigs);

        try {
            RunJobFlowResult result = emr.runJobFlow(runJobFlowRequest);
            jobFlowId = result.getJobFlowId();
            checkDate = new Date();
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e);
        }
    } else {
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest().withJobFlowId(jobFlowId)
                .withSteps(csc.createStepConfig(config));
        emr.addJobFlowSteps(addJobFlowStepsRequest);
    }

    running = true;
    try {
        config.setJobFlowId(jobFlowId);
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }

    int stepSize = 0;
    String stepStatus = JobUtils.STEP_STATUS_PENDING;
    while (stepStatus.equals(JobUtils.STEP_STATUS_PENDING) || stepStatus.equals(JobUtils.STEP_STATUS_RUNNING)) {
        if (sleep()) {
            break;
        }

        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest()
                .withJobFlowIds(jobFlowId);
        DescribeJobFlowsResult describeJobFlowsResult = emr.describeJobFlows(describeJobFlowsRequest);
        if (describeJobFlowsResult.getJobFlows().size() != 1) {
            break;
        }

        JobFlowDetail jobFlowDetail = describeJobFlowsResult.getJobFlows().get(0);
        JobFlowInstancesDetail instancesDetail = jobFlowDetail.getInstances();
        masterPublicDnsName = instancesDetail.getMasterPublicDnsName();
        if (isEmpty(config.getMasterPublicDnsName())) {
            try {
                config.setMasterPublicDnsName(masterPublicDnsName);
                QueueUtils.updateQueue(config);
            } catch (IOException e) {
                e.printStackTrace();
                log.error(e);
            }
        }

        stepSize = jobFlowDetail.getSteps().size();
        for (StepDetail stepDetail : jobFlowDetail.getSteps()) {
            if (stepDetail.getStepConfig().getName().equals(config.getName())) {
                stepStatus = stepDetail.getExecutionStatusDetail().getState();
                break;
            }
        }
    }

    if (config.isDeleteOnExit()) {
        if (config.getJobType() == Config.JOB_TYPE_STREAMING) {
            S3Utils.delete(s3, config.getArgMap().get("mapper"));
            S3Utils.delete(s3, config.getArgMap().get("reducer"));
        } else {
            S3Utils.delete(s3, config.getRun());
        }
    }

    // A job flow supports a limited number of steps; see
    // "Add More than 256 Steps to a Job Flow" (http://goo.gl/JDtsV).
    if (stepSize >= 255) {
        instanceTerminate();
    }

    running = false;

    if (stepStatus.equals(JobUtils.STEP_STATUS_COMPLETED)) {
        config.setStatus(Config.JOB_STATUS_COMPLETE);
    } else if (stepStatus.equals(JobUtils.STEP_STATUS_FAILED)) {
        config.setStatus(Config.JOB_STATUS_ERROR);
    } else if (terminated) {
        config.setStatus(Config.JOB_STATUS_CANCEL);
    }

    try {
        QueueUtils.updateQueue(config);
    } catch (IOException e) {
        e.printStackTrace();
        log.error(e);
    }
}
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.HiveStepConfig.java
License:Apache License
/**
 * Creates the Hive step config with a new StepFactory.
 */
public HiveStepConfig() {
    this.stepFactory = new StepFactory();
}
From source file:org.huahinframework.emanager.amazonaws.elasticmapreduce.PigStepConfig.java
License:Apache License
/**
 * Creates the Pig step config with a new StepFactory.
 */
public PigStepConfig() {
    this.stepFactory = new StepFactory();
}
From source file:org.pentaho.amazon.client.impl.EmrClientImpl.java
License:Apache License
private StepConfig configureHiveStep(String stagingS3qUrl, String cmdLineArgs) {

    String[] cmdLineArgsArr;
    if (cmdLineArgs == null) {
        cmdLineArgsArr = new String[] { "" };
    } else {
        List<String> cmdArgs = Arrays.asList(cmdLineArgs.split("\\s+"));
        List<String> updatedCmdArgs = cmdArgs.stream().map(e -> replaceDoubleS3(e))
                .collect(Collectors.toList());
        cmdLineArgsArr = updatedCmdArgs.toArray(new String[updatedCmdArgs.size()]);
    }

    StepConfig hiveStepConfig = new StepConfig("Hive",
            new StepFactory().newRunHiveScriptStep(stagingS3qUrl, cmdLineArgsArr));

    if (alive) {
        hiveStepConfig.withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT);
    } else {
        hiveStepConfig.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);
    }

    return hiveStepConfig;
}