Example usage for com.amazonaws.services.elasticmapreduce.model HadoopJarStepConfig setMainClass

Introduction

On this page you can find example usage of com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig.setMainClass.

Prototype


public void setMainClass(String mainClass) 

Document

The name of the main class in the specified Java file. If not specified, the JAR file should specify a main class in its manifest file.
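
Before the usage examples from real projects, here is a minimal sketch of how setMainClass is typically combined with setJar and setArgs to describe a custom-JAR step. The S3 paths, class name, and step name below are placeholder assumptions, not values taken from any of the examples that follow.

import java.util.Arrays;

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;

public class MainClassStepSketch {
    public static StepConfig buildCustomJarStep() {
        HadoopJarStepConfig jarStep = new HadoopJarStepConfig();
        jarStep.setJar("s3://my-bucket/my-job.jar");        // hypothetical JAR location in S3
        jarStep.setMainClass("com.example.MyJobDriver");    // hypothetical driver class; only needed when the
                                                            // JAR manifest does not declare a main class
        jarStep.setArgs(Arrays.asList("s3://my-bucket/input", "s3://my-bucket/output"));

        StepConfig step = new StepConfig();
        step.setName("my custom jar step");
        step.setActionOnFailure("CONTINUE");
        step.setHadoopJarStep(jarStep);
        return step;
    }
}

The resulting StepConfig can then be attached to a new cluster through RunJobFlowRequest.setSteps or appended to a running cluster through AddJobFlowStepsRequest, as the examples below demonstrate.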

Usage

From source file: datameer.awstasks.aws.emr.EmrCluster.java

License: Apache License

public StepFuture executeJobStep(String name, File jobJar, String s3JobJarName, Class<?> mainClass,
        String... args) {
    checkConnection(true);
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig();
    if (jobJar != null) {
        String s3JobJarUri = uploadingJobJar(jobJar, s3JobJarName);
        jarConfig.setJar(s3JobJarUri);
    }
    if (mainClass != null) {
        jarConfig.setMainClass(mainClass.getName());
    }
    jarConfig.setArgs(Arrays.asList(args));
    StepConfig stepConfig = new StepConfig();
    stepConfig.setName(name);
    stepConfig.setActionOnFailure("CONTINUE");
    stepConfig.setHadoopJarStep(jarConfig);
    _emrWebService
            .addJobFlowSteps(new AddJobFlowStepsRequest().withJobFlowId(_jobFlowId).withSteps(stepConfig));
    _emrWebService.clearDescribeJobFlowCache();
    return new StepFuture(stepConfig.getName(), getStepIndex(getJobFlowDetail(_jobFlowId), name));
}

From source file: org.huahinframework.emanager.amazonaws.elasticmapreduce.CustomJarStepConfig.java

License: Apache License

@Override
public StepConfig[] createStepConfig(Config config) {
    List<String> args = new ArrayList<String>();
    for (String s : config.getArgs()) {
        args.add(s);
    }

    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig().withJar(config.getRun()).withArgs(args);
    if (config.getMainClass() != null && !config.getMainClass().isEmpty()) {
        hadoopJarStepConfig.setMainClass(config.getMainClass());
    }

    StepConfig stepConfig = new StepConfig().withName(config.getName()).withActionOnFailure(ACTION_ON_FAILURE)
            .withHadoopJarStep(hadoopJarStepConfig);
    StepConfig[] stepConfigs = new StepConfig[1];
    stepConfigs[0] = stepConfig;
    return stepConfigs;
}

From source file: org.pentaho.amazon.client.impl.EmrClientImpl.java

License: Apache License

private static HadoopJarStepConfig configureHadoopStep(String stagingS3Jar, String mainClass,
        List<String> jarStepArgs) {
    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig();
    hadoopJarStepConfig.setJar(stagingS3Jar);
    hadoopJarStepConfig.setMainClass(mainClass);
    hadoopJarStepConfig.setArgs(jarStepArgs);

    return hadoopJarStepConfig;
}

From source file: org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java

License: Apache License

public Result execute(Result result, int arg1) throws KettleException {
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // create/connect aws service
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);

        // pull down jar from vfs
        FileObject jarFile = KettleVFS.getFileObject(buildFilename(jarUrl));
        File tmpFile = File.createTempFile("customEMR", "jar");
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
        IOUtils.copy(jarFile.getContent().getInputStream(), tmpFileOut);
        URL localJarUrl = tmpFile.toURI().toURL();

        // find main class in jar
        String mainClass = getMainClass(localJarUrl);

        // create staging bucket
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        FileSystemOptions opts = new FileSystemOptions();
        DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, new StaticUserAuthenticator(
                null, awsCredentials.getAWSAccessKeyId(), awsCredentials.getAWSSecretKey()));
        FileObject stagingDirFileObject = KettleVFS.getFileObject(stagingDir, getVariables(), opts);

        String stagingBucketName = stagingDirFileObject.getName().getBaseName();
        if (!s3Client.doesBucketExist(stagingBucketName)) {
            s3Client.createBucket(stagingBucketName);
        }

        // delete old jar if needed
        try {
            s3Client.deleteObject(stagingBucketName, jarFile.getName().getBaseName());
        } catch (Exception ex) {
            logError(Const.getStackTracker(ex));
        }

        // put jar in s3 staging bucket
        s3Client.putObject(new PutObjectRequest(stagingBucketName, jarFile.getName().getBaseName(), tmpFile));
        // create non-vfs s3 url to jar
        String stagingS3JarUrl = "s3://" + stagingBucketName + "/" + jarFile.getName().getBaseName();
        String stagingS3BucketUrl = "s3://" + stagingBucketName;

        RunJobFlowRequest runJobFlowRequest = null;
        RunJobFlowResult runJobFlowResult = null;
        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create EMR job flow
            runJobFlowRequest = createJobFlow(stagingS3BucketUrl, stagingS3JarUrl, mainClass);
            // start EMR job
            runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
        } else {
            List<String> jarStepArgs = new ArrayList<String>();
            if (!StringUtil.isEmpty(cmdLineArgs)) {
                StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    logBasic("adding args: " + token);
                    jarStepArgs.add(token);
                }
            }

            HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
            hadoopJarStep.setJar(stagingS3JarUrl);
            hadoopJarStep.setMainClass(mainClass);
            hadoopJarStep.setArgs(jarStepArgs);

            StepConfig stepConfig = new StepConfig();
            stepConfig.setName("custom jar: " + jarUrl);
            stepConfig.setHadoopJarStep(hadoopJarStep);

            List<StepConfig> steps = new ArrayList<StepConfig>();
            steps.add(stepConfig);

            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);

            emrClient.addJobFlowSteps(addJobFlowStepsRequest);
        }

        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 60;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError("Unable to parse logging interval '" + loggingIntervalS + "' - using " + "default of 60");
        }

        // monitor it / blocking / logging if desired
        if (blocking) {
            try {
                if (log.isBasic()) {

                    String executionState = "RUNNING";

                    List<String> jobFlowIds = new ArrayList<String>();
                    String id = hadoopJobFlowId;
                    if (StringUtil.isEmpty(hadoopJobFlowId)) {
                        id = runJobFlowResult.getJobFlowId();
                        jobFlowIds.add(id);
                    }

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(id)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        if (!found) {
                            break;
                        }
                        // logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.RunningPercent", setupPercent,
                        // mapPercent, reducePercent));
                        logBasic(hadoopJobName + " execution status: " + executionState);
                        try {
                            if (isRunning(executionState)) {
                                Thread.sleep(logIntv * 1000);
                            }
                        } catch (InterruptedException ie) {
                            // Ignore
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) {
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        S3Object outObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stdout");
                        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                        IOUtils.copy(outObject.getObjectContent(), outStream);
                        logError(outStream.toString());

                        S3Object errorObject = s3Client.getObject(stagingBucketName, id + "/steps/1/stderr");
                        ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                        IOUtils.copy(errorObject.getObjectContent(), errorStream);
                        logError(errorStream.toString());
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}

From source file: org.pentaho.amazon.emr.job.AmazonElasticMapReduceJobExecutor.java

License: Apache License

public RunJobFlowRequest createJobFlow(String stagingS3BucketUrl, String stagingS3Jar, String mainClass) {
    List<String> jarStepArgs = new ArrayList<String>();
    if (!StringUtil.isEmpty(cmdLineArgs)) {
        StringTokenizer st = new StringTokenizer(cmdLineArgs, " ");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            logBasic("adding args: " + token);
            jarStepArgs.add(token);
        }
    }

    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
    hadoopJarStep.setJar(stagingS3Jar);
    hadoopJarStep.setMainClass(mainClass);
    hadoopJarStep.setArgs(jarStepArgs);

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName("custom jar: " + jarUrl);
    stepConfig.setHadoopJarStep(hadoopJarStep);

    List<StepConfig> steps = new ArrayList<StepConfig>();
    steps.add(stepConfig);

    String numInstancesS = environmentSubstitute(numInstances);
    int numInsts = 2;
    try {
        numInsts = Integer.parseInt(numInstancesS);
    } catch (NumberFormatException e) {
        logError("Unable to parse number of instances to use '" + numInstancesS + "' - "
                + "using 2 instances...");
    }
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setInstanceCount(numInsts);
    instances.setMasterInstanceType(getInstanceType(masterInstanceType));
    instances.setSlaveInstanceType(getInstanceType(slaveInstanceType));
    instances.setHadoopVersion("0.20");

    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setSteps(steps);
    runJobFlowRequest.setLogUri(stagingS3BucketUrl);
    runJobFlowRequest.setName(hadoopJobName);
    runJobFlowRequest.setInstances(instances);

    // ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig();
    // scriptBootstrapAction.setPath("s3://mddwordcount/bootstrap.sh");
    // List<String> bootstrapArgs = new ArrayList<String>();
    // bootstrapArgs.add("http://pdi-node-dist.s3.amazonaws.com");
    // //
    // bootstrapArgs.add(
    //   "http://ci.pentaho.com/view/Data%20Integration/job/Kettle/lastSuccessfulBuild/artifact/Kettle/");
    // bootstrapArgs.add("pdi-hadoop-node-TRUNK-SNAPSHOT.zip");
    // scriptBootstrapAction.setArgs(bootstrapArgs);
    // BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig();
    // bootstrapActionConfig.setName("mdd bootstrap");
    // bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapAction);
    // List<BootstrapActionConfig> bootstrapActions = new ArrayList<BootstrapActionConfig>();
    // bootstrapActions.add(bootstrapActionConfig);
    // runJobFlowRequest.setBootstrapActions(bootstrapActions);

    return runJobFlowRequest;
}