org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java Source code

Introduction

Here is the source code for org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java, a Pentaho Data Integration (Kettle) job entry that submits a Hive job to the AWS Elastic MapReduce (EMR) service.

Source

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.amazon.hive.job;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.io.IOUtils;
import org.apache.commons.vfs.FileObject;
import org.pentaho.amazon.AbstractAmazonJobEntry;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.annotations.JobEntry;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.encryption.Encr;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.logging.Log4jFileAppender;
import org.pentaho.di.core.logging.LogWriter;
import org.pentaho.di.core.util.StringUtil;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.s3.vfs.S3FileProvider;
import org.w3c.dom.Node;

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsRequest;
import com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsResult;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.JobFlowDetail;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;
import com.amazonaws.services.elasticmapreduce.model.ScriptBootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.model.TerminateJobFlowsRequest;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.S3Object;

/**
 * AmazonHiveJobExecutor: a job entry plug-in class that submits a Hive job to the AWS Elastic MapReduce service from
 * Pentaho Data Integration (Kettle).
 */
@JobEntry(id = "HiveJobExecutorPlugin", image = "AWS-HIVE.svg", name = "HiveJobExecutorPlugin.Name", description = "HiveJobExecutorPlugin.Description", categoryDescription = "i18n:org.pentaho.di.job:JobCategory.Category.BigData", i18nPackageName = "org.pentaho.amazon.hive.job")
public class AmazonHiveJobExecutor extends AbstractAmazonJobEntry implements Cloneable, JobEntryInterface {

    private static Class<?> PKG = AmazonHiveJobExecutor.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$

    protected String qUrl = "";
    protected String bootstrapActions = "";
    protected boolean alive;

    public AmazonHiveJobExecutor() {
    }

    public String getQUrl() {
        return qUrl;
    }

    public void setQUrl(String qUrl) {
        this.qUrl = qUrl;
    }

    public String getBootstrapActions() {
        return bootstrapActions;
    }

    public void setBootstrapActions(String bootstrapActions) {
        this.bootstrapActions = bootstrapActions;
    }

    public boolean isAlive() {
        return alive;
    }

    public void setAlive(boolean alive) {
        this.alive = alive;
    }

    /**
     * Executes a Hive job on the AWS Elastic MapReduce service: stages the Hive script file in S3, creates or reuses
     * an EMR job flow, adds a "Run Hive Script" step to it, and optionally blocks while polling the job flow state.
     */
    public Result execute(Result result, int arg1) throws KettleException {

        // Setup a log file.
        Log4jFileAppender appender = null;
        String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
        try {
            appender = LogWriter.createFileAppender(logFileName, true, false);
            LogWriter.getInstance().addAppender(appender);
            log.setLogLevel(parentJob.getLogLevel());
        } catch (Exception e) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                    logFileName, e.toString()));
            logError(Const.getStackTracker(e));
        }

        try {
            // Create and connect an AWS service.
            AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
            AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

            // Get bucket name and S3 URL.
            String stagingBucketName = GetBucketName(stagingDir);
            String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

            // Prepare staging S3 URL for Hive script file.
            String stagingS3qUrl = "";
            if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$

                // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
                if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                    stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
                } else {
                    stagingS3qUrl = qUrl;
                }

            } else {
                // A local filename is given for the Hive script file. It must be copied to the S3 staging directory.
                // First, check for the correct protocol.
                if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                    if (log.isBasic()) {
                        logBasic(BaseMessages.getString(PKG,
                                "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                    }
                }
                // Pull down the .q file via VFS.
                FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
                File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
                tmpFile.deleteOnExit();
                FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
                IOUtils.copy(qFile.getContent().getInputStream(), tmpFileOut);
                // Close the stream so the script is fully flushed to disk before the S3 upload below.
                tmpFileOut.close();
                // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
                String key = GetKeyFromS3Url(stagingDir);
                if (key == null) {
                    key = qFile.getName().getBaseName();
                } else {
                    key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
                }

                // delete the previous .q file in S3
                try {
                    s3Client.deleteObject(stagingBucketName, key);
                } catch (Exception ex) {
                    logError(Const.getStackTracker(ex));
                }

                // Put the .q file in the S3 staging directory.
                s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
                stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
            }

            // AWS provides script-runner.jar (in its public bucket), which is used as the MapReduce jar for the Hive
            // EMR job steps.
            jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

            RunJobFlowRequest runJobFlowRequest = null;
            RunJobFlowResult runJobFlowResult = null;
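            // If a job flow ID was supplied, reuse that cluster; otherwise create a new job flow below.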
            if (StringUtil.isEmpty(hadoopJobFlowId)) {
                // Create an EMR job flow, start a step to set up Hive, and get the job flow ID.
                runJobFlowRequest = createJobFlow();
                runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
                hadoopJobFlowId = runJobFlowResult.getJobFlowId();
            }

            // Now the EMR job flow is ready to accept a Run Hive Script step.
            // First, prepare a Job Flow ID list.
            List<String> jobFlowIds = new ArrayList<String>();
            jobFlowIds.add(hadoopJobFlowId);

            // Configure a HadoopJarStep.
            String args = "s3://elasticmapreduce/libs/hive/hive-script "
                    + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                    + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
            List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);

            // Add a Run Hive Script step to the existing job flow.
            AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
            addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
            addJobFlowStepsRequest.setSteps(steps);
            emrClient.addJobFlowSteps(addJobFlowStepsRequest);

            // Set a logging interval.
            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 10;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException ex) {
                logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                        loggingIntervalS));
            }

            // Monitor the job flow and log its state while it runs, if blocking is enabled.
            if (blocking) {
                try {
                    if (log.isBasic()) {

                        String executionState = "RUNNING"; //$NON-NLS-1$

                        while (isRunning(executionState)) {
                            DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                            describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                            DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                    .describeJobFlows(describeJobFlowsRequest);
                            boolean found = false;
                            for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                                if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                    executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                    found = true;
                                }
                            }

                            if (!found) {
                                break;
                            }
                            logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                    "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                    + executionState);

                            if (parentJob.isStopped()) {
                                if (!alive) {
                                    TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                    terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                    emrClient.terminateJobFlows(terminateJobFlowsRequest);
                                }
                                break;
                            }

                            try {
                                if (isRunning(executionState)) {
                                    Thread.sleep(logIntv * 1000);
                                }
                            } catch (InterruptedException ie) {
                                logError(Const.getStackTracker(ie));
                            }
                        }

                        if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                            result.setStopped(true);
                            result.setNrErrors(1);
                            result.setResult(false);

                            S3Object outObject = s3Client.getObject(stagingBucketName,
                                    hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$
                            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
                            IOUtils.copy(outObject.getObjectContent(), outStream);
                            logError(outStream.toString());

                            S3Object errorObject = s3Client.getObject(stagingBucketName,
                                    hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$
                            ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
                            IOUtils.copy(errorObject.getObjectContent(), errorStream);
                            logError(errorStream.toString());
                        }
                    }
                } catch (Exception e) {
                    logError(e.getMessage(), e);
                }
            }

        } catch (Throwable t) {
            t.printStackTrace();
            result.setStopped(true);
            result.setNrErrors(1);
            result.setResult(false);
            logError(t.getMessage(), t);
        }

        if (appender != null) {
            LogWriter.getInstance().removeAppender(appender);
            appender.close();

            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                    parentJob.getJobname(), getName());
            result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
        }

        return result;
    }

    /**
     * Prepare a request to create an EMR job flow.
     * 
     * @return RunJobFlowRequest the object used to request an EMR job flow
     */
    public RunJobFlowRequest createJobFlow() {

        // Create a RunJobFlowRequest object, set a name for the job flow.
        RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
        runJobFlowRequest.setName(hadoopJobName);

        // Set a log URL.
        String logUrl = stagingDir;
        if (stagingDir.indexOf("@s3") > 0) { //$NON-NLS-1$
            logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring(stagingDir.indexOf("@s3") + 4); //$NON-NLS-1$
        }
        runJobFlowRequest.setLogUri(logUrl);

        // Determine the instances for the Hadoop cluster.
        String numInstancesS = environmentSubstitute(numInstances);
        int numInsts = 2;
        try {
            numInsts = Integer.parseInt(numInstancesS);
        } catch (NumberFormatException e) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$
                    numInstancesS));
        }
        JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
        instances.setInstanceCount(numInsts);
        instances.setMasterInstanceType(getInstanceType(masterInstanceType));
        instances.setSlaveInstanceType(getInstanceType(slaveInstanceType));
        instances.setHadoopVersion("0.20"); //$NON-NLS-1$
        instances.setKeepJobFlowAliveWhenNoSteps(alive);
        runJobFlowRequest.setInstances(instances);

        // Set bootstrap actions.
        runJobFlowRequest.setBootstrapActions(ConfigBootstrapActions());

        // Create an EMR step to setup Hive.
        String args = "s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep("Setup Hive", jarUrl, args); //$NON-NLS-1$
        runJobFlowRequest.setSteps(steps);

        return runJobFlowRequest;
    }

    /**
     * Configure the bootstrap actions, which are executed before Hadoop starts.
     * 
     * @return List<BootstrapActionConfig> configuration data for the bootstrap actions
     * 
     */
    public List<BootstrapActionConfig> ConfigBootstrapActions() {

        List<BootstrapActionConfig> bootstrapActionConfigs = new ArrayList<BootstrapActionConfig>();

        if (!StringUtil.isEmpty(bootstrapActions)) {
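            // The bootstrapActions string is a space-separated list of flag/value pairs, for example
            // (illustrative values, not taken from the source):
            //   --bootstrap-action s3://mybucket/scripts/setup.sh --bootstrap-name "My Action" --args "arg1,arg2"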

            StringTokenizer st = new StringTokenizer(bootstrapActions, " "); //$NON-NLS-1$
            String path = "";
            String name = "";
            List<String> args = null;
            int actionCount = 0;

            while (st.hasMoreTokens()) {

                // Take a key/value pair.
                String key = st.nextToken();
                String value = st.nextToken();

                // If an argument is enclosed in double quotes, take the string without the quotes.
                if (value.startsWith("\"")) { //$NON-NLS-1$
                    while (!value.endsWith("\"")) { //$NON-NLS-1$
                        if (st.hasMoreTokens()) {
                            value += " " + st.nextToken(); //$NON-NLS-1$
                        } else {
                            if (log.isBasic()) {
                                logBasic(BaseMessages.getString(PKG,
                                        "AmazonElasticMapReduceJobExecutor.BootstrapActionArgument.Error", key, //$NON-NLS-1$
                                        value));
                            }
                            return null;
                        }
                    }
                    value = value.substring(1, value.length() - 1);
                }

                // if (log.isBasic()) logBasic("adding args: " + key + " " + value);

                if (key.equals("--bootstrap-action")) { //$NON-NLS-1$
                    if (!Const.isEmpty(path)) {
                        actionCount++;
                        if (name.equals("")) {
                            name = "Bootstrap Action " + actionCount;
                        }
                        // Enter data for one bootstrap action.
                        BootstrapActionConfig bootstrapActionConfig = ConfigureBootstrapAction(path, name, args);
                        bootstrapActionConfigs.add(bootstrapActionConfig);
                        name = "";
                        args = null;
                    }
                    if (value.startsWith("s3://")) { //$NON-NLS-1$
                        path = value;
                    } else { // The value for a bootstrap action does not start with "s3://".
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG,
                                    "AmazonElasticMapReduceJobExecutor.BootstrapActionPath.Error", key, value)); //$NON-NLS-1$
                        }
                        return null;
                    }
                }
                if (key.equals("--bootstrap-name")) { //$NON-NLS-1$
                    name = value;
                }
                if (key.equals("--args")) { //$NON-NLS-1$
                    args = ConfigArgs(value, ","); //$NON-NLS-1$
                }
            }

            if (!Const.isEmpty(path)) {
                actionCount++;
                if (name.equals("")) {
                    name = "Bootstrap Action " + actionCount; //$NON-NLS-1$
                }
                // Enter data for the last bootstrap action.
                BootstrapActionConfig bootstrapActionConfig = ConfigureBootstrapAction(path, name, args);
                bootstrapActionConfigs.add(bootstrapActionConfig);
            }
        }

        return bootstrapActionConfigs;
    }

    /**
     * Configure a bootstrap action object, given its path, name, and arguments.
     * 
     * @param path
     *          - path for the bootstrap action program in S3
     * @param name
     *          - name of the bootstrap action
     * @param args
     *          - arguments for the bootstrap action
     * @return configuration data object for one bootstrap action
     * 
     */
    BootstrapActionConfig ConfigureBootstrapAction(String path, String name, List<String> args) {

        ScriptBootstrapActionConfig scriptBootstrapActionConfig = new ScriptBootstrapActionConfig();
        BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig();
        scriptBootstrapActionConfig.setPath(path);
        scriptBootstrapActionConfig.setArgs(args);
        bootstrapActionConfig.setName(name);
        bootstrapActionConfig.setScriptBootstrapAction(scriptBootstrapActionConfig);

        return bootstrapActionConfig;
    }

    /**
     * Configure the HadoopJarStep, which is one Hadoop step of an EMR job to be submitted to AWS.
     * 
     * @param stepName
     *          name of step
     * @param stagingS3JarUrl
     *          URL for MapReduce jar file
     * @param args
     *          arguments for MapReduce jar
     * @return configuration data object for the step
     * 
     */
    public List<StepConfig> ConfigHadoopJarStep(String stepName, String stagingS3JarUrl, String args) {

        List<String> jarStepArgs = new ArrayList<String>();
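        // args is a single space-separated string (e.g. the hive-script invocation built in execute());
        // each token becomes one argument of the jar step.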
        jarStepArgs = ConfigArgs(args, " "); //$NON-NLS-1$

        HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
        hadoopJarStep.setJar(stagingS3JarUrl);
        hadoopJarStep.setArgs(jarStepArgs);

        StepConfig stepConfig = new StepConfig();
        stepConfig.setName(stepName);
        stepConfig.setHadoopJarStep(hadoopJarStep);
        if (isAlive()) { // Job flow stays in "WAITING" state if this step fails.
            stepConfig.setActionOnFailure("CANCEL_AND_WAIT"); //$NON-NLS-1$
        } else { // Job flow is terminated if this step fails.
            stepConfig.setActionOnFailure("TERMINATE_JOB_FLOW"); //$NON-NLS-1$
        }

        List<StepConfig> steps = new ArrayList<StepConfig>();
        steps.add(stepConfig);

        return steps;
    }

    /**
     * Given an unparsed argument string and a separator, log each argument and return the arguments as a list.
     * 
     * @param args
     *          - unparsed arguments
     * @param separator
     *          - separates one argument from another.
     * @return A list of arguments
     */
    public List<String> ConfigArgs(String args, String separator) {

        List<String> argList = new ArrayList<String>();
        if (!StringUtil.isEmpty(args)) {
            StringTokenizer st = new StringTokenizer(args, separator);
            while (st.hasMoreTokens()) {
                String token = st.nextToken();
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.AddingArgument") //$NON-NLS-1$
                            + token);
                }
                argList.add(token);
            }
        }
        return argList;
    }

    /**
     * Get an instance type.
     * 
     * @param unparsedInstanceType
     *          - unparsed instance type
     * @return A string for the instance type
     */
    public static String getInstanceType(String unparsedInstanceType) {
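        // E.g. a UI label such as "Small [m1.small]" (hypothetical value) yields "m1.small".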
        return unparsedInstanceType.substring(unparsedInstanceType.lastIndexOf("[") + 1, //$NON-NLS-1$
                unparsedInstanceType.lastIndexOf("]"));
    }

    /**
     * Determine if the job flow is in a running state.
     * 
     * @param state
     *          - state of the job flow
     * @return true if the state is not COMPLETED, FAILED, or TERMINATED; false otherwise.
     */
    public static boolean isRunning(String state) {
        // * <b>Pattern: </b>COMPLETED|FAILED|TERMINATED|RUNNING|SHUTTING_DOWN|STARTING|WAITING|BOOTSTRAPPING<br/>
        if ("COMPLETED".equalsIgnoreCase(state)) { //$NON-NLS-1$
            return false;
        }
        if ("FAILED".equalsIgnoreCase(state)) { //$NON-NLS-1$
            return false;
        }
        if ("TERMINATED".equalsIgnoreCase(state)) { //$NON-NLS-1$
            return false;
        }
        return true;
    }

    /**
     * Load attributes
     */
    public void loadXML(Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers,
            Repository rep) throws KettleXMLException {
        super.loadXML(entrynode, databases, slaveServers);
        hadoopJobName = XMLHandler.getTagValue(entrynode, "hadoop_job_name"); //$NON-NLS-1$
        hadoopJobFlowId = XMLHandler.getTagValue(entrynode, "hadoop_job_flow_id"); //$NON-NLS-1$
        qUrl = XMLHandler.getTagValue(entrynode, "q_url"); //$NON-NLS-1$
        accessKey = Encr.decryptPasswordOptionallyEncrypted(XMLHandler.getTagValue(entrynode, "access_key")); //$NON-NLS-1$
        secretKey = Encr.decryptPasswordOptionallyEncrypted(XMLHandler.getTagValue(entrynode, "secret_key")); //$NON-NLS-1$
        bootstrapActions = XMLHandler.getTagValue(entrynode, "bootstrap_actions"); //$NON-NLS-1$
        stagingDir = XMLHandler.getTagValue(entrynode, "staging_dir"); //$NON-NLS-1$
        numInstances = XMLHandler.getTagValue(entrynode, "num_instances"); //$NON-NLS-1$
        masterInstanceType = XMLHandler.getTagValue(entrynode, "master_instance_type"); //$NON-NLS-1$
        slaveInstanceType = XMLHandler.getTagValue(entrynode, "slave_instance_type"); //$NON-NLS-1$
        cmdLineArgs = XMLHandler.getTagValue(entrynode, "command_line_args"); //$NON-NLS-1$
        alive = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "alive")); //$NON-NLS-1$
        blocking = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "blocking")); //$NON-NLS-1$
        loggingInterval = XMLHandler.getTagValue(entrynode, "logging_interval"); //$NON-NLS-1$
    }

    /**
     * Get attributes
     */
    public String getXML() {
        StringBuffer retval = new StringBuffer(1024);
        retval.append(super.getXML());
        retval.append("      ").append(XMLHandler.addTagValue("hadoop_job_name", hadoopJobName)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("hadoop_job_flow_id", hadoopJobFlowId)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("q_url", qUrl)); //$NON-NLS-1$
        retval.append("      ") //$NON-NLS-1$
                .append(XMLHandler.addTagValue("access_key", Encr.encryptPasswordIfNotUsingVariables(accessKey)));
        retval.append("      ") //$NON-NLS-1$
                .append(XMLHandler.addTagValue("secret_key", Encr.encryptPasswordIfNotUsingVariables(secretKey)));
        retval.append("      ").append(XMLHandler.addTagValue("bootstrap_actions", bootstrapActions)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("staging_dir", stagingDir)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("num_instances", numInstances)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("master_instance_type", masterInstanceType)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("slave_instance_type", slaveInstanceType)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("command_line_args", cmdLineArgs)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("alive", alive)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("blocking", blocking)); //$NON-NLS-1$
        retval.append("      ").append(XMLHandler.addTagValue("logging_interval", loggingInterval)); //$NON-NLS-1$

        return retval.toString();
    }

    /**
     * Load attributes from a repository
     */
    public void loadRep(Repository rep, ObjectId id_jobentry, List<DatabaseMeta> databases,
            List<SlaveServer> slaveServers) throws KettleException {
        if (rep != null) {
            super.loadRep(rep, id_jobentry, databases, slaveServers);

            setHadoopJobName(rep.getJobEntryAttributeString(id_jobentry, "hadoop_job_name")); //$NON-NLS-1$
            setHadoopJobFlowId(rep.getJobEntryAttributeString(id_jobentry, "hadoop_job_flow_id")); //$NON-NLS-1$
            setQUrl(rep.getJobEntryAttributeString(id_jobentry, "q_url")); //$NON-NLS-1$
            setAccessKey(Encr
                    .decryptPasswordOptionallyEncrypted(rep.getJobEntryAttributeString(id_jobentry, "access_key"))); //$NON-NLS-1$
            setSecretKey(Encr
                    .decryptPasswordOptionallyEncrypted(rep.getJobEntryAttributeString(id_jobentry, "secret_key"))); //$NON-NLS-1$
            setBootstrapActions(rep.getJobEntryAttributeString(id_jobentry, "bootstrap_actions")); //$NON-NLS-1$
            setStagingDir(rep.getJobEntryAttributeString(id_jobentry, "staging_dir")); //$NON-NLS-1$
            setNumInstances(rep.getJobEntryAttributeString(id_jobentry, "num_instances")); //$NON-NLS-1$
            setMasterInstanceType(rep.getJobEntryAttributeString(id_jobentry, "master_instance_type")); //$NON-NLS-1$
            setSlaveInstanceType(rep.getJobEntryAttributeString(id_jobentry, "slave_instance_type")); //$NON-NLS-1$
            setCmdLineArgs(rep.getJobEntryAttributeString(id_jobentry, "command_line_args")); //$NON-NLS-1$
            setAlive(rep.getJobEntryAttributeBoolean(id_jobentry, "alive")); //$NON-NLS-1$
            setBlocking(rep.getJobEntryAttributeBoolean(id_jobentry, "blocking")); //$NON-NLS-1$
            setLoggingInterval(rep.getJobEntryAttributeString(id_jobentry, "logging_interval")); //$NON-NLS-1$

        } else {
            throw new KettleException(
                    BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoadFromRepository.Error")); //$NON-NLS-1$
        }
    }

    /**
     * Save attributes to a repository
     */
    public void saveRep(Repository rep, ObjectId id_job) throws KettleException {
        if (rep != null) {
            super.saveRep(rep, id_job);

            rep.saveJobEntryAttribute(id_job, getObjectId(), "hadoop_job_name", hadoopJobName); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "hadoop_job_flow_id", hadoopJobFlowId); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "q_url", qUrl); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "secret_key", //$NON-NLS-1$
                    Encr.encryptPasswordIfNotUsingVariables(secretKey));
            rep.saveJobEntryAttribute(id_job, getObjectId(), "access_key", //$NON-NLS-1$
                    Encr.encryptPasswordIfNotUsingVariables(accessKey));
            rep.saveJobEntryAttribute(id_job, getObjectId(), "bootstrap_actions", bootstrapActions); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "staging_dir", stagingDir); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "num_instances", numInstances); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "master_instance_type", masterInstanceType); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "slave_instance_type", slaveInstanceType); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "command_line_args", cmdLineArgs); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "alive", alive); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "blocking", blocking); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "logging_interval", loggingInterval); //$NON-NLS-1$

        } else {
            throw new KettleException(
                    BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.SaveToRepository.Error")); //$NON-NLS-1$
        }
    }

    /**
     * Build an S3 URL. Percent-encode "+" and "/" within the access/secret keys (as "%2B" and "%2F"), otherwise VFS
     * will have trouble parsing the filename.
     * 
     * @param filename
     *          - S3 URL of a file with access/secret keys in it
     * @return S3 URL with "+" and "/" percent-encoded within the access/secret keys
     */
    public String buildFilename(String filename) {
        filename = environmentSubstitute(filename);
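        // E.g. (hypothetical keys) "s3://AKEY:SEC/RET@s3/bucket/script.q" becomes
        // "s3://AKEY:SEC%2FRET@s3/bucket/script.q".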
        if (filename.startsWith(S3FileProvider.SCHEME)) {
            String authPart = filename.substring(S3FileProvider.SCHEME.length() + 3, filename.indexOf("@s3")) //$NON-NLS-1$
                    .replaceAll("\\+", "%2B").replaceAll("/", "%2F");
            filename = S3FileProvider.SCHEME + "://" + authPart + "@s3" //$NON-NLS-1$
                    + filename.substring(filename.indexOf("@s3") + 3);
        }
        return filename;
    }

    /**
     * Build a full S3 URL by inserting the access/secret keys. Percent-encode "+" and "/" within the keys, otherwise
     * VFS will have trouble parsing the filename.
     */
    public String buildFullS3Url(String filename) {
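        // Inserts "accessKey:secretKey@s3" right after the "s3://" scheme, percent-encoding "+" and "/" in the keys.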
        if (filename.startsWith(S3FileProvider.SCHEME + "://") //$NON-NLS-1$
                && !(filename.startsWith(S3FileProvider.SCHEME + ":///"))) {
            String authPart = accessKey + ":" + secretKey; //$NON-NLS-1$
            authPart = authPart.replaceAll("\\+", "%2B").replaceAll("/", "%2F"); //$NON-NLS-1$
            filename = S3FileProvider.SCHEME + "://" + authPart + "@s3" + filename.substring(5); //$NON-NLS-1$
        }
        return filename;
    }

    /**
     * Get a bucket name from S3 URL.
     * 
     * @param filename
     *          - S3 URL with or without access/secret keys
     * @return a string for bucket name
     */
    public String GetBucketName(String filename) {
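        // E.g. "s3://AKEY:SECRET@s3/mybucket/dir/file.q" and "s3://mybucket/dir/file.q" both yield "mybucket"
        // (illustrative URLs).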
        int i = filename.indexOf("@s3/") + 4; // URL with access/secret keys //$NON-NLS-1$
        if (i > 4) {
            int j = filename.indexOf("/", i); //$NON-NLS-1$
            if (i < j) {
                return filename.substring(i, j); // URL ends with file or folder
            } else {
                return filename.substring(i); // URL ends with bucket name itself
            }
        }

        // URL without access/secret keys
        i = filename.indexOf("/", 5); //$NON-NLS-1$
        if (i > 5) {
            return filename.substring(5, i); // URL ends with file or folder
        } else {
            return filename.substring(5); // URL ends with bucket name itself
        }
    }

    /**
     * Get a file key from a full S3 URL; the key is the string after "{bucketname}/".
     * 
     * @param filename
     *          - S3 URL with or without access/secret keys
     * @return key, the string after "{bucketname}/"
     */
    public String GetKeyFromS3Url(String filename) {
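        // E.g. "s3://mybucket/dir/file.q" yields "dir/file.q" (illustrative URL).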
        int i = filename.indexOf("@s3/") + 4; //$NON-NLS-1$
        if (i > 4) {
            filename = filename.substring(filename.indexOf("/", i) + 1); //$NON-NLS-1$
        } else {
            filename = filename.substring(filename.indexOf("/", 5) + 1); //$NON-NLS-1$
        }
        return filename;
    }

    public boolean evaluates() {
        return true;
    }

    public boolean isUnconditional() {
        return true;
    }

    /**
     * Get the class name for the dialog box of this plug-in.
     */
    @Override
    public String getDialogClassName() {
        String className = getClass().getCanonicalName();
        className = className.replaceFirst("\\.job\\.", ".ui."); //$NON-NLS-1$
        className += "Dialog"; //$NON-NLS-1$
        return className;
    }

}
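
Example

For reference, here is a minimal sketch of how the two static helper methods above behave. It assumes this class is on the classpath; the input strings are illustrative and not taken from the source.

import org.pentaho.amazon.hive.job.AmazonHiveJobExecutor;

public class AmazonHiveJobExecutorDemo {
    public static void main(String[] args) {
        // getInstanceType() extracts the text between the last "[" and "]" of a label.
        System.out.println(AmazonHiveJobExecutor.getInstanceType("Small [m1.small]")); // m1.small

        // isRunning() treats COMPLETED, FAILED and TERMINATED as terminal states.
        System.out.println(AmazonHiveJobExecutor.isRunning("RUNNING"));   // true
        System.out.println(AmazonHiveJobExecutor.isRunning("COMPLETED")); // false
    }
}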