/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.amazon.hive.job;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.io.IOUtils;
import org.apache.commons.vfs.FileObject;
import org.pentaho.amazon.AbstractAmazonJobEntry;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.annotations.JobEntry;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.encryption.Encr;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.logging.Log4jFileAppender;
import org.pentaho.di.core.logging.LogWriter;
import org.pentaho.di.core.util.StringUtil;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.s3.vfs.S3FileProvider;
import org.w3c.dom.Node;

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsRequest;
import com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsResult;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.JobFlowDetail;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;
import com.amazonaws.services.elasticmapreduce.model.ScriptBootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.model.TerminateJobFlowsRequest;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.S3Object;

/**
 * AmazonHiveJobExecutor - a job entry plug-in class that submits a Hive job to the AWS Elastic MapReduce service
 * from Pentaho Data Integration (Kettle).
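 * <p>
 * A minimal usage sketch (the values below are illustrative, not defaults; in practice the entry is configured
 * through the PDI dialog or populated by loadXML()/loadRep()):
 * </p>
 *
 * <pre>
 * AmazonHiveJobExecutor entry = new AmazonHiveJobExecutor();
 * entry.setHadoopJobName( "My Hive Job" );                               // hypothetical job name
 * entry.setQUrl( "s3://mybucket/scripts/query.q" );                      // hypothetical Hive script location
 * entry.setStagingDir( "s3://accesskey:secretkey@s3/mybucket/staging" ); // hypothetical staging/log directory
 * entry.setBlocking( true );  // poll EMR until the job flow finishes
 * entry.setAlive( false );    // terminate the job flow when no steps remain
 * </pre>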
 */
@JobEntry( id = "HiveJobExecutorPlugin", image = "AWS-HIVE.svg", name = "HiveJobExecutorPlugin.Name",
    description = "HiveJobExecutorPlugin.Description",
    categoryDescription = "i18n:org.pentaho.di.job:JobCategory.Category.BigData",
    i18nPackageName = "org.pentaho.amazon.hive.job" )
public class AmazonHiveJobExecutor extends AbstractAmazonJobEntry implements Cloneable, JobEntryInterface {

  private static Class<?> PKG = AmazonHiveJobExecutor.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$

  protected String qUrl = "";
  protected String bootstrapActions = "";
  protected boolean alive;

  public AmazonHiveJobExecutor() {
  }

  public String getQUrl() {
    return qUrl;
  }

  public void setQUrl( String qUrl ) {
    this.qUrl = qUrl;
  }

  public String getBootstrapActions() {
    return bootstrapActions;
  }

  public void setBootstrapActions( String bootstrapActions ) {
    this.bootstrapActions = bootstrapActions;
  }

  public boolean isAlive() {
    return alive;
  }

  public void setAlive( boolean alive ) {
    this.alive = alive;
  }

  /**
   * Executes a Hive job in the AWS Elastic MapReduce service.
   */
  public Result execute( Result result, int arg1 ) throws KettleException {

    // Set up a log file.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
      appender = LogWriter.createFileAppender( logFileName, true, false );
      LogWriter.getInstance().addAppender( appender );
      log.setLogLevel( parentJob.getLogLevel() );
    } catch ( Exception e ) {
      logError( BaseMessages.getString( PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
          logFileName, e.toString() ) );
      logError( Const.getStackTracker( e ) );
    }

    try {
      // Create and connect the AWS service clients.
      AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient( awsCredentials );
      AmazonS3 s3Client = new AmazonS3Client( awsCredentials );

      // Get the bucket name and S3 URL.
      String stagingBucketName = GetBucketName( stagingDir );
      String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

      // Prepare the staging S3 URL for the Hive script file.
      String stagingS3qUrl = "";
      if ( qUrl.startsWith( S3FileProvider.SCHEME + "://" ) ) { //$NON-NLS-1$

        // The .q file is already in S3; its staging S3 URL is s3://{bucketname}/{path}.
        if ( qUrl.indexOf( "@s3" ) > 0 ) { //$NON-NLS-1$
          stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring( qUrl.indexOf( "@s3" ) + 4 ); //$NON-NLS-1$
        } else {
          stagingS3qUrl = qUrl;
        }

      } else {

        // A local filename was given for the Hive script file; it must be copied to the S3 staging directory.
        // First, check for the correct protocol.
        if ( !qUrl.startsWith( "file:" ) ) { //$NON-NLS-1$
          if ( log.isBasic() ) {
            logBasic( BaseMessages.getString( PKG, "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error" ) //$NON-NLS-1$
                + qUrl );
          }
        }

        // Pull the .q file down from VFS.
        FileObject qFile = KettleVFS.getFileObject( buildFilename( qUrl ) );
        File tmpFile = File.createTempFile( "customEMR", "q" ); //$NON-NLS-1$ //$NON-NLS-2$
        tmpFile.deleteOnExit();
        FileOutputStream tmpFileOut = new FileOutputStream( tmpFile );
        IOUtils.copy( qFile.getContent().getInputStream(), tmpFileOut );
        tmpFileOut.close(); // Close the stream so the whole script is flushed to disk before the upload below.

        // Get the key name for the script file's S3 destination. The key is defined as the path name after {bucket}/.
        String key = GetKeyFromS3Url( stagingDir );
        if ( key == null ) {
          key = qFile.getName().getBaseName();
        } else {
          key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
        }

        // Delete any previous .q file in S3.
        try {
          s3Client.deleteObject( stagingBucketName, key );
        } catch ( Exception ex ) {
          logError( Const.getStackTracker( ex ) );
        }
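        // The upload below puts the script at s3://{stagingBucketName}/{key}. For example (hypothetical
        // values), a stagingDir of "s3://accesskey:secretkey@s3/mybucket/staging" and a script "query.q"
        // give bucket "mybucket", key "staging/query.q" and a staging URL of "s3://mybucket/staging/query.q".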
        // Put the .q file into the S3 staging directory.
        s3Client.putObject( new PutObjectRequest( stagingBucketName, key, tmpFile ) );
        stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
      }

      // AWS provides script-runner.jar (in its public bucket), which is used as the MapReduce jar for a Hive EMR job.
      jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

      RunJobFlowRequest runJobFlowRequest = null;
      RunJobFlowResult runJobFlowResult = null;
      if ( StringUtil.isEmpty( hadoopJobFlowId ) ) {
        // Create an EMR job flow, start a step to set up Hive, and get the job flow ID.
        runJobFlowRequest = createJobFlow();
        runJobFlowResult = emrClient.runJobFlow( runJobFlowRequest );
        hadoopJobFlowId = runJobFlowResult.getJobFlowId();
      }

      // Now the EMR job flow is ready to accept a "Run Hive Script" step.
      // First, prepare a job flow ID list.
      List<String> jobFlowIds = new ArrayList<String>();
      jobFlowIds.add( hadoopJobFlowId );

      // Configure a HadoopJarStep.
      String args = "s3://elasticmapreduce/libs/hive/hive-script " //$NON-NLS-1$
          + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f " //$NON-NLS-1$
          + environmentSubstitute( stagingS3qUrl ) + " " + environmentSubstitute( cmdLineArgs ); //$NON-NLS-1$
      List<StepConfig> steps = ConfigHadoopJarStep( hadoopJobName, jarUrl, args );

      // Add a "Run Hive Script" step to the existing job flow.
      AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
      addJobFlowStepsRequest.setJobFlowId( hadoopJobFlowId );
      addJobFlowStepsRequest.setSteps( steps );
      emrClient.addJobFlowSteps( addJobFlowStepsRequest );

      // Set the logging interval.
      String loggingIntervalS = environmentSubstitute( loggingInterval );
      int logIntv = 10;
      try {
        logIntv = Integer.parseInt( loggingIntervalS );
      } catch ( NumberFormatException ex ) {
        logError( BaseMessages.getString( PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
            loggingIntervalS ) );
      }
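      // The loop below polls DescribeJobFlows every logIntv seconds until the job flow leaves a running
      // state (see isRunning()). DescribeJobFlows is the job-flow API of this SDK generation; later AWS
      // SDKs deprecate it in favor of DescribeCluster/ListSteps.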
      // Monitor and log the job flow if intended.
      if ( blocking ) {
        try {
          if ( log.isBasic() ) {

            String executionState = "RUNNING"; //$NON-NLS-1$

            while ( isRunning( executionState ) ) {
              DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
              describeJobFlowsRequest.setJobFlowIds( jobFlowIds );
              DescribeJobFlowsResult describeJobFlowsResult = emrClient.describeJobFlows( describeJobFlowsRequest );

              boolean found = false;
              for ( JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows() ) {
                if ( jobFlowDetail.getJobFlowId().equals( hadoopJobFlowId ) ) {
                  executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                  found = true;
                }
              }
              if ( !found ) {
                break;
              }

              logBasic( hadoopJobName + " " + BaseMessages.getString( PKG, //$NON-NLS-1$
                  "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId ) + executionState ); //$NON-NLS-1$

              if ( parentJob.isStopped() ) {
                if ( !alive ) {
                  TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                  terminateJobFlowsRequest.withJobFlowIds( hadoopJobFlowId );
                  emrClient.terminateJobFlows( terminateJobFlowsRequest );
                }
                break;
              }

              try {
                if ( isRunning( executionState ) ) {
                  Thread.sleep( logIntv * 1000 );
                }
              } catch ( InterruptedException ie ) {
                logError( Const.getStackTracker( ie ) );
              }
            }

            if ( "FAILED".equalsIgnoreCase( executionState ) ) { //$NON-NLS-1$
              result.setStopped( true );
              result.setNrErrors( 1 );
              result.setResult( false );

              S3Object outObject = s3Client.getObject( stagingBucketName, hadoopJobFlowId + "/steps/1/stdout" ); //$NON-NLS-1$
              ByteArrayOutputStream outStream = new ByteArrayOutputStream();
              IOUtils.copy( outObject.getObjectContent(), outStream );
              logError( outStream.toString() );

              S3Object errorObject = s3Client.getObject( stagingBucketName, hadoopJobFlowId + "/steps/1/stderr" ); //$NON-NLS-1$
              ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
              IOUtils.copy( errorObject.getObjectContent(), errorStream );
              logError( errorStream.toString() );
            }
          }
        } catch ( Exception e ) {
          logError( e.getMessage(), e );
        }
      }

    } catch ( Throwable t ) {
      t.printStackTrace();
      result.setStopped( true );
      result.setNrErrors( 1 );
      result.setResult( false );
      logError( t.getMessage(), t );
    }

    if ( appender != null ) {
      LogWriter.getInstance().removeAppender( appender );
      appender.close();
      ResultFile resultFile =
          new ResultFile( ResultFile.FILE_TYPE_LOG, appender.getFile(), parentJob.getJobname(), getName() );
      result.getResultFiles().put( resultFile.getFile().toString(), resultFile );
    }

    return result;
  }

  /**
   * Prepares a request for creating an EMR job flow.
   *
   * @return RunJobFlowRequest the object used to request an EMR job flow
   */
  public RunJobFlowRequest createJobFlow() {

    // Create a RunJobFlowRequest object and set a name for the job flow.
    RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest();
    runJobFlowRequest.setName( hadoopJobName );

    // Set the log URL.
    String logUrl = stagingDir;
    if ( stagingDir.indexOf( "@s3" ) > 0 ) { //$NON-NLS-1$
      logUrl = S3FileProvider.SCHEME + "://" + stagingDir.substring( stagingDir.indexOf( "@s3" ) + 4 ); //$NON-NLS-1$ //$NON-NLS-2$
    }
    runJobFlowRequest.setLogUri( logUrl );
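    // For example (hypothetical values), a stagingDir of "s3://accesskey:secretkey@s3/mybucket/staging"
    // yields the log URI "s3://mybucket/staging", beneath which EMR writes the job flow's step logs
    // ({jobFlowId}/steps/{n}/stdout and stderr).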
    // Determine the instances for the Hadoop cluster.
    String numInstancesS = environmentSubstitute( numInstances );
    int numInsts = 2;
    try {
      numInsts = Integer.parseInt( numInstancesS );
    } catch ( NumberFormatException e ) {
      logError( BaseMessages.getString( PKG, "AmazonElasticMapReduceJobExecutor.InstanceNumber.Error", //$NON-NLS-1$
          numInstancesS ) );
    }
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig();
    instances.setInstanceCount( numInsts );
    instances.setMasterInstanceType( getInstanceType( masterInstanceType ) );
    instances.setSlaveInstanceType( getInstanceType( slaveInstanceType ) );
    instances.setHadoopVersion( "0.20" ); //$NON-NLS-1$
    instances.setKeepJobFlowAliveWhenNoSteps( alive );
    runJobFlowRequest.setInstances( instances );

    // Set the bootstrap actions.
    runJobFlowRequest.setBootstrapActions( ConfigBootstrapActions() );

    // Create an EMR step to set up Hive.
    String args = "s3://elasticmapreduce/libs/hive/hive-script "
        + "--base-path s3://elasticmapreduce/libs/hive/ --hive-versions 0.7 --install-hive"; //$NON-NLS-1$
    List<StepConfig> steps = ConfigHadoopJarStep( "Setup Hive", jarUrl, args ); //$NON-NLS-1$
    runJobFlowRequest.setSteps( steps );

    return runJobFlowRequest;
  }

  /**
   * Configures the bootstrap actions, which are executed before Hadoop starts.
   *
   * @return List<BootstrapActionConfig> configuration data for the bootstrap actions
   */
  public List<BootstrapActionConfig> ConfigBootstrapActions() {

    List<BootstrapActionConfig> bootstrapActionConfigs = new ArrayList<BootstrapActionConfig>();

    if ( !StringUtil.isEmpty( bootstrapActions ) ) {
      StringTokenizer st = new StringTokenizer( bootstrapActions, " " ); //$NON-NLS-1$
      String path = "";
      String name = "";
      List<String> args = null;
      int actionCount = 0;

      while ( st.hasMoreTokens() ) {
        // Take a key/value pair.
        String key = st.nextToken();
        String value = st.nextToken();

        // If an argument is enclosed in double quotes, take the string without the double quotes.
        if ( value.startsWith( "\"" ) ) { //$NON-NLS-1$
          while ( !value.endsWith( "\"" ) ) { //$NON-NLS-1$
            if ( st.hasMoreTokens() ) {
              value += " " + st.nextToken(); //$NON-NLS-1$
            } else {
              if ( log.isBasic() ) {
                logBasic( BaseMessages.getString( PKG,
                    "AmazonElasticMapReduceJobExecutor.BootstrapActionArgument.Error", key, //$NON-NLS-1$
                    value ) );
              }
              return null;
            }
          }
          value = value.substring( 1, value.length() - 1 );
        }

        // if ( log.isBasic() ) logBasic( "adding args: " + key + " " + value );

        if ( key.equals( "--bootstrap-action" ) ) { //$NON-NLS-1$
          if ( !Const.isEmpty( path ) ) {
            actionCount++;
            if ( name.equals( "" ) ) {
              name = "Bootstrap Action " + actionCount; //$NON-NLS-1$
            }
            // Enter the data for one bootstrap action.
            BootstrapActionConfig bootstrapActionConfig = ConfigureBootstrapAction( path, name, args );
            bootstrapActionConfigs.add( bootstrapActionConfig );
            name = "";
            args = null;
          }
          if ( value.startsWith( "s3://" ) ) { //$NON-NLS-1$
            path = value;
          } else {
            // The value for a bootstrap action does not start with "s3://".
            if ( log.isBasic() ) {
              logBasic( BaseMessages.getString( PKG,
                  "AmazonElasticMapReduceJobExecutor.BootstrapActionPath.Error", key, value ) ); //$NON-NLS-1$
            }
            return null;
          }
        }
        if ( key.equals( "--bootstrap-name" ) ) { //$NON-NLS-1$
          name = value;
        }
        if ( key.equals( "--args" ) ) { //$NON-NLS-1$
          args = ConfigArgs( value, "," ); //$NON-NLS-1$
        }
      }

      if ( !Const.isEmpty( path ) ) {
        actionCount++;
        if ( name.equals( "" ) ) {
          name = "Bootstrap Action " + actionCount; //$NON-NLS-1$
        }
        // Enter the data for the last bootstrap action.
        BootstrapActionConfig bootstrapActionConfig = ConfigureBootstrapAction( path, name, args );
        bootstrapActionConfigs.add( bootstrapActionConfig );
      }
    }

    return bootstrapActionConfigs;
  }

  /**
   * Configures one bootstrap action object, given its path, name and arguments.
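   * <p>
   * For example (hypothetical values), a bootstrapActions string of
   * {@code --bootstrap-action s3://mybucket/boot.sh --bootstrap-name "My Action" --args "-x,-y"}
   * is parsed by ConfigBootstrapActions() into one call to this method with path "s3://mybucket/boot.sh",
   * name "My Action" and args ["-x", "-y"].
   * </p>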
   *
   * @param path
   *          path of the bootstrap action program in S3
   * @param name
   *          name of the bootstrap action
   * @param args
   *          arguments for the bootstrap action
   * @return configuration data object for one bootstrap action
   */
  BootstrapActionConfig ConfigureBootstrapAction( String path, String name, List<String> args ) {
    ScriptBootstrapActionConfig scriptBootstrapActionConfig = new ScriptBootstrapActionConfig();
    BootstrapActionConfig bootstrapActionConfig = new BootstrapActionConfig();
    scriptBootstrapActionConfig.setPath( path );
    scriptBootstrapActionConfig.setArgs( args );
    bootstrapActionConfig.setName( name );
    bootstrapActionConfig.setScriptBootstrapAction( scriptBootstrapActionConfig );
    return bootstrapActionConfig;
  }

  /**
   * Configures the HadoopJarStep, which is one Hadoop step of an EMR job to be submitted to AWS.
   *
   * @param stepName
   *          name of the step
   * @param stagingS3JarUrl
   *          URL of the MapReduce jar file
   * @param args
   *          arguments for the MapReduce jar
   * @return configuration data object for the step
   */
  public List<StepConfig> ConfigHadoopJarStep( String stepName, String stagingS3JarUrl, String args ) {

    List<String> jarStepArgs = ConfigArgs( args, " " ); //$NON-NLS-1$

    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig();
    hadoopJarStep.setJar( stagingS3JarUrl );
    hadoopJarStep.setArgs( jarStepArgs );

    StepConfig stepConfig = new StepConfig();
    stepConfig.setName( stepName );
    stepConfig.setHadoopJarStep( hadoopJarStep );
    if ( isAlive() ) {
      // The job flow stays in the "WAITING" state if this step fails.
      stepConfig.setActionOnFailure( "CANCEL_AND_WAIT" ); //$NON-NLS-1$
    } else {
      // The job flow is terminated if this step fails.
      stepConfig.setActionOnFailure( "TERMINATE_JOB_FLOW" ); //$NON-NLS-1$
    }

    List<StepConfig> steps = new ArrayList<StepConfig>();
    steps.add( stepConfig );
    return steps;
  }

  /**
   * Given an unparsed argument string and a separator, logs each argument and returns a list of arguments.
   *
   * @param args
   *          unparsed arguments
   * @param separator
   *          separates one argument from another
   * @return a list of arguments
   */
  public List<String> ConfigArgs( String args, String separator ) {
    List<String> argList = new ArrayList<String>();
    if ( !StringUtil.isEmpty( args ) ) {
      StringTokenizer st = new StringTokenizer( args, separator );
      while ( st.hasMoreTokens() ) {
        String token = st.nextToken();
        if ( log.isBasic() ) {
          logBasic( BaseMessages.getString( PKG, "AmazonElasticMapReduceJobExecutor.AddingArgument" ) + token ); //$NON-NLS-1$
        }
        argList.add( token );
      }
    }
    return argList;
  }

  /**
   * Gets an instance type.
   *
   * @param unparsedInstanceType
   *          unparsed instance type
   * @return a string for the instance type (the text between the last "[" and "]")
   */
  public static String getInstanceType( String unparsedInstanceType ) {
    return unparsedInstanceType.substring( unparsedInstanceType.lastIndexOf( "[" ) + 1, //$NON-NLS-1$
        unparsedInstanceType.lastIndexOf( "]" ) ); //$NON-NLS-1$
  }

  /**
   * Determines whether the job flow is in a running state.
   *
   * @param state
   *          state of the job flow
   * @return true if the state is not COMPLETED, FAILED or TERMINATED, and false otherwise.
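   *         For example, isRunning( "WAITING" ) and isRunning( "BOOTSTRAPPING" ) return true, while
   *         isRunning( "COMPLETED" ) and isRunning( "failed" ) return false (the comparison ignores case).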
   */
  public static boolean isRunning( String state ) {
    // <b>Pattern: </b>COMPLETED|FAILED|TERMINATED|RUNNING|SHUTTING_DOWN|STARTING|WAITING|BOOTSTRAPPING<br/>
    if ( "COMPLETED".equalsIgnoreCase( state ) ) { //$NON-NLS-1$
      return false;
    }
    if ( "FAILED".equalsIgnoreCase( state ) ) { //$NON-NLS-1$
      return false;
    }
    if ( "TERMINATED".equalsIgnoreCase( state ) ) { //$NON-NLS-1$
      return false;
    }
    return true;
  }

  /**
   * Loads attributes from XML.
   */
  public void loadXML( Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers, Repository rep )
    throws KettleXMLException {
    super.loadXML( entrynode, databases, slaveServers );
    hadoopJobName = XMLHandler.getTagValue( entrynode, "hadoop_job_name" ); //$NON-NLS-1$
    hadoopJobFlowId = XMLHandler.getTagValue( entrynode, "hadoop_job_flow_id" ); //$NON-NLS-1$
    qUrl = XMLHandler.getTagValue( entrynode, "q_url" ); //$NON-NLS-1$
    accessKey = Encr.decryptPasswordOptionallyEncrypted( XMLHandler.getTagValue( entrynode, "access_key" ) ); //$NON-NLS-1$
    secretKey = Encr.decryptPasswordOptionallyEncrypted( XMLHandler.getTagValue( entrynode, "secret_key" ) ); //$NON-NLS-1$
    bootstrapActions = XMLHandler.getTagValue( entrynode, "bootstrap_actions" ); //$NON-NLS-1$
    stagingDir = XMLHandler.getTagValue( entrynode, "staging_dir" ); //$NON-NLS-1$
    numInstances = XMLHandler.getTagValue( entrynode, "num_instances" ); //$NON-NLS-1$
    masterInstanceType = XMLHandler.getTagValue( entrynode, "master_instance_type" ); //$NON-NLS-1$
    slaveInstanceType = XMLHandler.getTagValue( entrynode, "slave_instance_type" ); //$NON-NLS-1$
    cmdLineArgs = XMLHandler.getTagValue( entrynode, "command_line_args" ); //$NON-NLS-1$
    alive = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "alive" ) ); //$NON-NLS-1$ //$NON-NLS-2$
    blocking = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "blocking" ) ); //$NON-NLS-1$ //$NON-NLS-2$
    loggingInterval = XMLHandler.getTagValue( entrynode, "logging_interval" ); //$NON-NLS-1$
  }

  /**
   * Gets attributes as XML.
   */
  public String getXML() {
    StringBuffer retval = new StringBuffer( 1024 );
    retval.append( super.getXML() );
    retval.append( "      " ).append( XMLHandler.addTagValue( "hadoop_job_name", hadoopJobName ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "hadoop_job_flow_id", hadoopJobFlowId ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "q_url", qUrl ) ); //$NON-NLS-1$
    retval.append( "      " ) //$NON-NLS-1$
        .append( XMLHandler.addTagValue( "access_key", Encr.encryptPasswordIfNotUsingVariables( accessKey ) ) ); //$NON-NLS-1$
    retval.append( "      " ) //$NON-NLS-1$
        .append( XMLHandler.addTagValue( "secret_key", Encr.encryptPasswordIfNotUsingVariables( secretKey ) ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "bootstrap_actions", bootstrapActions ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "staging_dir", stagingDir ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "num_instances", numInstances ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "master_instance_type", masterInstanceType ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "slave_instance_type", slaveInstanceType ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "command_line_args", cmdLineArgs ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "alive", alive ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "blocking", blocking ) ); //$NON-NLS-1$
    retval.append( "      " ).append( XMLHandler.addTagValue( "logging_interval", loggingInterval ) ); //$NON-NLS-1$
    return retval.toString();
  }
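  /*
   * The XML fragment written by getXML() and read back by loadXML() has one tag per attribute, for example
   * (illustrative values):
   *
   *   <hadoop_job_name>My Hive Job</hadoop_job_name>
   *   <q_url>s3://mybucket/scripts/query.q</q_url>
   *   <blocking>Y</blocking>
   *   <logging_interval>10</logging_interval>
   *
   * Booleans are serialized as Y/N, and the access/secret keys are stored encrypted unless they are
   * variable expressions.
   */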
  /**
   * Loads attributes from a repository.
   */
  public void loadRep( Repository rep, ObjectId id_jobentry, List<DatabaseMeta> databases,
      List<SlaveServer> slaveServers ) throws KettleException {
    if ( rep != null ) {
      super.loadRep( rep, id_jobentry, databases, slaveServers );
      setHadoopJobName( rep.getJobEntryAttributeString( id_jobentry, "hadoop_job_name" ) ); //$NON-NLS-1$
      setHadoopJobFlowId( rep.getJobEntryAttributeString( id_jobentry, "hadoop_job_flow_id" ) ); //$NON-NLS-1$
      setQUrl( rep.getJobEntryAttributeString( id_jobentry, "q_url" ) ); //$NON-NLS-1$
      setAccessKey( Encr.decryptPasswordOptionallyEncrypted(
          rep.getJobEntryAttributeString( id_jobentry, "access_key" ) ) ); //$NON-NLS-1$
      setSecretKey( Encr.decryptPasswordOptionallyEncrypted(
          rep.getJobEntryAttributeString( id_jobentry, "secret_key" ) ) ); //$NON-NLS-1$
      setBootstrapActions( rep.getJobEntryAttributeString( id_jobentry, "bootstrap_actions" ) ); //$NON-NLS-1$
      setStagingDir( rep.getJobEntryAttributeString( id_jobentry, "staging_dir" ) ); //$NON-NLS-1$
      setNumInstances( rep.getJobEntryAttributeString( id_jobentry, "num_instances" ) ); //$NON-NLS-1$
      setMasterInstanceType( rep.getJobEntryAttributeString( id_jobentry, "master_instance_type" ) ); //$NON-NLS-1$
      setSlaveInstanceType( rep.getJobEntryAttributeString( id_jobentry, "slave_instance_type" ) ); //$NON-NLS-1$
      setCmdLineArgs( rep.getJobEntryAttributeString( id_jobentry, "command_line_args" ) ); //$NON-NLS-1$
      setAlive( rep.getJobEntryAttributeBoolean( id_jobentry, "alive" ) ); //$NON-NLS-1$
      setBlocking( rep.getJobEntryAttributeBoolean( id_jobentry, "blocking" ) ); //$NON-NLS-1$
      setLoggingInterval( rep.getJobEntryAttributeString( id_jobentry, "logging_interval" ) ); //$NON-NLS-1$
    } else {
      throw new KettleException(
          BaseMessages.getString( PKG, "AmazonElasticMapReduceJobExecutor.LoadFromRepository.Error" ) ); //$NON-NLS-1$
    }
  }

  /**
   * Saves attributes to a repository.
   */
  public void saveRep( Repository rep, ObjectId id_job ) throws KettleException {
    if ( rep != null ) {
      super.saveRep( rep, id_job );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "hadoop_job_name", hadoopJobName ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "hadoop_job_flow_id", hadoopJobFlowId ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "q_url", qUrl ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "secret_key", //$NON-NLS-1$
          Encr.encryptPasswordIfNotUsingVariables( secretKey ) );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "access_key", //$NON-NLS-1$
          Encr.encryptPasswordIfNotUsingVariables( accessKey ) );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "bootstrap_actions", bootstrapActions ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "staging_dir", stagingDir ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "num_instances", numInstances ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "master_instance_type", masterInstanceType ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "slave_instance_type", slaveInstanceType ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "command_line_args", cmdLineArgs ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "alive", alive ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "blocking", blocking ); //$NON-NLS-1$
      rep.saveJobEntryAttribute( id_job, getObjectId(), "logging_interval", loggingInterval ); //$NON-NLS-1$
    } else {
      throw new KettleException(
          BaseMessages.getString( PKG, "AmazonElasticMapReduceJobExecutor.SaveToRepository.Error" ) ); //$NON-NLS-1$
    }
  }
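  /*
   * Example of the key escaping performed by buildFilename() below (hypothetical credentials): a URL like
   *   s3://AKIAXXXX:abc/def+g@s3/mybucket/query.q
   * becomes
   *   s3://AKIAXXXX:abc%2Fdef%2Bg@s3/mybucket/query.q
   * so that VFS does not mistake the "/" inside the secret key for a path separator.
   */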
Replace "/" and "\" with ASCII equivalents within the access/secret keys, otherwise VFS will have * trouble in parsing the filename. * * @param filename * - S3 URL of a file with access/secret keys in it * @return S3 URL with "/" and "\" with ASCII equivalents within the access/secret keys */ public String buildFilename(String filename) { filename = environmentSubstitute(filename); if (filename.startsWith(S3FileProvider.SCHEME)) { String authPart = filename.substring(S3FileProvider.SCHEME.length() + 3, filename.indexOf("@s3")) //$NON-NLS-1$ .replaceAll("\\+", "%2B").replaceAll("/", "%2F"); filename = S3FileProvider.SCHEME + "://" + authPart + "@s3" //$NON-NLS-1$ + filename.substring(filename.indexOf("@s3") + 3); } return filename; } /** * Build full S3 URL by inserting the access/secret keys. Replace "/" and "\" with ASCII equivalents within the * access/secret keys, otherwise VFS will have trouble in parsing the filename. */ public String buildFullS3Url(String filename) { if (filename.startsWith(S3FileProvider.SCHEME + "://") //$NON-NLS-1$ && !(filename.startsWith(S3FileProvider.SCHEME + ":///"))) { String authPart = accessKey + ":" + secretKey; //$NON-NLS-1$ authPart = authPart.replaceAll("\\+", "%2B").replaceAll("/", "%2F"); //$NON-NLS-1$ filename = S3FileProvider.SCHEME + "://" + authPart + "@s3" + filename.substring(5); //$NON-NLS-1$ } return filename; } /** * Get a bucket name from S3 URL. * * @param filename * - S3 URL with or without access/secret keys * @return a string for bucket name */ public String GetBucketName(String filename) { int i = filename.indexOf("@s3/") + 4; // URL with access/secret keys //$NON-NLS-1$ if (i > 4) { int j = filename.indexOf("/", i); //$NON-NLS-1$ if (i < j) { return filename.substring(i, j); // URL ends with file or folder } else { return filename.substring(i); // URL ends with bucket name itself } } // URL without access/secret keys i = filename.indexOf("/", 5); //$NON-NLS-1$ if (i > 5) { return filename.substring(5, i); // URL ends with file or folder } else { return filename.substring(5); // URL ends with bucket name itself } } /** * Get a file key from full S3 URL, which is a string after "{bucketname}/". * * @param filename * - S3 URL with access/secret keys * @return key, which is a string after "{bucketname}/" */ public String GetKeyFromS3Url(String filename) { int i = filename.indexOf("@s3/") + 4; //$NON-NLS-1$ if (i > 4) { filename = filename.substring(filename.indexOf("/", i) + 1); //$NON-NLS-1$ } else { filename = filename.substring(filename.indexOf("/", 5) + 1); //$NON-NLS-1$ } return filename; } public boolean evaluates() { return true; } public boolean isUnconditional() { return true; } /** * Get the class name for the dialog box of this plug-in. */ @Override public String getDialogClassName() { String className = getClass().getCanonicalName(); className = className.replaceFirst("\\.job\\.", ".ui."); //$NON-NLS-1$ className += "Dialog"; //$NON-NLS-1$ return className; } }