Example usage for org.apache.hadoop.mapred JobConf getCredentials

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getCredentials().

Prototype

public Credentials getCredentials() 

Document

Get credentials for the job.
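
The examples below use getCredentials() for three recurring tasks: looking up a token under a known alias, registering a newly obtained token, and merging the current user's credentials into the job. The following minimal sketch shows those patterns in isolation; the alias "example.token.alias", the class name, and the empty placeholder token are illustrative and do not come from any of the projects listed under Usage.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

public class JobConfCredentialsSketch {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf();

        // The Credentials object is owned by the JobConf; anything placed in it
        // travels with the job when the job is submitted.
        Credentials credentials = jobConf.getCredentials();

        // Look up a token by alias; getToken returns null if nothing is stored under it.
        Text alias = new Text("example.token.alias"); // illustrative alias
        if (credentials.getToken(alias) == null) {
            // Register a token under the alias. A real job would obtain this token
            // from a service (e.g. a metastore or NameNode) instead of creating an empty one.
            Token<TokenIdentifier> placeholder = new Token<TokenIdentifier>();
            credentials.addToken(alias, placeholder);
        }

        // Merge in any tokens and secrets the current user already holds.
        credentials.mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    }
}

This is why the examples below populate the JobConf's credentials with HDFS and Hive metastore delegation tokens before submitting the job.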

Usage

From source file: org.apache.sqoop.mapreduce.ParquetJob.java

License: Apache License

/**
 * Configure the import job. The import process will use a Kite dataset to
 * write data records into Parquet format internally. The input key class is
 * {@link org.apache.sqoop.lib.SqoopRecord}. The output key is
 * {@link org.apache.avro.generic.GenericRecord}.
 */
public static void configureImportJob(JobConf conf, Schema schema, String uri, WriteMode writeMode)
        throws IOException {
    Dataset dataset;

    // Add hive delegation token only if we don't already have one.
    if (uri.startsWith("dataset:hive")) {
        Configuration hiveConf = HiveConfig.getHiveConf(conf);
        if (isSecureMetastore(hiveConf)) {
            // Copy hive configs to job config
            HiveConfig.addHiveConfigs(hiveConf, conf);

            if (conf.getCredentials().getToken(new Text(HIVE_METASTORE_TOKEN_ALIAS)) == null) {
                addHiveDelegationToken(conf);
            }
        }
    }

    if (Datasets.exists(uri)) {
        if (WriteMode.DEFAULT.equals(writeMode)) {
            throw new IOException("Destination exists! " + uri);
        }

        dataset = Datasets.load(uri);
        Schema writtenWith = dataset.getDescriptor().getSchema();
        if (!SchemaValidationUtil.canRead(writtenWith, schema)) {
            throw new IOException(String.format("Expected schema: %s%nActual schema: %s", writtenWith, schema));
        }
    } else {
        dataset = createDataset(schema, getCompressionType(conf), uri);
    }
    conf.set(CONF_AVRO_SCHEMA, schema.toString());

    DatasetKeyOutputFormat.ConfigBuilder builder = DatasetKeyOutputFormat.configure(conf);
    if (WriteMode.OVERWRITE.equals(writeMode)) {
        builder.overwrite(dataset);
    } else if (WriteMode.APPEND.equals(writeMode)) {
        builder.appendTo(dataset);
    } else {
        builder.writeTo(dataset);
    }
}

From source file: org.apache.sqoop.mapreduce.ParquetJob.java

License: Apache License

/**
 * Add hive delegation token to credentials store.
 * @param conf the job configuration whose credentials store receives the token
 */
private static void addHiveDelegationToken(JobConf conf) {
    // Need to use reflection since there's no compile time dependency on the client libs.
    Class<?> HiveConfClass;
    Class<?> HiveMetaStoreClientClass;

    try {
        HiveMetaStoreClientClass = Class.forName(HIVE_METASTORE_CLIENT_CLASS);
    } catch (ClassNotFoundException ex) {
        LOG.error("Could not load " + HIVE_METASTORE_CLIENT_CLASS + " when adding hive delegation token. "
                + "Make sure HIVE_CONF_DIR is set correctly.", ex);
        throw new RuntimeException("Couldn't fetch delegation token.", ex);
    }

    try {
        HiveConfClass = Class.forName(HiveConfig.HIVE_CONF_CLASS);
    } catch (ClassNotFoundException ex) {
        LOG.error("Could not load " + HiveConfig.HIVE_CONF_CLASS + " when adding hive delegation token."
                + " Make sure HIVE_CONF_DIR is set correctly.", ex);
        throw new RuntimeException("Couldn't fetch delegation token.", ex);
    }

    try {
        Object client = HiveMetaStoreClientClass.getConstructor(HiveConfClass).newInstance(HiveConfClass
                .getConstructor(Configuration.class, Class.class).newInstance(conf, Configuration.class));
        // getDelegationToken(String kerberosPrincipal)
        Method getDelegationTokenMethod = HiveMetaStoreClientClass.getMethod("getDelegationToken",
                String.class);
        Object tokenStringForm = getDelegationTokenMethod.invoke(client,
                UserGroupInformation.getLoginUser().getShortUserName());

        // Load token
        Token<DelegationTokenIdentifier> metastoreToken = new Token<DelegationTokenIdentifier>();
        metastoreToken.decodeFromUrlString(tokenStringForm.toString());
        conf.getCredentials().addToken(new Text(HIVE_METASTORE_TOKEN_ALIAS), metastoreToken);

        LOG.debug("Successfully fetched hive metastore delegation token. " + metastoreToken);
    } catch (Exception ex) {
        LOG.error("Couldn't fetch delegation token.", ex);
        throw new RuntimeException("Couldn't fetch delegation token.", ex);
    }
}

From source file: org.apache.tez.mapreduce.common.MRInputAMSplitGenerator.java

License: Apache License

@Override
public List<Event> initialize() throws Exception {
    Stopwatch sw = null;
    if (LOG.isDebugEnabled()) {
        sw = new Stopwatch().start();
    }
    MRInputUserPayloadProto userPayloadProto = MRInputHelpers
            .parseMRInputPayload(getContext().getInputUserPayload());
    if (LOG.isDebugEnabled()) {
        sw.stop();
        LOG.debug("Time to parse MRInput payload into prot: " + sw.elapsedMillis());
    }
    if (LOG.isDebugEnabled()) {
        sw.reset().start();
    }
    Configuration conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());

    sendSerializedEvents = conf.getBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
            MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
    LOG.info("Emitting serialized splits: " + sendSerializedEvents);
    if (LOG.isDebugEnabled()) {
        sw.stop();
        LOG.debug("Time converting ByteString to configuration: " + sw.elapsedMillis());
    }

    if (LOG.isDebugEnabled()) {
        sw.reset().start();
    }

    int totalResource = getContext().getTotalAvailableResource().getMemory();
    int taskResource = getContext().getVertexTaskResource().getMemory();
    float waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES,
            TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);

    int numTasks = (int) ((totalResource * waves) / taskResource);

    LOG.info("Input " + getContext().getInputName() + " asking for " + numTasks + " tasks. Headroom: "
            + totalResource + " Task Resource: " + taskResource + " waves: " + waves);

    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    InputSplitInfoMem inputSplitInfo = null;
    boolean groupSplits = userPayloadProto.getGroupingEnabled();
    if (groupSplits) {
        LOG.info("Grouping input splits");
        inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, true, numTasks);
    } else {
        inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, false, 0);
    }
    if (LOG.isDebugEnabled()) {
        sw.stop();
        LOG.debug("Time to create splits to mem: " + sw.elapsedMillis());
    }

    List<Event> events = Lists.newArrayListWithCapacity(inputSplitInfo.getNumTasks() + 1);

    InputConfigureVertexTasksEvent configureVertexEvent = InputConfigureVertexTasksEvent.create(
            inputSplitInfo.getNumTasks(), VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()),
            InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate());
    events.add(configureVertexEvent);

    if (sendSerializedEvents) {
        MRSplitsProto splitsProto = inputSplitInfo.getSplitsProto();
        int count = 0;
        for (MRSplitProto mrSplit : splitsProto.getSplitsList()) {
            // Unnecessary array copy, can be avoided by using ByteBuffer instead of a raw array.
            InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count++,
                    mrSplit.toByteString().asReadOnlyByteBuffer());
            events.add(diEvent);
        }
    } else {
        int count = 0;
        if (inputSplitInfo.holdsNewFormatSplits()) {
            for (org.apache.hadoop.mapreduce.InputSplit split : inputSplitInfo.getNewFormatSplits()) {
                InputDataInformationEvent diEvent = InputDataInformationEvent.createWithObjectPayload(count++,
                        split);
                events.add(diEvent);
            }
        } else {
            for (org.apache.hadoop.mapred.InputSplit split : inputSplitInfo.getOldFormatSplits()) {
                InputDataInformationEvent diEvent = InputDataInformationEvent.createWithObjectPayload(count++,
                        split);
                events.add(diEvent);
            }
        }
    }

    return events;
}

From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License: Apache License

/**
 * Generates Input splits and stores them in a {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto} instance.
 *
 * Returns an instance of {@link InputSplitInfoMem}
 *
 * With grouping enabled, the eventual configuration used by the tasks will have
 * the user-specified InputFormat replaced by either {@link org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat}
 * or {@link org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat}
 *
 * @param conf
 *          an instance of Configuration which is used to determine whether
 *          the mapred or mapreduce API is being used. This Configuration
 *          instance should also contain adequate information to be able to
 *          generate splits - like the InputFormat being used and related
 *          configuration.
 * @param groupSplits whether to group the splits or not
 * @param targetTasks the number of target tasks if grouping is enabled. Specify as 0 otherwise.
 * @return an instance of {@link InputSplitInfoMem} which supports a subset of
 *         the APIs defined on {@link InputSplitInfo}
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
@InterfaceStability.Unstable
public static InputSplitInfoMem generateInputSplitsToMem(Configuration conf, boolean groupSplits,
        int targetTasks) throws IOException, ClassNotFoundException, InterruptedException {

    InputSplitInfoMem splitInfoMem = null;
    JobConf jobConf = new JobConf(conf);
    if (jobConf.getUseNewMapper()) {
        LOG.info("Generating mapreduce api input splits");
        Job job = Job.getInstance(conf);
        org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(job, groupSplits, targetTasks);
        splitInfoMem = new InputSplitInfoMem(splits, createTaskLocationHintsFromSplits(splits), splits.length,
                job.getCredentials(), job.getConfiguration());
    } else {
        LOG.info("Generating mapred api input splits");
        org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, groupSplits, targetTasks);
        splitInfoMem = new InputSplitInfoMem(splits, createTaskLocationHintsFromSplits(splits), splits.length,
                jobConf.getCredentials(), jobConf);
    }
    LOG.info("NumSplits: " + splitInfoMem.getNumTasks() + ", SerializedSize: "
            + splitInfoMem.getSplitsProto().getSerializedSize());
    return splitInfoMem;
}

From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License: Apache License

/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, to be used in determining the parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, Path inputSplitDir) throws IOException {

    org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, false, 0);

    JobSplitWriter.createSplitFiles(inputSplitDir, jobConf, inputSplitDir.getFileSystem(jobConf), splits);

    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (int i = 0; i < splits.length; ++i) {
        locationHints.add(TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null));
    }

    return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir),
            JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints,
            jobConf.getCredentials());
}

From source file: org.cloudera.RMCredentialsProvider.java

License: Apache License

@Override
public void addtoJobConf(JobConf jobconf, CredentialsProperties props, Context context) throws Exception {
    try {

        Token<DelegationTokenIdentifier> abctoken = new Token<DelegationTokenIdentifier>();
        jobconf.getCredentials().addToken(new Text("ABC Token"), abctoken);
        XLog.getLog(getClass()).debug("Added the ABC token in job conf");

    } catch (Exception e) {
        XLog.getLog(getClass()).warn("Exception in addtoJobConf", e);
        throw e;
    }
}

From source file: org.jd.copier.mapred.DistCp.java

License: Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jClient.getFs(), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        //skip file if the src and the dst files are the same.
                        skipfile = update
                                && sameFile(srcfs, child, dstfs, new Path(args.dst, dst), skipCRCCheck);
                        //skip file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToCopyCount=" + fileCount);
    LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
    return fileCount > 0;
}

From source file: org.kitesdk.tools.TransformTask.java

License: Apache License

private static Configuration addHiveDelegationToken(Configuration conf) {
    // this uses a JobConf because we don't have access to the MR Job
    JobConf jobConf = new JobConf(conf);

    try {
        if (conf.getBoolean(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.varname, false)) {
            // Need to set up delegation token auth
            HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(new HiveConf());
            String hiveTokenStr = metaStoreClient.getDelegationToken("yarn");
            Token<DelegationTokenIdentifier> hiveToken = new Token<DelegationTokenIdentifier>();
            hiveToken.decodeFromUrlString(hiveTokenStr);
            jobConf.getCredentials().addToken(HIVE_MS_TOKEN_ALIAS, hiveToken);
        }

        return jobConf;

    } catch (IOException e) {
        throw new RuntimeException("Unable to obtain Hive delegation token", e);
    } catch (TException e) {
        throw new RuntimeException("Unable to obtain Hive delegation token", e);
    }
}

From source file: org.pentaho.big.data.impl.shim.mapreduce.PentahoMapReduceJobBuilderImpl.java

License: Apache License

@Override
protected MapReduceJobAdvanced submit(Configuration conf) throws IOException {
    cleanOutputPath(conf);

    FileSystem fs = hadoopShim.getFileSystem(conf);

    if (Boolean.parseBoolean(getProperty(conf, pmrProperties, PENTAHO_MAPREDUCE_PROPERTY_USE_DISTRIBUTED_CACHE,
            Boolean.toString(true)))) {
        String installPath = getProperty(conf, pmrProperties,
                PENTAHO_MAPREDUCE_PROPERTY_KETTLE_HDFS_INSTALL_DIR, null);
        String installId = getProperty(conf, pmrProperties, PENTAHO_MAPREDUCE_PROPERTY_KETTLE_INSTALLATION_ID,
                null);
        try {
            if (Utils.isEmpty(installPath)) {
                throw new IllegalArgumentException(BaseMessages.getString(PKG,
                        JOB_ENTRY_HADOOP_TRANS_JOB_EXECUTOR_KETTLE_HDFS_INSTALL_DIR_MISSING));
            }
            if (Utils.isEmpty(installId)) {
                installId = this.installId;
            }
            if (!installPath.endsWith(Const.FILE_SEPARATOR)) {
                installPath += Const.FILE_SEPARATOR;
            }

            Path kettleEnvInstallDir = fs.asPath(installPath, installId);
            FileObject pmrLibArchive = pmrArchiveGetter.getPmrArchive(conf);

            // Make sure the version we're attempting to use is installed
            if (hadoopShim.getDistributedCacheUtil().isKettleEnvironmentInstalledAt(fs, kettleEnvInstallDir)) {
                log.logDetailed(BaseMessages.getString(PKG,
                        "JobEntryHadoopTransJobExecutor.UsingKettleInstallationFrom",
                        kettleEnvInstallDir.toUri().getPath()));
            } else {
                // Load additional plugin folders as requested
                String additionalPluginNames = getProperty(conf, pmrProperties,
                        PENTAHO_MAPREDUCE_PROPERTY_ADDITIONAL_PLUGINS, null);
                if (pmrLibArchive == null) {
                    throw new KettleException(BaseMessages.getString(PKG,
                            JOB_ENTRY_HADOOP_TRANS_JOB_EXECUTOR_UNABLE_TO_LOCATE_ARCHIVE,
                            pmrArchiveGetter.getVfsFilename(conf)));
                }

                log.logBasic(BaseMessages.getString(PKG, "JobEntryHadoopTransJobExecutor.InstallingKettleAt",
                        kettleEnvInstallDir));

                FileObject bigDataPluginFolder = vfsPluginDirectory;
                hadoopShim.getDistributedCacheUtil().installKettleEnvironment(pmrLibArchive, fs,
                        kettleEnvInstallDir, bigDataPluginFolder, additionalPluginNames);

                log.logBasic(BaseMessages.getString(PKG,
                        "JobEntryHadoopTransJobExecutor.InstallationOfKettleSuccessful", kettleEnvInstallDir));
            }

            stageMetaStoreForHadoop(conf, fs, installPath);

            if (!hadoopShim.getDistributedCacheUtil().isKettleEnvironmentInstalledAt(fs, kettleEnvInstallDir)) {
                throw new KettleException(BaseMessages.getString(PKG,
                        JOB_ENTRY_HADOOP_TRANS_JOB_EXECUTOR_KETTLE_INSTALLATION_MISSING_FROM,
                        kettleEnvInstallDir.toUri().getPath()));
            }

            log.logBasic(BaseMessages.getString(PKG,
                    JOB_ENTRY_HADOOP_TRANS_JOB_EXECUTOR_CONFIGURING_JOB_WITH_KETTLE_AT,
                    kettleEnvInstallDir.toUri().getPath()));

            String mapreduceClasspath = conf.get(MAPREDUCE_APPLICATION_CLASSPATH,
                    DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH);
            conf.set(MAPREDUCE_APPLICATION_CLASSPATH, CLASSES + mapreduceClasspath);

            hadoopShim.getDistributedCacheUtil().configureWithKettleEnvironment(conf, fs, kettleEnvInstallDir);
            log.logBasic(MAPREDUCE_APPLICATION_CLASSPATH + ": " + conf.get(MAPREDUCE_APPLICATION_CLASSPATH));
        } catch (Exception ex) {
            throw new IOException(BaseMessages.getString(PKG,
                    JOB_ENTRY_HADOOP_TRANS_JOB_EXECUTOR_INSTALLATION_OF_KETTLE_FAILED), ex);
        }
    }
    JobConf jobConf = conf.getAsDelegateConf(JobConf.class);
    jobConf.getCredentials().addAll(UserGroupInformation.getCurrentUser().getCredentials());
    return super.submit(conf);
}

From source file: org.pentaho.big.data.impl.shim.mapreduce.PentahoMapReduceJobBuilderImplTest.java

License: Apache License

@Test
public void testSubmitNoDistributedCache() throws IOException {
    Configuration conf = mock(Configuration.class);
    JobConf jobConf = mock(JobConf.class);
    when(jobConf.getCredentials()).thenReturn(new Credentials());
    when(conf.getAsDelegateConf(any())).thenReturn(jobConf);
    when(conf.get(PentahoMapReduceJobBuilderImpl.PENTAHO_MAPREDUCE_PROPERTY_USE_DISTRIBUTED_CACHE))
            .thenReturn(Boolean.toString(false));
    pentahoMapReduceJobBuilder.submit(conf);
    verify(hadoopShim).submitJob(conf);
}