Example usage for org.apache.hadoop.mapred JobConf getJar

Introduction

On this page you can find usage examples for the org.apache.hadoop.mapred.JobConf method getJar().

Prototype

public String getJar() 

Document

Get the user jar for the map-reduce job.
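
The usage examples below come from real projects. For orientation, here is a minimal sketch (the class name GetJarExample is invented for illustration) of how a jar path ends up on a JobConf in the first place: it is set either explicitly with setJar(String), or indirectly with setJarByClass(Class), which searches the classpath for the jar containing the given class. If neither has been called, getJar() returns null.

import org.apache.hadoop.mapred.JobConf;

public class GetJarExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No jar has been registered yet, so getJar() returns null.
        System.out.println("jar before: " + conf.getJar());

        // setJarByClass() locates the jar that contains the given class
        // and records its path in the configuration.
        conf.setJarByClass(GetJarExample.class);

        // Prints the jar path, or still null if the class was loaded from
        // a plain directory rather than a jar (e.g. when run from an IDE).
        System.out.println("jar after: " + conf.getJar());
    }
}

Several of the examples below, in particular the Kite getLibraryJars() helpers, rely on exactly this behaviour: they call setJarByClass() and then use getJar() to find the directory that holds the application's library jars.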

Usage

From source file: it.crs4.pydoop.pipes.Submitter.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    cli.addOption("input", false, "input path to the maps", "path");
    cli.addOption("output", false, "output path from the reduces", "path");

    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    //cli.addArgument("javareader", false, "is the RecordReader in Java");
    cli.addOption("map", false, "java classname of Mapper", "class");
    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("reduce", false, "java classname of Reducer", "class");
    cli.addOption("writer", false, "java classname of OutputFormat", "class");
    cli.addOption("program", false, "URI to application executable", "class");
    cli.addOption("reduces", false, "number of reduces", "num");
    cli.addOption("jobconf", false,
            "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
    cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
    Parser parser = cli.createParser();
    try {

        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());

        JobConf job = new JobConf(getConf());

        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        }
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            setIsJavaRecordReader(job, true);
            job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(job, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(job, true);
            job.setMapperClass(getClass(results, "map", job, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(job, true);
            job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            setIsJavaRecordWriter(job, true);
            job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
        }

        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
            }
        }

        if (results.hasOption("program")) {
            setExecutable(job, results.getOptionValue("program"));
        }
        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                String[] keyValSplit = keyVal.split("=");
                job.set(keyValSplit[0], keyValSplit[1]);
            }
        }
        // if they gave us a jar file, include it in the classpath
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() };
            //FindBugs complains that creating a URLClassLoader should be
            //in a doPrivileged() block. 
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            job.setClassLoader(loader);
        }

        runJob(job);
        return 0;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }

}

From source file: org.apache.oozie.action.hadoop.TestMapReduceActionExecutor.java

License: Apache License

@SuppressWarnings("unchecked")
public void testSetupMethods() throws Exception {
    MapReduceActionExecutor ae = new MapReduceActionExecutor();
    assertEquals(Arrays.asList(StreamingMain.class), ae.getLauncherClasses());

    Element actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri()
            + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>" + "<configuration>"
            + "<property><name>mapred.input.dir</name><value>IN</value></property>"
            + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>"
            + "</map-reduce>");

    XConfiguration protoConf = new XConfiguration();
    protoConf.set(WorkflowAppService.HADOOP_USER, getTestUser());

    WorkflowJobBean wf = createBaseWorkflow(protoConf, "mr-action");
    WorkflowActionBean action = (WorkflowActionBean) wf.getActions().get(0);
    action.setType(ae.getType());

    Context context = new Context(wf, action);

    Configuration conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("IN", conf.get("mapred.input.dir"));
    JobConf launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(false, launcherJobConf.getBoolean("mapreduce.job.complete.cancel.delegation.tokens", true));
    assertEquals(true, conf.getBoolean("mapreduce.job.complete.cancel.delegation.tokens", false));

    // Enable uber jars to test that MapReduceActionExecutor picks up the oozie.mapreduce.uber.jar property correctly
    Services serv = Services.get();
    boolean originalUberJarDisabled = serv.getConf().getBoolean("oozie.action.mapreduce.uber.jar.enable",
            false);
    serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", true);

    actionXml = createUberJarActionXML(getNameNodeUri() + "/app/job.jar", "");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals(getNameNodeUri() + "/app/job.jar", conf.get("oozie.mapreduce.uber.jar")); // absolute path with namenode
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(getNameNodeUri() + "/app/job.jar", launcherJobConf.getJar()); // same for launcher conf

    actionXml = createUberJarActionXML("/app/job.jar", "");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals(getNameNodeUri() + "/app/job.jar", conf.get("oozie.mapreduce.uber.jar")); // absolute path without namenode
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(getNameNodeUri() + "/app/job.jar", launcherJobConf.getJar()); // same for launcher conf

    actionXml = createUberJarActionXML("job.jar", "");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals(getFsTestCaseDir() + "/job.jar", conf.get("oozie.mapreduce.uber.jar")); // relative path
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(getFsTestCaseDir() + "/job.jar", launcherJobConf.getJar()); // same for launcher

    actionXml = createUberJarActionXML("job.jar", "<streaming></streaming>");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("", conf.get("oozie.mapreduce.uber.jar")); // ignored for streaming
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertNull(launcherJobConf.getJar()); // same for launcher conf (not set)

    actionXml = createUberJarActionXML("job.jar", "<pipes></pipes>");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("", conf.get("oozie.mapreduce.uber.jar")); // ignored for pipes
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertNull(launcherJobConf.getJar()); // same for launcher conf (not set)

    actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>"
            + "<name-node>" + getNameNodeUri() + "</name-node>" + "</map-reduce>");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertNull(conf.get("oozie.mapreduce.uber.jar")); // doesn't resolve if not set
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertNull(launcherJobConf.getJar()); // same for launcher conf

    // Disable uber jars to test that MapReduceActionExecutor won't allow the oozie.mapreduce.uber.jar property
    serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", false);
    try {
        actionXml = createUberJarActionXML(getNameNodeUri() + "/app/job.jar", "");
        conf = ae.createBaseHadoopConf(context, actionXml);
        ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
        fail("ActionExecutorException expected because uber jars are disabled");
    } catch (ActionExecutorException aee) {
        assertEquals("MR003", aee.getErrorCode());
        assertEquals(ActionExecutorException.ErrorType.ERROR, aee.getErrorType());
        assertTrue(aee.getMessage().contains("oozie.action.mapreduce.uber.jar.enable"));
        assertTrue(aee.getMessage().contains("oozie.mapreduce.uber.jar"));
    }
    serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", originalUberJarDisabled);

    actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>"
            + "<name-node>" + getNameNodeUri() + "</name-node>" + "<streaming>" + "<mapper>M</mapper>"
            + "<reducer>R</reducer>" + "<record-reader>RR</record-reader>"
            + "<record-reader-mapping>RRM1=1</record-reader-mapping>"
            + "<record-reader-mapping>RRM2=2</record-reader-mapping>" + "<env>e=E</env>" + "<env>ee=EE</env>"
            + "</streaming>" + "<configuration>"
            + "<property><name>mapred.input.dir</name><value>IN</value></property>"
            + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>"
            + "</map-reduce>");

    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("M", conf.get("oozie.streaming.mapper"));
    assertEquals("R", conf.get("oozie.streaming.reducer"));
    assertEquals("RR", conf.get("oozie.streaming.record-reader"));
    assertEquals("2", conf.get("oozie.streaming.record-reader-mapping.size"));
    assertEquals("2", conf.get("oozie.streaming.env.size"));

    actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>"
            + "<name-node>" + getNameNodeUri() + "</name-node>" + "<pipes>" + "<map>M</map>"
            + "<reduce>R</reduce>" + "<inputformat>IF</inputformat>" + "<partitioner>P</partitioner>"
            + "<writer>W</writer>" + "<program>PP</program>" + "</pipes>" + "<configuration>"
            + "<property><name>mapred.input.dir</name><value>IN</value></property>"
            + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>"
            + "</map-reduce>");

    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("M", conf.get("oozie.pipes.map"));
    assertEquals("R", conf.get("oozie.pipes.reduce"));
    assertEquals("IF", conf.get("oozie.pipes.inputformat"));
    assertEquals("P", conf.get("oozie.pipes.partitioner"));
    assertEquals("W", conf.get("oozie.pipes.writer"));
    assertEquals(getFsTestCaseDir() + "/PP", conf.get("oozie.pipes.program"));
}

From source file: org.kitesdk.apps.cli.commands.InstallCommand.java

License: Apache License

private static final List<File> getLibraryJars() {

    // Current implementation assumes that library files
    // are in the same directory, so locate it and
    // include it in the project library.

    // This is ugly, using the jobConf logic to identify the containing
    // JAR. There should be a better way to do this.
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(InstallCommand.class);
    String containingJar = jobConf.getJar();
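    // Note: getJar() may return null if InstallCommand was not loaded from a
    // jar (e.g. when running from unpacked classes), which would make the
    // File constructor below throw a NullPointerException.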

    File file = new File(containingJar).getParentFile();

    File[] jarFiles = file.listFiles();

    return Arrays.asList(jarFiles);
}

From source file: org.kitesdk.apps.spark.spi.scheduled.SparkJobManager.java

License: Apache License

@Override
public void writeOozieActionBlock(XMLWriter writer, Schedule schedule) {

    writer.startElement("spark");
    writer.addAttribute("xmlns", "uri:oozie:spark-action:0.1");
    element(writer, "job-tracker", "${jobTracker}");
    element(writer, "name-node", "${nameNode}");

    // TODO: the job-xml should probably be job-specific configuration.
    // element(writer, "job-xml", "${appConfigPath}");

    // Make the nominal time visible to the workflow action.
    writer.startElement("configuration");

    // Use the spark and hive sharelibs since many actions use both.
    property(writer, "oozie.action.sharelib.for.spark", "spark,hive2");
    property(writer, "kiteAppRoot", "${kiteAppRoot}");

    OozieScheduling.writeJobConfiguration(writer, schedule, context.getHadoopConf());

    writer.endElement(); // configuration

    element(writer, "master", "yarn-cluster");
    element(writer, "name", schedule.getName());
    element(writer, "class", SparkScheduledJobMain.class.getCanonicalName());

    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(schedule.getJobClass());
    String containingJar = jobConf.getJar();

    String jarName = containingJar != null ? "${kiteAppRoot}/lib/" + new File(containingJar).getName() : "";

    element(writer, "jar", jarName);
    element(writer, "spark-opts", getSparkConfString(schedule));
    element(writer, "arg", schedule.getJobClass().getName());

    writer.endElement(); // spark
}

From source file: org.kitesdk.apps.spark.spi.scheduled.SparkJobManager.java

License: Apache License

private static final List<File> getLibraryJars() {

    // Current implementation assumes that library files
    // are in the same directory, so locate it and
    // include it in the project library.

    // This is ugly, using the jobConf logic to identify the containing
    // JAR. There should be a better way to do this.
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(SchedulableJob.class);
    String containingJar = jobConf.getJar();

    if (containingJar == null)
        return Collections.emptyList();

    File file = new File(containingJar).getParentFile();

    File[] jarFiles = file.listFiles();

    return Arrays.asList(jarFiles);
}

From source file: org.kitesdk.apps.spark.spi.streaming.SparkStreamingJobManager.java

License: Apache License

private static final List<File> getLibraryJars() {

    // Current implementation assumes that library files
    // are in the same directory, so locate it and
    // include it in the project library.

    // This is ugly, using the jobConf logic to identify the containing
    // JAR. There should be a better way to do this.
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(StreamingJob.class);
    String containingJar = jobConf.getJar();

    if (containingJar == null)
        return Collections.emptyList();

    File file = new File(containingJar).getParentFile();

    File[] jarFiles = file.listFiles();

    return Arrays.asList(jarFiles);
}

From source file: org.kitesdk.apps.spark.spi.streaming.SparkStreamingJobManager.java

License: Apache License

@Override
public void start(FileSystem fs, Path appRoot) {
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(SparkStreamingJobMain.class);
    String containingJar = jobConf.getJar();

    Path libPath = new Path(appRoot, "lib");

    Path jarPath = new Path(libPath, new File(containingJar).getName());
    jarPath = fs.makeQualified(jarPath);

    SparkLauncher launcher = new SparkLauncher();

    launcher.setMainClass(SparkStreamingJobMain.class.getName());

    launcher.setAppResource(jarPath.toString());

    launcher.setMaster("yarn-cluster");

    try {
        // Add the library JARs from HDFS so we don't need to reload
        // them separately into Spark.
        FileStatus[] libJars = fs.listStatus(libPath);

        for (FileStatus jar : libJars) {

            launcher.addJar(jar.getPath().toString());
        }

        // Add the sharelib JARs, since they are not visible to Spark otherwise.
        List<Path> shareLibJars = ShareLibs.jars(sparkJobContext.getHadoopConf(), "hive2");

        for (Path sharelibJar : shareLibJars) {

            launcher.addJar(fs.makeQualified(sharelibJar).toString());
        }

    } catch (IOException e) {
        throw new AppException(e);
    }

    launcher.addAppArgs(appRoot.toString(), description.getJobName());

    // Explicitly set the metastore URI to be usable in the job.
    launcher.setConf("spark.hadoop.hive.metastore.uris",
            sparkJobContext.getHadoopConf().get("hive.metastore.uris"));

    // Add the Avro classes.
    List<Schema> schemas = JobReflection.getSchemas(job);
    StringBuilder avroClassesArg = new StringBuilder();

    avroClassesArg.append("-D").append(KryoAvroRegistrator.KITE_AVRO_CLASSES).append("=");

    boolean first = true;

    for (Schema schema : schemas) {

        if (!first) {
            avroClassesArg.append(",");
        }

        avroClassesArg.append(SpecificData.get().getClass(schema).getName());

        first = false;
    }

    launcher.setConf("spark.driver.extraJavaOptions", avroClassesArg.toString());
    launcher.setConf("spark.executor.extraJavaOptions", avroClassesArg.toString());

    try {

        Process process = launcher.launch();

        // Redirect the spark-submit output to be visible to the reader.
        Thread stdoutThread = writeOutput(process.getInputStream(), System.out);
        Thread stderrThread = writeOutput(process.getErrorStream(), System.err);

        int result = process.waitFor();

        stdoutThread.join();
        stderrThread.join();

        if (result != 0) {
            throw new AppException("spark-submit returned error status: " + result);
        }

    } catch (IOException e) {
        throw new AppException(e);
    } catch (InterruptedException e) {
        throw new AppException(e);
    }
}

From source file: tap.CommandOptions.java

License: Apache License

/**
 * @param pipeline
 * @param conf
 * @param parser
 * @param e
 */
private void handleCmdLineException(Tap pipeline, JobConf conf, CmdLineParser parser, CmdLineException e) {
    String jobName = pipeline.getName();
    if (jobName == null) {
        jobName = "yourJob";
    }
    String jarName = conf.getJar();
    if (jarName == null) {
        jarName = "yourJar";
    }
    String cmd = "hadoop jar " + jarName + " " + jobName;
    System.err.println(e.getMessage());
    System.err.println("Usage: " + cmd + " [options...] arguments...");
    parser.printUsage(System.err);
    System.err.println();

    // print option sample. This is useful some time
    System.err.println("  Example: " + cmd + " " + parser.printExample(ALL));
}

From source file: voldemort.store.readonly.mr.azkaban.AbstractHadoopJob.java

License: Apache License

public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass)
        throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    // set custom class loader with custom find resource strategy.

    conf.setJobName(getId());
    conf.setMapperClass(mapperClass);
    conf.setReducerClass(reducerClass);

    String hadoop_ugi = _props.getString("hadoop.job.ugi", null);
    if (hadoop_ugi != null) {
        conf.set("hadoop.job.ugi", hadoop_ugi);
    }

    if (_props.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        info("Running locally, no hadoop jar set.");
    } else {
        setClassLoaderAndJar(conf, getClass());
        info("Setting hadoop jar file for class:" + getClass() + "  to " + conf.getJar());
        info("*************************************************************************");
        info("          Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ")           ");
        info("*************************************************************************");
    }

    // set JVM options if present
    if (_props.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", _props.getString("mapred.child.java.opts"));
        info("mapred.child.java.opts set to " + _props.getString("mapred.child.java.opts"));
    }

    // set input and output paths if they are present
    if (_props.containsKey("input.paths")) {
        List<String> inputPaths = _props.getStringList("input.paths");
        if (inputPaths.size() == 0)
            throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'");
        for (String path : inputPaths) {
            // Implied stuff, but good implied stuff
            if (path.endsWith(LATEST_SUFFIX)) {
                FileSystem fs = FileSystem.get(conf);

                PathFilter filter = new PathFilter() {

                    @Override
                    public boolean accept(Path arg0) {
                        return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                    }
                };

                String latestPath = path.substring(0, path.length() - LATEST_SUFFIX.length());
                FileStatus[] statuses = fs.listStatus(new Path(latestPath), filter);

                Arrays.sort(statuses);

                path = statuses[statuses.length - 1].getPath().toString();
                System.out.println("Using latest folder: " + path);
            }
            HadoopUtils.addAllSubPaths(conf, new Path(path));
        }
    }

    if (_props.containsKey("output.path")) {
        String location = _props.get("output.path");
        if (location.endsWith("#CURRENT")) {
            DateTimeFormatter format = DateTimeFormat.forPattern(COMMON_FILE_DATE_PATTERN);
            String destPath = format.print(new DateTime());
            location = location.substring(0, location.length() - "#CURRENT".length()) + destPath;
            System.out.println("Store location set to " + location);
        }

        FileOutputFormat.setOutputPath(conf, new Path(location));
        // For testing purpose only remove output file if exists
        if (_props.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // Adds External jars to hadoop classpath
    String externalJarList = _props.getString("hadoop.external.jarFiles", null);
    if (externalJarList != null) {
        String[] jarFiles = externalJarList.split(",");
        for (String jarFile : jarFiles) {
            info("Adding extenral jar File:" + jarFile);
            DistributedCache.addFileToClassPath(new Path(jarFile), conf);
        }
    }

    // Adds distributed cache files
    String cacheFileList = _props.getString("hadoop.cache.files", null);
    if (cacheFileList != null) {
        String[] cacheFiles = cacheFileList.split(",");
        for (String cacheFile : cacheFiles) {
            info("Adding Distributed Cache File:" + cacheFile);
            DistributedCache.addCacheFile(new URI(cacheFile), conf);
        }
    }

    // Adds distributed cache archives
    String archiveFileList = _props.getString("hadoop.cache.archives", null);
    if (archiveFileList != null) {
        String[] archiveFiles = archiveFileList.split(",");
        for (String archiveFile : archiveFiles) {
            info("Adding Distributed Cache Archive File:" + archiveFile);
            DistributedCache.addCacheArchive(new URI(archiveFile), conf);
        }
    }

    String hadoopCacheJarDir = _props.getString("hdfs.default.classpath.dir", null);
    if (hadoopCacheJarDir != null) {
        FileSystem fs = FileSystem.get(conf);
        if (fs != null) {
            FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));

            if (status != null) {
                for (int i = 0; i < status.length; ++i) {
                    if (!status[i].isDir()) {
                        Path path = new Path(hadoopCacheJarDir, status[i].getPath().getName());
                        info("Adding Jar to Distributed Cache Archive File:" + path);

                        DistributedCache.addFileToClassPath(path, conf);
                    }
                }
            } else {
                info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty.");
            }
        } else {
            info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist");
        }
    }

    // May want to add this to HadoopUtils, but will await refactoring
    for (String key : getProps().keySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, getProps().get(key));
        }
    }

    HadoopUtils.setPropsInJob(conf, getProps());
    return conf;
}