List of usage examples for org.apache.hadoop.mapreduce.Job#setPartitionerClass
public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException
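Before the project-specific examples below, here is a minimal, self-contained sketch of the usual pattern: a custom Partitioner subclass decides which reducer each map output key goes to, and the driver registers it with setPartitionerClass. The FirstLetterPartitioner class, the key/value types, and the paths here are hypothetical, chosen only to illustrate the call.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical partitioner: routes keys to reducers by their first character.
public class FirstLetterPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        if (key.getLength() == 0) {
            return 0;
        }
        // Mask the sign bit so the partition index is never negative.
        return (key.charAt(0) & Integer.MAX_VALUE) % numPartitions;
    }
}

// Driver sketch (mapper/reducer classes omitted):
Job job = Job.getInstance(new Configuration(), "partitioner example");
job.setJarByClass(FirstLetterPartitioner.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setPartitionerClass(FirstLetterPartitioner.class); // must be set before the job is submitted
job.setNumReduceTasks(4);
FileInputFormat.addInputPath(job, new Path("in"));
FileOutputFormat.setOutputPath(job, new Path("out"));

Note that the partitioner only matters when the job has more than one reduce task, and that setPartitionerClass throws IllegalStateException if the job is no longer in a configurable state.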
From source file: org.chombo.mr.RecordSetModifier.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "record set modifier MR";
    job.setJobName(jobName);
    job.setJarByClass(RecordSetModifier.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(RecordSetModifier.ModifierMapper.class);
    job.setReducerClass(RecordSetModifier.ModifierReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());
    int numReducer = job.getConfiguration().getInt("rsm.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
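This example and the other chombo examples below pair setPartitionerClass with a grouping comparator, which is the standard secondary-sort pattern: partition on the grouping part of a composite key so all records of a group reach the same reducer, group reduce input on that same part, and let the full key drive the sort order. The code below is only a generic sketch of that idea using a hypothetical "group|secondaryField" Text key, not the actual SecondarySort.TuplePairPartitioner implementation.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch: composite keys are assumed to look like "group|secondaryField".
// Partition only on the group part, so every record of a group lands on the
// same reducer; a matching grouping comparator would group reduce input the
// same way, while the full key still controls sort order within the group.
public class GroupPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        String composite = key.toString();
        int sep = composite.indexOf('|');
        String group = sep >= 0 ? composite.substring(0, sep) : composite;
        return (group.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}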
From source file: org.chombo.mr.TimeGapSequenceGenerator.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Time sequence to time gap sequence conversion";
    job.setJobName(jobName);
    job.setJarByClass(TimeGapSequenceGenerator.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "chombo", true);
    job.setMapperClass(TimeGapSequenceGenerator.TimeGapMapper.class);
    job.setReducerClass(TimeGapSequenceGenerator.TimeGapReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    int numReducer = job.getConfiguration().getInt("tgs.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
From source file: org.chombo.mr.TimeSequenceFilter.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Time sequence to time gap sequence conversion";
    job.setJobName(jobName);
    job.setJarByClass(TimeSequenceFilter.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "chombo");
    job.setMapperClass(TimeGapSequenceGenerator.TimeGapMapper.class);
    job.setReducerClass(TimeSequenceFilter.FilterReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    int numReducer = job.getConfiguration().getInt("tsf.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
From source file: org.chombo.mr.WeightedAverage.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Weighted average calculating MR";
    job.setJobName(jobName);
    job.setJarByClass(WeightedAverage.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WeightedAverage.AverageMapper.class);
    job.setReducerClass(WeightedAverage.AverageReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    Utility.setConfiguration(job.getConfiguration());
    if (job.getConfiguration().getInt("group.by.field", -1) >= 0) {
        // group by
        job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
        job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);
    }

    int numReducer = job.getConfiguration().getInt("wea.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
From source file: org.clueweb.clueweb12.app.DuplicateFiltering.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access", "deprecation" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path (pfor format expected, add * to retrieve files)")
            .create(DOCVECTOR_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path").create(TREC_RESULT_FILE));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("dictionary").create(DICTIONARY_OPTION));
    options.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("topk").create(TOPK));
    options.addOption(OptionBuilder.withArgName("float [0-1]").hasArg()
            .withDescription("cosine similarity threshold").create(SIM_THRESHOLD));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(DOCVECTOR_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(TREC_RESULT_FILE)
            || !cmdline.hasOption(SIM_THRESHOLD) || !cmdline.hasOption(TOPK)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String docvector = cmdline.getOptionValue(DOCVECTOR_OPTION);
    String trecinput = cmdline.getOptionValue(TREC_RESULT_FILE);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION);
    String simThreshold = cmdline.getOptionValue(SIM_THRESHOLD);
    String topk = cmdline.getOptionValue(TOPK);

    LOG.info("Tool name: " + DuplicateFiltering.class.getSimpleName());
    LOG.info(" - docvector: " + docvector);
    LOG.info(" - trecinputfile: " + trecinput);
    LOG.info(" - output: " + output);
    LOG.info(" - dictionary: " + dictionary);
    LOG.info(" - cosine similarity threshold: " + SIM_THRESHOLD);
    LOG.info(" - topk: " + topk);

    Configuration conf = getConf();
    conf.set(DICTIONARY_OPTION, dictionary);
    conf.setFloat(SIM_THRESHOLD, Float.parseFloat(simThreshold));
    conf.set(TREC_RESULT_FILE, trecinput);
    conf.setInt(TOPK, Integer.parseInt(topk));
    conf.set("mapred.task.timeout", "6000000"); // default is 600000

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(output)))
        fs.delete(new Path(output));

    Job job = new Job(conf, DuplicateFiltering.class.getSimpleName() + ":" + docvector);
    job.setJarByClass(DuplicateFiltering.class);

    FileInputFormat.setInputPaths(job, docvector);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(PairOfIntString.class);
    job.setMapOutputValueClass(FloatArrayWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    int numDuplicates = (int) job.getCounters().findCounter(Records.DUPLICATES).getValue();
    LOG.info("Number of duplicates: " + numDuplicates);

    return 0;
}
From source file: org.clueweb.clueweb12.app.RMRetrieval.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access", "deprecation" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path (pfor format expected, add * to retrieve files)")
            .create(DOCVECTOR_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("dictionary").create(DICTIONARY_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("queries").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("float").hasArg()
            .withDescription("smoothing").create(SMOOTHING));
    options.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("topk").create(TOPK));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("rmmodel file").create(RMMODEL));
    options.addOption(OptionBuilder.withArgName("float").hasArg()
            .withDescription("queryLambda").create(QUERY_LAMBDA));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(DOCVECTOR_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(QUERIES_OPTION)
            || !cmdline.hasOption(SMOOTHING) || !cmdline.hasOption(TOPK)
            || !cmdline.hasOption(QUERY_LAMBDA) || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String docvector = cmdline.getOptionValue(DOCVECTOR_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION);
    String queries = cmdline.getOptionValue(QUERIES_OPTION);
    String smoothing = cmdline.getOptionValue(SMOOTHING);
    String topk = cmdline.getOptionValue(TOPK);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);
    String rmmodel = cmdline.getOptionValue(RMMODEL);
    String queryLambda = cmdline.getOptionValue(QUERY_LAMBDA);

    LOG.info("Tool name: " + RMRetrieval.class.getSimpleName());
    LOG.info(" - docvector: " + docvector);
    LOG.info(" - output: " + output);
    LOG.info(" - dictionary: " + dictionary);
    LOG.info(" - queries: " + queries);
    LOG.info(" - smoothing: " + smoothing);
    LOG.info(" - topk: " + topk);
    LOG.info(" - preprocessing: " + preprocessing);
    LOG.info(" - rmmodel: " + rmmodel);
    LOG.info(" - queryLambda: " + queryLambda);

    Configuration conf = getConf();
    conf.set(DICTIONARY_OPTION, dictionary);
    conf.set(QUERIES_OPTION, queries);
    conf.setFloat(SMOOTHING, Float.parseFloat(smoothing));
    conf.setInt(TOPK, Integer.parseInt(topk));
    conf.set(PREPROCESSING, preprocessing);
    conf.set(RMMODEL, rmmodel);
    conf.setFloat(QUERY_LAMBDA, Float.parseFloat(queryLambda));

    conf.set("mapreduce.map.memory.mb", "10048");
    conf.set("mapreduce.map.java.opts", "-Xmx10048m");
    conf.set("mapreduce.reduce.memory.mb", "10048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx10048m");
    conf.set("mapred.task.timeout", "6000000"); // default is 600000

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(output)))
        fs.delete(new Path(output));

    Job job = new Job(conf, RMRetrieval.class.getSimpleName() + ":" + docvector);
    job.setJarByClass(RMRetrieval.class);

    FileInputFormat.setInputPaths(job, docvector);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(PairOfIntString.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file: org.gridgain.grid.kernal.processors.hadoop.GridHadoopMapReduceEmbeddedSelfTest.java
License: Open Source License

/**
 * Tests whole job execution with all phases in old and new versions of API with definition of custom
 * Serialization, Partitioner and IO formats.
 *
 * @throws Exception If fails.
 */
public void testMultiReducerWholeMapReduceExecution() throws Exception {
    GridGgfsPath inDir = new GridGgfsPath(PATH_INPUT);

    ggfs.mkdirs(inDir);

    GridGgfsPath inFile = new GridGgfsPath(inDir, GridHadoopWordCount2.class.getSimpleName() + "-input");

    generateTestFile(inFile.toString(), "key1", 10000, "key2", 20000, "key3", 15000, "key4", 7000,
            "key5", 12000, "key6", 18000);

    for (int i = 0; i < 2; i++) {
        boolean useNewAPI = i == 1;

        ggfs.delete(new GridGgfsPath(PATH_OUTPUT), true);

        flags.put("serializationWasConfigured", false);
        flags.put("partitionerWasConfigured", false);
        flags.put("inputFormatWasConfigured", false);
        flags.put("outputFormatWasConfigured", false);

        JobConf jobConf = new JobConf();

        jobConf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

        // To split into about 6-7 items for v2
        jobConf.setInt(FileInputFormat.SPLIT_MAXSIZE, 65000);

        // For v1
        jobConf.setInt("fs.local.block.size", 65000);

        // File system coordinates.
        setupFileSystems(jobConf);

        GridHadoopWordCount1.setTasksClasses(jobConf, !useNewAPI, !useNewAPI, !useNewAPI);

        if (!useNewAPI) {
            jobConf.setPartitionerClass(CustomV1Partitioner.class);
            jobConf.setInputFormat(CustomV1InputFormat.class);
            jobConf.setOutputFormat(CustomV1OutputFormat.class);
        }

        Job job = Job.getInstance(jobConf);

        GridHadoopWordCount2.setTasksClasses(job, useNewAPI, useNewAPI, useNewAPI);

        if (useNewAPI) {
            job.setPartitionerClass(CustomV2Partitioner.class);
            job.setInputFormatClass(CustomV2InputFormat.class);
            job.setOutputFormatClass(CustomV2OutputFormat.class);
        }

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(ggfsScheme() + inFile.toString()));
        FileOutputFormat.setOutputPath(job, new Path(ggfsScheme() + PATH_OUTPUT));

        job.setNumReduceTasks(3);

        job.setJarByClass(GridHadoopWordCount2.class);

        GridFuture<?> fut = grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1),
                createJobInfo(job.getConfiguration()));

        fut.get();

        assertTrue("Serialization was configured (new API is " + useNewAPI + ")",
                flags.get("serializationWasConfigured"));

        assertTrue("Partitioner was configured (new API is = " + useNewAPI + ")",
                flags.get("partitionerWasConfigured"));

        assertTrue("Input format was configured (new API is = " + useNewAPI + ")",
                flags.get("inputFormatWasConfigured"));

        assertTrue("Output format was configured (new API is = " + useNewAPI + ")",
                flags.get("outputFormatWasConfigured"));

        assertEquals("Use new API = " + useNewAPI, "key3\t15000\n" + "key6\t18000\n",
                readAndSortFile(PATH_OUTPUT + "/" + (useNewAPI ? "part-r-" : "part-") + "00000"));

        assertEquals("Use new API = " + useNewAPI, "key1\t10000\n" + "key4\t7000\n",
                readAndSortFile(PATH_OUTPUT + "/" + (useNewAPI ? "part-r-" : "part-") + "00001"));

        assertEquals("Use new API = " + useNewAPI, "key2\t20000\n" + "key5\t12000\n",
                readAndSortFile(PATH_OUTPUT + "/" + (useNewAPI ? "part-r-" : "part-") + "00002"));
    }
}
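The test above registers a partitioner through both APIs: the old ("v1") API via JobConf.setPartitionerClass and the new ("v2") API via Job.setPartitionerClass. For reference, a v1 partitioner is an interface implementation with a configure() hook rather than a Partitioner subclass. The sketch below is a generic illustration of that shape, not the test's actual CustomV1Partitioner.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

// Old ("v1") API partitioner: implements the mapred interface and is
// registered with JobConf.setPartitionerClass(...).
public class V1StylePartitioner implements org.apache.hadoop.mapred.Partitioner<Text, IntWritable> {
    @Override
    public void configure(JobConf job) {
        // read any per-job settings here
    }

    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}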
From source file: org.hdp.wrdcount.custompartitioner.WordCountCustomPartitionerJob.java

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "Word Count Job");

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileSystem fs = FileSystem.get(getConf()); // does not use the HDFS setting that is set for the eclipse env
    Path pathOut = new Path("/test/wordcount/custompartitioner/op");
    if (fs.exists(pathOut)) {
        fs.delete(out, true);
    }

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(WordCountCustomPartitionerMapper.class);
    job.setReducerClass(WordCountCustomPartitionerReducer.class);
    job.setPartitionerClass(WordCountPartitioner.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(3);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: org.imageterrier.indexers.hadoop.HadoopIndexer.java
License: Mozilla Public License

protected Job createJob(HadoopIndexerOptions options) throws IOException {
    final Job job = new Job(getConf());
    job.setJobName("terrierIndexing");

    if (options.getInputMode() == InputMode.QUANTISED_FEATURES) {
        job.setMapperClass(QFIndexerMapper.class);
    } else {
        if (options.shardPerThread) {
            job.setMapperClass(MultithreadedMapper.class);
            MultithreadedMapper.setMapperClass(job, MTImageIndexerMapper.class);
            MultithreadedMapper.setNumberOfThreads(job, options.getMultithread());
        } else {
            job.setMapperClass(ImageIndexerMapper.class);
        }
    }

    // Load quantiser (if it exists), extract header, count codebook size
    if (options.getInputModeOptions().hasQuantiserFile()) {
        final String quantFile = options.getInputModeOptions().getQuantiserFile();

        System.out.println("Loading codebook to see its size");
        final SpatialClusters<?> quantiser = readClusters(options);
        System.out.println("Setting codebook size: " + quantiser.numClusters());

        job.getConfiguration().setInt(QUANTISER_SIZE, quantiser.numClusters());

        if (quantiser.numClusters() < options.getNumReducers())
            options.setNumReducers(quantiser.numClusters());
    }

    job.setReducerClass(IndexerReducer.class);

    FileOutputFormat.setOutputPath(job, options.getOutputPath());
    job.setMapOutputKeyClass(NewSplitEmittedTerm.class);
    job.setMapOutputValueClass(MapEmittedPostingList.class);
    job.getConfiguration().setBoolean("indexing.hadoop.multiple.indices", options.isDocumentPartitionMode());

    // if (!job.getConfiguration().get("mapred.job.tracker").equals("local")) {
    //     job.getConfiguration().set("mapred.map.output.compression.codec", GzipCodec.class.getCanonicalName());
    //     job.getConfiguration().setBoolean("mapred.compress.map.output", true);
    // } else {
    job.getConfiguration().setBoolean("mapred.compress.map.output", false);
    // }

    job.setInputFormatClass(PositionAwareSequenceFileInputFormat.class); // important
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setSortComparatorClass(NewSplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    job.setGroupingComparatorClass(NewSplitEmittedTerm.SETRawComparatorTerm.class);

    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

    SequenceFileInputFormat.setInputPaths(job, options.getInputPaths());

    job.setNumReduceTasks(options.getNumReducers());

    if (options.getNumReducers() > 1) {
        if (options.isDocumentPartitionMode()) {
            job.setPartitionerClass(NewSplitEmittedTerm.SETPartitioner.class);
        } else {
            // job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
            if (job.getConfiguration().getInt(QUANTISER_SIZE, -1) == -1) {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerHashedTerm.class);
            } else {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerCodebookAwareTerm.class);
            }
        }
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        job.setPartitionerClass(HashPartitioner.class);
    }

    job.setJarByClass(this.getClass());

    return job;
}
From source file: org.kiji.mapreduce.output.HFileMapReduceJobOutput.java
License: Apache License

/**
 * Configures the partitioner for generating HFiles.
 *
 * <p>Each generated HFile should fit within a region of the target table.
 * Additionally, it's optimal to have only one HFile to load into each region, since a
 * read from that region will require reading from each HFile under management (until
 * compaction happens and merges them all back into one HFile).</p>
 *
 * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the
 * records output from the Mapper based on their rank in a total ordering of the
 * keys. The <code>startKeys</code> argument should contain a list of the first key in
 * each of those partitions.</p>
 *
 * @param job The job to configure.
 * @param startKeys A list of keys that will mark the boundaries between the partitions
 *     for the sorted map output records.
 * @throws IOException If there is an error.
 */
private static void configurePartitioner(Job job, List<HFileKeyValue> startKeys) throws IOException {
    job.setPartitionerClass(TotalOrderPartitioner.class);

    LOG.info("Configuring " + startKeys.size() + " reduce partitions.");
    job.setNumReduceTasks(startKeys.size());

    // Write the file that the TotalOrderPartitioner reads to determine where to partition records.
    Path partitionFilePath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionFilePath);

    final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration());
    partitionFilePath = partitionFilePath.makeQualified(fs);
    writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys);

    // Add it to the distributed cache.
    try {
        final URI cacheUri = new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
        DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.createSymlink(job.getConfiguration());
}
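For comparison, when the stock Hadoop TotalOrderPartitioner is configured directly (rather than via a distributed-cache symlink at its default path, as the Kiji example does, possibly with a backported variant), the partition file is usually registered through its setPartitionFile helper. A minimal sketch under that assumption:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

// Sketch: point the partitioner at a previously written partition file.
void configureTotalOrder(Job job, Path partitionFilePath, int numPartitions) {
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setNumReduceTasks(numPartitions);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFilePath);
}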