Usage examples for org.apache.hadoop.mapreduce.Job.getCacheFiles()
public URI[] getCacheFiles() throws IOException
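getCacheFiles() returns the URIs registered on the job for the distributed cache, or null when none were added. Before the examples, a minimal sketch of the round trip; the class name, job name, and path below are illustrative, not taken from the examples that follow:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CacheFilesDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "cache-files-demo");

        // Register a file for localization on every task node.
        job.addCacheFile(new URI("/user/demo/lookup.txt")); // hypothetical path

        // getCacheFiles() reports what addCacheFile() registered;
        // it returns null if no cache files were added.
        URI[] cacheFiles = job.getCacheFiles();
        if (cacheFiles != null) {
            for (URI uri : cacheFiles) {
                System.out.println("Cached: " + uri);
            }
        }
    }
}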
From source file: gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java
License: Apache License
public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);
    job.setJobName("mean_chi_square_calculation");
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    // Note: getCacheFiles() lists URIs registered with addCacheFile();
    // archives added with addCacheArchive() are reported by
    // getCacheArchives() instead, so cacheFiles is null here.
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.waitForCompletion(true);
}
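The example above registers content for the tasks on the driver side; on the task side the same cache list is readable through the context, since Mapper.Context extends JobContext. A minimal sketch of a setup() that inspects the list, assuming files were added with addCacheFile(); the mapper class and its type parameters are illustrative:

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheAwareMapper extends Mapper<LongWritable, Text, IntWritable, DoubleWritable> {
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // getCacheFiles() is available on the task side via JobContext;
        // it lists the URIs registered with addCacheFile() at submission.
        URI[] cacheFiles = context.getCacheFiles();
        if (cacheFiles != null) {
            for (URI uri : cacheFiles) {
                System.out.println("Localized cache file: " + uri);
            }
        }
    }
}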
From source file: org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License: Apache License
@Test
@Category(UnitTest.class)
public void setSplitFile() throws IOException {
    String file = new Path(outputHdfs, testName.getMethodName()).toString();
    Job job = Job.getInstance(conf);
    TileIdPartitioner.setSplitFile(file, job);
    conf = job.getConfiguration();

    // Use a null default so the assertion actually fails when the property
    // is missing (a default of `file` would make the check vacuous).
    Assert.assertEquals("TileIdPartitioner.splitFile not set", file,
        conf.get("TileIdPartitioner.splitFile", null));
    Assert.assertFalse("TileIdPartitioner.useDistributedCache should not be set",
        conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

    // In local mode the split file is not pushed to the distributed cache.
    URI[] files = job.getCacheFiles();
    Assert.assertNull("Cache files should not be set", files);
}
From source file: org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License: Apache License
@Test
@Category(UnitTest.class)
public void setSplitFileNonLocal() throws IOException {
    conf = HadoopUtils.createConfiguration(); // force local mode off...
    String file = new Path(outputHdfs, testName.getMethodName()).toString();
    Job job = Job.getInstance(conf);
    TileIdPartitioner.setSplitFile(file, job);
    conf = job.getConfiguration();

    Assert.assertEquals("TileIdPartitioner.splitFile not set", file,
        conf.get("TileIdPartitioner.splitFile", null));
    Assert.assertTrue("TileIdPartitioner.useDistributedCache should be set",
        conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

    URI[] files = job.getCacheFiles();
    Assert.assertEquals("Cache files should have 1 file", 1, files.length);
    Assert.assertEquals("Cache file name wrong", file, files[0].toString());
}
From source file: org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java
License: Apache License
@Test
@Category(UnitTest.class)
public void setupPartitioner() throws IOException {
    conf = HadoopUtils.createConfiguration();
    Job job = Job.getInstance(conf, testName.getMethodName());
    conf = job.getConfiguration();
    FileSystem fs = HadoopFileUtils.getFileSystem(conf);
    SplitGenerator sg = new TestGenerator();

    // setup() writes the splits and wires the job to use them.
    Path splitfile = TileIdPartitioner.setup(job, sg);

    Assert.assertTrue("Reducers should be greater than zero", job.getNumReduceTasks() > 0);
    Assert.assertNotNull("Splitfile should not be null", splitfile);
    Assert.assertEquals("TileIdPartitioner.splitFile not set", splitfile.toString(),
        conf.get("TileIdPartitioner.splitFile", null));
    Assert.assertTrue("TileIdPartitioner.useDistributedCache should be set",
        conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

    // The split file is distributed to the tasks via the cache.
    URI[] files = job.getCacheFiles();
    Assert.assertEquals("Cache files should have 1 file", 1, files.length);
    Assert.assertEquals("Cache file name wrong", splitfile.toString(), files[0].toString());

    Assert.assertTrue("Partition Splits directory does not exist", fs.exists(splitfile));
    Assert.assertTrue("Partition Splits file does not exist",
        fs.exists(new Path(splitfile, PartitionerSplit.SPLIT_FILE)));

    // Read the splits back and compare against the generator's expected tile ids.
    Splits splits = new PartitionerSplit();
    splits.readSplits(splitfile);
    PartitionerSplit.PartitionerSplitInfo[] si =
        (PartitionerSplit.PartitionerSplitInfo[]) splits.getSplits();
    Assert.assertEquals("Splits length not correct", generated.length, si.length);
    for (int i = 0; i < generated.length; i++) {
        Assert.assertEquals("Splits entry not correct", generated[i].longValue(), si[i].getTileId());
    }

    fs.delete(splitfile, true);
}