Example usage for org.apache.hadoop.mapreduce Job getCacheFiles

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job#getCacheFiles.

Prototype


public URI[] getCacheFiles() throws IOException 

Document

Get cache files set in the Configuration.
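
Below is a minimal, self-contained sketch of the set/get round trip, assuming a hypothetical cache path /apps/lookup.txt: getCacheFiles() returns the URIs previously registered with addCacheFile, or null when none have been added.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetCacheFilesSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.addCacheFile(new URI("/apps/lookup.txt")); // hypothetical path
        URI[] cached = job.getCacheFiles();            // contains the URI registered above
        System.out.println(cached == null ? "no cache files set" : cached[0].toString());
    }
}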

Usage

From source file: gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java

License: Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);

    job.setJobName("mean_chi_square_calculation");
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));
    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.waitForCompletion(true);

}
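
Note that the example above registers an archive with addCacheArchive but then reads getCacheFiles(), which only reports plain files registered with addCacheFile; archives are tracked separately and read back with getCacheArchives(). The null check is therefore doing real work: with only an archive added, getCacheFiles() typically returns null. A minimal sketch of the two accessors side by side, reusing the archive path from the example:

import java.net.URI;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class FilesVersusArchives {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));

        URI[] archives = job.getCacheArchives(); // contains the archive added above
        URI[] files = job.getCacheFiles();       // null: no plain cache files were added
        System.out.println("archives=" + Arrays.toString(archives) + " files=" + Arrays.toString(files));
    }
}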

From source file: org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java

License: Apache License

@Test
@Category(UnitTest.class)
public void setSplitFile() throws IOException {
    String file = new Path(outputHdfs, testName.getMethodName()).toString();
    Job job = Job.getInstance(conf);

    TileIdPartitioner.setSplitFile(file, job);

    conf = job.getConfiguration();

    Assert.assertEquals("TileIdPartitioner.splitFile not set", file,
            conf.get("TileIdPartitioner.splitFile", file));
    Assert.assertFalse("TileIdPartitioner.useDistributedCache should not be set",
            conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

    URI[] files = job.getCacheFiles();
    Assert.assertNull("Cache files should not be set", files);
}

From source file: org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java

License: Apache License

@Test
@Category(UnitTest.class)
public void setSplitFileNonLocal() throws IOException {
    conf = HadoopUtils.createConfiguration(); // force local mode off...

    String file = new Path(outputHdfs, testName.getMethodName()).toString();
    Job job = Job.getInstance(conf);

    TileIdPartitioner.setSplitFile(file, job);

    conf = job.getConfiguration();

    Assert.assertEquals("TileIdPartitioner.splitFile not set", file,
            conf.get("TileIdPartitioner.splitFile", null));
    Assert.assertTrue("TileIdPartitioner.useDistributedCache should be set",
            conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

    URI[] files = job.getCacheFiles();
    Assert.assertNotNull("Cache files should be set", files);
    Assert.assertEquals("Cache files should have 1 file", 1, files.length);

    Assert.assertEquals("Cache file name wrong", file, files[0].toString());
}

From source file: org.mrgeo.hdfs.partitioners.TileIdPartitionerTest.java

License: Apache License

@Test
@Category(UnitTest.class)
public void setupPartitioner() throws IOException {
    conf = HadoopUtils.createConfiguration();

    Job job = Job.getInstance(conf, testName.getMethodName());
    conf = job.getConfiguration();

    FileSystem fs = HadoopFileUtils.getFileSystem(conf);

    SplitGenerator sg = new TestGenerator();

    Path splitfile = TileIdPartitioner.setup(job, sg);

    Assert.assertTrue("Reducers should be greater than zero", job.getNumReduceTasks() > 0);
    Assert.assertNotNull("Splitfile should not be null", splitfile);

    Assert.assertEquals("TileIdPartitioner.splitFile not set", splitfile.toString(),
            conf.get("TileIdPartitioner.splitFile", null));
    Assert.assertTrue("TileIdPartitioner.useDistributedCache should be set",
            conf.getBoolean("TileIdPartitioner.useDistributedCache", false));

    URI[] files = job.getCacheFiles();
    Assert.assertNotNull("Cache files should be set", files);
    Assert.assertEquals("Cache files should have 1 file", 1, files.length);

    Assert.assertEquals("Cache file name wrong", splitfile.toString(), files[0].toString());

    Assert.assertTrue("Partition Splits directory does not exist", fs.exists(splitfile));
    Assert.assertTrue("Partition Splits file does not exist",
            fs.exists(new Path(splitfile, PartitionerSplit.SPLIT_FILE)));

    Splits splits = new PartitionerSplit();
    splits.readSplits(splitfile);

    PartitionerSplit.PartitionerSplitInfo[] si = (PartitionerSplit.PartitionerSplitInfo[]) splits.getSplits();

    Assert.assertEquals("Splits length not correct", generated.length, si.length);
    for (int i = 0; i < generated.length; i++) {
        Assert.assertEquals("Splits entry not correct", generated[i].longValue(), si[i].getTileId());
    }

    fs.delete(splitfile, true);
}
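
On the task side, the same accessor is available through the task context, since Mapper.Context extends JobContext. A minimal, hypothetical mapper that lists the registered cache files during setup:

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheAwareMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles(); // null if none were registered
        if (cacheFiles != null) {
            for (URI uri : cacheFiles) {
                System.out.println("Registered cache file: " + uri);
            }
        }
    }
}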