Example usage for org.apache.hadoop.mapreduce Job addCacheFile

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job addCacheFile.

Prototype

public void addCacheFile(URI uri) 

Document

Add a file to be localized via the distributed cache. If the URI carries a fragment (for example hdfs:/path/file.txt#alias), the localized copy is symlinked into each task's working directory under that name.
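
A minimal sketch of the typical round trip, assuming hypothetical names (CacheFileSketch, /data/lookup.txt, the lookup.txt alias): the driver registers the file with a fragment, and the mapper opens it through the symlink the framework creates in the task working directory.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheFileSketch {

    // Hypothetical mapper: reads the cached file through its "lookup.txt" symlink.
    public static class LookupMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void setup(Context context) throws IOException {
            try (BufferedReader reader = new BufferedReader(new FileReader("lookup.txt"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // ... populate an in-memory lookup structure ...
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "cache-file-sketch");
        // The "#lookup.txt" fragment is the local alias the mapper opens above.
        job.addCacheFile(new URI("/data/lookup.txt#lookup.txt"));
        job.setMapperClass(LookupMapper.class);
        // ... set input/output formats and paths as usual ...
    }
}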

Usage

From source file:mx.iteso.msc.examples.FaceCount.java

License:Apache License

public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
        System.out.println("Usage: FaceCount <input HIB> <output directory>");
        System.exit(0);
    }

    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(FaceCount.class);
    job.setMapperClass(FaceCountMapper.class);
    job.setReducerClass(FaceCountReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Add the face-detection cascade file to the distributed cache; the "#..." fragment symlinks it into each task's working directory
    job.addCacheFile(new URI("/hipi/OpenCV/lbpcascade_frontalface.xml#lbpcascade_frontalface.xml"));

    // Execute the MapReduce job and block until it completes
    boolean success = job.waitForCompletion(true);

    // Return success or failure
    return success ? 0 : 1;
}
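
Because of the #lbpcascade_frontalface.xml fragment, the cascade is available under that bare name in every task's working directory, so a mapper can hand a plain local path to OpenCV. The snippet below is a hedged sketch of that load, not HIPI's actual FaceCountMapper.

// Hedged sketch, not HIPI's FaceCountMapper: loading the cascade through the
// symlink created by the "#lbpcascade_frontalface.xml" fragment above.
import org.opencv.core.Core;
import org.opencv.objdetect.CascadeClassifier;

public class CascadeLoadSketch {
    public static CascadeClassifier loadCascade() {
        // OpenCV's native library must be loaded in the task JVM first.
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        // The bare file name resolves against the task working directory.
        return new CascadeClassifier("lbpcascade_frontalface.xml");
    }
}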

From source file:nl.utwente.bigdata.PageRank.java

License:Apache License

public static void run(String[] args) throws Exception {
    Configuration conf = new Configuration();

    //Process args
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length < 4) {
        System.err.println("Usage: pageRank <in> [<in>...] <out> <retweetpath> <pagerankpath>");
        System.exit(2);
    }
    conf.set("mapreduce.job.reduce.slowstart.completedmaps", "1");

    //Setup the job
    Job job = Job.getInstance(conf, "Twitter Reader");
    job.setJarByClass(PageRank.class);
    job.setMapperClass(PageRankMapper.class);
    job.setReducerClass(PageRankReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(MapWritable.class);

    //Load input files
    for (int i = 0; i < otherArgs.length - 3; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    //Set output path
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 3]));

    //Load previous results
    FileSystem fs = FileSystem.get(conf); // reuse the configuration populated by GenericOptionsParser
    FileStatus[] status = fs.listStatus(new Path("hdfs:" + otherArgs[otherArgs.length - 2]));
    for (FileStatus s : status) {
        job.addCacheFile(s.getPath().toUri());
    }
    // "overslaan" is Dutch for "skip": it suppresses loading the second cache directory
    if (!otherArgs[otherArgs.length - 1].equals("overslaan")) {
        FileStatus[] status2 = fs.listStatus(new Path("hdfs:" + otherArgs[otherArgs.length - 1]));
        for (FileStatus s2 : status2) {
            job.addCacheFile(s2.getPath().toUri());
        }
    }

    job.waitForCompletion(true);
}
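
Since these files are cached without #alias fragments, the natural way to find them again on the task side is to enumerate the job's cache files. A hedged sketch, assuming a hypothetical CacheReadingReducer (the real PageRankReducer is not shown in this source):

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical reducer skeleton; only the cache-file enumeration is the point here.
public class CacheReadingReducer extends Reducer<Text, MapWritable, Text, MapWritable> {
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles(); // everything added via addCacheFile
        if (cacheFiles == null) {
            return;
        }
        for (URI uri : cacheFiles) {
            Path path = new Path(uri);
            try (FSDataInputStream in = path.getFileSystem(context.getConfiguration()).open(path)) {
                // ... parse a previous iteration's results from 'in' ...
            }
        }
    }
}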

From source file:nl.utwente.bigdata.TwitterExample.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: exampleTwitter <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "Twitter Reader");
    job.addCacheFile(new Path("players.txt").toUri());
    job.setJarByClass(TwitterExample.class);
    job.setMapperClass(ExampleMapper.class);
    job.setReducerClass(ExampleReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:org.apache.accumulo.hadoop.mapreduce.partition.RangePartitioner.java

License:Apache License

/**
 * Sets the HDFS file name to use, containing a newline-separated list of Base64-encoded split
 * points that represent ranges for partitioning.
 */
public static void setSplitFile(Job job, String file) {
    URI uri = new Path(file).toUri();
    job.addCacheFile(uri);
    job.getConfiguration().set(CUTFILE_KEY, uri.getPath());
}
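
The read side of this round trip lives in Accumulo's RangePartitioner itself and is not reproduced on this page. The following is only a hedged sketch of how the cut file could be read back, taking CUTFILE_KEY's role and the newline-separated Base64 encoding from the javadoc above; SplitFileReaderSketch and readCutPoints are invented names.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

// Hedged sketch of reading the split file back; this is not Accumulo's implementation.
public class SplitFileReaderSketch {
    public static List<Text> readCutPoints(Configuration conf, String cutFileKey) throws IOException {
        Path path = new Path(conf.get(cutFileKey)); // path stored by setSplitFile
        List<Text> cutPoints = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(path.getFileSystem(conf).open(path)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Each line is one Base64-encoded split point (see javadoc above).
                cutPoints.add(new Text(Base64.getDecoder().decode(line)));
            }
        }
        return cutPoints;
    }
}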

From source file:org.apache.hadoop.examples.terasort.TeraSort.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        usage();
        return 2;
    }
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopTeraSortTest.java

License:Apache License

/**
 * Creates Job instance and sets up necessary properties for it.
 * @param conf The Job config.
 * @return The job.
 * @throws Exception On error.
 */
private Job setupConfig(JobConf conf) throws Exception {
    Job job = Job.getInstance(conf);

    Path inputDir = new Path(generateOutDir);
    Path outputDir = new Path(sortOutDir);

    boolean useSimplePartitioner = TeraSort.getUseSimplePartitioner(job);

    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setJobName("TeraSort");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);

    if (useSimplePartitioner)
        job.setPartitionerClass(TeraSort.SimplePartitioner.class);
    else {
        long start = System.currentTimeMillis();

        Path partFile = new Path(outputDir, PARTITION_FILENAME);

        URI partUri = new URI(partFile.toString() + "#" + PARTITION_FILENAME);

        try {
            TeraInputFormat.writePartitionFile(job, partFile);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }

        job.addCacheFile(partUri);

        long end = System.currentTimeMillis();

        System.out.println("Spent " + (end - start) + "ms computing partitions. "
                + "Partition file added to distributed cache: " + partUri);

        job.setPartitionerClass(getTeraSortTotalOrderPartitioner()/*TeraSort.TotalOrderPartitioner.class*/);
    }

    job.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(job));

    /* TeraOutputFormat.setFinalSync(job, true); */
    Method m = TeraOutputFormat.class.getDeclaredMethod("setFinalSync", JobContext.class, boolean.class);
    m.setAccessible(true);
    m.invoke(null, job, true);

    return job;
}

From source file:org.bgi.flexlab.gaea.data.structure.header.MultipleVCFHeader.java

License:Open Source License

public boolean distributeCacheVcfHeader(String outputPath, Job job, Configuration conf) {
    writeHeaderToHDFS(outputPath, conf);
    try {
        job.addCacheFile(new URI(conf.get(GaeaVCFHeader.VCF_HEADER_PROPERTY) + "#VcfHeaderObj"));
    } catch (URISyntaxException e) {
        e.printStackTrace();
        return false;
    }
    return true;
}
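
On the task side, the #VcfHeaderObj fragment symlinks the serialized header into the working directory, so it can be opened by that bare name. The sketch below only demonstrates that step; treating the file as a standard Java serialized object is an assumption, since GaeaVCFHeader's actual on-disk format is not visible here.

// Hedged sketch: opening the "#VcfHeaderObj" symlink locally. Java object
// deserialization is an assumption about the file's format.
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;

public class VcfHeaderReadSketch {
    public static Object readHeader() throws IOException, ClassNotFoundException {
        try (ObjectInputStream in = new ObjectInputStream(new FileInputStream("VcfHeaderObj"))) {
            return in.readObject();
        }
    }
}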

From source file:org.bgi.flexlab.gaea.data.structure.memoryshare.WholeGenomeShare.java

License:Open Source License

public static boolean distributeCache(String chrList, Job job, String cacheName)
        throws IOException, URISyntaxException {
    job.addCacheFile(new URI(chrList + "#" + cacheName));

    Configuration conf = job.getConfiguration();
    Path refPath = new Path(chrList);
    FileSystem fs = refPath.getFileSystem(conf);
    FSDataInputStream refin = fs.open(refPath);
    LineReader in = new LineReader(refin);
    Text line = new Text();

    String chrFile = "";
    String[] chrs = new String[3];
    while ((in.readLine(line)) != 0) {
        chrFile = line.toString();
        chrs = chrFile.split("\t");
        File fileTest = new File(chrs[1]);
        if (fileTest.isFile()) {
            chrs[1] = "file://" + chrs[1];
        }
        job.addCacheFile(new URI(chrs[1] + "#" + chrs[0]));
    }
    in.close();
    refin.close();
    return true;
}

From source file:Patterns.A5_MapSideJoinByDistributedCache.Distributed_InnerJoin_Driver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Inner Join User/Artist Data");
    job.setJarByClass(Distributed_InnerJoin_Driver.class);

    job.setMapperClass(Distributed_InnerJoin_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    try {
        job.addCacheFile(new URI(
                "/home/chintan/IdeaProjects/AdvancedDBMS/music-project/inputUserTaste/userid-profile.tsv#user"));
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 2);
}
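
The #user fragment is what makes this map-side (replicated) inner join work with setNumReduceTasks(0): every map task sees the profile file under the local name user and can load it into memory before processing its split. Below is a hedged sketch of such a mapper, with the TSV layout and join key as assumptions; it is not the actual Distributed_InnerJoin_Mapper.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical map-side join sketch; field layout is an assumption.
public class MapSideJoinSketch extends Mapper<LongWritable, Text, Text, Text> {
    private final Map<String, String> userProfiles = new HashMap<>();

    @Override
    protected void setup(Context context) throws IOException {
        // "user" is the symlink created by the "#user" fragment in the driver.
        try (BufferedReader reader = new BufferedReader(new FileReader("user"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t", 2);
                if (fields.length == 2) {
                    userProfiles.put(fields[0], fields[1]); // userId -> profile
                }
            }
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t", 2);
        if (fields.length < 2) {
            return;
        }
        String profile = userProfiles.get(fields[0]);
        if (profile != null) { // inner join: drop records with no matching profile
            context.write(new Text(fields[0]), new Text(profile + "\t" + fields[1]));
        }
    }
}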

From source file:terasort.TeraSort.java

License:Apache License

public int run(String[] args) throws Exception {
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);

    Path outputDir = new Path(args[1]);
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}