Example usage for org.apache.hadoop.mapreduce Job addCacheFile

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job addCacheFile.

Prototype

public void addCacheFile(URI uri) 

Document

Add a file to be localized via the distributed cache. If the URI carries a fragment (for example hdfs:/path/file.txt#alias), the localized copy is symlinked into each task's working directory under that name.
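
A minimal sketch of the typical round trip, assuming hypothetical names (CacheFileSketch, /data/lookup.txt, the lookup.txt alias): the driver registers the file with a fragment, and the mapper opens it through the symlink the framework creates in the task working directory.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheFileSketch {

    // Hypothetical mapper: reads the cached file through its "lookup.txt" symlink.
    public static class LookupMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void setup(Context context) throws IOException {
            try (BufferedReader reader = new BufferedReader(new FileReader("lookup.txt"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // ... populate an in-memory lookup structure ...
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "cache-file-sketch");
        // The "#lookup.txt" fragment is the local alias the mapper opens above.
        job.addCacheFile(new URI("/data/lookup.txt#lookup.txt"));
        job.setMapperClass(LookupMapper.class);
        // ... set input/output formats and paths as usual ...
    }
}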

Usage

From source file:mx.iteso.msc.examples.FaceCount.java

License:Apache License

public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
        System.out.println("Usage: FaceCount <input HIB> <output directory>");
        System.exit(0);
    }

    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(FaceCount.class);
    job.setMapperClass(FaceCountMapper.class);
    job.setReducerClass(FaceCountReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Add the face-detection cascade file to the distributed cache; the "#..." fragment symlinks it into each task's working directory
    job.addCacheFile(new URI("/hipi/OpenCV/lbpcascade_frontalface.xml#lbpcascade_frontalface.xml"));

    // Execute the MapReduce job and block until it completes
    boolean success = job.waitForCompletion(true);

    // Return success or failure
    return success ? 0 : 1;
}
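
Because of the #lbpcascade_frontalface.xml fragment, the cascade is available under that bare name in every task's working directory, so a mapper can hand a plain local path to OpenCV. The snippet below is a hedged sketch of that load, not HIPI's actual FaceCountMapper.

// Hedged sketch, not HIPI's FaceCountMapper: loading the cascade through the
// symlink created by the "#lbpcascade_frontalface.xml" fragment above.
import org.opencv.core.Core;
import org.opencv.objdetect.CascadeClassifier;

public class CascadeLoadSketch {
    public static CascadeClassifier loadCascade() {
        // OpenCV's native library must be loaded in the task JVM first.
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        // The bare file name resolves against the task working directory.
        return new CascadeClassifier("lbpcascade_frontalface.xml");
    }
}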

From source file:nl.utwente.bigdata.PageRank.java

License:Apache License

public static void run(String[] args) throws Exception {
    Configuration conf = new Configuration();

    //Process args
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length < 4) {
        System.err.println("Usage: pageRank <in> [<in>...] <out> <retweetpath> <pagerankpath>");
        System.exit(2);
    }
    conf.set("mapreduce.job.reduce.slowstart.completedmaps", "1");

    //Setup the job
    Job job = Job.getInstance(conf, "Twitter Reader");
    job.setJarByClass(PageRank.class);
    job.setMapperClass(PageRankMapper.class);
    job.setReducerClass(PageRankReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(MapWritable.class);

    //Load input files
    for (int i = 0; i < otherArgs.length - 3; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    //Set output path
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 3]));

    //Load previous results
    FileSystem fs = FileSystem.get(conf); // reuse the configuration populated by GenericOptionsParser
    FileStatus[] status = fs.listStatus(new Path("hdfs:" + otherArgs[otherArgs.length - 2]));
    for (FileStatus s : status) {
        job.addCacheFile(s.getPath().toUri());
    }
    // "overslaan" is Dutch for "skip": it suppresses loading the second cache directory
    if (!otherArgs[otherArgs.length - 1].equals("overslaan")) {
        FileStatus[] status2 = fs.listStatus(new Path("hdfs:" + otherArgs[otherArgs.length - 1]));
        for (FileStatus s2 : status2) {
            job.addCacheFile(s2.getPath().toUri());
        }
    }

    job.waitForCompletion(true);
}
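
Since these files are cached without #alias fragments, the natural way to find them again on the task side is to enumerate the job's cache files. A hedged sketch, assuming a hypothetical CacheReadingReducer (the real PageRankReducer is not shown in this source):

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical reducer skeleton; only the cache-file enumeration is the point here.
public class CacheReadingReducer extends Reducer<Text, MapWritable, Text, MapWritable> {
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles(); // everything added via addCacheFile
        if (cacheFiles == null) {
            return;
        }
        for (URI uri : cacheFiles) {
            Path path = new Path(uri);
            try (FSDataInputStream in = path.getFileSystem(context.getConfiguration()).open(path)) {
                // ... parse a previous iteration's results from 'in' ...
            }
        }
    }
}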

From source file:nl.utwente.bigdata.TwitterExample.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: exampleTwitter <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "Twitter Reader");
    job.addCacheFile(new Path("players.txt").toUri());
    job.setJarByClass(TwitterExample.class);
    job.setMapperClass(ExampleMapper.class);
    job.setReducerClass(ExampleReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:org.apache.accumulo.hadoop.mapreduce.partition.RangePartitioner.java

License:Apache License

/**
 * Sets the HDFS file name to use, containing a newline-separated list of Base64-encoded split
 * points that represent ranges for partitioning.
 */
public static void setSplitFile(Job job, String file) {
    URI uri = new Path(file).toUri();
    job.addCacheFile(uri);
    job.getConfiguration().set(CUTFILE_KEY, uri.getPath());
}
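
The read side of this round trip lives in Accumulo's RangePartitioner itself and is not reproduced on this page. The following is only a hedged sketch of how the cut file could be read back, taking CUTFILE_KEY's role and the newline-separated Base64 encoding from the javadoc above; SplitFileReaderSketch and readCutPoints are invented names.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

// Hedged sketch of reading the split file back; this is not Accumulo's implementation.
public class SplitFileReaderSketch {
    public static List<Text> readCutPoints(Configuration conf, String cutFileKey) throws IOException {
        Path path = new Path(conf.get(cutFileKey)); // path stored by setSplitFile
        List<Text> cutPoints = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(path.getFileSystem(conf).open(path)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Each line is one Base64-encoded split point (see javadoc above).
                cutPoints.add(new Text(Base64.getDecoder().decode(line)));
            }
        }
        return cutPoints;
    }
}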

From source file:org.apache.hadoop.examples.terasort.TeraSort.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        usage();
        return 2;
    }
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopTeraSortTest.java

License:Apache License

/**
 * Creates Job instance and sets up necessary properties for it.
 * @param conf The Job config.
 * @return The job.
 * @throws Exception On error.
 */
private Job setupConfig(JobConf conf) throws Exception {
    Job job = Job.getInstance(conf);

    Path inputDir = new Path(generateOutDir);
    Path outputDir = new Path(sortOutDir);

    boolean useSimplePartitioner = TeraSort.getUseSimplePartitioner(job);

    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setJobName("TeraSort");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);

    if (useSimplePartitioner)
        job.setPartitionerClass(TeraSort.SimplePartitioner.class);
    else {
        long start = System.currentTimeMillis();

        Path partFile = new Path(outputDir, PARTITION_FILENAME);

        URI partUri = new URI(partFile.toString() + "#" + PARTITION_FILENAME);

        try {
            TeraInputFormat.writePartitionFile(job, partFile);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }

        job.addCacheFile(partUri);

        long end = System.currentTimeMillis();

        System.out.println("Spent " + (end - start) + "ms computing partitions. "
                + "Partition file added to distributed cache: " + partUri);

        job.setPartitionerClass(getTeraSortTotalOrderPartitioner()/*TeraSort.TotalOrderPartitioner.class*/);
    }

    job.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(job));

    /* TeraOutputFormat.setFinalSync(job, true); */
    Method m = TeraOutputFormat.class.getDeclaredMethod("setFinalSync", JobContext.class, boolean.class);
    m.setAccessible(true);
    m.invoke(null, job, true);

    return job;
}

From source file:org.bgi.flexlab.gaea.data.structure.header.MultipleVCFHeader.java

License:Open Source License

public boolean distributeCacheVcfHeader(String outputPath, Job job, Configuration conf) {
    writeHeaderToHDFS(outputPath, conf);
    try {
        job.addCacheFile(new URI(conf.get(GaeaVCFHeader.VCF_HEADER_PROPERTY) + "#VcfHeaderObj"));
    } catch (URISyntaxException e) {
        e.printStackTrace();
        return false;
    }
    return true;
}
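
On the task side, the #VcfHeaderObj fragment symlinks the serialized header into the working directory, so it can be opened by that bare name. The sketch below only demonstrates that step; treating the file as a standard Java serialized object is an assumption, since GaeaVCFHeader's actual on-disk format is not visible here.

// Hedged sketch: opening the "#VcfHeaderObj" symlink locally. Java object
// deserialization is an assumption about the file's format.
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;

public class VcfHeaderReadSketch {
    public static Object readHeader() throws IOException, ClassNotFoundException {
        try (ObjectInputStream in = new ObjectInputStream(new FileInputStream("VcfHeaderObj"))) {
            return in.readObject();
        }
    }
}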

From source file:org.bgi.flexlab.gaea.data.structure.memoryshare.WholeGenomeShare.java

License:Open Source License

public static boolean distributeCache(String chrList, Job job, String cacheName)
        throws IOException, URISyntaxException {
    job.addCacheFile(new URI(chrList + "#" + cacheName));

    Configuration conf = job.getConfiguration();
    Path refPath = new Path(chrList);
    FileSystem fs = refPath.getFileSystem(conf);
    FSDataInputStream refin = fs.open(refPath);
    LineReader in = new LineReader(refin);
    Text line = new Text();

    String chrFile = "";
    String[] chrs = new String[3];
    while ((in.readLine(line)) != 0) {
        chrFile = line.toString();
        chrs = chrFile.split("\t");
        File fileTest = new File(chrs[1]);
        if (fileTest.isFile()) {
            chrs[1] = "file://" + chrs[1];
        }
        job.addCacheFile(new URI(chrs[1] + "#" + chrs[0]));
    }
    in.close();
    refin.close();
    return true;
}

From source file:Patterns.A5_MapSideJoinByDistributedCache.Distributed_InnerJoin_Driver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Inner Join User/Artist Data");
    job.setJarByClass(Distributed_InnerJoin_Driver.class);

    job.setMapperClass(Distributed_InnerJoin_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    try {
        job.addCacheFile(new URI(
                "/home/chintan/IdeaProjects/AdvancedDBMS/music-project/inputUserTaste/userid-profile.tsv#user"));
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 2);
}
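
The #user fragment is what makes this map-side (replicated) inner join work with setNumReduceTasks(0): every map task sees the profile file under the local name user and can load it into memory before processing its split. Below is a hedged sketch of such a mapper, with the TSV layout and join key as assumptions; it is not the actual Distributed_InnerJoin_Mapper.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical map-side join sketch; field layout is an assumption.
public class MapSideJoinSketch extends Mapper<LongWritable, Text, Text, Text> {
    private final Map<String, String> userProfiles = new HashMap<>();

    @Override
    protected void setup(Context context) throws IOException {
        // "user" is the symlink created by the "#user" fragment in the driver.
        try (BufferedReader reader = new BufferedReader(new FileReader("user"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t", 2);
                if (fields.length == 2) {
                    userProfiles.put(fields[0], fields[1]); // userId -> profile
                }
            }
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t", 2);
        if (fields.length < 2) {
            return;
        }
        String profile = userProfiles.get(fields[0]);
        if (profile != null) { // inner join: drop records with no matching profile
            context.write(new Text(fields[0]), new Text(profile + "\t" + fields[1]));
        }
    }
}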

From source file:terasort.TeraSort.java

License:Apache License

public int run(String[] args) throws Exception {
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);

    Path outputDir = new Path(args[1]);
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}