List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
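A combiner set with setCombinerClass is just a Reducer that the framework may apply to map output before the shuffle, zero or more times, so the operation must be commutative and associative, and the combiner's output key/value types must match its input types (which are the map output types). A minimal illustrative sketch follows; MaxCombiner is a made-up name, not from the examples below:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative combiner: keeps the per-key maximum. Max is commutative
// and associative, so applying it any number of times before the real
// reducer cannot change the final result.
public class MaxCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    private final LongWritable result = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long max = Long.MIN_VALUE;
        for (LongWritable value : values) {
            max = Math.max(max, value.get());
        }
        result.set(max);
        context.write(key, result);
    }
}

Wired into a job with job.setCombinerClass(MaxCombiner.class), alongside a reducer that consumes Text/LongWritable pairs.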
From source file:com.github.ygf.pagerank.InLinks.java
License:Apache License
private void computeInLinks(Configuration conf, Path linksFile, Path outputDir) throws Exception {
    // This job computes the number of in-links for every page. The
    // implementation is very similar to the classic word count example.
    Job job = Job.getInstance(conf, "InLinks:Computation");
    job.setJarByClass(InLinks.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(InLinksMapper.class);
    job.setCombinerClass(InLinksReducer.class);
    job.setReducerClass(InLinksReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, linksFile);
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "inlinks"));

    job.waitForCompletion(true);
}
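Note that this job reuses InLinksReducer as the combiner. That is safe only because the reduce operation (summing in-link counts) is commutative and associative, and because the reducer's output types, (IntWritable, IntWritable), match its input types; a reducer computing a mean, for example, could not be reused this way. The same reuse pattern appears in most of the examples below.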
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void pageRankIteration(int iter, Configuration conf, Path outputDir) throws Exception {
    // This job performs an iteration of the power iteration method to
    // compute PageRank. The map task processes each block M_{i,j}, loads
    // the corresponding stripe j of the vector v_{k-1} and produces the
    // partial result of the stripe i of the vector v_k. The reduce task
    // sums all the partial results of v_k and adds the teleportation factor
    // (the combiner only sums all the partial results). See Section 5.2
    // (and 5.2.3 in particular) of Mining of Massive Datasets
    // (http://infolab.stanford.edu/~ullman/mmds.html) for details. The
    // output is written in a "vk" subdir of the output dir, where k is the
    // iteration number. MapFileOutputFormat is used to keep an array of the
    // stripes of v.
    Job job = Job.getInstance(conf, "PageRank:Iteration");
    job.setJarByClass(PageRank.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankIterationMapper.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(FloatArrayWritable.class);
    job.setCombinerClass(PageRankIterationCombiner.class);
    job.setReducerClass(PageRankIterationReducer.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(ShortWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);

    FileInputFormat.addInputPath(job, new Path(outputDir, "M"));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter));

    job.waitForCompletion(true);
}
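Here, unlike the other examples on this page, the combiner is a different class from the reducer: only the associative summing step is safe to apply repeatedly, while the teleportation factor must be added exactly once, in the reducer. PageRankIterationCombiner and FloatArrayWritable belong to this project and are not shown on this page; below is a hypothetical sketch of such a combiner, assuming FloatArrayWritable follows the ArrayWritable get()/set() convention over FloatWritable elements:

import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical sketch, not the project's actual source: sums partial
// stripes of v_k element-wise; adding the teleportation factor is left
// to the reducer so it happens exactly once per stripe.
public class PageRankIterationCombiner
        extends Reducer<ShortWritable, FloatArrayWritable, ShortWritable, FloatArrayWritable> {

    @Override
    protected void reduce(ShortWritable key, Iterable<FloatArrayWritable> values, Context context)
            throws IOException, InterruptedException {
        float[] sum = null;
        for (FloatArrayWritable value : values) {
            Writable[] stripe = value.get(); // assumed ArrayWritable-style accessor
            if (sum == null) {
                sum = new float[stripe.length];
            }
            for (int i = 0; i < stripe.length; i++) {
                sum[i] += ((FloatWritable) stripe[i]).get();
            }
        }
        FloatWritable[] result = new FloatWritable[sum.length];
        for (int i = 0; i < sum.length; i++) {
            result[i] = new FloatWritable(sum[i]);
        }
        FloatArrayWritable out = new FloatArrayWritable();
        out.set(result); // assumed ArrayWritable-style mutator
        context.write(key, out);
    }
}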
From source file:com.hadoop.mapreduce.examples.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // Note: this example parses a hard-coded argument array instead of
    // the real command-line arguments.
    String[] ioArgs = { "input", "output2" };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    // job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    // map, combine, reduce
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // output types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
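TokenizerMapper and IntSumReducer here are the classes from the stock Hadoop WordCount example; IntSumReducer can double as the combiner because its input and output types are both (Text, IntWritable) and integer addition is commutative and associative. For reference, the stock implementations are essentially:

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
    private static final IntWritable one = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (word, 1) for every token in the input line.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}

public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the partial counts; used both as combiner and as reducer.
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}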
From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java
License:Open Source License
private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }
    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}
From source file:com.hhscyber.nl.tweets.processtweets.ProcessTweets.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException {
    Job client = new Job(new Configuration());
    client.setJarByClass(ProcessTweets.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(IntWritable.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input_concat"));
    TextOutputFormat.setOutputPath(client, new Path("output2"));
    client.setMapperClass(ProcessTweetsMapper.class);
    client.setReducerClass(ProcessTweetsReducer.class);
    client.setCombinerClass(ProcessTweetsReducer.class);
    try {
        client.submit();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.huihui.mr.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*
     * Hadoop does not manage its configuration through java.util.Properties
     * or Apache Jakarta Commons Configuration; it uses its own API,
     * org.apache.hadoop.conf.Configuration.
     */
    Configuration conf = new Configuration();
    /*
     * Hadoop's GenericOptionsParser handles generic options such as
     * -D mapreduce.job.queuename (also -fs, -jt, -libjars, -files,
     * -archives and -tokenCacheFile); getRemainingArgs() returns whatever
     * is left over, e.g. "xrli/STJoin_in" and "xrli/STJoin_out", in otherArgs.
     */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    conf.set("fs.defaultFS", "hdfs://localhost:9000");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    // Set the mapper, combiner and reducer classes.
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // A Path wraps a URI; here the input and output paths are given as URI strings.
    String input = "hdfs://localhost:9000/input/";
    String output = "hdfs://localhost:9000/user/hdfs/log_kpi/browser1";
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    // Wait for the job to finish and exit with its status.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hzy.test.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    // String input = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/";
    // String output = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/wc/";
    String input = "/tmp/data.txt";
    // String input = args[0];
    String output = "/tmp/t1";
    // String output = args[1];

    Configuration conf = HdfsDAO.config();
    // conf.set("mapreduce.framework.name", "yarn");
    // conf.set("hbase.zookeeper.quorum", "hadoop01:2181");
    // conf.set("fs.default.name", "hdfs://hadoop01:9000");
    // conf.set("yarn.resourcemanager.resource-tracker.address", "hadoop01:8031");
    // conf.set("yarn.resourcemanager.address", "hadoop01:8032");
    // conf.set("yarn.resourcemanager.scheduler.address", "hadoop01:8030");
    // conf.set("yarn.resourcemanager.admin.address", "hadoop01:8033");
    // conf.set("mapreduce.jobhistory.address", "hadoop01:10020");
    // conf.set("mapreduce.jobhistory.webapp.address", "hadoop01:19888");

    // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length < 2) {
    //     System.err.println("Usage: wordcount <in> [<in>...] <out>");
    //     System.exit(2);
    // }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(input));
    // }
    FileOutputFormat.setOutputPath(job, new Path(output));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.ifeng.hadoop.thinker.LogDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileUtil.fullyDelete(new File(args[1]));

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
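This driver and the two that follow implement run(String[]) from Hadoop's Tool interface. A driver written this way is usually launched through ToolRunner, which parses the generic options before run() sees the arguments; a typical main method (not shown in this source) would be:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public static void main(String[] args) throws Exception {
    // ToolRunner strips generic options (-D, -files, -libjars, ...) into the
    // Configuration and passes the remaining arguments to run().
    System.exit(ToolRunner.run(new Configuration(), new LogDriver(), args));
}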
From source file:com.ifeng.hadoop.thinker.LogLocalDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.ifeng.vdn.loggroup.mapper.VideologGroupDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    // Note: unlike the previous two drivers, this one does not call setJarByClass.
    Job job = Job.getInstance(super.getConf());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(VideoLogGroupMapper.class);
    job.setReducerClass(VideologGroupReducer.class);
    job.setCombinerClass(VideologGroupReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}