Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Source Link

Document

Set the combiner class for the job.

Usage

From source file:com.github.ygf.pagerank.InLinks.java

License:Apache License

private void computeInLinks(Configuration conf, Path linksFile, Path outputDir) throws Exception {

    // This job computes the number of in-links for every page. The
    // implementation is very similar to the classic word count example.

    Job job = Job.getInstance(conf, "InLinks:Computation");

    job.setJarByClass(InLinks.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(InLinksMapper.class);
    job.setCombinerClass(InLinksReducer.class);
    job.setReducerClass(InLinksReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, linksFile);
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "inlinks"));

    job.waitForCompletion(true);/*  www  . ja  v a  2 s.  c  om*/
}

From source file:com.github.ygf.pagerank.PageRank.java

License:Apache License

private void pageRankIteration(int iter, Configuration conf, Path outputDir) throws Exception {

    // This job performs an iteration of the power iteration method to
    // compute PageRank. The map task processes each block M_{i,j}, loads 
    // the corresponding stripe j of the vector v_{k-1} and produces the
    // partial result of the stripe i of the vector v_k. The reduce task
    // sums all the partial results of v_k and adds the teleportation factor
    // (the combiner only sums all the partial results). See Section 5.2
    // (and 5.2.3 in particular) of Mining of Massive Datasets
    // (http://infolab.stanford.edu/~ullman/mmds.html) for details. The
    // output is written in a "vk" subdir of the output dir, where k is the
    // iteration number. MapFileOutputFormat is used to keep an array of the
    // stripes of v.

    Job job = Job.getInstance(conf, "PageRank:Iteration");

    job.setJarByClass(PageRank.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(PageRankIterationMapper.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(FloatArrayWritable.class);
    job.setCombinerClass(PageRankIterationCombiner.class);
    job.setReducerClass(PageRankIterationReducer.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(ShortWritable.class);
    job.setOutputValueClass(FloatArrayWritable.class);
    FileInputFormat.addInputPath(job, new Path(outputDir, "M"));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter));

    job.waitForCompletion(true);/*w  ww. jav  a 2s  .c o  m*/
}

From source file:com.hadoop.mapreduce.examples.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");
    String ioArgs[] = { "input", "output2" };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);/* w  w w.j a  v a 2 s. c  o  m*/
    }
    //job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);

    //map, combine, reduce
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    //
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java

License:Open Source License

private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }//  w  w w .  j  a  v a2  s .c om

    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}

From source file:com.hhscyber.nl.tweets.processtweets.ProcessTweets.java

/**
 * @param args the command line arguments
 *///from  w w w.  j  a v  a2  s.  c o m
public static void main(String[] args) throws IOException {

    Job client = new Job(new Configuration());
    client.setJarByClass(ProcessTweets.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(IntWritable.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input_concat"));//
    TextOutputFormat.setOutputPath(client, new Path("output2"));

    client.setMapperClass(ProcessTweetsMapper.class);
    client.setReducerClass(ProcessTweetsReducer.class);
    client.setCombinerClass(ProcessTweetsReducer.class);

    try {
        client.submit();
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:com.huihui.mr.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    /*HadoopJava.util.Properties??Apache Jakarta Commons Configuration??
     * ????API?org.apache.hadoop.conf.Configuration???
     *///  w  ww  .  j a  va2 s  .c  om
    Configuration conf = new Configuration();
    /*
     * ?HadoopGenericOptionsParser
    ???
    -D mapreduce.job.queuename  ??getRemainingArgs()?
    ?"xrli/STJoin_in","xrli/STJoin_out"?otherArgs
            
    ? fs jt libjars files archives D tokenCacheFile
     */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    //
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);

    //??? 
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //Path    ???URI?Path???Path
    String input = "hdfs://localhost:9000/input/";
    String output = "hdfs://localhost:9000/user/hdfs/log_kpi/browser1";
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    //????
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hzy.test.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    //        String input = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/";
    //        String output = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/wc/";
    String input = "/tmp/data.txt";
    // String input = args[0];
    String output = "/tmp/t1";

    // String output = args[1];

    Configuration conf = HdfsDAO.config();

    //        conf.set("mapreduce.framework.name", "yarn");
    ////        conf.set("hbase.zookeeper.quorum", "hadoop01:2181");
    //        conf.set("fs.default.name", "hdfs://hadoop01:9000");
    //        conf.set("yarn.resourcemanager.resource-tracker.address", "hadoop01:8031");
    //        conf.set("yarn.resourcemanager.address", "hadoop01:8032");
    //        conf.set("yarn.resourcemanager.scheduler.address", "hadoop01:8030");
    //        conf.set("yarn.resourcemanager.admin.address", "hadoop01:8033");
    //        conf.set("mapreduce.jobhistory.address", "hadoop01:10020");
    //        conf.set("mapreduce.jobhistory.webapp.address", "hadoop01:19888");

    //        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //        if (otherArgs.length < 2) {
    //            System.err.println("Usage: wordcount <in> [<in>...] <out>");
    //            System.exit(2);
    //        }/*w ww . ja va2  s  .c  om*/
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //        for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(input));
    //        }
    FileOutputFormat.setOutputPath(job, new Path(output));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ifeng.hadoop.thinker.LogDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }//from w  ww.j  av a 2 s  .c  om

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileUtil.fullyDelete(new File(args[1]));

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.ifeng.hadoop.thinker.LogLocalDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }/*from w w  w  . j  a  v a  2s .  co m*/

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.ifeng.vdn.loggroup.mapper.VideologGroupDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }//from   w w w.  j ava 2s . c o m

    Job job = Job.getInstance(super.getConf());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(VideoLogGroupMapper.class);
    job.setReducerClass(VideologGroupReducer.class);
    job.setCombinerClass(VideologGroupReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}