Example usage for org.apache.hadoop.mapred JobConf setCombinerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> theClass)

Source Link

Document

Set the user-defined combiner class used to combine map-outputs before being sent to the reducers.

Usage

From source file:buildtestproject.MyFirstMapReduce.java

public static void main(String[] args) throws Exception {
    //Configuration conf = new Configuration();
    JobConf conf = new JobConf(MyFirstMapReduce.class);
    //Job job = Job.getInstance(conf, "word-count-one");
    conf.setJobName("word-count-one");

    conf.setMapperClass(TokenizerMapper.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setReducerClass(IntSumReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //        job.setJarByClass(MyFirstMapReduce.class);
    //        job.setMapperClass(TokenizerMapper.class);
    //        job.setCombinerClass(IntSumReducer.class);
    //        job.setReducerClass(IntSumReducer.class);
    //        /*from w ww  . jav  a2s.c om*/
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    //        FileInputFormat.addInputPath(job, new Path(args[0]));
    //        FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(conf);

    //        System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cn.edu.xmu.dm.mapreduce.MultiFileWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();/* w  ww. j  a v a2 s  .c  o  m*/
        return 1;
    }

    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
    job.setJobName("MultiFileWordCount");

    // set the InputFormat of the job to our InputFormat
    job.setInputFormat(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(LongWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);

    return 0;
}

From source file:com.acme.extensions.mr.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    String[] otherArgs = new GenericOptionsParser(job, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);/*from  ww w.  j a va2 s .co m*/
    }

    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    JobClient.runJob(job);
}

From source file:com.aerospike.hadoop.examples.wordcountinput.WordCountInput.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, WordCountInput.class);
    job.setJobName("AerospikeWordCountInput");

    job.setInputFormat(AerospikeInputFormat.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(args[0]));

    JobClient.runJob(job);/*from   ww  w  .j a va2s. c  om*/

    log.info("finished");
    return 0;
}

From source file:com.aerospike.hadoop.examples.wordcountoutput.WordCountOutput.java

License:Apache License

public int run(final String[] args) throws Exception {

    log.info("run starting");

    final Configuration conf = getConf();

    JobConf job = new JobConf(conf, WordCountOutput.class);
    job.setJobName("AerospikeWordCountOutput");

    for (int ii = 0; ii < args.length; ++ii) {
        FileInputFormat.addInputPath(job, new Path(args[ii]));
    }/*from   w ww  . ja  v a  2  s.  c  om*/

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setOutputFormat(MyOutputFormat.class);

    JobClient.runJob(job);

    log.info("finished");
    return 0;
}

From source file:com.benchmark.mapred.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 4) {
        System.out.println("Grep <inDir> <outDir> <numreduces> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }//from ww  w.j av  a2s .co  m

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf grepJob = new JobConf(getConf(), Grep.class);

    try {

        Date startIteration = new Date();
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.set("mapred.mapper.regex", args[3]);
        if (args.length == 5)
            grepJob.set("mapred.mapper.regex.group", args[4]);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, new Path(args[1]));
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.setNumReduceTasks(Integer.parseInt(args[2]));

        JobClient.runJob(grepJob);

        Date endIteration = new Date();
        System.out.println("The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000
                + " seconds.");
    } finally {
        FileSystem.get(grepJob).delete(tempDir, true);
    }
    return 0;
}

From source file:com.benchmark.mapred.MultiFileWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();//from ww  w .  j a  v a2  s.  c o  m
        return 1;
    }

    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
    job.setJobName("MultiFileWordCount");

    //set the InputFormat of the job to our InputFormat
    job.setInputFormat(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(LongWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);

    return 0;
}

From source file:com.chriscx.mapred.Driver.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Driver.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {//from   www .jav a2s  .  com
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.chriscx.matching.Driver.java

public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), com.chriscx.mapred.Driver.class);
    conf.setJobName("Matching");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else {//from w  w w. j  av  a  2s.  c  o m
            other_args.add(args[i]);
        }
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.cloudera.hbase.OldWordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//  ww w. j  a va 2 s.c o  m
    }
    conf.setJobName("word count");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));
    JobClient.runJob(conf);
    return;
}