Example usage for org.apache.hadoop.mapreduce.Job#setReducerClass

List of usage examples for org.apache.hadoop.mapreduce.Job#setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Source Link

Document

Set the Reducer for the job.

Usage

From source file:com.github.ygf.pagerank.PageRank.java

License:Apache License

/**
 * Runs one step of the power iteration method used to compute PageRank.
 *
 * <p>The map task processes each block M_{i,j}, loads the corresponding
 * stripe j of the vector v_{k-1} and produces the partial result of the
 * stripe i of the vector v_k. The reduce task sums all the partial results
 * of v_k and adds the teleportation factor, while the combiner only sums
 * the partial results. See Section 5.2 (5.2.3 in particular) of Mining of
 * Massive Datasets (http://infolab.stanford.edu/~ullman/mmds.html).
 *
 * <p>The result is written to the "vk" subdirectory of the output
 * directory, where k is the iteration number. MapFileOutputFormat is used
 * to keep an array of the stripes of v.
 *
 * @param iter      iteration number k, used to name the output subdirectory
 * @param conf      configuration the job is created from
 * @param outputDir directory holding the "M" input and receiving "v" + iter
 * @throws Exception if creating or running the job fails
 */
private void pageRankIteration(int iter, Configuration conf, Path outputDir) throws Exception {
    Job iterationJob = Job.getInstance(conf, "PageRank:Iteration");
    iterationJob.setJarByClass(PageRank.class);

    // Where the data comes from and where it goes.
    Path matrixBlocks = new Path(outputDir, "M");
    Path vectorStripes = new Path(outputDir, "v" + iter);
    FileInputFormat.addInputPath(iterationJob, matrixBlocks);
    FileOutputFormat.setOutputPath(iterationJob, vectorStripes);
    iterationJob.setInputFormatClass(SequenceFileInputFormat.class);
    iterationJob.setOutputFormatClass(MapFileOutputFormat.class);

    // Processing stages.
    iterationJob.setMapperClass(PageRankIterationMapper.class);
    iterationJob.setCombinerClass(PageRankIterationCombiner.class);
    iterationJob.setReducerClass(PageRankIterationReducer.class);

    // Map and reduce stages share the same key/value types.
    iterationJob.setMapOutputKeyClass(ShortWritable.class);
    iterationJob.setMapOutputValueClass(FloatArrayWritable.class);
    iterationJob.setOutputKeyClass(ShortWritable.class);
    iterationJob.setOutputValueClass(FloatArrayWritable.class);

    iterationJob.waitForCompletion(true);
}

From source file:com.github.ygf.pagerank.PageRank.java

License:Apache License

/**
 * Produces a plain text file with the top N PageRanks and the titles of
 * the corresponding pages.
 *
 * <p>Each map task emits the top N PageRanks it receives, and the reduce
 * task merges those partial results into the global top N. A single
 * reducer is used so that it has access to every mapper's individual
 * top N. The reducer looks up the titles in the index built by TitleIndex.
 * The job was designed on the assumption that N is small.
 *
 * @param iter      iteration number whose "v" + iter directory is the input
 * @param conf      configuration; must carry "pagerank.top_results"
 * @param outputDir directory holding the input and receiving the summary
 * @throws Exception if creating or running the job fails
 */
private void summarizeResults(int iter, Configuration conf, Path outputDir) throws Exception {
    String topResultsSetting = conf.get("pagerank.top_results");
    int topResults = Integer.parseInt(topResultsSetting);

    Job topNJob = Job.getInstance(conf, "PageRank:TopN");
    topNJob.setJarByClass(PageRank.class);

    // Input is the rank vector of the given iteration; output sits next to it.
    Path rankVector = new Path(outputDir, "v" + iter);
    Path summary = new Path(outputDir, "v" + iter + "-top" + topResults);
    FileInputFormat.addInputPath(topNJob, rankVector);
    FileOutputFormat.setOutputPath(topNJob, summary);
    topNJob.setInputFormatClass(SequenceFileInputFormat.class);
    topNJob.setOutputFormatClass(TextOutputFormat.class);

    // Map side.
    topNJob.setMapperClass(PageRankTopNMapper.class);
    topNJob.setMapOutputKeyClass(FloatWritable.class);
    topNJob.setMapOutputValueClass(IntWritable.class);

    // Reduce side: exactly one reducer so it sees every partial top N.
    topNJob.setReducerClass(PageRankTopNReducer.class);
    topNJob.setNumReduceTasks(1);
    topNJob.setOutputKeyClass(FloatWritable.class);
    topNJob.setOutputValueClass(Text.class);

    topNJob.waitForCompletion(true);
}

From source file:com.goldsaxfoundation.bigdata.Module5.SimpleMapReduce.java

License:Apache License

/**
 * Entry point of the word count job.
 *
 * <p>Expects the input path as the first argument and the output path as
 * the second. The process exit status reflects the job outcome: 0 when the
 * job succeeds, 1 when it fails, and 2 when the arguments are missing.
 *
 * @param args command line arguments: {@code <in> <out>}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(SimpleMapReduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Propagate the job result so callers (scripts, schedulers) can detect
    // a failed run; previously the result was discarded.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.google.cloud.bigtable.mapreduce.Import.java

License:Open Source License

/**
 * Sets up the actual job.
 *
 * <p>The first argument is used as the table name and the second as the
 * input directory. When the configuration carries a value under
 * {@code BULK_OUTPUT_CONF_KEY}, the job is configured with a reducer and
 * {@code HFileOutputFormat2} writing to that path; otherwise it is
 * configured as a map-only job through
 * {@code TableMapReduceUtil.initTableReducerJob}.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails. Any exception raised
 *         while adding the filter class to the dependency jars is wrapped
 *         in an IOException.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    // args[0] = table name, args[1] = input directory.
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    // Job name defaults to NAME + "_" + tableName unless overridden in the configuration.
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // null when no bulk output path has been configured.
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

    // make sure we get the filter in the jars
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJars(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }

    if (hfileOutPath != null) {
        // Bulk output path configured: write to hfileOutPath via HFileOutputFormat2.
        job.setMapperClass(KeyValueImporter.class);
        // Connection, table and region locator are only needed while configuring
        // the job; try-with-resources closes them afterwards.
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
            // Add the jar containing Guava's Preconditions to the job's dependency jars.
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Preconditions.class);
        }
    } else {
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}

From source file:com.gsinnovations.howdah.AbstractJob.java

License:Apache License

/**
 * Creates a job from a copy of this object's configuration and wires it
 * with the given input/output locations, formats, mapper and reducer.
 *
 * <p>The jar is located through the reducer class, unless the reducer is
 * the base {@code Reducer}, in which case the mapper class is used. Map
 * output compression is switched on.
 *
 * @param inputPath    value stored under "mapred.input.dir"
 * @param outputPath   value stored under "mapred.output.dir"
 * @param inputFormat  input format class of the job
 * @param mapper       mapper class
 * @param mapperKey    map output key class
 * @param mapperValue  map output value class
 * @param reducer      reducer class
 * @param reducerKey   job output key class
 * @param reducerValue job output value class
 * @param outputFormat output format class of the job
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created
 * @throws IllegalStateException if both mapper and reducer are the base
 *         classes, so no user jar can be derived from them
 */
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration settings = job.getConfiguration();

    // Prefer the reducer for locating the jar; fall back to the mapper when
    // the reducer is just the framework's base class.
    boolean baseReducer = reducer.equals(Reducer.class);
    if (baseReducer && mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(baseReducer ? mapper : reducer);

    // Input side.
    job.setInputFormatClass(inputFormat);
    settings.set("mapred.input.dir", inputPath.toString());

    // Map stage, with compressed intermediate output.
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    settings.setBoolean("mapred.compress.map.output", true);

    // Reduce stage.
    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    String jobName = getCustomJobName(job, mapper, reducer);
    job.setJobName(jobName);

    // Output side.
    job.setOutputFormatClass(outputFormat);
    settings.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.gsvic.csmr.CSMRBase.java

License:Apache License

/**
 * Runs the "CSMR Pairs Job", reading sequence files from {@code in} and
 * writing sequence files to the "CSMRPairs" directory under {@code out}.
 *
 * <p>As a side effect, {@code out} is stored in the class-level
 * {@code path} field.
 *
 * @param in  input location
 * @param out base output location
 * @throws IOException            if the job cannot be created or run
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void generatePairs(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    // Remember the base output location in the shared field.
    path = out;

    Path pairsInput = new Path(in);
    Path pairsOutput = new Path(path + "/CSMRPairs");

    Job pairsJob = new Job(conf);
    pairsJob.setJobName("CSMR Pairs Job");
    pairsJob.setJarByClass(CSMRBase.class);

    // Locations and formats.
    FileInputFormat.addInputPath(pairsJob, pairsInput);
    FileOutputFormat.setOutputPath(pairsJob, pairsOutput);
    pairsJob.setInputFormatClass(SequenceFileInputFormat.class);
    pairsJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Map stage: IntWritable -> DocumentWritable.
    pairsJob.setMapperClass(CSMRMapper.class);
    pairsJob.setMapOutputKeyClass(IntWritable.class);
    pairsJob.setMapOutputValueClass(DocumentWritable.class);

    // Reduce stage: Text -> VectorArrayWritable.
    pairsJob.setReducerClass(CSMRReducer.class);
    pairsJob.setOutputKeyClass(Text.class);
    pairsJob.setOutputValueClass(VectorArrayWritable.class);

    pairsJob.waitForCompletion(true);
}

From source file:com.gsvic.csmr.CSMRBase.java

License:Apache License

/**
 * Runs the "CSMR Cosine Similarity Job" and terminates the JVM with the
 * job's outcome.
 *
 * <p>The input is the "CSMRPairs/part-r-00000" file under the class-level
 * {@code path} field and the output goes to the "Results" directory under
 * the same location. The base {@code Mapper} class is used as the mapper
 * and {@code CosineSimilarityReducer} as the reducer.
 *
 * <p>This method does not return: it calls {@link System#exit(int)} with
 * status 0 when the job succeeds and 1 when it fails.
 *
 * @throws IOException            if the job cannot be created or run
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    Job job;
    job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));
    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Exit status follows the usual convention: 0 = success, non-zero =
    // failure. The original mapping was inverted (1 on success).
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

From source file:com.hadoop.examples.secondSort.SecondarySort.java

License:Apache License

/**
 * Entry point of the secondary sort job.
 *
 * <p>After generic Hadoop options are stripped, exactly two arguments must
 * remain: the input path and the output path. The process exits with 2 on
 * a usage error, 0 when the job succeeds and 1 when it fails.
 *
 * @param args command line arguments, including generic Hadoop options
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    // Load the Hadoop configuration and strip the generic options.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }

    Job sortJob = new Job(conf, "secondary sort");
    sortJob.setJarByClass(SecondarySort.class);

    // Mapper; the map output is IntPair, IntWritable.
    sortJob.setMapperClass(MapClass.class);
    sortJob.setMapOutputKeyClass(IntPair.class);
    sortJob.setMapOutputValueClass(IntWritable.class);

    // No combiner is registered. NOTE(review): the original (garbled) remark
    // appears to say Reduce cannot double as a combiner because its input
    // and output types differ - confirm.

    // Reducer; the reduce output is Text, IntWritable.
    sortJob.setReducerClass(Reduce.class);
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(IntWritable.class);

    // Group and partition by the first int in the pair. No sort comparator
    // is set explicitly. NOTE(review): the original remark suggests sorting
    // relies on IntPair's compareTo() - confirm.
    sortJob.setPartitionerClass(FirstPartitioner.class);
    sortJob.setGroupingComparatorClass(FirstGroupingComparator.class);

    Path inputPath = new Path(otherArgs[0]);
    Path outputPath = new Path(otherArgs[1]);
    FileInputFormat.addInputPath(sortJob, inputPath);
    FileOutputFormat.setOutputPath(sortJob, outputPath);

    // Submit the job and report its outcome through the exit status.
    boolean succeeded = sortJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.hadoop.mapreduce.examples.WordCount.java

License:Apache License

/**
 * Entry point of the word count job.
 *
 * <p>The input and output locations are hard-coded to "input" and
 * "output2"; the {@code args} parameter is not consulted. The process
 * exits with 0 when the job succeeds and 1 when it fails.
 *
 * @param args command line arguments (unused)
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");

    // Fixed paths are parsed in place of the real command line.
    String[] ioArgs = { "input", "output2" };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job wordCountJob = Job.getInstance(conf, "word count");
    wordCountJob.setJarByClass(WordCount.class);

    // Map, combine and reduce stages; the reducer doubles as the combiner.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);
    wordCountJob.setReducerClass(IntSumReducer.class);

    // Output types.
    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    // Every argument but the last is an input; the last one is the output.
    int outputIndex = otherArgs.length - 1;
    for (int i = 0; i < outputIndex; i++) {
        FileInputFormat.addInputPath(wordCountJob, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(wordCountJob, new Path(otherArgs[outputIndex]));

    boolean succeeded = wordCountJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java

License:Open Source License

/**
 * Runs a small word count job over two freshly written input files and
 * verifies both the client-side LZO check state and the job output.
 *
 * @param cf          base configuration; a copy is modified for the job
 * @param compressIn  passed to {@code makeFileName} for the input files;
 *                    also the expected value of
 *                    {@code LzoCodec.isNativeLzoChecked()} after submission
 * @param compressOut whether the job output is compressed with LzoCodec
 * @throws IOException            if file or job I/O fails
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Work on a private copy so the caller's configuration stays untouched.
    Configuration jobConf = new Configuration(cf);
    if (compressIn) {
        jobConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }
    if (compressOut) {
        jobConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }

    // Start from clean input and output directories.
    Path inputDir = new Path(TEST_ROOT_DIR + "/in");
    Path outputDir = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(inputDir, true);
    localFs.delete(outputDir, true);

    // Two input parts.
    String firstPart = makeFileName("in/part1", compressIn);
    writeFile(firstPart, "this is a test\nof word count test\ntest\n");
    String secondPart = makeFileName("in/part2", compressIn);
    writeFile(secondPart, "more test");

    Job wordCount = new Job(jobConf, "word count");
    wordCount.setMapperClass(MyMapper.class);
    wordCount.setCombinerClass(MyCombiner.class);
    wordCount.setReducerClass(MyReducer.class);
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(wordCount, true);
        FileOutputFormat.setOutputCompressorClass(wordCount, LzoCodec.class);
    }
    FileInputFormat.addInputPath(wordCount, inputDir);
    FileOutputFormat.setOutputPath(wordCount, outputDir);

    // The client-side check is asserted right after submission, before the
    // job is waited on.
    wordCount.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    boolean succeeded = wordCount.waitForCompletion(false);
    assertTrue(succeeded);

    String outputFile = makeFileName("out/part-r-00000", compressOut);
    String result = readFile(outputFile);
    System.out.println(result);
    String expected = "a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n";
    assertEquals(expected, result);
}