Example usage for org.apache.hadoop.mapred JobConf setReducerClass

List of usage examples for org.apache.hadoop.mapred JobConf setReducerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass) 

Source Link

Document

Set the Reducer class for the job.

Usage

From source file:edu.umd.cloud9.demo.DemoWordCountTuple2.java

License:Apache License

/**
 * Runs this tool.//w w w  .  jav a 2  s.  c o  m
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int numMapTasks = Integer.parseInt(args[2]);
    int numReduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: DemoWordCountTuple2");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + numMapTasks);
    sLogger.info(" - number of reducers: " + numReduceTasks);

    JobConf conf = new JobConf(DemoWordCountTuple2.class);
    conf.setJobName("DemoWordCountTuple2");

    conf.setNumMapTasks(numMapTasks);
    conf.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.BigramCount.java

License:Apache License

/**
 * Runs this tool./*from  ww w .  j  ava  2s.  c o m*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramCount.class);
    conf.setJobName("BigramCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setCombinerClass(MyReducer.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.BigramRelativeFrequency.java

License:Apache License

/**
 * Runs this tool./*from  w  ww.j  a v a  2s . co  m*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramRelativeFrequency");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramRelativeFrequency.class);
    conf.setJobName("BigramRelativeFrequency");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(PairOfStrings.class);
    conf.setOutputValueClass(FloatWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setCombinerClass(MyCombiner.class);
    conf.setReducerClass(MyReducer.class);
    conf.setPartitionerClass(MyPartitioner.class);

    //Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.BuildInvertedIndex.java

License:Apache License

/**
 * Runs this tool./*from ww w  .  j  a  va  2 s  . c o  m*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BuildInvertedIndex");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BuildInvertedIndex.class);
    conf.setJobName("BuildInvertedIndex");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(PairOfInts.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.DemoWordCount.java

License:Apache License

/**
 * Runs this tool./*from w w w.  j  av a2s  .  co  m*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: DemoWordCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(DemoWordCount.class);
    conf.setJobName("DemoWordCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setCombinerClass(MyReducer.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.JingBigramRelativeFrequency.java

License:Apache License

/**
  * Runs this tool./*from   w  w w .  j a va2  s .  co m*/
  */
 public int run(String[] args) throws Exception {
     if (args.length != 4) {
         printUsage();
         return -1;
     }

     String inputPath = args[0];
     String outputPath = args[1];

     int mapTasks = Integer.parseInt(args[2]);
     int reduceTasks = Integer.parseInt(args[3]);

     sLogger.info("Tool: DemoWordCount");
     sLogger.info(" - input path: " + inputPath);
     sLogger.info(" - output path: " + outputPath);
     sLogger.info(" - number of mappers: " + mapTasks);
     sLogger.info(" - number of reducers: " + reduceTasks);

     JobConf conf = new JobConf(BigramRelativeFrequency.class);
     conf.setJobName("DemoWordCount");

     conf.setNumMapTasks(mapTasks);
     conf.setNumReduceTasks(reduceTasks);

     FileInputFormat.setInputPaths(conf, new Path(inputPath));
     FileOutputFormat.setOutputPath(conf, new Path(outputPath));
     FileOutputFormat.setCompressOutput(conf, false);

     /**
      *  Note that these must match the Class arguments given in the mapper 
      */
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(IntWritable.class);

     conf.setMapperClass(MyMapper.class);
     conf.setCombinerClass(MyReducer.class);
     conf.setReducerClass(MyReducer.class);

     // Delete the output directory if it exists already
     Path outputDir = new Path(outputPath);
     FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

     long startTime = System.currentTimeMillis();
     JobClient.runJob(conf);
     sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
     return 0;
 }

From source file:edu.umd.cloud9.io.benchmark.HadoopSortRandomPairsOfInts.java

License:Apache License

/**
 * Runs this benchmark./*  ww  w  .j  a v a2s .  c  o  m*/
 */
public static void main(String[] args) throws IOException {
    String inputPath = "random-pairs.seq";
    String outputPath = "random-pairs.sorted";
    int numMapTasks = 1;
    int numReduceTasks = 1;

    JobConf conf = new JobConf(HadoopSortRandomPairsOfInts.class);
    conf.setJobName("SortRandomPairsOfInts");

    conf.setNumMapTasks(numMapTasks);
    conf.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(PairOfInts.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime;
    double duration;

    startTime = System.currentTimeMillis();

    JobClient.runJob(conf);

    duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job took " + duration + " seconds");
}

From source file:edu.umd.cloud9.pagerank.BuildPageRankRecords.java

License:Apache License

/**
 * Runs this tool.//from ww  w . j  a v a 2s. co  m
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int n = Integer.parseInt(args[2]);

    sLogger.info("Tool name: BuildPageRankRecords");
    sLogger.info(" - inputDir: " + inputPath);
    sLogger.info(" - outputDir: " + outputPath);
    sLogger.info(" - numNodes: " + n);

    JobConf conf = new JobConf(BuildPageRankRecords.class);
    conf.setJobName("PackageLinkGraph");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInt("NodeCnt", n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    TextInputFormat.addInputPath(conf, new Path(inputPath));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(PageRankNode.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(PageRankNode.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
}

From source file:edu.umd.cloud9.pagerank.FindMaxPageRankNodes.java

License:Apache License

/**
 * Runs this tool.//from  w  ww.ja  v  a  2s . com
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int n = Integer.parseInt(args[2]);

    sLogger.info("Tool name: FindMaxPageRankNodes");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);
    sLogger.info(" - n: " + n);

    JobConf conf = new JobConf(FindMaxPageRankNodes.class);
    conf.setJobName("FindMaxPageRankNodes");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setInt("n", n);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(FloatWritable.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(FloatWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
}

From source file:edu.umd.cloud9.pagerank.PartitionGraph.java

License:Apache License

public int run(String[] args) throws IOException {
    if (args.length != 5) {
        printUsage();//from w  w  w  .ja  v  a  2s .  c o m
        return -1;
    }

    String inPath = args[0];
    String outPath = args[1];
    int numParts = Integer.parseInt(args[2]);
    boolean useRange = Integer.parseInt(args[3]) != 0;
    int nodeCount = Integer.parseInt(args[4]);

    sLogger.info("Tool name: PartitionGraph");
    sLogger.info(" - inputDir: " + inPath);
    sLogger.info(" - outputDir: " + outPath);
    sLogger.info(" - numPartitions: " + numParts);
    sLogger.info(" - useRange?: " + useRange);
    sLogger.info(" - nodeCnt: " + nodeCount);

    JobConf conf = new JobConf(PartitionGraph.class);

    conf.setJobName("Partition Graph " + numParts);
    conf.setNumReduceTasks(numParts);

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("NodeCount", nodeCount);

    FileInputFormat.setInputPaths(conf, new Path(inPath));
    FileOutputFormat.setOutputPath(conf, new Path(outPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(PageRankNode.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(PageRankNode.class);

    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(ReduceClass.class);

    conf.setSpeculativeExecution(false);

    if (useRange) {
        conf.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    JobClient.runJob(conf);

    return 0;
}