Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the Reducer for the job.

Usage

From source file:TestBAM.java

License:Open Source License

public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);
    DistributedCache.addFileToClassPath(new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar"),
            conf);/* w w  w . j  ava 2  s.  c  o  m*/

    final Job job = new Job(conf);

    job.setJarByClass(TestBAM.class);
    job.setMapperClass(TestBAMMapper.class);
    job.setReducerClass(TestBAMReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));

    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("sort :: Job failed.");
        return 1;
    }

    return 0;
}

From source file:PairsPMI_M.java

License:Apache License

/**
 * Runs this tool./*from ww w. j  a va2  s  .  c  o  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    // First MapReduce Job

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - tmp path: " + outputPath + "/tmp");
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the tmp directory if it exists already
    Path tmpDir = new Path("tmp_wj");
    FileSystem.get(getConf()).delete(tmpDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path("tmp_wj"));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    //    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time1 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + time1 + " seconds");
    numRecords = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

    /*
     *  Second MapReduce Job
     */

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info("second stage of MapReduce");
    LOG.info(" - input from tmp path: " + outputPath + "/tmp_wj");
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    // set the global variable
    Configuration conf = getConf();
    conf.setLong("numRec", numRecords);

    job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path("tmp_wj/part*"));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    // job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputKeyClass(Text.class);

    job.setOutputValueClass(FloatWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    //   job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapperSecond.class);
    //    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducerSecond.class);
    job.setPartitionerClass(MyPartitioner.class);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time2 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Second job finished in " + time2 + " seconds");
    System.out.println("Total time: " + (time1 + time2) + " seconds");

    return 0;
}

From source file:PopularURLs.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "popularurls");
    job.setJarByClass(PopularURLs.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);/* w w  w .  j  a  va 2s . c  om*/
}

From source file:WordCountB.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);//from www  .  j  av  a  2s . c o  m
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountB.class);
    job.setMapperClass(TokenizerMapper.class);
    // Setup the Combiner
    job.setCombinerClass(IntSumReducer.class);
    // Setup the Partitioner
    job.setPartitionerClass(Letterpartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:PerTaskTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from w  w w  .ja  v  a 2s  .  c  om
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Aniket changes starts
    /* Here the partitioner is being called*/
    job.setPartitionerClass(WordPartitioner.class);
    // Aniket changes ends
    // Part 4 Aniket changes starts
    /* Here I am just disabling the combiner */
    // job.setCombinerClass(IntSumReducer.class);
    // Part 4 Aniket changes ends
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:WordCount_PerTaskTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);/* w w w.  ja  va 2s  .c  om*/
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerTaskTally.class);
    job.setMapperClass(TokenizerMapper.class);

    // disable combiner

    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:ComputeCooccurrenceMatrixStripes.java

License:Apache License

/**
 * Runs this tool./* w  w  w.j  a v  a 2 s. c o m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:Distinct.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Distinct <in> <out>");
        System.exit(2);/*from ww w . ja  v a2 s  .  c o  m*/
    }
    Job job = Job.getInstance(conf, "distinct1");
    job.setJarByClass(Distinct.class);
    job.setMapperClass(TokenizerMapper1.class);
    job.setReducerClass(Reducer1.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(tempDir));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.waitForCompletion(true);

    Configuration conf2 = new Configuration();
    Job job2 = Job.getInstance(conf2, "distinct2");
    job2.setJarByClass(Distinct.class);
    job2.setMapperClass(TokenizerMapper2.class);
    job2.setReducerClass(Reducer2.class);
    FileInputFormat.addInputPath(job2, new Path(tempDir));
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}

From source file:BigBWA.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    for (String argumento : args) {
        LOG.info("Arg: " + argumento);
    }/*www  . j a  v a2  s  . c  o m*/

    String inputPath = "";
    String outputPath = "";

    boolean useReducer = false;

    BwaOptions options = new BwaOptions(args);

    //We set the timeout and stablish the bwa library to call BWA methods
    conf.set("mapreduce.task.timeout", "0");
    conf.set("mapreduce.map.env", "LD_LIBRARY_PATH=./bwa.zip/");

    //==================Algorithm election==================
    //One of the algorithms is going to be in use, because tge default is always specified.
    if (options.isMemAlgorithm()) {
        //Case of the mem algorithm
        conf.set("mem", "true");
        conf.set("aln", "false");
        conf.set("bwasw", "false");
    }

    else if (options.isAlnAlgorithm()) {
        // Case of aln algorithm
        conf.set("mem", "false");
        conf.set("aln", "true");
        conf.set("bwasw", "false");
    }

    else if (options.isBwaswAlgorithm()) {
        // Case of bwasw algorithm
        conf.set("mem", "false");
        conf.set("aln", "false");
        conf.set("bwasw", "true");
    }

    //==================Index election==================
    if (options.getIndexPath() != "") {
        conf.set("indexRoute", options.getIndexPath());
    } else {
        System.err.println("No index has been found. Aborting.");
        System.exit(1);
    }

    //==================Type of reads election==================
    //There is always going to be a type of reads, because default is paired
    if (options.isPairedReads()) {
        conf.set("paired", "true");
        conf.set("single", "false");
    } else if (options.isSingleReads()) {
        conf.set("paired", "false");
        conf.set("single", "true");
    }

    //==================Use of reducer==================
    if (options.isUseReducer()) {
        useReducer = true;
        conf.set("useReducer", "true");
    } else {
        conf.set("useReducer", "false");
    }

    //==================Number of threads per map==================
    if (options.getNumThreads() != "0") {
        conf.set("bwathreads", options.getNumThreads());
    }

    //==================RG Header===================
    if (options.getReadgroupHeader() != "") {
        conf.set("rgheader", options.getReadgroupHeader());
    }

    //==================Input and output paths==================
    inputPath = options.getInputPath();
    outputPath = options.getOutputPath();

    conf.set("outputGenomics", outputPath);

    //==================Partition number==================
    if (options.getPartitionNumber() != 0) {
        try {
            FileSystem fs = FileSystem.get(conf);

            Path inputFilePath = new Path(inputPath);

            ContentSummary cSummary = fs.getContentSummary(inputFilePath);

            long length = cSummary.getLength();

            fs.close();

            conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / options.getPartitionNumber()));
            conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / options.getPartitionNumber()));
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            LOG.error(e.toString());

            System.exit(1);
        }

    }

    //Job job = new Job(conf,"BigBWA_"+outputPath);
    Job job = Job.getInstance(conf, "BigBWA_" + outputPath);

    job.setJarByClass(BigBWA.class);
    job.setMapperClass(BigBWAMap.class);
    //job.setCombinerClass(BigBWACombiner.class);

    if (useReducer) {
        job.setReducerClass(BigBWAReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setNumReduceTasks(1);
    } else {
        job.setNumReduceTasks(0);
    }

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:BigramRelativeFrequencyTuple.java

License:Apache License

/**
 * Runs this tool.//from   w w w .j av a2  s .  c  om
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int reduceTasks = Integer.parseInt(args[2]);

    LOG.info("Tool name: " + BigramRelativeFrequencyTuple.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequencyTuple.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequencyTuple.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(BinSedesTuple.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(BinSedesTuple.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}