Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the Reducer for the job.
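
setReducerClass must be called while the job is still being defined; once the job has been submitted it throws IllegalStateException. A minimal, self-contained sketch of the typical wiring follows; TokenMapper and SumReducer are hypothetical stand-ins for your own classes:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetReducerExample {

    // Hypothetical mapper: emits (token, 1) for every whitespace-separated token.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    // Hypothetical reducer: sums the counts collected for each key.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "set-reducer-example");
        job.setJarByClass(SetReducerExample.class);
        job.setMapperClass(TokenMapper.class);
        // Legal only while the job is in the DEFINE state, i.e. before submission.
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}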

Usage

From source file: TestBAM.java

License: Open Source License

public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);
    DistributedCache.addFileToClassPath(new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar"),
            conf);

    final Job job = new Job(conf);

    job.setJarByClass(TestBAM.class);
    job.setMapperClass(TestBAMMapper.class);
    job.setReducerClass(TestBAMReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));

    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.submit();

    if (!job.waitForCompletion(true)) {
        System.err.println("sort :: Job failed.");
        return 1;
    }

    return 0;
}
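
Note: org.apache.hadoop.filecache.DistributedCache is deprecated on Hadoop 2 and later; once the Job object exists, the same classpath effect is available on the job itself. A one-line sketch, reusing the jar path above:

    job.addFileToClassPath(new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar"));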

From source file: PairsPMI_M.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    // First MapReduce Job

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - tmp path: " + outputPath + "/tmp");
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the tmp directory if it exists already
    Path tmpDir = new Path("tmp_wj");
    FileSystem.get(getConf()).delete(tmpDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, tmpDir);

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    //    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time1 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + time1 + " seconds");
    numRecords = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

    /*
     *  Second MapReduce Job
     */

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info("second stage of MapReduce");
    LOG.info(" - input from tmp path: " + outputPath + "/tmp_wj");
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    // Pass the first job's record count to the second job via the configuration
    Configuration conf = getConf();
    conf.setLong("numRec", numRecords);

    job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path("tmp_wj/part*"));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    // job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputKeyClass(Text.class);

    job.setOutputValueClass(FloatWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    //   job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapperSecond.class);
    //    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducerSecond.class);
    job.setPartitionerClass(MyPartitioner.class);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time2 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Second job finished in " + time2 + " seconds");
    System.out.println("Total time: " + (time1 + time2) + " seconds");

    return 0;
}
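
Note how the first job's input-record count reaches the second job: conf.setLong("numRec", numRecords) is applied to the shared Configuration before Job.getInstance(getConf()) copies it, so tasks of the second job can read it back. A sketch of the reading side, assuming MyReducerSecond keeps the value in a field and overrides setup:

    private long numRecords;

    @Override
    protected void setup(Context context) {
        numRecords = context.getConfiguration().getLong("numRec", 1L);
    }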

From source file: PopularURLs.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "popularurls");
    job.setJarByClass(PopularURLs.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}

From source file: WordCountB.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountB.class);
    job.setMapperClass(TokenizerMapper.class);
    // Setup the Combiner
    job.setCombinerClass(IntSumReducer.class);
    // Setup the Partitioner
    job.setPartitionerClass(Letterpartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
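
Reusing IntSumReducer as both the combiner and the reducer is safe here because integer addition is associative and commutative, and because the reducer's input and output types match (Text/IntWritable on both sides). A reducer whose output types differ from its input types cannot double as a combiner.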

From source file: PerTaskTally.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerTaskTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Aniket changes starts
    /* Here the partitioner is being called*/
    job.setPartitionerClass(WordPartitioner.class);
    // Aniket changes ends
    // Part 4 Aniket changes starts
    /* Here I am just disabling the combiner */
    // job.setCombinerClass(IntSumReducer.class);
    // Part 4 Aniket changes ends
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
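
The WordPartitioner wired in above is not part of this snippet; a hypothetical implementation that routes words by their first character might look like this (imports omitted, matching the style of the other snippets):

    public class WordPartitioner extends Partitioner<Text, IntWritable> {
        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            String word = key.toString();
            // Empty keys go to partition 0; everything else is spread by first character.
            return word.isEmpty() ? 0 : Character.toLowerCase(word.charAt(0)) % numPartitions;
        }
    }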

From source file: WordCount_PerTaskTally.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerTaskTally.class);
    job.setMapperClass(TokenizerMapper.class);

    // disable combiner

    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
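
With job.setNumReduceTasks(5), getPartition must return a value in the range [0, 5); the partitioner itself needs no change, because numPartitions is passed in on every call. And with the combiner disabled, every (word, 1) pair emitted by the mappers reaches the reducers unaggregated, which is presumably the point of a per-task tally.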

From source file: ComputeCooccurrenceMatrixStripes.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(String2IntOpenHashMapWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: Distinct.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Distinct <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "distinct1");
    job.setJarByClass(Distinct.class);
    job.setMapperClass(TokenizerMapper1.class);
    job.setReducerClass(Reducer1.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(tempDir));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.waitForCompletion(true);

    Configuration conf2 = new Configuration();
    Job job2 = Job.getInstance(conf2, "distinct2");
    job2.setJarByClass(Distinct.class);
    job2.setMapperClass(TokenizerMapper2.class);
    job2.setReducerClass(Reducer2.class);
    FileInputFormat.addInputPath(job2, new Path(tempDir));
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
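
The intermediate directory is left in place after the second job finishes; a short cleanup sketch, assuming the same tempDir field used above:

    boolean ok = job2.waitForCompletion(true);
    FileSystem.get(conf2).delete(new Path(tempDir), true);
    System.exit(ok ? 0 : 1);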

From source file: BigBWA.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    for (String argumento : args) {
        LOG.info("Arg: " + argumento);
    }

    String inputPath = "";
    String outputPath = "";

    boolean useReducer = false;

    BwaOptions options = new BwaOptions(args);

    //We set the timeout and set up the BWA library used to call BWA methods
    conf.set("mapreduce.task.timeout", "0");
    conf.set("mapreduce.map.env", "LD_LIBRARY_PATH=./bwa.zip/");

    //==================Algorithm selection==================
    //Exactly one algorithm will be in use, because the default is always specified.
    if (options.isMemAlgorithm()) {
        //Case of the mem algorithm
        conf.set("mem", "true");
        conf.set("aln", "false");
        conf.set("bwasw", "false");
    }

    else if (options.isAlnAlgorithm()) {
        // Case of aln algorithm
        conf.set("mem", "false");
        conf.set("aln", "true");
        conf.set("bwasw", "false");
    }

    else if (options.isBwaswAlgorithm()) {
        // Case of bwasw algorithm
        conf.set("mem", "false");
        conf.set("aln", "false");
        conf.set("bwasw", "true");
    }

    //==================Index selection==================
    if (!options.getIndexPath().isEmpty()) {
        conf.set("indexRoute", options.getIndexPath());
    } else {
        System.err.println("No index has been found. Aborting.");
        System.exit(1);
    }

    //==================Read type selection==================
    //There is always a read type, because the default is paired
    if (options.isPairedReads()) {
        conf.set("paired", "true");
        conf.set("single", "false");
    } else if (options.isSingleReads()) {
        conf.set("paired", "false");
        conf.set("single", "true");
    }

    //==================Use of reducer==================
    if (options.isUseReducer()) {
        useReducer = true;
        conf.set("useReducer", "true");
    } else {
        conf.set("useReducer", "false");
    }

    //==================Number of threads per map==================
    if (!"0".equals(options.getNumThreads())) {
        conf.set("bwathreads", options.getNumThreads());
    }

    //==================RG Header===================
    if (!options.getReadgroupHeader().isEmpty()) {
        conf.set("rgheader", options.getReadgroupHeader());
    }

    //==================Input and output paths==================
    inputPath = options.getInputPath();
    outputPath = options.getOutputPath();

    conf.set("outputGenomics", outputPath);

    //==================Partition number==================
    if (options.getPartitionNumber() != 0) {
        try {
            FileSystem fs = FileSystem.get(conf);

            Path inputFilePath = new Path(inputPath);

            ContentSummary cSummary = fs.getContentSummary(inputFilePath);

            long length = cSummary.getLength();

            fs.close();

            conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / options.getPartitionNumber()));
            conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / options.getPartitionNumber()));
        } catch (IOException e) {
            e.printStackTrace();
            LOG.error(e.toString());

            System.exit(1);
        }

    }

    //Job job = new Job(conf,"BigBWA_"+outputPath);
    Job job = Job.getInstance(conf, "BigBWA_" + outputPath);

    job.setJarByClass(BigBWA.class);
    job.setMapperClass(BigBWAMap.class);
    //job.setCombinerClass(BigBWACombiner.class);

    if (useReducer) {
        job.setReducerClass(BigBWAReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setNumReduceTasks(1);
    } else {
        job.setNumReduceTasks(0);
    }

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return (job.waitForCompletion(true) ? 0 : 1);
}
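
When no reducer is requested, job.setNumReduceTasks(0) turns this into a map-only job: the shuffle and sort phases are skipped and map output goes straight to the OutputFormat, which is why the output key/value classes here describe what the mapper emits.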

From source file: BigramRelativeFrequencyTuple.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int reduceTasks = Integer.parseInt(args[2]);

    LOG.info("Tool name: " + BigramRelativeFrequencyTuple.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequencyTuple.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequencyTuple.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(BinSedesTuple.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(BinSedesTuple.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}