Example usages of org.apache.hadoop.mapreduce.Job.setCombinerClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job.setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the combiner class for the job.

Usage

From source file:jobs.MatrixBlockSum.java

License:Apache License

/**
 * Configures and runs the "MatrixBlockSum" job.
 *
 * <p>Arguments: {@code args[0]} is the input path (read as sequence files), {@code args[1]}
 * is the output path (also stored under the "RESNAME" key), and {@code args[2]} is the
 * number of reduce tasks. Optional {@code args[3]} and {@code args[4]} are parsed as
 * integers and stored under the "SR" and "SC" keys.
 *
 * @param args command-line arguments as described above
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if an argument is missing or malformed, or the job cannot be run
 */
public int run(String[] args) throws Exception {
    Configuration settings = getConf();

    int reduceTaskCount = Integer.parseInt(args[2]);
    settings.setInt("mapred.reduce.tasks", reduceTaskCount);

    // Optional integer parameters, stored under the "SR" and "SC" keys.
    if (args.length >= 4) {
        settings.setInt("SR", Integer.parseInt(args[3]));
    }
    if (args.length >= 5) {
        settings.setInt("SC", Integer.parseInt(args[4]));
    }

    String outputLocation = args[1];
    settings.set("RESNAME", outputLocation);

    // Heap space for the child JVMs; ideally this would be passed with the -D option.
    final String heapOpts = "-Xmx3G";
    settings.set("mapred.map.child.java.opts", heapOpts);
    settings.set("mapred.reduce.child.java.opts", heapOpts);

    // Every configuration value above is set before the job is created from it.
    Job sumJob = new Job(settings, "MatrixBlockSum");
    sumJob.setJarByClass(MatrixBlockSum.class);

    // Input side: no mapper class is set; records come from sequence files.
    sumJob.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(sumJob, new Path(args[0]));

    // Reduce side: the same class serves as both combiner and reducer.
    sumJob.setReducerClass(MatrixBlockSumReducer.class);
    sumJob.setCombinerClass(MatrixBlockSumReducer.class);
    sumJob.setOutputKeyClass(Text.class);
    sumJob.setOutputValueClass(MatrixBlock.class);

    // Output side: sequence files written under the output path.
    sumJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(sumJob, new Path(outputLocation));

    boolean succeeded = sumJob.waitForCompletion(false);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:kogiri.mapreduce.preprocess.indexing.stage2.KmerIndexBuilder.java

License:Open Source License

/**
 * Builds k-mer indices by running one MapReduce job ("round") per FASTA input file.
 *
 * <p>Each round writes to {@code <kmerIndexPath>_round<N>}. When a round succeeds, its output
 * is committed via {@code commitRoundIndexOutputFiles} and an index of the index is created.
 * When a round fails, the error is logged and no further rounds are started. If a report
 * path is configured, a report covering every job that was run is written at the end.
 *
 * @param ppConfig preprocessor configuration (validated first)
 * @return 0 if every round succeeded, 1 if any round failed
 * @throws Exception if validation, job setup, job execution, or committing output fails
 */
private int runJob(PreprocessorConfig ppConfig) throws Exception {
    // check config
    validatePreprocessorConfig(ppConfig);

    // configuration
    Configuration conf = this.getConf();

    // set user configuration
    ppConfig.getClusterConfiguration().configureTo(conf);
    ppConfig.saveTo(conf);

    Path[] inputFiles = FileSystemHelper.getAllFastaFilePath(conf, ppConfig.getFastaPath());

    boolean job_result = true;
    List<Job> jobs = new ArrayList<Job>();

    for (int round = 0; round < inputFiles.length; round++) {
        Path roundInputFile = inputFiles[round];
        String roundOutputPath = ppConfig.getKmerIndexPath() + "_round" + round;

        Job job = new Job(conf,
                "Kogiri Preprocessor - Building Kmer Indices (" + round + " of " + inputFiles.length + ")");
        job.setJarByClass(KmerIndexBuilder.class);

        // Mapper
        job.setMapperClass(KmerIndexBuilderMapper.class);
        job.setInputFormatClass(FastaReadInputFormat.class);
        job.setMapOutputKeyClass(CompressedSequenceWritable.class);
        job.setMapOutputValueClass(CompressedIntArrayWritable.class);

        // Combiner
        job.setCombinerClass(KmerIndexBuilderCombiner.class);

        // Partitioner
        job.setPartitionerClass(KmerIndexBuilderPartitioner.class);

        // Reducer
        job.setReducerClass(KmerIndexBuilderReducer.class);

        // Specify key / value
        job.setOutputKeyClass(CompressedSequenceWritable.class);
        job.setOutputValueClass(CompressedIntArrayWritable.class);

        // Inputs
        FileInputFormat.addInputPaths(job, roundInputFile.toString());

        LOG.info("Input file : ");
        LOG.info("> " + roundInputFile.toString());

        String histogramFileName = KmerHistogramHelper.makeKmerHistogramFileName(roundInputFile.getName());
        Path histogramPath = new Path(ppConfig.getKmerHistogramPath(), histogramFileName);

        KmerIndexBuilderPartitioner.setHistogramPath(job.getConfiguration(), histogramPath);

        FileOutputFormat.setOutputPath(job, new Path(roundOutputPath));
        job.setOutputFormatClass(MapFileOutputFormat.class);

        // Use many reducers: half the cores of each machine, but never fewer than one per
        // machine. The clamped value must be the one used in the product below; otherwise
        // a single-core machine yields zero reducers and the job runs without a reduce phase.
        int reducersPerNode = ppConfig.getClusterConfiguration().getMachineCores() / 2;
        if (reducersPerNode < 1) {
            reducersPerNode = 1;
        }
        int reducers = ppConfig.getClusterConfiguration().getMachineNum() * reducersPerNode;
        LOG.info("Reducers : " + reducers);
        job.setNumReduceTasks(reducers);

        // Execute job and return status
        boolean result = job.waitForCompletion(true);

        // Record the job whether or not it succeeded so the report covers failures too.
        jobs.add(job);

        if (result) {
            // commit results
            commitRoundIndexOutputFiles(roundInputFile, new Path(roundOutputPath),
                    new Path(ppConfig.getKmerIndexPath()), job.getConfiguration(), ppConfig.getKmerSize());

            // create index of index
            createIndexOfIndex(new Path(ppConfig.getKmerIndexPath()), roundInputFile, job.getConfiguration(),
                    ppConfig.getKmerSize());
        } else {
            LOG.error("job failed at round " + round + " of " + inputFiles.length);
            job_result = false;
            break;
        }
    }

    // report
    if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(jobs);
        report.writeTo(ppConfig.getReportPath());
    }

    return job_result ? 0 : 1;
}

From source file:kogiri.mapreduce.readfrequency.modecount.ModeCounter.java

License:Open Source License

/**
 * Runs one MapReduce job ("round") per entry of the k-mer match file mapping.
 *
 * <p>Every round reads all k-mer match result files, uses the round number as the master
 * file ID, and writes to {@code <readFrequencyPath>_round<N>}. A successful round has its
 * output committed via {@code commitRoundOutputFiles}; a failed round is logged and stops
 * the loop. If a report path is configured, a report covering the executed jobs is written.
 *
 * @param rfConfig read-frequency counter configuration (validated first)
 * @return 0 if every round succeeded, 1 if any round failed
 * @throws Exception if validation, job setup, job execution, or committing output fails
 */
private int runJob(ReadFrequencyCounterConfig rfConfig) throws Exception {
    validateReadFrequencyCounterConfig(rfConfig);

    // Push the cluster and user settings into the Hadoop configuration.
    Configuration hadoopConf = this.getConf();
    rfConfig.getClusterConfiguration().configureTo(hadoopConf);
    rfConfig.saveTo(hadoopConf);

    // Load the k-mer match table, which maps file IDs to FASTA file names.
    Path matchTablePath = new Path(rfConfig.getKmerMatchPath(),
            KmerMatchHelper.makeKmerMatchTableFileName());
    FileSystem tableFs = matchTablePath.getFileSystem(hadoopConf);
    KmerMatchFileMapping fileMapping = KmerMatchFileMapping.createInstance(tableFs, matchTablePath);

    Path[] matchResultFiles = KmerMatchHelper.getAllKmerMatchResultFilePath(hadoopConf,
            rfConfig.getKmerMatchPath());

    // One named output per FASTA file listed in the mapping.
    NamedOutputs namedOutputs = new NamedOutputs();
    for (int fileId = 0; fileId < fileMapping.getSize(); fileId++) {
        namedOutputs.add(fileMapping.getFastaFileFromID(fileId));
    }
    namedOutputs.saveTo(hadoopConf);

    List<Job> executedJobs = new ArrayList<Job>();
    boolean allRoundsSucceeded = true;

    for (int round = 0; round < fileMapping.getSize(); round++) {
        String roundOutputPath = rfConfig.getReadFrequencyPath() + "_round" + round;

        String roundJobName = "Kogiri Preprocessor - Computing Mode of Kmer Frequency (" + round + " of "
                + fileMapping.getSize() + ")";
        Job roundJob = new Job(hadoopConf, roundJobName);
        roundJob.setJarByClass(ModeCounter.class);

        // Map side
        roundJob.setInputFormatClass(TextInputFormat.class);
        roundJob.setMapperClass(ModeCounterMapper.class);
        roundJob.setMapOutputKeyClass(MultiFileIntWritable.class);
        roundJob.setMapOutputValueClass(CompressedIntArrayWritable.class);

        // Combine / partition / reduce
        roundJob.setCombinerClass(ModeCounterCombiner.class);
        roundJob.setPartitionerClass(ModeCounterPartitioner.class);
        roundJob.setReducerClass(ModeCounterReducer.class);

        // Final output key / value types
        roundJob.setOutputKeyClass(Text.class);
        roundJob.setOutputValueClass(Text.class);

        // Every round reads the full set of match result files.
        FileInputFormat.addInputPaths(roundJob, FileSystemHelper.makeCommaSeparated(matchResultFiles));

        // The round number is handed to the job as the master file ID.
        ModeCounterConfig roundConfig = new ModeCounterConfig();
        roundConfig.setMasterFileID(round);
        roundConfig.saveTo(roundJob.getConfiguration());

        FileOutputFormat.setOutputPath(roundJob, new Path(roundOutputPath));
        roundJob.setOutputFormatClass(TextOutputFormat.class);

        for (NamedOutputRecord record : namedOutputs.getRecord()) {
            MultipleOutputs.addNamedOutput(roundJob, record.getIdentifier(), TextOutputFormat.class,
                    Text.class, Text.class);
        }

        // Run the round and remember the job regardless of its outcome.
        boolean roundSucceeded = roundJob.waitForCompletion(true);
        executedJobs.add(roundJob);

        if (roundSucceeded) {
            // commit results
            commitRoundOutputFiles(new Path(roundOutputPath), new Path(rfConfig.getReadFrequencyPath()),
                    roundJob.getConfiguration(), namedOutputs, round);
        } else {
            LOG.error("job failed at round " + round + " of " + fileMapping.getSize());
            allRoundsSucceeded = false;
            break;
        }
    }

    // Write the report only when a non-empty report path is configured.
    if (rfConfig.getReportPath() != null && !rfConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(executedJobs);
        report.writeTo(rfConfig.getReportPath());
    }

    if (allRoundsSucceeded) {
        return 0;
    }
    return 1;
}

From source file:libra.preprocess.stage2.KmerIndexBuilder.java

License:Apache License

/**
 * Builds k-mer indexes by running one MapReduce job ("round") per FASTA input file.
 *
 * <p>Each round writes to {@code <kmerIndexPath>_round<N>}. When a round succeeds, its output
 * is committed, an index of the index is created, and index statistics are created from the
 * job counters. When a round fails, the error is logged and no further rounds are started.
 * If a report path is configured, a report covering every job that was run is written.
 *
 * @param ppConfig preprocessor configuration (validated first)
 * @return 0 if every round succeeded, 1 if any round failed
 * @throws Exception if validation, job setup, job execution, or committing output fails
 */
private int runJob(PreprocessorConfig ppConfig) throws Exception {
    // check config
    validatePreprocessorConfig(ppConfig);

    // configuration
    Configuration conf = this.getConf();

    // set user configuration
    ppConfig.saveTo(conf);

    Path[] inputFiles = FileSystemHelper.getAllFastaFilePath(conf, ppConfig.getFastaPath());

    boolean job_result = true;
    List<Job> jobs = new ArrayList<Job>();

    for (int round = 0; round < inputFiles.length; round++) {
        Path roundInputFile = inputFiles[round];
        String roundOutputPath = ppConfig.getKmerIndexPath() + "_round" + round;

        Job job = new Job(conf,
                "Libra Preprocessor - Building Kmer Indexes (" + round + " of " + inputFiles.length + ")");
        job.setJarByClass(KmerIndexBuilder.class);

        // Mapper
        job.setMapperClass(KmerIndexBuilderMapper.class);
        // The Job holds its own copy of the Configuration it was created from, so the k-mer
        // size has to be set on the job's configuration. Setting it on `conf` here would
        // only reach jobs created in later rounds.
        FastaKmerInputFormat.setKmerSize(job.getConfiguration(), ppConfig.getKmerSize());
        job.setInputFormatClass(FastaKmerInputFormat.class);
        job.setMapOutputKeyClass(CompressedSequenceWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Combiner
        job.setCombinerClass(KmerIndexBuilderCombiner.class);

        // Partitioner
        job.setPartitionerClass(KmerIndexBuilderPartitioner.class);

        // Reducer
        job.setReducerClass(KmerIndexBuilderReducer.class);

        // Specify key / value
        job.setOutputKeyClass(CompressedSequenceWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Inputs
        FileInputFormat.addInputPaths(job, roundInputFile.toString());

        LOG.info("Input file : ");
        LOG.info("> " + roundInputFile.toString());

        String histogramFileName = KmerHistogramHelper.makeKmerHistogramFileName(roundInputFile.getName());
        Path histogramPath = new Path(ppConfig.getKmerHistogramPath(), histogramFileName);

        KmerIndexBuilderPartitioner.setHistogramPath(job.getConfiguration(), histogramPath);

        FileOutputFormat.setOutputPath(job, new Path(roundOutputPath));
        job.setOutputFormatClass(MapFileOutputFormat.class);

        // Use many reducers: respect an explicitly configured reducer count, otherwise
        // default to two reducers per node.
        int reducers = conf.getInt("mapred.reduce.tasks", 0);
        if (reducers <= 0) {
            int MRNodes = MapReduceClusterHelper.getNodeNum(conf);
            reducers = MRNodes * 2;
            job.setNumReduceTasks(reducers);
        }
        LOG.info("Reducers : " + reducers);

        // Execute job and return status
        boolean result = job.waitForCompletion(true);

        // Record the job whether or not it succeeded so the report covers failures too.
        jobs.add(job);

        if (result) {
            // commit results
            commitRoundIndexOutputFiles(roundInputFile, new Path(roundOutputPath),
                    new Path(ppConfig.getKmerIndexPath()), job.getConfiguration(), ppConfig.getKmerSize());

            // create index of index
            createIndexOfIndex(new Path(ppConfig.getKmerIndexPath()), roundInputFile, job.getConfiguration(),
                    ppConfig.getKmerSize());

            // create statistics of index
            createStatisticsOfIndex(new Path(ppConfig.getKmerStatisticsPath()), roundInputFile,
                    job.getConfiguration(), job.getCounters(), ppConfig.getKmerSize());
        } else {
            LOG.error("job failed at round " + round + " of " + inputFiles.length);
            job_result = false;
            break;
        }
    }

    // report
    if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(jobs);
        report.writeTo(ppConfig.getReportPath());
    }

    return job_result ? 0 : 1;
}

From source file:mapreduce.wordcount.WordCountSimple.java

License:Apache License

/**
 * Runs the "word count" job, reading from {@code args[0]} and writing to {@code args[1]}.
 * The JVM exits with status 0 if the job succeeds and 1 otherwise.
 *
 * @param args input path followed by output path
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    Job wordCountJob = Job.getInstance(configuration, "word count");
    wordCountJob.setJarByClass(WordCountSimple.class);

    // The reducer class doubles as the combiner.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setReducerClass(IntSumReducer.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);

    wordCountJob.setOutputValueClass(IntWritable.class);
    wordCountJob.setOutputKeyClass(Text.class);

    Path inputPath = new Path(args[0]);
    FileInputFormat.addInputPath(wordCountJob, inputPath);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(wordCountJob, outputPath);

    boolean succeeded = wordCountJob.waitForCompletion(true);
    int exitStatus = succeeded ? 0 : 1;
    System.exit(exitStatus);
}

From source file:mapReduceBasics.WordCount.java

/**
 * Runs the "word count" job after applying Hadoop generic options from the command line.
 *
 * <p>The first remaining argument is the input path and the second is the output path.
 * Any existing output path is deleted recursively before the job is submitted. The JVM
 * exits with status 0 if the job succeeds, 1 if it fails, and 2 on a usage error.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(conf, args);
    Configuration newConf = optionsParser.getConfiguration();
    String[] otherArgs = optionsParser.getRemainingArgs();
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException further down.
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    System.err.println("Best of Luck");
    Job job = new Job(newConf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    Path inputPath = new Path(otherArgs[0]);
    Path outputPath = new Path(otherArgs[1]);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Resolve the filesystem from the output path itself so that the delete also works
    // when the path lives on a filesystem other than the configured default.
    FileSystem fs = outputPath.getFileSystem(newConf);
    // Remove any previous output (recursively) so the job does not fail on an existing dir.
    fs.delete(outputPath, true);

    System.err.println("Input Format - " + job.getInputFormatClass());
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:maxint.maxint.java

License:Apache License

/**
 * Runs the "max integer" job. After Hadoop generic options are consumed, exactly two
 * arguments are expected: the input path and the output path. The JVM exits with status 2
 * on a usage error, 0 if the job succeeds, and 1 if it fails.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
    String[] remaining = optionsParser.getRemainingArgs();

    boolean wrongArgCount = remaining.length != 2;
    if (wrongArgCount) {
        System.err.println("Usage: max integer <in> <out>");
        System.exit(2);
    }

    Job maxIntJob = new Job(configuration, "max integer");
    maxIntJob.setJarByClass(maxint.class);

    // The reducer class doubles as the combiner.
    maxIntJob.setMapperClass(TokenizerMapper.class);
    maxIntJob.setReducerClass(MaxIntReducer.class);
    maxIntJob.setCombinerClass(MaxIntReducer.class);

    maxIntJob.setOutputValueClass(IntWritable.class);
    maxIntJob.setOutputKeyClass(Text.class);

    Path inputPath = new Path(remaining[0]);
    FileInputFormat.addInputPath(maxIntJob, inputPath);
    Path outputPath = new Path(remaining[1]);
    FileOutputFormat.setOutputPath(maxIntJob, outputPath);

    boolean succeeded = maxIntJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:merge.MergeSameData.java

License:Open Source License

/**
 * Runs the "Statistics Total Data Num" job. After Hadoop generic options are consumed,
 * exactly two arguments are expected: the input path and the output path. The JVM exits
 * with status 2 on a usage error, 0 if the job succeeds, and 1 if it fails.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
    String[] remaining = optionsParser.getRemainingArgs();

    boolean wrongArgCount = remaining.length != 2;
    if (wrongArgCount) {
        System.err.println("Usage: StatisticsTotalDataNum <in> <out>");
        System.exit(2);
    }

    Job mergeJob = new Job(configuration, "Statistics Total Data Num");
    mergeJob.setJarByClass(MergeSameData.class);

    // The reducer class doubles as the combiner.
    mergeJob.setMapperClass(TokenizerMapper.class);
    mergeJob.setReducerClass(IntSumReducer.class);
    mergeJob.setCombinerClass(IntSumReducer.class);

    mergeJob.setOutputValueClass(IntWritable.class);
    mergeJob.setOutputKeyClass(Text.class);

    Path inputPath = new Path(remaining[0]);
    FileInputFormat.addInputPath(mergeJob, inputPath);
    Path outputPath = new Path(remaining[1]);
    FileOutputFormat.setOutputPath(mergeJob, outputPath);

    boolean succeeded = mergeJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:muming.examples.WordCount.java

License:Apache License

/**
 * Runs the "word count" job over one or more input paths. After Hadoop generic options are
 * consumed, the last remaining argument is the output path and every argument before it is
 * an input path. The JVM exits with status 2 on a usage error, 0 if the job succeeds, and
 * 1 if it fails.
 *
 * @param args generic Hadoop options followed by {@code <in> [<in>...] <out>}
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
    String[] remaining = optionsParser.getRemainingArgs();

    boolean tooFewArgs = remaining.length < 2;
    if (tooFewArgs) {
        System.err.println("Usage Of Muming: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job wordCountJob = new Job(configuration, "word count");
    wordCountJob.setJarByClass(WordCount.class);

    // The reducer class doubles as the combiner.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setReducerClass(IntSumReducer.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);

    wordCountJob.setOutputValueClass(IntWritable.class);
    wordCountJob.setOutputKeyClass(Text.class);

    // Everything before the final argument is an input path; the final one is the output.
    int outputIndex = remaining.length - 1;
    for (int inputIndex = 0; inputIndex < outputIndex; inputIndex++) {
        Path inputPath = new Path(remaining[inputIndex]);
        FileInputFormat.addInputPath(wordCountJob, inputPath);
    }
    Path outputPath = new Path(remaining[outputIndex]);
    FileOutputFormat.setOutputPath(wordCountJob, outputPath);

    boolean succeeded = wordCountJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:mvm.rya.accumulo.mr.eval.AccumuloRdfCountTool.java

License:Apache License

/**
 * cloudbase props
 */

/**
 * Runs the "Gather Evaluation Statistics" job: reads through the configured Accumulo input,
 * maps with {@code CountPiecesMapper}, combines with {@code CountPiecesCombiner}, reduces
 * with {@code CountPiecesReducer}, and writes to the table named by the table prefix plus
 * the evaluation-table suffix. Start and end times are printed to standard output.
 *
 * @param strings command-line arguments (not read by this method)
 * @return 0 if the job succeeds, -1 if it fails
 * @throws Exception if initialization, job setup, or job execution fails
 */
@Override
public int run(String[] strings) throws Exception {
    conf.set(MRUtils.JOB_NAME_PROP, "Gather Evaluation Statistics");

    // Initialize after the job name property has been set.
    init();

    Job countJob = new Job(conf);
    countJob.setJarByClass(AccumuloRdfCountTool.class);
    setupInputFormat(countJob);

    // Scan range running from the empty row up to a row of the single byte Byte.MAX_VALUE.
    Text rangeStart = new Text(new byte[] {});
    Text rangeEnd = new Text(new byte[] { Byte.MAX_VALUE });
    AccumuloInputFormat.setRanges(countJob, Lists.newArrayList(new Range(rangeStart, rangeEnd)));

    // Mapper, combiner and reducer classes.
    countJob.setMapperClass(CountPiecesMapper.class);
    countJob.setCombinerClass(CountPiecesCombiner.class);
    countJob.setReducerClass(CountPiecesReducer.class);

    // Intermediate and final key / value types.
    countJob.setMapOutputKeyClass(Text.class);
    countJob.setMapOutputValueClass(LongWritable.class);
    countJob.setOutputKeyClass(Text.class);
    countJob.setOutputValueClass(Mutation.class);

    String evalTable = tablePrefix + RdfCloudTripleStoreConstants.TBL_EVAL_SUFFIX;
    setupOutputFormat(countJob, evalTable);

    // Submit the job and wait for it to finish.
    Date startedAt = new Date();
    System.out.println("Job started: " + startedAt);
    boolean succeeded = countJob.waitForCompletion(true);

    if (!succeeded) {
        System.out.println("Job Failed!!!");
        return -1;
    }

    Date endedAt = new Date();
    System.out.println("Job ended: " + endedAt);
    System.out.println("The job took " + (endedAt.getTime() - startedAt.getTime()) / 1000 + " seconds.");
    return 0;
}