Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Source Link

Documentation

Set the combiner class for the job.

Usage

From source file:jobs.MatrixBlockSum.java

License:Apache License

/**
 * Configures and runs the "MatrixBlockSum" job.
 *
 * <p>Arguments, as read by this method:
 * <ul>
 *   <li>{@code args[0]} - input path (read as sequence files)</li>
 *   <li>{@code args[1]} - output path; also stored in the configuration under "RESNAME"</li>
 *   <li>{@code args[2]} - number of reduce tasks</li>
 *   <li>{@code args[3]} - optional integer stored under "SR"</li>
 *   <li>{@code args[4]} - optional integer stored under "SC"</li>
 * </ul>
 *
 * @param args command line arguments as described above
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if an argument cannot be parsed or the job cannot be run
 */
public int run(String[] args) throws Exception {
    final Configuration configuration = getConf();

    final int reduceTasks = Integer.parseInt(args[2]);
    configuration.setInt("mapred.reduce.tasks", reduceTasks);

    // NOTE(review): "SR" / "SC" are presumably the block row/column sizes - confirm with the reducer.
    if (args.length >= 4) {
        configuration.setInt("SR", Integer.parseInt(args[3]));
    }
    if (args.length >= 5) {
        configuration.setInt("SC", Integer.parseInt(args[4]));
    }

    configuration.set("RESNAME", args[1]);

    // Heap space for the child JVMs; ideally this would be supplied with the -D option instead.
    configuration.set("mapred.map.child.java.opts", "-Xmx3G");
    configuration.set("mapred.reduce.child.java.opts", "-Xmx3G");

    final Job sumJob = new Job(configuration, "MatrixBlockSum");
    sumJob.setJarByClass(MatrixBlockSum.class);

    // Input side: no mapper class is set, only the input format and path.
    sumJob.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(sumJob, new Path(args[0]));

    // Reduce side: the same class acts as combiner and reducer.
    sumJob.setCombinerClass(MatrixBlockSumReducer.class);
    sumJob.setReducerClass(MatrixBlockSumReducer.class);
    sumJob.setOutputKeyClass(Text.class);
    sumJob.setOutputValueClass(MatrixBlock.class);

    // Output side.
    sumJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(sumJob, new Path(args[1]));

    final boolean succeeded = sumJob.waitForCompletion(false);
    return succeeded ? 0 : 1;
}

From source file:kogiri.mapreduce.preprocess.indexing.stage2.KmerIndexBuilder.java

License:Open Source License

/**
 * Builds the kmer index by running one MapReduce job per FASTA input file ("round").
 *
 * <p>Each round writes to {@code <kmerIndexPath>_round<N>}; when the round succeeds its output
 * is committed into the kmer index path and an index of the index is created. The first failed
 * round stops the loop. If a report path is configured, a report covering every executed job
 * (including a failed one) is written at the end.
 *
 * @param ppConfig preprocessor configuration; validated before use
 * @return 0 if every round succeeded, 1 if a round failed
 * @throws Exception if validation, job submission, committing or reporting fails
 */
private int runJob(PreprocessorConfig ppConfig) throws Exception {
    // check config
    validatePreprocessorConfig(ppConfig);

    // configuration
    Configuration conf = this.getConf();

    // set user configuration
    ppConfig.getClusterConfiguration().configureTo(conf);
    ppConfig.saveTo(conf);

    Path[] inputFiles = FileSystemHelper.getAllFastaFilePath(conf, ppConfig.getFastaPath());

    boolean job_result = true;
    List<Job> jobs = new ArrayList<Job>();

    for (int round = 0; round < inputFiles.length; round++) {
        Path roundInputFile = inputFiles[round];
        String roundOutputPath = ppConfig.getKmerIndexPath() + "_round" + round;

        Job job = new Job(conf,
                "Kogiri Preprocessor - Building Kmer Indices (" + round + " of " + inputFiles.length + ")");
        job.setJarByClass(KmerIndexBuilder.class);

        // Mapper
        job.setMapperClass(KmerIndexBuilderMapper.class);
        job.setInputFormatClass(FastaReadInputFormat.class);
        job.setMapOutputKeyClass(CompressedSequenceWritable.class);
        job.setMapOutputValueClass(CompressedIntArrayWritable.class);

        // Combiner
        job.setCombinerClass(KmerIndexBuilderCombiner.class);

        // Partitioner
        job.setPartitionerClass(KmerIndexBuilderPartitioner.class);

        // Reducer
        job.setReducerClass(KmerIndexBuilderReducer.class);

        // Specify key / value
        job.setOutputKeyClass(CompressedSequenceWritable.class);
        job.setOutputValueClass(CompressedIntArrayWritable.class);

        // Inputs
        FileInputFormat.addInputPaths(job, roundInputFile.toString());

        LOG.info("Input file : ");
        LOG.info("> " + roundInputFile.toString());

        // The partitioner is pointed at the histogram file that belongs to this round's input.
        String histogramFileName = KmerHistogramHelper.makeKmerHistogramFileName(roundInputFile.getName());
        Path histogramPath = new Path(ppConfig.getKmerHistogramPath(), histogramFileName);

        KmerIndexBuilderPartitioner.setHistogramPath(job.getConfiguration(), histogramPath);

        FileOutputFormat.setOutputPath(job, new Path(roundOutputPath));
        job.setOutputFormatClass(MapFileOutputFormat.class);

        // Use many reducers: half the cores of each machine, but at least one per machine.
        // The clamped per-node value must be the one multiplied; otherwise a single-core
        // configuration yields 0 reducers and the job silently becomes map-only.
        int reducersPerNode = Math.max(1, ppConfig.getClusterConfiguration().getMachineCores() / 2);
        int reducers = ppConfig.getClusterConfiguration().getMachineNum() * reducersPerNode;
        LOG.info("Reducers : " + reducers);
        job.setNumReduceTasks(reducers);

        // Execute job and return status
        boolean result = job.waitForCompletion(true);

        // Record the job even when it failed so that it shows up in the report.
        jobs.add(job);

        if (result) {
            // commit results
            commitRoundIndexOutputFiles(roundInputFile, new Path(roundOutputPath),
                    new Path(ppConfig.getKmerIndexPath()), job.getConfiguration(), ppConfig.getKmerSize());

            // create index of index
            createIndexOfIndex(new Path(ppConfig.getKmerIndexPath()), roundInputFile, job.getConfiguration(),
                    ppConfig.getKmerSize());
        } else {
            LOG.error("job failed at round " + round + " of " + inputFiles.length);
            job_result = false;
            break;
        }
    }

    // report
    if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(jobs);
        report.writeTo(ppConfig.getReportPath());
    }

    return job_result ? 0 : 1;
}

From source file:kogiri.mapreduce.readfrequency.modecount.ModeCounter.java

License:Open Source License

/**
 * Runs one mode-counting MapReduce job per entry of the kmer match file mapping.
 *
 * <p>Every round reads all kmer match result files, uses the round number as the master file
 * id, and writes to {@code <readFrequencyPath>_round<N>}. A successful round is committed into
 * the read frequency path; the first failing round ends the loop. When a report path is
 * configured, a report covering all executed jobs is written afterwards.
 *
 * @param rfConfig read frequency counter configuration; validated before use
 * @return 0 if all rounds succeeded, 1 if a round failed
 * @throws Exception if validation, job execution, committing or reporting fails
 */
private int runJob(ReadFrequencyCounterConfig rfConfig) throws Exception {
    validateReadFrequencyCounterConfig(rfConfig);

    // Start from the tool's configuration and layer the user settings on top.
    Configuration conf = this.getConf();
    rfConfig.getClusterConfiguration().configureTo(conf);
    rfConfig.saveTo(conf);

    // Load the table file that maps file ids to FASTA file names.
    Path tableFilePath = new Path(rfConfig.getKmerMatchPath(), KmerMatchHelper.makeKmerMatchTableFileName());
    FileSystem tableFileSystem = tableFilePath.getFileSystem(conf);
    KmerMatchFileMapping fileMapping = KmerMatchFileMapping.createInstance(tableFileSystem, tableFilePath);

    Path[] inputFiles = KmerMatchHelper.getAllKmerMatchResultFilePath(conf, rfConfig.getKmerMatchPath());

    // One named output per FASTA file, persisted into the configuration.
    NamedOutputs namedOutputs = new NamedOutputs();
    for (int fileId = 0; fileId < fileMapping.getSize(); fileId++) {
        namedOutputs.add(fileMapping.getFastaFileFromID(fileId));
    }
    namedOutputs.saveTo(conf);

    boolean allRoundsSucceeded = true;
    List<Job> executedJobs = new ArrayList<Job>();

    for (int round = 0; round < fileMapping.getSize(); round++) {
        String roundOutputPath = rfConfig.getReadFrequencyPath() + "_round" + round;

        String jobName = "Kogiri Preprocessor - Computing Mode of Kmer Frequency (" + round + " of "
                + fileMapping.getSize() + ")";
        Job job = new Job(conf, jobName);
        job.setJarByClass(ModeCounter.class);

        // Map side
        job.setMapperClass(ModeCounterMapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(MultiFileIntWritable.class);
        job.setMapOutputValueClass(CompressedIntArrayWritable.class);

        // Combine / partition / reduce
        job.setCombinerClass(ModeCounterCombiner.class);
        job.setPartitionerClass(ModeCounterPartitioner.class);
        job.setReducerClass(ModeCounterReducer.class);

        // Final output key / value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Every round consumes the complete set of match result files.
        FileInputFormat.addInputPaths(job, FileSystemHelper.makeCommaSeparated(inputFiles));

        // The round number is stored as the master file id in this job's own configuration.
        ModeCounterConfig modeCounterConfig = new ModeCounterConfig();
        modeCounterConfig.setMasterFileID(round);
        modeCounterConfig.saveTo(job.getConfiguration());

        FileOutputFormat.setOutputPath(job, new Path(roundOutputPath));
        job.setOutputFormatClass(TextOutputFormat.class);

        for (NamedOutputRecord record : namedOutputs.getRecord()) {
            MultipleOutputs.addNamedOutput(job, record.getIdentifier(), TextOutputFormat.class, Text.class,
                    Text.class);
        }

        // Run the job; it is recorded for the report whether or not it succeeded.
        boolean succeeded = job.waitForCompletion(true);
        executedJobs.add(job);

        if (succeeded) {
            // commit results
            commitRoundOutputFiles(new Path(roundOutputPath), new Path(rfConfig.getReadFrequencyPath()),
                    job.getConfiguration(), namedOutputs, round);
        } else {
            LOG.error("job failed at round " + round + " of " + fileMapping.getSize());
            allRoundsSucceeded = false;
            break;
        }
    }

    // report
    if (rfConfig.getReportPath() != null && !rfConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(executedJobs);
        report.writeTo(rfConfig.getReportPath());
    }

    return allRoundsSucceeded ? 0 : 1;
}

From source file:libra.preprocess.stage2.KmerIndexBuilder.java

License:Apache License

/**
 * Builds the kmer index by running one MapReduce job per FASTA input file ("round").
 *
 * <p>Each round writes to {@code <kmerIndexPath>_round<N>}; when the round succeeds its output
 * is committed into the kmer index path, an index of the index is created, and index
 * statistics are generated from the job counters. The first failed round stops the loop. If a
 * report path is configured, a report covering every executed job is written at the end.
 *
 * @param ppConfig preprocessor configuration; validated before use
 * @return 0 if every round succeeded, 1 if a round failed
 * @throws Exception if validation, job submission, committing or reporting fails
 */
private int runJob(PreprocessorConfig ppConfig) throws Exception {
    // check config
    validatePreprocessorConfig(ppConfig);

    // configuration
    Configuration conf = this.getConf();

    // set user configuration
    ppConfig.saveTo(conf);

    Path[] inputFiles = FileSystemHelper.getAllFastaFilePath(conf, ppConfig.getFastaPath());

    boolean job_result = true;
    List<Job> jobs = new ArrayList<Job>();

    for (int round = 0; round < inputFiles.length; round++) {
        Path roundInputFile = inputFiles[round];
        String roundOutputPath = ppConfig.getKmerIndexPath() + "_round" + round;

        Job job = new Job(conf,
                "Libra Preprocessor - Building Kmer Indexes (" + round + " of " + inputFiles.length + ")");
        job.setJarByClass(KmerIndexBuilder.class);

        // Mapper
        job.setMapperClass(KmerIndexBuilderMapper.class);
        // The Job holds its own copy of the Configuration taken at construction time, so the
        // kmer size has to be written to the job's configuration; writing it to 'conf' here
        // would not reach the job that was just created.
        FastaKmerInputFormat.setKmerSize(job.getConfiguration(), ppConfig.getKmerSize());
        job.setInputFormatClass(FastaKmerInputFormat.class);
        job.setMapOutputKeyClass(CompressedSequenceWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Combiner
        job.setCombinerClass(KmerIndexBuilderCombiner.class);

        // Partitioner
        job.setPartitionerClass(KmerIndexBuilderPartitioner.class);

        // Reducer
        job.setReducerClass(KmerIndexBuilderReducer.class);

        // Specify key / value
        job.setOutputKeyClass(CompressedSequenceWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Inputs
        FileInputFormat.addInputPaths(job, roundInputFile.toString());

        LOG.info("Input file : ");
        LOG.info("> " + roundInputFile.toString());

        // The partitioner is pointed at the histogram file that belongs to this round's input.
        String histogramFileName = KmerHistogramHelper.makeKmerHistogramFileName(roundInputFile.getName());
        Path histogramPath = new Path(ppConfig.getKmerHistogramPath(), histogramFileName);

        KmerIndexBuilderPartitioner.setHistogramPath(job.getConfiguration(), histogramPath);

        FileOutputFormat.setOutputPath(job, new Path(roundOutputPath));
        job.setOutputFormatClass(MapFileOutputFormat.class);

        // Use many reducers: honour an explicitly configured count, otherwise two per node.
        int reducers = conf.getInt("mapred.reduce.tasks", 0);
        if (reducers <= 0) {
            int MRNodes = MapReduceClusterHelper.getNodeNum(conf);
            reducers = MRNodes * 2;
            job.setNumReduceTasks(reducers);
        }
        LOG.info("Reducers : " + reducers);

        // Execute job and return status
        boolean result = job.waitForCompletion(true);

        // Record the job even when it failed so that it shows up in the report.
        jobs.add(job);

        if (result) {
            // commit results
            commitRoundIndexOutputFiles(roundInputFile, new Path(roundOutputPath),
                    new Path(ppConfig.getKmerIndexPath()), job.getConfiguration(), ppConfig.getKmerSize());

            // create index of index
            createIndexOfIndex(new Path(ppConfig.getKmerIndexPath()), roundInputFile, job.getConfiguration(),
                    ppConfig.getKmerSize());

            // create statistics of index
            createStatisticsOfIndex(new Path(ppConfig.getKmerStatisticsPath()), roundInputFile,
                    job.getConfiguration(), job.getCounters(), ppConfig.getKmerSize());
        } else {
            LOG.error("job failed at round " + round + " of " + inputFiles.length);
            job_result = false;
            break;
        }
    }

    // report
    if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(jobs);
        report.writeTo(ppConfig.getReportPath());
    }

    return job_result ? 0 : 1;
}

From source file:mapreduce.wordcount.WordCountSimple.java

License:Apache License

/**
 * Runs the word count job: {@code args[0]} is the input path, {@code args[1]} the output path.
 *
 * <p>The same class, {@code IntSumReducer}, is used as combiner and reducer. The JVM exits
 * with 0 when the job succeeds, 1 when it fails, and 2 when too few arguments are given.
 *
 * @param args command line arguments: {@code <in> <out>}
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    // Fail with a usage message rather than an ArrayIndexOutOfBoundsException further down.
    if (args.length < 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountSimple.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:mapReduceBasics.WordCount.java

/**
 * Runs the word count job after parsing generic Hadoop options.
 *
 * <p>The first remaining argument is the input path and the second the output path. Any
 * existing output path is deleted recursively before the job is submitted. The JVM exits with
 * 0 when the job succeeds, 1 when it fails, and 2 when too few arguments are given.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if option parsing, file system access or the job itself fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(conf, args);
    Configuration newConf = optionsParser.getConfiguration();
    String[] otherArgs = optionsParser.getRemainingArgs();
    System.err.println("Best of Luck");

    // Fail with a usage message rather than an ArrayIndexOutOfBoundsException further down.
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(newConf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    Path inputPath = new Path(otherArgs[0]);
    Path outputPath = new Path(otherArgs[1]);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Resolve the file system from the path itself: the default file system is not necessarily
    // the one that owns a fully qualified output path.
    FileSystem fs = outputPath.getFileSystem(newConf);
    fs.delete(outputPath, true);

    //    job.setNumReduceTasks(3);
    System.err.println("Input Format - " + job.getInputFormatClass());
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:maxint.maxint.java

License:Apache License

/**
 * Runs the "max integer" job.
 *
 * <p>After generic Hadoop options are stripped, exactly two arguments must remain: the input
 * path and the output path. {@code MaxIntReducer} serves as both combiner and reducer. The JVM
 * exits with 2 on wrong usage, 0 when the job succeeds and 1 when it fails.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if option parsing or the job itself fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    final String[] remaining = parser.getRemainingArgs();

    final boolean usageOk = remaining.length == 2;
    if (!usageOk) {
        System.err.println("Usage: max integer <in> <out>");
        System.exit(2);
    }

    final Job maxJob = new Job(configuration, "max integer");
    maxJob.setJarByClass(maxint.class);

    // Processing classes: the reducer doubles as combiner.
    maxJob.setMapperClass(TokenizerMapper.class);
    maxJob.setCombinerClass(MaxIntReducer.class);
    maxJob.setReducerClass(MaxIntReducer.class);

    // Output record types.
    maxJob.setOutputKeyClass(Text.class);
    maxJob.setOutputValueClass(IntWritable.class);

    // Input and output locations.
    final Path inputPath = new Path(remaining[0]);
    final Path outputPath = new Path(remaining[1]);
    FileInputFormat.addInputPath(maxJob, inputPath);
    FileOutputFormat.setOutputPath(maxJob, outputPath);

    final boolean succeeded = maxJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:merge.MergeSameData.java

License:Open Source License

/**
 * Runs the "Statistics Total Data Num" job.
 *
 * <p>After generic Hadoop options are stripped, exactly two arguments must remain: the input
 * path and the output path. {@code IntSumReducer} serves as both combiner and reducer. The JVM
 * exits with 2 on wrong usage, 0 when the job succeeds and 1 when it fails.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if option parsing or the job itself fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    final String[] remaining = parser.getRemainingArgs();

    final boolean usageOk = remaining.length == 2;
    if (!usageOk) {
        System.err.println("Usage: StatisticsTotalDataNum <in> <out>");
        System.exit(2);
    }

    final Job statisticsJob = new Job(configuration, "Statistics Total Data Num");
    statisticsJob.setJarByClass(MergeSameData.class);

    // Processing classes: the reducer doubles as combiner.
    statisticsJob.setMapperClass(TokenizerMapper.class);
    statisticsJob.setCombinerClass(IntSumReducer.class);
    statisticsJob.setReducerClass(IntSumReducer.class);

    // Output record types.
    statisticsJob.setOutputKeyClass(Text.class);
    statisticsJob.setOutputValueClass(IntWritable.class);

    // Input and output locations.
    final Path inputPath = new Path(remaining[0]);
    final Path outputPath = new Path(remaining[1]);
    FileInputFormat.addInputPath(statisticsJob, inputPath);
    FileOutputFormat.setOutputPath(statisticsJob, outputPath);

    final boolean succeeded = statisticsJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:muming.examples.WordCount.java

License:Apache License

/**
 * Runs the word count job over one or more input paths.
 *
 * <p>After generic Hadoop options are stripped, at least two arguments must remain: every
 * argument but the last is added as an input path and the last one is the output path.
 * {@code IntSumReducer} serves as both combiner and reducer. The JVM exits with 2 on wrong
 * usage, 0 when the job succeeds and 1 when it fails.
 *
 * @param args generic Hadoop options followed by {@code <in> [<in>...] <out>}
 * @throws Exception if option parsing or the job itself fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    final String[] remaining = parser.getRemainingArgs();

    if (remaining.length < 2) {
        System.err.println("Usage Of Muming: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    final Job countJob = new Job(configuration, "word count");
    countJob.setJarByClass(WordCount.class);

    // Processing classes: the reducer doubles as combiner.
    countJob.setMapperClass(TokenizerMapper.class);
    countJob.setCombinerClass(IntSumReducer.class);
    countJob.setReducerClass(IntSumReducer.class);

    // Output record types.
    countJob.setOutputKeyClass(Text.class);
    countJob.setOutputValueClass(IntWritable.class);

    // All arguments before the last are inputs; the last one is the output location.
    final int outputIndex = remaining.length - 1;
    int inputIndex = 0;
    while (inputIndex < outputIndex) {
        FileInputFormat.addInputPath(countJob, new Path(remaining[inputIndex]));
        inputIndex++;
    }
    FileOutputFormat.setOutputPath(countJob, new Path(remaining[outputIndex]));

    final boolean succeeded = countJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:mvm.rya.accumulo.mr.eval.AccumuloRdfCountTool.java

License:Apache License

/**
 * cloudbase props
 */

// Configures and runs the "Gather Evaluation Statistics" job: input is set up by
// setupInputFormat with a single range from the empty key up to Byte.MAX_VALUE, the map output
// is Text -> LongWritable, the job output is Text -> Mutation, and results are written to the
// table named tablePrefix + TBL_EVAL_SUFFIX. Start/end times are printed to standard output.
// Returns 0 when the job succeeds and -1 when it fails; the arguments are not read here.
@Override
public int run(String[] strings) throws Exception {
    conf.set(MRUtils.JOB_NAME_PROP, "Gather Evaluation Statistics");

    //initialize
    init();

    Job countJob = new Job(conf);
    countJob.setJarByClass(AccumuloRdfCountTool.class);
    setupInputFormat(countJob);

    // A single range spanning from the empty key up to Byte.MAX_VALUE.
    Text rangeStart = new Text(new byte[] {});
    Text rangeEnd = new Text(new byte[] { Byte.MAX_VALUE });
    AccumuloInputFormat.setRanges(countJob, Lists.newArrayList(new Range(rangeStart, rangeEnd)));

    // Intermediate and final record types of this job.
    countJob.setMapOutputKeyClass(Text.class);
    countJob.setMapOutputValueClass(LongWritable.class);
    countJob.setOutputKeyClass(Text.class);
    countJob.setOutputValueClass(Mutation.class);

    // Mapper, combiner and reducer.
    countJob.setMapperClass(CountPiecesMapper.class);
    countJob.setCombinerClass(CountPiecesCombiner.class);
    countJob.setReducerClass(CountPiecesReducer.class);

    setupOutputFormat(countJob, tablePrefix + RdfCloudTripleStoreConstants.TBL_EVAL_SUFFIX);

    // Submit the job and wait for it to finish.
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    boolean succeeded = countJob.waitForCompletion(true);

    if (!succeeded) {
        System.out.println("Job Failed!!!");
        return -1;
    }

    Date finishTime = new Date();
    System.out.println("Job ended: " + finishTime);
    System.out.println("The job took " + (finishTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}