Example usage for org.apache.hadoop.mapred.JobConf.setReducerClass

Introduction

This page lists example usages of org.apache.hadoop.mapred.JobConf.setReducerClass, collected from open-source projects.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass) 

Document

Set the Reducer class for the job.
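Before the project examples, here is a minimal, self-contained sketch of where setReducerClass fits in an old-API job driver. It uses the stock IdentityMapper and IdentityReducer from org.apache.hadoop.mapred.lib so that no project-specific classes are assumed; the driver class name is illustrative.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class IdentityJobDriver {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(IdentityJobDriver.class);
        conf.setJobName("identity pass-through");

        // The identity classes simply forward each (key, value) pair; a real
        // job would substitute its own Mapper and Reducer implementations.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        // TextInputFormat yields (LongWritable offset, Text line) pairs, so
        // an identity job's output types match its input types.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

As the examples below show, setReducerClass is typically called alongside setMapperClass, the output key/value classes, and the input/output formats, before the job is submitted with JobClient.runJob.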

Usage

From source file:edu.ohsu.sonmezsysbio.cloudbreak.command.CommandNovoalignSingleEnds.java

public void runHadoopJob(Configuration configuration) throws IOException, URISyntaxException {
    JobConf conf = new JobConf(configuration);

    conf.setJobName("Single End Alignment");
    conf.setJarByClass(Cloudbreak.class);
    FileInputFormat.addInputPath(conf, new Path(hdfsDataDir));
    Path outputDir = new Path(hdfsAlignmentsDir);
    FileSystem.get(conf).delete(outputDir, true); // recursively clear any previous output
    FileOutputFormat.setOutputPath(conf, outputDir);

    addDistributedCacheFile(conf, reference, "novoalign.reference");

    addDistributedCacheFile(conf, pathToNovoalign, "novoalign.executable");
    if (pathToNovoalignLicense != null) {
        addDistributedCacheFile(conf, pathToNovoalignLicense, "novoalign.license");
    }

    DistributedCache.createSymlink(conf);
    conf.set("mapred.task.timeout", "3600000");
    conf.set("novoalign.threshold", threshold);
    conf.set("novoalign.quality.format", qualityFormat);

    conf.setInputFormat(SequenceFileInputFormat.class);

    conf.setMapperClass(NovoalignSingleEndMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCompressMapOutput(true);

    conf.setReducerClass(SingleEndAlignmentsToPairsReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.set("mapred.output.compress", "true");
    conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec");

    JobClient.runJob(conf);

}

From source file:edu.stolaf.cs.wmrserver.streaming.StreamJob.java

License:Apache License

public static void setStreamReducer(JobConf conf, String reduceCommand) {
    conf.setReducerClass(PipeReducer.class);
    try {
        conf.set("stream.reduce.streamprocessor", URLEncoder.encode(reduceCommand, "UTF-8"));
    } catch (java.io.UnsupportedEncodingException ex) {
        throw new RuntimeException("The sky is falling! Java doesn't support UTF-8.");
    }
}

From source file:edu.ub.ahstfg.indexer.Indexer.java

License:Open Source License

@Override
public int run(String[] arg0) throws Exception {
    LOG.info("Creating Hadoop job for Indexer.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(Indexer.class);

    LOG.info("Setting input path to '" + INPUT_PATH + "'");
    FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
    // Set filters if necessary.

    LOG.info("Clearing the output path at '" + OUTPUT_PATH + "'");
    // Change URI to Path if necessary.
    FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), job);

    if (fs.exists(new Path(OUTPUT_PATH))) {
        fs.delete(new Path(OUTPUT_PATH), true);
    }

    LOG.info("Setting output path to '" + OUTPUT_PATH + "'");
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info("Setting input format.");
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(IndexOutputFormat.class);

    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IndexRecord.class);

    LOG.info("Setting mapper and reducer.");
    job.setMapperClass(IndexerMapper.class);
    job.setMapOutputValueClass(ParsedDocument.class);
    job.setReducerClass(IndexerReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}

From source file:edu.ub.ahstfg.indexer.wordcount.WordCount.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {

    LOG.info("Creating Hadoop job for ARC input files word count.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(WordCount.class);

    LOG.info("Setting input path to '" + inputPath + "'");
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    // Set filters if necessary.

    LOG.info("Clearing the output path at '" + outputPath + "'");
    // Change URI to Path if necessary.
    FileSystem fs = FileSystem.get(new URI(outputPath), job);

    if (fs.exists(new Path(outputPath))) {
        fs.delete(new Path(outputPath), true);
    }

    LOG.info("Setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info("Setting input format.");
    // job.setInputFormat(TextInputFormat.class);
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(TextOutputFormat.class);

    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    LOG.info("Setting mapper and reducer.");
    // job.setMapperClass(WordCountTextInputMapper.class);
    job.setMapperClass(WordCountArcInputMapper.class);
    job.setReducerClass(LongSumReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}

From source file:edu.ubc.mirrors.holographs.mapreduce.Driver.java

License:Open Source License

public int run(String[] args) throws Exception {
    JobConf job = new JobConf(getConf());
    job.setClassLoader(Driver.class.getClassLoader());
    job.setInputFormat(SnapshotObjectsOfTypeInputFormat.class);
    job.setMapperClass(InvokeMethodMapper.class);
    job.setCombinerClass(TextCountSumReducer.class);
    job.setReducerClass(TextCountSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.set("snapshotPath", args[0]);
    job.set("targetClassName", "org.eclipse.cdt.internal.core.dom.parser.cpp.CPPASTName");
    job.setInt("splitSize", 10000);
    job.setInt("maxNumObjects", 100000);

    FileInputFormat.addInputPath(job, new Path(args[0]));

    String outputPath = args[1];
    int suffix = 2;
    // Note: java.io.File probes the local filesystem, not HDFS, so this
    // collision check only applies to a local output path.
    while (new File(outputPath).exists()) {
        outputPath = args[1] + suffix++;
    }
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    JobClient.runJob(job);
    return 0;
}

From source file:edu.uci.ics.hyracks.imru.util.DataBalancer.java

License:Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DataBalancer.class);

    job.setJobName(DataBalancer.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));

    if (args.length > 3) {
        if (args[3].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[3].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}

From source file:edu.uci.ics.pregelix.core.util.DataGenerator.java

License:Apache License

public static void main(String[] args) throws IOException {

    JobConf job = new JobConf(DataGenerator.class);
    FileSystem dfs = FileSystem.get(job);
    String maxFile = "/maxtemp";
    dfs.delete(new Path(maxFile), true);

    job.setJobName(DataGenerator.class.getSimpleName() + " max ID");
    job.setMapperClass(MapMaxId.class);
    job.setCombinerClass(CombineMaxId.class);
    job.setReducerClass(ReduceMaxId.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(VLongWritable.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(maxFile));
    job.setNumReduceTasks(1);
    JobClient.runJob(job);

    job = new JobConf(DataGenerator.class);
    job.set("hyracks.maxid.file", maxFile);
    job.setInt("hyracks.x", Integer.parseInt(args[2]));
    dfs.delete(new Path(args[1]), true);

    job.setJobName(DataGenerator.class.getSimpleName());
    job.setMapperClass(MapRecordGen.class);
    job.setReducerClass(ReduceRecordGen.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[3]));

    if (args.length > 4) {
        if (args[4].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[4].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}

From source file:edu.uci.ics.pregelix.example.utils.CommonSource.java

License:Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);

    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}

From source file:edu.uci.ics.pregelix.example.utils.FindLargest.java

License:Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);

    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setCombinerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}

From source file:edu.uci.ics.pregelix.example.utils.VertexAggregator.java

License:Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexAggregator.class);

    job.setJobName(VertexAggregator.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setCombinerClass(CombineRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);
}