Usage examples for org.apache.hadoop.mapreduce.Job#setGroupingComparatorClass
public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
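The class passed here controls which map output keys the framework treats as equal when it groups values for a single reduce() call: keys that the grouping comparator reports as equal are handed to one reduce() invocation, even if the sort comparator orders them differently. Before the project examples below, here is a minimal illustrative sketch (not taken from any of those projects) of a grouping comparator for a hypothetical CompositeKey writable with a getNaturalKey() accessor:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical sketch: group reducer input by the natural part of a composite key.
public class NaturalKeyGroupingComparator extends WritableComparator {

    protected NaturalKeyGroupingComparator() {
        // 'true' tells WritableComparator to create key instances so the
        // object-based compare(WritableComparable, WritableComparable) can be used.
        super(CompositeKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        CompositeKey left = (CompositeKey) a;
        CompositeKey right = (CompositeKey) b;
        // Only the natural (grouping) key is compared; any secondary field is
        // ignored, so all values sharing the natural key reach one reduce() call.
        return left.getNaturalKey().compareTo(right.getNaturalKey());
    }
}

A job would register it with job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class), usually alongside a matching partitioner so that keys with the same natural key also land on the same reducer.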
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.full.Phase4RemoveDuplicatesUsingReduceSideJoins.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase4RemoveDuplicatesUsingReduceSideJoins.class);
    job.setJobName(Phase4RemoveDuplicatesUsingReduceSideJoins.class.getName());

    // paths
    // text files of ids to be deleted
    String textFilePath = args[0];
    // corpus with *.warc.gz
    String commaSeparatedInputFiles = args[1];
    // output
    String outputPath = args[2];

    // second input: the look-up text file
    MultipleInputs.addInputPath(job, new Path(textFilePath), TextInputFormat.class, JoinTextMapper.class);
    // first input: the data set (check comma separated availability)
    MultipleInputs.addInputPath(job, new Path(commaSeparatedInputFiles), WARCInputFormat.class,
            JoinWARCMapper.class);

    job.setPartitionerClass(SourceJoiningKeyPartitioner.class);
    job.setGroupingComparatorClass(SourceJoiningGroupingComparator.class);

    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(WARCWritable.class);

    job.setReducerClass(JoinReducer.class);

    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:demo.SsJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(StockKey.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);

    job.waitForCompletion(true);

    return 0;
}
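This is the classic secondary-sort wiring: CompositeKeyComparator orders the full StockKey (natural key plus secondary field), NaturalKeyGroupingComparator groups on the natural key alone, and NaturalKeyPartitioner keeps each natural key on a single reducer. None of those helper classes appear in the snippet; the following sketch shows what the sort-comparator side could look like, assuming hypothetical getSymbol() and getTimestamp() accessors on StockKey:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical sketch of the full-key sort comparator; accessor names are assumed,
// not taken from the source project.
public class CompositeKeyComparator extends WritableComparator {

    protected CompositeKeyComparator() {
        super(StockKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        StockKey left = (StockKey) a;
        StockKey right = (StockKey) b;
        // Primary order: the natural key; secondary order: timestamp within each group.
        int cmp = left.getSymbol().compareTo(right.getSymbol());
        return cmp != 0 ? cmp : Long.compare(left.getTimestamp(), right.getTimestamp());
    }
}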
From source file:edu.rosehulman.CollocDriver.java
License:Apache License
/**
 * pass1: generate collocations, ngrams
 */
@SuppressWarnings("deprecation")
private static long generateCollocations(Path input, Path output, Configuration baseConf, boolean emitUnigrams,
        int maxNGramSize, int reduceTasks, int minSupport)
        throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);

    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
From source file:edu.umd.shrawanraina.UserLocation.java
License:Apache License
private void runJob2(String basePath, boolean useCombiner, boolean useInMapperCombiner) throws Exception {
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(UserLocation.class.getSimpleName());
    job.setJarByClass(UserLocation.class);

    // We need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(basePath))) {
        if (s.getPath().getName().contains("part-"))
            numPartitions++;
    }

    job.setNumReduceTasks(numPartitions);

    FileInputFormat.setInputPaths(job, new Path(basePath));
    String outputPath = basePath + "-out";
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(PairOfStringInt.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(MapClass2.class);
    //job.setCombinerClass(ReduceClass2.class);
    job.setReducerClass(ReduceClass2.class);

    //job.setPartitionerClass(CustomKeyPartitioner.class);
    job.setGroupingComparatorClass(CustomGroupingComparator.class);
    job.setSortComparatorClass(CustomKeyComparator.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    //return 0;
}
From source file:flink.applications.model.fraud.prepare.Projection.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Projection and grouping MR";
    job.setJobName(jobName);
    job.setJarByClass(Projection.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration());
    String operation = job.getConfiguration().get("projection.operation", "project");

    if (operation.startsWith("grouping")) {
        // group by
        job.setMapperClass(Projection.ProjectionMapper.class);
        job.setReducerClass(Projection.ProjectionReducer.class);

        job.setMapOutputKeyClass(Tuple.class);
        job.setMapOutputValueClass(Text.class);

        job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

        // order by
        boolean doOrderBy = job.getConfiguration().getInt("orderBy.field", -1) >= 0;
        if (doOrderBy) {
            job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
            job.setPartitionerClass(SecondarySort.TupleTextPartitioner.class);
        }
    } else {
        // simple projection
        job.setMapperClass(Projection.SimpleProjectionMapper.class);
    }

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
From source file:hamr.core.general.job.GeneralJob.java
License:Open Source License
public static void generalization(Class<? extends AnnotedBean> abClass, Job job) {
    job.setMapperClass(GeneralMapper.class);
    job.setPartitionerClass(GeneralPartitioner.class);
    job.setMapOutputKeyClass(abClass);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(GeneralReducer.class);
    job.setGroupingComparatorClass(GeneralGroupComparator.class);
}
From source file:hk.newsRecommender.TFIDF2.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");

    // part0----------------------------------------------------
    Job job0 = Job.getInstance(conf, "sfitSingleNews");
    Path output0Path = new Path(hdfsUrl + "/data/recommend/tfidf0");
    HadoopUtil.delete(conf, output0Path);
    job0.setJarByClass(TFIDF.class);
    job0.setMapperClass(Mapper_Part0.class);
    // job1.setCombinerClass(Combiner_Part1.class); // combiner?
    job0.setReducerClass(Reduce_Part0.class);
    job0.setMapOutputKeyClass(Text.class);
    job0.setMapOutputValueClass(Text.class);
    job0.setOutputKeyClass(Text.class);
    job0.setOutputValueClass(Text.class);
    // job1.setNumReduceTasks(p.length);
    FileInputFormat.addInputPath(job0, new Path(hdfsUrl + "/data/recommend/data2.txt"));
    FileOutputFormat.setOutputPath(job0, output0Path);
    job0.waitForCompletion(true);

    // part1----------------------------------------------------
    Job job1 = Job.getInstance(conf, "computeTF");
    Path outputPath1 = new Path(hdfsUrl + "/data/recommend/tfidf1");
    HadoopUtil.delete(conf, outputPath1);
    job1.setJarByClass(TFIDF.class);
    job1.setMapperClass(Mapper_Part1.class);
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setPartitionerClass(MyPartitoner.class); // MyPartitoner
    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf0"));
    FileOutputFormat.setOutputPath(job1, outputPath1);
    job1.waitForCompletion(true);

    // part2----------------------------------------
    Job job2 = Job.getInstance(conf, "computeTFIDF");
    Path outputPath2 = new Path(hdfsUrl + "/data/recommend/tfidf2");
    HadoopUtil.delete(conf, outputPath2);
    job2.setJarByClass(TFIDF.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/tfidf1"));
    FileOutputFormat.setOutputPath(job2, outputPath2);
    job2.waitForCompletion(true);

    // part3----------------------------------------
    Configuration conf3 = new Configuration();
    Path outputPath3 = new Path(hdfsUrl + "/data/recommend/tfidf3");
    HadoopUtil.delete(conf, outputPath3);
    Job job3 = Job.getInstance(conf3, "My_tdif_part3");
    job3.setMapperClass(Mapper_Part3.class);
    job3.setReducerClass(Reduce_Part3.class);
    job3.setMapOutputKeyClass(CustomKey.class);
    job3.setMapOutputValueClass(NullWritable.class);
    job3.setOutputKeyClass(CustomKey.class);
    job3.setOutputValueClass(NullWritable.class);
    job3.setGroupingComparatorClass(CustomGroupComparator.class);
    job3.setPartitionerClass(CustomPartitioner.class); // MyPartitoner
    FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/tfidf2"));
    FileOutputFormat.setOutputPath(job3, outputPath3);
    job3.waitForCompletion(true);
}
From source file:hk.newsRecommender.TFIDFClassify.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");

    // part1----------------------------------------------------
    Job job1 = Job.getInstance(conf, "computeTF");
    Path outputPath1 = new Path(hdfsUrl + "/data/recommend/class1/tfidf1");
    HadoopUtil.delete(conf, outputPath1);
    job1.setJarByClass(TFIDFClassify.class);
    job1.setMapperClass(Mapper_Part1.class);
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setPartitionerClass(MyPartitoner.class); // MyPartitoner
    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/data3.txt"));
    FileOutputFormat.setOutputPath(job1, outputPath1);
    job1.waitForCompletion(true);

    // part2----------------------------------------
    Job job2 = Job.getInstance(conf, "computIDF");
    Path outputPath2 = new Path(hdfsUrl + "/data/recommend/class1/tfidf2");
    HadoopUtil.delete(conf, outputPath2);
    job2.setJarByClass(TFIDFClassify.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/class1/tfidf1"));
    FileOutputFormat.setOutputPath(job2, outputPath2);
    job2.waitForCompletion(true);

    // part3----------------------------------------
    Job job3 = Job.getInstance(conf, "sortByTFIDFDec");
    Path outputPath3 = new Path(hdfsUrl + "/data/recommend/class1/tfidf3");
    HadoopUtil.delete(conf, outputPath3);
    job3.setMapperClass(Mapper_Part3.class);
    job3.setReducerClass(Reduce_Part3.class);
    job3.setMapOutputKeyClass(CustomKey.class);
    job3.setMapOutputValueClass(NullWritable.class);
    job3.setOutputKeyClass(CustomKey.class);
    job3.setOutputValueClass(NullWritable.class);
    job3.setGroupingComparatorClass(CustomGroupComparator.class);
    job3.setPartitionerClass(CustomPartitioner.class); // MyPartitoner
    FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/class1/tfidf2"));
    FileOutputFormat.setOutputPath(job3, outputPath3);
    job3.waitForCompletion(true);

    // part4----------------------------------------
    // Job job4 = Job.getInstance(conf, "siftKeywords");
    // Path outputPath4 = new Path(hdfsUrl + "/data/recommend/class1/matrix1");
    // HadoopUtil.delete(conf, outputPath4);
    // job4.setJarByClass(TFIDF.class);
    // job4.setMapperClass(Mapper_Part4.class);
    // job4.setReducerClass(Reduce_Part4.class);
    // job4.setMapOutputKeyClass(Text.class);
    // job4.setMapOutputValueClass(Text.class);
    // job4.setOutputKeyClass(Text.class);
    // job4.setOutputValueClass(Text.class);
    // job4.setPartitionerClass(CustomPartitioner.class);
    // FileInputFormat.addInputPath(job4, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    // FileOutputFormat.setOutputPath(job4, outputPath4);
    // job4.waitForCompletion(true);

    // part5----------------------------------------
    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/matrix1/part-r-00000"));
    Scanner scan = new Scanner(in);
    List<String> keywordList = new ArrayList<String>();
    while (scan.hasNext()) {
        keywordList.add(scan.next());
    }
    // must be set before the job is created
    conf.setStrings("keyword", keywordList.toArray(new String[keywordList.size()]));

    Job job5 = Job.getInstance(conf, "generateMatrix");
    Path outputPath5 = new Path(hdfsUrl + "/data/recommend/class1/matrix2");
    HadoopUtil.delete(conf, outputPath5);
    job5.setJarByClass(TFIDF.class);
    job5.setMapperClass(Mapper_Part5.class);
    job5.setReducerClass(Reduce_Part5.class);
    job5.setMapOutputKeyClass(Text.class);
    job5.setMapOutputValueClass(Text.class);
    job5.setOutputKeyClass(Text.class);
    job5.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job5, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    FileOutputFormat.setOutputPath(job5, outputPath5);
    job5.waitForCompletion(true);
}
From source file:io.apigee.lembos.mapreduce.LembosMapReduceRunner.java
License:Apache License
/**
 * Returns a properly configured, ready to run Hadoop {@link Job}.
 *
 * @param args the command line arguments as supported by {@link GenericOptionsParser}
 *
 * @return the configured job
 *
 * @throws IOException if there is a problem creating the job
 * @throws ExecutionException if there is an issue running the Node.js module
 * @throws InterruptedException if the execution of the Node.js module gets interrupted
 * @throws NodeException if there is an issue with the Node.js module
 */
public Job initJob(final String[] args)
        throws ExecutionException, InterruptedException, IOException, NodeException {
    final GenericOptionsParser gop = new GenericOptionsParser(args);

    // If ran from ToolRunner, conf should already be set but if not, set it manually
    if (conf == null) {
        setConf(gop.getConfiguration());
    }

    // Load the Hadoop FS URL handler
    RunnerUtils.loadFsUrlStreamHandler(getConf());

    // Persist the non-Runner CLI arguments
    conf.setStrings(LembosConstants.MR_MODULE_ARGS, gop.getRemainingArgs());

    // Package the Node.js module and prepare it to be submitted with the Job
    RunnerUtils.prepareModuleForJob(conf);

    // Add "-libjars" to the current ClassLoader if necessary
    RunnerUtils.addLibJarsToClassLoader(conf);

    // Create Node.js environment for local use
    mrEnv = LembosMapReduceEnvironment.fromConf(conf);

    if (JavaScriptUtils.isDefined(mrEnv.getConfiguration())) {
        for (final Map.Entry<Object, Object> propertyEntry : mrEnv.getConfiguration().entrySet()) {
            final String key = propertyEntry.getKey().toString();
            final Writable value = ConversionUtils.jsToWritable(propertyEntry.getValue(), mrEnv.getModule());

            // Do not set these as we'll be setting them later from values we were passed from the CLI
            if (key.equals(LembosConstants.MR_MODULE_NAME)) {
                continue;
            }

            if (value instanceof BooleanWritable) {
                conf.setBoolean(key, ((BooleanWritable) value).get());
            } else if (value instanceof DoubleWritable || value instanceof FloatWritable) {
                conf.setFloat(key, Float.valueOf(value.toString()));
            } else if (value instanceof IntWritable) {
                conf.setInt(key, ((IntWritable) value).get());
            } else if (value instanceof LongWritable) {
                conf.setLong(key, ((LongWritable) value).get());
            } else if (value instanceof Text) {
                conf.set(key, value.toString());
            } else {
                System.err.println("Cannot convert JavaScript (" + value.getClass().getName()
                        + ") to Configuration, using String");
                conf.set(key, value.toString());
            }
        }
    }

    // Create Job
    final String jobName = "LembosMapReduceJob-" + mrEnv.getModuleName();
    final Job job = new Job(conf, jobName);

    jobWrapper = JobWrap.getInstance(mrEnv.getRuntime(), job);

    if (JavaScriptUtils.isDefined(mrEnv.getJobSetupFunction())) {
        mrEnv.callFunctionSync(mrEnv.getJobSetupFunction(), new Object[] { jobWrapper });
    }

    // Always set the mapper
    job.setMapperClass(LembosMapper.class);

    // Conditionally set the combiner
    if (JavaScriptUtils.isDefined(mrEnv.getCombineFunction())) {
        job.setCombinerClass(LembosCombiner.class);
    }

    // Conditionally set the group comparator
    if (JavaScriptUtils.isDefined(mrEnv.getGroupFunction())) {
        job.setGroupingComparatorClass(LembosGroupComparator.class);
    }

    // Conditionally set the partitioner
    if (JavaScriptUtils.isDefined(mrEnv.getPartitionFunction())) {
        job.setPartitionerClass(LembosPartitioner.class);
    }

    // Conditionally set the reducer
    if (JavaScriptUtils.isDefined(mrEnv.getReduceFunction())) {
        job.setReducerClass(LembosReducer.class);
    } else {
        job.setNumReduceTasks(0);
    }

    // Conditionally set the sort comparator
    if (JavaScriptUtils.isDefined(mrEnv.getSortFunction())) {
        job.setSortComparatorClass(LembosSortComparator.class);
    }

    // This could potentially be unsafe but for testing, we need to set this based on the path to the built JAR
    if (job.getJar() == null) {
        job.setJarByClass(LembosMapReduceRunner.class);
    }

    // MapReduce configuration reference:
    //
    // http://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml
    // org.apache.hadoop.mapreduce.MRConfig
    // org.apache.hadoop.mapreduce.MRJobConfig

    return job;
}
From source file:it.crs4.seal.demux.Demux.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    LOG.info("starting");

    Configuration conf = getConf();
    DemuxOptionParser parser = new DemuxOptionParser();
    parser.parse(conf, args);

    conf.setBoolean(CONF_NO_INDEX_READS, parser.getNoIndexReads());
    conf.setBoolean(CONF_SEPARATE_READS, parser.getSeparateReads());

    LOG.info("Using " + parser.getNReduceTasks() + " reduce tasks");
    if (parser.getNoIndexReads())
        LOG.info("Not expecting to find any index reads. Will demultiplex based only on lane.");

    // load sample sheet to fail early in case of problems
    DemuxUtils.loadSampleSheet(parser.getSampleSheetPath(), conf);

    // must be called before creating the job, since the job
    // *copies* the Configuration.
    distributeSampleSheet(parser.getSampleSheetPath());

    // Create a Job using the processed conf
    Job job = new Job(getConf(), makeJobName(parser.getInputPaths().get(0)));
    job.setJarByClass(Demux.class);

    // input paths
    for (Path p : parser.getInputPaths())
        FileInputFormat.addInputPath(job, p);

    job.setInputFormatClass(FormatNameMap.getInputFormat(parser.getInputFormatName("qseq")));

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(SequenceId.class);
    job.setMapOutputValueClass(SequencedFragment.class);

    job.setPartitionerClass(SequenceIdLocationPartitioner.class);
    job.setGroupingComparatorClass(GroupByLocationComparator.class);
    job.setSortComparatorClass(TwoOneThreeSortComparator.class);

    job.setReducerClass(Red.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SequencedFragment.class);

    // output
    job.setOutputFormatClass(DemuxOutputFormat.class);
    FileOutputFormat.setOutputPath(job, parser.getOutputPath());

    // Submit the job, then poll for progress until the job is complete
    boolean result = job.waitForCompletion(true);

    if (result) {
        LOG.info("done");
        if (parser.getCreateLaneContent())
            createLaneContentFiles(parser.getOutputPath(), parser.getSampleSheetPath());
        return 0;
    } else {
        LOG.fatal(this.getClass().getName() + " failed!");
        return 1;
    }
}