Example usage for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setGroupingComparatorClass.

Prototype

public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Document

Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce(Object,Iterable,org.apache.hadoop.mapreduce.Reducer.Context)
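
In practice the class passed to setGroupingComparatorClass is a RawComparator (most often a WritableComparator subclass) that compares only the "natural" part of a composite key, so that every value sharing that natural key is delivered to the reducer in a single reduce() call while the rest of the key still drives sort order. Below is a minimal sketch of such a comparator; the CompositeKey class and its getNaturalKey() accessor are hypothetical stand-ins used for illustration, not part of the Hadoop API.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical grouping comparator: groups map output records by the natural key only.
public class NaturalKeyGroupingComparator extends WritableComparator {

    protected NaturalKeyGroupingComparator() {
        // 'true' tells the parent class to instantiate keys for use in compare()
        super(CompositeKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // CompositeKey is an assumed composite key Writable whose
        // getNaturalKey() returns the grouping portion as a Text
        Text left = ((CompositeKey) a).getNaturalKey();
        Text right = ((CompositeKey) b).getNaturalKey();
        return left.compareTo(right);
    }
}

Such a comparator is registered during job setup with job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class), usually alongside a matching partitioner and sort comparator, as the examples below show.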

Usage

From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.full.Phase4RemoveDuplicatesUsingReduceSideJoins.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Phase4RemoveDuplicatesUsingReduceSideJoins.class);
    job.setJobName(Phase4RemoveDuplicatesUsingReduceSideJoins.class.getName());

    // paths
    // text files of ids to be deleted
    String textFilePath = args[0];
    // corpus with *.warc.gz
    String commaSeparatedInputFiles = args[1];
    // output
    String outputPath = args[2];

    //second input the look up text file
    MultipleInputs.addInputPath(job, new Path(textFilePath), TextInputFormat.class, JoinTextMapper.class);
    //first input the data set (check comma separated availability)
    MultipleInputs.addInputPath(job, new Path(commaSeparatedInputFiles), WARCInputFormat.class,
            JoinWARCMapper.class);

    job.setPartitionerClass(SourceJoiningKeyPartitioner.class);
    job.setGroupingComparatorClass(SourceJoiningGroupingComparator.class);

    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(WARCWritable.class);

    job.setReducerClass(JoinReducer.class);

    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: demo.SsJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(StockKey.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);

    job.waitForCompletion(true);

    return 0;
}

From source file: edu.rosehulman.CollocDriver.java

License: Apache License

/**
 * pass1: generate collocations, ngrams
 */
@SuppressWarnings("deprecation")
private static long generateCollocations(Path input, Path output, Configuration baseConf, boolean emitUnigrams,
        int maxNGramSize, int reduceTasks, int minSupport)
        throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);

    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}

From source file: edu.umd.shrawanraina.UserLocation.java

License: Apache License

private void runJob2(String basePath, boolean useCombiner, boolean useInMapperCombiner) throws Exception {
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(UserLocation.class.getSimpleName());
    job.setJarByClass(UserLocation.class);

    // We need to actually count the number of part files to get the number
    // of partitions (because
    // the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(basePath))) {
        if (s.getPath().getName().contains("part-"))
            numPartitions++;
    }
    job.setNumReduceTasks(numPartitions);

    FileInputFormat.setInputPaths(job, new Path(basePath));
    String outputPath = basePath + "-out";
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(PairOfStringInt.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(MapClass2.class);
    //job.setCombinerClass(ReduceClass2.class);
    job.setReducerClass(ReduceClass2.class);

    //job.setPartitionerClass(CustomKeyPartitioner.class);
    job.setGroupingComparatorClass(CustomGroupingComparator.class);
    job.setSortComparatorClass(CustomKeyComparator.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    //return 0;
}

From source file: flink.applications.model.fraud.prepare.Projection.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Projection  and grouping  MR";
    job.setJobName(jobName);

    job.setJarByClass(Projection.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration());
    String operation = job.getConfiguration().get("projection.operation", "project");

    if (operation.startsWith("grouping")) {
        //group by
        job.setMapperClass(Projection.ProjectionMapper.class);
        job.setReducerClass(Projection.ProjectionReducer.class);

        job.setMapOutputKeyClass(Tuple.class);
        job.setMapOutputValueClass(Text.class);

        job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

        //order by
        boolean doOrderBy = job.getConfiguration().getInt("orderBy.field", -1) >= 0;
        if (doOrderBy) {
            job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
            job.setPartitionerClass(SecondarySort.TupleTextPartitioner.class);
        }

    } else {
        //simple projection
        job.setMapperClass(Projection.SimpleProjectionMapper.class);
    }

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}

From source file: hamr.core.general.job.GeneralJob.java

License: Open Source License

public static void generalization(Class<? extends AnnotedBean> abClass, Job job) {
    job.setMapperClass(GeneralMapper.class);
    job.setPartitionerClass(GeneralPartitioner.class);
    job.setMapOutputKeyClass(abClass);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(GeneralReducer.class);
    job.setGroupingComparatorClass(GeneralGroupComparator.class);
}

From source file: hk.newsRecommender.TFIDF2.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");

    //      part0----------------------------------------------------
    Job job0 = Job.getInstance(conf, "sfitSingleNews");
    Path output0Path = new Path(hdfsUrl + "/data/recommend/tfidf0");
    HadoopUtil.delete(conf, output0Path);
    job0.setJarByClass(TFIDF.class);
    job0.setMapperClass(Mapper_Part0.class);
    // job1.setCombinerClass(Combiner_Part1.class); // combiner?
    job0.setReducerClass(Reduce_Part0.class);
    job0.setMapOutputKeyClass(Text.class);
    job0.setMapOutputValueClass(Text.class);
    job0.setOutputKeyClass(Text.class);
    job0.setOutputValueClass(Text.class);
    // job1.setNumReduceTasks(p.length);
    FileInputFormat.addInputPath(job0, new Path(hdfsUrl + "/data/recommend/data2.txt"));
    FileOutputFormat.setOutputPath(job0, output0Path);
    job0.waitForCompletion(true);

    //      part1----------------------------------------------------
    Job job1 = Job.getInstance(conf, "computeTF");
    Path outputPath1 = new Path(hdfsUrl + "/data/recommend/tfidf1");
    HadoopUtil.delete(conf, outputPath1);
    job1.setJarByClass(TFIDF.class);
    job1.setMapperClass(Mapper_Part1.class);
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setPartitionerClass(MyPartitoner.class); // MyPartitoner
    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf0"));
    FileOutputFormat.setOutputPath(job1, outputPath1);
    job1.waitForCompletion(true);

    //      part2----------------------------------------
    Job job2 = Job.getInstance(conf, "computeTFIDF");
    Path outputPath2 = new Path(hdfsUrl + "/data/recommend/tfidf2");
    HadoopUtil.delete(conf, outputPath2);
    job2.setJarByClass(TFIDF.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/tfidf1"));
    FileOutputFormat.setOutputPath(job2, outputPath2);
    job2.waitForCompletion(true);

    //      part3----------------------------------------
    Configuration conf3 = new Configuration();
    Path outputPath3 = new Path(hdfsUrl + "/data/recommend/tfidf3");
    HadoopUtil.delete(conf, outputPath3);
    Job job3 = Job.getInstance(conf3, "My_tdif_part3");
    job3.setMapperClass(Mapper_Part3.class);
    job3.setReducerClass(Reduce_Part3.class);
    job3.setMapOutputKeyClass(CustomKey.class);
    job3.setMapOutputValueClass(NullWritable.class);
    job3.setOutputKeyClass(CustomKey.class);
    job3.setOutputValueClass(NullWritable.class);
    job3.setGroupingComparatorClass(CustomGroupComparator.class);
    job3.setPartitionerClass(CustomPartitioner.class); // MyPartitoner
    FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/tfidf2"));
    FileOutputFormat.setOutputPath(job3, outputPath3);
    job3.waitForCompletion(true);

}

From source file: hk.newsRecommender.TFIDFClassify.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");

    //      part1----------------------------------------------------
    Job job1 = Job.getInstance(conf, "computeTF");
    Path outputPath1 = new Path(hdfsUrl + "/data/recommend/class1/tfidf1");
    HadoopUtil.delete(conf, outputPath1);
    job1.setJarByClass(TFIDFClassify.class);
    job1.setMapperClass(Mapper_Part1.class);
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setPartitionerClass(MyPartitoner.class); // MyPartitoner
    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/data3.txt"));
    FileOutputFormat.setOutputPath(job1, outputPath1);
    job1.waitForCompletion(true);

    // part2----------------------------------------
    Job job2 = Job.getInstance(conf, "computIDF");
    Path outputPath2 = new Path(hdfsUrl + "/data/recommend/class1/tfidf2");
    HadoopUtil.delete(conf, outputPath2);
    job2.setJarByClass(TFIDFClassify.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/class1/tfidf1"));
    FileOutputFormat.setOutputPath(job2, outputPath2);
    job2.waitForCompletion(true);

    //      part3----------------------------------------
    Job job3 = Job.getInstance(conf, "sortByTFIDFDec");
    Path outputPath3 = new Path(hdfsUrl + "/data/recommend/class1/tfidf3");
    HadoopUtil.delete(conf, outputPath3);
    job3.setMapperClass(Mapper_Part3.class);
    job3.setReducerClass(Reduce_Part3.class);
    job3.setMapOutputKeyClass(CustomKey.class);
    job3.setMapOutputValueClass(NullWritable.class);
    job3.setOutputKeyClass(CustomKey.class);
    job3.setOutputValueClass(NullWritable.class);
    job3.setGroupingComparatorClass(CustomGroupComparator.class);
    job3.setPartitionerClass(CustomPartitioner.class); // MyPartitoner
    FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/class1/tfidf2"));
    FileOutputFormat.setOutputPath(job3, outputPath3);
    job3.waitForCompletion(true);

    //      part4---------------??-------------------------
    //      Job job4 = Job.getInstance(conf, "siftKeywords");
    //      Path outputPath4=new Path(hdfsUrl + "/data/recommend/class1/matrix1");
    //      HadoopUtil.delete(conf, outputPath4);
    //      job4.setJarByClass(TFIDF.class);
    //      job4.setMapperClass(Mapper_Part4.class);
    //      job4.setReducerClass(Reduce_Part4.class);
    //      job4.setMapOutputKeyClass(Text.class);
    //      job4.setMapOutputValueClass(Text.class);
    //      job4.setOutputKeyClass(Text.class);
    //      job4.setOutputValueClass(Text.class);
    //      job4.setPartitionerClass(CustomPartitioner.class);
    //      FileInputFormat.addInputPath(job4, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    //      FileOutputFormat.setOutputPath(job4, outputPath4);
    //      job4.waitForCompletion(true);

    //      part5----------------------------------------
    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/matrix1/part-r-00000"));
    Scanner scan = new Scanner(in);
    List<String> keywordList = new ArrayList<String>();
    while (scan.hasNext()) {
        keywordList.add(scan.next());
    }
    // must be set in the Configuration before the Job is created
    conf.setStrings("keyword", keywordList.toArray(new String[keywordList.size()]));
    Job job5 = Job.getInstance(conf, "generateMatrix");
    Path outputPath5 = new Path(hdfsUrl + "/data/recommend/class1/matrix2");
    HadoopUtil.delete(conf, outputPath5);
    job5.setJarByClass(TFIDF.class);
    job5.setMapperClass(Mapper_Part5.class);
    job5.setReducerClass(Reduce_Part5.class);
    job5.setMapOutputKeyClass(Text.class);
    job5.setMapOutputValueClass(Text.class);
    job5.setOutputKeyClass(Text.class);
    job5.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job5, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    FileOutputFormat.setOutputPath(job5, outputPath5);
    job5.waitForCompletion(true);

}

From source file: io.apigee.lembos.mapreduce.LembosMapReduceRunner.java

License: Apache License

/**
 * Returns a properly configured, ready to run Hadoop {@link Job}.
 *
 * @param args the command line arguments as supported by {@link GenericOptionsParser}
 *
 * @return the configured job
 *
 * @throws IOException if there is a problem creating the job
 * @throws ExecutionException if there is an issue running the Node.js module
 * @throws InterruptedException if the execution of the Node.js module gets interrupted
 * @throws NodeException if there is an issue with the Node.js module
 */
public Job initJob(final String[] args)
        throws ExecutionException, InterruptedException, IOException, NodeException {
    final GenericOptionsParser gop = new GenericOptionsParser(args);

    // If ran from ToolRunner, conf should already be set but if not, set it manually
    if (conf == null) {
        setConf(gop.getConfiguration());
    }

    // Load the Hadoop FS URL handler
    RunnerUtils.loadFsUrlStreamHandler(getConf());

    // Persist the non-Runner CLI arguments
    conf.setStrings(LembosConstants.MR_MODULE_ARGS, gop.getRemainingArgs());

    // Package the Node.js module and prepare it to be submitted with the Job
    RunnerUtils.prepareModuleForJob(conf);

    // Add "-libjars" to the current ClassLoader if necessary
    RunnerUtils.addLibJarsToClassLoader(conf);

    // Create Node.js environment for local use
    mrEnv = LembosMapReduceEnvironment.fromConf(conf);

    if (JavaScriptUtils.isDefined(mrEnv.getConfiguration())) {
        for (final Map.Entry<Object, Object> propertyEntry : mrEnv.getConfiguration().entrySet()) {
            final String key = propertyEntry.getKey().toString();
            final Writable value = ConversionUtils.jsToWritable(propertyEntry.getValue(), mrEnv.getModule());

            // Do not set these as we'll be setting them later from values we were passed from the CLI
            if (key.equals(LembosConstants.MR_MODULE_NAME)) {
                continue;
            }

            if (value instanceof BooleanWritable) {
                conf.setBoolean(key, ((BooleanWritable) value).get());
            } else if (value instanceof DoubleWritable || value instanceof FloatWritable) {
                conf.setFloat(key, Float.valueOf(value.toString()));
            } else if (value instanceof IntWritable) {
                conf.setInt(key, ((IntWritable) value).get());
            } else if (value instanceof LongWritable) {
                conf.setLong(key, ((LongWritable) value).get());
            } else if (value instanceof Text) {
                conf.set(key, value.toString());
            } else {
                System.err.println("Cannot convert JavaScript (" + value.getClass().getName()
                        + ") to Configuration, using String");
                conf.set(key, value.toString());
            }
        }
    }

    // Create Job
    final String jobName = "LembosMapReduceJob-" + mrEnv.getModuleName();
    final Job job = new Job(conf, jobName);

    jobWrapper = JobWrap.getInstance(mrEnv.getRuntime(), job);

    if (JavaScriptUtils.isDefined(mrEnv.getJobSetupFunction())) {
        mrEnv.callFunctionSync(mrEnv.getJobSetupFunction(), new Object[] { jobWrapper });
    }

    // Always set the mapper
    job.setMapperClass(LembosMapper.class);

    // Conditionally set the combiner
    if (JavaScriptUtils.isDefined(mrEnv.getCombineFunction())) {
        job.setCombinerClass(LembosCombiner.class);
    }

    // Conditionally set the group comparator
    if (JavaScriptUtils.isDefined(mrEnv.getGroupFunction())) {
        job.setGroupingComparatorClass(LembosGroupComparator.class);
    }

    // Conditionally set the partitioner
    if (JavaScriptUtils.isDefined(mrEnv.getPartitionFunction())) {
        job.setPartitionerClass(LembosPartitioner.class);
    }

    // Conditionally set the reducer
    if (JavaScriptUtils.isDefined(mrEnv.getReduceFunction())) {
        job.setReducerClass(LembosReducer.class);
    } else {
        job.setNumReduceTasks(0);
    }

    // Conditionally set the sort comparator
    if (JavaScriptUtils.isDefined(mrEnv.getSortFunction())) {
        job.setSortComparatorClass(LembosSortComparator.class);
    }

    // This could potentially be unsafe but for testing, we need to set this based on the path to the built JAR
    if (job.getJar() == null) {
        job.setJarByClass(LembosMapReduceRunner.class);
    }

    // MapReduce configuration reference:
    //
    // http://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml
    // org.apache.hadoop.mapreduce.MRConfig
    // org.apache.hadoop.mapreduce.MRJobConfig

    return job;
}

From source file: it.crs4.seal.demux.Demux.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    LOG.info("starting");

    Configuration conf = getConf();
    DemuxOptionParser parser = new DemuxOptionParser();
    parser.parse(conf, args);

    conf.setBoolean(CONF_NO_INDEX_READS, parser.getNoIndexReads());
    conf.setBoolean(CONF_SEPARATE_READS, parser.getSeparateReads());

    LOG.info("Using " + parser.getNReduceTasks() + " reduce tasks");
    if (parser.getNoIndexReads())
        LOG.info("Not expecting to find any index reads.  Will demultiplex based only on lane.");

    // load sample sheet to fail early in case of problems
    DemuxUtils.loadSampleSheet(parser.getSampleSheetPath(), conf);

    // must be called before creating the job, since the job
    // *copies* the Configuration.
    distributeSampleSheet(parser.getSampleSheetPath());

    // Create a Job using the processed conf
    Job job = new Job(getConf(), makeJobName(parser.getInputPaths().get(0)));

    job.setJarByClass(Demux.class);

    // input paths
    for (Path p : parser.getInputPaths())
        FileInputFormat.addInputPath(job, p);

    job.setInputFormatClass(FormatNameMap.getInputFormat(parser.getInputFormatName("qseq")));

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(SequenceId.class);
    job.setMapOutputValueClass(SequencedFragment.class);

    job.setPartitionerClass(SequenceIdLocationPartitioner.class);
    job.setGroupingComparatorClass(GroupByLocationComparator.class);
    job.setSortComparatorClass(TwoOneThreeSortComparator.class);

    job.setReducerClass(Red.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SequencedFragment.class);

    // output
    job.setOutputFormatClass(DemuxOutputFormat.class);
    FileOutputFormat.setOutputPath(job, parser.getOutputPath());

    // Submit the job, then poll for progress until the job is complete
    boolean result = job.waitForCompletion(true);
    if (result) {
        LOG.info("done");
        if (parser.getCreateLaneContent())
            createLaneContentFiles(parser.getOutputPath(), parser.getSampleSheetPath());
        return 0;
    } else {
        LOG.fatal(this.getClass().getName() + " failed!");
        return 1;
    }
}