Example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass.

Prototype

public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Document

Define the comparator that controls how the keys are sorted before they are passed to the Reducer.
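
setSortComparatorClass accepts any RawComparator over the map output key type. A common approach is to extend WritableComparator and override the byte-level compare method so keys never need to be deserialized during the sort. The class below is a minimal illustrative sketch (ReverseTextComparator is not part of Hadoop or of the examples that follow); it reverses the natural ordering of Text keys:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;

// Illustrative sketch only: sorts Text keys in reverse of their natural order
// by delegating to Text's registered raw comparator and negating the result.
public class ReverseTextComparator extends WritableComparator {

    private static final WritableComparator NATURAL = WritableComparator.get(Text.class);

    public ReverseTextComparator() {
        super(Text.class);
    }

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Compare the serialized keys without deserializing them, then negate
        // the result so the largest key reaches the Reducer first.
        return -NATURAL.compare(b1, s1, l1, b2, s2, l2);
    }
}

It would be registered on a job with job.setSortComparatorClass(ReverseTextComparator.class).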

Usage

From source file:top25products.Top25ProductChaining.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    //job 1
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Job: 1, top 25 Products based on ratings");
    job.setJarByClass(top25products.Top25ProductChaining.class);
    job.setMapperClass(FirstMapper.class);
    job.setReducerClass(FirstReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path firstJobOutput = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, firstJobOutput);
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    //job 2
    Job job2 = Job.getInstance(conf, "Job: 2, top 25 Products based on ratings");
    job2.setJarByClass(top25products.Top25ProductChaining.class);
    job2.setMapperClass(SecondMapper.class);
    job2.setReducerClass(SecondReducer.class);
    job2.setSortComparatorClass(SortKeyComparator.class);
    job2.setMapOutputKeyClass(DoubleWritable.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job2, firstJobOutput);
    String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss", Locale.US)
            .format(new Timestamp(System.currentTimeMillis()));
    FileOutputFormat.setOutputPath(job2, new Path(args[1] + timeStamp));
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
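
The SortKeyComparator registered on job2 is not shown on this page. For a top-N job whose map output keys are DoubleWritable ratings, it is typically a comparator that reverses the natural ascending order; the class below is only an assumed sketch of what it might look like, not the original source:

// Hypothetical sketch of SortKeyComparator; the real implementation from
// Top25ProductChaining is not reproduced on this page.
public static class SortKeyComparator extends WritableComparator {

    protected SortKeyComparator() {
        // Create key instances so the inherited byte-level compare can
        // deserialize the keys and delegate to the method below.
        super(DoubleWritable.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        // Negate the natural ordering so the highest ratings come first.
        return -((DoubleWritable) a).compareTo((DoubleWritable) b);
    }
}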

From source file:uk.ac.cam.eng.extraction.hadoop.features.phrase.Source2TargetJob.java

License:Apache License

public static Job getJob(Configuration conf) throws IOException {
    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx5128m");
    conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, true);
    Job job = new Job(conf);
    job.setJarByClass(Source2TargetJob.class);
    job.setJobName("Source2Taget");
    job.setSortComparatorClass(Source2TargetComparator.class);
    job.setPartitionerClass(Source2TargetPartitioner.class);
    job.setMapperClass(KeepProvenanceCountsOnlyMapper.class);
    job.setReducerClass(MarginalReducer.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(ProvenanceCountMap.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(FeatureMap.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    return job;
}

From source file:uk.ac.cam.eng.extraction.hadoop.features.phrase.Target2SourceJob.java

License:Apache License

public static Job getJob(Configuration conf) throws IOException {
    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx5128m");
    conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, false);
    Job job = new Job(conf);
    job.setJarByClass(Target2SourceJob.class);
    job.setJobName("Target2Source");
    job.setSortComparatorClass(Target2SourceComparator.class);
    job.setPartitionerClass(Target2SourcePartitioner.class);
    job.setMapperClass(SwappingMapper.class);
    job.setReducerClass(MarginalReducer.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(ProvenanceCountMap.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(FeatureMap.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    return job;
}

From source file:uk.ac.cam.eng.extraction.hadoop.merge.MergeJob.java

License:Apache License

public static Job getJob(Configuration conf) throws IOException {

    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx10240m");

    Job job = new Job(conf);
    job.setJarByClass(MergeJob.class);
    job.setJobName("Merge");
    job.setSortComparatorClass(MergeComparator.class);
    job.setPartitionerClass(MergePartitioner.class);
    job.setReducerClass(MergeReducer.class);
    job.setCombinerClass(MergeCombiner.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(AlignmentAndFeatureMap.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(AlignmentAndFeatureMap.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SimpleHFileOutputFormat.class);
    return job;
}
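
Each of the three getJob(Configuration) factories above returns a configured Job without setting input or output paths or submitting it. A minimal sketch of how a driver might use one of them follows; the paths and argument handling are assumptions, since the real drivers are not shown here:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Source2TargetJob.getJob(conf);
    // Illustrative paths; the actual driver wiring is not part of this page.
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}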