Example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass.

Prototype

public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Document

Define the comparator that controls how the keys are sorted before they are passed to the Reducer.
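
setSortComparatorClass accepts any RawComparator over the map output key type. A common approach is to extend WritableComparator and override the byte-level compare method so keys never need to be deserialized during the sort. The class below is a minimal illustrative sketch (ReverseTextComparator is not part of Hadoop or of the examples that follow); it reverses the natural ordering of Text keys:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;

// Illustrative sketch only: sorts Text keys in reverse of their natural order
// by delegating to Text's registered raw comparator and negating the result.
public class ReverseTextComparator extends WritableComparator {

    private static final WritableComparator NATURAL = WritableComparator.get(Text.class);

    public ReverseTextComparator() {
        super(Text.class);
    }

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Compare the serialized keys without deserializing them, then negate
        // the result so the largest key reaches the Reducer first.
        return -NATURAL.compare(b1, s1, l1, b2, s2, l2);
    }
}

It would be registered on a job with job.setSortComparatorClass(ReverseTextComparator.class).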

Usage

From source file:top25products.Top25ProductChaining.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    //job 1
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Job: 1, top 25 Products based on ratings");
    job.setJarByClass(top25products.Top25ProductChaining.class);
    job.setMapperClass(FirstMapper.class);
    job.setReducerClass(FirstReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path firstJobOutput = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, firstJobOutput);
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }

    //job 2
    Job job2 = Job.getInstance(conf, "Job: 2, top 25 Products based on ratings");
    job2.setJarByClass(top25products.Top25ProductChaining.class);
    job2.setMapperClass(SecondMapper.class);
    job2.setReducerClass(SecondReducer.class);
    job2.setSortComparatorClass(SortKeyComparator.class);
    job2.setMapOutputKeyClass(DoubleWritable.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job2, firstJobOutput);
    String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss", Locale.US)
            .format(new Timestamp(System.currentTimeMillis()));
    FileOutputFormat.setOutputPath(job2, new Path(args[1] + timeStamp));
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
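
The SortKeyComparator registered on job2 is not shown on this page. For a top-N job whose map output keys are DoubleWritable ratings, it is typically a comparator that reverses the natural ascending order; the class below is only an assumed sketch of what it might look like, not the original source:

// Hypothetical sketch of SortKeyComparator; the real implementation from
// Top25ProductChaining is not reproduced on this page.
public static class SortKeyComparator extends WritableComparator {

    protected SortKeyComparator() {
        // Create key instances so the inherited byte-level compare can
        // deserialize the keys and delegate to the method below.
        super(DoubleWritable.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        // Negate the natural ordering so the highest ratings come first.
        return -((DoubleWritable) a).compareTo((DoubleWritable) b);
    }
}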

From source file:uk.ac.cam.eng.extraction.hadoop.features.phrase.Source2TargetJob.java

License:Apache License

public static Job getJob(Configuration conf) throws IOException {
    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx5128m");
    conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, true);
    Job job = new Job(conf);
    job.setJarByClass(Source2TargetJob.class);
    job.setJobName("Source2Taget");
    job.setSortComparatorClass(Source2TargetComparator.class);
    job.setPartitionerClass(Source2TargetPartitioner.class);
    job.setMapperClass(KeepProvenanceCountsOnlyMapper.class);
    job.setReducerClass(MarginalReducer.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(ProvenanceCountMap.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(FeatureMap.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    return job;
}

From source file:uk.ac.cam.eng.extraction.hadoop.features.phrase.Target2SourceJob.java

License:Apache License

public static Job getJob(Configuration conf) throws IOException {
    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx5128m");
    conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, false);
    Job job = new Job(conf);
    job.setJarByClass(Target2SourceJob.class);
    job.setJobName("Target2Source");
    job.setSortComparatorClass(Target2SourceComparator.class);
    job.setPartitionerClass(Target2SourcePartitioner.class);
    job.setMapperClass(SwappingMapper.class);
    job.setReducerClass(MarginalReducer.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(ProvenanceCountMap.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(FeatureMap.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    return job;
}

From source file:uk.ac.cam.eng.extraction.hadoop.merge.MergeJob.java

License:Apache License

public static Job getJob(Configuration conf) throws IOException {

    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx10240m");

    Job job = new Job(conf);
    job.setJarByClass(MergeJob.class);
    job.setJobName("Merge");
    job.setSortComparatorClass(MergeComparator.class);
    job.setPartitionerClass(MergePartitioner.class);
    job.setReducerClass(MergeReducer.class);
    job.setCombinerClass(MergeCombiner.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(AlignmentAndFeatureMap.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(AlignmentAndFeatureMap.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SimpleHFileOutputFormat.class);
    return job;
}
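
Each of the three getJob(Configuration) factories above returns a configured Job without setting input or output paths or submitting it. A minimal sketch of how a driver might use one of them follows; the paths and argument handling are assumptions, since the real drivers are not shown here:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Source2TargetJob.getJob(conf);
    // Illustrative paths; the actual driver wiring is not part of this page.
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}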