Example usage for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setGroupingComparatorClass.

Prototype

public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Document

Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context).
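
The class passed to setGroupingComparatorClass must be a RawComparator over the map output key type. None of the projects below show their comparator implementations on this page, so as an illustration only, here is a minimal sketch of a composite IntPair key together with a grouping comparator that groups solely on the key's first field. The IntPair name and its accessors are assumptions mirroring the secondary-sort examples that follow; WritableComparator implements RawComparator, so such a class satisfies the prototype above.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical composite key: two ints, fully ordered by first then second.
class IntPair implements WritableComparable<IntPair> {
    private int first;
    private int second;

    public void set(int first, int second) {
        this.first = first;
        this.second = second;
    }

    public int getFirst() {
        return first;
    }

    public int getSecond() {
        return second;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(first);
        out.writeInt(second);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        first = in.readInt();
        second = in.readInt();
    }

    @Override
    public int compareTo(IntPair o) {
        int cmp = Integer.compare(first, o.first);
        return cmp != 0 ? cmp : Integer.compare(second, o.second);
    }
}

// Grouping comparator: compares only the first int, so every key sharing that
// value is delivered to a single reduce() call, while the values still arrive
// in the order produced by the full key sort.
class FirstGroupingComparator extends WritableComparator {
    protected FirstGroupingComparator() {
        super(IntPair.class, true); // instantiate keys for deserialized comparison
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return Integer.compare(((IntPair) a).getFirst(), ((IntPair) b).getFirst());
    }
}

A driver would register it with job.setGroupingComparatorClass(FirstGroupingComparator.class), exactly as the examples below do.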

Usage

From source file:com.elixir.hadoop.Chromo.FragmentCoverage.java

License:Apache License

public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "position");
    job.setJarByClass(FragmentCoverage.class);

    job.setMapperClass(CoverageMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setNumReduceTasks(5);
    job.setMapOutputKeyClass(com.elixir.hadoop.Chromo.SecondrySort.IntPair.class);
    //job.setSpeculativeExecution(true);
    job.setPartitionerClass(ChromoPartitioner.class);
    job.setGroupingComparatorClass(com.elixir.hadoop.Chromo.SecondrySort.FirstGroupingComparator.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);

    job.setOutputValueClass(IntWritable.class);
    //   job.setOutputFormatClass(Text.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hadoop.examples.secondSort.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // Read the Hadoop configuration
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    // Create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // The Reduce class cannot double as a Combiner here: it would emit
    // <Text, IntWritable>, while a combiner must emit the map output
    // types <IntPair, IntWritable>.
    //job.setCombinerClass(Reduce.class);

    // Reducer
    job.setReducerClass(Reduce.class);

    // Group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // setSortComparatorClass() would override how Hadoop sorts the keys;
    // here the natural ordering from IntPair.compareTo() is used instead.
    //job.setSortComparatorClass(cls);
    // Group reduce input by the first int of the pair
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // The map output is <IntPair, IntWritable>
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // The reduce output is <Text, IntWritable>, written by the default TextOutputFormat
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    // Submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
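
The FirstPartitioner used above is not shown on this page either. Because the grouping comparator only acts within a single reducer's input, the partitioner must route every key with the same first int to the same reducer; a minimal sketch, reusing the hypothetical IntPair from the block in the Document section:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Partitions on the first int only, so the grouping comparator sees all
// matching keys inside one reducer; the mask keeps the result non-negative.
class FirstPartitioner extends Partitioner<IntPair, IntWritable> {
    @Override
    public int getPartition(IntPair key, IntWritable value, int numPartitions) {
        return (key.getFirst() & Integer.MAX_VALUE) % numPartitions;
    }
}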

From source file:com.hadoop.secondarysort.SecondarySortDESC.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    // System.err.println("Usage: secondarysort <in> <out>");
    // System.exit(2);
    // }

    // JobConf jobConf = new JobConf();

    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    // KeyComparator.class, RawComparator.class);
    // job.setSortComparatorClass(SecondGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    FileSystem fileSystem = FileSystem.get(conf);
    if (fileSystem.exists(new Path(outPath))) {
        fileSystem.delete(new Path(outPath), true);
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // Read the Hadoop configuration
    Configuration conf = new Configuration();
    // Create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // Reducer
    job.setReducerClass(Reduce.class);

    // Partition by the first field of the key
    job.setPartitionerClass(FirstPartitioner.class);
    // Group reduce input by the first field of the key
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // The map output is <IntPair, IntWritable>
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // The reduce output is <Text, IntWritable>, written by TextOutputFormat
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    /**
     * The InputFormat splits the input into splits and supplies a RecordReader
     * for each split; the RecordReader turns each split into <LongWritable, Text>
     * key/value pairs for the Mapper. map() emits <IntPair, IntWritable> pairs,
     * and the partitioner set via job.setPartitionerClass() decides which
     * reducer receives each pair.
     */
    job.setInputFormatClass(TextInputFormat.class);
    // The OutputFormat provides the RecordWriter that writes the reduce output
    job.setOutputFormatClass(TextOutputFormat.class);

    // HDFS input path
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.12:9000/input/input/soso.txt"));
    // HDFS output path
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.12:9000/output/sort/"));
    // Submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.howbuy.hadoop.mr.online.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);//from w  w w.  j a  v  a 2s.c  o  m
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setNumReduceTasks(3);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.juniarto.secondsorter.SsJob.java

public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //job.submit();

    long time1 = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long time2 = System.nanoTime();
    long timeSpent = time2 - time1;
    LOG.info("TIME: " + timeSpent);
    return status ? 0 : 1;

}
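
The TextDsi key and the partitioner/comparator classes used here are project-specific and not shown on this page. The general division of labor in this pattern is that the sort comparator (setSortComparatorClass) orders the complete composite key, while the grouping comparator compares only the natural-key part. As a sketch of that idea only, not the project's actual CompositeKeyComparator, here is a sort comparator for the hypothetical IntPair key from the earlier blocks that orders the secondary field in descending order:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Sort comparator: first int ascending, second int descending. Registered via
// job.setSortComparatorClass(); the FirstGroupingComparator sketched earlier
// would still group reduce input on the first int alone.
class IntPairDescendingSortComparator extends WritableComparator {
    protected IntPairDescendingSortComparator() {
        super(IntPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        IntPair left = (IntPair) a;
        IntPair right = (IntPair) b;
        int cmp = Integer.compare(left.getFirst(), right.getFirst());
        return cmp != 0 ? cmp : Integer.compare(right.getSecond(), left.getSecond());
    }
}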

From source file:com.metamx.druid.indexer.SortableBytes.java

License:Open Source License

public static void useSortableBytesAsMapOutputKey(Job job) {
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setPartitionerClass(SortableBytesPartitioner.class);
}
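
A hypothetical call site for this helper, with an illustrative job name, might look like:

public static Job newIndexingJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, "index-generator"); // illustrative job name
    // One call wires up the map output key class, grouping comparator,
    // sort comparator and partitioner for SortableBytes keys.
    SortableBytes.useSortableBytesAsMapOutputKey(job);
    return job;
}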

From source file:com.neu.cs6240.TopKExperts.JoinQA.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: JoinQA <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "JoinQA");
    job.setJarByClass(JoinQA.class);
    job.setMapperClass(JoinQAMapper.class);
    job.setReducerClass(JoinQAReducer.class);
    job.setOutputKeyClass(JoinQAKey.class);
    job.setOutputValueClass(JoinQAValue.class);
    job.setPartitionerClass(JoinQAPartitioner.class);
    job.setGroupingComparatorClass(JoinQAGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    boolean isSucess = false;

    isSucess = job.waitForCompletion(true);

    if (isSucess) {
        // On successful completion of JoinQA start UserAnswerCountPerHashTag
        System.out.println("MR - JoinQA complete. Starting UserAnswerCountPerHashTag...");
        String[] argsForMR2 = new String[2];
        argsForMR2[0] = otherArgs[1];
        argsForMR2[1] = otherArgs[1] + "MR2";
        isSucess = UserAnswerCountPerHashTag.initUserAnswerCountPerHashTag(argsForMR2);

        if (isSucess) {
            // On successful completion of UserAnswerCountPerHashTag start TopKPerHashTag
            System.out.println("MR - UserAnswerCountPerHashTag complete. Starting TopKPerHashTag...");
            String[] argsForMR3 = new String[2];
            argsForMR3[0] = argsForMR2[1];
            argsForMR3[1] = argsForMR2[1] + "MR3";
            isSucess = TopKPerHashTag.initTopKPerHashTag(argsForMR3);
            if (isSucess) {
                // Successfully complete TopKPerHashTag MR
                System.out.println("All MR - Successful.");
            } else {
                // Failed UserAnswerCountPerHashTag MR
                System.out.println("MR - TopKPerHashTag failed.");
            }
        } else {
            // On unsuccessful completion of JoinQA end MR
            System.out.println("MR - UserAnswerCountPerHashTag failed.");
        }

    } else {
        // On unsuccessful completion of JoinQA end MR
        System.out.println("MR - JoinQA failed.");
    }

    System.exit(isSucess ? 0 : 1);
}

From source file:com.neu.cs6240.TopKExperts.TopKPerHashTag.java

License:Apache License

public static boolean initTopKPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TopKPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "TopKPerHashTag");
    job.setJarByClass(TopKPerHashTag.class);
    job.setMapperClass(TopKPerHashTagMapper.class);
    job.setReducerClass(TopKPerHashTagReducer.class);
    job.setOutputKeyClass(TopKPerHashTagKey.class);
    job.setOutputValueClass(Text.class);
    job.setPartitionerClass(TopKPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(TopKPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    return job.waitForCompletion(true);
}

From source file:com.neu.cs6240.TopKExperts.UserAnswerCountPerHashTag.java

License:Apache License

public static boolean initUserAnswerCountPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: UserAnswerCountPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "UserAnswerCountPerHashTag");
    job.setJarByClass(UserAnswerCountPerHashTag.class);
    job.setMapperClass(UserAnswerCountPerHashTagMapper.class);
    job.setReducerClass(UserAnswerCountPerHashTagReducer.class);
    job.setOutputKeyClass(UserAnswerCountPerHashTagKey.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(UserAnswerCountPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(UserAnswerCountPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    return job.waitForCompletion(true);
}