Usage examples for org.apache.hadoop.mapreduce.Job#setGroupingComparatorClass(Class), collected from open-source projects.
public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
From source file:com.elixir.hadoop.Chromo.FragmentCoverage.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);//from ww w . j a va 2 s. c om } Job job = Job.getInstance(conf, "position"); job.setJarByClass(FragmentCoverage.class); job.setMapperClass(CoverageMapper.class); job.setCombinerClass(IntSumReducer.class); job.setNumReduceTasks(5); job.setMapOutputKeyClass(com.elixir.hadoop.Chromo.SecondrySort.IntPair.class); //job.setSpeculativeExecution(true); job.setPartitionerClass(ChromoPartitioner.class); job.setGroupingComparatorClass(com.elixir.hadoop.Chromo.SecondrySort.FirstGroupingComparator.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // job.setOutputFormatClass(Text.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.hadoop.examples.secondSort.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception { // ?hadoop?/*from w w w .ja v a 2s.c om*/ Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: secondarysort <in> <out>"); System.exit(2); } // ? Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); // Mapper job.setMapperClass(MapClass.class); // ???CombinerCombiner<Text, IntWritable>Reduce<IntPair, IntWritable>? //job.setCombinerClass(Reduce.class); // Reducer job.setReducerClass(Reduce.class); // * // *group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); //setSortComparatorClass()hadoopkey?(?2.Hadoopkey?) //IntPair?compareTo() //job.setSortComparatorClass(cls); // * job.setGroupingComparatorClass(FirstGroupingComparator.class); // map Key // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); // mapValue job.setMapOutputValueClass(IntWritable.class); // rduceKeyTextOutputFormatClassTextOutputFormat // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); // rduceValue job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); // ??job System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.hadoop.secondarysort.SecondarySortDESC.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // if (otherArgs.length != 2) { // System.err.println("Usage: secondarysrot <in> <out>"); // System.exit(2); // }/*from w w w. j a v a 2 s. com*/ // JobConf jobConf = new JobConf(); Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySortDESC.class); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); // group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); job.setGroupingComparatorClass(FirstGroupingComparator.class); // conf.setClass("mapred.output.key.comparator.class", // KeyComparator.class, RawComparator.class); // job.setSortComparatorClass(SecondGroupingComparator.class); // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); FileSystem fileSystem = FileSystem.get(conf); if (fileSystem.exists(new Path(outPath))) { fileSystem.delete(new Path(outPath)); } System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception { // ?hadoop?/* w w w. j a v a 2 s . c o m*/ Configuration conf = new Configuration(); // ? Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); // Mapper job.setMapperClass(MapClass.class); // Reducer job.setReducerClass(Reduce.class); // job.setPartitionerClass(FirstPartitioner.class); // job.setGroupingComparatorClass(FirstGroupingComparator.class); // map Key job.setMapOutputKeyClass(IntPair.class); // mapValue job.setMapOutputValueClass(IntWritable.class); // rduceKeyTextOutputFormatClassTextOutputFormat job.setOutputKeyClass(Text.class); // rduceValue job.setOutputValueClass(IntWritable.class); /** * ?????splites???RecordReder * ??RecordReder?keyvalue * Map<LongWritable, Text> * Mapmap<LongWritable, Text>Mapmap * ?List<IntPair, IntWritable> * map?job.setPartitionerClassList?reducer */ job.setInputFormatClass(TextInputFormat.class); // ??RecordWriter? job.setOutputFormatClass(TextOutputFormat.class); // hdfs FileInputFormat.addInputPath(job, new Path("hdfs://192.1168.1.12:9000/input/input/soso.txt")); // hdfs FileOutputFormat.setOutputPath(job, new Path("hdfs://192.1168.1.12:9000/output/sort/")); // ??job System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.howbuy.hadoop.mr.online.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: secondarysrot <in> <out>"); System.exit(2);//from w w w. j a v a 2s.c o m } Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); // group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); job.setGroupingComparatorClass(FirstGroupingComparator.class); // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(TextInputFormat.class); // job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setNumReduceTasks(3); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.juniarto.secondsorter.SsJob.java
public int run(String[] allArgs) throws Exception { Configuration conf = getConf(); Job job = new Job(conf, "secondary sort"); job.setJarByClass(SsJob.class); job.setPartitionerClass(NaturalKeyPartitioner.class); job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class); job.setSortComparatorClass(CompositeKeyComparator.class); job.setMapOutputKeyClass(TextDsi.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(SsMapper.class); job.setReducerClass(SsReducer.class); job.setNumReduceTasks(2);/*from www .ja v a 2s . c o m*/ String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs(); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //job.submit(); long time1 = System.nanoTime(); boolean status = job.waitForCompletion(true); long time2 = System.nanoTime(); long timeSpent = time2 - time1; LOG.info("TIME: " + timeSpent); return 0; }
From source file:com.metamx.druid.indexer.SortableBytes.java
License:Open Source License
/**
 * Configures the given job to use SortableBytes as its map output key:
 * BytesWritable as the key class, together with the matching partitioner,
 * sort comparator, and grouping comparator.
 *
 * @param job the job to configure; mutated in place
 */
public static void useSortableBytesAsMapOutputKey(Job job) {
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setPartitionerClass(SortableBytesPartitioner.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
}
From source file:com.neu.cs6240.TopKExperts.JoinQA.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: JoinQA <in> <out>"); System.exit(2);//from w w w . ja v a 2 s .com } Job job = new Job(conf, "JoinQA"); job.setJarByClass(JoinQA.class); job.setMapperClass(JoinQAMapper.class); job.setReducerClass(JoinQAReducer.class); job.setOutputKeyClass(JoinQAKey.class); job.setOutputValueClass(JoinQAValue.class); job.setPartitionerClass(JoinQAPartitioner.class); job.setGroupingComparatorClass(JoinQAGroupComparator.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); boolean isSucess = false; isSucess = job.waitForCompletion(true); if (isSucess) { // On successful completion of JoinQA start UserAnswerCountPerHashTag System.out.println("MR - JoinQA complete. Starting UserAnswerCountPerHashTag..."); String[] argsForMR2 = new String[2]; argsForMR2[0] = otherArgs[1]; argsForMR2[1] = otherArgs[1] + "MR2"; isSucess = UserAnswerCountPerHashTag.initUserAnswerCountPerHashTag(argsForMR2); if (isSucess) { // On successful completion of UserAnswerCountPerHashTag start TopKPerHashTag System.out.println("MR - UserAnswerCountPerHashTag complete. Starting TopKPerHashTag..."); String[] argsForMR3 = new String[2]; argsForMR3[0] = argsForMR2[1]; argsForMR3[1] = argsForMR2[1] + "MR3"; isSucess = TopKPerHashTag.initTopKPerHashTag(argsForMR3); if (isSucess) { // Successfully complete TopKPerHashTag MR System.out.println("All MR - Successful."); } else { // Failed UserAnswerCountPerHashTag MR System.out.println("MR - TopKPerHashTag failed."); } } else { // On unsuccessful completion of JoinQA end MR System.out.println("MR - UserAnswerCountPerHashTag failed."); } } else { // On unsuccessful completion of JoinQA end MR System.out.println("MR - JoinQA failed."); } System.exit(isSucess ? 0 : 1); }
From source file:com.neu.cs6240.TopKExperts.TopKPerHashTag.java
License:Apache License
public static boolean initTopKPerHashTag(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: TopKPerHashTag <in> <out>"); System.exit(2);//from w ww . java 2 s . co m } Job job = new Job(conf, "TopKPerHashTag"); job.setJarByClass(TopKPerHashTag.class); job.setMapperClass(TopKPerHashTagMapper.class); job.setReducerClass(TopKPerHashTagReducer.class); job.setOutputKeyClass(TopKPerHashTagKey.class); job.setOutputValueClass(Text.class); job.setPartitionerClass(TopKPerHashTagPartitioner.class); job.setGroupingComparatorClass(TopKPerHashTagGroupComparator.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); return job.waitForCompletion(true); }
From source file:com.neu.cs6240.TopKExperts.UserAnswerCountPerHashTag.java
License:Apache License
public static boolean initUserAnswerCountPerHashTag(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: UserAnswerCountPerHashTag <in> <out>"); System.exit(2);//w ww . ja v a 2s . c o m } Job job = new Job(conf, "UserAnswerCountPerHashTag"); job.setJarByClass(UserAnswerCountPerHashTag.class); job.setMapperClass(UserAnswerCountPerHashTagMapper.class); job.setReducerClass(UserAnswerCountPerHashTagReducer.class); job.setOutputKeyClass(UserAnswerCountPerHashTagKey.class); job.setOutputValueClass(IntWritable.class); job.setPartitionerClass(UserAnswerCountPerHashTagPartitioner.class); job.setGroupingComparatorClass(UserAnswerCountPerHashTagGroupComparator.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); return job.waitForCompletion(true); }