Usage examples for the setSortComparatorClass method of org.apache.hadoop.mapreduce.Job
public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
From source file:com.cloudera.crunch.GroupingOptions.java
License:Open Source License
public void configure(Job job) { if (partitionerClass != null) { job.setPartitionerClass(partitionerClass); }// w w w . java 2 s . c om if (groupingComparatorClass != null) { job.setGroupingComparatorClass(groupingComparatorClass); } if (sortComparatorClass != null) { job.setSortComparatorClass(sortComparatorClass); } if (numReducers > 0) { job.setNumReduceTasks(numReducers); LOG.info(String.format("Using %d reduce tasks", numReducers)); } }
From source file:com.cloudera.crunch.type.avro.AvroGroupedTableType.java
License:Open Source License
/**
 * Configures the job's shuffle for Avro-keyed map output.
 *
 * <p>Publishes the table type's schema under {@code AvroJob.MAP_OUTPUT_SCHEMA}, installs
 * {@code AvroKeyComparator} as the sort comparator, sets the map output key/value classes to
 * {@code AvroKey}/{@code AvroValue}, applies the optional grouping options, and finally makes
 * sure {@code AvroSerialization} is listed in {@code io.serializations}.
 *
 * @param job the job to configure
 * @param options additional grouping options to apply, or {@code null} for none
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  AvroTableType<K, V> avroTableType = (AvroTableType<K, V>) tableType;
  job.getConfiguration().set(AvroJob.MAP_OUTPUT_SCHEMA, avroTableType.getSchema().toString());

  job.setSortComparatorClass(AvroKeyComparator.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);

  // Caller-supplied options are applied after the defaults set above.
  if (options != null) {
    options.configure(job);
  }

  String avroSerializationName = AvroSerialization.class.getName();
  Collection<String> serializations =
      job.getConfiguration().getStringCollection("io.serializations");
  if (serializations.contains(avroSerializationName)) {
    return;
  }
  serializations.add(avroSerializationName);
  job.getConfiguration()
      .setStrings("io.serializations", serializations.toArray(new String[0]));
}
From source file:com.conversantmedia.mapreduce.io.CompositeSortKeySerialization.java
License:Apache License
/** * Convenience method to configure the job for using the composite key. * @param job the job using this serializer * @param groupKeyClass the key type used for grouping * @param sortKeyClass the key type used for sorting *//*from w ww. j a va 2s .c o m*/ @SuppressWarnings("rawtypes") public static void configureMapOutputKey(Job job, Class<? extends WritableComparable> groupKeyClass, Class<? extends WritableComparable> sortKeyClass) { // First, setup our classes... job.getConfiguration().set(CONF_KEY_GROUPKEY_CLASS, groupKeyClass.getName()); job.getConfiguration().set(CONF_KEY_SORTKEY_CLASS, sortKeyClass.getName()); // Set this class as our map output key job.setMapOutputKeyClass(CompositeSortKey.class); // Setup the partitioner and comparators. job.setPartitionerClass(CompositeSortKey.KeyPartitioner.class); job.setGroupingComparatorClass(CompositeSortKey.GroupingComparator.class); job.setSortComparatorClass(CompositeSortKey.NaturalSortComparator.class); // Now setup the serialization by registering with the framework. Collection<String> serializations = new ArrayList<>(); serializations.add(CompositeSortKeySerialization.class.getName()); serializations.addAll(job.getConfiguration().getStringCollection("io.serializations")); job.getConfiguration().setStrings("io.serializations", serializations.toArray(new String[serializations.size()])); }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.SorterAnnotationHandler.java
License:Apache License
@Override public void process(Annotation annotation, Job job, Object target) { Sorter sorter = (Sorter) annotation; if (sorter != null && sorter.value() != null && sorter.value() != NULLCOMPARATOR.class) { job.setSortComparatorClass(sorter.value()); }//from w ww .j a v a2 s .co m }
From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairsSecondarySort.java
License:Apache License
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException { Job job = new Job(conf, "pair wise count"); job.setJarByClass(ConditionalProbabilityPairsSecondarySort.class); job.setMapperClass(TokenizerMapper.class); job.setSortComparatorClass(KeyComparator.class); job.setGroupingComparatorClass(GroupComparator.class); //job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(ProbDistPartitioner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); //Is the output value class for Map or Reduce ? job.setOutputValueClass(Text.class); //job.setNumReduceTasks(5); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return job;//from w ww . j ava 2s. c o m }
From source file:com.daleway.training.hadoop.pagerank.PageRankSecondarySort.java
License:Apache License
/**
 * Builds a single-reducer job whose {@code LongWritable} map output keys are sorted with
 * {@code LongWritable.DecreasingComparator}.
 *
 * <p>The job is limited to one reduce task and one reduce attempt.
 *
 * @param conf the configuration the job is created from
 * @param inputPath the path added as job input
 * @param outputPath the path set as job output
 * @return the configured, not yet submitted, job
 * @throws IOException if the job cannot be created or the paths cannot be registered
 */
public static Job createJob(Configuration conf, String inputPath, String outputPath)
    throws IOException {
  Path input = new Path(inputPath);
  Path output = new Path(outputPath);

  Job job = new Job(conf, "pair wise count");
  job.setJarByClass(PageRankSecondarySort.class);

  job.setMapperClass(TokenizerMapper.class);
  job.setReducerClass(IntSumReducer.class);

  // Map output types, and the comparator used to order the keys during the shuffle.
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

  job.setMaxReduceAttempts(1);
  job.setNumReduceTasks(1);

  FileInputFormat.addInputPath(job, input);
  FileOutputFormat.setOutputPath(job, output);
  return job;
}
From source file:com.datasalt.pangool.tuplemr.TupleMRBuilder.java
License:Apache License
public Job createJob() throws IOException, TupleMRException { failIfNull(tupleReducer, "Need to set a group handler"); failIfEmpty(multipleInputs.getMultiInputs(), "Need to add at least one input"); failIfNull(outputFormat, "Need to set output format"); failIfNull(outputKeyClass, "Need to set outputKeyClass"); failIfNull(outputValueClass, "Need to set outputValueClass"); failIfNull(outputPath, "Need to set outputPath"); // perform a deep copy of the Configuration this.conf = new Configuration(this.conf); TupleMRConfig tupleMRConf = buildConf(); // Serialize PangoolConf in Hadoop Configuration instanceFilesCreated.addAll(TupleMRConfig.set(tupleMRConf, conf)); Job job = (jobName == null) ? new Job(conf) : new Job(conf, jobName); if (tupleMRConf.getRollupFrom() != null) { job.setReducerClass(RollupReducer.class); } else {/* w ww .jav a 2 s. c om*/ job.setReducerClass(SimpleReducer.class); } if (tupleCombiner != null) { job.setCombinerClass(SimpleCombiner.class); // not rollup by now // Set Combiner Handler String uniqueName = UUID.randomUUID().toString() + '.' + "combiner-handler.dat"; try { InstancesDistributor.distribute(tupleCombiner, uniqueName, job.getConfiguration()); instanceFilesCreated.add(uniqueName); job.getConfiguration().set(SimpleCombiner.CONF_COMBINER_HANDLER, uniqueName); } catch (URISyntaxException e1) { throw new TupleMRException(e1); } } // Set Tuple Reducer try { String uniqueName = UUID.randomUUID().toString() + '.' + "group-handler.dat"; InstancesDistributor.distribute(tupleReducer, uniqueName, job.getConfiguration()); instanceFilesCreated.add(uniqueName); job.getConfiguration().set(SimpleReducer.CONF_REDUCER_HANDLER, uniqueName); } catch (URISyntaxException e1) { throw new TupleMRException(e1); } // Enabling serialization TupleSerialization.enableSerialization(job.getConfiguration()); job.setJarByClass((jarByClass != null) ? 
jarByClass : tupleReducer.getClass()); job.setMapOutputKeyClass(DatumWrapper.class); job.setMapOutputValueClass(NullWritable.class); job.setPartitionerClass(TupleHashPartitioner.class); job.setGroupingComparatorClass(GroupComparator.class); job.setSortComparatorClass(SortComparator.class); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); FileOutputFormat.setOutputPath(job, outputPath); instanceFilesCreated.addAll(multipleInputs.configureJob(job)); instanceFilesCreated.addAll(namedOutputs.configureJob(job)); // Configure a {@link ProxyOutputFormat} for Pangool's Multiple Outputs to // work: {@link PangoolMultipleOutput} String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat"; try { InstancesDistributor.distribute(outputFormat, uniqueName, conf); instanceFilesCreated.add(uniqueName); } catch (URISyntaxException e1) { throw new TupleMRException(e1); } job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName); job.setOutputFormatClass(ProxyOutputFormat.class); return job; }
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { printUsage();/*from w w w .j av a 2s. c om*/ } FileSystem.get(new Configuration()).delete(new Path(args[1]), true); Job job = Job.getInstance(super.getConf(), "AvroWordCount"); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("AvroWordCount"); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 1 : 0; }
From source file:com.j.distributed.sorter.SorterJob.java
@Override public int run(String... options) throws Exception { Job job = Job.getInstance(getConf(), getClass().toString()); job.setJarByClass(getClass());//from ww w .jav a2s . c o m job.setMapperClass(SorterMapper.class); job.setCombinerClass(SorterReducer.class); job.setReducerClass(SorterReducer.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); job.setSortComparatorClass(LongWritable.DecreasingComparator.class); FileInputFormat.addInputPath(job, new Path(options[1])); FileOutputFormat.setOutputPath(job, new Path(options[2])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.juniarto.secondsorter.SsJob.java
public int run(String[] allArgs) throws Exception { Configuration conf = getConf(); Job job = new Job(conf, "secondary sort"); job.setJarByClass(SsJob.class); job.setPartitionerClass(NaturalKeyPartitioner.class); job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class); job.setSortComparatorClass(CompositeKeyComparator.class); job.setMapOutputKeyClass(TextDsi.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(SsMapper.class); job.setReducerClass(SsReducer.class); job.setNumReduceTasks(2);/* w w w . ja v a 2 s. c o m*/ String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs(); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //job.submit(); long time1 = System.nanoTime(); boolean status = job.waitForCompletion(true); long time2 = System.nanoTime(); long timeSpent = time2 - time1; LOG.info("TIME: " + timeSpent); return 0; }