List of usage examples for org.apache.hadoop.mapreduce.Job#setSortComparatorClass
public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
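Before the project-specific examples below, a minimal sketch of the usual pattern. The comparator passed to setSortComparatorClass must be a RawComparator over the map output key type, and it must be registered before the job is submitted (the method throws IllegalStateException once the job is running). The class and job names here (DescendingLongComparator, "descending-sort") are hypothetical and not taken from any of the source files that follow; the sketch assumes a job whose map output key is LongWritable and inverts the natural sort order of keys during the shuffle.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical comparator: sorts LongWritable map output keys in descending order.
public class DescendingLongComparator extends WritableComparator {

    public DescendingLongComparator() {
        super(LongWritable.class, true); // true = create key instances for compare()
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        // Invert the natural ordering of LongWritable
        return -((LongWritable) a).compareTo((LongWritable) b);
    }
}

A driver would then wire it in before submission:

Job job = Job.getInstance(new Configuration(), "descending-sort");
job.setMapOutputKeyClass(LongWritable.class);
// Must be called before the job is submitted; otherwise IllegalStateException is thrown.
job.setSortComparatorClass(DescendingLongComparator.class);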
From source file:nl.sanoma.hdt.report.generator.ReportGeneratorDriver.java
License:Open Source License
/**
 * Job to join the data and the metadata from the distributed cache and
 * calculate the revenue by quarter and the most popular product category per user.
 *
 * @param dBPath     the path of the import MapFile
 * @param inputPath  the path of the logs directory
 * @param outputPath the path of the output directory
 * @return returns the exitCode of the job
 * @throws IOException
 * @throws URISyntaxException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Boolean generateReport(String dBPath, String inputPath, String outputPath)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();
    job.setJobName("Report Generator");
    DistributedCache.addCacheFile(new URI(dBPath), conf);
    job.setJarByClass(ReportGeneratorDriver.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setPartitionerClass(KeyDataPartitioner.class);
    job.setGroupingComparatorClass(KeyDataGroupingComparator.class);
    job.setSortComparatorClass(KeyDataComparator.class);
    job.setMapperClass(ReportGeneratorMapper.class);
    job.setMapOutputKeyClass(KeyData.class);
    job.setMapOutputValueClass(ValueData.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReportGeneratorReducer.class);
    job.setNumReduceTasks(1);
    return job.waitForCompletion(true);
}
From source file:org.acacia.csr.java.CSRConverter.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
        printUsage();
        return;
    }
    // These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);
    if (fs1.exists(new Path(dir1))) {
        fs1.delete(new Path(dir1), true);
    }
    System.out.println("Done deleting the dir : " + dir1);

    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");
    if (!fs1.exists(notinPath)) {
        fs1.create(notinPath);
    }
    System.out.println("Done deleting the dir : " + dir2);

    // Note on Aug 23 2014: Sometimes after this the MapReduce job hangs; need to see why.
    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    // First job creates the inverted index
    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    //conf.setMapperClass(InvertedMapper.class);
    conf.setReducerClass(InvertedReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    //FileInputFormat.setInputPaths(conf, new Path(args[0]));
    MultipleInputs.addInputPath(conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(conf, new Path("/user/miyuru/notinverts/notinverts"), TextInputFormat.class,
            InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    // Also, for the moment we turn off speculative execution
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);

    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}
From source file:org.acacia.csr.java.NotInFinder.java
License:Apache License
public static void main(String[] args) throws Exception {
    String dir1 = "/user/miyuru/wcout";
    String dir2 = "/user/miyuru/notinverts";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(TokenizerMapper.class);
    conf.setReducerClass(IntSumReducer.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));

    Job job = new Job(conf, "NotInFinder");
    job.setJarByClass(WordCount.class);
    // job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    // job.setOutputKeyClass(LongWritable.class);
    // job.setOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}
From source file:org.acacia.csr.java.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
    }

    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true);
    */

    String dir3 = "/user/miyuru/wcout";
    String dir5 = "/user/miyuru/input";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs3 = FileSystem.get(new JobConf());
    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));

    Job job3 = new Job(conf3, "word count");
    job3.setJarByClass(WordCount.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);
    job3.setSortComparatorClass(SortComparator.class);
    job3.waitForCompletion(true);

    PrintWriter writer;
    try {
        writer = new PrintWriter("/tmp/wfile", "UTF-8");
        writer.println("");
        writer.flush();
        writer.close();
    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (UnsupportedEncodingException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    System.out.println("------Done Word Count---------------");
}
From source file:org.acacia.csr.java.ZeroVertexSearcher.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
    }

    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true);
    */

    String dir3 = "/user/miyuru/zout";
    String dir5 = "/user/miyuru/input";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs3 = FileSystem.get(new JobConf());
    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));
    // If the job fails we assume that it happens because we found zero. Therefore we do not attempt again.
    conf3.set("mapred.map.max.attempts", "0");

    Job job3 = new Job(conf3, "zero_vertex_search");
    job3.setJarByClass(ZeroVertexSearcher.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);
    job3.setNumReduceTasks(0);
    job3.setSortComparatorClass(SortComparator.class);

    try {
        job3.waitForCompletion(true);
    } catch (org.acacia.csr.java.ZeroFoundException ex) {
        System.out.println("Found Zero vertex");
        job3.killJob();
    }
    System.out.println("------Done Zero Vertex search---------------");
}
From source file:org.apache.avro.mapreduce.AvroJob.java
License:Apache License
/**
 * Sets the map output key schema.
 *
 * @param job The job to configure.
 * @param schema The map output key schema.
 */
public static void setMapOutputKeySchema(Job job, Schema schema) {
    job.setMapOutputKeyClass(AvroKey.class);
    job.setGroupingComparatorClass(AvroKeyComparator.class);
    job.setSortComparatorClass(AvroKeyComparator.class);
    AvroSerialization.setKeyWriterSchema(job.getConfiguration(), schema);
    AvroSerialization.setKeyReaderSchema(job.getConfiguration(), schema);
    AvroSerialization.addToConfiguration(job.getConfiguration());
}
From source file:org.apache.crunch.GroupingOptions.java
License:Apache License
public void configure(Job job) {
    if (partitionerClass != null) {
        job.setPartitionerClass(partitionerClass);
    }
    if (groupingComparatorClass != null) {
        job.setGroupingComparatorClass(groupingComparatorClass);
    }
    if (sortComparatorClass != null) {
        job.setSortComparatorClass(sortComparatorClass);
    }
    if (numReducers > 0) {
        job.setNumReduceTasks(numReducers);
    }
    for (Map.Entry<String, String> e : extraConf.entrySet()) {
        job.getConfiguration().set(e.getKey(), e.getValue());
    }
}
From source file:org.apache.crunch.types.avro.AvroGroupedTableType.java
License:Apache License
@Override
public void configureShuffle(Job job, GroupingOptions options) {
    AvroTableType<K, V> att = (AvroTableType<K, V>) tableType;
    String schemaJson = att.getSchema().toString();
    Configuration conf = job.getConfiguration();

    if (att.hasReflect()) {
        if (att.hasSpecific()) {
            Avros.checkCombiningSpecificAndReflectionSchemas();
        }
        conf.setBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, true);
    }
    conf.set(AvroJob.MAP_OUTPUT_SCHEMA, schemaJson);
    job.setSortComparatorClass(AvroKeyComparator.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(AvroValue.class);

    if (options != null) {
        options.configure(job);
    }

    Avros.configureReflectDataFactory(conf);

    Collection<String> serializations = job.getConfiguration().getStringCollection("io.serializations");
    if (!serializations.contains(SafeAvroSerialization.class.getName())) {
        serializations.add(SafeAvroSerialization.class.getName());
        job.getConfiguration().setStrings("io.serializations", serializations.toArray(new String[0]));
    }
}
From source file:org.apache.druid.indexer.SortableBytes.java
License:Apache License
public static void useSortableBytesAsMapOutputKey(Job job, Class<? extends Partitioner> partitionerClass) {
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setPartitionerClass(partitionerClass);
}
From source file:org.apache.gobblin.compaction.mapreduce.CompactionOrcJobConfigurator.java
License:Apache License
protected void configureMapper(Job job) {
    job.setInputFormatClass(OrcValueCombineFileInputFormat.class);
    job.setMapperClass(OrcValueMapper.class);
    job.setMapOutputKeyClass(OrcKey.class);
    job.setMapOutputValueClass(OrcValue.class);
    job.setGroupingComparatorClass(OrcKeyComparator.class);
    job.setSortComparatorClass(OrcKeyComparator.class);
}