List of usage examples for org.apache.hadoop.mapreduce.Job#setGroupingComparatorClass
public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
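setGroupingComparatorClass tells the reduce phase which map output keys should be treated as equal when values are grouped for a single reduce() call. It is usually paired with setSortComparatorClass and a custom Partitioner to implement secondary sort, which is what most of the examples below do. A minimal sketch of such a comparator, assuming composite Text keys of the form "naturalKey\tsecondaryKey"; the class name and key layout are illustrative and do not come from any example on this page:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Illustrative grouping comparator: keys are Text values such as "user42\t2019-01-07",
// and grouping looks only at the natural key before the tab, so one reduce() call
// receives every record for "user42" while the sort comparator still orders full keys.
public class NaturalKeyGroupingComparator extends WritableComparator {

    public NaturalKeyGroupingComparator() {
        super(Text.class, true); // true: deserialize keys so compare(WritableComparable, WritableComparable) is used
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return naturalKey(a.toString()).compareTo(naturalKey(b.toString()));
    }

    private static String naturalKey(String compositeKey) {
        int tab = compositeKey.indexOf('\t');
        return tab < 0 ? compositeKey : compositeKey.substring(0, tab);
    }
}

// Wiring it into a job (job is an org.apache.hadoop.mapreduce.Job):
//     job.setMapOutputKeyClass(Text.class);
//     job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);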
From source file:nl.sanoma.hdt.report.generator.ReportGeneratorDriver.java
License:Open Source License
/**
 * Job to join the data and the metadata from distributed cache and
 * calculate the revenue by quarter and most popular product category for user
 *
 * @param dBPath the path of the import MapFile
 * @param inputPath the path of the logs directory
 * @param outputPath the path of the output directory
 * @return returns the exitCode of the job
 * @throws IOException
 * @throws URISyntaxException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Boolean generateReport(String dBPath, String inputPath, String outputPath)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();
    job.setJobName("Repor Generator");
    DistributedCache.addCacheFile(new URI(dBPath), conf);
    job.setJarByClass(ReportGeneratorDriver.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setPartitionerClass(KeyDataPartitioner.class);
    job.setGroupingComparatorClass(KeyDataGroupingComparator.class);
    job.setSortComparatorClass(KeyDataComparator.class);
    job.setMapperClass(ReportGeneratorMapper.class);
    job.setMapOutputKeyClass(KeyData.class);
    job.setMapOutputValueClass(ValueData.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReportGeneratorReducer.class);
    job.setNumReduceTasks(1);
    return job.waitForCompletion(true);
}
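The three settings here form the usual secondary sort trio on the composite KeyData key: KeyDataPartitioner and KeyDataGroupingComparator determine which records end up in the same reduce() call, while KeyDataComparator controls the order in which those records' values arrive at the reducer.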
From source file:org.apache.avro.mapreduce.AvroJob.java
License:Apache License
/**
 * Sets the map output key schema.
 *
 * @param job The job to configure.
 * @param schema The map output key schema.
 */
public static void setMapOutputKeySchema(Job job, Schema schema) {
    job.setMapOutputKeyClass(AvroKey.class);
    job.setGroupingComparatorClass(AvroKeyComparator.class);
    job.setSortComparatorClass(AvroKeyComparator.class);
    AvroSerialization.setKeyWriterSchema(job.getConfiguration(), schema);
    AvroSerialization.setKeyReaderSchema(job.getConfiguration(), schema);
    AvroSerialization.addToConfiguration(job.getConfiguration());
}
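A hedged usage sketch for the helper above; the wrapper class AvroShuffleSetup and the choice of a string key schema are illustrative, not part of AvroJob:

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroShuffleSetup {
    public static Job newJob() throws IOException {
        Job job = Job.getInstance(new Configuration(), "avro shuffle sketch");
        // Registers AvroKey as the map output key class and AvroKeyComparator as both
        // the sort and the grouping comparator, then stores the key schema in the job configuration.
        AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING));
        return job;
    }
}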
From source file:org.apache.blur.mapreduce.lib.update.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    int c = 0;
    if (args.length < 5) {
        System.err.println(
                "Usage Driver <table> <mr inc working path> <output path> <zk connection> <reducer multipler> <extra config files...>");
    }
    String table = args[c++];
    String mrIncWorkingPathStr = args[c++];
    String outputPathStr = args[c++];
    String blurZkConnection = args[c++];
    int reducerMultipler = Integer.parseInt(args[c++]);
    for (; c < args.length; c++) {
        String externalConfigFileToAdd = args[c];
        getConf().addResource(new Path(externalConfigFileToAdd));
    }
    Path outputPath = new Path(outputPathStr);
    Path mrIncWorkingPath = new Path(mrIncWorkingPathStr);
    FileSystem fileSystem = mrIncWorkingPath.getFileSystem(getConf());
    Path newData = new Path(mrIncWorkingPath, NEW);
    Path inprogressData = new Path(mrIncWorkingPath, INPROGRESS);
    Path completeData = new Path(mrIncWorkingPath, COMPLETE);
    Path fileCache = new Path(mrIncWorkingPath, CACHE);
    fileSystem.mkdirs(newData);
    fileSystem.mkdirs(inprogressData);
    fileSystem.mkdirs(completeData);
    fileSystem.mkdirs(fileCache);
    List<Path> srcPathList = new ArrayList<Path>();
    for (FileStatus fileStatus : fileSystem.listStatus(newData)) {
        srcPathList.add(fileStatus.getPath());
    }
    if (srcPathList.isEmpty()) {
        return 0;
    }
    List<Path> inprogressPathList = new ArrayList<Path>();
    boolean success = false;
    Iface client = null;
    try {
        inprogressPathList = movePathList(fileSystem, inprogressData, srcPathList);
        Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
        client = BlurClient.getClientFromZooKeeperConnectionStr(blurZkConnection);
        waitForOtherSnapshotsToBeRemoved(client, table, MRUPDATE_SNAPSHOT);
        client.createSnapshot(table, MRUPDATE_SNAPSHOT);
        TableDescriptor descriptor = client.describe(table);
        Path tablePath = new Path(descriptor.getTableUri());
        BlurInputFormat.setLocalCachePath(job, fileCache);
        BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT);
        MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, MapperForExistingData.class);
        for (Path p : inprogressPathList) {
            FileInputFormat.addInputPath(job, p);
            MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, MapperForNewData.class);
        }
        BlurOutputFormat.setOutputPath(job, outputPath);
        BlurOutputFormat.setupJob(job, descriptor);
        job.setReducerClass(UpdateReducer.class);
        job.setMapOutputKeyClass(IndexKey.class);
        job.setMapOutputValueClass(IndexValue.class);
        job.setPartitionerClass(IndexKeyPartitioner.class);
        job.setGroupingComparatorClass(IndexKeyWritableComparator.class);
        BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);
        success = job.waitForCompletion(true);
        Counters counters = job.getCounters();
        LOG.info("Counters [" + counters + "]");
    } finally {
        if (success) {
            LOG.info("Indexing job succeeded!");
            movePathList(fileSystem, completeData, inprogressPathList);
        } else {
            LOG.error("Indexing job failed!");
            movePathList(fileSystem, newData, inprogressPathList);
        }
        if (client != null) {
            client.removeSnapshot(table, MRUPDATE_SNAPSHOT);
        }
    }
    if (success) {
        return 0;
    } else {
        return 1;
    }
}
From source file:org.apache.crunch.GroupingOptions.java
License:Apache License
public void configure(Job job) {
    if (partitionerClass != null) {
        job.setPartitionerClass(partitionerClass);
    }
    if (groupingComparatorClass != null) {
        job.setGroupingComparatorClass(groupingComparatorClass);
    }
    if (sortComparatorClass != null) {
        job.setSortComparatorClass(sortComparatorClass);
    }
    if (numReducers > 0) {
        job.setNumReduceTasks(numReducers);
    }
    for (Map.Entry<String, String> e : extraConf.entrySet()) {
        job.getConfiguration().set(e.getKey(), e.getValue());
    }
}
From source file:org.apache.druid.indexer.SortableBytes.java
License:Apache License
public static void useSortableBytesAsMapOutputKey(Job job, Class<? extends Partitioner> partitionerClass) {
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setPartitionerClass(partitionerClass);
}
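A sketch of how this helper might be invoked; the wrapper class and the HashPartitioner choice are illustrative stand-ins, not how Druid's own callers use it:

import java.io.IOException;

import org.apache.druid.indexer.SortableBytes;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class SortableBytesJobSetup {
    public static Job newJob() throws IOException {
        Job job = Job.getInstance(new Configuration(), "sortable bytes sketch");
        // Map output keys become BytesWritable, with the SortableBytes grouping and sorting
        // comparators registered; the partitioner is whatever class the caller supplies.
        SortableBytes.useSortableBytesAsMapOutputKey(job, HashPartitioner.class);
        return job;
    }
}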
From source file:org.apache.gobblin.compaction.mapreduce.CompactionOrcJobConfigurator.java
License:Apache License
protected void configureMapper(Job job) {
    job.setInputFormatClass(OrcValueCombineFileInputFormat.class);
    job.setMapperClass(OrcValueMapper.class);
    job.setMapOutputKeyClass(OrcKey.class);
    job.setMapOutputValueClass(OrcValue.class);
    job.setGroupingComparatorClass(OrcKeyComparator.class);
    job.setSortComparatorClass(OrcKeyComparator.class);
}
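Because OrcKeyComparator is registered as both the sort and the grouping comparator here, map output records are grouped by full-key equality: each distinct OrcKey produces its own reduce() call.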
From source file:org.apache.hadoop.examples.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);
    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);
    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
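The FirstGroupingComparator referenced above groups IntPair keys by their first integer only. A sketch of such a comparator, assuming IntPair exposes getFirst() and serializes its first int in the leading four bytes of the key (in the Hadoop example it is a static nested class of SecondarySort):

import org.apache.hadoop.examples.SecondarySort.IntPair;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;

// Sketch: group IntPair keys on the first int only, so every pair sharing a first value
// reaches one reduce() call even though the sort comparator ordered them by both ints.
public class FirstGroupingComparator implements RawComparator<IntPair> {

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Compare only the leading 4 bytes of each serialized key (the first int).
        return WritableComparator.compareBytes(b1, s1, Integer.SIZE / 8, b2, s2, Integer.SIZE / 8);
    }

    @Override
    public int compare(IntPair o1, IntPair o2) {
        int l = o1.getFirst();
        int r = o2.getFirst();
        return l == r ? 0 : (l < r ? -1 : 1);
    }
}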
From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopGroupingTest.java
License:Apache License
/**
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();
    Job job = Job.getInstance();
    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);
    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Mapper.class);
    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }
    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);
    assertTrue(vals.isEmpty());
}
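Note the branch in this test (and in the two near-identical variants below): on the combiner path it relies on setCombinerKeyGroupingComparatorClass, which controls how keys are grouped for the combiner's input, while the reducer path uses setGroupingComparatorClass to group keys for reduce(); YearComparator plays the same role in both cases.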
From source file:org.apache.ignite.internal.processors.hadoop.HadoopGroupingTest.java
License:Apache License
/**
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();
    Job job = Job.getInstance();
    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);
    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Mapper.class);
    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }
    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);
    assertTrue(vals.isEmpty());
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopGroupingTest.java
License:Apache License
/**
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    HadoopGroupingTestState.values().clear();
    Job job = Job.getInstance();
    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);
    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Mapper.class);
    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }
    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);
    assertTrue(HadoopGroupingTestState.values().isEmpty());
}