List of usage examples for org.apache.hadoop.mapreduce Job setPartitionerClass
public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException
From source file:com.bigdog.hadoop.mapreduce.partition.KpiApp.java
public void kpi() throws Exception { final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName()); job.setJarByClass(KpiApp.class); //1.1 /*from w w w . j a v a2 s .com*/ FileInputFormat.setInputPaths(job, INPUT_PATH); //?? job.setInputFormatClass(TextInputFormat.class); //1.2Mapper job.setMapperClass(MyMapper.class); //<k2,v2> job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(KpiWritable.class); //1.3 job.setPartitionerClass(KpiPartitioner.class); job.setNumReduceTasks(2); //1.4 TODO ?? //1.5 TODO ?? //2.2 reduce job.setReducerClass(MyReducer.class); //<k3,v3> job.setOutputKeyClass(Text.class); job.setOutputValueClass(KpiWritable.class); //2.3 FileOutputFormat.setOutputPath(job, new Path(OUT_PATH)); //? job.setOutputFormatClass(TextOutputFormat.class); //???JobTracker job.waitForCompletion(true); }
From source file:com.binbo.wordcount.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);//w ww. j a va2s . co m } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); // Set the combiner job.setPartitionerClass(WordPartitioner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java
License:Apache License
/** * Configure a MapReduce Job to perform an incremental load into the given * table. This/*from w ww. ja v a2 s . c o m*/ * <ul> * <li>Inspects the table to configure a total order partitioner</li> * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li> * <li>Sets the number of reduce tasks to match the current number of regions</li> * <li>Sets the output key/value class to match ZHFileOutputFormat's requirements</li> * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or * ZPutSortReducer)</li> * </ul> * The user should be sure to set the map output value class to either KeyValue or Put before * running this function. */ public static void configureIncrementalLoad(Job job, HTable table) throws IOException { Configuration conf = job.getConfiguration(); job.setPartitionerClass(TotalOrderPartitioner.class); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(ZHFileOutputFormat.class); // Based on the configured map output class, set the correct reducer to properly // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. 
if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(ZPutSortReducer.class); } else { LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } LOG.info("Looking up current regions for table " + table); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table); LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis()); LOG.info("Writing partition information to " + partitionsPath); FileSystem fs = partitionsPath.getFileSystem(conf); writePartitions(conf, partitionsPath, startKeys); partitionsPath.makeQualified(fs); URI cacheUri; try { cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH); } catch (URISyntaxException e) { throw new IOException(e); } DistributedCache.addCacheFile(cacheUri, conf); DistributedCache.createSymlink(conf); LOG.info("Incremental table output configured."); }
From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License:Apache License
/** * Configure <code>job</code> with a TotalOrderPartitioner, partitioning * against <code>splitPoints</code>. Cleans up the partitions file after job * exists.// w w w . ja va 2s .c o m */ static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException { // create the partitions file FileSystem fs = FileSystem.get(job.getConfiguration()); Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID()); fs.makeQualified(partitionsPath); fs.deleteOnExit(partitionsPath); writePartitions(job.getConfiguration(), partitionsPath, splitPoints); // configure job to use it job.setPartitionerClass(TotalOrderPartitioner.class); TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath); }
From source file:com.cloudera.crunch.GroupingOptions.java
License:Open Source License
/**
 * Applies these grouping options to {@code job}.
 *
 * <p>Each option is applied only when it has been set: the comparator and
 * partitioner classes when non-null, the reducer count when positive.
 *
 * @param job the job to configure
 */
public void configure(Job job) {
    // Ordering and grouping of map output keys.
    if (sortComparatorClass != null) {
        job.setSortComparatorClass(sortComparatorClass);
    }
    if (groupingComparatorClass != null) {
        job.setGroupingComparatorClass(groupingComparatorClass);
    }

    // Routing of keys to reducers.
    if (partitionerClass != null) {
        job.setPartitionerClass(partitionerClass);
    }

    // A non-positive reducer count leaves the job's setting untouched.
    if (numReducers > 0) {
        job.setNumReduceTasks(numReducers);
        LOG.info(String.format("Using %d reduce tasks", numReducers));
    }
}
From source file:com.cloudera.recordservice.examples.terasort.TeraSort.java
License:Apache License
@Override public int run(String[] args) throws Exception { boolean useRecordService = false; if (args.length != 2 && args.length != 3) { usage();//from w ww . j av a2s .c o m return 1; } if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } LOG.info("starting"); Job job = Job.getInstance(getConf()); boolean useSimplePartitioner = getUseSimplePartitioner(job); if (useRecordService) { RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); job.setInputFormatClass(RecordServiceTeraInputFormat.class); useSimplePartitioner = true; } else { Path inputDir = new Path(args[0]); TeraInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TeraInputFormat.class); } Path outputDir = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputDir); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TeraOutputFormat.class); if (useSimplePartitioner) { job.setPartitionerClass(SimplePartitioner.class); } else { long start = System.currentTimeMillis(); Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); try { TeraInputFormat.writePartitionFile(job, partitionFile); } catch (Throwable e) { LOG.error(e.getMessage()); return -1; } job.addCacheFile(partitionUri); long end = System.currentTimeMillis(); System.out.println("Spent " + (end - start) + "ms computing partitions."); job.setPartitionerClass(TotalOrderPartitioner.class); } job.getConfiguration().setInt("dfs.replication", getOutputReplication(job)); TeraOutputFormat.setFinalSync(job, true); int ret = job.waitForCompletion(true) ? 0 : 1; LOG.info("done"); return ret; }
From source file:com.conversantmedia.mapreduce.io.CompositeSortKeySerialization.java
License:Apache License
/** * Convenience method to configure the job for using the composite key. * @param job the job using this serializer * @param groupKeyClass the key type used for grouping * @param sortKeyClass the key type used for sorting *///from w w w. j a v a2 s . c om @SuppressWarnings("rawtypes") public static void configureMapOutputKey(Job job, Class<? extends WritableComparable> groupKeyClass, Class<? extends WritableComparable> sortKeyClass) { // First, setup our classes... job.getConfiguration().set(CONF_KEY_GROUPKEY_CLASS, groupKeyClass.getName()); job.getConfiguration().set(CONF_KEY_SORTKEY_CLASS, sortKeyClass.getName()); // Set this class as our map output key job.setMapOutputKeyClass(CompositeSortKey.class); // Setup the partitioner and comparators. job.setPartitionerClass(CompositeSortKey.KeyPartitioner.class); job.setGroupingComparatorClass(CompositeSortKey.GroupingComparator.class); job.setSortComparatorClass(CompositeSortKey.NaturalSortComparator.class); // Now setup the serialization by registering with the framework. Collection<String> serializations = new ArrayList<>(); serializations.add(CompositeSortKeySerialization.class.getName()); serializations.addAll(job.getConfiguration().getStringCollection("io.serializations")); job.getConfiguration().setStrings("io.serializations", serializations.toArray(new String[serializations.size()])); }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.PartitionerAnnotationHandler.java
License:Apache License
/**
 * Sets the job's partitioner class from the given annotation.
 *
 * <p>The annotation is cast to {@code Partitioner}. Nothing is done when the
 * annotation is null, or when its value is null or {@code NULLPARTITIONER.class}.
 *
 * @param annotation the annotation to read the partitioner class from
 * @param job        the job to configure
 * @param target     not used by this method
 */
@Override
public void process(Annotation annotation, Job job, Object target) {
    final Partitioner declared = (Partitioner) annotation;
    if (declared == null) {
        return;
    }
    if (declared.value() == null || declared.value() == NULLPARTITIONER.class) {
        return;
    }
    job.setPartitionerClass(declared.value());
}
From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairs.java
License:Apache License
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException { Job job = new Job(conf, "pair wise count"); job.setJarByClass(ConditionalProbabilityPairs.class); job.setMapperClass(TokenizerMapper.class); //job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(ProbDistPartitioner.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(5);/*w w w . jav a 2 s.c o m*/ FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return job; }
From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairsSecondarySort.java
License:Apache License
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException { Job job = new Job(conf, "pair wise count"); job.setJarByClass(ConditionalProbabilityPairsSecondarySort.class); job.setMapperClass(TokenizerMapper.class); job.setSortComparatorClass(KeyComparator.class); job.setGroupingComparatorClass(GroupComparator.class); //job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(ProbDistPartitioner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); //Is the output value class for Map or Reduce ? job.setOutputValueClass(Text.class); //job.setNumReduceTasks(5); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return job;/*from ww w .java2 s .c o m*/ }