List of usage examples for org.apache.hadoop.mapreduce Job getSortComparator
public RawComparator<?> getSortComparator()
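As background for the examples below: getSortComparator() returns the RawComparator used to order map output keys during the shuffle, either the class registered with Job#setSortComparatorClass or, if none is set, the default WritableComparator for the map output key class. A minimal sketch of that behavior, assuming the Hadoop 2.x Job.getInstance API (DescendingTextComparator is defined here purely for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;

public class SortComparatorSketch {

    /** Illustrative comparator that reverses the natural Text ordering. */
    public static class DescendingTextComparator extends WritableComparator {
        public DescendingTextComparator() {
            super(Text.class, true);
        }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            return -super.compare(a, b);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "sort-comparator-demo");
        job.setMapOutputKeyClass(Text.class);

        // No comparator registered yet: the job falls back to the default
        // WritableComparator registered for the map output key class.
        RawComparator<?> byDefault = job.getSortComparator();
        System.out.println(byDefault.getClass().getName());

        // After setSortComparatorClass(), getSortComparator() returns an
        // instance of the registered class instead.
        job.setSortComparatorClass(DescendingTextComparator.class);
        System.out.println(job.getSortComparator().getClass().getName());
    }
}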
From source file: org.huahinframework.unit.JobDriver.java
License: Apache License
/**
 * @param job
 * @return List<Pair<Key, Value>>
 * @throws InstantiationException
 * @throws IllegalAccessException
 * @throws ClassNotFoundException
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> createDriver(
        Job job) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
    Mapper mapper = job.getMapperClass().newInstance();
    Reducer reducer = job.getReducerClass().newInstance();

    RawComparator groupingComparator = job.getGroupingComparator();
    RawComparator sortComparator = job.getSortComparator();

    MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> driver = MapReduceDriver
            .newMapReduceDriver(mapper, reducer)
            .withKeyGroupingComparator(groupingComparator)
            .withKeyOrderComparator(sortComparator);
    driver.setConfiguration(job.getConfiguration());

    return driver;
}
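Design note: passing the job's own comparators to the MRUnit driver via withKeyGroupingComparator and withKeyOrderComparator makes the in-memory test shuffle sort and group keys the same way the real job would, so secondary-sort behavior can be exercised in a plain unit test without a cluster.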
From source file: sampler.TotalOrderPartitioner.java
License: Open Source License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);

        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            System.out.println(job.getNumReduceTasks());
            System.out.println(splitPoints.length);
            throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length);
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes
                    // iii...iixii...iii . Therefore, we make the default depth
                    // limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
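For orientation, setConf() above runs on each task after a partition file has already been produced; the split points in that file must be sorted with the same comparator that job.getSortComparator() returns, which is exactly what the pairwise comparator.compare check enforces. A minimal driver-side sketch of how such a partition file is typically produced, assuming the stock org.apache.hadoop.mapreduce.lib.partition TotalOrderPartitioner and InputSampler rather than this local sampler copy, with hypothetical paths and sampling settings:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "total-order-sort");
        job.setJarByClass(TotalOrderDriverSketch.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setNumReduceTasks(4);
        FileInputFormat.addInputPath(job, new Path("/data/in"));     // hypothetical path
        FileOutputFormat.setOutputPath(job, new Path("/data/out"));  // hypothetical path

        // Split points are sampled from the input and written to the partition
        // file sorted with the job's sort comparator, i.e. the same comparator
        // that setConf() later retrieves via job.getSortComparator().
        Path partitionFile = new Path("/data/_partitions");          // hypothetical path
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.writePartitionFile(job,
                new InputSampler.RandomSampler<Text, Text>(0.01, 1000));

        job.setPartitionerClass(TotalOrderPartitioner.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With four reduce tasks the sampler writes three split points, matching the getNumReduceTasks() - 1 check performed in setConf().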