List of usage examples for org.apache.hadoop.mapreduce Job getSortComparator
public RawComparator<?> getSortComparator()
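As background for the examples below: getSortComparator() returns the RawComparator used to order map output keys during the shuffle, either the class registered with Job#setSortComparatorClass or, if none is set, the default WritableComparator for the map output key class. A minimal sketch of that behavior, assuming the Hadoop 2.x Job.getInstance API (DescendingTextComparator is defined here purely for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;

public class SortComparatorSketch {

    /** Illustrative comparator that reverses the natural Text ordering. */
    public static class DescendingTextComparator extends WritableComparator {
        public DescendingTextComparator() {
            super(Text.class, true);
        }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            return -super.compare(a, b);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "sort-comparator-demo");
        job.setMapOutputKeyClass(Text.class);

        // No comparator registered yet: the job falls back to the default
        // WritableComparator registered for the map output key class.
        RawComparator<?> byDefault = job.getSortComparator();
        System.out.println(byDefault.getClass().getName());

        // After setSortComparatorClass(), getSortComparator() returns an
        // instance of the registered class instead.
        job.setSortComparatorClass(DescendingTextComparator.class);
        System.out.println(job.getSortComparator().getClass().getName());
    }
}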
From source file: org.huahinframework.unit.JobDriver.java
License: Apache License
/**
 * @param job
 * @return List<Pair<Key, Value>>
 * @throws InstantiationException
 * @throws IllegalAccessException
 * @throws ClassNotFoundException
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> createDriver(
        Job job) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
    Mapper mapper = job.getMapperClass().newInstance();
    Reducer reducer = job.getReducerClass().newInstance();

    RawComparator groupingComparator = job.getGroupingComparator();
    RawComparator sortComparator = job.getSortComparator();

    MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> driver = MapReduceDriver
            .newMapReduceDriver(mapper, reducer)
            .withKeyGroupingComparator(groupingComparator)
            .withKeyOrderComparator(sortComparator);
    driver.setConfiguration(job.getConfiguration());

    return driver;
}
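Design note: passing the job's own comparators to the MRUnit driver via withKeyGroupingComparator and withKeyOrderComparator makes the in-memory test shuffle sort and group keys the same way the real job would, so secondary-sort behavior can be exercised in a plain unit test without a cluster.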
From source file: sampler.TotalOrderPartitioner.java
License: Open Source License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);

        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            System.out.println(job.getNumReduceTasks());
            System.out.println(splitPoints.length);
            throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length);
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes
                    // iii...iixii...iii . Therefore, we make the default depth
                    // limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
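For orientation, setConf() above runs on each task after a partition file has already been produced; the split points in that file must be sorted with the same comparator that job.getSortComparator() returns, which is exactly what the pairwise comparator.compare check enforces. A minimal driver-side sketch of how such a partition file is typically produced, assuming the stock org.apache.hadoop.mapreduce.lib.partition TotalOrderPartitioner and InputSampler rather than this local sampler copy, with hypothetical paths and sampling settings:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "total-order-sort");
        job.setJarByClass(TotalOrderDriverSketch.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setNumReduceTasks(4);
        FileInputFormat.addInputPath(job, new Path("/data/in"));     // hypothetical path
        FileOutputFormat.setOutputPath(job, new Path("/data/out"));  // hypothetical path

        // Split points are sampled from the input and written to the partition
        // file sorted with the job's sort comparator, i.e. the same comparator
        // that setConf() later retrieves via job.getSortComparator().
        Path partitionFile = new Path("/data/_partitions");          // hypothetical path
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.writePartitionFile(job,
                new InputSampler.RandomSampler<Text, Text>(0.01, 1000));

        job.setPartitionerClass(TotalOrderPartitioner.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With four reduce tasks the sampler writes three split points, matching the getNumReduceTasks() - 1 check performed in setConf().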