Example usage for org.apache.hadoop.mapreduce Job getSortComparator

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job getSortComparator.

Prototype

public RawComparator<?> getSortComparator() 

Document

Get the RawComparator comparator used to compare keys.
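The comparator returned here is whatever was registered with Job.setSortComparatorClass(...); if none was set, Hadoop falls back to the WritableComparator registered for the map output key class. A minimal sketch of that fallback (the job name and key class are illustrative only, not part of the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class SortComparatorDemo {
    public static void main(String[] args) throws Exception {
        // Illustrative job configuration.
        Job job = Job.getInstance(new Configuration(), "sort-comparator-demo");
        job.setMapOutputKeyClass(Text.class);

        // No explicit setSortComparatorClass(...) call, so this resolves to
        // the WritableComparator registered for Text (Text.Comparator).
        RawComparator<?> comparator = job.getSortComparator();
        System.out.println(comparator.getClass().getName());
    }
}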

Usage

From source file: org.huahinframework.unit.JobDriver.java

License: Apache License

/**
 * @param job
 * @return List<Pair<Key, Value>>
 * @throws InstantiationException
 * @throws IllegalAccessException
 * @throws ClassNotFoundException
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> createDriver(
        Job job) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
    Mapper mapper = job.getMapperClass().newInstance();
    Reducer reducer = job.getReducerClass().newInstance();
    RawComparator groupingComparator = job.getGroupingComparator();
    RawComparator sortComparator = job.getSortComparator();
    MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> driver = MapReduceDriver
            .newMapReduceDriver(mapper, reducer).withKeyGroupingComparator(groupingComparator)
            .withKeyOrderComparator(sortComparator);
    driver.setConfiguration(job.getConfiguration());
    return driver;
}
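The driver built above can then be exercised like any MRUnit MapReduceDriver, with the grouping and sort comparators taken from the job applied during the simulated shuffle. A hypothetical usage sketch (the Text/IntWritable records and the expected output are illustrative, not taken from the original source):

MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> driver = createDriver(job);
driver.withInput(new Text("apple"), new IntWritable(1))
        .withInput(new Text("apple"), new IntWritable(2))
        .withOutput(new Text("apple"), new IntWritable(3))
        .runTest();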

From source file: sampler.TotalOrderPartitioner.java

License: Open Source License

/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);

        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            System.out.println(job.getNumReduceTasks());
            System.out.println(splitPoints.length);
            throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length);
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are 
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes 
                    // iii...iixii...iii   .  Therefore, we make the default depth
                    // limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
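For the partitioner above to find usable split points, the partition file must have been written in advance, sorted with the same comparator that job.getSortComparator() returns. A rough driver-side sketch using the stock Hadoop InputSampler and TotalOrderPartitioner from org.apache.hadoop.mapreduce.lib.partition (the paths and sampler parameters are placeholders, not taken from the original source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSetup {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "total-order-sort");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        FileInputFormat.addInputPath(job, new Path("/tmp/input"));

        // Placeholder location for the partition file.
        Path partitionFile = new Path("/tmp/partitions.lst");
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);

        // Sample the input and write getNumReduceTasks() - 1 split points,
        // ordered by the comparator returned by job.getSortComparator().
        // The sampler's key type should match the job's map output key class.
        InputSampler.writePartitionFile(job,
                new InputSampler.RandomSampler<Object, Object>(0.1, 10000, 10));
    }
}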