Example usage for org.apache.hadoop.mapreduce Job getMapOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job#getMapOutputKeyClass.

Prototype

public Class<?> getMapOutputKeyClass() 

Document

Get the key class for the map output data.
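
The method returns whatever was configured with Job#setMapOutputKeyClass; if no map output key class was set explicitly, Hadoop falls back to the job's final output key class. A minimal, self-contained sketch (the class name and key types below are illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class MapOutputKeyClassExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");

        // The final output key class is Text ...
        job.setOutputKeyClass(Text.class);
        // ... but the intermediate map output key class is IntWritable.
        job.setMapOutputKeyClass(IntWritable.class);

        // Prints: class org.apache.hadoop.io.IntWritable
        System.out.println(job.getMapOutputKeyClass());
    }
}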

Usage

From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License:Apache License

/**
 * Add the HBase dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, com.google.common.collect.ImmutableSet.class,
                org.apache.hadoop.hbase.util.Bytes.class, // one class from
                // hbase.jar
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
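
Because the jar list is resolved from the job's configured classes (map output key/value classes, input/output formats, partitioner, combiner), a driver would call this only after those classes have been set. A hedged usage sketch, assuming the CloudGraph GraphMapReduceSetup class shown above is on the classpath; the key/value types and job name are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup;

public class DependencyJarsDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "graph-scan");

        // Configure the classes that the jar resolution will inspect;
        // these particular key/value types are illustrative only.
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Result.class);
        // ... set mapper, reducer, input/output formats as needed ...

        // Ships the jars for the configured classes (plus the HBase,
        // ZooKeeper, protobuf and Guava dependencies listed above) to the
        // cluster via the DistributedCache.
        GraphMapReduceSetup.addDependencyJars(job);
    }
}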

From source file:org.kiji.mapreduce.framework.MapReduceJobBuilder.java

License:Apache License

/**
 * Configures the job with any Avro reader or writer schemas specified by the mapper class.
 *
 * <p>If the job's mapper class uses AvroKey as the job's input key class, it should
 * have implemented the AvroKeyReader interface to specify the reader schema for the
 * input key.  Likewise, if it uses AvroValue as the job's input value class, it should
 * have implemented the AvroValueReader interface.</p>
 *
 * <p>If the job's mapper class uses AvroKey as the output key class, it should
 * have implemented the AvroKeyWriter interface to specify the writer schema for the
 * output key.  Likewise, if it uses AvroValue as the output value class, it should have
 * implemented the AvroValueWriter interface.</p>
 *
 * <p>This method makes sure those interfaces were implemented correctly, uses them to
 * fetch the reader/writer schemas as necessary, and sets them in the Job configuration
 * so the Avro input format and serialization framework can access them.</p>
 *
 * @param job The job to configure.
 * @param mapper The Kiji mapper the job is configured to run.
 * @throws IOException If the Avro schemas cannot be configured.
 */
protected void configureAvro(Job job, KijiMapper<?, ?, ?, ?> mapper) throws IOException {
    // If the user has specified particular reader schemas for the records of the input,
    // put it in the job configuration.
    Schema inputKeyReaderSchema = AvroMapReduce.getAvroKeyReaderSchema(mapper);
    if (null != inputKeyReaderSchema) {
        LOG.info("Setting reader schema for the map input key to: " + inputKeyReaderSchema);
        AvroJob.setInputKeySchema(job, inputKeyReaderSchema);
    }
    Schema inputValueReaderSchema = AvroMapReduce.getAvroValueReaderSchema(mapper);
    if (null != inputValueReaderSchema) {
        LOG.info("Setting reader schema for the map input value to: " + inputValueReaderSchema);
        AvroJob.setInputValueSchema(job, inputValueReaderSchema);
    }

    // Set the output writer schemas in the job configuration (if specified).
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(mapper);
    if (null != outputKeyWriterSchema) {
        if (!AvroKey.class.isAssignableFrom(job.getMapOutputKeyClass())) {
            throw new JobConfigurationException(
                    mapper.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                            + " but the output key class was not AvroKey.");
        }
        LOG.info("Setting avro serialization for map output key schema: " + outputKeyWriterSchema);
        AvroJob.setMapOutputKeySchema(job, outputKeyWriterSchema);
    }
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(mapper);
    if (null != outputValueWriterSchema) {
        if (!AvroValue.class.isAssignableFrom(job.getMapOutputValueClass())) {
            throw new JobConfigurationException(
                    mapper.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                            + " but the output value class was not AvroValue.");
        }
        LOG.info("Setting avro serialization for map output value schema: " + outputValueWriterSchema);
        AvroJob.setMapOutputValueSchema(job, outputValueWriterSchema);
    }
}
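
The check on getMapOutputKeyClass() above ties three things together: the mapper's declared writer schema, the job's map output key class, and the Avro serialization config. Outside of Kiji, the equivalent manual wiring with plain avro-mapred looks roughly like the sketch below (the schema and types are illustrative):

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;

public class AvroMapOutputSetup {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "avro-map-output");

        // The writer schema the mapper will emit; a plain string schema
        // is used here purely for illustration.
        Schema keySchema = Schema.create(Schema.Type.STRING);

        // The map output key class must be AvroKey for the schema to apply,
        // which is exactly what the isAssignableFrom() check above enforces.
        job.setMapOutputKeyClass(AvroKey.class);
        job.setMapOutputValueClass(NullWritable.class);
        AvroJob.setMapOutputKeySchema(job, keySchema);
    }
}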

From source file:org.kiji.mapreduce.framework.MapReduceJobBuilder.java

License:Apache License

/**
 * Configures the MapReduce reducer for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureReducer(Job job) throws IOException {
    final KijiReducer<?, ?, ?, ?> reducer = getReducer();
    if (null == reducer) {
        LOG.info("No reducer provided. This will be a map-only job");
        job.setNumReduceTasks(0);

        // Set the job output key/value classes based on what the map output key/value classes were
        // since this is a map-only job.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration());
        if (null != mapOutputKeySchema) {
            AvroJob.setOutputKeySchema(job, mapOutputKeySchema);
        }
        job.setOutputValueClass(job.getMapOutputValueClass());
        Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration());
        if (null != mapOutputValueSchema) {
            AvroJob.setOutputValueSchema(job, mapOutputValueSchema);
        }
        return;
    }
    if (reducer instanceof Configurable) {
        ((Configurable) reducer).setConf(job.getConfiguration());
    }
    job.setReducerClass(reducer.getClass());

    // Set output key class.
    Class<?> outputKeyClass = reducer.getOutputKeyClass();
    job.setOutputKeyClass(outputKeyClass);
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer);
    if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
        if (null == outputKeyWriterSchema) {
            throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroKeyWriter in your KijiReducer?");
        }
        AvroJob.setOutputKeySchema(job, outputKeyWriterSchema);
    } else if (null != outputKeyWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                        + " but the output key class was not AvroKey.");
    }

    // Set output value class.
    Class<?> outputValueClass = reducer.getOutputValueClass();
    job.setOutputValueClass(outputValueClass);
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer);
    if (AvroValue.class.isAssignableFrom(outputValueClass)) {
        if (null == outputValueWriterSchema) {
            throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroValueWriter in your KijiReducer?");
        }
        AvroJob.setOutputValueSchema(job, outputValueWriterSchema);
    } else if (null != outputValueWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                        + " but the output value class was not AvroValue.");
    }
}
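
In the map-only branch above, whatever the mapper emits becomes the job's final output, so the output classes simply mirror the map output classes. A stripped-down equivalent in plain Hadoop (the key/value types are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class MapOnlyOutputClasses {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "map-only");
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Map-only job: no reducer, so mirror the map output classes as the
        // job output classes, just as configureReducer() does above.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        job.setOutputValueClass(job.getMapOutputValueClass());
    }
}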

From source file:sampler.TotalOrderPartitioner.java

License:Open Source License

/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);

        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            System.out.println(job.getNumReduceTasks());
            System.out.println(splitPoints.length);
            throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length);
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are 
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes
                    // iii...iixii...iii. Therefore, we make the default depth
                    // limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
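
Because setConf() reads the key type from job.getMapOutputKeyClass(), the partition file must be written with that same key class and contain getNumReduceTasks() - 1 sorted keys. A hedged driver sketch using the stock Hadoop TotalOrderPartitioner and InputSampler (the custom sampler.TotalOrderPartitioner above could be substituted; paths, key types and sampling parameters are illustrative, and the input format and input paths must be configured before sampling):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "total-order-sort");
        job.setMapOutputKeyClass(Text.class);   // the key class setConf() will read
        job.setMapOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(4);               // partition file needs 4 - 1 = 3 keys
        job.setPartitionerClass(TotalOrderPartitioner.class);

        // Tell the partitioner where its split points live ...
        Path partitionFile = new Path("/tmp/partitions.lst");
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);

        // ... and sample the input to produce them. writePartitionFile()
        // also keys the file by job.getMapOutputKeyClass(), matching what
        // setConf() expects when it reads the file back.
        // (Input format and input paths must already be set; omitted here.)
        InputSampler.writePartitionFile(job,
                new InputSampler.RandomSampler<Text, NullWritable>(0.1, 1000, 10));
    }
}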