List of usage examples for org.apache.hadoop.mapreduce.Job#getMapOutputKeyClass()
public Class<?> getMapOutputKeyClass()
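Before the project-specific examples, a minimal self-contained sketch of the accessor itself (the driver class name is illustrative). If no map output key class has been set explicitly, the method falls back to the job's output key class.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class GetMapOutputKeyClassDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "demo");
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Frameworks read the classes back to configure serialization or to
        // ship the classes' jars to the cluster, as the examples below show.
        Class<?> keyClass = job.getMapOutputKeyClass(); // org.apache.hadoop.io.Text
        System.out.println("Map output key class: " + keyClass.getName());
    }
}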
From source file: org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java
License: Apache License
/**
 * Add the HBase dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class, com.google.common.collect.ImmutableSet.class,
                org.apache.hadoop.hbase.util.Bytes.class, // one class from hbase.jar
                job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getInputFormatClass(),
                job.getOutputKeyClass(), job.getOutputValueClass(), job.getOutputFormatClass(),
                job.getPartitionerClass(), job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
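For context, a driver might call this helper just before submission. The sketch below is illustrative (hypothetical driver class, typical HBase scan key/value types), not taken from the CloudGraph sources:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup;

public class GraphJobDriver { // hypothetical driver
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(HBaseConfiguration.create(), "graph-scan");
        // The helper reads these back via job.getMapOutputKeyClass() etc.,
        // so set them before calling it.
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Result.class);
        GraphMapReduceSetup.addDependencyJars(job); // ships the jars via DistributedCache
        // ... set mapper, input/output formats, then job.waitForCompletion(true)
    }
}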
From source file: org.kiji.mapreduce.framework.MapReduceJobBuilder.java
License: Apache License
/**
 * Configures the job with any Avro reader or writer schemas specified by the mapper class.
 *
 * <p>If the job's mapper class uses AvroKey as the job's input key class, it should
 * have implemented the AvroKeyReader interface to specify the reader schema for the
 * input key. Likewise, if it uses AvroValue as the job's input value class, it should
 * have implemented the AvroValueReader interface.</p>
 *
 * <p>If the job's mapper class uses AvroKey as the output key class, it should
 * have implemented the AvroKeyWriter interface to specify the writer schema for the
 * output key. Likewise, if it uses AvroValue as the output value class, it should have
 * implemented the AvroValueWriter interface.</p>
 *
 * <p>This method makes sure those interfaces were implemented correctly, uses them to
 * fetch the reader/writer schemas as necessary, and sets them in the Job configuration
 * so the Avro input format and serialization framework can access them.</p>
 *
 * @param job The job to configure.
 * @param mapper The Kiji mapper the job is configured to run.
 * @throws IOException If the Avro schemas cannot be configured.
 */
protected void configureAvro(Job job, KijiMapper<?, ?, ?, ?> mapper) throws IOException {
    // If the user has specified particular reader schemas for the records of the input,
    // put them in the job configuration.
    Schema inputKeyReaderSchema = AvroMapReduce.getAvroKeyReaderSchema(mapper);
    if (null != inputKeyReaderSchema) {
        LOG.info("Setting reader schema for the map input key to: " + inputKeyReaderSchema);
        AvroJob.setInputKeySchema(job, inputKeyReaderSchema);
    }
    Schema inputValueReaderSchema = AvroMapReduce.getAvroValueReaderSchema(mapper);
    if (null != inputValueReaderSchema) {
        LOG.info("Setting reader schema for the map input value to: " + inputValueReaderSchema);
        AvroJob.setInputValueSchema(job, inputValueReaderSchema);
    }

    // Set the output writer schemas in the job configuration (if specified).
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(mapper);
    if (null != outputKeyWriterSchema) {
        if (!AvroKey.class.isAssignableFrom(job.getMapOutputKeyClass())) {
            throw new JobConfigurationException(mapper.getClass().getName()
                    + ".getAvroKeyWriterSchema() returned a non-null Schema"
                    + " but the output key class was not AvroKey.");
        }
        LOG.info("Setting avro serialization for map output key schema: " + outputKeyWriterSchema);
        AvroJob.setMapOutputKeySchema(job, outputKeyWriterSchema);
    }
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(mapper);
    if (null != outputValueWriterSchema) {
        if (!AvroValue.class.isAssignableFrom(job.getMapOutputValueClass())) {
            throw new JobConfigurationException(mapper.getClass().getName()
                    + ".getAvroValueWriterSchema() returned a non-null Schema"
                    + " but the output value class was not AvroValue.");
        }
        LOG.info("Setting avro serialization for map output value schema: " + outputValueWriterSchema);
        AvroJob.setMapOutputValueSchema(job, outputValueWriterSchema);
    }
}
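To make the AvroKeyWriter contract concrete, here is an illustrative fragment of a mapper that would pass the AvroKey check above. The interface and package path follow the Kiji MapReduce framework, but the class itself is hypothetical and the exact base-class signature varies by Kiji version:

import java.io.IOException;
import org.apache.avro.Schema;
import org.kiji.mapreduce.avro.AvroKeyWriter;

// Hypothetical fragment: a mapper whose map output key class is AvroKey<CharSequence>
// must also implement AvroKeyWriter so that configureAvro() can fetch the writer
// schema it hands to AvroJob.setMapOutputKeySchema().
public class StringKeyMapperFragment implements AvroKeyWriter {
    @Override
    public Schema getAvroKeyWriterSchema() throws IOException {
        return Schema.create(Schema.Type.STRING); // writer schema for AvroKey<CharSequence>
    }
}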
From source file: org.kiji.mapreduce.framework.MapReduceJobBuilder.java
License: Apache License
/**
 * Configures the MapReduce reducer for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureReducer(Job job) throws IOException {
    final KijiReducer<?, ?, ?, ?> reducer = getReducer();
    if (null == reducer) {
        LOG.info("No reducer provided. This will be a map-only job");
        job.setNumReduceTasks(0);

        // Set the job output key/value classes based on what the map output key/value classes were,
        // since this is a map-only job.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration());
        if (null != mapOutputKeySchema) {
            AvroJob.setOutputKeySchema(job, mapOutputKeySchema);
        }
        job.setOutputValueClass(job.getMapOutputValueClass());
        Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration());
        if (null != mapOutputValueSchema) {
            AvroJob.setOutputValueSchema(job, mapOutputValueSchema);
        }
        return;
    }

    if (reducer instanceof Configurable) {
        ((Configurable) reducer).setConf(job.getConfiguration());
    }
    job.setReducerClass(reducer.getClass());

    // Set output key class.
    Class<?> outputKeyClass = reducer.getOutputKeyClass();
    job.setOutputKeyClass(outputKeyClass);
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer);
    if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
        if (null == outputKeyWriterSchema) {
            throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroKeyWriter in your KijiReducer?");
        }
        AvroJob.setOutputKeySchema(job, outputKeyWriterSchema);
    } else if (null != outputKeyWriterSchema) {
        throw new JobConfigurationException(reducer.getClass().getName()
                + ".getAvroKeyWriterSchema() returned a non-null Schema"
                + " but the output key class was not AvroKey.");
    }

    // Set output value class.
    Class<?> outputValueClass = reducer.getOutputValueClass();
    job.setOutputValueClass(outputValueClass);
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer);
    if (AvroValue.class.isAssignableFrom(outputValueClass)) {
        if (null == outputValueWriterSchema) {
            throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroValueWriter in your KijiReducer?");
        }
        AvroJob.setOutputValueSchema(job, outputValueWriterSchema);
    } else if (null != outputValueWriterSchema) {
        throw new JobConfigurationException(reducer.getClass().getName()
                + ".getAvroValueWriterSchema() returned a non-null Schema"
                + " but the output value class was not AvroValue.");
    }
}
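The map-only branch above is a general MapReduce pattern rather than anything Kiji-specific: with zero reduce tasks, the mapper's output is the job's final output, so the job output classes must mirror the map output classes. A minimal plain-Hadoop sketch of the same idea (class and method names illustrative):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public final class MapOnlySetup { // hypothetical helper
    public static void configureMapOnly(Job job) {
        job.setNumReduceTasks(0);
        // Mirror the map output classes as the final output classes.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        job.setOutputValueClass(job.getMapOutputValueClass());
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        configureMapOnly(job);
    }
}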
From source file: sampler.TotalOrderPartitioner.java
License: Open Source License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
                : partFile.getFileSystem(conf);

        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            System.out.println(job.getNumReduceTasks());
            System.out.println(splitPoints.length);
            throw new IOException("Wrong number of partitions in keyset:" + splitPoints.length);
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    // Now that blocks of identical splitless trie nodes are
                    // represented reentrantly, and we develop a leaf for any trie
                    // node with only one split point, the only reason for a depth
                    // limit is to refute stack overflow or bloat in the pathological
                    // case where the split points are long and mostly look like bytes
                    // iii...iixii...iii . Therefore, we make the default depth
                    // limit large but not huge.
                    conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
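For completeness, a hedged sketch of how a total-order partitioner like this is typically wired up, using the stock Hadoop InputSampler to write the partition file that setConf() later reads. It assumes this copy keeps the stock setPartitionFile helper and configuration keys from Hadoop's own TotalOrderPartitioner; the driver class, input path, and partition path are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;

public class TotalOrderDriver { // hypothetical driver
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "total-order-sort");
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/data/input")); // illustrative path
        job.setMapOutputKeyClass(Text.class); // setConf() reads this back to pick the key type
        job.setNumReduceTasks(4); // the partition file must then hold 4 - 1 = 3 split keys
        job.setPartitionerClass(sampler.TotalOrderPartitioner.class);

        // Sample the input and write the split points that setConf() will load.
        sampler.TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/_partitions"));
        InputSampler.writePartitionFile(job, new InputSampler.RandomSampler<Text, NullWritable>(0.1, 1000));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}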