List of usage examples for org.apache.hadoop.mapreduce MRJobConfig OUTPUT_KEY_CLASS
String OUTPUT_KEY_CLASS
To view the source code for org.apache.hadoop.mapreduce MRJobConfig OUTPUT_KEY_CLASS, click the Source Link.
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/** * Sets the output key and value classes in the job configuration by inspecting the {@link Mapper} and {@link Reducer} * if it is not set by the user./*ww w.java2 s .com*/ * * @param job the MapReduce job * @param mapperTypeToken TypeToken of a configured mapper (may not be configured on the job). Has already been * resolved from the job's mapper class. */ private void setOutputClassesIfNeeded(Job job, @Nullable TypeToken<?> mapperTypeToken) { Configuration conf = job.getConfiguration(); // Try to get the type from reducer TypeToken<?> type = resolveClass(conf, MRJobConfig.REDUCE_CLASS_ATTR, Reducer.class); if (type == null) { // Map only job type = mapperTypeToken; } // If not able to detect type, nothing to set if (type == null || !(type.getType() instanceof ParameterizedType)) { return; } Type[] typeArgs = ((ParameterizedType) type.getType()).getActualTypeArguments(); // Set it only if the user didn't set it in beforeSubmit // The key and value type are in the 3rd and 4th type parameters if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_KEY_CLASS)) { Class<?> cls = TypeToken.of(typeArgs[2]).getRawType(); LOG.debug("Set output key class to {}", cls); job.setOutputKeyClass(cls); } if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_VALUE_CLASS)) { Class<?> cls = TypeToken.of(typeArgs[3]).getRawType(); LOG.debug("Set output value class to {}", cls); job.setOutputValueClass(cls); } }
From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java
License:Apache License
private static void setupPipesJob(Job job) throws IOException, ClassNotFoundException { Configuration conf = job.getConfiguration(); // default map output types to Text if (!getIsJavaMapper(conf)) { job.setMapperClass(PipesMapper.class); // Save the user's partitioner and hook in our's. setJavaPartitioner(conf, job.getPartitionerClass()); job.setPartitionerClass(PipesPartitioner.class); }/*from w ww. j a v a2 s .com*/ if (!getIsJavaReducer(conf)) { job.setReducerClass(PipesReducer.class); if (!getIsJavaRecordWriter(conf)) { job.setOutputFormatClass(NullOutputFormat.class); } } String textClassname = Text.class.getName(); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname); // Use PipesNonJavaInputFormat if necessary to handle progress reporting // from C++ RecordReaders ... if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) { conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class); job.setInputFormatClass(PipesNonJavaInputFormat.class); } if (avroInput != null) { if (explicitInputFormat) { conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class); } // else let the bridge fall back to the appropriate Avro IF switch (avroInput) { case K: job.setInputFormatClass(PydoopAvroInputKeyBridge.class); break; case V: job.setInputFormatClass(PydoopAvroInputValueBridge.class); break; case KV: job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class); break; default: throw new IllegalArgumentException("Bad Avro input type"); } } if (avroOutput != null) { if (explicitOutputFormat) { conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(), OutputFormat.class); } // else let the bridge fall back to the appropriate Avro OF conf.set(props.getProperty("AVRO_OUTPUT"), avroOutput.name()); switch 
(avroOutput) { case K: job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class); break; case V: job.setOutputFormatClass(PydoopAvroOutputValueBridge.class); break; case KV: job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class); break; default: throw new IllegalArgumentException("Bad Avro output type"); } } String exec = getExecutable(conf); if (exec == null) { String msg = "No application program defined."; throw new IllegalArgumentException(msg); } // add default debug script only when executable is expressed as // <path>#<executable> //FIXME: this is kind of useless if the pipes program is not in c++ if (exec.contains("#")) { // set default gdb commands for map and reduce task String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script"; setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript); setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript); } URI[] fileCache = DistributedCache.getCacheFiles(conf); if (fileCache == null) { fileCache = new URI[1]; } else { URI[] tmp = new URI[fileCache.length + 1]; System.arraycopy(fileCache, 0, tmp, 1, fileCache.length); fileCache = tmp; } try { fileCache[0] = new URI(exec); } catch (URISyntaxException e) { String msg = "Problem parsing executable URI " + exec; IOException ie = new IOException(msg); ie.initCause(e); throw ie; } DistributedCache.setCacheFiles(fileCache, conf); }
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
private static void setupPipesJob(JobConf conf) throws IOException { // default map output types to Text if (!getIsJavaMapper(conf)) { conf.setMapRunnerClass(PipesMapRunner.class); // Save the user's partitioner and hook in our's. setJavaPartitioner(conf, conf.getPartitionerClass()); conf.setPartitionerClass(PipesPartitioner.class); }/*from www .j ava 2 s . co m*/ if (!getIsJavaReducer(conf)) { conf.setReducerClass(PipesReducer.class); if (!getIsJavaRecordWriter(conf)) { conf.setOutputFormat(NullOutputFormat.class); } } String textClassname = Text.class.getName(); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname); // Use PipesNonJavaInputFormat if necessary to handle progress reporting // from C++ RecordReaders ... if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) { conf.setClass(Submitter.INPUT_FORMAT, conf.getInputFormat().getClass(), InputFormat.class); conf.setInputFormat(PipesNonJavaInputFormat.class); } String exec = getExecutable(conf); if (exec == null) { throw new IllegalArgumentException("No application program defined."); } // add default debug script only when executable is expressed as // <path>#<executable> if (exec.contains("#")) { // set default gdb commands for map and reduce task String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script"; setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript); setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript); } URI[] fileCache = DistributedCache.getCacheFiles(conf); if (fileCache == null) { fileCache = new URI[1]; } else { URI[] tmp = new URI[fileCache.length + 1]; System.arraycopy(fileCache, 0, tmp, 1, fileCache.length); fileCache = tmp; } try { fileCache[0] = new URI(exec); } catch (URISyntaxException e) { IOException ie = new IOException("Problem parsing 
execable URI " + exec); ie.initCause(e); throw ie; } DistributedCache.setCacheFiles(fileCache, conf); }
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOTest.java
License:Apache License
private static Configuration loadTestConfiguration(Class<?> outputFormatClassName, Class<?> keyClass, Class<?> valueClass) { Configuration conf = new Configuration(); conf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClassName, OutputFormat.class); conf.setClass(MRJobConfig.OUTPUT_KEY_CLASS, keyClass, Object.class); conf.setClass(MRJobConfig.OUTPUT_VALUE_CLASS, valueClass, Object.class); conf.setInt(MRJobConfig.NUM_REDUCES, REDUCERS_COUNT); conf.set(MRJobConfig.ID, String.valueOf(1)); return conf;//from ww w.j a v a 2s .c o m }
From source file:org.shaf.core.io.emulator.RecordWriterFactoryTest.java
License:Apache License
/**
 * Checks which writer implementation {@code RecordWriterFactory} creates: a
 * {@code TextWriter} for {@code SomeProcess1} and a {@code SequenceWriter} for
 * {@code SomeProcess2}.
 *
 * @throws Exception
 *             if the test fails for some reason.
 */
@Test
public void testFactory() throws Exception {
    // Point the job at the test directory and declare NullWritable key/value output classes.
    Configuration config = job.getConfiguration();
    config.set(FileOutputFormat.OUTDIR, super.dir.toString());
    config.set(MRJobConfig.OUTPUT_KEY_CLASS, "org.apache.hadoop.io.NullWritable");
    config.set(MRJobConfig.OUTPUT_VALUE_CLASS, "org.apache.hadoop.io.NullWritable");

    Class<?> writerForProcess1 = RecordWriterFactory
            .createRecordWriter(SomeProcess1.class, job.getConfiguration())
            .getClass();
    assertEquals(TextWriter.class, writerForProcess1);

    Class<?> writerForProcess2 = RecordWriterFactory
            .createRecordWriter(SomeProcess2.class, job.getConfiguration())
            .getClass();
    assertEquals(SequenceWriter.class, writerForProcess2);
}
From source file:org.shaf.core.io.emulator.SequenceWriter.java
License:Apache License
/** * Constructs a new sequence file writer. * //from w w w.ja v a 2 s . co m * @param config * the writer configuration. * @throws IOException * if the writer has failed to initialize. */ public SequenceWriter(Configuration config) throws IOException { super(config); try { this.keyClass = Class.forName(config.get(MRJobConfig.OUTPUT_KEY_CLASS)); this.valueClass = Class.forName(config.get(MRJobConfig.OUTPUT_VALUE_CLASS)); super.out.writeUTF(this.keyClass.getCanonicalName()); super.out.writeUTF(this.valueClass.getCanonicalName()); } catch (ClassNotFoundException exc) { throw new IOException(exc); } }