Example usage for org.apache.hadoop.mapreduce MRJobConfig OUTPUT_KEY

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce MRJobConfig OUTPUT_KEY_CLASS.

Prototype

String OUTPUT_KEY_CLASS

To view the source code for org.apache.hadoop.mapreduce MRJobConfig OUTPUT_KEY_CLASS.

Click Source Link

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Sets the output key and value classes in the job configuration by inspecting the {@link Mapper} and {@link Reducer}
 * if it is not set by the user./*ww w.java2 s  .com*/
 *
 * @param job the MapReduce job
 * @param mapperTypeToken TypeToken of a configured mapper (may not be configured on the job). Has already been
 *                        resolved from the job's mapper class.
 */
private void setOutputClassesIfNeeded(Job job, @Nullable TypeToken<?> mapperTypeToken) {
    Configuration conf = job.getConfiguration();

    // Try to get the type from reducer
    TypeToken<?> type = resolveClass(conf, MRJobConfig.REDUCE_CLASS_ATTR, Reducer.class);

    if (type == null) {
        // Map only job
        type = mapperTypeToken;
    }

    // If not able to detect type, nothing to set
    if (type == null || !(type.getType() instanceof ParameterizedType)) {
        return;
    }

    Type[] typeArgs = ((ParameterizedType) type.getType()).getActualTypeArguments();

    // Set it only if the user didn't set it in beforeSubmit
    // The key and value type are in the 3rd and 4th type parameters
    if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_KEY_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[2]).getRawType();
        LOG.debug("Set output key class to {}", cls);
        job.setOutputKeyClass(cls);
    }
    if (!isProgrammaticConfig(conf, MRJobConfig.OUTPUT_VALUE_CLASS)) {
        Class<?> cls = TypeToken.of(typeArgs[3]).getRawType();
        LOG.debug("Set output value class to {}", cls);
        job.setOutputValueClass(cls);
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License:Apache License

private static void setupPipesJob(Job job) throws IOException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    // default map output types to Text
    if (!getIsJavaMapper(conf)) {
        job.setMapperClass(PipesMapper.class);
        // Save the user's partitioner and hook in our's.
        setJavaPartitioner(conf, job.getPartitionerClass());
        job.setPartitionerClass(PipesPartitioner.class);
    }/*from w ww.  j  a v a2 s  .com*/
    if (!getIsJavaReducer(conf)) {
        job.setReducerClass(PipesReducer.class);
        if (!getIsJavaRecordWriter(conf)) {
            job.setOutputFormatClass(NullOutputFormat.class);
        }
    }
    String textClassname = Text.class.getName();
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);

    // Use PipesNonJavaInputFormat if necessary to handle progress reporting
    // from C++ RecordReaders ...
    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
        conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        job.setInputFormatClass(PipesNonJavaInputFormat.class);
    }

    if (avroInput != null) {
        if (explicitInputFormat) {
            conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        } // else let the bridge fall back to the appropriate Avro IF
        switch (avroInput) {
        case K:
            job.setInputFormatClass(PydoopAvroInputKeyBridge.class);
            break;
        case V:
            job.setInputFormatClass(PydoopAvroInputValueBridge.class);
            break;
        case KV:
            job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro input type");
        }
    }
    if (avroOutput != null) {
        if (explicitOutputFormat) {
            conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(), OutputFormat.class);
        } // else let the bridge fall back to the appropriate Avro OF
        conf.set(props.getProperty("AVRO_OUTPUT"), avroOutput.name());
        switch (avroOutput) {
        case K:
            job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class);
            break;
        case V:
            job.setOutputFormatClass(PydoopAvroOutputValueBridge.class);
            break;
        case KV:
            job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro output type");
        }
    }

    String exec = getExecutable(conf);
    if (exec == null) {
        String msg = "No application program defined.";
        throw new IllegalArgumentException(msg);
    }
    // add default debug script only when executable is expressed as
    // <path>#<executable>
    //FIXME: this is kind of useless if the pipes program is not in c++
    if (exec.contains("#")) {
        // set default gdb commands for map and reduce task
        String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript);
        setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript);
    }
    URI[] fileCache = DistributedCache.getCacheFiles(conf);
    if (fileCache == null) {
        fileCache = new URI[1];
    } else {
        URI[] tmp = new URI[fileCache.length + 1];
        System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
        fileCache = tmp;
    }
    try {
        fileCache[0] = new URI(exec);
    } catch (URISyntaxException e) {
        String msg = "Problem parsing executable URI " + exec;
        IOException ie = new IOException(msg);
        ie.initCause(e);
        throw ie;
    }
    DistributedCache.setCacheFiles(fileCache, conf);
}

From source file:it.crs4.pydoop.pipes.Submitter.java

License:Apache License

private static void setupPipesJob(JobConf conf) throws IOException {
    // default map output types to Text
    if (!getIsJavaMapper(conf)) {
        conf.setMapRunnerClass(PipesMapRunner.class);
        // Save the user's partitioner and hook in our's.
        setJavaPartitioner(conf, conf.getPartitionerClass());
        conf.setPartitionerClass(PipesPartitioner.class);
    }/*from   www  .j  ava  2  s  . co  m*/
    if (!getIsJavaReducer(conf)) {
        conf.setReducerClass(PipesReducer.class);
        if (!getIsJavaRecordWriter(conf)) {
            conf.setOutputFormat(NullOutputFormat.class);
        }
    }
    String textClassname = Text.class.getName();
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);

    // Use PipesNonJavaInputFormat if necessary to handle progress reporting
    // from C++ RecordReaders ...
    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
        conf.setClass(Submitter.INPUT_FORMAT, conf.getInputFormat().getClass(), InputFormat.class);
        conf.setInputFormat(PipesNonJavaInputFormat.class);
    }

    String exec = getExecutable(conf);
    if (exec == null) {
        throw new IllegalArgumentException("No application program defined.");
    }
    // add default debug script only when executable is expressed as
    // <path>#<executable>
    if (exec.contains("#")) {
        // set default gdb commands for map and reduce task 
        String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript);
        setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript);
    }
    URI[] fileCache = DistributedCache.getCacheFiles(conf);
    if (fileCache == null) {
        fileCache = new URI[1];
    } else {
        URI[] tmp = new URI[fileCache.length + 1];
        System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
        fileCache = tmp;
    }
    try {
        fileCache[0] = new URI(exec);
    } catch (URISyntaxException e) {
        IOException ie = new IOException("Problem parsing execable URI " + exec);
        ie.initCause(e);
        throw ie;
    }
    DistributedCache.setCacheFiles(fileCache, conf);
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOTest.java

License:Apache License

private static Configuration loadTestConfiguration(Class<?> outputFormatClassName, Class<?> keyClass,
        Class<?> valueClass) {
    Configuration conf = new Configuration();
    conf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClassName, OutputFormat.class);
    conf.setClass(MRJobConfig.OUTPUT_KEY_CLASS, keyClass, Object.class);
    conf.setClass(MRJobConfig.OUTPUT_VALUE_CLASS, valueClass, Object.class);
    conf.setInt(MRJobConfig.NUM_REDUCES, REDUCERS_COUNT);
    conf.set(MRJobConfig.ID, String.valueOf(1));
    return conf;//from   ww  w.j  a v a  2s  .c  o m
}

From source file:org.shaf.core.io.emulator.RecordWriterFactoryTest.java

License:Apache License

/**
 * Run the record reader factory tests.//from w ww .j a  v  a 2  s. c o  m
 * 
 * @throws Exception
 *             if the test fails for some reason.
 */
@Test
public void testFactory() throws Exception {
    Configuration config = job.getConfiguration();
    config.set(FileOutputFormat.OUTDIR, super.dir.toString());
    config.set(MRJobConfig.OUTPUT_KEY_CLASS, "org.apache.hadoop.io.NullWritable");
    config.set(MRJobConfig.OUTPUT_VALUE_CLASS, "org.apache.hadoop.io.NullWritable");

    assertEquals(TextWriter.class,
            RecordWriterFactory.createRecordWriter(SomeProcess1.class, job.getConfiguration()).getClass());

    assertEquals(SequenceWriter.class,
            RecordWriterFactory.createRecordWriter(SomeProcess2.class, job.getConfiguration()).getClass());
}

From source file:org.shaf.core.io.emulator.SequenceWriter.java

License:Apache License

/**
 * Constructs a new sequence file writer.
 * //from  w  w  w.ja  v  a  2 s  . co m
 * @param config
 *            the writer configuration.
 * @throws IOException
 *             if the writer has failed to initialize.
 */
public SequenceWriter(Configuration config) throws IOException {
    super(config);

    try {
        this.keyClass = Class.forName(config.get(MRJobConfig.OUTPUT_KEY_CLASS));
        this.valueClass = Class.forName(config.get(MRJobConfig.OUTPUT_VALUE_CLASS));

        super.out.writeUTF(this.keyClass.getCanonicalName());
        super.out.writeUTF(this.valueClass.getCanonicalName());

    } catch (ClassNotFoundException exc) {
        throw new IOException(exc);
    }
}

Example usage for org.apache.hadoop.mapreduce MRJobConfig OUTPUT_KEY_CLASS

Introduction

Prototype

Usage