Example usage for org.apache.hadoop.mapreduce OutputFormat getRecordWriter

Introduction

This page lists example usages of org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter, collected from open source projects.

Prototype

public abstract RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException;

Document

Get the RecordWriter for the given task.
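
Shown below is a rough, self-contained sketch of a typical implementation (the class name TabSeparatedOutputFormat and the .txt extension are illustrative, not part of Hadoop): getRecordWriter obtains the task attempt's work file from FileOutputFormat and returns a RecordWriter bound to it.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical example: writes "key<TAB>value" lines to the task attempt's work file.
public class TabSeparatedOutputFormat extends FileOutputFormat<Text, Text> {

    @Override
    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();

        // One output file per task attempt (e.g. part-m-00000.txt) inside the job's output directory.
        Path file = getDefaultWorkFile(context, ".txt");
        FileSystem fs = file.getFileSystem(conf);
        final FSDataOutputStream out = fs.create(file, false);

        return new RecordWriter<Text, Text>() {
            @Override
            public void write(Text key, Text value) throws IOException {
                // The framework calls write once per output pair of the task.
                out.writeBytes(key.toString() + "\t" + value.toString() + "\n");
            }

            @Override
            public void close(TaskAttemptContext ctx) throws IOException {
                // Called once at the end of the task attempt; flush and release the stream.
                out.close();
            }
        };
    }
}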

Usage

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Task.java

License: Open Source License

/**
 * Put writer into Hadoop context and return the associated output format instance.
 *
 * @param jobCtx Job context.
 * @return Output format.
 * @throws GridException In case of Grid exception.
 * @throws InterruptedException In case of interrupt.
 */
protected OutputFormat prepareWriter(JobContext jobCtx) throws GridException, InterruptedException {
    try {
        OutputFormat outputFormat = getOutputFormat(jobCtx);

        assert outputFormat != null;

        OutputCommitter outCommitter = outputFormat.getOutputCommitter(hadoopCtx);

        if (outCommitter != null)
            outCommitter.setupTask(hadoopCtx);

        RecordWriter writer = outputFormat.getRecordWriter(hadoopCtx);

        hadoopCtx.writer(writer);

        return outputFormat;
    } catch (IOException | ClassNotFoundException e) {
        throw new GridException(e);
    }
}
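
As a hedged sketch of how such a prepared writer is typically driven afterwards (not part of the GridGain source above; writer, key and val are placeholders for the task's actual state):

// Hypothetical continuation: 'writer' is the RecordWriter obtained from the output format above.
try {
    writer.write(key, val);                      // once per output record
} finally {
    writer.close(hadoopCtx);                     // flush the attempt's output
}

OutputCommitter committer = outputFormat.getOutputCommitter(hadoopCtx);
if (committer.needsTaskCommit(hadoopCtx))
    committer.commitTask(hadoopCtx);             // promote the attempt's output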

From source file: org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java

License: Apache License

@SuppressWarnings("rawtypes")
private synchronized RecordWriter getRecordWriter(final TaskAttemptContext taskContext,
        final String baseFileName) throws IOException, InterruptedException {

    // look for record-writer in the cache
    RecordWriter writer = recordWriters.get(baseFileName);

    // If not in cache, create a new one
    if (writer == null) {
        // in MultipleOutputs, the following commented out line of code was used here
        //
        // FileOutputFormat.setOutputName(taskContext, baseFileName);
        //
        // we can't do that because this method has package visibility but we can do something
        // even worse and inline that code
        //

        // this makes the output file have the same prefix as the directory, instead of the default
        // "part".
        //taskContext.getConfiguration().set(BASE_OUTPUT_NAME, baseFileName);

        try {
            Configuration conf = taskContext.getConfiguration();

            Class<? extends OutputFormat<?, ?>> format = taskContext.getOutputFormatClass();
            OutputFormat of = ReflectionUtils.newInstance(format, conf);

            writer = of.getRecordWriter(taskContext);
        } catch (final ClassNotFoundException e) {
            throw new IOException(e);
        }

        // if counters are enabled, wrap the writer with context
        // to increment counters
        if (countersEnabled) {
            writer = new RecordWriterWithCounter(writer, baseFileName, context);
        }

        // add the record-writer to the cache
        recordWriters.put(baseFileName, writer);
    }
    return writer;
}
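
One detail the snippet above leaves to the rest of the class: every cached writer must eventually be closed. A minimal sketch of that cleanup, assuming recordWriters is the Map used above (the helper name closeAllWriters is illustrative, not necessarily the MrGeo API):

@SuppressWarnings("rawtypes")
private synchronized void closeAllWriters(final TaskAttemptContext taskContext)
        throws IOException, InterruptedException {
    // close every per-name writer created lazily by getRecordWriter above
    for (final RecordWriter writer : recordWriters.values()) {
        writer.close(taskContext);
    }
    recordWriters.clear();
}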

From source file: org.tensorflow.hadoop.io.TFRecordFileTest.java

License: Open Source License

@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");

    TFRecordFileOutputFormat.setOutputPath(job, outdir);

    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader = inputFormat.createRecordReader(split, context);
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext = new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}