List of usage examples for org.apache.hadoop.mapreduce OutputFormat getRecordWriter
public abstract RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException;
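Before the examples, a minimal sketch of what an implementation of this contract can look like. The class name DiscardingOutputFormat is invented here for illustration (Hadoop ships a similar built-in, org.apache.hadoop.mapreduce.lib.output.NullOutputFormat); the point is that getRecordWriter hands the framework the per-task RecordWriter it will feed with key/value pairs:

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// DiscardingOutputFormat is an invented name for illustration only.
public class DiscardingOutputFormat<K, V> extends OutputFormat<K, V> {

    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The framework calls write() once per record and close() when the task ends.
        return new RecordWriter<K, V>() {
            @Override
            public void write(K key, V value) {
                // drop the record
            }

            @Override
            public void close(TaskAttemptContext ctx) {
                // nothing to flush or release
            }
        };
    }

    @Override
    public void checkOutputSpecs(JobContext context) {
        // no output location to validate
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
        // No-op committer: there is no task output to promote.
        return new OutputCommitter() {
            @Override public void setupJob(JobContext jobContext) { }
            @Override public void setupTask(TaskAttemptContext taskContext) { }
            @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) { return false; }
            @Override public void commitTask(TaskAttemptContext taskContext) { }
            @Override public void abortTask(TaskAttemptContext taskContext) { }
        };
    }
}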
From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Task.java
License: Open Source License
/**
 * Put writer into Hadoop context and return associated output format instance.
 *
 * @param jobCtx Job context.
 * @return Output format.
 * @throws GridException In case of Grid exception.
 * @throws InterruptedException In case of interrupt.
 */
protected OutputFormat prepareWriter(JobContext jobCtx) throws GridException, InterruptedException {
    try {
        OutputFormat outputFormat = getOutputFormat(jobCtx);

        assert outputFormat != null;

        OutputCommitter outCommitter = outputFormat.getOutputCommitter(hadoopCtx);

        if (outCommitter != null)
            outCommitter.setupTask(hadoopCtx);

        RecordWriter writer = outputFormat.getRecordWriter(hadoopCtx);

        hadoopCtx.writer(writer);

        return outputFormat;
    }
    catch (IOException | ClassNotFoundException e) {
        throw new GridException(e);
    }
}
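For comparison, here is how a caller typically drives the writer that prepareWriter stores in the context. This is a hedged sketch, not GridGain code: WriterLifecycleSketch and writeAndCommit are invented names, and Text/IntWritable are stand-in key/value types:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Invented helper for illustration; the output format and task context
// come from whatever framework code surrounds it.
public class WriterLifecycleSketch {
    static void writeAndCommit(OutputFormat<Text, IntWritable> outputFormat,
            TaskAttemptContext taskContext) throws Exception {
        OutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);

        RecordWriter<Text, IntWritable> writer = outputFormat.getRecordWriter(taskContext);
        try {
            writer.write(new Text("word"), new IntWritable(1));
        } finally {
            // Always close the writer so buffers are flushed and files are finalized.
            writer.close(taskContext);
        }

        if (committer.needsTaskCommit(taskContext)) {
            committer.commitTask(taskContext);
        }
    }
}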
From source file: org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java
License: Apache License
@SuppressWarnings("rawtypes") private synchronized RecordWriter getRecordWriter(final TaskAttemptContext taskContext, final String baseFileName) throws IOException, InterruptedException { // look for record-writer in the cache RecordWriter writer = recordWriters.get(baseFileName); // If not in cache, create a new one if (writer == null) { // in MultipleOutputs, the following commented out line of code was used here ///* w ww .j av a2 s . c o m*/ // FileOutputFormat.setOutputName(taskContext, baseFileName); // // we can't do that because this method has package visibility but we can do something // even worse and inline that code // // this makes the output file have the same prefix as the directory, instead of the default // "part". //taskContext.getConfiguration().set(BASE_OUTPUT_NAME, baseFileName); try { Configuration conf = taskContext.getConfiguration(); Class<? extends OutputFormat<?, ?>> format = taskContext.getOutputFormatClass(); OutputFormat of = ReflectionUtils.newInstance(format, conf); writer = of.getRecordWriter(taskContext); } catch (final ClassNotFoundException e) { throw new IOException(e); } // if counters are enabled, wrap the writer with context // to increment counters if (countersEnabled) { writer = new RecordWriterWithCounter(writer, baseFileName, context); } // add the record-writer to the cache recordWriters.put(baseFileName, writer); } return writer; }
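A caller of this cached getRecordWriter would look roughly like the sketch below. Both methods are hypothetical, written against the fields the class above already uses (the recordWriters cache); they are not the actual mrgeo API, and they assume the class imports org.apache.hadoop.io.Text:

// Hypothetical write(): fetch (or lazily create) the writer for a base name
// and route one record to it.
public void write(final String baseFileName, final Text key, final Text value,
    final TaskAttemptContext taskContext) throws IOException, InterruptedException {
  @SuppressWarnings("unchecked")
  RecordWriter<Text, Text> writer = getRecordWriter(taskContext, baseFileName);
  writer.write(key, value);
}

// Hypothetical close(): every cached writer must be closed exactly once when
// the task finishes, or buffered output is lost.
public void close(final TaskAttemptContext taskContext) throws IOException, InterruptedException {
  for (final RecordWriter<?, ?> writer : recordWriters.values()) {
    writer.close(taskContext);
  }
  recordWriters.clear();
}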
From source file: org.tensorflow.hadoop.io.TFRecordFileTest.java
License: Open Source License
@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");
    TFRecordFileOutputFormat.setOutputPath(job, outdir);

    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader =
            inputFormat.createRecordReader(split, context);
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext =
            new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}
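Worth noting in this last example: the test drives the commit protocol by hand — setupJob before getRecordWriter, writer.close in a finally block, then commitTask and commitJob — the same sequence a live MapReduce runtime performs around every task. The read-back loop then verifies the writer produced valid TFRecord framing by parsing each record back into an Example proto and checking it against the in-memory map.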