List of usage examples for org.apache.hadoop.mapreduce OutputFormat getRecordWriter
public abstract RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException;
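Before the examples, a minimal sketch of what an implementation of this contract can look like. The class name DiscardingOutputFormat is invented here for illustration (Hadoop ships a similar built-in, org.apache.hadoop.mapreduce.lib.output.NullOutputFormat); the point is that getRecordWriter hands the framework the per-task RecordWriter it will feed with key/value pairs:

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// DiscardingOutputFormat is an invented name for illustration only.
public class DiscardingOutputFormat<K, V> extends OutputFormat<K, V> {

    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The framework calls write() once per record and close() when the task ends.
        return new RecordWriter<K, V>() {
            @Override
            public void write(K key, V value) {
                // drop the record
            }

            @Override
            public void close(TaskAttemptContext ctx) {
                // nothing to flush or release
            }
        };
    }

    @Override
    public void checkOutputSpecs(JobContext context) {
        // no output location to validate
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
        // No-op committer: there is no task output to promote.
        return new OutputCommitter() {
            @Override public void setupJob(JobContext jobContext) { }
            @Override public void setupTask(TaskAttemptContext taskContext) { }
            @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) { return false; }
            @Override public void commitTask(TaskAttemptContext taskContext) { }
            @Override public void abortTask(TaskAttemptContext taskContext) { }
        };
    }
}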
From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Task.java
License: Open Source License
/**
 * Put writer into Hadoop context and return associated output format instance.
 *
 * @param jobCtx Job context.
 * @return Output format.
 * @throws GridException In case of Grid exception.
 * @throws InterruptedException In case of interrupt.
 */
protected OutputFormat prepareWriter(JobContext jobCtx) throws GridException, InterruptedException {
    try {
        OutputFormat outputFormat = getOutputFormat(jobCtx);

        assert outputFormat != null;

        OutputCommitter outCommitter = outputFormat.getOutputCommitter(hadoopCtx);

        if (outCommitter != null)
            outCommitter.setupTask(hadoopCtx);

        RecordWriter writer = outputFormat.getRecordWriter(hadoopCtx);

        hadoopCtx.writer(writer);

        return outputFormat;
    }
    catch (IOException | ClassNotFoundException e) {
        throw new GridException(e);
    }
}
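For comparison, here is how a caller typically drives the writer that prepareWriter stores in the context. This is a hedged sketch, not GridGain code: WriterLifecycleSketch and writeAndCommit are invented names, and Text/IntWritable are stand-in key/value types:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Invented helper for illustration; the output format and task context
// come from whatever framework code surrounds it.
public class WriterLifecycleSketch {
    static void writeAndCommit(OutputFormat<Text, IntWritable> outputFormat,
            TaskAttemptContext taskContext) throws Exception {
        OutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);

        RecordWriter<Text, IntWritable> writer = outputFormat.getRecordWriter(taskContext);
        try {
            writer.write(new Text("word"), new IntWritable(1));
        } finally {
            // Always close the writer so buffers are flushed and files are finalized.
            writer.close(taskContext);
        }

        if (committer.needsTaskCommit(taskContext)) {
            committer.commitTask(taskContext);
        }
    }
}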
From source file: org.mrgeo.hadoop.multipleoutputs.DirectoryMultipleOutputs.java
License: Apache License
@SuppressWarnings("rawtypes") private synchronized RecordWriter getRecordWriter(final TaskAttemptContext taskContext, final String baseFileName) throws IOException, InterruptedException { // look for record-writer in the cache RecordWriter writer = recordWriters.get(baseFileName); // If not in cache, create a new one if (writer == null) { // in MultipleOutputs, the following commented out line of code was used here ///* w ww .j av a2 s . c o m*/ // FileOutputFormat.setOutputName(taskContext, baseFileName); // // we can't do that because this method has package visibility but we can do something // even worse and inline that code // // this makes the output file have the same prefix as the directory, instead of the default // "part". //taskContext.getConfiguration().set(BASE_OUTPUT_NAME, baseFileName); try { Configuration conf = taskContext.getConfiguration(); Class<? extends OutputFormat<?, ?>> format = taskContext.getOutputFormatClass(); OutputFormat of = ReflectionUtils.newInstance(format, conf); writer = of.getRecordWriter(taskContext); } catch (final ClassNotFoundException e) { throw new IOException(e); } // if counters are enabled, wrap the writer with context // to increment counters if (countersEnabled) { writer = new RecordWriterWithCounter(writer, baseFileName, context); } // add the record-writer to the cache recordWriters.put(baseFileName, writer); } return writer; }
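A caller of this cached getRecordWriter would look roughly like the sketch below. Both methods are hypothetical, written against the fields the class above already uses (the recordWriters cache); they are not the actual mrgeo API, and they assume the class imports org.apache.hadoop.io.Text:

// Hypothetical write(): fetch (or lazily create) the writer for a base name
// and route one record to it.
public void write(final String baseFileName, final Text key, final Text value,
    final TaskAttemptContext taskContext) throws IOException, InterruptedException {
  @SuppressWarnings("unchecked")
  RecordWriter<Text, Text> writer = getRecordWriter(taskContext, baseFileName);
  writer.write(key, value);
}

// Hypothetical close(): every cached writer must be closed exactly once when
// the task finishes, or buffered output is lost.
public void close(final TaskAttemptContext taskContext) throws IOException, InterruptedException {
  for (final RecordWriter<?, ?> writer : recordWriters.values()) {
    writer.close(taskContext);
  }
  recordWriters.clear();
}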
From source file: org.tensorflow.hadoop.io.TFRecordFileTest.java
License: Open Source License
@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");
    TFRecordFileOutputFormat.setOutputPath(job, outdir);

    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader =
            inputFormat.createRecordReader(split, context);
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext =
            new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}
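Worth noting in this last example: the test drives the commit protocol by hand — setupJob before getRecordWriter, writer.close in a finally block, then commitTask and commitJob — the same sequence a live MapReduce runtime performs around every task. The read-back loop then verifies the writer produced valid TFRecord framing by parsing each record back into an Example proto and checking it against the in-memory map.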