List of usage examples for org.apache.hadoop.mapreduce.RecordWriter.write
public abstract void write(K key, V value) throws IOException, InterruptedException;
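Every concrete output format supplies a writer that implements this method: write(key, value) serializes a single key/value pair to the task's output, and close(context) flushes and releases the underlying resources. Before the collected project examples, here is a minimal, hypothetical sketch of such an implementation; the TextLineRecordWriter class name and its DataOutputStream field are illustrative and not taken from any of the sources below.

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Illustrative sketch only: writes each key/value pair as one tab-separated text line.
public class TextLineRecordWriter<K, V> extends RecordWriter<K, V> {
    private final DataOutputStream out;

    public TextLineRecordWriter(DataOutputStream out) {
        this.out = out;
    }

    @Override
    public void write(K key, V value) throws IOException, InterruptedException {
        // One record per call: key, a tab separator, the value, and a newline.
        out.writeBytes(String.valueOf(key));
        out.writeBytes("\t");
        out.writeBytes(String.valueOf(value));
        out.writeBytes("\n");
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        out.close();
    }
}

A typical caller obtains the writer from OutputFormat.getRecordWriter(context), calls write once per record, closes it, and then commits the task through the OutputCommitter, as several of the examples below demonstrate.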
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License: Apache License

/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);
    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
            PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);

    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
            .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {
        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        @Override
        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;

                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.MultiWriter.java
License: Apache License

public void write(K key, V value) throws IOException, InterruptedException {
    PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
    RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
    if (rw == null) {
        // if we don't have the record writer yet for the final path, create one and add it to the cache
        TaskAttemptContext taskAttemptContext = getKeySpecificContext(partitionKey);
        rw = getBaseRecordWriter(taskAttemptContext);
        this.recordWriters.put(partitionKey, rw);
        this.contexts.put(partitionKey, taskAttemptContext);
    }
    rw.write(key, value);
}
From source file: com.facebook.hiveio.output.OutputCmd.java
License: Apache License

/**
 * Write output
 *
 * @param context Context
 * @throws Exception
 */
public void write(Context context) throws Exception {
    PerThread threadLocal = context.perThread.get();

    HiveApiOutputCommitter outputCommitter = context.outputFormat.getOutputCommitter(threadLocal.taskContext());

    outputCommitter.setupTask(threadLocal.taskContext());

    RecordWriter<WritableComparable, HiveWritableRecord> recordWriter = context.outputFormat
            .getRecordWriter(threadLocal.taskContext());

    HiveWritableRecord record = HiveRecordFactory.newWritableRecord(context.schema);

    // TODO: allow type promotions: see https://github.com/facebook/hive-io-experimental/issues/15
    record.set(0, 11L);
    record.set(1, 22.22);
    record.set(2, true);
    record.set(3, "foo");
    recordWriter.write(NullWritable.get(), record);

    record.set(0, 33L);
    record.set(1, 44.44);
    record.set(2, false);
    record.set(3, "bar");
    recordWriter.write(NullWritable.get(), record);

    recordWriter.close(threadLocal.taskContext());

    if (outputCommitter.needsTaskCommit(threadLocal.taskContext())) {
        outputCommitter.commitTask(threadLocal.taskContext());
    }
}
From source file: com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License: Apache License

@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter =
            getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file: com.hadoop.mapreduce.TestLzoTextInputFormat.java
License: Open Source License

/**
 * Creates an lzo file with random data.
 *
 * @param outputDir Output directory.
 * @param fs File system we're using.
 * @param attemptContext Task attempt context, contains task id etc.
 * @throws IOException
 * @throws InterruptedException
 */
private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext,
        int charsToOutput) throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> rw = null;

    md5.reset();

    try {
        rw = output.getRecordWriter(attemptContext);

        char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();

        Random r = new Random(System.currentTimeMillis());
        Text key = new Text();
        Text value = new Text();
        int charsMax = chars.length - 1;
        for (int i = 0; i < charsToOutput;) {
            i += fillText(chars, r, charsMax, key);
            i += fillText(chars, r, charsMax, value);
            rw.write(key, value);
            md5.update(key.getBytes(), 0, key.getLength());
            // text output format writes tab between the key and value
            md5.update("\t".getBytes("UTF-8"));
            md5.update(value.getBytes(), 0, value.getLength());
        }
    } finally {
        if (rw != null) {
            rw.close(attemptContext);
            OutputCommitter committer = output.getOutputCommitter(attemptContext);
            committer.commitTask(attemptContext);
            committer.cleanupJob(attemptContext);
        }
    }

    byte[] result = md5.digest();
    md5.reset();
    return result;
}
From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java
License: Apache License

private Map<Integer, Emp> addTestData() throws IOException, InterruptedException {
    int days = 2000;
    int sal = 20;
    RecordWriter<Object, Emp> writer = outputFormat.getRecordWriter(fakeTaskAttemptContext);
    Map<Integer, Emp> inputMap = new HashMap<>();
    for (int i = 0; i < 10; i++) {
        String name = "name " + i;
        Emp e = new Emp(i, name, days + i, sal + i);
        writer.write(null, e);
        inputMap.put(i, e);
    }
    writer.close(fakeTaskAttemptContext);
    return inputMap;
}
From source file: com.metamx.milano.hadoop.MilanoProtoFileOutputFormatTests.java
License: Apache License

@Test
public void testBuildAndReadProtoFile() throws Exception {
    MilanoProtoFileOutputFormat outputFormat = new MilanoProtoFileOutputFormat();

    MilanoTypeMetadata.TypeMetadata.Builder metadata = MilanoTool
            .with(Testing.TestItem.getDescriptor().getName(), Testing.getDescriptor()).getMetadata()
            .toBuilder();

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 1")
            .setValue(ByteString.copyFromUtf8("Value 1")));

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 2")
            .setValue(ByteString.copyFromUtf8("Value 2")));

    outputFormat.setMetadata(metadata.build());

    TaskAttemptContext context = protoTestObjects.getContext();
    Configuration conf = context.getConfiguration();

    @SuppressWarnings("unchecked")
    RecordWriter<String, Message> writer = outputFormat.getRecordWriter(context);

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        writer.write("dummy", protoTestObjects.getTestItem(i));
    }

    writer.close(protoTestObjects.getContext());
}
From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License: Apache License

@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(!fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License: Apache License

@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License: Apache License

@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}