Usage examples for org.apache.hadoop.mapreduce.RecordWriter.close
public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;
Close this RecordWriter to future operations.
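For orientation before the examples, here is a minimal sketch of a RecordWriter subclass; the class name, the tab-separated record layout, and the wrapped DataOutputStream are illustrative assumptions rather than code from any source file below. Its close(TaskAttemptContext) releases the underlying stream, after which the writer accepts no further writes:

    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    // Hypothetical line-oriented writer: close() releases the underlying stream.
    public class LineRecordWriter<K, V> extends RecordWriter<K, V> {
        private final DataOutputStream out;

        public LineRecordWriter(DataOutputStream out) {
            this.out = out;
        }

        @Override
        public void write(K key, V value) throws IOException {
            out.writeBytes(key + "\t" + value + "\n");
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            // After close() the writer must not accept further write() calls.
            out.close();
        }
    }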
From source file: com.metamx.milano.hadoop.MilanoProtoFileOutputFormatTests.java
License: Apache License
@Test
public void testBuildAndReadProtoFile() throws Exception {
    MilanoProtoFileOutputFormat outputFormat = new MilanoProtoFileOutputFormat();
    MilanoTypeMetadata.TypeMetadata.Builder metadata = MilanoTool
            .with(Testing.TestItem.getDescriptor().getName(), Testing.getDescriptor()).getMetadata()
            .toBuilder();

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 1")
            .setValue(ByteString.copyFromUtf8("Value 1")));
    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 2")
            .setValue(ByteString.copyFromUtf8("Value 2")));

    outputFormat.setMetadata(metadata.build());

    TaskAttemptContext context = protoTestObjects.getContext();
    Configuration conf = context.getConfiguration();

    @SuppressWarnings("unchecked")
    RecordWriter<String, Message> writer = outputFormat.getRecordWriter(context);

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        writer.write("dummy", protoTestObjects.getTestItem(i));
    }

    writer.close(protoTestObjects.getContext());
}
From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License: Apache License
@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(!fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License: Apache License
@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
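Read together with the previous test, this shows that the size-based roll happens only at row boundaries: with CONF_HREGION_MAX_FILESIZE set just above a single entry's length, two entries in the same row still land in one HFile (00000), while two entries in different rows are split across 00000 and 00001, and close(context) finalizes every file the writer opened.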
From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License: Apache License
@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License: Apache License
@Test
public void testTombstonesInHFile() throws Exception {
    final HFileKeyValue put = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue deleteCell = entry("row-key2", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteCell);
    final HFileKeyValue deleteColumn = entry("row-key3", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteColumn);
    final HFileKeyValue deleteFamily = entry("row-key4", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteFamily);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(put, NW);
    writer.write(deleteCell, NW);
    writer.write(deleteColumn, NW);
    writer.write(deleteFamily, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    assertHFileContent(new Path(defaultDir, "00000"), put.getKeyValue(), deleteCell.getKeyValue(),
            deleteColumn.getKeyValue(), deleteFamily.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file: com.pinterest.terrapin.hadoop.HFileRecordWriterTest.java
License: Apache License
@Test
public void testWrite() throws Exception {
    Configuration conf = new Configuration();
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    // Disable block cache to ensure it reads the actual file content.
    columnDescriptor.setBlockCacheEnabled(false);

    FileSystem fs = FileSystem.get(conf);
    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf, columnDescriptor),
            fs, blockSize).withFilePath(new Path(tempFile.toURI())).build();

    /* Create our RecordWriter */
    RecordWriter<BytesWritable, BytesWritable> hfileWriter = new HFileRecordWriter(writer);

    List<String> keys = Lists.newArrayList();
    List<String> values = Lists.newArrayList();
    for (int i = 0; i < 100; ++i) {
        String key = String.format("%03d", i);
        String val = "value " + i;
        keys.add(key);
        values.add(val);
        hfileWriter.write(new BytesWritable(key.getBytes()), new BytesWritable(val.getBytes()));
    }
    /* This internally closes the StoreFile.Writer; passing a null context is
       safe here only because HFileRecordWriter's close never reads it. */
    hfileWriter.close(null);

    HFile.Reader reader = HFile.createReader(fs, new Path(tempFile.toURI()),
            new CacheConfig(conf, columnDescriptor));
    HFileScanner scanner = reader.getScanner(false, false, false);
    boolean valid = scanner.seekTo();
    List<String> gotKeys = Lists.newArrayListWithCapacity(keys.size());
    List<String> gotValues = Lists.newArrayListWithCapacity(values.size());
    while (valid) {
        KeyValue keyValue = scanner.getKeyValue();
        gotKeys.add(new String(keyValue.getRow()));
        gotValues.add(new String(keyValue.getValue()));
        valid = scanner.next();
    }
    assertEquals(keys, gotKeys);
    assertEquals(values, gotValues);
    reader.close();
}
From source file: com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapreduce.java
License: Apache License
/**
 * Runs the mapper for a single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split split to run on
 */
@Override
@SuppressWarnings("unchecked")
public void runSplit(MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {
    TaskAttemptID taskAttemptId = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex);
    // Set up task ID info.
    TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration,
            taskAttemptId);
    InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), configuration);

    // Create the RecordReader.
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = inputFormat
            .createRecordReader((InputSplit) split, taskContext);

    // Make a mapper.
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    org.apache.hadoop.mapreduce.RecordWriter output;
    OutputCommitter committer = null;
    if (mapOnlyJob) {
        OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(),
                configuration);
        output = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(taskContext);
        committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
    } else {
        output = new MapOutputCollector<OUTKEY, OUTVALUE>(mapOutputAccumulator);
    }

    input.initialize((InputSplit) split, taskContext);

    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext =
            hadoopVersionSpecificCode.getMapperContext(configuration, taskAttemptId, input, output);
    mapper.run(mapperContext);

    input.close();
    output.close(mapperContext);

    if (mapOnlyJob && committer != null) {
        committer.commitTask(taskContext);
    }
}
From source file: com.twitter.elephanttwin.lzo.retrieval.TestLzoIndexing.java
License: Open Source License
private static void createLZOFile(String fileName, int repeatTimes, boolean b64format) throws Exception {
    File file = new File(fileName);
    // First create the file, duplicating the strings as many times as needed
    // to produce rowCnt lines in the generated file.
    if (file.exists()) {
        file.delete();
    }

    DataOutputStream os = new DataOutputStream(codec.createOutputStream(new FileOutputStream(file)));
    RecordWriter<ExciteLog, ThriftWritable<ExciteLog>> writer = null;
    if (b64format)
        // LzoBinaryB64LineRecordWriter<ExciteLog, ThriftWritable<ExciteLog>>
        writer = LzoBinaryB64LineRecordWriter.newThriftWriter(ExciteLog.class, os);
    else
        // LzoBinaryBlockRecordWriter<ExciteLog, ThriftWritable<ExciteLog>>
        writer = new LzoBinaryBlockRecordWriter<ExciteLog, ThriftWritable<ExciteLog>>(
                new ThriftBlockWriter<ExciteLog>(os, ExciteLog.class, 10));

    ThriftWritable<ExciteLog> thriftWritable = ThriftWritable.newInstance(ExciteLog.class);
    for (int i = 0; i < repeatTimes; i++)
        for (int j = 0; j < uids.length; j++) {
            ExciteLog exciteLog = new ExciteLog();
            exciteLog.setUid(uids[j]);
            thriftWritable.set(exciteLog);
            writer.write(null, thriftWritable);
        }
    writer.close(null);

    // Create an LZO index file for the output in order to use it.
    LzoIndex.createIndex(FileSystem.get(conf), new Path(fileName));
}
From source file: com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBOutputFormatTest.java
License: Apache License
@Test
public void testDynamoDBRecordWriter()
        throws InstantiationException, IllegalAccessException, IOException, InterruptedException {
    AmazonDynamoDBClient client = createMock(AmazonDynamoDBClient.class);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);

    DynamoDBOutputFormat<MyTable, NullWritable> outputFormat = new DynamoDBOutputFormat<MyTable, NullWritable>();
    RecordWriter<MyTable, NullWritable> writer = outputFormat.getRecordWriter(client, TABLE_NAME);

    Capture<PutItemRequest> putCapture = new Capture<PutItemRequest>();
    expect(client.putItem(capture(putCapture))).andReturn(new PutItemResult());
    // Recorded expectation: close() must shut down the DynamoDB client.
    client.shutdown();

    AttributeValue hashKey = new AttributeValue().withN(HASHKEY_VALUE);
    AttributeValue rangeKey = new AttributeValue().withN(RANGEKEY_VALUE);
    MyTable record = new MyTable();
    record.setHashKeyValue(hashKey);
    record.setRangeKeyValue(rangeKey);

    replay(client);
    replay(context);

    writer.write(record, NullWritable.get());
    writer.close(context);

    PutItemRequest put = putCapture.getValue();
    Map<String, AttributeValue> item = put.getItem();

    assertEquals(2, item.size());
    assertEquals(hashKey, item.get(HASHKEY_FIELD));
    assertEquals(rangeKey, item.get(RANGEKEY_FIELD));

    verify(client);
    verify(context);
}
From source file: com.yahoo.glimmer.indexing.generator.IndexRecordWriterTest.java
License: Open Source License
@Test
public void test() throws Exception {
    context.checking(new Expectations() {
        {
            allowing(taskContext).getConfiguration();
            will(returnValue(conf));
            allowing(taskContext).getTaskAttemptID();
            will(returnValue(taskAttemptID));
        }
    });

    OutputFormat outputFormat = new IndexRecordWriter.OutputFormat();

    conf.setStrings("RdfFieldNames", "index0", "index1");
    conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL);
    RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter = outputFormat.getRecordWriter(taskContext);

    IntWritable key = new IntWritable();
    IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue();
    IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue();
    IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue();

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term1");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(0); // term1 occurs in index 0
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(3);
    termValue.setOccurrenceCount(6);
    termValue.setSumOfMaxTermPositions(15 + 12 + 18);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(11);
    docValue.addOccurrence(15);
    recordWriter.write(key, docValue);
    docValue.setDocument(4);
    docValue.clearOccerrences();
    docValue.addOccurrence(12);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    docValue.addOccurrence(17);
    docValue.addOccurrence(18);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term2");
    termValue.setTermFrequency(2);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.clearOccerrences();
    docValue.setDocument(0); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);
    docValue.setDocument(1); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(2);
    termValue.setOccurrenceCount(4);
    termValue.setSumOfMaxTermPositions(19 + 16);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(19);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(13);
    docValue.addOccurrence(16);
    recordWriter.write(key, docValue);

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(1);
    termValue.setSumOfMaxTermPositions(14);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term3");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(1); // term3 occurs in index 1
    recordWriter.write(key, docValue);
    docValue.clearOccerrences();

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(2);
    termValue.setSumOfMaxTermPositions(11);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(11);
    recordWriter.write(key, docValue);

    // Doc sizes.
    key.set(0);
    sizeValue.setDocument(0);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(3);
    sizeValue.setSize(1);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(4);
    sizeValue.setSize(10);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(2);
    recordWriter.write(key, sizeValue);
    key.set(1);
    sizeValue.setDocument(3);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(5);
    recordWriter.write(key, sizeValue);

    recordWriter.close(taskContext);

    // Check the written indexes...
    Path workPath = outputFormat.getDefaultWorkFile(taskContext, "");
    System.out.println("Default work file is " + workPath.toString());
    String dir = workPath.toUri().getPath();

    BitStreamIndex index0 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true);
    assertEquals(8, index0.numberOfDocuments);
    assertEquals(2, index0.numberOfTerms);
    assertTrue(index0.hasPositions);
    // term1
    checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)");
    // term2
    checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)");
    assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString());

    BitStreamIndex index1 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true);
    assertEquals(8, index1.numberOfDocuments);
    assertEquals(2, index1.numberOfTerms);
    assertTrue(index0.hasPositions);
    checkOccurrences(index1.documents(0), 1, "(1:14)");
    // term3
    checkOccurrences(index1.documents(1), 1, "(3:10,11)");

    BitStreamIndex indexAlignment = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true);
    assertEquals(8, indexAlignment.numberOfDocuments);
    assertEquals(3, indexAlignment.numberOfTerms);
    assertFalse(indexAlignment.hasPositions);
    // term1
    assertEquals(1, indexAlignment.documents(0).frequency());
    // term2
    assertEquals(2, indexAlignment.documents(1).frequency());
    // term3
    assertEquals(1, indexAlignment.documents(2).frequency());
    assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString());
}
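Despite the different output formats, every example above exercises the same lifecycle: obtain the writer from an OutputFormat, write records, close the writer, and, for formats that stage their output, commit the task. The following is a condensed sketch of that shared pattern; the helper class RecordWriterLifecycle and its method name are hypothetical, while the OutputFormat, RecordWriter, and OutputCommitter calls are the standard org.apache.hadoop.mapreduce API:

    import java.util.Map;
    import org.apache.hadoop.mapreduce.OutputFormat;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    // Hypothetical helper distilling the pattern shared by the tests above.
    public final class RecordWriterLifecycle {
        public static <K, V> void writeAndCommit(OutputFormat<K, V> format, TaskAttemptContext context,
                Iterable<Map.Entry<K, V>> records) throws Exception {
            RecordWriter<K, V> writer = format.getRecordWriter(context);
            try {
                for (Map.Entry<K, V> record : records) {
                    writer.write(record.getKey(), record.getValue());
                }
            } finally {
                // Always close: this flushes buffers and releases files/connections,
                // and the writer must not be used again afterwards.
                writer.close(context);
            }
            // Formats that stage output (e.g. the HFile tests above) also need a commit.
            format.getOutputCommitter(context).commitTask(context);
        }
    }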