List of usage examples for org.apache.hadoop.mapreduce.RecordReader#close()
public abstract void close() throws IOException;
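Before the per-project examples below, here is a minimal sketch of the usual calling pattern: drain the reader, then call close() in a finally block so resources are released even if iteration throws. The helper name, key/value types, and surrounding job objects are illustrative assumptions, not part of any of the example projects.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical helper: read every record from one split, making sure close() runs
// even when nextKeyValue() or per-record work throws.
static long countRecords(InputFormat<LongWritable, Text> inputFormat, InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    long count = 0;
    try {
        while (reader.nextKeyValue()) {
            count++;
        }
    } finally {
        reader.close(); // release file handles and buffers held by the reader
    }
    return count;
}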
From source file:org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java
License:Apache License
@Test(enabled = true, dependsOnMethods = { "testWriteChunkData" }) public void testReadChunkData() throws Exception { List<String> partfns = new ArrayList<String>(); long reccnt = 0L; long tsize = 0L; Checksum cs = new CRC32(); cs.reset();/*ww w. ja v a2 s . c o m*/ File folder = new File(m_workdir.toString()); File[] listfiles = folder.listFiles(); for (int idx = 0; idx < listfiles.length; ++idx) { if (listfiles[idx].isFile() && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null)) && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) { partfns.add(listfiles[idx].getName()); } } Collections.sort(partfns); // keep the order for checksum for (int idx = 0; idx < partfns.size(); ++idx) { System.out.println(String.format("Verifying : %s", partfns.get(idx))); FileSplit split = new FileSplit(new Path(m_workdir, partfns.get(idx)), 0, 0L, new String[0]); InputFormat<NullWritable, MneDurableInputValue<DurableChunk<?>>> inputFormat = new MneInputFormat<MneDurableInputValue<DurableChunk<?>>, DurableChunk<?>>(); RecordReader<NullWritable, MneDurableInputValue<DurableChunk<?>>> reader = inputFormat .createRecordReader(split, m_tacontext); MneDurableInputValue<DurableChunk<?>> dchkval = null; while (reader.nextKeyValue()) { dchkval = reader.getCurrentValue(); byte b; for (int j = 0; j < dchkval.getValue().getSize(); ++j) { b = unsafe.getByte(dchkval.getValue().get() + j); cs.update(b); } tsize += dchkval.getValue().getSize(); ++reccnt; } reader.close(); } AssertJUnit.assertEquals(m_reccnt, reccnt); AssertJUnit.assertEquals(m_totalsize, tsize); AssertJUnit.assertEquals(m_checksum, cs.getValue()); System.out.println(String.format("The checksum of chunk is %d", m_checksum)); }
From source file:org.apache.mnemonic.mapreduce.MneMapreduceLongDataTest.java
License:Apache License
@Test(enabled = true, dependsOnMethods = { "testWriteLongData" }) public void testReadLongData() throws Exception { long sum = 0L; long reccnt = 0L; File folder = new File(m_workdir.toString()); File[] listfiles = folder.listFiles(); for (int idx = 0; idx < listfiles.length; ++idx) { if (listfiles[idx].isFile() && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null)) && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) { System.out.println(String.format("Verifying : %s", listfiles[idx].getName())); FileSplit split = new FileSplit(new Path(m_workdir, listfiles[idx].getName()), 0, 0L, new String[0]); InputFormat<NullWritable, MneDurableInputValue<Long>> inputFormat = new MneInputFormat<MneDurableInputValue<Long>, Long>(); RecordReader<NullWritable, MneDurableInputValue<Long>> reader = inputFormat .createRecordReader(split, m_tacontext); MneDurableInputValue<Long> mdval = null; while (reader.nextKeyValue()) { mdval = reader.getCurrentValue(); sum += mdval.getValue(); ++reccnt;//w w w. j a v a2 s . com } reader.close(); } } AssertJUnit.assertEquals(m_sum, sum); AssertJUnit.assertEquals(m_reccnt, reccnt); System.out.println(String.format("The checksum of long data is %d", sum)); }
From source file:org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java
License:Apache License
@Test(enabled = true, dependsOnMethods = { "testWritePersonData" }) public void testReadPersonData() throws Exception { long sumage = 0L; long reccnt = 0L; File folder = new File(m_workdir.toString()); File[] listfiles = folder.listFiles(); for (int idx = 0; idx < listfiles.length; ++idx) { if (listfiles[idx].isFile() && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null)) && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) { System.out.println(String.format("Verifying : %s", listfiles[idx].getName())); FileSplit split = new FileSplit(new Path(m_workdir, listfiles[idx].getName()), 0, 0L, new String[0]); InputFormat<NullWritable, MneDurableInputValue<Person<Long>>> inputFormat = new MneInputFormat<MneDurableInputValue<Person<Long>>, Person<Long>>(); RecordReader<NullWritable, MneDurableInputValue<Person<Long>>> reader = inputFormat .createRecordReader(split, m_tacontext); MneDurableInputValue<Person<Long>> personval = null; while (reader.nextKeyValue()) { personval = reader.getCurrentValue(); AssertJUnit.assertTrue(personval.getValue().getAge() < 51); sumage += personval.getValue().getAge(); ++reccnt;/* w w w . j a v a 2s.c o m*/ } reader.close(); } } AssertJUnit.assertEquals(m_reccnt, reccnt); AssertJUnit.assertEquals(m_sumage, sumage); System.out.println(String.format("The checksum of ages is %d", sumage)); }
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopElementIterator.java
License:Apache License
@Override
public void close() {
    try {
        for (final RecordReader reader : this.readers) {
            reader.close();
        }
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java
License:Apache License
static void validateSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, BytesWritable> reader) throws IOException, InterruptedException {
    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);
    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;
    // iterate and validate stuff ...
    while (reader.nextKeyValue()) {
        Text key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();
        TestRecord testRecord = records.get(itemIndex++);
        // get test key bytes as utf-8 bytes ...
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        // compare against raw key bytes to validate the key is the same (Text's UTF-8 mapping code replaces
        // invalid characters with '?', which would cause our test case, which uses invalid characters to form
        // the key, to break).
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // returned bytes represent the header (encoded in UTF-8), terminated by \r\n\r\n. The content follows
        // this terminator; we search for this specific byte pattern to locate the start of content, then
        // compare it against the source ...
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();
    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);
    splits.remove(splitDataIndex);
}
From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java
License:Apache License
static void validateArcFileItemSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, ArcFileItem> reader) throws IOException, InterruptedException {
    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);
    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;
    // iterate and validate stuff ...
    while (reader.nextKeyValue()) {
        Text key = reader.getCurrentKey();
        ArcFileItem value = reader.getCurrentValue();
        TestRecord testRecord = records.get(itemIndex++);
        // get test key bytes as utf-8 bytes ...
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        // compare against raw key bytes to validate the key is the same (Text's UTF-8 mapping code replaces
        // invalid characters with '?', which would cause our test case, which uses invalid characters to form
        // the key, to break).
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // the item's content buffer is compared directly against the source record data ...
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getContent().getReadOnlyBytes(), value.getContent().getOffset(),
                value.getContent().getCount()) == 0);
        NIOHttpHeaders headers = ArcFileItemUtils.buildHeaderFromArcFileItemHeaders(value.getHeaderItems());
        // validate metadata
        Assert.assertEquals("text/html", headers.findValue(Constants.ARCFileHeader_ARC_MimeType));
        Assert.assertEquals(value.getArcFilePos(), testRecord.streamPos);
        Assert.assertEquals(value.getArcFileSize(), testRecord.rawSize);
        Assert.assertEquals("test-value", headers.findValue("test"));
        Assert.assertEquals(value.getArcFileName(), ((FileSplit) split).getPath().getName());
    }
    reader.close();
    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);
    splits.remove(splitDataIndex);
}
From source file:org.msgpack.hadoop.mapreduce.input.TestMessagePackInputFormat.java
License:Apache License
void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(),
            new TaskAttemptID("123", 0, false, 1, 2));
    MessagePackInputFormat format = new MessagePackInputFormat();
    FileInputFormat.setInputPaths(job, workDir);
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size());
    for (int j = 0; j < splits.size(); j++) {
        RecordReader<LongWritable, MessagePackWritable> reader =
                format.createRecordReader(splits.get(j), attemptContext);
        reader.initialize(splits.get(j), attemptContext);
        int count = 0;
        try {
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();
                Value val = reader.getCurrentValue().get();
                assertEquals(count, val.asIntegerValue().getLong());
                count++;
            }
        } finally {
            reader.close();
        }
    }
}
From source file:org.tensorflow.hadoop.io.TFRecordFileTest.java
License:Open Source License
@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");
    TFRecordFileOutputFormat.setOutputPath(job, outdir);
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader = inputFormat.createRecordReader(split, context);
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext =
                new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}
From source file:org.warcbase.mapreduce.lib.Chain.java
License:Apache License
@SuppressWarnings("unchecked") void runMapper(TaskInputOutputContext context, int index) throws IOException, InterruptedException { Mapper mapper = mappers.get(index);//from w w w . j a va2s . c o m RecordReader rr = new ChainRecordReader(context); RecordWriter rw = new ChainRecordWriter(context); Mapper.Context mapperContext = createMapContext(rr, rw, context, getConf(index)); mapper.run(mapperContext); rr.close(); rw.close(context); }