List of usage examples for org.apache.hadoop.mapreduce RecordReader close
public abstract void close() throws IOException;
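Before the project examples below, here is a minimal, self-contained sketch of the usual pattern: create a reader from an InputFormat, initialize it, iterate, and close it in a finally block so the underlying stream is released even if iteration fails. This sketch is not taken from any of the projects listed below; the format, split, and context arguments are assumed to be supplied by the caller.

    import java.io.IOException;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public class RecordReaderCloseSketch {
        // Reads every key/value pair from one split and always closes the reader.
        static <K, V> long readSplit(InputFormat<K, V> format, InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            RecordReader<K, V> reader = format.createRecordReader(split, context);
            try {
                reader.initialize(split, context);
                long count = 0;
                while (reader.nextKeyValue()) {
                    // getCurrentKey()/getCurrentValue() remain valid until the next call to nextKeyValue().
                    count++;
                }
                return count;
            } finally {
                reader.close(); // release file handles and other resources held by the reader
            }
        }
    }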
From source file: org.apache.avro.mapreduce.TestAvroKeyValueRecordReader.java
License: Apache License

    /** Verifies that avro records can be read and progress is reported correctly. */
    @Test
    public void testReadRecords() throws IOException, InterruptedException {
        // Create the test avro file input with two records:
        //   1. <"firstkey", 1>
        //   2. <"second", 2>
        Schema keyValueSchema = AvroKeyValue.getSchema(Schema.create(Schema.Type.STRING),
                Schema.create(Schema.Type.INT));

        AvroKeyValue<CharSequence, Integer> firstInputRecord = new AvroKeyValue<CharSequence, Integer>(
                new GenericData.Record(keyValueSchema));
        firstInputRecord.setKey("first");
        firstInputRecord.setValue(1);

        AvroKeyValue<CharSequence, Integer> secondInputRecord = new AvroKeyValue<CharSequence, Integer>(
                new GenericData.Record(keyValueSchema));
        secondInputRecord.setKey("second");
        secondInputRecord.setValue(2);

        final SeekableInput avroFileInput = new SeekableFileInput(
                AvroFiles.createFile(new File(mTempDir.getRoot(), "myInputFile.avro"), keyValueSchema,
                        firstInputRecord.get(), secondInputRecord.get()));

        // Create the record reader over the avro input file.
        RecordReader<AvroKey<CharSequence>, AvroValue<Integer>> recordReader =
                new AvroKeyValueRecordReader<CharSequence, Integer>(Schema.create(Schema.Type.STRING),
                        Schema.create(Schema.Type.INT)) {
                    @Override
                    protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException {
                        return avroFileInput;
                    }
                };

        // Set up the job configuration.
        Configuration conf = new Configuration();

        // Create a mock input split for this record reader.
        FileSplit inputSplit = createMock(FileSplit.class);
        expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
        expect(inputSplit.getStart()).andReturn(0L).anyTimes();
        expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();

        // Create a mock task attempt context for this record reader.
        TaskAttemptContext context = createMock(TaskAttemptContext.class);
        expect(context.getConfiguration()).andReturn(conf).anyTimes();

        // Initialize the record reader.
        replay(inputSplit);
        replay(context);
        recordReader.initialize(inputSplit, context);

        assertEquals("Progress should be zero before any records are read", 0.0f,
                recordReader.getProgress(), 0.0f);

        // Some variables to hold the records.
        AvroKey<CharSequence> key;
        AvroValue<Integer> value;

        // Read the first record.
        assertTrue("Expected at least one record", recordReader.nextKeyValue());
        key = recordReader.getCurrentKey();
        value = recordReader.getCurrentValue();
        assertNotNull("First record had null key", key);
        assertNotNull("First record had null value", value);
        assertEquals("first", key.datum().toString());
        assertEquals(1, value.datum().intValue());
        assertTrue("getCurrentKey() returned different keys for the same record",
                key == recordReader.getCurrentKey());
        assertTrue("getCurrentValue() returned different values for the same record",
                value == recordReader.getCurrentValue());

        // Read the second record.
        assertTrue("Expected to read a second record", recordReader.nextKeyValue());
        key = recordReader.getCurrentKey();
        value = recordReader.getCurrentValue();
        assertNotNull("Second record had null key", key);
        assertNotNull("Second record had null value", value);
        assertEquals("second", key.datum().toString());
        assertEquals(2, value.datum().intValue());

        assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f,
                recordReader.getProgress(), 0.0f);

        // There should be no more records.
        assertFalse("Expected only 2 records", recordReader.nextKeyValue());

        // Close the record reader.
        recordReader.close();

        // Verify the expected calls on the mocks.
        verify(inputSplit);
        verify(context);
    }
From source file: org.apache.carbondata.store.LocalCarbonStore.java
License: Apache License

    @Override
    public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns,
            Expression filter) throws IOException {
        Objects.requireNonNull(tableIdentifier);
        Objects.requireNonNull(projectColumns);

        CarbonTable table = getTable(tableIdentifier.getTablePath());
        if (table.isStreamingSink() || table.isHivePartitionTable()) {
            throw new UnsupportedOperationException("streaming and partition table is not supported");
        }
        // TODO: use InputFormat to prune data and read data
        final CarbonTableInputFormat format = new CarbonTableInputFormat();
        final Job job = new Job(new Configuration());
        CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
        CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
        CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
        CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
        CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
        CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
        if (filter != null) {
            CarbonInputFormat.setFilterPredicates(job.getConfiguration(), filter);
        }

        final List<InputSplit> splits =
                format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
        List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
        List<CarbonRow> rows = new ArrayList<>();

        try {
            for (InputSplit split : splits) {
                TaskAttemptContextImpl attempt =
                        new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
                RecordReader reader = format.createRecordReader(split, attempt);
                reader.initialize(split, attempt);
                readers.add(reader);
            }

            for (RecordReader<Void, Object> reader : readers) {
                while (reader.nextKeyValue()) {
                    rows.add((CarbonRow) reader.getCurrentValue());
                }
                try {
                    reader.close();
                } catch (IOException e) {
                    LOGGER.error(e);
                }
            }
        } catch (InterruptedException e) {
            throw new IOException(e);
        } finally {
            for (RecordReader<Void, Object> reader : readers) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOGGER.error(e);
                }
            }
        }

        return rows.iterator();
    }
From source file: org.apache.crunch.kafka.inputformat.KafkaInputFormatIT.java
License: Apache License

    @Test
    public void getSplitsCreateReaders() throws IOException, InterruptedException {
        List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10);
        Map<TopicPartition, Long> startOffsets =
                getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic);
        Map<TopicPartition, Long> endOffsets =
                getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic);

        Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>();
        for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) {
            Long endingOffset = endOffsets.get(entry.getKey());
            offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset));
        }

        KafkaInputFormat.writeOffsetsToConfiguration(offsets, config);

        KafkaInputFormat inputFormat = new KafkaInputFormat();
        inputFormat.setConf(config);
        List<InputSplit> splits = inputFormat.getSplits(null);

        assertThat(splits.size(), is(offsets.size()));

        for (InputSplit split : splits) {
            KafkaInputSplit inputSplit = (KafkaInputSplit) split;
            Pair<Long, Long> startEnd = offsets.get(inputSplit.getTopicPartition());
            assertThat(inputSplit.getStartingOffset(), is(startEnd.first()));
            assertThat(inputSplit.getEndingOffset(), is(startEnd.second()));
        }

        // create readers and consume the data
        when(taskContext.getConfiguration()).thenReturn(config);
        Set<String> keysRead = new HashSet<>();
        // read all data from all splits
        for (InputSplit split : splits) {
            KafkaInputSplit inputSplit = (KafkaInputSplit) split;
            long start = inputSplit.getStartingOffset();
            long end = inputSplit.getEndingOffset();

            RecordReader<BytesWritable, BytesWritable> recordReader =
                    inputFormat.createRecordReader(split, taskContext);
            recordReader.initialize(split, taskContext);

            int numRecordsFound = 0;
            String currentKey;
            while (recordReader.nextKeyValue()) {
                currentKey = new String(recordReader.getCurrentKey().getBytes());
                keysRead.add(currentKey);
                assertThat(keys, hasItem(currentKey));
                assertThat(recordReader.getCurrentValue(), is(notNullValue()));
                numRecordsFound++;
            }
            recordReader.close();

            // assert that it encountered a partition's worth of data
            assertThat(((long) numRecordsFound), is(end - start));
        }

        // validate the same number of unique keys was read as were written.
        assertThat(keysRead.size(), is(keys.size()));
    }
From source file: org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java
License: Apache License

    @Test
    public void testRead() throws IOException, InterruptedException {
        InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
        TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader reader = inputFormat.createRecordReader(split, context);
        InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

        reader.initialize(split, context);
        reader.nextKeyValue();
        OrcStruct data = (OrcStruct) reader.getCurrentValue();
        MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

        Assert.assertTrue(row.getEvent().keySet().size() == 4);
        Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
        Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
        Assert.assertEquals(col1, row.getEvent().get("col1"));
        Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

        reader.close();
    }
From source file: org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java
License: Apache License

    @Test
    public void testReadDateColumn() throws IOException, InterruptedException {
        File testFile2 = makeOrcFileWithDate();
        Path path = new Path(testFile2.getAbsoluteFile().toURI());
        FileSplit split = new FileSplit(path, 0, testFile2.length(), null);

        InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
        TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader reader = inputFormat.createRecordReader(split, context);
        InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

        reader.initialize(split, context);
        reader.nextKeyValue();
        OrcStruct data = (OrcStruct) reader.getCurrentValue();
        MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

        Assert.assertTrue(row.getEvent().keySet().size() == 4);
        Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
        Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
        Assert.assertEquals(col1, row.getEvent().get("col1"));
        Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

        reader.close();
    }
From source file: org.apache.hcatalog.pig.TestE2EScenarios.java
License: Apache License

    private void copyTable(String in, String out) throws IOException, InterruptedException {
        Job ijob = new Job();
        Job ojob = new Job();
        HCatInputFormat inpy = new HCatInputFormat();
        inpy.setInput(ijob, null, in);
        HCatOutputFormat oupy = new HCatOutputFormat();
        oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

        // Test HCatContext
        System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
        if (HCatContext.INSTANCE.getConf().isPresent()) {
            System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get()
                    .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                            HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
        }

        HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
        System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
        oupy.setSchema(ojob, tableSchema);
        oupy.checkOutputSpecs(ojob);
        OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
        oc.setupJob(ojob);

        for (InputSplit split : inpy.getSplits(ijob)) {
            TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
            TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

            RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
            rr.initialize(split, rtaskContext);

            OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
            taskOc.setupTask(wtaskContext);
            RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

            while (rr.nextKeyValue()) {
                rw.write(rr.getCurrentKey(), rr.getCurrentValue());
            }
            rw.close(wtaskContext);
            taskOc.commitTask(wtaskContext);
            rr.close();
        }

        oc.commitJob(ojob);
    }
From source file: org.apache.hyracks.dataflow.hadoop.mapreduce.MapperOperatorDescriptor.java
License: Apache License

    @SuppressWarnings("deprecation")
    @Override
    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
            IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
            throws HyracksDataException {
        final HadoopHelper helper = new HadoopHelper(config);
        final Configuration conf = helper.getConfiguration();
        final Mapper<K1, V1, K2, V2> mapper = helper.getMapper();
        final InputFormat<K1, V1> inputFormat = helper.getInputFormat();
        final IInputSplitProvider isp = factory.createInputSplitProvider(partition);
        final TaskAttemptID taId = new TaskAttemptID("foo", jobId, true, partition, 0);
        final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId);

        final int framesLimit = helper.getSortFrameLimit(ctx);
        final IBinaryComparatorFactory[] comparatorFactories = helper.getSortComparatorFactories();

        class SortingRecordWriter extends RecordWriter<K2, V2> {
            private final ArrayTupleBuilder tb;
            private final IFrame frame;
            private final FrameTupleAppender fta;
            private ExternalSortRunGenerator runGen;
            private int blockId;

            public SortingRecordWriter() throws HyracksDataException {
                tb = new ArrayTupleBuilder(2);
                frame = new VSizeFrame(ctx);
                fta = new FrameTupleAppender(frame);
            }

            public void initBlock(int blockId) throws HyracksDataException {
                runGen = new ExternalSortRunGenerator(ctx, new int[] { 0 }, null, comparatorFactories,
                        helper.getMapOutputRecordDescriptorWithoutExtraFields(), Algorithm.MERGE_SORT,
                        framesLimit);
                this.blockId = blockId;
            }

            @Override
            public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
            }

            @Override
            public void write(K2 key, V2 value) throws IOException, InterruptedException {
                DataOutput dos = tb.getDataOutput();
                tb.reset();
                key.write(dos);
                tb.addFieldEndOffset();
                value.write(dos);
                tb.addFieldEndOffset();
                if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                    runGen.nextFrame(frame.getBuffer());
                    fta.reset(frame, true);
                    if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                        throw new HyracksDataException("Record size (" + tb.getSize()
                                + ") larger than frame size (" + frame.getBuffer().capacity() + ")");
                    }
                }
            }

            public void sortAndFlushBlock(final IFrameWriter writer) throws HyracksDataException {
                if (fta.getTupleCount() > 0) {
                    runGen.nextFrame(frame.getBuffer());
                    fta.reset(frame, true);
                }
                runGen.close();
                IFrameWriter delegatingWriter = new IFrameWriter() {
                    private final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
                    private final FrameTupleAccessor fta = new FrameTupleAccessor(
                            helper.getMapOutputRecordDescriptorWithoutExtraFields());
                    private final ArrayTupleBuilder tb = new ArrayTupleBuilder(3);

                    @Override
                    public void open() throws HyracksDataException {
                    }

                    @Override
                    public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
                        fta.reset(buffer);
                        int n = fta.getTupleCount();
                        for (int i = 0; i < n; ++i) {
                            tb.reset();
                            tb.addField(fta, i, 0);
                            tb.addField(fta, i, 1);
                            try {
                                tb.getDataOutput().writeInt(blockId);
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                            tb.addFieldEndOffset();
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                appender.flush(writer, true);
                                if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0,
                                        tb.getSize())) {
                                    throw new IllegalStateException();
                                }
                            }
                        }
                    }

                    @Override
                    public void close() throws HyracksDataException {
                        appender.flush(writer, true);
                    }

                    @Override
                    public void fail() throws HyracksDataException {
                        // TODO Auto-generated method stub
                    }
                };
                if (helper.hasCombiner()) {
                    Reducer<K2, V2, K2, V2> combiner = helper.getCombiner();
                    TaskAttemptID ctaId = new TaskAttemptID("foo", jobId, true, partition, 0);
                    TaskAttemptContext ctaskAttemptContext = helper.createTaskAttemptContext(taId);
                    final IFrameWriter outputWriter = delegatingWriter;
                    RecordWriter<K2, V2> recordWriter = new RecordWriter<K2, V2>() {
                        private final FrameTupleAppender fta = new FrameTupleAppender(new VSizeFrame(ctx));
                        private final ArrayTupleBuilder tb = new ArrayTupleBuilder(2);

                        {
                            outputWriter.open();
                        }

                        @Override
                        public void write(K2 key, V2 value) throws IOException, InterruptedException {
                            DataOutput dos = tb.getDataOutput();
                            tb.reset();
                            key.write(dos);
                            tb.addFieldEndOffset();
                            value.write(dos);
                            tb.addFieldEndOffset();
                            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                fta.flush(outputWriter, true);
                                if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                    throw new IllegalStateException();
                                }
                            }
                        }

                        @Override
                        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                            fta.flush(outputWriter, true);
                        }
                    };
                    delegatingWriter = new ReduceWriter<K2, V2, K2, V2>(ctx, helper,
                            new int[] { HadoopHelper.KEY_FIELD_INDEX }, helper.getGroupingComparatorFactories(),
                            helper.getMapOutputRecordDescriptorWithoutExtraFields(), combiner, recordWriter,
                            ctaId, ctaskAttemptContext);
                }
                IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
                for (int i = 0; i < comparatorFactories.length; ++i) {
                    comparators[i] = comparatorFactories[i].createBinaryComparator();
                }
                ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, runGen.getSorter(),
                        runGen.getRuns(), new int[] { 0 }, comparators, null,
                        helper.getMapOutputRecordDescriptorWithoutExtraFields(), framesLimit, delegatingWriter);
                merger.process();
            }
        }

        return new AbstractUnaryOutputSourceOperatorNodePushable() {
            @SuppressWarnings("unchecked")
            @Override
            public void initialize() throws HyracksDataException {
                try {
                    writer.open();
                    SortingRecordWriter recordWriter = new SortingRecordWriter();
                    InputSplit split = null;
                    int blockId = 0;
                    while ((split = isp.next()) != null) {
                        try {
                            RecordReader<K1, V1> recordReader = inputFormat.createRecordReader(split,
                                    taskAttemptContext);
                            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
                            try {
                                Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
                                recordReader.initialize(split, taskAttemptContext);
                            } finally {
                                Thread.currentThread().setContextClassLoader(ctxCL);
                            }
                            recordWriter.initBlock(blockId);
                            Mapper<K1, V1, K2, V2>.Context mCtx = new MRContextUtil().createMapContext(conf,
                                    taId, recordReader, recordWriter, null, null, split);
                            mapper.run(mCtx);
                            recordReader.close();
                            recordWriter.sortAndFlushBlock(writer);
                            ++blockId;
                        } catch (IOException e) {
                            throw new HyracksDataException(e);
                        } catch (InterruptedException e) {
                            throw new HyracksDataException(e);
                        }
                    }
                } catch (Throwable th) {
                    writer.fail();
                    throw th;
                } finally {
                    writer.close();
                }
            }
        };
    }
From source file: org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests.java
License: Apache License

    protected final int countTuples(RecordReader<LongWritable, T> reader)
            throws IOException, InterruptedException {
        int count = 0;

        // Check initial progress
        LOG.info(String.format("Initial Reported Progress %f", reader.getProgress()));
        float progress = reader.getProgress();
        if (Float.compare(0.0f, progress) == 0) {
            Assert.assertEquals(0.0d, reader.getProgress(), 0.0d);
        } else if (Float.compare(1.0f, progress) == 0) {
            // If reader is reported 1.0 straight away then we expect there to
            // be no key values
            Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);
            Assert.assertFalse(reader.nextKeyValue());
        } else {
            Assert.fail(String.format(
                    "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f",
                    progress));
        }

        // Count tuples
        boolean debug = LOG.isDebugEnabled();
        while (reader.nextKeyValue()) {
            count++;
            progress = reader.getProgress();
            if (debug)
                LOG.debug(String.format("Current Reported Progress %f", progress));
            Assert.assertTrue(
                    String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress),
                    progress > 0.0f && progress <= 1.0f);
        }
        reader.close();
        LOG.info(String.format("Got %d tuples from this record reader", count));

        // Check final progress
        LOG.info(String.format("Final Reported Progress %f", reader.getProgress()));
        Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);

        return count;
    }
From source file: org.apache.jena.tdbloader4.partitioners.SplitSampler.java
License: Apache License

    /**
     * From each split sampled, take the first numSamples / numSplits records.
     */
    @SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
    public K[] getSample(InputFormat<K, V> inf, Job job) throws IOException, InterruptedException {
        List<InputSplit> splits = inf.getSplits(job);
        ArrayList<K> samples = new ArrayList<K>(numSamples);
        int splitsToSample = Math.min(maxSplitsSampled, splits.size());
        int samplesPerSplit = numSamples / splitsToSample;
        log.debug("Sampling {} splits, taking {} samples per split", splitsToSample, samplesPerSplit);
        long records = 0;
        for (int i = 0; i < splitsToSample; ++i) {
            TaskAttemptContext samplingContext = new TaskAttemptContext(job.getConfiguration(),
                    new TaskAttemptID());
            InputSplit split = splits.get(i);
            log.debug("Sampling {} split", split);
            RecordReader<K, V> reader = inf.createRecordReader(split, samplingContext);
            reader.initialize(split, samplingContext);
            while (reader.nextKeyValue()) {
                LongQuadWritable currentKey = (LongQuadWritable) reader.getCurrentKey();
                // TODO: why do we need to do that? Why on earth we have -1 in subject, predicate or object position???
                if ((currentKey.get(0) > 0) && (currentKey.get(1) > 0) && (currentKey.get(2) > 0)) {
                    LongQuadWritable key = new LongQuadWritable(currentKey.get(0), currentKey.get(1),
                            currentKey.get(2), currentKey.get(3));
                    log.debug("Sampled {}", key);
                    samples.add((K) key);
                    ++records;
                    if (records >= (i + 1) * samplesPerSplit) {
                        log.debug("Records is {} and (i + 1) * samplesPerSplit is {}", records,
                                (i + 1) * samplesPerSplit);
                        break;
                    }
                }
            }
            reader.close();
        }
        return (K[]) samples.toArray();
    }
From source file: org.apache.mnemonic.mapreduce.MneMapreduceBufferDataTest.java
License: Apache License

    @Test(enabled = true, dependsOnMethods = { "testWriteBufferData" })
    public void testReadBufferData() throws Exception {
        long reccnt = 0L;
        long tsize = 0L;
        byte[] buf;
        Checksum cs = new CRC32();
        cs.reset();

        File folder = new File(m_workdir.toString());
        File[] listfiles = folder.listFiles();
        for (int idx = 0; idx < listfiles.length; ++idx) {
            if (listfiles[idx].isFile()
                    && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
                    && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
                m_partfns.add(listfiles[idx].getName());
            }
        }
        Collections.sort(m_partfns); // keep the order for checksum

        for (int idx = 0; idx < m_partfns.size(); ++idx) {
            System.out.println(String.format("Verifying : %s", m_partfns.get(idx)));
            FileSplit split = new FileSplit(new Path(m_workdir, m_partfns.get(idx)), 0, 0L, new String[0]);
            InputFormat<NullWritable, MneDurableInputValue<DurableBuffer<?>>> inputFormat =
                    new MneInputFormat<MneDurableInputValue<DurableBuffer<?>>, DurableBuffer<?>>();
            RecordReader<NullWritable, MneDurableInputValue<DurableBuffer<?>>> reader =
                    inputFormat.createRecordReader(split, m_tacontext);
            MneDurableInputValue<DurableBuffer<?>> dbufval = null;
            while (reader.nextKeyValue()) {
                dbufval = reader.getCurrentValue();
                assert dbufval.getValue().getSize() == dbufval.getValue().get().capacity();
                dbufval.getValue().get().clear();
                buf = new byte[dbufval.getValue().get().capacity()];
                dbufval.getValue().get().get(buf);
                cs.update(buf, 0, buf.length);
                tsize += dbufval.getValue().getSize();
                ++reccnt;
            }
            reader.close();
        }

        AssertJUnit.assertEquals(m_reccnt, reccnt);
        AssertJUnit.assertEquals(m_totalsize, tsize);
        AssertJUnit.assertEquals(m_checksum, cs.getValue());
        System.out.println(String.format("The checksum of buffer is %d", m_checksum));
    }