Example usage for org.apache.hadoop.io DataInputBuffer DataInputBuffer

Introduction

On this page you can find example usages of the org.apache.hadoop.io.DataInputBuffer no-argument constructor, DataInputBuffer().

Prototype

public DataInputBuffer() 

Document

Constructs a new empty buffer.
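
A minimal, self-contained sketch of the usual pattern (the class name DataInputBufferExample and the sample values are illustrative, not taken from the sources below): construct the empty buffer once, point it at a byte range with reset(...), and read through the DataInput interface, since DataInputBuffer extends DataInputStream. The same instance can be re-targeted repeatedly without new allocations.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class DataInputBufferExample {
    public static void main(String[] args) throws IOException {
        // Serialize two ints into an in-memory output buffer.
        DataOutputBuffer out = new DataOutputBuffer();
        out.writeInt(42);
        out.writeInt(7);

        // Construct a new empty buffer, then point it at the serialized
        // bytes; reset(...) wraps the array without copying it.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), 0, out.getLength());
        System.out.println(in.readInt()); // 42
        System.out.println(in.readInt()); // 7

        // Reuse the same instance for a different byte range.
        byte[] raw = {0, 0, 0, 5};
        in.reset(raw, 0, raw.length);
        System.out.println(in.readInt()); // 5
    }
}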

Usage

From source file:org.apache.tez.runtime.library.common.sort.impl.dflt.DefaultSorter.java

License:Apache License
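
In this spill routine, a DataInputBuffer is created per partition and repeatedly reset onto key byte ranges inside the circular sort buffer (kvbuffer), so each key can be appended to the IFile writer without an extra copy.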

protected void spill(int mstart, int mend) throws IOException, InterruptedException {

    //approximate the length of the output file to be the length of the
    //buffer + header lengths for the partitions
    final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
            + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        spillFilePaths.put(numSpills, filename);
        out = rfs.create(filename);

        int spindex = mstart;
        final InMemValBytes value = createInMemValBytes();
        boolean rle = isRLENeeded();
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, rle);
                if (combiner == null) {
                    // spill directly
                    DataInputBuffer key = new DataInputBuffer();
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        final int kvoff = offsetFor(spindex);
                        int keystart = kvmeta.get(kvoff + KEYSTART);
                        int valstart = kvmeta.get(kvoff + VALSTART);
                        key.reset(kvbuffer, keystart, valstart - keystart);
                        getVBytesForOffset(kvoff, value);
                        writer.append(key, value);
                        ++spindex;
                    }
                } else {
                    int spstart = spindex;
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        ++spindex;
                    }
                    // Note: we would like to avoid the combiner if we have fewer
                    // than some threshold of records for a partition
                    if (spstart != spindex) {
                        TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Running combine processor");
                        }
                        runCombineProcessor(kvIter, writer);
                    }
                }

                // close the writer
                writer.close();
                if (numSpills > 0) {
                    additionalSpillBytesWritten.increment(writer.getCompressedLength());
                    numAdditionalSpills.increment(1);
                    // Reset; the value will be set during the final merge.
                    outputBytesWithOverheadCounter.setValue(0);
                } else {
                    // Set this up for the first write only. Subsequent ones will be handled in the final merge.
                    outputBytesWithOverheadCounter.increment(writer.getRawLength());
                }
                // record offsets
                final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                        writer.getCompressedLength());
                spillRec.putIndex(rec, i);

                writer = null;
            } finally {
                if (null != writer)
                    writer.close();
            }
        }

        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
                    partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillFileIndexPaths.put(numSpills, indexFilename);
            spillRec.writeToFile(indexFilename, conf);
        } else {
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        LOG.info("Finished spill " + numSpills);
        ++numSpills;
    } finally {
        if (out != null)
            out.close();
    }
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License
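
This test appends empty DataInputBuffer instances as zero-length keys and values, then reads them back through fresh buffers and asserts that four records of length zero come out.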

@Test(timeout = 5000)
//Write empty key value pairs
public void testWritingEmptyKeyValues() throws IOException {
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, null, null, null, null, null);
    writer.append(key, value);
    writer.append(key, value);
    writer.append(key, value);
    writer.append(key, value);
    writer.close();

    IFile.Reader reader = new Reader(localFs, outputPath, null, null, null, false, -1, 1024);
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    int records = 0;
    while (reader.nextRawKey(keyIn)) {
        reader.nextRawValue(valIn);
        records++;
        assert (keyIn.getLength() == 0);
        assert (valIn.getLength() == 0);
    }
    assertTrue("Number of records read does not match", (records == 4));
    reader.close();
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License
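
Here DataInputBuffer instances wrap successive key and value byte ranges of a shared array to exercise IFile's run-length encoding: writer.sameKey should flip to true only when consecutive keys match.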

@Test(timeout = 5000)
//test with sorted data and repeat keys
public void testWithRLEMarker() throws IOException {
    //Test with append(Object, Object)
    FSDataOutputStream out = localFs.create(outputPath);
    IFile.Writer writer = new IFile.Writer(defaultConf, out, Text.class, IntWritable.class, codec, null, null,
            true);

    Text key = new Text("key0");
    IntWritable value = new IntWritable(0);
    writer.append(key, value);

    //same key (RLE should kick in)
    key = new Text("key0");
    writer.append(key, value);
    assertTrue(writer.sameKey);

    //Different key
    key = new Text("key1");
    writer.append(key, value);
    assertFalse(writer.sameKey);
    writer.close();
    out.close();

    //Test with append(DataInputBuffer key, DataInputBuffer value)
    byte[] kvbuffer = "key1Value1key1Value2key3Value3".getBytes();
    int keyLength = 4;
    int valueLength = 6;
    int pos = 0;
    out = localFs.create(outputPath);
    writer = new IFile.Writer(defaultConf, out, Text.class, IntWritable.class, codec, null, null, true);

    DataInputBuffer kin = new DataInputBuffer();
    kin.reset(kvbuffer, pos, keyLength);

    DataInputBuffer vin = new DataInputBuffer();
    DataOutputBuffer vout = new DataOutputBuffer();
    (new IntWritable(0)).write(vout);
    vin.reset(vout.getData(), vout.getLength());

    //Write initial KV pair
    writer.append(kin, vin);
    assertFalse(writer.sameKey);
    pos += (keyLength + valueLength);

    //Second key is identical to the first (RLE should kick in)
    kin.reset(kvbuffer, pos, keyLength);
    (new IntWritable(0)).write(vout);
    vin.reset(vout.getData(), vout.getLength());
    writer.append(kin, vin);
    assertTrue(writer.sameKey);
    pos += (keyLength + valueLength);

    //Next key (key3) is different (RLE should not kick in)
    kin.reset(kvbuffer, pos, keyLength);
    (new IntWritable(0)).write(vout);
    vin.reset(vout.getData(), vout.getLength());
    writer.append(kin, vin);
    assertFalse(writer.sameKey);

    writer.close();
    out.close();
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License
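
This test tracks the previously written key in a DataInputBuffer so that repeated keys can be detected with BufferUtils.compare and written with appendValue instead of a full append.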

@Test(timeout = 5000)
//Test appendValue with DataInputBuffer
public void testAppendValueWithDataInputBuffer() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class,
            codec, null, null);

    final DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        populateData(kvp, key, value);

        if ((previousKey != null && BufferUtils.compare(key, previousKey) == 0)) {
            writer.appendValue(value);
        } else {
            writer.append(key, value);
        }
        previousKey.reset(key.getData(), 0, key.getLength());
    }

    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License
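
DataInputBuffer also works as the input backing for Hadoop Deserializers: the reader fills the raw key and value buffers, and the open deserializers decode them into Text and IntWritable objects for comparison.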

/**
 * Data verification.
 *
 * @param reader the IFile reader to consume
 * @param data   the expected key/value pairs, in read order
 * @throws IOException if reading from the IFile fails
 */
private void verifyData(Reader reader, List<KVPair> data) throws IOException {
    LOG.info("Data verification");
    Text readKey = new Text();
    IntWritable readValue = new IntWritable();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    Deserializer<Text> keyDeserializer;
    Deserializer<IntWritable> valDeserializer;
    SerializationFactory serializationFactory = new SerializationFactory(defaultConf);
    keyDeserializer = serializationFactory.getDeserializer(Text.class);
    valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
    keyDeserializer.open(keyIn);
    valDeserializer.open(valIn);

    int numRecordsRead = 0;

    while (reader.nextRawKey(keyIn)) {
        reader.nextRawValue(valIn);
        readKey = keyDeserializer.deserialize(readKey);
        readValue = valDeserializer.deserialize(readValue);

        KVPair expected = data.get(numRecordsRead);
        assertEquals("Key does not match: Expected: " + expected.getKey() + ", Read: " + readKey,
                expected.getKey(), readKey);
        assertEquals("Value does not match: Expected: " + expected.getvalue() + ", Read: " + readValue,
                expected.getvalue(), readValue);

        numRecordsRead++;
    }
    assertEquals("Expected: " + data.size() + " records, but found: " + numRecordsRead, data.size(),
            numRecordsRead);
    LOG.info("Found: " + numRecordsRead + " records");
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License
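
A writer-side helper: keys and values are populated into DataInputBuffers, and when repeatKeys is enabled a repeated key is replaced by the IFile.REPEAT_KEY marker.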

private Writer writeTestFileUsingDataBuffer(IFile.Writer writer, boolean rle, boolean repeatKeys,
        List<KVPair> data, CompressionCodec codec) throws IOException {
    DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        populateData(kvp, key, value);

        if (repeatKeys && (previousKey != null && BufferUtils.compare(key, previousKey) == 0)) {
            writer.append(IFile.REPEAT_KEY, value);
        } else {
            writer.append(key, value);
        }
        previousKey.reset(key.getData(), 0, key.getLength());
    }

    writer.close();

    LOG.info("Uncompressed: " + writer.getRawLength());
    LOG.info("CompressedSize: " + writer.getCompressedLength());

    return writer;
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestPipelinedSorter.java

License:Apache License
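
As in the TestIFile verification above, DataInputBuffers feed the reader's raw output into Deserializers, here for Text keys and Text values checked against sortedDataMap.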

private void verifyData(IFile.Reader reader) throws IOException {
    Text readKey = new Text();
    Text readValue = new Text();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer<Text> keyDeserializer = serializationFactory.getDeserializer(Text.class);
    Deserializer<Text> valDeserializer = serializationFactory.getDeserializer(Text.class);
    keyDeserializer.open(keyIn);
    valDeserializer.open(valIn);

    int numRecordsRead = 0;

    for (Map.Entry<String, String> entry : sortedDataMap.entrySet()) {
        String key = entry.getKey();
        String val = entry.getValue();
        if (reader.nextRawKey(keyIn)) {
            reader.nextRawValue(valIn);
            readKey = keyDeserializer.deserialize(readKey);
            readValue = valDeserializer.deserialize(readValue);
            Assert.assertTrue(key.equalsIgnoreCase(readKey.toString()));
            Assert.assertTrue(val.equalsIgnoreCase(readValue.toString()));
            numRecordsRead++;
        }
    }
    Assert.assertTrue(numRecordsRead == sortedDataMap.size());
}

From source file:org.apache.tez.runtime.library.common.TestValuesIterator.java

License:Apache License
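
Key/value objects are serialized into DataOutputBuffers and then wrapped by DataInputBuffers for appending to an InMemoryWriter, producing in-memory segments for merge tests.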

/**
 * Create in-memory segments.
 *
 * @return the list of in-memory segments, each backed by an InMemoryReader
 * @throws IOException
 */
public List<TezMerger.Segment> createInMemStreams() throws IOException {
    int numberOfStreams = Math.max(2, rnd.nextInt(10));
    LOG.info("No of streams : " + numberOfStreams);

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer keySerializer = serializationFactory.getSerializer(keyClass);
    Serializer valueSerializer = serializationFactory.getSerializer(valClass);

    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext context = createTezInputContext();
    MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator, context, null, null, null, null,
            null, 1024 * 1024 * 10, null, false, -1);

    DataOutputBuffer keyBuf = new DataOutputBuffer();
    DataOutputBuffer valBuf = new DataOutputBuffer();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    keySerializer.open(keyBuf);
    valueSerializer.open(valBuf);

    List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
    for (int i = 0; i < numberOfStreams; i++) {
        BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
        InMemoryWriter writer = new InMemoryWriter(bout);
        Map<Writable, Writable> data = createData();
        //write data
        for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
            keySerializer.serialize(entry.getKey());
            valueSerializer.serialize(entry.getValue());
            keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
            valIn.reset(valBuf.getData(), 0, valBuf.getLength());
            writer.append(keyIn, valIn);
            originalData.put(entry.getKey(), entry.getValue());
            keyBuf.reset();
            valBuf.reset();
            keyIn.reset();
            valIn.reset();
        }
        IFile.Reader reader = new InMemoryReader(mergeManager, null, bout.getBuffer(), 0,
                bout.getBuffer().length);
        segments.add(new TezMerger.Segment(reader, true));

        data.clear();
        writer.close();
    }
    return segments;
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License
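
The DataInputBuffers in this test appear during verification: for each non-empty partition an IFile.Reader fills them with raw bytes, and Text.readFields decodes the key and value for comparison with the expected multimap.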

public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
        int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
            HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;

    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numPartitions, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        // Special case for 0 records: nothing was written, so there is
        // nothing to verify.
        return;
    }

    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License
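
Same read-back pattern as textTest above: DataInputBuffers receive raw bytes from the IFile.Reader, and IntWritable/LongWritable readFields decode them during the final data verification.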

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        // Special case for 0 records: nothing was written, so there is
        // nothing to verify.
        return;
    }

    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}