Example usage for org.apache.hadoop.io DataInputBuffer reset

List of usage examples for org.apache.hadoop.io DataInputBuffer reset

Introduction

On this page you can find example usages of org.apache.hadoop.io DataInputBuffer reset, collected from open-source projects.

Prototype

public void reset(byte[] input, int start, int length) 

Document

Resets the data that the buffer reads.
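
The following is a minimal, self-contained sketch of the basic round trip (the class name is illustrative, not taken from the examples below): reset points the buffer at an existing byte array without copying, so one buffer object can be re-pointed at different slices. Note that a two-argument overload, reset(byte[] input, int length), also exists and appears in some of the examples below.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class DataInputBufferResetExample {
    public static void main(String[] args) throws IOException {
        // Serialize two ints into a growable output buffer
        DataOutputBuffer out = new DataOutputBuffer();
        out.writeInt(42);
        out.writeInt(7);

        // Point the input buffer at the serialized bytes; nothing is copied
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), 0, out.getLength());
        System.out.println(in.readInt()); // 42
        System.out.println(in.readInt()); // 7

        // Re-point the same object at just the second int (offset 4, length 4)
        in.reset(out.getData(), 4, 4);
        System.out.println(in.readInt()); // 7
    }
}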

Usage

From source file:org.apache.tez.engine.common.shuffle.impl.InMemoryReader.java

License:Apache License

public KeyState readRawKey(DataInputBuffer key) throws IOException {
    try {
        if (!positionToNextRecord(memDataIn)) {
            return KeyState.NO_KEY;
        }
        // Setup the key
        int pos = memDataIn.getPosition();
        byte[] data = memDataIn.getData();
        if (currentKeyLength == IFile.RLE_MARKER) {
            key.reset(data, prevKeyPos, prevKeyLength);
            currentKeyLength = prevKeyLength;
            return KeyState.SAME_KEY;
        }
        key.reset(data, pos, currentKeyLength);
        prevKeyPos = pos;
        // Position for the next value
        long skipped = memDataIn.skip(currentKeyLength);
        if (skipped != currentKeyLength) {
            throw new IOException("Rec# " + recNo + ": Failed to skip past key of length: " + currentKeyLength);
        }

        // Record the bytes read
        bytesRead += currentKeyLength;
        return KeyState.NEW_KEY;
    } catch (IOException ioe) {
        dumpOnError();
        throw ioe;
    }
}
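
The RLE branch above avoids re-reading a repeated key by re-pointing the caller's buffer at the previous key's bytes. A standalone sketch of that trick (data and offsets invented for illustration, not taken from the Tez sources):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.DataInputBuffer;

public class RleResetSketch {
    public static void main(String[] args) throws Exception {
        // "key0" at offset 0, "key1" at offset 8; values omitted for brevity
        byte[] data = "key0VAL1key1VAL2".getBytes(StandardCharsets.UTF_8);
        DataInputBuffer key = new DataInputBuffer();

        // Normal record: decode the key and remember where it lives
        key.reset(data, 0, 4);
        int prevKeyPos = 0;
        int prevKeyLength = 4;

        // RLE record: the stream says "same key as before", so re-point the
        // buffer at the previous key's bytes; no copy, no re-parse
        key.reset(data, prevKeyPos, prevKeyLength);
        byte[] k = new byte[prevKeyLength];
        key.readFully(k);
        System.out.println(new String(k, StandardCharsets.UTF_8)); // key0
    }
}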

From source file:org.apache.tez.engine.common.shuffle.impl.InMemoryReader.java

License:Apache License

public void nextRawValue(DataInputBuffer value) throws IOException {
    try {
        int pos = memDataIn.getPosition();
        byte[] data = memDataIn.getData();
        value.reset(data, pos, currentValueLength);

        // Position for the next record
        long skipped = memDataIn.skip(currentValueLength);
        if (skipped != currentValueLength) {
            throw new IOException(
                    "Rec# " + recNo + ": Failed to skip past value of length: " + currentValueLength);
        }
        // Record the bytes read
        bytesRead += currentValueLength;

        ++recNo;
    } catch (IOException ioe) {
        dumpOnError();
        throw ioe;
    }
}

From source file:org.apache.tez.engine.common.sort.impl.dflt.DefaultSorter.java

License:Apache License

protected void spill(int mstart, int mend) throws IOException, InterruptedException {

    //approximate the length of the output file to be the length of the
    //buffer + header lengths for the partitions
    final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
            + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        out = rfs.create(filename);

        int spindex = mstart;
        final InMemValBytes value = createInMemValBytes();
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                writer = new Writer(job, out, keyClass, valClass, codec, spilledRecordsCounter);
                if (combineProcessor == null) {
                    // spill directly
                    DataInputBuffer key = new DataInputBuffer();
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        final int kvoff = offsetFor(spindex);
                        key.reset(kvbuffer, kvmeta.get(kvoff + KEYSTART),
                                (kvmeta.get(kvoff + VALSTART) - kvmeta.get(kvoff + KEYSTART)));
                        getVBytesForOffset(kvoff, value);
                        writer.append(key, value);
                        ++spindex;
                    }
                } else {
                    int spstart = spindex;
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        ++spindex;
                    }
                    // Note: we would like to avoid the combiner if we've fewer
                    // than some threshold of records for a partition
                    if (spstart != spindex) {
                        TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Running combine processor");
                        }
                        runCombineProcessor(kvIter, writer);
                    }
                }

                // close the writer
                writer.close();

                // record offsets
                final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                        writer.getCompressedLength());
                spillRec.putIndex(rec, i);

                writer = null;
            } finally {
                if (null != writer)
                    writer.close();
            }
        }

        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
                    partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillRec.writeToFile(indexFilename, job);
        } else {
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        LOG.info("Finished spill " + numSpills);
        ++numSpills;
    } finally {
        if (out != null)
            out.close();
    }
}
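
Note the reuse pattern in the spill loop above: a single DataInputBuffer is re-pointed at each record's slice of the large shared kvbuffer, so the loop allocates no per-record key objects. A hedged, self-contained sketch of that pattern (buffer contents and offsets are invented):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.DataInputBuffer;

public class SpillResetSketch {
    public static void main(String[] args) throws Exception {
        // Three 3-byte keys packed into one shared array, as in kvbuffer
        byte[] kvbuffer = "aaabbbccc".getBytes(StandardCharsets.UTF_8);
        int[][] records = { { 0, 3 }, { 3, 3 }, { 6, 3 } }; // {start, length}

        // One buffer, re-pointed per record, standing in for
        // key.reset(kvbuffer, keystart, valstart - keystart) above
        DataInputBuffer key = new DataInputBuffer();
        for (int[] rec : records) {
            key.reset(kvbuffer, rec[0], rec[1]);
            byte[] k = new byte[rec[1]];
            key.readFully(k);
            System.out.println(new String(k, StandardCharsets.UTF_8));
        }
    }
}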

From source file:org.apache.tez.engine.common.sort.impl.dflt.SortBufferInputStream.java

License:Apache License

@Override
public int read(byte[] b, int off, int len) throws IOException {
    if (available() == 0) {
        return -1;
    }

    int currentOffset = off;
    int currentLength = len;
    int currentReadBytes = 0;

    // Check if there is residual data in the dualBuf
    int residualLen = out.getCurrent();
    if (residualLen > 0) {
        int readable = Math.min(currentLength, residualLen);
        System.arraycopy(dualBuf, 0, b, currentOffset, readable);
        currentOffset += readable;
        currentReadBytes += readable;
        out.setCurrentPointer(-readable);

        // buffer has less capacity
        currentLength -= readable;

        if (LOG.isDebugEnabled()) {
            LOG.debug("XXX read_residual:" + " readable=" + readable + " readBytes=" + readBytes);
        }
    }

    // Now, use the provided buffer
    if (LOG.isDebugEnabled()) {
        LOG.debug("XXX read: out.reset" + " b=" + b + " currentOffset=" + currentOffset + " currentLength="
                + currentLength + " recIndex=" + recIndex);
    }
    out.reset(b, currentOffset, currentLength);

    // Read from sort-buffer into the provided buffer, space permitting
    DataInputBuffer key = new DataInputBuffer();
    final InMemValBytes value = sorter.createInMemValBytes();

    int kvPartition = 0;
    int numRec = 0;
    for (; currentLength > 0 && recIndex < mend
            && (kvPartition = getKVPartition(recIndex)) == partition; ++recIndex) {

        final int kvoff = sorter.offsetFor(recIndex);

        int keyLen = (kvmeta.get(kvoff + InMemoryShuffleSorter.VALSTART)
                - kvmeta.get(kvoff + InMemoryShuffleSorter.KEYSTART));
        key.reset(kvbuffer, kvmeta.get(kvoff + InMemoryShuffleSorter.KEYSTART), keyLen);

        int valLen = sorter.getVBytesForOffset(kvoff, value);

        int recLen = (keyLen + WritableUtils.getVIntSize(keyLen))
                + (valLen + WritableUtils.getVIntSize(valLen));

        currentReadBytes += recLen;
        currentOffset += recLen;
        currentLength -= recLen;

        // Write out key/value into the in-mem ifile
        if (LOG.isDebugEnabled()) {
            LOG.debug("XXX read: sortOutput.append" + " #rec=" + ++numRec + " recIndex=" + recIndex + " kvoff="
                    + kvoff + " keyLen=" + keyLen + " valLen=" + valLen + " recLen=" + recLen + " readBytes="
                    + readBytes + " currentReadBytes=" + currentReadBytes + " currentLength=" + currentLength);
        }
        sortOutput.append(key, value);
    }

    // If we are at the end of the segment, close the ifile
    if (currentLength > 0 && (recIndex == mend || kvPartition != partition)) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("XXX About to call close:" + " currentLength=" + currentLength + " recIndex=" + recIndex
                    + " mend=" + mend + " kvPartition=" + kvPartition + " partitino=" + partition);
        }
        sortOutput.close();
        currentReadBytes += (InMemoryShuffleSorter.IFILE_EOF_LENGTH
                + InMemoryShuffleSorter.IFILE_CHECKSUM_LENGTH);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("XXX Hmm..." + " currentLength=" + currentLength + " recIndex=" + recIndex + " mend="
                    + mend + " kvPartition=" + kvPartition + " partitino=" + partition);
        }
    }

    int retVal = Math.min(currentReadBytes, len);
    readBytes += retVal;
    if (LOG.isDebugEnabled()) {
        LOG.debug("XXX read: done" + " retVal=" + retVal + " currentReadBytes=" + currentReadBytes + " len="
                + len + " readBytes=" + readBytes + " partitionBytes=" + partitionBytes + " residualBytes="
                + out.getCurrent());
    }
    return retVal;
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader.java

License:Apache License

public KeyState readRawKey(DataInputBuffer key) throws IOException {
    try {
        if (!positionToNextRecord(memDataIn)) {
            return KeyState.NO_KEY;
        }
        // Setup the key
        int pos = memDataIn.getPosition();
        byte[] data = memDataIn.getData();
        if (currentKeyLength == IFile.RLE_MARKER) {
            // get key length from original key
            key.reset(data, originalKeyPos, originalKeyLength);
            return KeyState.SAME_KEY;
        }
        key.reset(data, pos, currentKeyLength);
        // Position for the next value
        long skipped = memDataIn.skip(currentKeyLength);
        if (skipped != currentKeyLength) {
            throw new IOException("Rec# " + recNo + ": Failed to skip past key of length: " + currentKeyLength);
        }
        bytesRead += currentKeyLength;
        return KeyState.NEW_KEY;
    } catch (IOException ioe) {
        dumpOnError();
        throw ioe;
    }
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader.java

License:Apache License

public void nextRawValue(DataInputBuffer value) throws IOException {
    try {
        int pos = memDataIn.getPosition();
        byte[] data = memDataIn.getData();
        value.reset(data, pos, currentValueLength);

        // Position for the next record
        long skipped = memDataIn.skip(currentValueLength);
        if (skipped != currentValueLength) {
            throw new IOException(
                    "Rec# " + recNo + ": Failed to skip past value of length: " + currentValueLength);
        }
        // Record the bytes read
        bytesRead += currentValueLength;
        ++recNo;
    } catch (IOException ioe) {
        dumpOnError();
        throw ioe;
    }
}

From source file:org.apache.tez.runtime.library.common.sort.impl.dflt.DefaultSorter.java

License:Apache License

protected void spill(int mstart, int mend) throws IOException, InterruptedException {

    //approximate the length of the output file to be the length of the
    //buffer + header lengths for the partitions
    final long size = (bufend >= bufstart ? bufend - bufstart : (bufvoid - bufend) + bufstart)
            + partitions * APPROX_HEADER_LENGTH;
    FSDataOutputStream out = null;
    try {
        // create spill file
        final TezSpillRecord spillRec = new TezSpillRecord(partitions);
        final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
        spillFilePaths.put(numSpills, filename);
        out = rfs.create(filename);

        int spindex = mstart;
        final InMemValBytes value = createInMemValBytes();
        boolean rle = isRLENeeded();
        for (int i = 0; i < partitions; ++i) {
            IFile.Writer writer = null;
            try {
                long segmentStart = out.getPos();
                writer = new Writer(conf, out, keyClass, valClass, codec, spilledRecordsCounter, null, rle);
                if (combiner == null) {
                    // spill directly
                    DataInputBuffer key = new DataInputBuffer();
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        final int kvoff = offsetFor(spindex);
                        int keystart = kvmeta.get(kvoff + KEYSTART);
                        int valstart = kvmeta.get(kvoff + VALSTART);
                        key.reset(kvbuffer, keystart, valstart - keystart);
                        getVBytesForOffset(kvoff, value);
                        writer.append(key, value);
                        ++spindex;
                    }
                } else {
                    int spstart = spindex;
                    while (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
                        ++spindex;
                    }
                    // Note: we would like to avoid the combiner if we've fewer
                    // than some threshold of records for a partition
                    if (spstart != spindex) {
                        TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Running combine processor");
                        }
                        runCombineProcessor(kvIter, writer);
                    }
                }

                // close the writer
                writer.close();
                if (numSpills > 0) {
                    additionalSpillBytesWritten.increment(writer.getCompressedLength());
                    numAdditionalSpills.increment(1);
                    // Reset this; the correct value will be set during the final merge.
                    outputBytesWithOverheadCounter.setValue(0);
                } else {
                    // Set this up for the first write only. Subsequent ones will be handled in the final merge.
                    outputBytesWithOverheadCounter.increment(writer.getRawLength());
                }
                // record offsets
                final TezIndexRecord rec = new TezIndexRecord(segmentStart, writer.getRawLength(),
                        writer.getCompressedLength());
                spillRec.putIndex(rec, i);

                writer = null;
            } finally {
                if (null != writer)
                    writer.close();
            }
        }

        if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
            // create spill index file
            Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
                    partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
            spillFileIndexPaths.put(numSpills, indexFilename);
            spillRec.writeToFile(indexFilename, conf);
        } else {
            indexCacheList.add(spillRec);
            totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        LOG.info("Finished spill " + numSpills);
        ++numSpills;
    } finally {
        if (out != null)
            out.close();
    }
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License

@Test(timeout = 5000)
//test with sorted data and repeat keys
public void testWithRLEMarker() throws IOException {
    //Test with append(Object, Object)
    FSDataOutputStream out = localFs.create(outputPath);
    IFile.Writer writer = new IFile.Writer(defaultConf, out, Text.class, IntWritable.class, codec, null, null,
            true);

    Text key = new Text("key0");
    IntWritable value = new IntWritable(0);
    writer.append(key, value);

    //same key (RLE should kick in)
    key = new Text("key0");
    writer.append(key, value);
    assertTrue(writer.sameKey);

    //Different key
    key = new Text("key1");
    writer.append(key, value);
    assertFalse(writer.sameKey);
    writer.close();
    out.close();

    //Test with append(DataInputBuffer key, DataInputBuffer value)
    byte[] kvbuffer = "key1Value1key1Value2key3Value3".getBytes();
    int keyLength = 4;
    int valueLength = 6;
    int pos = 0;
    out = localFs.create(outputPath);
    writer = new IFile.Writer(defaultConf, out, Text.class, IntWritable.class, codec, null, null, true);

    DataInputBuffer kin = new DataInputBuffer();
    kin.reset(kvbuffer, pos, keyLength);

    DataInputBuffer vin = new DataInputBuffer();
    DataOutputBuffer vout = new DataOutputBuffer();
    (new IntWritable(0)).write(vout);
    vin.reset(vout.getData(), vout.getLength());

    //Write initial KV pair
    writer.append(kin, vin);
    assertFalse(writer.sameKey);
    pos += (keyLength + valueLength);

    //Second key is identical to the first (RLE should kick in)
    kin.reset(kvbuffer, pos, keyLength);
    (new IntWritable(0)).write(vout);
    vin.reset(vout.getData(), vout.getLength());
    writer.append(kin, vin);
    assertTrue(writer.sameKey);
    pos += (keyLength + valueLength);

    //Next key (key3) is different (RLE should not kick in)
    kin.reset(kvbuffer, pos, keyLength);
    (new IntWritable(0)).write(vout);
    vin.reset(vout.getData(), vout.getLength());
    writer.append(kin, vin);
    assertFalse(writer.sameKey);

    writer.close();
    out.close();
}
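
One caveat when adapting the test above: DataOutputBuffer accumulates bytes across writes, so vout grows by four bytes on every (new IntWritable(0)).write(vout) call, and vout.getLength() then no longer equals the size of a single serialized int. If each vin is meant to view exactly one value, calling vout.reset() before each write keeps the lengths aligned. A minimal sketch of that variant (assuming the one-value-per-append intent; the test as quoted reuses vout without resetting it):

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class ReuseOutputBufferSketch {
    public static void main(String[] args) throws IOException {
        DataOutputBuffer vout = new DataOutputBuffer();
        DataInputBuffer vin = new DataInputBuffer();

        for (int i = 0; i < 3; i++) {
            vout.reset(); // without this, getLength() grows by 4 each pass
            new IntWritable(i).write(vout);
            vin.reset(vout.getData(), 0, vout.getLength());
            System.out.println(vin.readInt()); // 0, 1, 2
        }
    }
}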

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License

@Test(timeout = 5000)
//Test appendValue with DataInputBuffer
public void testAppendValueWithDataInputBuffer() throws IOException {
    List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100));
    IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, Text.class, IntWritable.class,
            codec, null, null);

    final DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        populateData(kvp, key, value);

        if (previousKey != null && BufferUtils.compare(key, previousKey) == 0) {
            writer.appendValue(value);
        } else {
            writer.append(key, value);
        }
        previousKey.reset(key.getData(), 0, key.getLength());
    }

    writer.close();

    readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec);
}

From source file:org.apache.tez.runtime.library.common.sort.impl.TestIFile.java

License:Apache License

private Writer writeTestFileUsingDataBuffer(IFile.Writer writer, boolean rle, boolean repeatKeys,
        List<KVPair> data, CompressionCodec codec) throws IOException {
    DataInputBuffer previousKey = new DataInputBuffer();
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    for (KVPair kvp : data) {
        populateData(kvp, key, value);

        if (repeatKeys && (previousKey != null && BufferUtils.compare(key, previousKey) == 0)) {
            writer.append(IFile.REPEAT_KEY, value);
        } else {
            writer.append(key, value);
        }
        previousKey.reset(key.getData(), 0, key.getLength());
    }

    writer.close();

    LOG.info("Uncompressed: " + writer.getRawLength());
    LOG.info("CompressedSize: " + writer.getCompressedLength());

    return writer;
}
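
In both tests above, previousKey.reset(key.getData(), 0, key.getLength()) stores a reference to key's backing array rather than copying it, which is only safe if populateData gives key a fresh array on every iteration. (Also note that DataInputBuffer.getLength() returns start + length, so this pattern assumes the key starts at offset 0.) A minimal sketch, not from the tests, of a defensive variant that snapshots the bytes instead:

import java.util.Arrays;

import org.apache.hadoop.io.DataInputBuffer;

public class PreviousKeyCopySketch {
    public static void main(String[] args) {
        byte[] backing = "key0".getBytes();
        DataInputBuffer key = new DataInputBuffer();
        key.reset(backing, 0, backing.length);

        // Aliasing: previousKey sees every later mutation of 'backing'
        DataInputBuffer previousKey = new DataInputBuffer();
        previousKey.reset(key.getData(), 0, key.getLength());

        // Defensive variant: copy the bytes so later reuse of 'backing'
        // cannot corrupt the remembered key
        byte[] snapshot = Arrays.copyOfRange(key.getData(), 0, key.getLength());
        previousKey.reset(snapshot, 0, snapshot.length);
    }
}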