Example usage for org.apache.hadoop.io.compress.Decompressor.reset()

Introduction

This page collects example usages of org.apache.hadoop.io.compress.Decompressor.reset().

Prototype

public void reset();

Document

Resets decompressor and input and output buffers so that a new set of input data can be processed.
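
The method matters mostly when a single Decompressor instance is reused across streams, as the pool examples below do. A minimal reuse sketch (the gzip codec and the /tmp paths are illustrative assumptions, not taken from the examples on this page):

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DecompressorReuse {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);

        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try {
            // First stream: the decompressor accumulates internal state.
            try (InputStream in = codec.createInputStream(
                    fs.open(new Path("/tmp/first.gz")), decompressor)) {
                IOUtils.copyBytes(in, System.out, 4096, false);
            }
            // Clear that state so the same instance can decode a new stream.
            decompressor.reset();
            try (InputStream in = codec.createInputStream(
                    fs.open(new Path("/tmp/second.gz")), decompressor)) {
                IOUtils.copyBytes(in, System.out, 4096, false);
            }
        } finally {
            CodecPool.returnDecompressor(decompressor);
        }
    }
}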

Usage

From source file: data.intelligence.platform.yarn.etl.io.CodecPool.java

License: Apache License

/**
 * Return the {@link Decompressor} to the pool.
 *
 * @param decompressor
 *            the <code>Decompressor</code> to be returned to the pool
 */
public static void returnDecompressor(Decompressor decompressor) {
    if (decompressor == null) {
        return;
    }
    decompressor.reset();
    payback(DECOMPRESSOR_POOL, decompressor);
}
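
Resetting before payback() means the next borrower always receives a stateless instance. A borrower would pair the calls roughly as below (a minimal sketch; a getDecompressor counterpart is assumed to exist in this class, mirroring Hadoop's own CodecPool, and is not shown in this excerpt):

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;

class BorrowerSketch {
    // Hypothetical borrower of a pooled decompressor.
    static void consume(CompressionCodec codec, InputStream compressed) throws IOException {
        Decompressor decompressor = CodecPool.getDecompressor(codec); // assumed counterpart
        try (InputStream in = codec.createInputStream(compressed, decompressor)) {
            in.skip(Long.MAX_VALUE); // stand-in for real consumption
        } finally {
            CodecPool.returnDecompressor(decompressor); // reset() happens in here
        }
    }
}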

From source file: nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java

License: Apache License

/**
 * This test checks whether reading the file in multiple splits results
 * in the same lines as reading the file as a single 'split'.
 */
private void validateSplitSeams(final Configuration conf, final FileSystem fs, final Path filename,
        final Class<? extends SplittableCompressionCodec> codecClass, final long splitSize,
        final long recordsInFile, final long lastSplitSizeLimit) throws IOException {
    // To make the test predictable
    conf.setInt("io.file.buffer.size", BUFFER_SIZE);

    final FileStatus infile = fs.getFileStatus(filename);
    final long inputLength = infile.getLen();

    if (inputLength > Integer.MAX_VALUE) {
        fail("Bad test file length.");
    }

    LOG.info("Input is " + inputLength + " bytes. " + "making a split every " + splitSize + " bytes.");

    if (inputLength <= splitSize) {
        fail("The compressed test file is too small to do any useful testing.");
    }

    final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    /*
     * The validation is done as follows:
     * 1) We open the entire file as a single split as the reference
     * 2) We create a sequence of splits and validate each line with the
     *    reference split.
     * The lines from these two must match 100%.
     */

    final Text refLine = new Text();
    final Decompressor refDcmp = CodecPool.getDecompressor(codec);
    assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp);

    final SplitCompressionInputStream refStream = codec.createInputStream(fs.open(infile.getPath()), refDcmp, 0,
            inputLength, SplittableCompressionCodec.READ_MODE.BYBLOCK);
    final LineReader refReader = new LineReader(refStream, conf);

    final Text line = new Text();
    final Decompressor dcmp = CodecPool.getDecompressor(codec);
    assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp);

    try {
        long start = 0;
        long end = splitSize;
        int splitCount = 0;
        long refLineNumber = 0;
        long splitLineNumber;

        while (end <= inputLength) {
            splitLineNumber = 0;
            ++splitCount;
            LOG.debug("-------------------------------------------------------");
            dcmp.reset(); // Reset the Decompressor for reuse with the new stream

            final SplitCompressionInputStream splitStream = codec.createInputStream(fs.open(infile.getPath()),
                    dcmp, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);

            final long adjustedStart = splitStream.getAdjustedStart();
            final long adjustedEnd = splitStream.getAdjustedEnd();

            if (LOG.isDebugEnabled()) {
                LOG.debug("Doing split " + splitCount + " on range " + " (" + start + "-" + end + ")"
                        + " adjusted to (" + adjustedStart + "-" + adjustedEnd + ")");
            }

            final LineReader lreader = new LineReader(splitStream, conf);

            if (start != 0) {
                // Not the first split so we discard the first (incomplete) line.
                int readChars = lreader.readLine(line);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("DISCARD LINE " + 0 + " in split " + splitCount + " pos=" + splitStream.getPos()
                            + " length=" + readChars + ": \"" + line + "\"");
                }
            }

            // Now read until the end of this split
            while (nextKeyValue(splitStream, lreader, adjustedEnd, line)) {
                ++splitLineNumber;

                // Get the reference value
                if (!nextKeyValue(refStream, refReader, inputLength, refLine)) {
                    LOG.error(String.format("S>%05d: %s", splitLineNumber, line));
                    fail("Split goes beyond the end of the reference with line number " + splitLineNumber);
                }
                ++refLineNumber;

                if (LOG.isDebugEnabled() && refLineNumber > (recordsInFile - 10)) {
                    LOG.debug(String.format("R<%05d: %s", refLineNumber, refLine));
                    LOG.debug(String.format("S>%05d: %s", splitLineNumber, line));
                }

                assertEquals("Line must be same in reference and in split at line " + refLineNumber, refLine,
                        line);

                if (LOG.isTraceEnabled()) {
                    LOG.trace("LINE " + splitLineNumber + " in split " + splitCount + " (" + refLineNumber
                            + ") pos=" + splitStream.getPos() + " length=" + line.getLength() + ": \"" + line
                            + "\"");
                }
            }

            // We just read through the entire split
            LOG.debug("Checked split " + splitCount + " (" + adjustedStart + "-" + adjustedEnd + ") "
                    + "containing " + splitLineNumber + " lines.");

            if (end == inputLength) {
                LOG.info("====================> Finished the last split <====================");
                break; // We've reached the end of the last split
            }

            // Determine start and end for the next split
            start = end;

            if ((end + lastSplitSizeLimit) > inputLength) {
                end = inputLength;
                LOG.info("====================> Starting the last split (" + start + " - " + end
                        + ") <====================");
            } else {
                end += splitSize;
                LOG.info("====================> Starting the next split (" + start + " - " + end
                        + ") <====================");
            }

        }

        if (nextKeyValue(refStream, refReader, inputLength, refLine)) {
            ++refLineNumber;
            LOG.error(String.format("R<%05d: %s", refLineNumber, refLine));
            fail("The reference is at least one line longer than the last split ( " + "splitSize=" + splitSize
                    + ", " + "inputLength= " + inputLength + ", " + "split start=" + start + ", " + "split end="
                    + end + ", " + "line=" + refLineNumber + ")");
        }

        LOG.info("Verified " + refLineNumber + " lines in " + splitCount + " splits.");

    } finally {
        CodecPool.returnDecompressor(dcmp);
        CodecPool.returnDecompressor(refDcmp);
    }
}

From source file: org.apache.tajo.storage.compress.CodecPool.java

License: Apache License

/**
 * Return the {@link Decompressor} to the pool.
 *
 * @param decompressor
 *          the <code>Decompressor</code> to be returned to the pool
 */
public static void returnDecompressor(Decompressor decompressor) {
    if (decompressor == null) {
        return;
    }
    // if the decompressor can't be reused, don't pool it.
    if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
        return;
    }
    decompressor.reset();
    payback(DECOMPRESSOR_POOL, decompressor);
}
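
The extra annotation check is what makes the reset-and-pool step safe: decompressors whose internal state cannot be recycled opt out of pooling entirely via Hadoop's @DoNotPool marker and are dropped instead of being reset and cached. A quick way to observe this (BuiltInGzipDecompressor carries the annotation in Hadoop; treat that as an assumption if your version differs):

import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DoNotPool;
import org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor;

public class DoNotPoolCheck {
    public static void main(String[] args) {
        Decompressor oneShot = new BuiltInGzipDecompressor();
        // Annotated @DoNotPool, so returnDecompressor() above would
        // drop this instance without calling reset() or pooling it.
        System.out.println(oneShot.getClass()
                .isAnnotationPresent(DoNotPool.class)); // prints: true
    }
}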

From source file: org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java

License: Apache License

@SuppressWarnings("resource")
public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength,
        int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG)
        throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead,
            ifileReadAheadLength);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }
    // Copy map-output into an in-memory buffer
    byte[] shuffleData = fetchedInput.getBytes();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        // metrics.inputBytes(shuffleData.length);
        LOG.info("Read " + shuffleData.length + " bytes from input for "
                + fetchedInput.getInputAttemptIdentifier());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);
        // Re-throw
        throw ioe;
    }
}
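
Note that reset() is called here immediately after borrowing from the pool, defensively guaranteeing a clean decompressor regardless of how the previous user returned it. Stripped of the Tez-specific wrapper types, the idiom looks like this (the codec choice and method shape are illustrative assumptions):

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class ShuffleReadSketch {
    /** Reads exactly decompressedLength decompressed bytes from a raw stream. */
    static byte[] readCompressed(InputStream raw, int decompressedLength,
            Configuration conf) throws IOException {
        CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        decompressor.reset(); // defensive: start from a clean state
        try {
            InputStream in = codec.createInputStream(raw, decompressor);
            byte[] buffer = new byte[decompressedLength];
            IOUtils.readFully(in, buffer, 0, buffer.length);
            return buffer;
        } finally {
            CodecPool.returnDecompressor(decompressor);
        }
    }
}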

From source file: org.mrgeo.data.raster.RasterWritable.java

License: Apache License

public static MrGeoRaster toMrGeoRaster(final RasterWritable writable, final CompressionCodec codec,
        final Decompressor decompressor) throws IOException {
    decompressor.reset();
    final ByteArrayInputStream bis = new ByteArrayInputStream(writable.bytes, 0, writable.getSize());
    final CompressionInputStream gis = codec.createInputStream(bis, decompressor);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    IOUtils.copyBytes(gis, baos, 1024 * 1024 * 2, true);

    return toMrGeoRaster(new RasterWritable(baos.toByteArray()));
}

From source file: org.mrgeo.vector.mrsvector.VectorTileWritable.java

License: Apache License

public static VectorTile toMrsVector(final VectorTileWritable writable, final CompressionCodec codec,
        final Decompressor decompressor) throws IOException {
    decompressor.reset();
    final ByteArrayInputStream bis = new ByteArrayInputStream(writable.getBytes(), 0, writable.getLength());
    final CompressionInputStream gis = codec.createInputStream(bis, decompressor);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    IOUtils.copyBytes(gis, baos, 1024 * 1024 * 2, true);

    byte[] data = baos.toByteArray();
    return VectorTile.fromProtobuf(data, 0, data.length);
}