Example usage of org.apache.hadoop.io.compress.Decompressor.reset()

Introduction

On this page you can find example usages of the reset() method of org.apache.hadoop.io.compress.Decompressor.

Prototype

public void reset();

Source Link

Document

Resets decompressor and input and output buffers so that a new set of input data can be processed.

Usage

From source file:data.intelligence.platform.yarn.etl.io.CodecPool.java

License:Apache License

/**
 * Resets the given {@link Decompressor} and hands it back to the pool.
 * A <code>null</code> argument is silently ignored.
 *
 * @param decompressor
 *            the <code>Decompressor</code> to be returned to the pool;
 *            may be <code>null</code>
 */
public static void returnDecompressor(Decompressor decompressor) {
    if (decompressor != null) {
        // Clear any leftover state before the instance goes back into the pool.
        decompressor.reset();
        payback(DECOMPRESSOR_POOL, decompressor);
    }
}

From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java

License:Apache License

/**
 * This test checks if reading the file in a splitted way results
 * in the same lines as reading the file as a single 'split'.
 *
 * <p>The whole file is opened once as a single reference split. The file is then
 * read again as a sequence of consecutive splits, and every line produced by a
 * split is compared against the next line of the reference stream.</p>
 *
 * @param conf               configuration used for the codec and the line readers;
 *                           its "io.file.buffer.size" is overwritten with BUFFER_SIZE
 * @param fs                 file system that holds the test file
 * @param filename           path of the compressed test file
 * @param codecClass         splittable codec implementation under test
 * @param splitSize          distance in bytes between two consecutive split boundaries
 * @param recordsInFile      number of records in the file; only used to limit debug
 *                           logging to roughly the last ten lines
 * @param lastSplitSizeLimit when fewer than this many bytes would remain after the
 *                           next boundary, the next split is stretched to the end of
 *                           the file instead
 * @throws IOException if opening, reading or closing one of the streams fails
 */
private void validateSplitSeams(final Configuration conf, final FileSystem fs, final Path filename,
        final Class<? extends SplittableCompressionCodec> codecClass, final long splitSize,
        final long recordsInFile, final long lastSplitSizeLimit) throws IOException {
    // To make the test predictable
    conf.setInt("io.file.buffer.size", BUFFER_SIZE);

    final FileStatus infile = fs.getFileStatus(filename);
    final long inputLength = infile.getLen();

    if (inputLength > Integer.MAX_VALUE) {
        fail("Bad test file length.");
    }

    LOG.info("Input is " + inputLength + " bytes. " + "making a split every " + splitSize + " bytes.");

    if (inputLength <= splitSize) {
        fail("The compressed test file is too small to do any useful testing.");
    }

    final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    /*
     * The validation is done as follows:
     * 1) We open the entire file as a single split as the reference
     * 2) We create a sequence of splits and validate each line with the
     *    reference split.
     * The lines from these two must match 100%.
     */

    final Text refLine = new Text();
    final Text line = new Text();

    // Acquire and verify BOTH decompressors before any stream is opened, so a failed
    // assertion cannot leave an open stream behind.
    final Decompressor refDcmp = CodecPool.getDecompressor(codec);
    assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp);

    final Decompressor dcmp = CodecPool.getDecompressor(codec);
    // This must check dcmp (the original re-checked refDcmp by mistake).
    assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", dcmp);

    final SplitCompressionInputStream refStream = codec.createInputStream(fs.open(infile.getPath()), refDcmp, 0,
            inputLength, SplittableCompressionCodec.READ_MODE.BYBLOCK);
    final LineReader refReader = new LineReader(refStream, conf);

    try {
        long start = 0;
        long end = splitSize;
        int splitCount = 0;
        long refLineNumber = 0;
        long splitLineNumber;

        while (end <= inputLength) {
            splitLineNumber = 0;
            ++splitCount;
            LOG.debug("-------------------------------------------------------");
            dcmp.reset(); // Reset the Decompressor for reuse with the new stream

            final SplitCompressionInputStream splitStream = codec.createInputStream(fs.open(infile.getPath()),
                    dcmp, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);

            try {
                final long adjustedStart = splitStream.getAdjustedStart();
                final long adjustedEnd = splitStream.getAdjustedEnd();

                if (LOG.isDebugEnabled()) {
                    LOG.debug("Doing split " + splitCount + " on range " + " (" + start + "-" + end + ")"
                            + " adjusted to (" + adjustedStart + "-" + adjustedEnd + ")");
                }

                final LineReader lreader = new LineReader(splitStream, conf);

                if (start != 0) {
                    // Not the first split so we discard the first (incomplete) line.
                    int readChars = lreader.readLine(line);
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("DISCARD LINE " + 0 + " in split " + splitCount + " pos=" + splitStream.getPos()
                                + " length=" + readChars + ": \"" + line + "\"");
                    }
                }

                // Now read until the end of this split
                while (nextKeyValue(splitStream, lreader, adjustedEnd, line)) {
                    ++splitLineNumber;

                    // Get the reference value
                    if (!nextKeyValue(refStream, refReader, inputLength, refLine)) {
                        LOG.error(String.format("S>%05d: %s", splitLineNumber, line));
                        fail("Split goes beyond the end of the reference with line number " + splitLineNumber);
                    }
                    ++refLineNumber;

                    if (LOG.isDebugEnabled() && refLineNumber > (recordsInFile - 10)) {
                        LOG.debug(String.format("R<%05d: %s", refLineNumber, refLine));
                        LOG.debug(String.format("S>%05d: %s", splitLineNumber, line));
                    }

                    assertEquals("Line must be same in reference and in split at line " + refLineNumber, refLine,
                            line);

                    if (LOG.isTraceEnabled()) {
                        LOG.trace("LINE " + splitLineNumber + " in split " + splitCount + " (" + refLineNumber
                                + ") pos=" + splitStream.getPos() + " length=" + line.getLength() + ": \"" + line
                                + "\"");
                    }
                }

                // We just read through the entire split
                LOG.debug("Checked split " + splitCount + " (" + adjustedStart + "-" + adjustedEnd + ") "
                        + "containing " + splitLineNumber + " lines.");
            } finally {
                // Every iteration opens a fresh file stream; close it so a long file
                // does not accumulate one open handle per split.
                splitStream.close();
            }

            if (end == inputLength) {
                LOG.info("====================> Finished the last split <====================");
                break; // We've reached the end of the last split
            }

            // Determine start and end for the next split
            start = end;

            if ((end + lastSplitSizeLimit) > inputLength) {
                end = inputLength;
                LOG.info("====================> Starting the last split (" + start + " - " + end
                        + ") <====================");
            } else {
                end += splitSize;
                LOG.info("====================> Starting the next split (" + start + " - " + end
                        + ") <====================");
            }

        }

        if (nextKeyValue(refStream, refReader, inputLength, refLine)) {
            ++refLineNumber;
            LOG.error(String.format("R<%05d: %s", refLineNumber, refLine));
            fail("The reference is at least one line longer than the last split ( " + "splitSize=" + splitSize
                    + ", " + "inputLength= " + inputLength + ", " + "split start=" + start + ", " + "split end="
                    + end + ", " + "line=" + refLineNumber + ")");
        }

        LOG.info("Verified " + refLineNumber + " lines in " + splitCount + " splits.");

    } finally {
        try {
            // Close the reference stream before its decompressor goes back to the pool.
            refStream.close();
        } finally {
            // Return the decompressors even when closing the reference stream fails.
            CodecPool.returnDecompressor(dcmp);
            CodecPool.returnDecompressor(refDcmp);
        }
    }
}

From source file:org.apache.tajo.storage.compress.CodecPool.java

License:Apache License

/**
 * Resets the given {@link Decompressor} and hands it back to the pool.
 * Nothing happens when the argument is <code>null</code> or when its class
 * is annotated with {@link DoNotPool}.
 *
 * @param decompressor
 *          the <code>Decompressor</code> to be returned to the pool;
 *          may be <code>null</code>
 */
public static void returnDecompressor(Decompressor decompressor) {
    // Instances whose class carries @DoNotPool cannot be reused, so they are never pooled.
    final boolean poolable = decompressor != null
            && !decompressor.getClass().isAnnotationPresent(DoNotPool.class);
    if (poolable) {
        decompressor.reset();
        payback(DECOMPRESSOR_POOL, decompressor);
    }
}

From source file:org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java

License:Apache License

/**
 * Copies one fetched map output from {@code input} into the byte array that backs
 * {@code fetchedInput}.
 *
 * <p>The raw stream is wrapped in an {@link IFileInputStream}. When {@code codec} is
 * not {@code null}, a decompressing stream is layered on top using a decompressor
 * borrowed from {@link CodecPool}. The borrowed decompressor is always handed back
 * to the pool before this method returns, on both the success and the failure path.</p>
 *
 * <p>On success the stream is left open for the caller. On an {@link IOException} the
 * (wrapped) stream is closed and the exception is re-thrown.</p>
 *
 * @param fetchedInput         destination; exactly {@code fetchedInput.getBytes().length}
 *                             bytes are read into its buffer
 * @param input                stream positioned at the start of the map output
 * @param decompressedLength   not used by this method
 * @param compressedLength     length passed to the {@link IFileInputStream} wrapper
 * @param codec                compression codec of the map output, or {@code null}
 *                             when the data is not compressed
 * @param ifileReadAhead       read-ahead flag passed to the {@link IFileInputStream} wrapper
 * @param ifileReadAheadLength read-ahead length passed to the {@link IFileInputStream} wrapper
 * @param LOG                  logger used for the progress message and for stream cleanup
 * @throws IOException if setting up the decompressing stream or reading the data fails
 */
@SuppressWarnings("resource")
public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength,
        int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG)
        throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead,
            ifileReadAheadLength);

    input = checksumIn;

    Decompressor decompressor = null;
    try {
        // Are map-outputs compressed?
        if (codec != null) {
            decompressor = CodecPool.getDecompressor(codec);
            // NOTE(review): the pool may hand out null for codecs that have no
            // Decompressor implementation - confirm; guard so reset() cannot NPE.
            if (decompressor != null) {
                decompressor.reset();
            }
            input = codec.createInputStream(input, decompressor);
        }

        // Copy map-output into an in-memory buffer
        byte[] shuffleData = fetchedInput.getBytes();

        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        // metrics.inputBytes(shuffleData.length);
        LOG.info("Read " + shuffleData.length + " bytes from input for "
                + fetchedInput.getInputAttemptIdentifier());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);
        // Re-throw
        throw ioe;
    } finally {
        // Runs after the catch block, i.e. after the stream using the decompressor
        // has been closed on failure. A null decompressor is passed through as-is.
        CodecPool.returnDecompressor(decompressor);
    }
}

From source file:org.mrgeo.data.raster.RasterWritable.java

License:Apache License

/**
 * Decompresses the payload of a {@link RasterWritable} and converts the result
 * into a {@link MrGeoRaster}.
 *
 * @param writable     holder of the compressed bytes; the first
 *                     {@code writable.getSize()} bytes of {@code writable.bytes} are read
 * @param codec        codec used to create the decompressing stream
 * @param decompressor decompressor to use; it is reset before use
 * @return the raster built from the decompressed bytes
 * @throws IOException if decompressing the payload fails
 */
public static MrGeoRaster toMrGeoRaster(final RasterWritable writable, final CompressionCodec codec,
        final Decompressor decompressor) throws IOException {
    // Clear any state left over from a previous use of this decompressor.
    decompressor.reset();

    final int copyBufferSize = 1024 * 1024 * 2;
    final CompressionInputStream compressedIn = codec.createInputStream(
            new ByteArrayInputStream(writable.bytes, 0, writable.getSize()), decompressor);
    final ByteArrayOutputStream decompressed = new ByteArrayOutputStream();

    // NOTE(review): the trailing 'true' presumably tells IOUtils to close both
    // streams once the copy is done - confirm against the IOUtils in use.
    IOUtils.copyBytes(compressedIn, decompressed, copyBufferSize, true);

    final RasterWritable plain = new RasterWritable(decompressed.toByteArray());
    return toMrGeoRaster(plain);
}

From source file:org.mrgeo.vector.mrsvector.VectorTileWritable.java

License:Apache License

/**
 * Decompresses the payload of a {@link VectorTileWritable} and parses the result
 * as a protobuf-encoded {@link VectorTile}.
 *
 * @param writable     holder of the compressed bytes; the first
 *                     {@code writable.getLength()} bytes of {@code writable.getBytes()} are read
 * @param codec        codec used to create the decompressing stream
 * @param decompressor decompressor to use; it is reset before use
 * @return the vector tile parsed from the decompressed bytes
 * @throws IOException if decompressing the payload fails
 */
public static VectorTile toMrsVector(final VectorTileWritable writable, final CompressionCodec codec,
        final Decompressor decompressor) throws IOException {
    // Clear any state left over from a previous use of this decompressor.
    decompressor.reset();

    final byte[] compressed = writable.getBytes();
    final int compressedLength = writable.getLength();
    final ByteArrayInputStream rawIn = new ByteArrayInputStream(compressed, 0, compressedLength);

    final CompressionInputStream inflatingIn = codec.createInputStream(rawIn, decompressor);
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();

    // NOTE(review): the trailing 'true' presumably tells IOUtils to close both
    // streams once the copy is done - confirm against the IOUtils in use.
    IOUtils.copyBytes(inflatingIn, sink, 1024 * 1024 * 2, true);

    final byte[] decoded = sink.toByteArray();
    return VectorTile.fromProtobuf(decoded, 0, decoded.length);
}