List of usage examples for the reset() method of org.apache.hadoop.io.compress.Decompressor
public void reset();
From source file:data.intelligence.platform.yarn.etl.io.CodecPool.java
License:Apache License
/**
 * Hands a {@link Decompressor} back to the pool.
 *
 * <p>A {@code null} argument is ignored. Otherwise the decompressor is
 * reset first and then passed to {@code payback} together with
 * {@code DECOMPRESSOR_POOL}.
 *
 * @param decompressor
 *            the <code>Decompressor</code> to be returned to the pool;
 *            may be {@code null}
 */
public static void returnDecompressor(Decompressor decompressor) {
    if (decompressor != null) {
        // Reset before pooling so the instance goes back in a reset state.
        decompressor.reset();
        payback(DECOMPRESSOR_POOL, decompressor);
    }
}
From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java
License:Apache License
/** * This test checks if reading the file in a splitted way results * in the same lines as reading the file as a single 'split'. *//*from w w w.j ava 2 s . co m*/ private void validateSplitSeams(final Configuration conf, final FileSystem fs, final Path filename, final Class<? extends SplittableCompressionCodec> codecClass, final long splitSize, final long recordsInFile, final long lastSplitSizeLimit) throws IOException { // To make the test predictable conf.setInt("io.file.buffer.size", BUFFER_SIZE); final FileStatus infile = fs.getFileStatus(filename); final long inputLength = infile.getLen(); if (inputLength > Integer.MAX_VALUE) { fail("Bad test file length."); } LOG.info("Input is " + inputLength + " bytes. " + "making a split every " + splitSize + " bytes."); if (inputLength <= splitSize) { fail("The compressed test file is too small to do any useful testing."); } final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf); /* * The validation is done as follows: * 1) We open the entire file as a single split as the reference * 2) We create a sequence of splits and validate each line with the * reference split. * The lines from these two must match 100%. 
*/ final Text refLine = new Text(); final Decompressor refDcmp = CodecPool.getDecompressor(codec); assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp); final SplitCompressionInputStream refStream = codec.createInputStream(fs.open(infile.getPath()), refDcmp, 0, inputLength, SplittableCompressionCodec.READ_MODE.BYBLOCK); final LineReader refReader = new LineReader(refStream, conf); final Text line = new Text(); final Decompressor dcmp = CodecPool.getDecompressor(codec); assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp); try { long start = 0; long end = splitSize; int splitCount = 0; long refLineNumber = 0; long splitLineNumber; while (end <= inputLength) { splitLineNumber = 0; ++splitCount; LOG.debug("-------------------------------------------------------"); dcmp.reset(); // Reset the Decompressor for reuse with the new stream final SplitCompressionInputStream splitStream = codec.createInputStream(fs.open(infile.getPath()), dcmp, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK); final long adjustedStart = splitStream.getAdjustedStart(); final long adjustedEnd = splitStream.getAdjustedEnd(); if (LOG.isDebugEnabled()) { LOG.debug("Doing split " + splitCount + " on range " + " (" + start + "-" + end + ")" + " adjusted to (" + adjustedStart + "-" + adjustedEnd + ")"); } final LineReader lreader = new LineReader(splitStream, conf); if (start != 0) { // Not the first split so we discard the first (incomplete) line. 
int readChars = lreader.readLine(line); if (LOG.isTraceEnabled()) { LOG.trace("DISCARD LINE " + 0 + " in split " + splitCount + " pos=" + splitStream.getPos() + " length=" + readChars + ": \"" + line + "\""); } } // Now read until the end of this split while (nextKeyValue(splitStream, lreader, adjustedEnd, line)) { ++splitLineNumber; // Get the reference value if (!nextKeyValue(refStream, refReader, inputLength, refLine)) { LOG.error(String.format("S>%05d: %s", splitLineNumber, line)); fail("Split goes beyond the end of the reference with line number " + splitLineNumber); } ++refLineNumber; if (LOG.isDebugEnabled() && refLineNumber > (recordsInFile - 10)) { LOG.debug(String.format("R<%05d: %s", refLineNumber, refLine)); LOG.debug(String.format("S>%05d: %s", splitLineNumber, line)); } assertEquals("Line must be same in reference and in split at line " + refLineNumber, refLine, line); if (LOG.isTraceEnabled()) { LOG.trace("LINE " + splitLineNumber + " in split " + splitCount + " (" + refLineNumber + ") pos=" + splitStream.getPos() + " length=" + line.getLength() + ": \"" + line + "\""); } } // We just read through the entire split LOG.debug("Checked split " + splitCount + " (" + adjustedStart + "-" + adjustedEnd + ") " + "containing " + splitLineNumber + " lines."); if (end == inputLength) { LOG.info("====================> Finished the last split <===================="); break; // We've reached the end of the last split } // Determine start and end for the next split start = end; if ((end + lastSplitSizeLimit) > inputLength) { end = inputLength; LOG.info("====================> Starting the last split (" + start + " - " + end + ") <===================="); } else { end += splitSize; LOG.info("====================> Starting the next split (" + start + " - " + end + ") <===================="); } } if (nextKeyValue(refStream, refReader, inputLength, refLine)) { ++refLineNumber; LOG.error(String.format("R<%05d: %s", refLineNumber, refLine)); fail("The reference is at least 
one line longer than the last split ( " + "splitSize=" + splitSize + ", " + "inputLength= " + inputLength + ", " + "split start=" + start + ", " + "split end=" + end + ", " + "line=" + refLineNumber + ")"); } LOG.info("Verified " + refLineNumber + " lines in " + splitCount + " splits."); } finally { CodecPool.returnDecompressor(dcmp); CodecPool.returnDecompressor(refDcmp); } }
From source file:org.apache.tajo.storage.compress.CodecPool.java
License:Apache License
/** * Return the {@link Decompressor} to the pool. * * @param decompressor//from w ww.j a v a 2 s .c o m * the <code>Decompressor</code> to be returned to the pool */ public static void returnDecompressor(Decompressor decompressor) { if (decompressor == null) { return; } // if the decompressor can't be reused, don't pool it. if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) { return; } decompressor.reset(); payback(DECOMPRESSOR_POOL, decompressor); }
From source file:org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java
License:Apache License
@SuppressWarnings("resource") public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength, int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG) throws IOException { IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead, ifileReadAheadLength);/*from w w w. j av a 2 s . c o m*/ input = checksumIn; // Are map-outputs compressed? if (codec != null) { Decompressor decompressor = CodecPool.getDecompressor(codec); decompressor.reset(); input = codec.createInputStream(input, decompressor); } // Copy map-output into an in-memory buffer byte[] shuffleData = fetchedInput.getBytes(); try { IOUtils.readFully(input, shuffleData, 0, shuffleData.length); // metrics.inputBytes(shuffleData.length); LOG.info("Read " + shuffleData.length + " bytes from input for " + fetchedInput.getInputAttemptIdentifier()); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); // Re-throw throw ioe; } }
From source file:org.mrgeo.data.raster.RasterWritable.java
License:Apache License
/**
 * Decompresses the payload of a {@code RasterWritable} and converts the
 * result to a {@code MrGeoRaster}.
 *
 * <p>The decompressor is reset before use. The first
 * {@code writable.getSize()} bytes of {@code writable.bytes} are read
 * through the codec into memory, wrapped in a new {@code RasterWritable},
 * and passed to the single-argument {@code toMrGeoRaster} overload.
 *
 * @param writable     source of the compressed bytes
 * @param codec        codec used to create the decompressing stream
 * @param decompressor decompressor handed to the codec; reset by this method
 * @return the raster produced from the decompressed bytes
 * @throws IOException if decompressing or copying the data fails
 */
public static MrGeoRaster toMrGeoRaster(final RasterWritable writable, final CompressionCodec codec,
        final Decompressor decompressor) throws IOException {
    decompressor.reset();

    final int copyBufferSize = 1024 * 1024 * 2;
    final ByteArrayOutputStream decompressed = new ByteArrayOutputStream();
    final CompressionInputStream compressedIn = codec.createInputStream(
            new ByteArrayInputStream(writable.bytes, 0, writable.getSize()), decompressor);

    // The trailing 'true' asks copyBytes to close both streams when it is done.
    IOUtils.copyBytes(compressedIn, decompressed, copyBufferSize, true);

    final RasterWritable uncompressed = new RasterWritable(decompressed.toByteArray());
    return toMrGeoRaster(uncompressed);
}
From source file:org.mrgeo.vector.mrsvector.VectorTileWritable.java
License:Apache License
/**
 * Decompresses the payload of a {@code VectorTileWritable} and parses the
 * result into a {@code VectorTile}.
 *
 * <p>The decompressor is reset before use. The first
 * {@code writable.getLength()} bytes of {@code writable.getBytes()} are
 * read through the codec into memory and passed to
 * {@code VectorTile.fromProtobuf}.
 *
 * @param writable     source of the compressed bytes
 * @param codec        codec used to create the decompressing stream
 * @param decompressor decompressor handed to the codec; reset by this method
 * @return the vector tile built from the decompressed bytes
 * @throws IOException if decompressing or copying the data fails
 */
public static VectorTile toMrsVector(final VectorTileWritable writable, final CompressionCodec codec,
        final Decompressor decompressor) throws IOException {
    decompressor.reset();

    final int copyBufferSize = 1024 * 1024 * 2;
    final ByteArrayOutputStream decompressed = new ByteArrayOutputStream();
    final CompressionInputStream compressedIn = codec.createInputStream(
            new ByteArrayInputStream(writable.getBytes(), 0, writable.getLength()), decompressor);

    // The trailing 'true' asks copyBytes to close both streams when it is done.
    IOUtils.copyBytes(compressedIn, decompressed, copyBufferSize, true);

    final byte[] payload = decompressed.toByteArray();
    return VectorTile.fromProtobuf(payload, 0, payload.length);
}