io.airlift.compress.lzo.HadoopLzopInputStream.java Source code

Java tutorial

Introduction

Here is the source code for io.airlift.compress.lzo.HadoopLzopInputStream.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.airlift.compress.lzo;

import org.apache.hadoop.io.compress.CompressionInputStream;

import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.zip.Adler32;
import java.util.zip.Checksum;

import static io.airlift.compress.lzo.LzoConstants.SIZE_OF_LONG;
import static io.airlift.compress.lzo.LzopCodec.LZOP_IMPLEMENTATION_VERSION;
import static io.airlift.compress.lzo.LzopCodec.LZOP_MAGIC;
import static io.airlift.compress.lzo.LzopCodec.LZO_1X_VARIANT;
import static java.lang.String.format;

/**
 * Input stream that decodes an LZOP-framed stream.
 * <p>
 * The constructor consumes and validates the LZOP header (magic bytes, version fields, variant,
 * flags, file name and the Adler-32 header checksum). After the header the stream is read as a
 * sequence of blocks, each prefixed with two big-endian 32-bit values: the uncompressed length
 * and the compressed length. A block whose two lengths are equal is copied verbatim; any other
 * block is passed through {@link LzoDecompressor}. An uncompressed length of zero marks the end
 * of the stream.
 * <p>
 * No header flags are supported (the constructor rejects any non-zero flags value), so no
 * per-block checksums are read.
 */
class HadoopLzopInputStream extends CompressionInputStream {
    private static final int LZO_IMPLEMENTATION_VERSION = 0x2060;

    private final LzoDecompressor decompressor = new LzoDecompressor();
    private final InputStream in;
    // holds the most recently decoded block when it could not be written straight to the caller
    private final byte[] uncompressedChunk;

    // number of valid bytes of the current block, and how many of them were already handed out
    private int uncompressedLength;
    private int uncompressedOffset;

    // set once the zero-length terminator block has been read
    private boolean finished;

    // scratch buffer for the compressed bytes of a block; grown on demand
    private byte[] compressed = new byte[0];

    /**
     * Creates the stream and reads the LZOP header from {@code in}.
     *
     * @param in the stream positioned at the LZOP magic bytes
     * @param maxUncompressedLength capacity (in bytes) of the internal block buffer; the buffer is
     *        allocated with {@code SIZE_OF_LONG} extra bytes
     * @throws IOException if the magic or header checksum does not match, if a version, variant
     *         or flag is unsupported, or if the stream ends inside the header
     */
    public HadoopLzopInputStream(InputStream in, int maxUncompressedLength) throws IOException {
        super(in);
        this.in = in;
        // over allocate buffer which makes decompression easier
        uncompressedChunk = new byte[maxUncompressedLength + SIZE_OF_LONG];

        byte[] magic = new byte[LZOP_MAGIC.length];
        readInput(magic, 0, magic.length);
        if (!Arrays.equals(magic, LZOP_MAGIC)) {
            throw new IOException("Not an LZOP file");
        }

        // fixed-size part of the header: 3 shorts, 2 bytes, 4 ints and the file name length byte
        byte[] header = new byte[25];
        readInput(header, 0, header.length);
        ByteArrayInputStream headerStream = new ByteArrayInputStream(header);

        // lzop version: ignored
        readBigEndianShort(headerStream);

        // lzo version
        int lzoVersion = readBigEndianShort(headerStream);
        if (lzoVersion > LZO_IMPLEMENTATION_VERSION) {
            throw new IOException(format("Unsupported LZO version 0x%08X", lzoVersion));
        }

        // lzop version of the format
        int lzopCompatibility = readBigEndianShort(headerStream);
        if (lzopCompatibility > LZOP_IMPLEMENTATION_VERSION) {
            throw new IOException(format("Unsupported LZOP version 0x%08X", lzopCompatibility));
        }

        // variant: must be LZO 1X
        int variant = headerStream.read();
        if (variant != LZO_1X_VARIANT) {
            throw new IOException(format("Unsupported LZO variant %s", variant));
        }

        // level: ignored
        headerStream.read();

        // flags: none supported
        int flags = readBigEndianInt(headerStream);
        if (flags != 0) {
            throw new IOException(format("Unsupported LZO flags %s", flags));
        }

        // output file mode: ignored
        readBigEndianInt(headerStream);

        // output file modified time: ignored
        readBigEndianInt(headerStream);

        // output file time zone offset: ignored
        readBigEndianInt(headerStream);

        // output file name: ignored, but its bytes are part of the header checksum
        int fileNameLength = headerStream.read();
        byte[] fileName = new byte[fileNameLength];
        readInput(fileName, 0, fileName.length);

        // verify header checksum (covers the fixed header bytes and the file name, not the magic)
        int headerChecksumValue = readBigEndianInt(in);

        Checksum headerChecksum = new Adler32();
        headerChecksum.update(header, 0, header.length);
        headerChecksum.update(fileName, 0, fileName.length);
        if (headerChecksumValue != (int) headerChecksum.getValue()) {
            throw new IOException("Invalid header checksum");
        }
    }

    /**
     * Reads a single decompressed byte.
     *
     * @return the byte as a value in {@code 0..255}, or {@code -1} once the terminator block
     *         has been reached
     * @throws IOException if the underlying stream fails, ends prematurely, or holds a
     *         malformed block
     */
    @Override
    public int read() throws IOException {
        if (finished) {
            return -1;
        }

        // current block exhausted: decode the next one into the internal chunk buffer
        while (uncompressedOffset >= uncompressedLength) {
            int compressedLength = bufferCompressedData();
            if (finished) {
                return -1;
            }

            decompress(compressedLength, uncompressedChunk, 0, uncompressedChunk.length);
        }
        return uncompressedChunk[uncompressedOffset++] & 0xFF;
    }

    /**
     * Reads decompressed bytes into {@code output}. At most one block is decoded per call, so
     * fewer than {@code length} bytes may be returned even when more data follows.
     *
     * @param output destination buffer
     * @param offset index in {@code output} at which to start writing
     * @param length maximum number of bytes to write
     * @return the number of bytes written, or {@code -1} once the terminator block has been
     *         reached
     * @throws IOException if the underlying stream fails, ends prematurely, or holds a
     *         malformed block
     */
    @Override
    public int read(byte[] output, int offset, int length) throws IOException {
        if (finished) {
            return -1;
        }

        while (uncompressedOffset >= uncompressedLength) {
            int compressedLength = bufferCompressedData();
            if (finished) {
                return -1;
            }

            // favor writing directly to user buffer to avoid extra copy
            if (length >= uncompressedLength) {
                decompress(compressedLength, output, offset, length);
                // mark the block as fully consumed so the next call loads a new one
                uncompressedOffset = uncompressedLength;
                return uncompressedLength;
            }

            decompress(compressedLength, uncompressedChunk, 0, uncompressedChunk.length);
        }
        int size = Math.min(length, uncompressedLength - uncompressedOffset);
        System.arraycopy(uncompressedChunk, uncompressedOffset, output, offset, size);
        uncompressedOffset += size;
        return size;
    }

    /**
     * Discards any buffered block data and clears the end-of-stream marker. The underlying
     * stream is not touched.
     */
    @Override
    public void resetState() throws IOException {
        uncompressedLength = 0;
        uncompressedOffset = 0;
        finished = false;
    }

    /**
     * Reads the two-int header of the next block and records its uncompressed length.
     * <p>
     * The buffered-block state is cleared first and the new length is only committed once both
     * header values have been validated, so a failure here never leaves a positive length
     * pointing at stale chunk contents.
     *
     * @return the compressed length of the block, or {@code -1} after setting {@code finished}
     *         when the terminator block (uncompressed length zero) was read
     * @throws EOFException if the stream ends where a block header was expected
     * @throws IOException if either length is negative or the header is truncated
     */
    private int bufferCompressedData() throws IOException {
        uncompressedOffset = 0;
        uncompressedLength = 0;

        int blockUncompressedLength = readBigEndianInt(in);
        if (blockUncompressedLength == -1) {
            // LZOP file MUST end with uncompressedLength == 0
            throw new EOFException("encountered EOF while reading block data");
        }
        if (blockUncompressedLength == 0) {
            finished = true;
            return -1;
        }
        if (blockUncompressedLength < 0) {
            // a corrupt length must not reach read(), which would otherwise return it verbatim
            throw new IOException(format("Invalid uncompressed block length %s", blockUncompressedLength));
        }

        int compressedLength = readBigEndianInt(in);
        if (compressedLength == -1) {
            throw new EOFException("encountered EOF while reading block data");
        }
        if (compressedLength < 0) {
            throw new IOException(format("Invalid compressed block length %s", compressedLength));
        }

        uncompressedLength = blockUncompressedLength;
        return compressedLength;
    }

    /**
     * Reads the body of the current block from the underlying stream and writes its
     * uncompressed form to {@code output}.
     *
     * @param compressedLength number of bytes the block occupies in the underlying stream
     * @param output destination buffer
     * @param outputOffset index in {@code output} at which to start writing
     * @param outputLength maximum number of bytes that may be written to {@code output}
     * @throws IOException if the stream ends early, a stored block does not fit in
     *         {@code outputLength}, or the decompressed size differs from the block header
     */
    private void decompress(int compressedLength, byte[] output, int outputOffset, int outputLength)
            throws IOException {
        if (uncompressedLength == compressedLength) {
            // equal lengths mean the block was stored without compression: copy it verbatim
            if (compressedLength > outputLength) {
                throw new IOException(format("Stored block length %s exceeds output capacity %s",
                        compressedLength, outputLength));
            }
            readInput(output, outputOffset, compressedLength);
        } else {
            if (compressed.length < compressedLength) {
                // over allocate buffer which makes decompression easier
                compressed = new byte[compressedLength + SIZE_OF_LONG];
            }
            readInput(compressed, 0, compressedLength);
            int actualUncompressedLength = decompressor.decompress(compressed, 0, compressedLength, output,
                    outputOffset, outputLength);
            if (actualUncompressedLength != uncompressedLength) {
                throw new IOException("Decompressor did not decompress the entire block");
            }
        }
    }

    /**
     * Fills {@code buffer[offset .. offset + length)} from the underlying stream, looping over
     * short reads.
     *
     * @throws EOFException if the stream ends before {@code length} bytes were read
     */
    private void readInput(byte[] buffer, int offset, int length) throws IOException {
        while (length > 0) {
            int size = in.read(buffer, offset, length);
            if (size == -1) {
                throw new EOFException("encountered EOF while reading block data");
            }
            offset += size;
            length -= size;
        }
    }

    /**
     * Reads an unsigned 16-bit big-endian value.
     *
     * @return the value, or {@code -1} if the stream is already at its end
     * @throws IOException if the stream ends after the first byte
     */
    private static int readBigEndianShort(InputStream in) throws IOException {
        int b1 = in.read();
        if (b1 < 0) {
            return -1;
        }

        int b2 = in.read();
        // If the second byte is negative, the stream is truncated
        if ((b2) < 0) {
            throw new IOException("Stream is truncated");
        }
        return (b1 << 8) + (b2);
    }

    /**
     * Reads a 32-bit big-endian value.
     *
     * @return the value, or {@code -1} if the stream is already at its end; note that the bit
     *         pattern {@code 0xFFFFFFFF} yields the same result
     * @throws IOException if the stream ends after the first byte
     */
    private static int readBigEndianInt(InputStream in) throws IOException {
        int b1 = in.read();
        if (b1 < 0) {
            return -1;
        }
        int b2 = in.read();
        int b3 = in.read();
        int b4 = in.read();

        // If any of the other bytes are negative, the stream is truncated
        if ((b2 | b3 | b4) < 0) {
            throw new IOException("Stream is truncated");
        }
        return ((b1 << 24) + (b2 << 16) + (b3 << 8) + (b4));
    }
}