edu.umn.cs.spatialHadoop.io.RandomCompressedInputStream.java Source code

Java tutorial

Introduction

Here is the source code for edu.umn.cs.spatialHadoop.io.RandomCompressedInputStream.java

Source

/***********************************************************************
* Copyright (c) 2015 by Regents of the University of Minnesota.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Apache License, Version 2.0 which 
* accompanies this distribution and is available at
* http://www.opensource.org/licenses/apache2.0.php.
*
*************************************************************************/
package edu.umn.cs.spatialHadoop.io;

import java.io.EOFException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

/**
 * Provides random access to a file that was previously written using
 * {@link RandomCompressedOutputStream}. The underlying stream must be
 * seekable and its total size must be known because the lookup table that
 * enables random access is stored at the very end of the file.
 *
 * <p>Layout as read by this class: the last four bytes hold the number of
 * blocks as an {@code int}; immediately before it there is one entry per block
 * made of two {@code long}s, the offset in the compressed file followed by the
 * offset in the raw (decompressed) data. Entry {@code i} is used as the end of
 * block {@code i - 1} and the start of block {@code i}; block 0 starts at
 * offset 0 in both. Each block is opened as an independent GZIP stream.
 *
 * <p>This class is not thread-safe. The positional read methods temporarily
 * move the stream position and restore it before returning.
 *
 * @author Ahmed Eldawy
 *
 */
public class RandomCompressedInputStream extends InputStream implements Seekable, PositionedReadable {
    /** Size in bytes of the trailing block count (one {@code int}). */
    private static final int BLOCK_COUNT_SIZE = 4;
    /** Size in bytes of one lookup table entry (two {@code long}s). */
    private static final int TABLE_ENTRY_SIZE = 8 + 8;

    /** The underlying stream of compressed data. */
    private final FSDataInputStream compressedIn;
    /**
     * Decompressor for the block {@link #currentBlock}, positioned at raw
     * offset {@link #pos}; {@code null} when no block is open, in which case
     * the next read reopens the block that contains {@link #pos}.
     */
    private InputStream decompressedIn;

    /** Start of each block in the compressed file; the last entry is the end of the last block. */
    private final long[] blockOffsetsInCompressedFile;
    /** Start of each block in the raw data; the last entry is the total raw length. */
    private final long[] blockOffsetsInRawFile;

    /** Current position in the raw (decompressed) data. */
    private long pos;
    /** Index of the block {@link #decompressedIn} reads from; meaningful only while it is non-null. */
    private int currentBlock;

    /**
     * Opens the given file for random access.
     * @param fs the file system that holds the file
     * @param p path of a file written by {@link RandomCompressedOutputStream}
     * @throws IOException if the file cannot be opened or its lookup table
     *   cannot be read
     */
    public RandomCompressedInputStream(FileSystem fs, Path p) throws IOException {
        this(fs.open(p), fs.getFileStatus(p).getLen());
    }

    /**
     * Wraps an already open stream of compressed data.
     * @param in the compressed stream
     * @param totalLength total size of the compressed file in bytes, including
     *   the lookup table at its end
     * @throws IOException if the lookup table is missing or inconsistent with
     *   {@code totalLength}, or if the first block cannot be opened
     */
    public RandomCompressedInputStream(FSDataInputStream in, long totalLength) throws IOException {
        this.compressedIn = new FSDataInputStream(in);
        if (totalLength < BLOCK_COUNT_SIZE) {
            throw new IOException("File of " + totalLength
                    + " bytes is too short to hold a block lookup table");
        }
        // Read and cache the lookup table
        this.compressedIn.seek(totalLength - BLOCK_COUNT_SIZE);
        int numberOfBlocks = this.compressedIn.readInt();
        // Multiply as long: the table size overflows an int for large block counts
        long tableStart = totalLength - BLOCK_COUNT_SIZE - (long) numberOfBlocks * TABLE_ENTRY_SIZE;
        if (numberOfBlocks < 0 || tableStart < 0) {
            throw new IOException("Corrupt block lookup table: block count " + numberOfBlocks
                    + " does not fit in a file of " + totalLength + " bytes");
        }
        this.blockOffsetsInCompressedFile = new long[numberOfBlocks + 1];
        this.blockOffsetsInRawFile = new long[numberOfBlocks + 1];
        this.compressedIn.seek(tableStart);
        // Index 0 stays at offset 0 (the start of the first block)
        for (int i = 1; i <= numberOfBlocks; i++) {
            blockOffsetsInCompressedFile[i] = this.compressedIn.readLong();
            blockOffsetsInRawFile[i] = this.compressedIn.readLong();
        }
        if (getDecompressedLength() > 0) {
            // Open the first block eagerly so a non-GZIP file fails here
            reposition(0);
        } else {
            this.compressedIn.seek(0);
        }
    }

    @Override
    public long getPos() throws IOException {
        return pos;
    }

    /**
     * Closes the current decompressor, if any, which releases its inflater.
     * The shared compressed stream stays open.
     */
    private void releaseDecompressor() throws IOException {
        InputStream previous = decompressedIn;
        decompressedIn = null;
        if (previous != null) {
            previous.close();
        }
    }

    /**
     * Starts a GZIP decompressor at the current position of the compressed
     * stream. The caller must have released the previous decompressor.
     * @param compressedBytes size of the block in the compressed file
     */
    private void openDecompressor(long compressedBytes) throws IOException {
        // InputSubstream is a project class; presumably it exposes at most
        // compressedBytes bytes of the wrapped stream - confirm.
        decompressedIn = new GZIPInputStream(
                new NonClosingInputStream(new InputSubstream(this.compressedIn, compressedBytes)));
    }

    /**
     * Opens the given block and places the position at its first raw byte.
     * @param blockIndex a block index in {@code [0, numberOfBlocks)}
     */
    private void gotoBlock(int blockIndex) throws IOException {
        long blockStart = this.blockOffsetsInCompressedFile[blockIndex];
        long blockSizeInCompressedFile = this.blockOffsetsInCompressedFile[blockIndex + 1] - blockStart;
        // Drop the old decompressor first: it is invalid once the shared stream moves
        releaseDecompressor();
        compressedIn.seek(blockStart);
        openDecompressor(blockSizeInCompressedFile);
        this.pos = this.blockOffsetsInRawFile[blockIndex];
        this.currentBlock = blockIndex;
    }

    /**
     * Same as {@link #gotoBlock(int)} but asks the underlying stream for a
     * different source of the data.
     * @return {@code false} if the underlying stream found no new source; the
     *   position is then unchanged
     */
    private boolean gotoBlockNewSource(int blockIndex) throws IOException {
        long blockStart = this.blockOffsetsInCompressedFile[blockIndex];
        long blockSizeInCompressedFile = this.blockOffsetsInCompressedFile[blockIndex + 1] - blockStart;
        // The underlying stream may have moved even when it reports failure
        // (not specified by Seekable - confirm), so the old decompressor is
        // dropped either way and the next read reopens the block at pos.
        releaseDecompressor();
        if (!compressedIn.seekToNewSource(blockStart))
            return false;
        openDecompressor(blockSizeInCompressedFile);
        this.pos = this.blockOffsetsInRawFile[blockIndex];
        this.currentBlock = blockIndex;
        return true;
    }

    /**
     * Opens the block that contains {@code target} and advances to it.
     * @param target a raw offset in {@code [0, getDecompressedLength())}
     */
    private void reposition(long target) throws IOException {
        gotoBlock(findBlock(target));
        skipWithinBlock(target - pos);
    }

    /**
     * Makes sure {@link #decompressedIn} is open on the block that contains
     * {@link #pos}. Must only be called while {@code pos} is before the end of
     * the data.
     */
    private void ensureReadable() throws IOException {
        if (decompressedIn == null || pos >= blockOffsetsInRawFile[currentBlock + 1]) {
            reposition(pos);
        }
    }

    /**
     * Discards exactly {@code count} decompressed bytes of the current block.
     * @throws EOFException if the block ends before that many bytes
     */
    private void skipWithinBlock(long count) throws IOException {
        long remaining = count;
        while (remaining > 0) {
            long skipped = decompressedIn.skip(remaining);
            if (skipped <= 0) {
                // skip() may return 0 without being at the end; probe with a read
                if (decompressedIn.read() < 0)
                    throw truncatedBlock();
                skipped = 1;
            }
            remaining -= skipped;
            pos += skipped;
        }
    }

    /** Builds the error for a block holding less data than the lookup table claims. */
    private EOFException truncatedBlock() {
        return new EOFException("Compressed block " + currentBlock + " ended at raw offset " + pos
                + " but the lookup table places its end at " + blockOffsetsInRawFile[currentBlock + 1]);
    }

    /** Rejects seek targets outside {@code [0, getDecompressedLength()]}. */
    private void checkSeekTarget(long newPos) throws EOFException {
        if (newPos < 0)
            throw new EOFException("Cannot seek to negative position " + newPos);
        if (newPos > getDecompressedLength())
            throw new EOFException("Cannot seek to " + newPos + ", past the end of the data at "
                    + getDecompressedLength());
    }

    /** Validates a buffer range the same way {@link InputStream#read(byte[], int, int)} does. */
    private static void checkBounds(byte[] buffer, int offset, int length) {
        if (buffer == null)
            throw new NullPointerException("buffer");
        if (offset < 0 || length < 0 || length > buffer.length - offset)
            throw new IndexOutOfBoundsException("offset " + offset + ", length " + length
                    + ", buffer size " + buffer.length);
    }

    /**
     * Moves to the given offset in the raw data. Seeking to the end of the
     * data is allowed; reads then return -1.
     * @throws EOFException if {@code newPos} is negative or past the end
     */
    @Override
    public void seek(long newPos) throws IOException {
        checkSeekTarget(newPos);
        if (newPos >= getDecompressedLength()) {
            // Nothing left to decompress at the end of the data
            releaseDecompressor();
            this.pos = newPos;
            return;
        }
        if (decompressedIn != null && newPos >= pos
                && newPos < this.blockOffsetsInRawFile[currentBlock + 1]) {
            // Just skip bytes to newPos if ahead within the same block
            skipWithinBlock(newPos - pos);
            return;
        }
        reposition(newPos);
    }

    /** Offset in the compressed file where the last block ends. */
    private long getCompressedLength() {
        return blockOffsetsInCompressedFile[blockOffsetsInCompressedFile.length - 1];
    }

    /** Total number of raw bytes according to the lookup table. */
    private long getDecompressedLength() {
        return blockOffsetsInRawFile[blockOffsetsInRawFile.length - 1];
    }

    /**
     * Moves to the given raw offset using a different source of the
     * underlying data.
     * @return {@code false} if the underlying stream found no new source; the
     *   position is then unchanged
     * @throws EOFException if {@code newPos} is negative or past the end
     */
    @Override
    public boolean seekToNewSource(long newPos) throws IOException {
        checkSeekTarget(newPos);
        if (newPos >= getDecompressedLength()) {
            releaseDecompressor();
            if (!compressedIn.seekToNewSource(getCompressedLength()))
                return false;
            this.pos = newPos;
            return true;
        }
        if (!gotoBlockNewSource(findBlock(newPos)))
            return false;
        skipWithinBlock(newPos - pos);
        return true;
    }

    /**
     * Reads the next raw byte.
     * @return the byte as a value in 0..255, or -1 at the end of the data
     * @throws EOFException if a block holds less data than the lookup table
     *   says
     */
    @Override
    public int read() throws IOException {
        if (pos >= getDecompressedLength())
            return -1;
        ensureReadable();
        int b = this.decompressedIn.read();
        if (b < 0)
            throw truncatedBlock();
        pos++;
        return b;
    }

    /**
     * Reads up to {@code len} raw bytes, continuing across block boundaries
     * until {@code len} bytes were read or the data ends.
     * @return the number of bytes read, or -1 if the position is already at
     *   the end of the data
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        checkBounds(b, off, len);
        if (len == 0)
            return 0;
        long end = getDecompressedLength();
        if (pos >= end)
            return -1;
        int done = 0;
        while (done < len && pos < end) {
            ensureReadable();
            // Never ask for more than what remains in the current block
            int chunk = (int) Math.min((long) (len - done), blockOffsetsInRawFile[currentBlock + 1] - pos);
            int n = this.decompressedIn.read(b, off + done, chunk);
            if (n < 0)
                throw truncatedBlock();
            pos += n;
            done += n;
        }
        return done;
    }

    /**
     * Skips up to {@code n} raw bytes, stopping at the end of the data.
     * @return the number of bytes skipped; 0 if {@code n} is not positive
     */
    @Override
    public long skip(long n) throws IOException {
        long canSkip = Math.min(n, getDecompressedLength() - pos);
        if (canSkip <= 0)
            return 0;
        seek(pos + canSkip);
        return canSkip;
    }

    /** Releases the decompressor and closes the underlying compressed stream. */
    @Override
    public void close() throws IOException {
        try {
            releaseDecompressor();
        } finally {
            this.compressedIn.close();
        }
    }

    /**
     * Finds the block that contains the given position in the uncompressed
     * file.
     * @param newPos an offset in the raw data
     * @return the index of the last block that starts at or before
     *   {@code newPos}: -1 for a negative position and the number of blocks
     *   for a position at or past the end of the data
     */
    private int findBlock(long newPos) {
        int s = 0;
        int e = blockOffsetsInRawFile.length;
        // Look for the first offset that is strictly greater than newPos
        while (s < e) {
            int m = (s + e) >>> 1;
            if (blockOffsetsInRawFile[m] <= newPos) {
                s = m + 1;
            } else {
                e = m;
            }
        }
        return s - 1;
    }

    /**
     * Reads from the given raw offset and restores the current position
     * afterwards.
     * @return the number of bytes read, or -1 if {@code position} is at or
     *   past the end of the data
     * @throws EOFException if {@code position} is negative
     */
    @Override
    public int read(long position, byte[] buffer, int offset, int length) throws IOException {
        checkBounds(buffer, offset, length);
        if (position < 0)
            throw new EOFException("Cannot read from negative position " + position);
        if (position >= getDecompressedLength())
            return -1;
        long oldPos = getPos();
        try {
            seek(position);
            return read(buffer, offset, length);
        } finally {
            // Restore the position even when the read fails
            seek(oldPos);
        }
    }

    /**
     * Reads exactly {@code length} bytes from the given raw offset and
     * restores the current position afterwards.
     * @throws EOFException if the data ends before {@code length} bytes were
     *   read, or if {@code position} is outside the data
     */
    @Override
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
        checkBounds(buffer, offset, length);
        long oldPos = getPos();
        try {
            seek(position);
            int done = 0;
            while (done < length) {
                int n = read(buffer, offset + done, length - done);
                if (n < 0)
                    throw new EOFException("End of data reached after " + done + " of " + length
                            + " bytes requested at position " + position);
                done += n;
            }
        } finally {
            // Restore the position even when the read fails
            seek(oldPos);
        }
    }

    @Override
    public void readFully(long position, byte[] buffer) throws IOException {
        readFully(position, buffer, 0, buffer.length);
    }

    /**
     * Returns the smaller of the raw bytes left and what the compressed
     * stream reports as available. The two counts are in different units
     * (raw vs. compressed bytes), so the result is only a rough estimate.
     */
    @Override
    public int available() throws IOException {
        return (int) Math.min(getDecompressedLength() - pos, this.compressedIn.available());
    }

    /**
     * Passes everything through except {@code close()}, so that closing a
     * per-block decompressor does not close the shared compressed stream.
     */
    private static final class NonClosingInputStream extends FilterInputStream {
        NonClosingInputStream(InputStream in) {
            super(in);
        }

        @Override
        public void close() {
            // Intentionally empty: the shared stream is closed by the enclosing class
        }
    }
}