Source listing: com.ibm.stocator.fs.cos.COSInputStream.java

Java example

Introduction

Below is the full source code of com.ibm.stocator.fs.cos.COSInputStream.java, the seekable input stream used by the Stocator COS (Cloud Object Storage) connector.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ibm.stocator.fs.cos;

import java.io.EOFException;
import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.CanSetReadahead;
import org.apache.hadoop.fs.FSExceptionMessages;
import org.apache.hadoop.fs.FSInputStream;

/**
 * Seekable input stream over an object stored in IBM COS (S3-compatible).
 *
 * Reads are served from a lazily opened ranged GET; {@link #seek(long)} only
 * records the target position, and the actual repositioning (skip-forward or
 * stream reopen) happens on the next read. Thread-safe: all state-mutating
 * operations are synchronized, and {@link #closed} is volatile so
 * {@link #checkNotClosed()} is non-blocking.
 */
public class COSInputStream extends FSInputStream implements CanSetReadahead {
    /**
     * This is the public position; the one set in {@link #seek(long)}
     * and returned in {@link #getPos()}.
     */
    private long pos;
    /**
     * Closed bit. Volatile so reads are non-blocking.
     * Updates must be in a synchronized block to guarantee an atomic check and
     * set
     */
    private volatile boolean closed;
    // Underlying SDK stream for the currently open ranged GET; null when no
    // request is active (forces a reopen on the next read).
    private S3ObjectInputStream wrappedStream;
    // S3 client used to issue GET requests.
    private final AmazonS3 client;
    // Bucket and key of the object being read.
    private final String bucket;
    private final String key;
    // Total object length, fixed at construction time.
    private final long contentLength;
    // "bucket/key" string used in log and error messages.
    private final String uri;
    private static final Logger LOG = LoggerFactory.getLogger(COSInputStream.class);
    // Input policy controlling how far each ranged GET extends.
    private final COSInputPolicy inputPolicy;
    // Readahead range in bytes; also the threshold for abort-vs-drain on close.
    private long readahead = COSConstants.DEFAULT_READAHEAD_RANGE;

    /**
     * This is the actual position within the object, used by
     * lazy seek to decide whether to seek on the next read or not.
     */
    private long nextReadPos;

    /**
     * The end of the content range of the last request.
     * This is an absolute value of the range, not a length field.
     */
    private long contentRangeFinish;

    /**
     * The start of the content range of the last request.
     */
    private long contentRangeStart;

    /**
     * Create the stream. No request is issued until the first read.
     *
     * @param bucketT bucket name
     * @param keyT object key within the bucket
     * @param contentLengthT total length of the object in bytes
     * @param clientT S3 client used for ranged GET requests
     * @param readahead initial readahead range in bytes
     * @param inputPolicyT policy deciding the extent of each ranged GET
     */
    public COSInputStream(String bucketT, String keyT, long contentLengthT, AmazonS3 clientT, long readahead,
            COSInputPolicy inputPolicyT) {
        bucket = bucketT;
        key = keyT;
        contentLength = contentLengthT;
        client = clientT;
        uri = bucket + "/" + key;
        inputPolicy = inputPolicyT;
        setReadahead(readahead);
    }

    /**
     * Opens up the stream at specified target position and for given length.
     *
     * @param reason reason for reopen
     * @param targetPos target position
     * @param length length requested
     * @throws IOException on any failure to open the object
     */
    private synchronized void reopen(String reason, long targetPos, long length) throws IOException {

        if (wrappedStream != null) {
            closeStream("reopen(" + reason + ")", contentRangeFinish, false);
        }

        contentRangeFinish = calculateRequestLimit(inputPolicy, targetPos, length, contentLength, readahead);
        LOG.debug("reopen({}) for {} range[{}-{}], length={}," + " streamPosition={}, nextReadPosition={}", uri,
                reason, targetPos, contentRangeFinish, length, pos, nextReadPos);

        try {
            // HTTP Range header is inclusive at both ends, hence the -1.
            GetObjectRequest request = new GetObjectRequest(bucket, key).withRange(targetPos,
                    contentRangeFinish - 1);
            wrappedStream = client.getObject(request).getObjectContent();
            contentRangeStart = targetPos;
            if (wrappedStream == null) {
                throw new IOException("Null IO stream from reopen of (" + reason + ") " + uri);
            }
        } catch (AmazonClientException e) {
            throw COSUtils.translateException("Reopen at position " + targetPos, uri, e);
        }

        pos = targetPos;
    }

    /**
     * Return the public position: where the next read will take place.
     *
     * @return the last position set by {@link #seek(long)} / advanced by reads,
     *     floored at 0
     */
    @Override
    public synchronized long getPos() throws IOException {
        return (nextReadPos < 0) ? 0 : nextReadPos;
    }

    /**
     * Lazy seek: record the target position; the stream is repositioned on the
     * next read.
     *
     * @param targetPos target position; must be non-negative
     * @throws EOFException if the position is negative
     * @throws IOException if the stream is closed
     */
    @Override
    public synchronized void seek(long targetPos) throws IOException {
        checkNotClosed();

        // Do not allow negative seek
        if (targetPos < 0) {
            throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK + " " + targetPos);
        }

        // empty object: every seek is a no-op
        if (contentLength <= 0) {
            return;
        }

        // Lazy seek
        nextReadPos = targetPos;
    }

    /**
     * Seek without raising any exception. This is for use in
     * {@code finally} clauses
     * @param positiveTargetPos a target position which must be positive
     */
    private void seekQuietly(long positiveTargetPos) {
        try {
            seek(positiveTargetPos);
        } catch (IOException ioe) {
            LOG.debug("Ignoring IOE on seek of {} to {}", uri, positiveTargetPos, ioe);
        }
    }

    /**
     * Adjust the stream to a specific position.
     *
     * @param targetPos target seek position
     * @param length length of content that needs to be read from targetPos
     * @throws IOException on IO failure
     */
    private void seekInStream(long targetPos, long length) throws IOException {
        checkNotClosed();
        if (wrappedStream == null) {
            return;
        }
        // compute how much more to skip
        long diff = targetPos - pos;
        if (diff > 0) {
            // forward seek -this is where data can be skipped

            int available = wrappedStream.available();
            // always seek at least as far as what is available
            long forwardSeekRange = Math.max(readahead, available);
            // work out how much is actually left in the stream
            // then choose whichever comes first: the range or the EOF
            long remainingInCurrentRequest = remainingInCurrentRequest();

            long forwardSeekLimit = Math.min(remainingInCurrentRequest, forwardSeekRange);
            boolean skipForward = remainingInCurrentRequest > 0 && diff <= forwardSeekLimit;
            if (skipForward) {
                // the forward seek range is within the limits
                LOG.debug("Forward seek on {}, of {} bytes", uri, diff);
                long skipped = wrappedStream.skip(diff);
                if (skipped > 0) {
                    pos += skipped;
                    // as these bytes have been read, they are included in the counter
                }

                if (pos == targetPos) {
                    // all is well
                    return;
                } else {
                    // log a warning; continue to attempt to re-open
                    LOG.warn("Failed to seek on {} to {}. Current position {}", uri, targetPos, pos);
                }
            }
        } else if (diff < 0) {
            // backwards seek: always requires a reopen
        } else {
            // targetPos == pos
            if (remainingInCurrentRequest() > 0) {
                // if there is data left in the stream, keep going
                return;
            }

        }

        // if the code reaches here, the stream needs to be reopened.
        // close the stream; if read the object will be opened at the new pos
        closeStream("seekInStream()", contentRangeFinish, false);
        pos = targetPos;
    }

    /**
     * There is no alternative source of the data: always returns false.
     *
     * @param targetPos ignored
     * @return false, always
     */
    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
        return false;
    }

    /**
     * Perform lazy seek and adjust stream to correct position for reading.
     *
     * @param targetPos position from where data should be read
     * @param len length of the content that needs to be read
     * @throws IOException on IO failure or failure to reopen the object
     */
    private void lazySeek(long targetPos, long len) throws IOException {
        //For lazy seek
        seekInStream(targetPos, len);

        //re-open at specific location if needed
        if (wrappedStream == null) {
            reopen("read from new offset", targetPos, len);
        }
    }

    /**
     * Read a single byte, performing the lazy seek first.
     * On an {@code IOException} the stream is reopened once and the read
     * retried; an {@code EOFException} maps to -1.
     *
     * @return the byte read (0-255), or -1 at end of stream
     * @throws IOException if the stream is closed or the retry fails
     */
    @Override
    public synchronized int read() throws IOException {
        checkNotClosed();
        if (contentLength == 0 || (nextReadPos >= contentLength)) {
            return -1;
        }

        int byteRead;
        try {
            lazySeek(nextReadPos, 1);
            byteRead = wrappedStream.read();
        } catch (EOFException e) {
            return -1;
        } catch (IOException e) {
            // reopen the stream once and retry the read
            onReadFailure(e, 1);
            byteRead = wrappedStream.read();
        }

        if (byteRead >= 0) {
            pos++;
            nextReadPos++;
        }

        return byteRead;
    }

    /**
     * {@inheritDoc}
     *
     * This updates the statistics on read operations started and whether
     * or not the read operation "completed", that is: returned the exact
     * number of bytes requested.
     * @throws IOException if there are other problems
     */
    @Override
    public synchronized int read(byte[] buf, int off, int len) throws IOException {
        checkNotClosed();

        validatePositionedReadArgs(nextReadPos, buf, off, len);
        if (len == 0) {
            return 0;
        }

        if (contentLength == 0 || (nextReadPos >= contentLength)) {
            return -1;
        }

        try {
            lazySeek(nextReadPos, len);
        } catch (EOFException e) {
            // the end of the file has moved
            return -1;
        }

        int bytesRead;
        try {
            bytesRead = wrappedStream.read(buf, off, len);
        } catch (EOFException e) {
            onReadFailure(e, len);
            // the base implementation swallows EOFs.
            return -1;
        } catch (IOException e) {
            // reopen the stream once and retry the read
            onReadFailure(e, len);
            bytesRead = wrappedStream.read(buf, off, len);
        }

        if (bytesRead > 0) {
            pos += bytesRead;
            nextReadPos += bytesRead;
        }
        return bytesRead;
    }

    /**
     * Handle an IOE on a read by attempting to re-open the stream.
     * The filesystem's readException count will be incremented.
     * @param ioe exception caught
     * @param length length of data being attempted to read
     * @throws IOException any exception thrown on the re-open attempt
     */
    private void onReadFailure(IOException ioe, int length) throws IOException {
        // fully parameterized logging: don't concatenate into the format string
        LOG.info("Got exception while trying to read from stream {} trying to recover: {}", uri, ioe.toString());
        LOG.debug("While trying to read from stream {}", uri, ioe);
        reopen("failure recovery", pos, length);
    }

    /**
     * Verify that the input stream is open. Non blocking; this gives
     * the last state of the volatile {@link #closed} field.
     * @throws IOException if the connection is closed
     */
    private void checkNotClosed() throws IOException {
        if (closed) {
            throw new IOException(uri + ": " + FSExceptionMessages.STREAM_IS_CLOSED);
        }
    }

    /**
     * Close the stream.
     * This triggers publishing of the stream statistics back to the filesystem
     * statistics.
     * This operation is synchronized, so that only one thread can attempt to
     * close the connection; all later/blocked calls are no-ops.
     * @throws IOException on any problem
     */
    @Override
    public synchronized void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                // close or abort the stream
                closeStream("close() operation", contentRangeFinish, false);
                // this is actually a no-op
                super.close();
            } finally {
                LOG.trace("final of close for {}", uri);
            }
        }
    }

    /**
     * Close a stream: decide whether to abort or close, based on
     * the length of the stream and the current position.
     * If a close() is attempted and fails, the operation escalates to
     * an abort.
     *
     * This does not set the {@link #closed} flag.
     * @param reason reason for stream being closed; used in messages
     * @param length length of the stream
     * @param forceAbort force an abort; used if explicitly requested
     */
    private void closeStream(String reason, long length, boolean forceAbort) {
        if (wrappedStream != null) {

            // if the amount of data remaining in the current request is greater
            // than the readahead value: abort.
            long remaining = remainingInCurrentRequest();
            LOG.debug("Closing stream {}: {}", reason, forceAbort ? "abort" : "soft");
            boolean shouldAbort = forceAbort || remaining > readahead;
            if (!shouldAbort) {
                try {
                    // clean close. This will read to the end of the stream,
                    // so, while cleaner, can be pathological on a multi-GB object

                    // explicitly drain the stream
                    long drained = 0;
                    while (wrappedStream.read() >= 0) {
                        drained++;
                    }
                    LOG.debug("Drained stream of {} bytes", drained);

                    // now close it
                    wrappedStream.close();
                    // this MUST come after the close, so that if the IO operations fail
                    // and an abort is triggered, the initial attempt's statistics
                    // aren't collected.
                } catch (IOException e) {
                    // exception escalates to an abort
                    LOG.debug("When closing {} stream for {}", uri, reason, e);
                    shouldAbort = true;
                }
            }
            if (shouldAbort) {
                // Abort, rather than just close, the underlying stream.  Otherwise, the
                // remaining object payload is read from S3 while closing the stream.
                LOG.debug("Aborting stream");
                wrappedStream.abort();
            }
            LOG.debug(
                    "Stream {} {}: {}; remaining={} streamPos={}," + " nextReadPos={},"
                            + " request range {}-{} length={}",
                    uri, (shouldAbort ? "aborted" : "closed"), reason, remaining, pos, nextReadPos,
                    contentRangeStart, contentRangeFinish, length);
            wrappedStream = null;
        }
    }

    /**
     * Forcibly reset the stream, by aborting the connection. The next
     * {@code read()} operation will trigger the opening of a new HTTPS
     * connection.
     *
     * This is potentially very inefficient, and should only be invoked
     * in extreme circumstances. It logs at info for this reason.
     * @return true if the connection was actually reset
     * @throws IOException if invoked on a closed stream
     */
    @InterfaceStability.Unstable
    public synchronized boolean resetConnection() throws IOException {
        checkNotClosed();
        boolean connectionOpen = wrappedStream != null;
        if (connectionOpen) {
            LOG.info("Forced reset of connection to {}", uri);
            closeStream("reset()", contentRangeFinish, true);
        }
        return connectionOpen;
    }

    /**
     * Bytes remaining in the object, clamped to {@code Integer.MAX_VALUE}.
     *
     * @return the remaining byte count as an int
     * @throws IOException if the stream is closed
     */
    @Override
    public synchronized int available() throws IOException {
        checkNotClosed();

        long remaining = remainingInFile();
        if (remaining > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) remaining;
    }

    /**
     * Bytes left in stream.
     * @return how many bytes are left to read
     */
    @InterfaceAudience.Private
    @InterfaceStability.Unstable
    public synchronized long remainingInFile() {
        return contentLength - pos;
    }

    /**
     * Bytes left in the current request.
     * Only valid if there is an active request.
     * @return how many bytes are left to read in the current GET
     */
    @InterfaceAudience.Private
    @InterfaceStability.Unstable
    public synchronized long remainingInCurrentRequest() {
        return contentRangeFinish - pos;
    }

    /**
     * @return the absolute end (exclusive) of the last request's range
     */
    @InterfaceAudience.Private
    @InterfaceStability.Unstable
    public synchronized long getContentRangeFinish() {
        return contentRangeFinish;
    }

    /**
     * @return the absolute start of the last request's range
     */
    @InterfaceAudience.Private
    @InterfaceStability.Unstable
    public synchronized long getContentRangeStart() {
        return contentRangeStart;
    }

    /**
     * Mark/reset is not supported on this stream.
     *
     * @return false, always
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * String value includes statistics as well as stream state.
     * <b>Important: there are no guarantees as to the stability
     * of this value.</b>
     * @return a string value for printing in logs/diagnostics
     */
    @Override
    @InterfaceStability.Unstable
    public String toString() {
        synchronized (this) {
            final StringBuilder sb = new StringBuilder("COSInputStream{");
            sb.append(uri);
            sb.append(" wrappedStream=").append(wrappedStream != null ? "open" : "closed");
            sb.append(" read policy=").append(inputPolicy);
            sb.append(" pos=").append(pos);
            sb.append(" nextReadPos=").append(nextReadPos);
            sb.append(" contentLength=").append(contentLength);
            sb.append(" contentRangeStart=").append(contentRangeStart);
            sb.append(" contentRangeFinish=").append(contentRangeFinish);
            sb.append(" remainingInCurrentRequest=").append(remainingInCurrentRequest());
            sb.append('\n');
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * Subclass {@code readFully()} operation which only seeks at the start
     * of the series of operations; seeking back at the end.
     *
     * This is significantly higher performance if multiple read attempts are
     * needed to fetch the data, as it does not break the HTTP connection.
     *
     * To maintain thread safety requirements, this operation is synchronized
     * for the duration of the sequence.
     * {@inheritDoc}
     *
     */
    @Override
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
        checkNotClosed();
        validatePositionedReadArgs(position, buffer, offset, length);
        if (length == 0) {
            return;
        }
        int nread = 0;
        synchronized (this) {
            long oldPos = getPos();
            try {
                seek(position);
                while (nread < length) {
                    int nbytes = read(buffer, offset + nread, length - nread);
                    if (nbytes < 0) {
                        throw new EOFException("EOF_IN_READ_FULLY");
                    }
                    nread += nbytes;
                }
            } finally {
                // restore the pre-call position even if the read failed
                seekQuietly(oldPos);
            }
        }
    }

    /**
     * Set the readahead range; null restores the default.
     *
     * @param readaheadT new readahead value in bytes, or null for the default
     * @throws IllegalArgumentException if the value is negative
     */
    @Override
    public synchronized void setReadahead(Long readaheadT) {
        if (readaheadT == null) {
            readahead = COSConstants.DEFAULT_READAHEAD_RANGE;
        } else {
            // validate the NEW value, not the current field (the original
            // checked the field, letting a negative value through once)
            Preconditions.checkArgument(readaheadT >= 0, "Negative readahead value");
            readahead = readaheadT;
        }
    }

    /**
     * Get the current readahead value.
     * @return a non-negative readahead value
     */
    public synchronized long getReadahead() {
        return readahead;
    }

    /**
     * Calculate the limit for a get request, based on input policy
     * and state of object.
     * @param inputPolicy input policy
     * @param targetPos position of the read
     * @param length length of bytes requested; if less than zero "unknown"
     * @param contentLength total length of file
     * @param readahead current readahead value
     * @return the absolute value of the limit of the request
     */
    static long calculateRequestLimit(COSInputPolicy inputPolicy, long targetPos, long length, long contentLength,
            long readahead) {
        long rangeLimit;
        switch (inputPolicy) {
        case Random:
            // positioned.
            // read either this block, or the here + readahead value.
            rangeLimit = (length < 0) ? contentLength : targetPos + Math.max(readahead, length);
            break;

        case Sequential:
            // sequential: plan for reading the entire object.
            rangeLimit = contentLength;
            break;

        case Normal:
        default:
            rangeLimit = contentLength;
        }
        // cannot read past the end of the object
        rangeLimit = Math.min(contentLength, rangeLimit);
        return rangeLimit;
    }

    /**
     * Validate the arguments of a positioned read.
     *
     * @param position read position; must be non-negative
     * @param buffer destination buffer; must be non-null
     * @param offset offset within the buffer
     * @param length number of bytes to read; must be non-negative
     * @throws EOFException if the position is negative
     * @throws IllegalArgumentException if the length is negative or buffer null
     * @throws IndexOutOfBoundsException if the buffer cannot hold the request
     */
    protected void validatePositionedReadArgs(long position, byte[] buffer, int offset, int length)
            throws EOFException {
        Preconditions.checkArgument(length >= 0, "length is negative");
        if (position < 0) {
            throw new EOFException("position is negative");
        }
        Preconditions.checkArgument(buffer != null, "Null buffer");
        if (buffer.length - offset < length) {
            throw new IndexOutOfBoundsException("TOO_MANY_BYTES_FOR_DEST_BUFFER" + ": request length=" + length
                    + ", with offset =" + offset + "; buffer capacity =" + (buffer.length - offset));
        }
    }

}