alluxio.underfs.s3a.S3AInputStream.java Source code

Java tutorial

Introduction

Here is the source code for alluxio.underfs.s3a.S3AInputStream.java

Source

/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.underfs.s3a;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3ObjectInputStream;

import java.io.IOException;
import java.io.InputStream;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * A wrapper around an {@link S3ObjectInputStream} which handles skips efficiently.
 *
 * <p>The backing S3 stream is opened on demand with a ranged GET that starts at the current
 * position. A skip does not read and discard bytes; instead it aborts the current backing stream,
 * advances the position and opens a new ranged stream at the new position.
 */
@NotThreadSafe
public class S3AInputStream extends InputStream {
    /** Client for operations with s3. */
    private final AmazonS3 mClient;
    /** Name of the bucket the object resides in. */
    private final String mBucketName;
    /** The path of the object to read. */
    private final String mKey;

    /** The backing input stream from s3, or null when no stream is currently open. */
    private S3ObjectInputStream mIn;
    /** The current position of the stream, i.e. the offset of the next byte to read. */
    private long mPos;

    /**
     * Constructor for an input stream of an object in s3 using the aws-sdk implementation to read
     * the data. The stream will be positioned at the start of the file.
     *
     * @param bucketName the bucket the object resides in
     * @param key the path of the object to read
     * @param client the s3 client to use for operations
     */
    public S3AInputStream(String bucketName, String key, AmazonS3 client) {
        this(bucketName, key, client, 0L);
    }

    /**
     * Constructor for an input stream of an object in s3 using the aws-sdk implementation to read
     * the data. The stream will be positioned at the specified position. No request is sent to s3
     * until the first read or skip.
     *
     * @param bucketName the bucket the object resides in
     * @param key the path of the object to read
     * @param client the s3 client to use for operations
     * @param position the position to begin reading from
     */
    public S3AInputStream(String bucketName, String key, AmazonS3 client, long position) {
        mBucketName = bucketName;
        mKey = key;
        mClient = client;
        mPos = position;
    }

    /**
     * Aborts the backing stream if one is open. The position is retained, so a subsequent read
     * opens a new backing stream at that position.
     */
    @Override
    public void close() {
        closeStream();
    }

    /**
     * Reads a single byte, opening the backing stream first if necessary.
     *
     * @return the byte read as an int in the range 0-255, or -1 if the backing stream reports end
     *         of stream
     * @throws IOException if the backing stream fails to read
     */
    @Override
    public int read() throws IOException {
        openStream(); // no-op when a backing stream is already open
        int value = mIn.read();
        if (value != -1) { // valid data read
            mPos++;
        }
        return value;
    }

    /**
     * Reads up to {@code b.length} bytes into {@code b}.
     *
     * @param b the buffer to read into
     * @return the number of bytes read, or -1 if the backing stream reports end of stream
     * @throws IOException if the backing stream fails to read
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code length} bytes into {@code b} starting at {@code offset}, opening the
     * backing stream first if necessary.
     *
     * @param b the buffer to read into
     * @param offset the index in {@code b} at which the first byte is stored
     * @param length the maximum number of bytes to read
     * @return the number of bytes read, 0 if {@code length} is 0, or -1 if the backing stream
     *         reports end of stream
     * @throws NullPointerException if {@code b} is null
     * @throws IndexOutOfBoundsException if {@code offset} or {@code length} is negative, or
     *         {@code length} is greater than {@code b.length - offset}
     * @throws IOException if the backing stream fails to read
     */
    @Override
    public int read(byte[] b, int offset, int length) throws IOException {
        // Validate before the zero-length shortcut and before any request is sent to s3, so that
        // invalid arguments are always rejected and never cost a network round trip.
        if (b == null) {
            throw new NullPointerException("b");
        }
        if (offset < 0 || length < 0 || length > b.length - offset) {
            throw new IndexOutOfBoundsException(
                "offset=" + offset + ", length=" + length + ", buffer length=" + b.length);
        }
        if (length == 0) {
            return 0;
        }
        openStream(); // no-op when a backing stream is already open
        int read = mIn.read(b, offset, length);
        if (read != -1) {
            mPos += read;
        }
        return read;
    }

    /**
     * Skips {@code n} bytes by aborting the current backing stream and opening a new one at the
     * advanced position. The new position is not checked against the length of the object.
     *
     * @param n the number of bytes to skip
     * @return {@code n}, or 0 if {@code n} is not positive
     * @throws IllegalArgumentException if advancing the position by {@code n} would overflow a
     *         long; the stream is left unchanged in that case
     */
    @Override
    public long skip(long n) {
        if (n <= 0) {
            return 0;
        }
        long target = mPos + n;
        // n is positive, so the sum can only be smaller than mPos if it wrapped around. A wrapped
        // value must never be used as the start of the range request.
        if (target < mPos) {
            throw new IllegalArgumentException(
                "Cannot skip " + n + " bytes from position " + mPos + ": position would overflow");
        }
        closeStream();
        mPos = target;
        openStream();
        return n;
    }

    /**
     * Opens a new stream at mPos if the wrapped stream mIn is null.
     */
    private void openStream() {
        if (mIn != null) { // stream is already open
            return;
        }
        GetObjectRequest getReq = new GetObjectRequest(mBucketName, mKey);
        getReq.setRange(mPos);
        mIn = mClient.getObject(getReq).getObjectContent();
    }

    /**
     * Closes the current stream by aborting it, if one is open.
     */
    // TODO(calvin): Investigate if close instead of abort will bring performance benefits.
    private void closeStream() {
        if (mIn == null) {
            return;
        }
        mIn.abort();
        mIn = null;
    }
}