org.apache.hadoop.hdfs.web.ByteRangeInputStream.java Source code

Introduction

Here is the source code for org.apache.hadoop.hdfs.web.ByteRangeInputStream.java
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs.web;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.io.input.BoundedInputStream;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.http.HttpStatus;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.net.HttpHeaders;

/**
 * To support HTTP byte streams, a new connection to an HTTP server needs to be
 * created each time. This class hides the complexity of those multiple
 * connections from the client. Whenever seek() is called, a new connection
 * is made on the successive read(). The normal input stream functions are
 * connected to the currently active input stream.
 */
public abstract class ByteRangeInputStream extends FSInputStream {

    /**
     * This class wraps a URL and provides method to open connection.
     * It can be overridden to change how a connection is opened.
     */
    public static abstract class URLOpener {
        protected URL url;

        public URLOpener(URL u) {
            url = u;
        }

        public void setURL(URL u) {
            url = u;
        }

        public URL getURL() {
            return url;
        }

        /**
         * Connect to server with a data offset.
         */
        protected abstract HttpURLConnection connect(final long offset, final boolean resolved) throws IOException;
    }

    enum StreamStatus {
        NORMAL, SEEK, CLOSED
    }

    protected InputStream in;
    protected URLOpener originalURL;
    protected URLOpener resolvedURL;
    protected long startPos = 0;
    protected long currentPos = 0;
    protected Long fileLength = null;

    StreamStatus status = StreamStatus.SEEK;

    /**
     * Create with the specified URLOpeners. Original url is used to open the
     * stream for the first time. Resolved url is used in subsequent requests.
     *
     * @param o
     *     Original url
     * @param r
     *     Resolved url
     */
    public ByteRangeInputStream(URLOpener o, URLOpener r) throws IOException {
        this.originalURL = o;
        this.resolvedURL = r;
        getInputStream();
    }

    protected abstract URL getResolvedUrl(final HttpURLConnection connection) throws IOException;

    @VisibleForTesting
    protected InputStream getInputStream() throws IOException {
        switch (status) {
        case NORMAL:
            break;
        case SEEK:
            if (in != null) {
                in.close();
            }
            in = openInputStream();
            status = StreamStatus.NORMAL;
            break;
        case CLOSED:
            throw new IOException("Stream closed");
        }
        return in;
    }

    @VisibleForTesting
    protected InputStream openInputStream() throws IOException {
        // Use the original url if no resolved url exists, eg. if
        // it's the first time a request is made.
        final boolean resolved = resolvedURL.getURL() != null;
        final URLOpener opener = resolved ? resolvedURL : originalURL;

        final HttpURLConnection connection = opener.connect(startPos, resolved);
        resolvedURL.setURL(getResolvedUrl(connection));

        InputStream in = connection.getInputStream();
        final Map<String, List<String>> headers = connection.getHeaderFields();
        if (isChunkedTransferEncoding(headers)) {
            // file length is not known
            fileLength = null;
        } else {
            // for non-chunked transfer-encoding, get content-length
            long streamlength = getStreamLength(connection, headers);
            fileLength = startPos + streamlength;

            // Java has a bug with >2GB request streams.  It won't bounds check
            // the reads so the transfer blocks until the server times out
            in = new BoundedInputStream(in, streamlength);
        }

        return in;
    }

    private static long getStreamLength(HttpURLConnection connection, Map<String, List<String>> headers)
            throws IOException {
        String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
        if (cl == null) {
            // Try to get the content length by parsing the content range
            // because HftpFileSystem does not return the content length
            // if the content is partial.
            if (connection.getResponseCode() == HttpStatus.SC_PARTIAL_CONTENT) {
                cl = connection.getHeaderField(HttpHeaders.CONTENT_RANGE);
                return getLengthFromRange(cl);
            } else {
                throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: " + headers);
            }
        }
        return Long.parseLong(cl);
    }

    private static long getLengthFromRange(String cl) throws IOException {
        try {

            String[] str = cl.substring(6).split("[-/]");
            return Long.parseLong(str[1]) - Long.parseLong(str[0]) + 1;
        } catch (Exception e) {
            throw new IOException(
                    "failed to get content length by parsing the content range: " + cl + " " + e.getMessage());
        }
    }

    private static boolean isChunkedTransferEncoding(final Map<String, List<String>> headers) {
        return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
                || contains(headers, HttpHeaders.TE, "chunked");
    }

    /**
     * Does the HTTP header map contain the given key, value pair?
     */
    private static boolean contains(final Map<String, List<String>> headers, final String key, final String value) {
        final List<String> values = headers.get(key);
        if (values != null) {
            for (String v : values) {
                for (final StringTokenizer t = new StringTokenizer(v, ","); t.hasMoreTokens();) {
                    if (value.equalsIgnoreCase(t.nextToken())) {
                        return true;
                    }
                }
            }
        }
        return false;
    }

    private int update(final int n) throws IOException {
        if (n != -1) {
            currentPos += n;
        } else if (fileLength != null && currentPos < fileLength) {
            throw new IOException("Got EOF but currentPos = " + currentPos + " < filelength = " + fileLength);
        }
        return n;
    }

    @Override
    public int read() throws IOException {
        final int b = getInputStream().read();
        update((b == -1) ? -1 : 1);
        return b;
    }

    @Override
    public int read(byte b[], int off, int len) throws IOException {
        return update(getInputStream().read(b, off, len));
    }

    /**
     * Seek to the given offset from the start of the file.
     * The next read() will be from that location.  Can't
     * seek past the end of the file.
     */
    @Override
    public void seek(long pos) throws IOException {
        if (pos != currentPos) {
            startPos = pos;
            currentPos = pos;
            if (status != StreamStatus.CLOSED) {
                status = StreamStatus.SEEK;
            }
        }
    }

    /**
     * Return the current offset from the start of the file
     */
    @Override
    public long getPos() throws IOException {
        return currentPos;
    }

    /**
     * Seeks a different copy of the data.  Returns true if
     * found a new source, false otherwise.
     */
    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
        return false;
    }

    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
            in = null;
        }
        status = StreamStatus.CLOSED;
    }
}