cascading.tap.hadoop.HttpFileSystem.java Source code

Java tutorial

Introduction

Here is the source code for cascading.tap.hadoop.HttpFileSystem.java

Source

/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading.  If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.tap.hadoop;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.Logger;

/**
 * Class HttpFileSystem provides a basic read-only {@link FileSystem} for accessing remote HTTP and HTTPS data.
 * <p/>
 * To use this FileSystem, just use regular http:// or https:// URLs.
 * <p/>
 * File metadata is obtained with HTTP {@code HEAD} requests and file content is streamed with HTTP {@code GET}
 * requests. A resource is considered to exist only when the server answers with status 200.
 */
public class HttpFileSystem extends StreamedFileSystem {
    /** Field LOG */
    private static final Logger LOG = Logger.getLogger(HttpFileSystem.class);

    /** Field HTTP_SCHEME */
    public static final String HTTP_SCHEME = "http";
    /** Field HTTPS_SCHEME */
    public static final String HTTPS_SCHEME = "https";

    static {
        // Static setter: applies to HttpURLConnection as a whole, not just to connections made by this class.
        HttpURLConnection.setFollowRedirects(true);
    }

    /** Field scheme, the scheme of the URI this file system was initialized with */
    private String scheme;
    /** Field authority, the authority (host[:port]) of the URI this file system was initialized with */
    private String authority;

    /**
     * Stores the given configuration and remembers the scheme and authority of the given URI. Both are used
     * later to resolve paths that are not already complete URLs.
     *
     * @param uri           the URI identifying this file system
     * @param configuration the configuration to associate with this file system
     * @throws IOException declared by the overridden method; not thrown by this implementation
     */
    @Override
    public void initialize(URI uri, Configuration configuration) throws IOException {
        setConf(configuration);

        scheme = uri.getScheme();
        authority = uri.getAuthority();
    }

    /**
     * Returns a URI made of only the scheme and authority captured in {@link #initialize(URI, Configuration)}.
     *
     * @return the root URI of this file system
     * @throws RuntimeException if the scheme and authority do not form a valid URI
     */
    @Override
    public URI getUri() {
        try {
            return new URI(scheme, authority, null, null, null);
        } catch (URISyntaxException exception) {
            throw new RuntimeException("failed parsing uri", exception);
        }
    }

    /**
     * Returns the status of the single resource named by the given path. No glob expansion is performed and
     * the given filter is not applied.
     *
     * @param path       the path of the resource
     * @param pathFilter ignored
     * @return a one element array holding the status of the resource, or null if no status is available
     * @throws IOException if the resource cannot be found or the request fails
     */
    @Override
    public FileStatus[] globStatus(Path path, PathFilter pathFilter) throws IOException {
        FileStatus fileStatus = getFileStatus(path);

        if (fileStatus == null) {
            return null;
        }

        return new FileStatus[] { fileStatus };
    }

    /**
     * Opens the resource at the given path with an HTTP {@code GET} request and returns its content as a stream.
     * The returned stream is wrapped in a FSDigestInputStream created with the value returned by
     * {@code getMD5SumFor(getConf(), path)}.
     *
     * @param path the path of the resource to read
     * @param i    the requested buffer size; ignored by this implementation
     * @return a stream over the response body
     * @throws IOException if the connection or the response stream cannot be opened
     */
    @Override
    public FSDataInputStream open(Path path, int i) throws IOException {
        HttpURLConnection connection = prepareConnection(path, "GET");
        boolean opened = false;

        try {
            connection.connect();

            debugConnection(connection);

            FSDataInputStream inputStream = new FSDataInputStream(
                    new FSDigestInputStream(connection.getInputStream(), getMD5SumFor(getConf(), path)));

            opened = true;

            return inputStream;
        } finally {
            // On success the connection must stay open for the returned stream; only release it on failure.
            if (!opened) {
                connection.disconnect();
            }
        }
    }

    /**
     * Tests whether the resource at the given path exists by issuing an HTTP {@code HEAD} request.
     *
     * @param path the path of the resource
     * @return true only if the server responds with status 200
     * @throws IOException if the request fails
     */
    @Override
    public boolean exists(Path path) throws IOException {
        HttpURLConnection connection = prepareConnection(path, "HEAD");

        try {
            connection.connect();

            debugConnection(connection);

            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Builds a {@link FileStatus} for the resource at the given path from the headers of an HTTP {@code HEAD}
     * response. The length is taken from {@code Content-Length} (0 when absent, unparsable or negative) and the
     * modification time from {@code Last-Modified} (the current time when absent).
     *
     * @param path the path of the resource
     * @return the status of the resource
     * @throws FileNotFoundException if the server does not respond with status 200
     * @throws IOException           if the request fails
     */
    @Override
    public FileStatus getFileStatus(Path path) throws IOException {
        HttpURLConnection connection = prepareConnection(path, "HEAD");

        try {
            connection.connect();

            debugConnection(connection);

            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                throw new FileNotFoundException("could not find file: " + path);
            }

            // Read the raw header and parse it as a long: the int based accessor cannot represent
            // lengths above Integer.MAX_VALUE and would silently fall back to its default.
            long length = parseContentLength(connection.getHeaderField("Content-Length"));

            long modified = connection.getHeaderFieldDate("Last-Modified", System.currentTimeMillis());

            return new FileStatus(length, false, 1, getDefaultBlockSize(), modified, path);
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Creates a connection for the given path and request method without connecting it yet.
     *
     * @param path   the path of the resource
     * @param method the HTTP request method, e.g. {@code GET} or {@code HEAD}
     * @return the configured, not yet connected, connection
     * @throws IOException if the URL cannot be built or the connection cannot be created
     */
    private HttpURLConnection prepareConnection(Path path, String method) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) makeUrl(path).openConnection();

        connection.setRequestMethod(method);

        return connection;
    }

    /**
     * Parses a {@code Content-Length} header value.
     *
     * @param value the raw header value, may be null
     * @return the parsed length, or 0 if the value is null, not a number, or negative
     */
    private static long parseContentLength(String value) {
        if (value == null) {
            return 0;
        }

        try {
            long length = Long.parseLong(value.trim());

            return length < 0 ? 0 : length; // queries may return -1
        } catch (NumberFormatException exception) {
            return 0; // an unparsable header is treated like a missing one
        }
    }

    /**
     * Logs the URL, request method, response code, response message and content length of the given
     * connection when debug logging is enabled. Reading the response code forces the request to be sent.
     *
     * @param connection the connection to describe
     * @throws IOException if the response cannot be read
     */
    private void debugConnection(HttpURLConnection connection) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("connection.getURL() = " + connection.getURL());
            LOG.debug("connection.getRequestMethod() = " + connection.getRequestMethod());
            LOG.debug("connection.getResponseCode() = " + connection.getResponseCode());
            LOG.debug("connection.getResponseMessage() = " + connection.getResponseMessage());
            LOG.debug("connection.getContentLength() = " + connection.getContentLength());
        }
    }

    /**
     * Converts the given path into a URL. A path whose string form starts with this file system's scheme is
     * parsed as a complete URL (which keeps any query string intact); any other path is resolved against the
     * scheme and authority captured in {@link #initialize(URI, Configuration)}.
     *
     * @param path the path to convert
     * @return the URL for the path
     * @throws IOException if the path cannot be turned into a valid URL
     */
    private URL makeUrl(Path path) throws IOException {
        String pathString = path.toString();

        if (pathString.startsWith(scheme)) {
            return URI.create(pathString).toURL();
        }

        try {
            return new URI(scheme, authority, pathString, null, null).toURL();
        } catch (URISyntaxException exception) {
            // keep the original exception as the cause so the failing URI component is not lost
            throw new IOException(exception.getMessage(), exception);
        }
    }
}