Java tutorial
/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tap.hadoop; import java.io.FileNotFoundException; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.log4j.Logger; /** * Class HttpFileSystem provides a basic read-only {@link FileSystem} for accessing remote HTTP and HTTPS data. * <p/> * To use this FileSystem, just use regular http:// or https:// URLs. */ public class HttpFileSystem extends StreamedFileSystem { /** Field LOG */ private static final Logger LOG = Logger.getLogger(HttpFileSystem.class); /** Field HTTP_SCHEME */ public static final String HTTP_SCHEME = "http"; /** Field HTTPS_SCHEME */ public static final String HTTPS_SCHEME = "https"; static { HttpURLConnection.setFollowRedirects(true); } /** Field scheme */ private String scheme; /** Field authority */ private String authority; @Override public void initialize(URI uri, Configuration configuration) throws IOException { setConf(configuration); scheme = uri.getScheme(); authority = uri.getAuthority(); } @Override public URI getUri() { try { return new URI(scheme, authority, null, null, null); } catch (URISyntaxException exception) { throw new RuntimeException("failed parsing uri", exception); } } @Override public FileStatus[] globStatus(Path path, PathFilter pathFilter) throws IOException { FileStatus fileStatus = getFileStatus(path); if (fileStatus == null) return null; return new FileStatus[] { fileStatus }; } @Override public FSDataInputStream open(Path path, int i) throws IOException { URL url = makeUrl(path); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod("GET"); connection.connect(); debugConnection(connection); return new FSDataInputStream( new FSDigestInputStream(connection.getInputStream(), getMD5SumFor(getConf(), path))); } @Override public boolean exists(Path path) throws IOException { URL url = makeUrl(path); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod("HEAD"); connection.connect(); debugConnection(connection); return connection.getResponseCode() == 200; } @Override public FileStatus getFileStatus(Path path) throws IOException { URL url = makeUrl(path); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod("HEAD"); connection.connect(); debugConnection(connection); if (connection.getResponseCode() != 200) throw new FileNotFoundException("could not find file: " + path); long length = connection.getHeaderFieldInt("Content-Length", 0); length = length < 0 ? 0 : length; // queries may return -1 long modified = connection.getHeaderFieldDate("Last-Modified", System.currentTimeMillis()); return new FileStatus(length, false, 1, getDefaultBlockSize(), modified, path); } private void debugConnection(HttpURLConnection connection) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("connection.getURL() = " + connection.getURL()); LOG.debug("connection.getRequestMethod() = " + connection.getRequestMethod()); LOG.debug("connection.getResponseCode() = " + connection.getResponseCode()); LOG.debug("connection.getResponseMessage() = " + connection.getResponseMessage()); LOG.debug("connection.getContentLength() = " + connection.getContentLength()); } } private URL makeUrl(Path path) throws IOException { if (path.toString().startsWith(scheme)) return URI.create(path.toString()).toURL(); try { return new URI(scheme, authority, path.toString(), null, null).toURL(); } catch (URISyntaxException exception) { throw new IOException(exception.getMessage()); } } }