com.twitter.common.net.UrlResolverUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.twitter.common.net.UrlResolverUtil.java

Source

// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================

package com.twitter.common.net;

import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Preconditions;
import com.twitter.common.base.MorePreconditions;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.Proxy.Type;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Map;
import java.util.logging.Logger;
import javax.annotation.Nullable;

/**
 * A utility that can resolve HTTP urls.
 *
 * @author John Sirois
 */
class UrlResolverUtil {

    private static final Logger LOG = Logger.getLogger(UrlResolverUtil.class.getName());

    // Default user-agent string to use for HTTP requests.
    private static final String DEFAULT_USER_AGENT = "Lynxy/6.6.6dev.8 libwww-FM/3.14159FM";

    private static final String HTTP_PREFIX = "http://";
    private static final String HTTPS_PREFIX = "https://";

    // Connect and read timeouts applied to every probe request, in milliseconds.
    private static final int TIMEOUT_MS = 5000;

    /**
     * Validates a host to user-agent mapping before it is used to build the lookup function.
     *
     * @param hostToUserAgent The mapping to validate; must not be null.
     * @return {@code hostToUserAgent}, unchanged.
     */
    private static Map<String, String> checkNotBlank(Map<String, String> hostToUserAgent) {
        Preconditions.checkNotNull(hostToUserAgent);
        // NOTE(review): presumably rejects an empty mapping - confirm against MorePreconditions.
        MorePreconditions.checkNotBlank(hostToUserAgent.entrySet());
        return hostToUserAgent;
    }

    private final Function<? super URL, String> urlToUserAgent;

    /**
     * Creates a resolver that picks the User-Agent header by looking up the host of the requested
     * URL in {@code hostToUserAgent}, falling back to a built-in default for unlisted hosts.
     *
     * @param hostToUserAgent A mapping from host name to the user-agent string to send to it.
     */
    UrlResolverUtil(Map<String, String> hostToUserAgent) {
        this(Functions.compose(Functions.forMap(checkNotBlank(hostToUserAgent), DEFAULT_USER_AGENT),
                new Function<URL, String>() {
                    @Override
                    public String apply(URL url) {
                        return url.getHost();
                    }
                }));
    }

    /**
     * Creates a resolver that asks {@code urlToUserAgent} for the User-Agent header of each request.
     *
     * @param urlToUserAgent Maps the URL being requested to the user-agent string to send; must not
     *        be null.
     */
    UrlResolverUtil(Function<? super URL, String> urlToUserAgent) {
        this.urlToUserAgent = Preconditions.checkNotNull(urlToUserAgent);
    }

    /**
     * Returns the URL that {@code url} lands on, which will be the result of a 3xx redirect,
     * or {@code url} if the url does not redirect using an HTTP 3xx response code.  If there is a
     * non-2xx or 3xx HTTP response code null is returned.
     *
     * <p>The request is always issued over plain http: a leading {@code https://} is rewritten to
     * {@code http://} and a url without a scheme gets {@code http://} prepended.  The scheme is
     * matched case-insensitively and only at the start of the string.  The url returned for a
     * non-redirecting response is this normalized form.  Only a single hop is followed.
     *
     * @param url The URL to follow.
     * @param proxyConfig The HTTP proxy to send the request through, or null to connect directly.
     * @return The redirected URL, or {@code url} if {@code url} returns a 2XX response, otherwise
     *         null
     * @throws java.io.IOException If an error occurs while trying to follow the url.
     */
    @Nullable
    String getEffectiveUrl(String url, @Nullable ProxyConfig proxyConfig) throws IOException {
        Preconditions.checkNotNull(url);
        // Don't follow https.
        String httpUrl = forceHttpScheme(url);

        URL urlObj = new URL(httpUrl);
        HttpURLConnection con = openConnection(urlObj, proxyConfig);
        try {

            // TODO(John Sirois): several commonly tweeted hosts 406 or 400 on HEADs and only work with GETs
            // fix the call chain to be able to specify retry-with-GET
            con.setRequestMethod("HEAD");

            con.setUseCaches(true);
            con.setConnectTimeout(TIMEOUT_MS);
            con.setReadTimeout(TIMEOUT_MS);
            // We want to observe the redirect target ourselves rather than have it followed.
            con.setInstanceFollowRedirects(false);

            // I hate to have to do this, but some URL shorteners don't respond otherwise.
            con.setRequestProperty("User-Agent", urlToUserAgent.apply(urlObj));
            try {
                con.connect();
            } catch (StringIndexOutOfBoundsException e) {
                // Deliberate best-effort: treat this failure as "could not resolve".
                LOG.info("Got StringIndexOutOfBoundsException when fetching headers for " + httpUrl);
                return null;
            }

            int responseCode = con.getResponseCode();
            switch (responseCode / 100) {
            case 2:
                return httpUrl;
            case 3:
                String location = con.getHeaderField("Location");
                if (location == null) {
                    if (responseCode != 304 /* not modified */) {
                        LOG.info(String.format("[%d] Location header was null for URL: %s",
                                responseCode, httpUrl));
                    }
                    return httpUrl;
                }
                return resolveLocation(urlObj, location);
            default:
                LOG.info("Failed to resolve url: " + httpUrl + " with: " + responseCode + " -> "
                        + con.getResponseMessage());
                return null;
            }
        } finally {
            con.disconnect();
        }
    }

    /**
     * Rewrites {@code url} so that it starts with a lower-case {@code http://} scheme.
     *
     * <p>Only the leading scheme is touched, so an {@code https://} that appears later in the
     * string (for example inside a query parameter) is preserved.
     */
    private static String forceHttpScheme(String url) {
        if (url.regionMatches(true, 0, HTTPS_PREFIX, 0, HTTPS_PREFIX.length())) {
            return HTTP_PREFIX + url.substring(HTTPS_PREFIX.length());
        }
        if (url.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length())) {
            return HTTP_PREFIX + url.substring(HTTP_PREFIX.length());
        }
        return HTTP_PREFIX + url;
    }

    /**
     * Opens (but does not connect) an HTTP connection to {@code url}, routed through and
     * authorized for the given proxy when one is supplied.
     */
    private static HttpURLConnection openConnection(URL url, @Nullable ProxyConfig proxyConfig)
            throws IOException {
        if (proxyConfig == null) {
            return (HttpURLConnection) url.openConnection();
        }
        Proxy proxy = new Proxy(Type.HTTP, proxyConfig.getProxyAddress());
        HttpURLConnection con = (HttpURLConnection) url.openConnection(proxy);
        ProxyAuthorizer.adapt(proxyConfig).authorize(con);
        return con;
    }

    /**
     * Turns a Location header value into an absolute URL, resolving it against {@code requestUrl}
     * when it carries no host of its own.  A value that cannot be parsed is returned unchanged.
     */
    private static String resolveLocation(URL requestUrl, String location) {
        // HTTP 1.1 spec says this should be an absolute URI, but i see lots of instances where it
        // is relative, so we need to check.
        try {
            String domain = UrlHelper.getDomainChecked(location);
            if (domain == null || domain.isEmpty()) {
                // This is a relative URI; resolve it against the request URL so that the host,
                // port and path separators all come out right.
                return new URL(requestUrl, location).toString();
            }
        } catch (URISyntaxException e) {
            LOG.info("location contained an invalid URI: " + location);
        } catch (MalformedURLException e) {
            LOG.info("location contained an invalid URI: " + location);
        }
        return location;
    }
}