org.deri.pipes.utils.HttpResponseCache.java Source code

Java tutorial

Introduction

Here is the source code for org.deri.pipes.utils.HttpResponseCache.java

Source

/*
 * Copyright (c) 2008-2009,
 * 
 * Digital Enterprise Research Institute, National University of Ireland, 
 * Galway, Ireland
 * http://www.deri.org/
 * http://pipes.deri.org/
 *
 * Semantic Web Pipes is distributed under New BSD License.
 * 
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are met:
 * 
 *  * Redistributions of source code must retain the above copyright notice, 
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright 
 *    notice, this list of conditions and the following disclaimer in the 
 *    documentation and/or other materials provided with the distribution and 
 *    reference to the source code.
 *  * The name of Digital Enterprise Research Institute, 
 *    National University of Ireland, Galway, Ireland; 
 *    may not be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 * POSSIBILITY OF SUCH DAMAGE.
 */

package org.deri.pipes.utils;

import java.io.IOException;
import java.math.BigInteger;
import java.net.URLDecoder;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.httpclient.methods.EntityEnclosingMethod;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.util.DateUtil;
import org.apache.jcs.JCS;
import org.apache.log4j.Logger;

/**
 * Utility class for retrieving HTTP responses through a cache. The cache
 * is used by pipes for minimising http traffic. The result of each
 * HTTP request is stored in the cache for a minimum of
 * {@link #MINIMUM_CACHE_TIME_MILLIS} (longer if the response carries a later
 * <code>Expires</code> header), during which time the response for any other
 * same request is served from the cache. After that time a HTTP HEAD
 * request is used to check the <code>Last-Modified</code> header to see if the
 * content changed; if not, the cached entry is kept for another period. If the
 * content has changed (or this cannot be determined), it is fetched again and
 * placed into the cache.
 *
 * TODO: use streams rather than storing the full response in memory.
 * @author robful
 *
 */
public class HttpResponseCache {
    /** Name of the response header carrying the expiry date. */
    private static final String EXPIRES_HEADER = "Expires";
    /** Name of the request header identifying the client. */
    private static final String HEADER_USER_AGENT = "User-Agent";
    /** Name of the response header used to detect changed content. */
    private static final String HEADER_LAST_MODIFIED = "Last-Modified";
    /** Name of the response header carrying the content type. */
    private static final String HEADER_CONTENT_TYPE = "Content-Type";
    /** Name of the JCS cache region holding the responses. */
    private static final String CACHE_REGION = "httpResponseCache";
    /**
     * Locations longer than this are sent as POST (when they have a query
     * string); locations of this length or longer are never revalidated
     * with a HEAD request.
     */
    private static final int MAX_GET_LOCATION_LENGTH = 2000;
    /**
     * Minimum time in milliseconds to cache http response.
     * If zero or less, responses will not be cached.
     */
    public static long MINIMUM_CACHE_TIME_MILLIS = 300000;//5 minutes
    /**
     * Maximum size of content retrieved.
     */
    public static int MAX_CONTENT_SIZE = 5000000;//about 5mb max for now
    static Logger logger = Logger.getLogger(HttpResponseCache.class);

    /**
     * Retrieves the response for a location without any extra request headers.
     *
     * @param client the client used to execute requests
     * @param location the URL to retrieve
     * @return the cached or freshly fetched response data
     * @throws Exception if the request could not be executed
     */
    public static HttpResponseData getResponseData(HttpClient client, String location) throws Exception {
        return getResponseData(client, location, (Map<String, String>) null);
    }

    /**
     * Retrieves the response for a location, using the cache when possible.
     * A default <code>User-Agent</code> header is sent when caching is enabled
     * and the caller did not supply one. The caller's map is never modified.
     *
     * @param client the client used to execute requests; also used as the
     *        lock, so calls sharing a client run one at a time
     * @param location the URL to retrieve
     * @param requestHeaders extra request headers, may be <code>null</code>
     * @return the cached or freshly fetched response data
     * @throws Exception if the request could not be executed
     */
    public static HttpResponseData getResponseData(HttpClient client, String location,
            Map<String, String> requestHeaders) throws Exception {
        synchronized (client) {
            if (MINIMUM_CACHE_TIME_MILLIS <= 0) {
                logger.debug("caching disabled.");
                return getDataFromRequest(client, location, requestHeaders);
            }
            // The key is derived from the headers exactly as the caller supplied them.
            String cacheKey = makeCacheKey(location, requestHeaders);

            // Work on a private copy so that adding the default User-Agent does
            // not leak into (or fail on) the caller's map.
            Map<String, String> headers = new HashMap<String, String>();
            if (requestHeaders != null) {
                headers.putAll(requestHeaders);
            }
            if (headers.get(HEADER_USER_AGENT) == null) {
                headers.put(HEADER_USER_AGENT, getDefaultUserAgent());
            }

            JCS jcs = getCache();
            if (jcs != null) {
                try {
                    HttpResponseData cached = (HttpResponseData) jcs.get(cacheKey);
                    if (cached != null) {
                        if (cached.getExpires() > System.currentTimeMillis()) {
                            logger.info("Retrieved from cache (not timed out):" + location);
                            return cached;
                        }
                        if (location.length() < MAX_GET_LOCATION_LENGTH
                                && refreshIfUnmodified(client, location, headers, cached)) {
                            jcs.put(cacheKey, cached);
                            logger.info("Retrieved from cache (used HTTP HEAD request to check "
                                    + HEADER_LAST_MODIFIED + ") :" + location);
                            return cached;
                        }
                    }
                } catch (Exception e) {
                    // Best effort: any cache or revalidation problem falls back to a fresh request.
                    logger.warn("Problem retrieving from cache for " + location, e);
                }
            }

            HttpResponseData data = getDataFromRequest(client, location, headers);
            if (jcs != null) {
                try {
                    jcs.put(cacheKey, data);
                    logger.debug("cached " + location);
                } catch (Exception e) {
                    logger.warn("Could not store response for " + location + " in cache", e);
                }
            }
            return data;
        }
    }

    /**
     * Obtains the JCS region used for responses.
     *
     * @return the cache, or <code>null</code> if it could not be obtained
     */
    private static JCS getCache() {
        try {
            return JCS.getInstance(CACHE_REGION);
        } catch (Exception e) {
            logger.warn("Problem getting JCS cache" + e, e);
            return null;
        }
    }

    /**
     * Issues a HEAD request and, when the status code and Last-Modified value
     * both match the cached entry, renews the entry's expiry time.
     *
     * @param client the client used to execute the request
     * @param location the URL to check
     * @param requestHeaders request headers to send, may be <code>null</code>
     * @param data the cached entry to compare against and renew
     * @return <code>true</code> if the cached entry is still current and was renewed
     * @throws IOException if the HEAD request fails
     */
    private static boolean refreshIfUnmodified(HttpClient client, String location,
            Map<String, String> requestHeaders, HttpResponseData data) throws IOException {
        HeadMethod headMethod = new HeadMethod(location);
        headMethod.setFollowRedirects(true);
        addRequestHeaders(headMethod, requestHeaders);
        try {
            int response = client.executeMethod(headMethod);
            if (response != data.getResponse()) {
                return false;
            }
            Header lastModifiedHeader = headMethod.getResponseHeader(HEADER_LAST_MODIFIED);
            if (lastModifiedHeader == null) {
                logger.debug("Not using cache (No last modified header available) for " + location);
                return false;
            }
            String lastModified = lastModifiedHeader.getValue();
            // Compare from the header side: the cached entry may have no Last-Modified value.
            if (lastModified == null || !lastModified.equals(data.getLastModified())) {
                logger.debug("Not using cache (last modified changed) for " + location);
                return false;
            }
            setExpires(data, headMethod);
            return true;
        } finally {
            headMethod.releaseConnection();
        }
    }

    /**
     * Sets the expiry time of the data to now plus
     * {@link #MINIMUM_CACHE_TIME_MILLIS}, or to the time given by the response's
     * Expires header when that header parses and is later.
     *
     * @param data the response data to update
     * @param method the executed method whose response headers are inspected
     */
    private static void setExpires(HttpResponseData data, HttpMethodBase method) {
        long expires = System.currentTimeMillis() + MINIMUM_CACHE_TIME_MILLIS;
        Header expiresHeader = method.getResponseHeader(EXPIRES_HEADER);
        if (expiresHeader != null) {
            try {
                Date expiresDate = DateUtil.parseDate(expiresHeader.getValue());
                if (expiresDate.getTime() > expires) {
                    logger.info("Setting cache time according to expiresHeader=[" + expiresHeader.getValue() + "]");
                    expires = expiresDate.getTime();
                } else {
                    logger.debug("Ignoring expires header [" + expiresHeader.getValue() + "]");
                }
            } catch (Exception e) {
                // An unparsable header simply leaves the minimum cache time in effect.
                logger.debug("Problem parsing expires header [" + expiresHeader.getValue() + "]");
            }
        }
        data.setExpires(expires);
    }

    /**
     * Executes the request and collects status, charset, Last-Modified,
     * Content-Type and body (up to {@link #MAX_CONTENT_SIZE}) into a new
     * {@link HttpResponseData}. A GET is used unless the location is longer
     * than 2000 characters and has a query string, in which case the query
     * parameters are sent as a POST to the part of the location before the '?'.
     *
     * @param client the client used to execute the request
     * @param location the URL to retrieve
     * @param requestHeaders request headers to send, may be <code>null</code>
     * @return the response data
     * @throws IOException if the request fails
     * @throws HttpException if a protocol error occurs
     */
    private static HttpResponseData getDataFromRequest(HttpClient client, String location,
            Map<String, String> requestHeaders) throws IOException, HttpException {
        HttpMethodBase method = new GetMethod(location);
        method.setFollowRedirects(true);
        try {
            int queryStart = location.indexOf('?');
            if (location.length() > MAX_GET_LOCATION_LENGTH && queryStart >= 0) {
                logger.info("Using post method because request location is very long");
                PostMethod postMethod = new PostMethod(location.substring(0, queryStart));
                // Split on the raw separators first and decode each piece afterwards,
                // so that encoded '&' or '=' inside a name or value stays part of it.
                String[] parts = location.substring(queryStart + 1).split("&");
                for (String part : parts) {
                    if (part.length() == 0) {
                        continue;
                    }
                    String[] keyval = part.split("=", 2);
                    String name = URLDecoder.decode(keyval[0], "UTF-8");
                    String value = keyval.length == 2 ? URLDecoder.decode(keyval[1], "UTF-8") : "";
                    postMethod.addParameter(name, value);
                }
                method = postMethod;
            }
            addRequestHeaders(method, requestHeaders);
            int response = client.executeMethod(method);
            HttpResponseData data = new HttpResponseData();
            setExpires(data, method);
            data.setResponse(response);
            data.setCharSet(method.getResponseCharSet());
            Header lastModifiedHeader = method.getResponseHeader(HEADER_LAST_MODIFIED);
            if (lastModifiedHeader != null) {
                data.setLastModified(lastModifiedHeader.getValue());
            }
            Header contentTypeHeader = method.getResponseHeader(HEADER_CONTENT_TYPE);
            if (contentTypeHeader != null) {
                data.setContentType(contentTypeHeader.getValue());
            }
            data.setBody(method.getResponseBody(MAX_CONTENT_SIZE));

            return data;
        } finally {
            method.releaseConnection();
        }
    }

    /**
     * @return the User-Agent value sent when the caller supplies none
     */
    private static String getDefaultUserAgent() {
        //todo: use a system property if set.
        return "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en)";
    }

    /**
     * Adds every entry of the map as a request header on the method.
     *
     * @param method the method to add headers to
     * @param requestHeaders headers to add, may be <code>null</code>
     */
    private static void addRequestHeaders(HttpMethod method, Map<String, String> requestHeaders) {
        if (requestHeaders == null) {
            return;
        }
        for (Map.Entry<String, String> header : requestHeaders.entrySet()) {
            method.addRequestHeader(header.getKey(), header.getValue());
        }
    }

    /**
     * Builds the cache key: the MD5 of the location followed by the request
     * headers in sorted key order, so the key does not depend on map ordering.
     *
     * @param location the request URL
     * @param requestHeaders the request headers, may be <code>null</code> or empty
     * @return the cache key
     */
    private static String makeCacheKey(String location, Map<String, String> requestHeaders) {
        if (requestHeaders == null || requestHeaders.size() == 0) {
            return getMD5(location);
        }
        List<String> keys = new ArrayList<String>(requestHeaders.keySet());
        Collections.sort(keys);
        StringBuilder sb = new StringBuilder(location);
        for (String key : keys) {
            sb.append('[').append(key).append('=').append(requestHeaders.get(key)).append(']');
        }
        return getMD5(sb.toString());
    }

    /**
     * Computes the MD5 digest of the UTF-8 bytes of a string.
     *
     * @param string the text to hash
     * @return the digest as 32 lowercase hexadecimal characters (zero padded),
     *         or the input string itself if the digest could not be calculated
     */
    static String getMD5(String string) {
        try {
            MessageDigest digest = MessageDigest.getInstance("MD5");
            BigInteger bigInt = new BigInteger(1, digest.digest(string.getBytes("UTF-8")));
            // Zero-pad: BigInteger.toString(16) would drop leading zero digits.
            String md5 = String.format("%032x", bigInt);
            if (logger.isDebugEnabled()) {
                logger.debug("using md5=[" + md5 + "] for " + string);
            }
            return md5;
        } catch (Exception e) {
            logger.info("couldn't calculate md5 because:" + e, e);
            return string;
        }
    }
}