com.gistlabs.mechanize.impl.MechanizeAgent.java Source code

Java tutorial

Introduction

Here is the source code for MechanizeAgent.java (class com.gistlabs.mechanize.impl.MechanizeAgent).

Source

/**
 * Copyright (C) 2012-2014 Gist Labs, LLC. (http://gistlabs.com)
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package com.gistlabs.mechanize.impl;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.impl.client.AbstractHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;

import com.gistlabs.mechanize.Mechanize;
import com.gistlabs.mechanize.Resource;
import com.gistlabs.mechanize.ResourceFactory;
import com.gistlabs.mechanize.cache.HttpCacheFilter;
import com.gistlabs.mechanize.cache.api.HttpCache;
import com.gistlabs.mechanize.cache.inMemory.InMemoryHttpCache;
import com.gistlabs.mechanize.cookie.Cookies;
import com.gistlabs.mechanize.exceptions.MechanizeExceptionFactory;
import com.gistlabs.mechanize.filters.DefaultMechanizeChainFilter;
import com.gistlabs.mechanize.filters.MechanizeChainFilter;
import com.gistlabs.mechanize.parameters.Parameters;
import com.gistlabs.mechanize.requestor.PageRequestor;
import com.gistlabs.mechanize.requestor.RequestBuilder;
import com.gistlabs.mechanize.requestor.RequestBuilderFactory;
import com.gistlabs.mechanize.util.apache.ContentType;

/**
 * Mechanize agent acts as a focal point for HTTP interactions and also as a factory for Page objects from responses.
 *
 * <p>Interesting resources: http://en.wikipedia.org/wiki/List_of_HTTP_header_fields</p>
 *
 * <p>NOTE: The mechanize library is not synchronized and should be used in a single thread environment or with custom synchronization.</p>
 *
 * @author Martin Kersten<Martin.Kersten.mk@gmail.com>
 * @author John Heintz <john@gistlabs.com>
 */
public class MechanizeAgent implements PageRequestor<Resource>, RequestBuilderFactory<Resource>, Mechanize {

    /** Registered resource factories, keyed by the mime type each factory declares it matches. */
    static final Map<String, ResourceFactory> PAGE_FACTORIES = new HashMap<String, ResourceFactory>();

    /**
     * Looks up the factory registered for the given mime type.
     *
     * @param mimeType the mime type to look up
     * @return the registered factory, or {@code null} if none is registered for that mime type
     */
    static ResourceFactory lookupFactory(final String mimeType) {
        return PAGE_FACTORIES.get(mimeType);
    }

    /**
     * Registers the factory under every mime type returned by {@link ResourceFactory#getContentMatches()}.
     * A later registration for the same mime type replaces the earlier one.
     *
     * @param factory the factory to register
     */
    static void registerFactory(final ResourceFactory factory) {
        Collection<String> contentMatches = factory.getContentMatches();
        for (String mimeType : contentMatches) {
            PAGE_FACTORIES.put(mimeType, factory);
        }
    }

    /** Version string, assigned through {@link #setVersion(String)}. */
    static String VERSION;

    /**
     * Sets the version string held in {@link #VERSION}.
     *
     * @param version the version string to store
     */
    public static void setVersion(final String version) {
        VERSION = version;
    }

    // Must stay below the static field declarations above so they are initialized before this runs.
    static {
        MechanizeInitializer.initialize();
    }

    private final DefaultMechanizeChainFilter requestChain;
    private final AbstractHttpClient client;
    private final Cookies cookies;

    /** Creates an agent using {@link #buildDefaultHttpClient()} and a new {@link InMemoryHttpCache}. */
    public MechanizeAgent() {
        this(buildDefaultHttpClient());
    }

    /**
     * Creates an agent using the given client and a new {@link InMemoryHttpCache}.
     *
     * @param client the HttpClient to use for requests
     */
    public MechanizeAgent(final AbstractHttpClient client) {
        this(client, new InMemoryHttpCache());
    }

    /**
     * Creates an agent using {@link #buildDefaultHttpClient()} and the given cache.
     *
     * @param httpCache the cache handed to the {@link HttpCacheFilter}
     */
    public MechanizeAgent(final HttpCache httpCache) {
        this(buildDefaultHttpClient(), httpCache);
    }

    /**
     * Creates an agent using the given client and cache. The request chain is built around a
     * {@link MechanizeHttpClientFilter} wrapping the client, and an {@link HttpCacheFilter} for the
     * given cache is then added to it.
     *
     * @param client the HttpClient to use for requests
     * @param httpCache the cache handed to the {@link HttpCacheFilter}
     */
    public MechanizeAgent(final AbstractHttpClient client, final HttpCache httpCache) {
        this.client = client;
        setupClient(client);

        this.requestChain = new DefaultMechanizeChainFilter(new MechanizeHttpClientFilter(this.client));
        addFilter(new HttpCacheFilter(httpCache));

        this.cookies = new Cookies(this.client);
    }

    /**
     * This method is used to capture Location headers after HttpClient redirect handling.
     *
     * <p>It installs a response interceptor that copies the first {@code Location} header of each
     * processed response into the request's {@link HttpContext} under the attribute name
     * {@code "Location"}; a later response carrying the header overwrites the earlier value.</p>
     *
     * @param client the client to install the interceptor on
     */
    private void setupClient(final AbstractHttpClient client) {
        client.addResponseInterceptor(new HttpResponseInterceptor() {
            @Override
            public void process(final HttpResponse response, final HttpContext context)
                    throws HttpException, IOException {
                Header header = response.getFirstHeader("Location");
                if (header != null) {
                    context.setAttribute("Location", header.getValue());
                }
            }
        });
    }

    /**
     * Passes the filter to the request chain's {@code prefix} method.
     *
     * @param filter the filter to prefix
     * @return this agent, to allow call chaining
     */
    public MechanizeAgent prefixFilter(final MechanizeChainFilter filter) {
        this.requestChain.prefix(filter);
        return this;
    }

    /**
     * Passes the filter to the request chain's {@code add} method.
     *
     * @param filter the filter to add
     * @return this agent, to allow call chaining
     */
    public MechanizeAgent addFilter(final MechanizeChainFilter filter) {
        this.requestChain.add(filter);
        return this;
    }

    /**
     * Configure the default HttpClient used by mechanize.
     *
     * @return a new {@link DefaultHttpClient}
     */
    public static AbstractHttpClient buildDefaultHttpClient() {
        return new DefaultHttpClient();
    }

    /**
     * Sets the User-Agent on the parameters of the underlying HttpClient.
     *
     * @param userAgent The value to set User-Agent HTTP parameter to for requests
     * @return this agent, to allow call chaining
     */
    public MechanizeAgent setUserAgent(final String userAgent) {
        HttpProtocolParams.setUserAgent(this.client.getParams(), userAgent);
        return this;
    }

    /**
     *
     * @return the User-Agent that HttpClient is currently using.
     */
    public String getUserAgent() {
        return HttpProtocolParams.getUserAgent(this.client.getParams());
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#getClient()
     */
    @Override
    public AbstractHttpClient getClient() {
        return client;
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#doRequest(java.lang.String)
     */
    @Override
    public RequestBuilder<Resource> doRequest(final String uri) {
        return new RequestBuilder<Resource>(this, uri);
    }

    /**
     * Returns the resource received upon the request. The resource can be cast to any expected subclass of resource
     * but will fail with ClassCastException if the expected type of resource is not the actual returned resource.
     *
     * <p>Any exception raised while executing the request or building the resource is converted through
     * {@link MechanizeExceptionFactory#newException(Exception)} and rethrown.</p>
     *
     * @param request the request to execute
     * @return the resource built from the response
     */
    @SuppressWarnings("unchecked")
    @Override
    public <T extends Resource> T request(final HttpRequestBase request) {
        try {
            HttpResponse response = execute(client, request);
            Resource resource = toPage(request, response);
            return (T) resource;
        } catch (Exception e) {
            throw MechanizeExceptionFactory.newException(e);
        }
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#get(java.lang.String)
     */
    @Override
    public <T extends Resource> T get(final String uri) {
        return doRequest(uri).get();
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#post(java.lang.String, java.util.Map)
     */
    @Override
    public <T extends Resource> T post(final String uri, final Map<String, String> params)
            throws UnsupportedEncodingException {
        return post(uri, new Parameters(unsafeCast(params)));
    }

    /**
     * Re-types the given map as {@code Map<String, Object>} without copying it; the returned map is
     * the same instance as the argument.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private Map<String, Object> unsafeCast(final Map<String, String> params) {
        return (Map) params;
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#post(java.lang.String, com.gistlabs.mechanize.parameters.Parameters)
     */
    @Override
    public <T extends Resource> T post(final String uri, final Parameters params) {
        return doRequest(uri).set(params).post();
    }

    /**
     * Idles / waits for the given amount of milliseconds, useful to prevent being blocked for mass sending
     * requests or to appear as an artificial user.
     *
     * <p>The full duration is always waited out, even if the thread is interrupted meanwhile. An
     * interrupt is not lost, though: the thread's interrupt status is restored before this method
     * returns so that callers can still observe it.</p>
     *
     * @param milliseconds how long to wait; zero or negative values return immediately
     */
    public void idle(final int milliseconds) {
        final long startTime = System.currentTimeMillis();
        boolean interrupted = false;
        try {
            long remaining = milliseconds;
            while (remaining > 0) {
                try {
                    Thread.sleep(remaining);
                } catch (InterruptedException e) {
                    // Remember the interrupt but keep waiting; re-interrupting right here would make
                    // every following sleep() fail immediately and turn the loop into a busy wait.
                    interrupted = true;
                }
                remaining = milliseconds - (System.currentTimeMillis() - startTime);
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#cookies()
     */
    @Override
    public Cookies cookies() {
        return cookies;
    }

    /**
     * Builds the resource for a response using the factory registered for the response's mime type,
     * falling back to the factory registered for {@link ContentType#WILDCARD}.
     *
     * @param request the request that produced the response
     * @param response the response to turn into a resource
     * @return the resource built by the selected factory
     * @throws IOException declared for factories and overriding implementations
     * @throws UnsupportedEncodingException declared for factories and overriding implementations
     */
    protected Resource toPage(final HttpRequestBase request, final HttpResponse response)
            throws IOException, UnsupportedEncodingException {

        ContentType contentType = getContentType(response);

        ResourceFactory factory = lookupFactory(contentType.getMimeType());
        if (factory == null) {
            factory = lookupFactory(ContentType.WILDCARD.getMimeType());
        }

        if (factory == null) {
            throw MechanizeExceptionFactory.newMechanizeException(
                    "No viable page type found, and no wildcard mime type factory registered.");
        }

        return factory.buildPage(this, request, response);
    }

    /**
     * Determines the content type of the response via {@link ContentType#getOrDefault} on its entity.
     *
     * @param response the response to inspect
     * @return the content type reported for the response entity
     */
    protected ContentType getContentType(final HttpResponse response) {
        return ContentType.getOrDefault(response.getEntity());
    }

    /**
     * Executes the request through the request chain and post-processes the response: a
     * {@code "Location"} attribute captured in the request context is copied into the
     * {@code MECHANIZE_LOCATION} response header, and the response entity, when present, is wrapped
     * in a {@link BufferedHttpEntity}.
     *
     * @param client not used by this implementation; the request is sent through the request chain
     * @param request the request to execute
     * @return the post-processed response
     * @throws Exception whatever the request chain or the entity buffering throws
     */
    protected HttpResponse execute(final HttpClient client, final HttpRequestBase request) throws Exception {
        HttpContext context = new BasicHttpContext();
        HttpResponse response = requestChain.execute(request, context);

        Object location = context.getAttribute("Location");
        if (location != null) {
            response.setHeader(MECHANIZE_LOCATION, (String) location);
        }

        // Responses without a body carry no entity, and BufferedHttpEntity rejects a null entity,
        // so only buffer when there is something to buffer.
        if (response.getEntity() != null) {
            response.setEntity(new BufferedHttpEntity(response.getEntity()));
        }

        return response;
    }

    /**
     * Parses the given string as a {@link URL} and returns its external form.
     *
     * @param uri the URL string to parse
     * @return the external form of the parsed URL
     */
    @Override
    public String absoluteUrl(final String uri) {
        try {
            return new URL(uri).toExternalForm();
        } catch (MalformedURLException e) {
            throw MechanizeExceptionFactory.newException(e);
        }
    }
}