Example usage for org.apache.commons.httpclient.params HttpMethodParams makeLenient

List of usage examples for org.apache.commons.httpclient.params HttpMethodParams makeLenient

Introduction

On this page you can find example usages of the method org.apache.commons.httpclient.params.HttpMethodParams.makeLenient().

Prototype

public void makeLenient() 

Source Link

Usage

From source file:com.iflytek.spider.protocol.httpclient.HttpResponseSmiply.java

/**
 * Fetches the given <code>url</code> and prepares the HTTP response.
 * <p>
 * Issues a GET request, copies every response header into
 * <code>headers</code>, reads the response body into <code>content</code>
 * and finally decodes gzip, x-gzip and deflate encoded bodies. At most
 * <code>min(Content-Length, http.getMaxContent())</code> bytes of the body
 * are stored (the max-content limit only applies when it is not negative).
 * <p>
 * If the response carries no body, or reading the body fails for a status
 * code other than 200, <code>content</code> is left untouched.
 *
 * @param http
 *            An instance of the implementation class of this plugin
 * @param url
 *            URL to be fetched
 * @param datum
 *            Crawl data
 * @param followRedirects
 *            Whether to follow redirects; follows redirect if and only if
 *            this is true
 * @throws IOException
 *             When an error occurs
 */
HttpResponseSmiply(HttpSimply http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        code = HttpSimply.getClient().executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        // The stream is null when no response body is available, so every
        // use of it below is guarded.
        InputStream in = get.getResponseBodyAsStream();
        try {
            if (in != null) {
                byte[] buffer = new byte[HttpBaseSimply.BUFFER_SIZE];
                int totalRead = 0;
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                while (totalRead < contentLength) {
                    // Never request more than what is still allowed, so the
                    // stored content cannot exceed the limit.
                    int wanted = Math.min(buffer.length, contentLength - totalRead);
                    int bufferFilled = in.read(buffer, 0, wanted);
                    if (bufferFilled == -1) {
                        break;
                    }
                    totalRead += bufferFilled;
                    out.write(buffer, 0, bufferFilled);
                }

                content = out.toByteArray();
            }
        } catch (Exception e) {
            if (code == 200) {
                // keep the original exception as the cause for diagnosis
                throw new IOException(e.toString(), e);
            }
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        // Building the trace message is skipped entirely unless tracing is
        // on; all later appends test fetchTrace itself.
        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message; content may be null if no body was read
            fetchTrace = new StringBuilder("url: " + url + "; status code: " + code + "; bytes received: "
                    + (content == null ? 0 : content.length));
            if (getHeader(Response.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            }
            if (getHeader(Response.LOCATION) != null) {
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
            }
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && fetchTrace != null) {
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            }
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            }
        }

        // Log trace message
        if (fetchTrace != null) {
            Http.LOG.trace(fetchTrace);
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:com.iflytek.spider.protocol.httpclient.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares the HTTP response.
 * <p>
 * Issues a GET request, copies every response header into
 * <code>headers</code>, reads the response body into <code>content</code>
 * and finally decodes gzip, x-gzip and deflate encoded bodies. At most
 * <code>min(Content-Length, http.getMaxContent())</code> bytes of the body
 * are stored (the max-content limit only applies when it is not negative).
 * <p>
 * If the response carries no body, or reading the body fails for a status
 * code other than 200, <code>content</code> is left untouched.
 *
 * @param http
 *            An instance of the implementation class of this plugin
 * @param url
 *            URL to be fetched
 * @param datum
 *            Crawl data
 * @param followRedirects
 *            Whether to follow redirects; follows redirect if and only if
 *            this is true
 * @throws IOException
 *             When an error occurs
 */
HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        code = Http.getClient().executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        // The stream is null when no response body is available, so every
        // use of it below is guarded.
        InputStream in = get.getResponseBodyAsStream();
        try {
            if (in != null) {
                byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
                int totalRead = 0;
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                while (totalRead < contentLength) {
                    // Never request more than what is still allowed, so the
                    // stored content cannot exceed the limit.
                    int wanted = Math.min(buffer.length, contentLength - totalRead);
                    int bufferFilled = in.read(buffer, 0, wanted);
                    if (bufferFilled == -1) {
                        break;
                    }
                    totalRead += bufferFilled;
                    out.write(buffer, 0, bufferFilled);
                }

                content = out.toByteArray();
            }
        } catch (Exception e) {
            if (code == 200) {
                // keep the original exception as the cause for diagnosis
                throw new IOException(e.toString(), e);
            }
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        // Building the trace message is skipped entirely unless tracing is
        // on; all later appends test fetchTrace itself.
        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message; content may be null if no body was read
            fetchTrace = new StringBuilder("url: " + url + "; status code: " + code + "; bytes received: "
                    + (content == null ? 0 : content.length));
            if (getHeader(Response.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            }
            if (getHeader(Response.LOCATION) != null) {
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
            }
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && fetchTrace != null) {
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            }
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            }
        }

        // Log trace message
        if (fetchTrace != null) {
            Http.LOG.trace(fetchTrace);
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.httpclient.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares the HTTP response.
 * <p>
 * Issues a GET request (optionally with <code>If-Modified-Since</code> and
 * <code>Cookie</code> headers), copies every response header into
 * <code>headers</code>, reads the response body into <code>content</code>
 * and finally decodes gzip, x-gzip and deflate encoded bodies. At most
 * <code>min(Content-Length, http.getMaxContent())</code> bytes of the body
 * are stored (the max-content limit only applies when it is not negative).
 * <p>
 * If the response carries no body, or reading the body fails for a status
 * code other than 200, <code>content</code> is left untouched.
 *
 * @param http
 *          An instance of the implementation class of this plugin
 * @param url
 *          URL to be fetched
 * @param datum
 *          Crawl data
 * @param followRedirects
 *          Whether to follow redirects; follows redirect if and only if this
 *          is true
 * @throws IOException
 *           When an error occurs
 */
HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");

    if (http.isCookieEnabled()) {
        params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    } else {
        params.setCookiePolicy(CookiePolicy.IGNORE_COOKIES);
    }
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);

    // Forward a cookie stored in the crawl metadata, if cookies are enabled
    if (http.isCookieEnabled() && datum.getMetaData().containsKey(http.COOKIE)) {
        String cookie = ((Text) datum.getMetaData().get(http.COOKIE)).toString();
        get.addRequestHeader("Cookie", cookie);
    }

    try {
        HttpClient client = Http.getClient();
        client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941
        code = client.executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        // The stream is null when no response body is available; skip the
        // read instead of relying on a caught NullPointerException.
        InputStream in = get.getResponseBodyAsStream();
        try {
            if (in != null) {
                byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
                int totalRead = 0;
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                while (totalRead < contentLength) {
                    // Request only what is still allowed: the limit is
                    // reached exactly instead of dropping the last chunk.
                    int wanted = Math.min(buffer.length, contentLength - totalRead);
                    int bufferFilled = in.read(buffer, 0, wanted);
                    if (bufferFilled == -1) {
                        break;
                    }
                    totalRead += bufferFilled;
                    out.write(buffer, 0, bufferFilled);
                }

                content = out.toByteArray();
            }
        } catch (Exception e) {
            if (code == 200) {
                // keep the original exception as the cause for diagnosis
                throw new IOException(e.toString(), e);
            }
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        // Building the trace message is skipped entirely unless tracing is
        // on; all later appends test fetchTrace itself.
        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message; content may be null if no body was read
            fetchTrace = new StringBuilder("url: " + url + "; status code: " + code + "; bytes received: "
                    + (content == null ? 0 : content.length));
            if (getHeader(Response.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            }
            if (getHeader(Response.LOCATION) != null) {
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
            }
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && fetchTrace != null) {
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            }
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            }
        }

        // Logger trace message
        if (fetchTrace != null) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.httpclient.HttpResponseBak.java

/**
 * Fetches the given <code>url</code> and prepares the HTTP response.
 * <p>
 * Issues a GET request, copies every response header into
 * <code>headers</code>, reads the response body into <code>content</code>
 * and finally decodes gzip, x-gzip and deflate encoded bodies. At most
 * <code>min(Content-Length, http.getMaxContent())</code> bytes of the body
 * are stored (the max-content limit only applies when it is not negative).
 * <p>
 * If the response carries no body, or reading the body fails for a status
 * code other than 200, <code>content</code> is left untouched.
 *
 * @param http                An instance of the implementation class
 *                            of this plugin
 * @param url                 URL to be fetched
 * @param datum               Crawl data
 * @param followRedirects     Whether to follow redirects; follows
 *                            redirect if and only if this is true
 * @throws IOException        When an error occurs
 */
HttpResponseBak(HttpBak http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        code = Http.getClient().executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        // The stream is null when no response body is available; skip the
        // read instead of relying on a caught NullPointerException.
        InputStream in = get.getResponseBodyAsStream();
        try {
            if (in != null) {
                byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
                int totalRead = 0;
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                while (totalRead < contentLength) {
                    // Request only what is still allowed: the limit is
                    // reached exactly instead of dropping the last chunk.
                    int wanted = Math.min(buffer.length, contentLength - totalRead);
                    int bufferFilled = in.read(buffer, 0, wanted);
                    if (bufferFilled == -1) {
                        break;
                    }
                    totalRead += bufferFilled;
                    out.write(buffer, 0, bufferFilled);
                }

                content = out.toByteArray();
            }
        } catch (Exception e) {
            if (code == 200) {
                // keep the original exception as the cause for diagnosis
                throw new IOException(e.toString(), e);
            }
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        // Building the trace message is skipped entirely unless tracing is
        // on; all later appends test fetchTrace itself.
        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message; content may be null if no body was read
            fetchTrace = new StringBuilder("url: " + url + "; status code: " + code + "; bytes received: "
                    + (content == null ? 0 : content.length));
            if (getHeader(Response.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            }
            if (getHeader(Response.LOCATION) != null) {
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
            }
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && fetchTrace != null) {
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            }
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            }
        }

        // Logger trace message
        if (fetchTrace != null) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.httpclient.proxy.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares the HTTP response.
 * <p>
 * Issues a GET request (optionally with an <code>If-Modified-Since</code>
 * header), copies every response header into <code>headers</code>, reads
 * the response body into <code>content</code> and finally decodes gzip,
 * x-gzip and deflate encoded bodies. At most
 * <code>min(Content-Length, http.getMaxContent())</code> bytes of the body
 * are stored (the max-content limit only applies when it is not negative).
 * <p>
 * If the response carries no body, or reading the body fails for a status
 * code other than 200, <code>content</code> is left untouched.
 *
 * @param http
 *          An instance of the implementation class of this plugin
 * @param url
 *          URL to be fetched
 * @param datum
 *          Crawl data
 * @param followRedirects
 *          Whether to follow redirects; follows redirect if and only if this
 *          is true
 * @throws IOException
 *           When an error occurs
 */
HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        HttpClient client = Http.getClient();
        client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941
        code = client.executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        // The stream is null when no response body is available; skip the
        // read instead of relying on a caught NullPointerException.
        InputStream in = get.getResponseBodyAsStream();
        try {
            if (in != null) {
                byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
                int totalRead = 0;
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                while (totalRead < contentLength) {
                    // Request only what is still allowed: the limit is
                    // reached exactly instead of dropping the last chunk.
                    int wanted = Math.min(buffer.length, contentLength - totalRead);
                    int bufferFilled = in.read(buffer, 0, wanted);
                    if (bufferFilled == -1) {
                        break;
                    }
                    totalRead += bufferFilled;
                    out.write(buffer, 0, bufferFilled);
                }

                content = out.toByteArray();
            }
        } catch (Exception e) {
            if (code == 200) {
                // keep the original exception as the cause for diagnosis
                throw new IOException(e.toString(), e);
            }
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        // Building the trace message is skipped entirely unless tracing is
        // on; all later appends test fetchTrace itself.
        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message; content may be null if no body was read
            fetchTrace = new StringBuilder("url: " + url + "; status code: " + code + "; bytes received: "
                    + (content == null ? 0 : content.length));
            if (getHeader(Response.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            }
            if (getHeader(Response.LOCATION) != null) {
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
            }
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && fetchTrace != null) {
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            }
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (fetchTrace != null) {
                    fetchTrace.append("; extracted to " + content.length + " bytes");
                }
            }
        }

        // Logger trace message
        if (fetchTrace != null) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.webdriver.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares the HTTP response.
 * <p>
 * The GET request is issued with HttpClient and never follows redirects.
 * The response headers are copied into <code>headers</code> and into the
 * headers map of <code>page</code>; the body is obtained by
 * <code>readPlainContent(url)</code>.
 * <p>
 * NOTE(review): the original documentation states that content of Ajax
 * sites is fetched using WebDriver; that logic is not visible in this
 * constructor - confirm against <code>readPlainContent</code>.
 *
 * @param http
 *          An instance of the implementation class of this plugin
 * @param url
 *          URL to be fetched
 * @param page
 *          WebPage; its modified time drives the
 *          <code>If-Modified-Since</code> header and its headers map
 *          receives the response headers
 * @param conf
 *          Configuration kept on this response
 * @throws IOException
 *           When an error occurs
 */
HttpResponse(Http http, URL url, WebPage page, Configuration conf) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    this.conf = conf;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(false);
    get.setDoAuthentication(true);
    if (page.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(page.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");

    try {
        HttpClient client = Http.getClient();
        client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941
        code = client.executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        readPlainContent(url);

        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message; guard against content not having been set
            fetchTrace = new StringBuilder("url: " + url + "; status code: " + code + "; bytes received: "
                    + (content == null ? 0 : content.length));
            if (getHeader(Response.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            }
            if (getHeader(Response.LOCATION) != null) {
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
            }
        }
        // add headers in metadata to row
        // NOTE(review): the map is null-checked before clear() but used
        // unconditionally in the loop below - confirm whether
        // page.getHeaders() can really be null.
        if (page.getHeaders() != null) {
            page.getHeaders().clear();
        }
        for (String key : headers.names()) {
            page.getHeaders().put(new Utf8(key), new Utf8(headers.get(key)));
        }

        // Logger trace message
        if (fetchTrace != null) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.eclipse.smila.connectivity.framework.crawler.web.http.HttpResponse.java

/**
 * Configures the given HTTP method from the crawler settings held by
 * <code>http</code>: request headers, authentication, protocol version,
 * charset, cookie policy and retry handler.
 * 
 * @param http
 *          the source of the settings (user agent, referer, extra headers,
 *          HTTP version flag, cookie flag)
 * @param httpMethod
 *          the method to configure
 */
private void setHttpParameters(HttpBase http, HttpMethodBase httpMethod) {
    // Request-level settings; redirects are never followed here.
    httpMethod.setFollowRedirects(false);
    httpMethod.setRequestHeader("User-Agent", http.getUserAgent());
    httpMethod.setRequestHeader("Referer", http.getReferer());
    httpMethod.setDoAuthentication(true);
    for (Header extraHeader : http.getHeaders()) {
        httpMethod.addRequestHeader(extraHeader);
    }

    // Protocol-level settings live on the method's parameter object.
    final HttpMethodParams methodParams = httpMethod.getParams();
    methodParams.setVersion(http.getUseHttp11() ? HttpVersion.HTTP_1_1 : HttpVersion.HTTP_1_0);
    methodParams.makeLenient();
    methodParams.setContentCharset("UTF-8");
    methodParams.setCookiePolicy(
            http.isCookiesEnabled() ? CookiePolicy.BROWSER_COMPATIBILITY : CookiePolicy.IGNORE_COOKIES);
    methodParams.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);

    // The library default is to retry 3 times (a method whose request body
    // was already sent is not retried). Retries are meant to be handled on
    // a higher level, hence the handler is cleared.
    // NOTE(review): confirm that a null RETRY_HANDLER really disables
    // retries in the HttpClient version in use rather than falling back to
    // the default handler.
    methodParams.setParameter(HttpMethodParams.RETRY_HANDLER, null);
}