Example usage for org.apache.commons.httpclient.params HttpMethodParams setVersion

List of usage examples for org.apache.commons.httpclient.params HttpMethodParams setVersion

Introduction

In this page you can find the example usage for org.apache.commons.httpclient.params HttpMethodParams setVersion.

Prototype

public void setVersion(HttpVersion paramHttpVersion) 

Source Link

Usage

From source file:com.iflytek.spider.protocol.httpclient.HttpResponseSmiply.java

/**
 * Fetches the given <code>url</code> and prepares HTTP response.
 * //from   w w  w .j ava 2s  .c o m
 * @param http
 *            An instance of the implementation class of this plugin
 * @param url
 *            URL to be fetched
 * @param datum
 *            Crawl data
 * @param followRedirects
 *            Whether to follow redirects; follows redirect if and only if
 *            this is true
 * @return HTTP response
 * @throws IOException
 *             When an error occurs
 */
HttpResponseSmiply(HttpSimply http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        code = HttpSimply.getClient().executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        InputStream in = get.getResponseBodyAsStream();
        try {
            byte[] buffer = new byte[HttpBaseSimply.BUFFER_SIZE];
            int bufferFilled = 0;
            int totalRead = 0;
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && totalRead < contentLength) {
                totalRead += bufferFilled;
                out.write(buffer, 0, bufferFilled);
            }

            content = out.toByteArray();
        } catch (Exception e) {
            if (code == 200)
                throw new IOException(e.toString());
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            in.close();
            get.abort();
        }

        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message
            fetchTrace = new StringBuilder(
                    "url: " + url + "; status code: " + code + "; bytes received: " + content.length);
            if (getHeader(Response.CONTENT_LENGTH) != null)
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            if (getHeader(Response.LOCATION) != null)
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && Http.LOG.isTraceEnabled())
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            }
        }

        // Log trace message
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace(fetchTrace);
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:com.iflytek.spider.protocol.httpclient.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares HTTP response.
 * /*from w w  w.  ja va 2 s  .  c  o m*/
 * @param http
 *            An instance of the implementation class of this plugin
 * @param url
 *            URL to be fetched
 * @param datum
 *            Crawl data
 * @param followRedirects
 *            Whether to follow redirects; follows redirect if and only if
 *            this is true
 * @return HTTP response
 * @throws IOException
 *             When an error occurs
 */
HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        code = Http.getClient().executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        InputStream in = get.getResponseBodyAsStream();
        try {
            byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
            int bufferFilled = 0;
            int totalRead = 0;
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && totalRead < contentLength) {
                totalRead += bufferFilled;
                out.write(buffer, 0, bufferFilled);
            }

            content = out.toByteArray();
        } catch (Exception e) {
            if (code == 200)
                throw new IOException(e.toString());
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            in.close();
            get.abort();
        }

        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message
            fetchTrace = new StringBuilder(
                    "url: " + url + "; status code: " + code + "; bytes received: " + content.length);
            if (getHeader(Response.CONTENT_LENGTH) != null)
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            if (getHeader(Response.LOCATION) != null)
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && Http.LOG.isTraceEnabled())
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            }
        }

        // Log trace message
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace(fetchTrace);
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:de.innovationgate.wgpublisher.webtml.Include.java

private void performURLInclude() throws TMLException {

    Status status = (Status) getStatus();

    try {//from ww w. j  ava 2s .  co  m

        HttpClient client = WGFactory.getHttpClientFactory().createHttpClient();
        HttpMethodParams methodParams = new HttpMethodParams();
        methodParams.setSoTimeout(Integer.parseInt(getTimeout()));
        methodParams.setVersion(HttpVersion.HTTP_1_1);

        GetMethod getMethod = new GetMethod();
        getMethod.setURI(new URI(status.ref, false));
        getMethod.setParams(methodParams);
        getMethod.setFollowRedirects(true);

        int httpStatus = client.executeMethod(getMethod);
        if (httpStatus != HttpServletResponse.SC_OK) {
            throw new TMLException("Response status " + httpStatus + " (" + getMethod.getStatusText()
                    + ") for included URL " + ref, true);
        }

        String encoding = getEncoding();
        if (encoding == null) {
            encoding = getMethod.getResponseCharSet();
            if (encoding == null) {
                getTMLContext().addwarning("No encoding returned from URL '" + status.ref
                        + "'. Assuming default encoding " + encoding);
                encoding = getCore().getCharacterEncoding();
            }
        }

        Reader reader = new InputStreamReader(getMethod.getResponseBodyAsStream(), encoding);
        StringWriter writer = new StringWriter();
        char[] buf = new char[2048];
        long count = 0;

        String limitStr = getLimit();
        long charLimit = 0;
        try {
            charLimit = Math.round(1024 * 1024 * Double.parseDouble(limitStr));
        } catch (NumberFormatException e) {
            throw new TMLException("Cannot parse limit attribute as number: " + limitStr);
        }

        int len;
        while ((len = reader.read(buf)) != -1) {
            writer.write(buf, 0, len);
            count += len;
            if (charLimit != 0 && count > charLimit) {
                throw new TMLException("Include of URL '" + status.ref + "' reaches content limit of "
                        + limitStr + " million characters. Include is cancelled.");
            }
        }
        this.setResult(writer.toString());

    } catch (java.io.IOException exc) {
        log.error("Exception including url", exc);
        this.addWarning("Exception while including url: " + exc.getMessage());
    }

}

From source file:com.zimbra.common.soap.SoapHttpTransport.java

public Element invoke(Element document, boolean raw, boolean noSession, String requestedAccountId,
        String changeToken, String tokenType, ResponseHandler respHandler)
        throws IOException, HttpException, ServiceException {
    PostMethod method = null;//from   w w  w .j  av a 2 s.com

    try {
        // Assemble post method.  Append document name, so that the request
        // type is written to the access log.
        String uri, query;
        int i = mUri.indexOf('?');
        if (i >= 0) {
            uri = mUri.substring(0, i);
            query = mUri.substring(i);
        } else {
            uri = mUri;
            query = "";
        }
        if (!uri.endsWith("/"))
            uri += '/';
        uri += getDocumentName(document);
        method = new PostMethod(uri + query);

        // Set user agent if it's specified.
        String agentName = getUserAgentName();
        if (agentName != null) {
            String agentVersion = getUserAgentVersion();
            if (agentVersion != null)
                agentName += " " + agentVersion;
            method.setRequestHeader(new Header("User-Agent", agentName));
        }

        // the content-type charset will determine encoding used
        // when we set the request body
        method.setRequestHeader("Content-Type", getRequestProtocol().getContentType());
        if (getClientIp() != null) {
            method.setRequestHeader(RemoteIP.X_ORIGINATING_IP_HEADER, getClientIp());
            if (ZimbraLog.misc.isDebugEnabled()) {
                ZimbraLog.misc.debug("set remote IP header [%s] to [%s]", RemoteIP.X_ORIGINATING_IP_HEADER,
                        getClientIp());
            }
        }
        Element soapReq = generateSoapMessage(document, raw, noSession, requestedAccountId, changeToken,
                tokenType);
        String soapMessage = SoapProtocol.toString(soapReq, getPrettyPrint());
        HttpMethodParams params = method.getParams();

        method.setRequestEntity(new StringRequestEntity(soapMessage, null, "UTF-8"));

        if (getRequestProtocol().hasSOAPActionHeader())
            method.setRequestHeader("SOAPAction", mUri);

        if (mCustomHeaders != null) {
            for (Map.Entry<String, String> entry : mCustomHeaders.entrySet())
                method.setRequestHeader(entry.getKey(), entry.getValue());
        }

        String host = method.getURI().getHost();
        HttpState state = HttpClientUtil.newHttpState(getAuthToken(), host, this.isAdmin());
        String trustedToken = getTrustedToken();
        if (trustedToken != null) {
            state.addCookie(
                    new Cookie(host, ZimbraCookie.COOKIE_ZM_TRUST_TOKEN, trustedToken, "/", null, false));
        }
        params.setCookiePolicy(state.getCookies().length == 0 ? CookiePolicy.IGNORE_COOKIES
                : CookiePolicy.BROWSER_COMPATIBILITY);
        params.setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(mRetryCount - 1, true));
        params.setSoTimeout(mTimeout);
        params.setVersion(HttpVersion.HTTP_1_1);
        method.setRequestHeader("Connection", mKeepAlive ? "Keep-alive" : "Close");

        if (mHostConfig != null && mHostConfig.getUsername() != null && mHostConfig.getPassword() != null) {
            state.setProxyCredentials(new AuthScope(null, -1),
                    new UsernamePasswordCredentials(mHostConfig.getUsername(), mHostConfig.getPassword()));
        }

        if (mHttpDebugListener != null) {
            mHttpDebugListener.sendSoapMessage(method, soapReq, state);
        }

        int responseCode = mClient.executeMethod(mHostConfig, method, state);
        // SOAP allows for "200" on success and "500" on failure;
        //   real server issues will probably be "503" or "404"
        if (responseCode != HttpServletResponse.SC_OK
                && responseCode != HttpServletResponse.SC_INTERNAL_SERVER_ERROR)
            throw ServiceException.PROXY_ERROR(method.getStatusLine().toString(), uri);

        // Read the response body.  Use the stream API instead of the byte[]
        // version to avoid HTTPClient whining about a large response.
        InputStreamReader reader = new InputStreamReader(method.getResponseBodyAsStream(),
                SoapProtocol.getCharset());
        String responseStr = "";

        try {
            if (respHandler != null) {
                respHandler.process(reader);
                return null;
            } else {
                responseStr = ByteUtil.getContent(reader, (int) method.getResponseContentLength(), false);
                Element soapResp = parseSoapResponse(responseStr, raw);

                if (mHttpDebugListener != null) {
                    mHttpDebugListener.receiveSoapMessage(method, soapResp);
                }
                return soapResp;
            }
        } catch (SoapFaultException x) {
            // attach request/response to the exception and rethrow
            x.setFaultRequest(soapMessage);
            x.setFaultResponse(responseStr.substring(0, Math.min(10240, responseStr.length())));
            throw x;
        }
    } finally {
        // Release the connection to the connection manager
        if (method != null)
            method.releaseConnection();

        // really not necessary if running in the server because the reaper thread
        // of our connection manager will take care it.
        // if called from CLI, all connections will be closed when the CLI
        // exits.  Leave it here anyway.
        if (!mKeepAlive)
            mClient.getHttpConnectionManager().closeIdleConnections(0);
    }
}

From source file:org.apache.camel.component.http.HttpProducer.java

public void process(Exchange exchange) throws Exception {
    // if we bridge endpoint then we need to skip matching headers with the HTTP_QUERY to avoid sending
    // duplicated headers to the receiver, so use this skipRequestHeaders as the list of headers to skip
    Map<String, Object> skipRequestHeaders = null;

    if (getEndpoint().isBridgeEndpoint()) {
        exchange.setProperty(Exchange.SKIP_GZIP_ENCODING, Boolean.TRUE);
        String queryString = exchange.getIn().getHeader(Exchange.HTTP_QUERY, String.class);
        if (queryString != null) {
            skipRequestHeaders = URISupport.parseQuery(queryString);
        }//from   w w w.  j  ava2s  . c  o  m
        // Need to remove the Host key as it should be not used 
        exchange.getIn().getHeaders().remove("host");
    }
    HttpMethod method = createMethod(exchange);
    Message in = exchange.getIn();
    String httpProtocolVersion = in.getHeader(Exchange.HTTP_PROTOCOL_VERSION, String.class);
    if (httpProtocolVersion != null) {
        // set the HTTP protocol version
        HttpMethodParams params = method.getParams();
        params.setVersion(HttpVersion.parse(httpProtocolVersion));
    }

    HeaderFilterStrategy strategy = getEndpoint().getHeaderFilterStrategy();

    // propagate headers as HTTP headers
    for (Map.Entry<String, Object> entry : in.getHeaders().entrySet()) {
        String key = entry.getKey();
        Object headerValue = in.getHeader(key);

        if (headerValue != null) {
            // use an iterator as there can be multiple values. (must not use a delimiter, and allow empty values)
            final Iterator<?> it = ObjectHelper.createIterator(headerValue, null, true);

            // the value to add as request header
            final List<String> values = new ArrayList<String>();

            // if its a multi value then check each value if we can add it and for multi values they
            // should be combined into a single value
            while (it.hasNext()) {
                String value = exchange.getContext().getTypeConverter().convertTo(String.class, it.next());

                // we should not add headers for the parameters in the uri if we bridge the endpoint
                // as then we would duplicate headers on both the endpoint uri, and in HTTP headers as well
                if (skipRequestHeaders != null && skipRequestHeaders.containsKey(key)) {
                    continue;
                }
                if (value != null && strategy != null
                        && !strategy.applyFilterToCamelHeaders(key, value, exchange)) {
                    values.add(value);
                }
            }

            // add the value(s) as a http request header
            if (values.size() > 0) {
                // use the default toString of a ArrayList to create in the form [xxx, yyy]
                // if multi valued, for a single value, then just output the value as is
                String s = values.size() > 1 ? values.toString() : values.get(0);
                method.addRequestHeader(key, s);
            }
        }
    }

    // lets store the result in the output message.
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Executing http {} method: {}", method.getName(), method.getURI().toString());
        }
        int responseCode = executeMethod(method);
        LOG.debug("Http responseCode: {}", responseCode);

        if (!throwException) {
            // if we do not use failed exception then populate response for all response codes
            populateResponse(exchange, method, in, strategy, responseCode);
        } else {
            if (responseCode >= 100 && responseCode < 300) {
                // only populate response for OK response
                populateResponse(exchange, method, in, strategy, responseCode);
            } else {
                // operation failed so populate exception to throw
                throw populateHttpOperationFailedException(exchange, method, responseCode);
            }
        }
    } finally {
        method.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.httpclient.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares HTTP response.
 * /*from  w w w  .  j ava 2 s . co m*/
 * @param http
 *          An instance of the implementation class of this plugin
 * @param url
 *          URL to be fetched
 * @param datum
 *          Crawl data
 * @param followRedirects
 *          Whether to follow redirects; follows redirect if and only if this
 *          is true
 * @return HTTP response
 * @throws IOException
 *           When an error occurs
 */
HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");

    if (http.isCookieEnabled()) {
        params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    } else {
        params.setCookiePolicy(CookiePolicy.IGNORE_COOKIES);
    }
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);

    if (http.isCookieEnabled() && datum.getMetaData().containsKey(http.COOKIE)) {
        String cookie = ((Text) datum.getMetaData().get(http.COOKIE)).toString();
        get.addRequestHeader("Cookie", cookie);
    }

    try {
        HttpClient client = Http.getClient();
        client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941
        code = client.executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        InputStream in = get.getResponseBodyAsStream();
        try {
            byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
            int bufferFilled = 0;
            int totalRead = 0;
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1
                    && totalRead + bufferFilled <= contentLength) {
                totalRead += bufferFilled;
                out.write(buffer, 0, bufferFilled);
            }

            content = out.toByteArray();
        } catch (Exception e) {
            if (code == 200)
                throw new IOException(e.toString());
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message
            fetchTrace = new StringBuilder(
                    "url: " + url + "; status code: " + code + "; bytes received: " + content.length);
            if (getHeader(Response.CONTENT_LENGTH) != null)
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            if (getHeader(Response.LOCATION) != null)
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && Http.LOG.isTraceEnabled())
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            }
        }

        // Logger trace message
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.httpclient.HttpResponseBak.java

/**
 * Fetches the given <code>url</code> and prepares HTTP response.
 *
 * @param http                An instance of the implementation class
 *                            of this plugin
 * @param url                 URL to be fetched
 * @param datum               Crawl data
 * @param followRedirects     Whether to follow redirects; follows
 *                            redirect if and only if this is true
 * @return                    HTTP response
 * @throws IOException        When an error occurs
 *///  w w  w  .j  a  v  a  2s. c  o  m
HttpResponseBak(HttpBak http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        code = Http.getClient().executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        InputStream in = get.getResponseBodyAsStream();
        try {
            byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
            int bufferFilled = 0;
            int totalRead = 0;
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1
                    && totalRead + bufferFilled <= contentLength) {
                totalRead += bufferFilled;
                out.write(buffer, 0, bufferFilled);
            }

            content = out.toByteArray();
        } catch (Exception e) {
            if (code == 200)
                throw new IOException(e.toString());
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message
            fetchTrace = new StringBuilder(
                    "url: " + url + "; status code: " + code + "; bytes received: " + content.length);
            if (getHeader(Response.CONTENT_LENGTH) != null)
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            if (getHeader(Response.LOCATION) != null)
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && Http.LOG.isTraceEnabled())
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            }
        }

        // Logger trace message
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.httpclient.proxy.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares HTTP response.
 * /*from w  w w .  jav a2  s .  com*/
 * @param http
 *          An instance of the implementation class of this plugin
 * @param url
 *          URL to be fetched
 * @param datum
 *          Crawl data
 * @param followRedirects
 *          Whether to follow redirects; follows redirect if and only if this
 *          is true
 * @return HTTP response
 * @throws IOException
 *           When an error occurs
 */
HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(followRedirects);
    get.setDoAuthentication(true);
    if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // XXX (ab) not sure about this... the default is to retry 3 times; if
    // XXX the request body was sent the method is not retried, so there is
    // XXX little danger in retrying...
    // params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
    try {
        HttpClient client = Http.getClient();
        client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941
        code = client.executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        // Limit download size
        int contentLength = Integer.MAX_VALUE;
        String contentLengthString = headers.get(Response.CONTENT_LENGTH);
        if (contentLengthString != null) {
            try {
                contentLength = Integer.parseInt(contentLengthString.trim());
            } catch (NumberFormatException ex) {
                throw new HttpException("bad content length: " + contentLengthString);
            }
        }
        if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
            contentLength = http.getMaxContent();
        }

        // always read content. Sometimes content is useful to find a cause
        // for error.
        InputStream in = get.getResponseBodyAsStream();
        try {
            byte[] buffer = new byte[HttpBase.BUFFER_SIZE];
            int bufferFilled = 0;
            int totalRead = 0;
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1
                    && totalRead + bufferFilled <= contentLength) {
                totalRead += bufferFilled;
                out.write(buffer, 0, bufferFilled);
            }

            content = out.toByteArray();
        } catch (Exception e) {
            if (code == 200)
                throw new IOException(e.toString());
            // for codes other than 200 OK, we are fine with empty content
        } finally {
            if (in != null) {
                in.close();
            }
            get.abort();
        }

        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message
            fetchTrace = new StringBuilder(
                    "url: " + url + "; status code: " + code + "; bytes received: " + content.length);
            if (getHeader(Response.CONTENT_LENGTH) != null)
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            if (getHeader(Response.LOCATION) != null)
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
        }
        // Extract gzip, x-gzip and deflate content
        if (content != null) {
            // check if we have to uncompress it
            String contentEncoding = headers.get(Response.CONTENT_ENCODING);
            if (contentEncoding != null && Http.LOG.isTraceEnabled())
                fetchTrace.append("; Content-Encoding: " + contentEncoding);
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                content = http.processGzipEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            } else if ("deflate".equals(contentEncoding)) {
                content = http.processDeflateEncoded(content, url);
                if (Http.LOG.isTraceEnabled())
                    fetchTrace.append("; extracted to " + content.length + " bytes");
            }
        }

        // Logger trace message
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.apache.nutch.protocol.webdriver.HttpResponse.java

/**
 * Fetches the given <code>url</code> and prepares HTTP response. Fetch the
 * content using WebDriver to extract HTML from Ajax site, other responses are
 * fetches using HTTPClient.//from   w ww .j  a v  a  2 s  .  c  o  m
 * 
 * @param http
 *          An instance of the implementation class of this plugin
 * @param url
 *          URL to be fetched
 * @param page
 *          WebPage
 * @param followRedirects
 *          Whether to follow redirects; follows redirect if and only if this
 *          is true
 * @return HTTP response
 * @throws IOException
 *           When an error occurs
 */
HttpResponse(Http http, URL url, WebPage page, Configuration conf) throws IOException {

    // Prepare GET method for HTTP request
    this.url = url;
    this.conf = conf;
    GetMethod get = new GetMethod(url.toString());
    get.setFollowRedirects(false);
    get.setDoAuthentication(true);
    if (page.getModifiedTime() > 0) {
        get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(page.getModifiedTime()));
    }

    // Set HTTP parameters
    HttpMethodParams params = get.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");

    try {
        HttpClient client = Http.getClient();
        client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941
        code = client.executeMethod(get);

        Header[] heads = get.getResponseHeaders();

        for (int i = 0; i < heads.length; i++) {
            headers.set(heads[i].getName(), heads[i].getValue());
        }

        readPlainContent(url);

        StringBuilder fetchTrace = null;
        if (Http.LOG.isTraceEnabled()) {
            // Trace message
            fetchTrace = new StringBuilder(
                    "url: " + url + "; status code: " + code + "; bytes received: " + content.length);
            if (getHeader(Response.CONTENT_LENGTH) != null)
                fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH));
            if (getHeader(Response.LOCATION) != null)
                fetchTrace.append("; Location: " + getHeader(Response.LOCATION));
        }
        // add headers in metadata to row
        if (page.getHeaders() != null) {
            page.getHeaders().clear();
        }
        for (String key : headers.names()) {
            page.getHeaders().put(new Utf8(key), new Utf8(headers.get(key)));
        }

        // Logger trace message
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace(fetchTrace.toString());
        }
    } finally {
        get.releaseConnection();
    }
}

From source file:org.eclipse.smila.connectivity.framework.crawler.web.http.HttpResponse.java

/**
 * Sets the http parameters.// w  ww .  j  a  va2s .  c  om
 * 
 * @param http
 *          the http
 * @param httpMethod
 *          the http method
 */
private void setHttpParameters(HttpBase http, HttpMethodBase httpMethod) {
    httpMethod.setFollowRedirects(false);
    httpMethod.setRequestHeader("User-Agent", http.getUserAgent());
    httpMethod.setRequestHeader("Referer", http.getReferer());

    httpMethod.setDoAuthentication(true);

    for (Header header : http.getHeaders()) {
        httpMethod.addRequestHeader(header);
    }

    final HttpMethodParams params = httpMethod.getParams();
    if (http.getUseHttp11()) {
        params.setVersion(HttpVersion.HTTP_1_1);
    } else {
        params.setVersion(HttpVersion.HTTP_1_0);
    }
    params.makeLenient();
    params.setContentCharset("UTF-8");

    if (http.isCookiesEnabled()) {
        params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    } else {
        params.setCookiePolicy(CookiePolicy.IGNORE_COOKIES);
    }
    params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
    // the default is to retry 3 times; if
    // the request body was sent the method is not retried, so there is
    // little danger in retrying
    // retries are handled on the higher level
    params.setParameter(HttpMethodParams.RETRY_HANDLER, null);
}