List of usage examples for org.apache.commons.httpclient.params.HttpMethodParams#setVersion
public void setVersion(HttpVersion paramHttpVersion)
From source file:com.iflytek.spider.protocol.httpclient.HttpResponseSmiply.java
/** * Fetches the given <code>url</code> and prepares HTTP response. * //from w w w .j ava 2s .c o m * @param http * An instance of the implementation class of this plugin * @param url * URL to be fetched * @param datum * Crawl data * @param followRedirects * Whether to follow redirects; follows redirect if and only if * this is true * @return HTTP response * @throws IOException * When an error occurs */ HttpResponseSmiply(HttpSimply http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException { // Prepare GET method for HTTP request this.url = url; GetMethod get = new GetMethod(url.toString()); get.setFollowRedirects(followRedirects); get.setDoAuthentication(true); if (datum.getModifiedTime() > 0) { get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime())); } // Set HTTP parameters HttpMethodParams params = get.getParams(); if (http.getUseHttp11()) { params.setVersion(HttpVersion.HTTP_1_1); } else { params.setVersion(HttpVersion.HTTP_1_0); } params.makeLenient(); params.setContentCharset("UTF-8"); params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true); // XXX (ab) not sure about this... the default is to retry 3 times; if // XXX the request body was sent the method is not retried, so there is // XXX little danger in retrying... 
// params.setParameter(HttpMethodParams.RETRY_HANDLER, null); try { code = HttpSimply.getClient().executeMethod(get); Header[] heads = get.getResponseHeaders(); for (int i = 0; i < heads.length; i++) { headers.set(heads[i].getName(), heads[i].getValue()); } // Limit download size int contentLength = Integer.MAX_VALUE; String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { try { contentLength = Integer.parseInt(contentLengthString.trim()); } catch (NumberFormatException ex) { throw new HttpException("bad content length: " + contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) { contentLength = http.getMaxContent(); } // always read content. Sometimes content is useful to find a cause // for error. InputStream in = get.getResponseBodyAsStream(); try { byte[] buffer = new byte[HttpBaseSimply.BUFFER_SIZE]; int bufferFilled = 0; int totalRead = 0; ByteArrayOutputStream out = new ByteArrayOutputStream(); while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && totalRead < contentLength) { totalRead += bufferFilled; out.write(buffer, 0, bufferFilled); } content = out.toByteArray(); } catch (Exception e) { if (code == 200) throw new IOException(e.toString()); // for codes other than 200 OK, we are fine with empty content } finally { in.close(); get.abort(); } StringBuilder fetchTrace = null; if (Http.LOG.isTraceEnabled()) { // Trace message fetchTrace = new StringBuilder( "url: " + url + "; status code: " + code + "; bytes received: " + content.length); if (getHeader(Response.CONTENT_LENGTH) != null) fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH)); if (getHeader(Response.LOCATION) != null) fetchTrace.append("; Location: " + getHeader(Response.LOCATION)); } // Extract gzip, x-gzip and deflate content if (content != null) { // check if we have to uncompress it String contentEncoding = headers.get(Response.CONTENT_ENCODING); if (contentEncoding != 
null && Http.LOG.isTraceEnabled()) fetchTrace.append("; Content-Encoding: " + contentEncoding); if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) { content = http.processGzipEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } else if ("deflate".equals(contentEncoding)) { content = http.processDeflateEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } } // Log trace message if (Http.LOG.isTraceEnabled()) { Http.LOG.trace(fetchTrace); } } finally { get.releaseConnection(); } }
From source file:com.iflytek.spider.protocol.httpclient.HttpResponse.java
/** * Fetches the given <code>url</code> and prepares HTTP response. * /*from w w w. ja va 2 s . c o m*/ * @param http * An instance of the implementation class of this plugin * @param url * URL to be fetched * @param datum * Crawl data * @param followRedirects * Whether to follow redirects; follows redirect if and only if * this is true * @return HTTP response * @throws IOException * When an error occurs */ HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException { // Prepare GET method for HTTP request this.url = url; GetMethod get = new GetMethod(url.toString()); get.setFollowRedirects(followRedirects); get.setDoAuthentication(true); if (datum.getModifiedTime() > 0) { get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime())); } // Set HTTP parameters HttpMethodParams params = get.getParams(); if (http.getUseHttp11()) { params.setVersion(HttpVersion.HTTP_1_1); } else { params.setVersion(HttpVersion.HTTP_1_0); } params.makeLenient(); params.setContentCharset("UTF-8"); params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true); // XXX (ab) not sure about this... the default is to retry 3 times; if // XXX the request body was sent the method is not retried, so there is // XXX little danger in retrying... 
// params.setParameter(HttpMethodParams.RETRY_HANDLER, null); try { code = Http.getClient().executeMethod(get); Header[] heads = get.getResponseHeaders(); for (int i = 0; i < heads.length; i++) { headers.set(heads[i].getName(), heads[i].getValue()); } // Limit download size int contentLength = Integer.MAX_VALUE; String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { try { contentLength = Integer.parseInt(contentLengthString.trim()); } catch (NumberFormatException ex) { throw new HttpException("bad content length: " + contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) { contentLength = http.getMaxContent(); } // always read content. Sometimes content is useful to find a cause // for error. InputStream in = get.getResponseBodyAsStream(); try { byte[] buffer = new byte[HttpBase.BUFFER_SIZE]; int bufferFilled = 0; int totalRead = 0; ByteArrayOutputStream out = new ByteArrayOutputStream(); while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && totalRead < contentLength) { totalRead += bufferFilled; out.write(buffer, 0, bufferFilled); } content = out.toByteArray(); } catch (Exception e) { if (code == 200) throw new IOException(e.toString()); // for codes other than 200 OK, we are fine with empty content } finally { in.close(); get.abort(); } StringBuilder fetchTrace = null; if (Http.LOG.isTraceEnabled()) { // Trace message fetchTrace = new StringBuilder( "url: " + url + "; status code: " + code + "; bytes received: " + content.length); if (getHeader(Response.CONTENT_LENGTH) != null) fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH)); if (getHeader(Response.LOCATION) != null) fetchTrace.append("; Location: " + getHeader(Response.LOCATION)); } // Extract gzip, x-gzip and deflate content if (content != null) { // check if we have to uncompress it String contentEncoding = headers.get(Response.CONTENT_ENCODING); if (contentEncoding != null && 
Http.LOG.isTraceEnabled()) fetchTrace.append("; Content-Encoding: " + contentEncoding); if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) { content = http.processGzipEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } else if ("deflate".equals(contentEncoding)) { content = http.processDeflateEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } } // Log trace message if (Http.LOG.isTraceEnabled()) { Http.LOG.trace(fetchTrace); } } finally { get.releaseConnection(); } }
From source file:de.innovationgate.wgpublisher.webtml.Include.java
private void performURLInclude() throws TMLException { Status status = (Status) getStatus(); try {//from ww w. j ava 2s . co m HttpClient client = WGFactory.getHttpClientFactory().createHttpClient(); HttpMethodParams methodParams = new HttpMethodParams(); methodParams.setSoTimeout(Integer.parseInt(getTimeout())); methodParams.setVersion(HttpVersion.HTTP_1_1); GetMethod getMethod = new GetMethod(); getMethod.setURI(new URI(status.ref, false)); getMethod.setParams(methodParams); getMethod.setFollowRedirects(true); int httpStatus = client.executeMethod(getMethod); if (httpStatus != HttpServletResponse.SC_OK) { throw new TMLException("Response status " + httpStatus + " (" + getMethod.getStatusText() + ") for included URL " + ref, true); } String encoding = getEncoding(); if (encoding == null) { encoding = getMethod.getResponseCharSet(); if (encoding == null) { getTMLContext().addwarning("No encoding returned from URL '" + status.ref + "'. Assuming default encoding " + encoding); encoding = getCore().getCharacterEncoding(); } } Reader reader = new InputStreamReader(getMethod.getResponseBodyAsStream(), encoding); StringWriter writer = new StringWriter(); char[] buf = new char[2048]; long count = 0; String limitStr = getLimit(); long charLimit = 0; try { charLimit = Math.round(1024 * 1024 * Double.parseDouble(limitStr)); } catch (NumberFormatException e) { throw new TMLException("Cannot parse limit attribute as number: " + limitStr); } int len; while ((len = reader.read(buf)) != -1) { writer.write(buf, 0, len); count += len; if (charLimit != 0 && count > charLimit) { throw new TMLException("Include of URL '" + status.ref + "' reaches content limit of " + limitStr + " million characters. Include is cancelled."); } } this.setResult(writer.toString()); } catch (java.io.IOException exc) { log.error("Exception including url", exc); this.addWarning("Exception while including url: " + exc.getMessage()); } }
From source file:com.zimbra.common.soap.SoapHttpTransport.java
/**
 * Sends the given document as a SOAP request via HTTP POST and returns the parsed response.
 *
 * <p>The document name is appended to the path of {@code mUri} (before any query string),
 * the SOAP envelope is built by {@code generateSoapMessage} from the arguments, and the
 * request is executed with {@code mClient}. Only the status codes 200 and 500 are treated
 * as SOAP responses.
 *
 * @param document the request payload; its document name is appended to the request URI
 * @param raw passed to {@code generateSoapMessage} and {@code parseSoapResponse}
 * @param noSession passed to {@code generateSoapMessage}
 * @param requestedAccountId passed to {@code generateSoapMessage}
 * @param changeToken passed to {@code generateSoapMessage}
 * @param tokenType passed to {@code generateSoapMessage}
 * @param respHandler if non-null, receives the response reader; the method then returns
 *        {@code null} without parsing the response itself
 * @return the parsed SOAP response, or {@code null} when {@code respHandler} was used
 * @throws ServiceException a proxy error if the response code is neither 200 nor 500
 */
public Element invoke(Element document, boolean raw, boolean noSession, String requestedAccountId,
        String changeToken, String tokenType, ResponseHandler respHandler)
        throws IOException, HttpException, ServiceException {
    PostMethod method = null;

    try {
        // Assemble post method. Append document name, so that the request
        // type is written to the access log.
        String uri, query;
        int i = mUri.indexOf('?');
        if (i >= 0) {
            uri = mUri.substring(0, i);
            query = mUri.substring(i);
        } else {
            uri = mUri;
            query = "";
        }
        if (!uri.endsWith("/"))
            uri += '/';
        uri += getDocumentName(document);
        method = new PostMethod(uri + query);

        // Set user agent if it's specified.
        String agentName = getUserAgentName();
        if (agentName != null) {
            String agentVersion = getUserAgentVersion();
            if (agentVersion != null)
                agentName += " " + agentVersion;
            method.setRequestHeader(new Header("User-Agent", agentName));
        }

        // the content-type charset will determine encoding used
        // when we set the request body
        method.setRequestHeader("Content-Type", getRequestProtocol().getContentType());
        if (getClientIp() != null) {
            method.setRequestHeader(RemoteIP.X_ORIGINATING_IP_HEADER, getClientIp());
            if (ZimbraLog.misc.isDebugEnabled()) {
                ZimbraLog.misc.debug("set remote IP header [%s] to [%s]", RemoteIP.X_ORIGINATING_IP_HEADER,
                        getClientIp());
            }
        }
        Element soapReq = generateSoapMessage(document, raw, noSession, requestedAccountId, changeToken,
                tokenType);
        String soapMessage = SoapProtocol.toString(soapReq, getPrettyPrint());
        HttpMethodParams params = method.getParams();

        method.setRequestEntity(new StringRequestEntity(soapMessage, null, "UTF-8"));

        if (getRequestProtocol().hasSOAPActionHeader())
            method.setRequestHeader("SOAPAction", mUri);

        if (mCustomHeaders != null) {
            for (Map.Entry<String, String> entry : mCustomHeaders.entrySet())
                method.setRequestHeader(entry.getKey(), entry.getValue());
        }

        // Per-request HTTP state built from the auth token (plus the trusted token cookie, if any)
        String host = method.getURI().getHost();
        HttpState state = HttpClientUtil.newHttpState(getAuthToken(), host, this.isAdmin());
        String trustedToken = getTrustedToken();
        if (trustedToken != null) {
            state.addCookie(
                    new Cookie(host, ZimbraCookie.COOKIE_ZM_TRUST_TOKEN, trustedToken, "/", null, false));
        }
        // Cookies are only processed when the state actually carries some
        params.setCookiePolicy(state.getCookies().length == 0 ? CookiePolicy.IGNORE_COOKIES
                : CookiePolicy.BROWSER_COMPATIBILITY);
        params.setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(mRetryCount - 1, true));
        params.setSoTimeout(mTimeout);
        params.setVersion(HttpVersion.HTTP_1_1);
        method.setRequestHeader("Connection", mKeepAlive ? "Keep-alive" : "Close");

        if (mHostConfig != null && mHostConfig.getUsername() != null && mHostConfig.getPassword() != null) {
            state.setProxyCredentials(new AuthScope(null, -1),
                    new UsernamePasswordCredentials(mHostConfig.getUsername(), mHostConfig.getPassword()));
        }

        if (mHttpDebugListener != null) {
            mHttpDebugListener.sendSoapMessage(method, soapReq, state);
        }

        int responseCode = mClient.executeMethod(mHostConfig, method, state);
        // SOAP allows for "200" on success and "500" on failure;
        // real server issues will probably be "503" or "404"
        if (responseCode != HttpServletResponse.SC_OK
                && responseCode != HttpServletResponse.SC_INTERNAL_SERVER_ERROR)
            throw ServiceException.PROXY_ERROR(method.getStatusLine().toString(), uri);

        // Read the response body. Use the stream API instead of the byte[]
        // version to avoid HTTPClient whining about a large response.
        InputStreamReader reader = new InputStreamReader(method.getResponseBodyAsStream(),
                SoapProtocol.getCharset());
        String responseStr = "";

        try {
            if (respHandler != null) {
                // The handler consumes the stream itself; nothing is parsed or returned here
                respHandler.process(reader);
                return null;
            } else {
                responseStr = ByteUtil.getContent(reader, (int) method.getResponseContentLength(), false);
                Element soapResp = parseSoapResponse(responseStr, raw);

                if (mHttpDebugListener != null) {
                    mHttpDebugListener.receiveSoapMessage(method, soapResp);
                }
                return soapResp;
            }
        } catch (SoapFaultException x) {
            // attach request/response to the exception and rethrow
            // (the attached response is capped at 10240 characters)
            x.setFaultRequest(soapMessage);
            x.setFaultResponse(responseStr.substring(0, Math.min(10240, responseStr.length())));
            throw x;
        }
    } finally {
        // Release the connection to the connection manager
        if (method != null)
            method.releaseConnection();

        // really not necessary if running in the server because the reaper thread
        // of our connection manager will take care it.
        // if called from CLI, all connections will be closed when the CLI
        // exits. Leave it here anyway.
        if (!mKeepAlive)
            mClient.getHttpConnectionManager().closeIdleConnections(0);
    }
}
From source file:org.apache.camel.component.http.HttpProducer.java
public void process(Exchange exchange) throws Exception { // if we bridge endpoint then we need to skip matching headers with the HTTP_QUERY to avoid sending // duplicated headers to the receiver, so use this skipRequestHeaders as the list of headers to skip Map<String, Object> skipRequestHeaders = null; if (getEndpoint().isBridgeEndpoint()) { exchange.setProperty(Exchange.SKIP_GZIP_ENCODING, Boolean.TRUE); String queryString = exchange.getIn().getHeader(Exchange.HTTP_QUERY, String.class); if (queryString != null) { skipRequestHeaders = URISupport.parseQuery(queryString); }//from w w w. j ava2s . c o m // Need to remove the Host key as it should be not used exchange.getIn().getHeaders().remove("host"); } HttpMethod method = createMethod(exchange); Message in = exchange.getIn(); String httpProtocolVersion = in.getHeader(Exchange.HTTP_PROTOCOL_VERSION, String.class); if (httpProtocolVersion != null) { // set the HTTP protocol version HttpMethodParams params = method.getParams(); params.setVersion(HttpVersion.parse(httpProtocolVersion)); } HeaderFilterStrategy strategy = getEndpoint().getHeaderFilterStrategy(); // propagate headers as HTTP headers for (Map.Entry<String, Object> entry : in.getHeaders().entrySet()) { String key = entry.getKey(); Object headerValue = in.getHeader(key); if (headerValue != null) { // use an iterator as there can be multiple values. 
(must not use a delimiter, and allow empty values) final Iterator<?> it = ObjectHelper.createIterator(headerValue, null, true); // the value to add as request header final List<String> values = new ArrayList<String>(); // if its a multi value then check each value if we can add it and for multi values they // should be combined into a single value while (it.hasNext()) { String value = exchange.getContext().getTypeConverter().convertTo(String.class, it.next()); // we should not add headers for the parameters in the uri if we bridge the endpoint // as then we would duplicate headers on both the endpoint uri, and in HTTP headers as well if (skipRequestHeaders != null && skipRequestHeaders.containsKey(key)) { continue; } if (value != null && strategy != null && !strategy.applyFilterToCamelHeaders(key, value, exchange)) { values.add(value); } } // add the value(s) as a http request header if (values.size() > 0) { // use the default toString of a ArrayList to create in the form [xxx, yyy] // if multi valued, for a single value, then just output the value as is String s = values.size() > 1 ? values.toString() : values.get(0); method.addRequestHeader(key, s); } } } // lets store the result in the output message. try { if (LOG.isDebugEnabled()) { LOG.debug("Executing http {} method: {}", method.getName(), method.getURI().toString()); } int responseCode = executeMethod(method); LOG.debug("Http responseCode: {}", responseCode); if (!throwException) { // if we do not use failed exception then populate response for all response codes populateResponse(exchange, method, in, strategy, responseCode); } else { if (responseCode >= 100 && responseCode < 300) { // only populate response for OK response populateResponse(exchange, method, in, strategy, responseCode); } else { // operation failed so populate exception to throw throw populateHttpOperationFailedException(exchange, method, responseCode); } } } finally { method.releaseConnection(); } }
From source file:org.apache.nutch.protocol.httpclient.HttpResponse.java
/** * Fetches the given <code>url</code> and prepares HTTP response. * /*from w w w . j ava 2 s . co m*/ * @param http * An instance of the implementation class of this plugin * @param url * URL to be fetched * @param datum * Crawl data * @param followRedirects * Whether to follow redirects; follows redirect if and only if this * is true * @return HTTP response * @throws IOException * When an error occurs */ HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException { // Prepare GET method for HTTP request this.url = url; GetMethod get = new GetMethod(url.toString()); get.setFollowRedirects(followRedirects); get.setDoAuthentication(true); if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) { get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime())); } // Set HTTP parameters HttpMethodParams params = get.getParams(); if (http.getUseHttp11()) { params.setVersion(HttpVersion.HTTP_1_1); } else { params.setVersion(HttpVersion.HTTP_1_0); } params.makeLenient(); params.setContentCharset("UTF-8"); if (http.isCookieEnabled()) { params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true); } else { params.setCookiePolicy(CookiePolicy.IGNORE_COOKIES); } // XXX (ab) not sure about this... the default is to retry 3 times; if // XXX the request body was sent the method is not retried, so there is // XXX little danger in retrying... 
// params.setParameter(HttpMethodParams.RETRY_HANDLER, null); if (http.isCookieEnabled() && datum.getMetaData().containsKey(http.COOKIE)) { String cookie = ((Text) datum.getMetaData().get(http.COOKIE)).toString(); get.addRequestHeader("Cookie", cookie); } try { HttpClient client = Http.getClient(); client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941 code = client.executeMethod(get); Header[] heads = get.getResponseHeaders(); for (int i = 0; i < heads.length; i++) { headers.set(heads[i].getName(), heads[i].getValue()); } // Limit download size int contentLength = Integer.MAX_VALUE; String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { try { contentLength = Integer.parseInt(contentLengthString.trim()); } catch (NumberFormatException ex) { throw new HttpException("bad content length: " + contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) { contentLength = http.getMaxContent(); } // always read content. Sometimes content is useful to find a cause // for error. 
InputStream in = get.getResponseBodyAsStream(); try { byte[] buffer = new byte[HttpBase.BUFFER_SIZE]; int bufferFilled = 0; int totalRead = 0; ByteArrayOutputStream out = new ByteArrayOutputStream(); while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && totalRead + bufferFilled <= contentLength) { totalRead += bufferFilled; out.write(buffer, 0, bufferFilled); } content = out.toByteArray(); } catch (Exception e) { if (code == 200) throw new IOException(e.toString()); // for codes other than 200 OK, we are fine with empty content } finally { if (in != null) { in.close(); } get.abort(); } StringBuilder fetchTrace = null; if (Http.LOG.isTraceEnabled()) { // Trace message fetchTrace = new StringBuilder( "url: " + url + "; status code: " + code + "; bytes received: " + content.length); if (getHeader(Response.CONTENT_LENGTH) != null) fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH)); if (getHeader(Response.LOCATION) != null) fetchTrace.append("; Location: " + getHeader(Response.LOCATION)); } // Extract gzip, x-gzip and deflate content if (content != null) { // check if we have to uncompress it String contentEncoding = headers.get(Response.CONTENT_ENCODING); if (contentEncoding != null && Http.LOG.isTraceEnabled()) fetchTrace.append("; Content-Encoding: " + contentEncoding); if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) { content = http.processGzipEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } else if ("deflate".equals(contentEncoding)) { content = http.processDeflateEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } } // Logger trace message if (Http.LOG.isTraceEnabled()) { Http.LOG.trace(fetchTrace.toString()); } } finally { get.releaseConnection(); } }
From source file:org.apache.nutch.protocol.httpclient.HttpResponseBak.java
/** * Fetches the given <code>url</code> and prepares HTTP response. * * @param http An instance of the implementation class * of this plugin * @param url URL to be fetched * @param datum Crawl data * @param followRedirects Whether to follow redirects; follows * redirect if and only if this is true * @return HTTP response * @throws IOException When an error occurs */// w w w .j a v a 2s. c o m HttpResponseBak(HttpBak http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException { // Prepare GET method for HTTP request this.url = url; GetMethod get = new GetMethod(url.toString()); get.setFollowRedirects(followRedirects); get.setDoAuthentication(true); if (datum.getModifiedTime() > 0) { get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime())); } // Set HTTP parameters HttpMethodParams params = get.getParams(); if (http.getUseHttp11()) { params.setVersion(HttpVersion.HTTP_1_1); } else { params.setVersion(HttpVersion.HTTP_1_0); } params.makeLenient(); params.setContentCharset("UTF-8"); params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true); // XXX (ab) not sure about this... the default is to retry 3 times; if // XXX the request body was sent the method is not retried, so there is // XXX little danger in retrying... 
// params.setParameter(HttpMethodParams.RETRY_HANDLER, null); try { code = Http.getClient().executeMethod(get); Header[] heads = get.getResponseHeaders(); for (int i = 0; i < heads.length; i++) { headers.set(heads[i].getName(), heads[i].getValue()); } // Limit download size int contentLength = Integer.MAX_VALUE; String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { try { contentLength = Integer.parseInt(contentLengthString.trim()); } catch (NumberFormatException ex) { throw new HttpException("bad content length: " + contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) { contentLength = http.getMaxContent(); } // always read content. Sometimes content is useful to find a cause // for error. InputStream in = get.getResponseBodyAsStream(); try { byte[] buffer = new byte[HttpBase.BUFFER_SIZE]; int bufferFilled = 0; int totalRead = 0; ByteArrayOutputStream out = new ByteArrayOutputStream(); while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && totalRead + bufferFilled <= contentLength) { totalRead += bufferFilled; out.write(buffer, 0, bufferFilled); } content = out.toByteArray(); } catch (Exception e) { if (code == 200) throw new IOException(e.toString()); // for codes other than 200 OK, we are fine with empty content } finally { if (in != null) { in.close(); } get.abort(); } StringBuilder fetchTrace = null; if (Http.LOG.isTraceEnabled()) { // Trace message fetchTrace = new StringBuilder( "url: " + url + "; status code: " + code + "; bytes received: " + content.length); if (getHeader(Response.CONTENT_LENGTH) != null) fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH)); if (getHeader(Response.LOCATION) != null) fetchTrace.append("; Location: " + getHeader(Response.LOCATION)); } // Extract gzip, x-gzip and deflate content if (content != null) { // check if we have to uncompress it String contentEncoding = headers.get(Response.CONTENT_ENCODING); 
if (contentEncoding != null && Http.LOG.isTraceEnabled()) fetchTrace.append("; Content-Encoding: " + contentEncoding); if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) { content = http.processGzipEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } else if ("deflate".equals(contentEncoding)) { content = http.processDeflateEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } } // Logger trace message if (Http.LOG.isTraceEnabled()) { Http.LOG.trace(fetchTrace.toString()); } } finally { get.releaseConnection(); } }
From source file:org.apache.nutch.protocol.httpclient.proxy.HttpResponse.java
/** * Fetches the given <code>url</code> and prepares HTTP response. * /*from w w w . jav a2 s . com*/ * @param http * An instance of the implementation class of this plugin * @param url * URL to be fetched * @param datum * Crawl data * @param followRedirects * Whether to follow redirects; follows redirect if and only if this * is true * @return HTTP response * @throws IOException * When an error occurs */ HttpResponse(Http http, URL url, CrawlDatum datum, boolean followRedirects) throws IOException { // Prepare GET method for HTTP request this.url = url; GetMethod get = new GetMethod(url.toString()); get.setFollowRedirects(followRedirects); get.setDoAuthentication(true); if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) { get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(datum.getModifiedTime())); } // Set HTTP parameters HttpMethodParams params = get.getParams(); if (http.getUseHttp11()) { params.setVersion(HttpVersion.HTTP_1_1); } else { params.setVersion(HttpVersion.HTTP_1_0); } params.makeLenient(); params.setContentCharset("UTF-8"); params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true); // XXX (ab) not sure about this... the default is to retry 3 times; if // XXX the request body was sent the method is not retried, so there is // XXX little danger in retrying... 
// params.setParameter(HttpMethodParams.RETRY_HANDLER, null); try { HttpClient client = Http.getClient(); client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941 code = client.executeMethod(get); Header[] heads = get.getResponseHeaders(); for (int i = 0; i < heads.length; i++) { headers.set(heads[i].getName(), heads[i].getValue()); } // Limit download size int contentLength = Integer.MAX_VALUE; String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { try { contentLength = Integer.parseInt(contentLengthString.trim()); } catch (NumberFormatException ex) { throw new HttpException("bad content length: " + contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) { contentLength = http.getMaxContent(); } // always read content. Sometimes content is useful to find a cause // for error. InputStream in = get.getResponseBodyAsStream(); try { byte[] buffer = new byte[HttpBase.BUFFER_SIZE]; int bufferFilled = 0; int totalRead = 0; ByteArrayOutputStream out = new ByteArrayOutputStream(); while ((bufferFilled = in.read(buffer, 0, buffer.length)) != -1 && totalRead + bufferFilled <= contentLength) { totalRead += bufferFilled; out.write(buffer, 0, bufferFilled); } content = out.toByteArray(); } catch (Exception e) { if (code == 200) throw new IOException(e.toString()); // for codes other than 200 OK, we are fine with empty content } finally { if (in != null) { in.close(); } get.abort(); } StringBuilder fetchTrace = null; if (Http.LOG.isTraceEnabled()) { // Trace message fetchTrace = new StringBuilder( "url: " + url + "; status code: " + code + "; bytes received: " + content.length); if (getHeader(Response.CONTENT_LENGTH) != null) fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH)); if (getHeader(Response.LOCATION) != null) fetchTrace.append("; Location: " + getHeader(Response.LOCATION)); } // Extract gzip, x-gzip and deflate content if 
(content != null) { // check if we have to uncompress it String contentEncoding = headers.get(Response.CONTENT_ENCODING); if (contentEncoding != null && Http.LOG.isTraceEnabled()) fetchTrace.append("; Content-Encoding: " + contentEncoding); if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) { content = http.processGzipEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } else if ("deflate".equals(contentEncoding)) { content = http.processDeflateEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); } } // Logger trace message if (Http.LOG.isTraceEnabled()) { Http.LOG.trace(fetchTrace.toString()); } } finally { get.releaseConnection(); } }
From source file:org.apache.nutch.protocol.webdriver.HttpResponse.java
/** * Fetches the given <code>url</code> and prepares HTTP response. Fetch the * content using WebDriver to extract HTML from Ajax site, other responses are * fetches using HTTPClient.//from w ww .j a v a 2 s . c o m * * @param http * An instance of the implementation class of this plugin * @param url * URL to be fetched * @param page * WebPage * @param followRedirects * Whether to follow redirects; follows redirect if and only if this * is true * @return HTTP response * @throws IOException * When an error occurs */ HttpResponse(Http http, URL url, WebPage page, Configuration conf) throws IOException { // Prepare GET method for HTTP request this.url = url; this.conf = conf; GetMethod get = new GetMethod(url.toString()); get.setFollowRedirects(false); get.setDoAuthentication(true); if (page.getModifiedTime() > 0) { get.setRequestHeader("If-Modified-Since", HttpDateFormat.toString(page.getModifiedTime())); } // Set HTTP parameters HttpMethodParams params = get.getParams(); if (http.getUseHttp11()) { params.setVersion(HttpVersion.HTTP_1_1); } else { params.setVersion(HttpVersion.HTTP_1_0); } params.makeLenient(); params.setContentCharset("UTF-8"); try { HttpClient client = Http.getClient(); client.getParams().setParameter("http.useragent", http.getUserAgent()); // NUTCH-1941 code = client.executeMethod(get); Header[] heads = get.getResponseHeaders(); for (int i = 0; i < heads.length; i++) { headers.set(heads[i].getName(), heads[i].getValue()); } readPlainContent(url); StringBuilder fetchTrace = null; if (Http.LOG.isTraceEnabled()) { // Trace message fetchTrace = new StringBuilder( "url: " + url + "; status code: " + code + "; bytes received: " + content.length); if (getHeader(Response.CONTENT_LENGTH) != null) fetchTrace.append("; Content-Length: " + getHeader(Response.CONTENT_LENGTH)); if (getHeader(Response.LOCATION) != null) fetchTrace.append("; Location: " + getHeader(Response.LOCATION)); } // add headers in metadata to row if (page.getHeaders() != null) { 
page.getHeaders().clear(); } for (String key : headers.names()) { page.getHeaders().put(new Utf8(key), new Utf8(headers.get(key))); } // Logger trace message if (Http.LOG.isTraceEnabled()) { Http.LOG.trace(fetchTrace.toString()); } } finally { get.releaseConnection(); } }
From source file:org.eclipse.smila.connectivity.framework.crawler.web.http.HttpResponse.java
/** * Sets the http parameters.// w ww . j a va2s . c om * * @param http * the http * @param httpMethod * the http method */ private void setHttpParameters(HttpBase http, HttpMethodBase httpMethod) { httpMethod.setFollowRedirects(false); httpMethod.setRequestHeader("User-Agent", http.getUserAgent()); httpMethod.setRequestHeader("Referer", http.getReferer()); httpMethod.setDoAuthentication(true); for (Header header : http.getHeaders()) { httpMethod.addRequestHeader(header); } final HttpMethodParams params = httpMethod.getParams(); if (http.getUseHttp11()) { params.setVersion(HttpVersion.HTTP_1_1); } else { params.setVersion(HttpVersion.HTTP_1_0); } params.makeLenient(); params.setContentCharset("UTF-8"); if (http.isCookiesEnabled()) { params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); } else { params.setCookiePolicy(CookiePolicy.IGNORE_COOKIES); } params.setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true); // the default is to retry 3 times; if // the request body was sent the method is not retried, so there is // little danger in retrying // retries are handled on the higher level params.setParameter(HttpMethodParams.RETRY_HANDLER, null); }