Usage examples for `org.apache.http.client.ClientProtocolException#getCause()`
public synchronized Throwable getCause()
From source file:org.commonjava.propulsor.client.http.ClientHttpSupport.java
public void putWithStream(final String path, final InputStream stream, final int... responseCodes) throws ClientHttpException { connect();//from ww w .j a v a 2 s . c o m final HttpPut put = newRawPut(buildUrl(baseUrl, path)); final CloseableHttpClient client = newClient(); CloseableHttpResponse response = null; try { put.setEntity(new InputStreamEntity(stream)); response = client.execute(put); final StatusLine sl = response.getStatusLine(); if (!validResponseCode(sl.getStatusCode(), responseCodes)) { throw new ClientProtocolException(new ClientHttpException(sl.getStatusCode(), "Error in response from: %s.\n%s", path, new ClientHttpResponseErrorDetails(response))); } } catch (final ClientProtocolException e) { final Throwable cause = e.getCause(); if (cause != null && (cause instanceof ClientHttpException)) { throw (ClientHttpException) cause; } throw new ClientHttpException("Client request failed: %s", e, e.getMessage()); } catch (final IOException e) { throw new ClientHttpException("Client request failed: %s", e, e.getMessage()); } finally { cleanupResources(put, response, client); } }
From source file:org.commonjava.indy.client.core.IndyClientHttp.java
public void putWithStream(final String path, final InputStream stream, final int... responseCodes) throws IndyClientException { connect();//from w ww .ja v a2s . c o m final HttpPut put = newRawPut(buildUrl(baseUrl, path)); final CloseableHttpClient client = newClient(); CloseableHttpResponse response = null; try { put.setEntity(new InputStreamEntity(stream)); response = client.execute(put, newContext()); final StatusLine sl = response.getStatusLine(); if (!validResponseCode(sl.getStatusCode(), responseCodes)) { throw new ClientProtocolException(new IndyClientException(sl.getStatusCode(), "Error in response from: %s.\n%s", path, new IndyResponseErrorDetails(response))); } } catch (final ClientProtocolException e) { final Throwable cause = e.getCause(); if (cause != null && (cause instanceof IndyClientException)) { throw (IndyClientException) cause; } throw new IndyClientException("Indy request failed: %s", e, e.getMessage()); } catch (final IOException e) { throw new IndyClientException("Indy request failed: %s", e, e.getMessage()); } finally { cleanupResources(put, response, client); } }
From source file:com.frochr123.fabqr.gui.FabQRUploadDialog.java
/**
 * Handler for the "Publish" button: validates the form, gathers project data from the
 * VisiCut model, and uploads everything to the configured FabQR instance on a worker thread.
 * All validation failures are surfaced to the user via {@code showStatus} with a localized
 * message from {@code bundle}; nothing is thrown out of this handler.
 */
private void btnPublishActionPerformed(java.awt.event.ActionEvent evt)//GEN-FIRST:event_btnPublishActionPerformed
{//GEN-HEADEREND:event_btnPublishActionPerformed
    // Disable all GUI elements and show the loading indicator while the upload runs.
    handleGUIElements(false, true);

    // Validation + upload happen off the EDT so the dialog stays responsive.
    // NOTE(review): showStatus/handleGUIElements are called from this worker thread —
    // presumably they marshal back to the EDT internally; verify.
    new Thread(new Runnable() {
        public void run() {
            try {
                // Abort early unless the whole FabQR/VisiCut environment is usable.
                if (MainView.getInstance() == null || VisicutModel.getInstance() == null
                        || VisicutModel.getInstance().getPlfFile() == null
                        || PreferencesManager.getInstance() == null
                        || PreferencesManager.getInstance().getPreferences() == null
                        || !FabQRFunctions.isFabqrActive() || FabQRFunctions.getFabqrPrivateURL() == null
                        || FabQRFunctions.getFabqrPrivateURL().isEmpty()) {
                    throw new Exception(bundle.getString("ERROR_CRITICAL"));
                }

                // Normalize the free-text fields (null -> empty, trimmed).
                String name = txtName.getText();
                String email = txtEmail.getText();
                String projectName = txtProjectName.getText();
                name = ((name == null) ? "" : name.trim());
                email = ((email == null) ? "" : email.trim());
                projectName = ((projectName == null) ? "" : projectName.trim());

                // Project name is mandatory and must be at least 3 characters.
                if (projectName.isEmpty()) {
                    throw new Exception(bundle.getString("ERROR_EMPTY_PROJECT_NAME"));
                }
                if (projectName.length() < 3) {
                    throw new Exception(bundle.getString("ERROR_TOO_SHORT_PROJECT_NAME"));
                }

                // License index: -1 = invalid, 0 = anonymous license (no name/mail needed),
                // > 0 = license that requires attribution (name and email mandatory).
                int licenseIndex = cmbbxLicense.getSelectedIndex();
                if (licenseIndex < 0) {
                    throw new Exception(bundle.getString("ERROR_INVALID_LICENSE"));
                } else if (licenseIndex > 0) {
                    if (name.isEmpty()) {
                        throw new Exception(bundle.getString("ERROR_LICENSE_NEEDS_NAME"));
                    }
                    if (email.isEmpty()) {
                        throw new Exception(bundle.getString("ERROR_LICENSE_NEEDS_EMAIL"));
                    }
                }

                // Validate the email whenever it is required or voluntarily supplied.
                if (licenseIndex > 0 || !email.isEmpty()) {
                    // Deliberately loose pattern: just "something@something.something".
                    Pattern emailPattern = Pattern.compile("^.+@.+\\..+$");
                    if (!emailPattern.matcher(email).find()) {
                        throw new Exception(bundle.getString("ERROR_INVALID_EMAIL"));
                    }
                }

                // Build the comma-separated tool list; "Laser cutter" is always included.
                String tools = "Laser cutter";
                if (cbPCBSoldering.isSelected()) {
                    tools = tools + ",PCB / Soldering";
                }
                if (cb3DPrinter.isSelected()) {
                    tools = tools + ",3D printer";
                }
                if (cbCNCRouter.isSelected()) {
                    tools = tools + ",CNC router";
                }
                if (cbArduino.isSelected()) {
                    tools = tools + ",Arduino";
                }
                if (cbRaspberryPi.isSelected()) {
                    tools = tools + ",Raspberry Pi";
                }

                // Description is mandatory.
                String description = txtDescription.getText();
                description = ((description == null) ? "" : description.trim());
                if (description.isEmpty()) {
                    throw new Exception(bundle.getString("ERROR_EMPTY_DESCRIPTION"));
                }

                // Camera photo may be null; the generated scheme image must exist.
                BufferedImage imageReal = latestFullCameraImage;
                BufferedImage imageScheme = PreviewImageExport.generateImage(
                        RefreshProjectorThread.getProjectorWidth(), RefreshProjectorThread.getProjectorHeight(),
                        true);
                if (imageScheme == null) {
                    throw new Exception(bundle.getString("ERROR_EMPTY_SCHEME_IMAGE"));
                }

                // The PLF (project) file to upload.
                PlfFile plfFile = VisicutModel.getInstance().getPlfFile();
                if (plfFile == null) {
                    throw new Exception(bundle.getString("ERROR_INVALID_PLF_DATA"));
                }

                // Laser-cutter model name, taken from the currently selected device.
                String lasercutterName = "";
                if (VisicutModel.getInstance().getSelectedLaserDevice() != null
                        && VisicutModel.getInstance().getSelectedLaserDevice().getLaserCutter() != null) {
                    lasercutterName = VisicutModel.getInstance().getSelectedLaserDevice().getLaserCutter()
                            .getModelName();
                    lasercutterName = ((lasercutterName == null) ? "" : lasercutterName.trim());
                }
                if (lasercutterName.isEmpty()) {
                    throw new Exception(bundle.getString("ERROR_EMPTY_LASERCUTTER_NAME"));
                }

                // Material description, e.g. "Plywood, 4.0 mm" (name + thickness).
                String lasercutterMaterial = "";
                if (VisicutModel.getInstance().getMaterial() != null) {
                    String lasercutterMaterialName = VisicutModel.getInstance().getMaterial().getName();
                    lasercutterMaterialName = ((lasercutterMaterialName == null) ? ""
                            : lasercutterMaterialName.trim());
                    if (lasercutterMaterialName.isEmpty()) {
                        throw new Exception(bundle.getString("ERROR_EMPTY_MATERIAL_NAME"));
                    }
                    float lasercutterMaterialThickness = VisicutModel.getInstance().getMaterialThickness();
                    lasercutterMaterial = lasercutterMaterialName + ", "
                            + new Float(lasercutterMaterialThickness).toString() + " mm";
                }

                // FabLab name from preferences, used as the project location.
                String location = PreferencesManager.getInstance().getPreferences().getLabName();
                if (location == null || location.isEmpty()) {
                    throw new Exception(bundle.getString("ERROR_INVALID_FABLAB_NAME"));
                }

                // Perform the actual upload.
                FabQRFunctions.uploadFabQRProject(name, email, projectName, licenseIndex, tools, description,
                        location, imageReal, imageScheme, plfFile, lasercutterName, lasercutterMaterial);

                // Success: show the message, stop the loading icon, re-enable only the
                // close button so the user can dismiss the dialog.
                showStatus(bundle.getString("SUCCESS_MESSAGE"));
                lblStatus.setIcon(null);
                btnClose.setEnabled(true);
            } catch (ClientProtocolException e) {
                // HTTP-level failure: re-enable the GUI and prefer the wrapped cause's
                // message (the server-side detail) over the outer exception's message.
                handleGUIElements(true, true);
                if (e.getCause() != null && e.getCause().getMessage() != null
                        && !e.getCause().getMessage().isEmpty()) {
                    showStatus("HTTP exception: " + e.getCause().getMessage());
                } else {
                    showStatus(e.getMessage());
                }
            } catch (Exception e) {
                // Validation errors and everything else: re-enable the GUI and show
                // the (localized) message thrown above.
                handleGUIElements(true, true);
                showStatus(e.getMessage());
            }
        }
    }).start();
}
From source file:com.smartsheet.api.internal.http.DefaultHttpClient.java
/**
 * Make an HTTP request and return the response.
 *
 * Retries failed requests (per {@code shouldRetry}) inside an internal loop. To make
 * retries possible, both the request body stream and the response content stream are
 * wrapped so they support mark/reset, and are reset before each retry attempt.
 *
 * @param smartsheetRequest the smartsheet request
 * @return the HTTP response
 * @throws HttpClientException the HTTP client exception
 */
public HttpResponse request(HttpRequest smartsheetRequest) throws HttpClientException {
    Util.throwIfNull(smartsheetRequest);
    if (smartsheetRequest.getUri() == null) {
        throw new IllegalArgumentException("A Request URI is required.");
    }

    int attempt = 0;
    long start = System.currentTimeMillis();

    HttpRequestBase apacheHttpRequest;
    HttpResponse smartsheetResponse;

    InputStream bodyStream = null;
    if (smartsheetRequest.getEntity() != null && smartsheetRequest.getEntity().getContent() != null) {
        bodyStream = smartsheetRequest.getEntity().getContent();
    }
    // The retry logic will consume the body stream, so make sure it supports
    // mark/reset; if not, buffer it fully into a resettable in-memory stream.
    boolean canRetryRequest = bodyStream == null || bodyStream.markSupported();
    if (!canRetryRequest) {
        try {
            // attempt to wrap the body stream in an input-stream that does support mark/reset
            bodyStream = new ByteArrayInputStream(StreamUtil.readBytesFromStream(bodyStream));
            // close the old stream (just to be tidy) and then replace it with a reset-able stream
            smartsheetRequest.getEntity().getContent().close();
            smartsheetRequest.getEntity().setContent(bodyStream);
            canRetryRequest = true;
        } catch (IOException ignore) {
            // Best-effort: if buffering fails we simply proceed without retry support.
        }
    }

    // The retry loop: exits via break (success / no more retries) or by throwing.
    while (true) {
        apacheHttpRequest = createApacheRequest(smartsheetRequest);

        // Copy caller-supplied HTTP headers onto the Apache request.
        if (smartsheetRequest.getHeaders() != null) {
            for (Map.Entry<String, String> header : smartsheetRequest.getHeaders().entrySet()) {
                apacheHttpRequest.addHeader(header.getKey(), header.getValue());
            }
        }

        HttpEntitySnapshot requestEntityCopy = null;
        HttpEntitySnapshot responseEntityCopy = null;
        // Set the HTTP entity on entity-enclosing requests (POST/PUT).
        final HttpEntity entity = smartsheetRequest.getEntity();
        if (apacheHttpRequest instanceof HttpEntityEnclosingRequestBase && entity != null
                && entity.getContent() != null) {
            try {
                // We need access to the original request stream so we can log it
                // (in the event of errors and/or tracing).
                requestEntityCopy = new HttpEntitySnapshot(entity);
            } catch (IOException iox) {
                logger.error("failed to make copy of original request entity - {}", iox);
            }
            InputStreamEntity streamEntity = new InputStreamEntity(entity.getContent(),
                    entity.getContentLength());
            streamEntity.setChunked(false); // why? not supported by library?
            ((HttpEntityEnclosingRequestBase) apacheHttpRequest).setEntity(streamEntity);
        }

        // Mark the body so we can reset it on retry.
        if (canRetryRequest && bodyStream != null) {
            bodyStream.mark((int) smartsheetRequest.getEntity().getContentLength());
        }

        // Make the HTTP request. NOTE(review): apacheHttpResponse is a field, not a
        // local — presumably so releaseConnection() can reach it; verify thread-safety.
        smartsheetResponse = new HttpResponse();
        HttpContext context = new BasicHttpContext();
        try {
            long startTime = System.currentTimeMillis();
            apacheHttpResponse = this.httpClient.execute(apacheHttpRequest, context);
            long endTime = System.currentTimeMillis();

            // Set request headers to values ACTUALLY SENT (not just created by us); this
            // would include 'Connection', 'Accept-Encoding', etc. However, if a proxy is
            // used, this may be the proxy's CONNECT request, hence the method check first.
            Object httpRequest = context.getAttribute("http.request");
            if (httpRequest != null && HttpRequestWrapper.class.isAssignableFrom(httpRequest.getClass())) {
                HttpRequestWrapper actualRequest = (HttpRequestWrapper) httpRequest;
                switch (HttpMethod.valueOf(actualRequest.getMethod())) {
                case GET:
                case POST:
                case PUT:
                case DELETE:
                    apacheHttpRequest.setHeaders(((HttpRequestWrapper) httpRequest).getAllHeaders());
                    break;
                }
            }

            // Copy the returned headers into the smartsheet response.
            smartsheetResponse.setHeaders(new HashMap<String, String>());
            for (Header header : apacheHttpResponse.getAllHeaders()) {
                smartsheetResponse.getHeaders().put(header.getName(), header.getValue());
            }
            smartsheetResponse.setStatus(apacheHttpResponse.getStatusLine().getStatusCode(),
                    apacheHttpResponse.getStatusLine().toString());

            // Copy the returned entity (and snapshot it for logging/tracing).
            if (apacheHttpResponse.getEntity() != null) {
                HttpEntity httpEntity = new HttpEntity();
                httpEntity.setContentType(apacheHttpResponse.getEntity().getContentType().getValue());
                httpEntity.setContentLength(apacheHttpResponse.getEntity().getContentLength());
                httpEntity.setContent(apacheHttpResponse.getEntity().getContent());
                smartsheetResponse.setEntity(httpEntity);
                responseEntityCopy = new HttpEntitySnapshot(httpEntity);
            }

            long responseTime = endTime - startTime;
            logRequest(apacheHttpRequest, requestEntityCopy, smartsheetResponse, responseEntityCopy,
                    responseTime);

            if (traces.size() > 0) {
                // Trace-logging of request and response (if so configured).
                RequestAndResponseData requestAndResponseData = RequestAndResponseData.of(apacheHttpRequest,
                        requestEntityCopy, smartsheetResponse, responseEntityCopy, traces);
                TRACE_WRITER.println(requestAndResponseData.toString(tracePrettyPrint));
            }

            if (smartsheetResponse.getStatusCode() == 200) {
                // Call successful; exit the retry loop.
                break;
            }

            // The retry logic might consume the content stream, so make sure it
            // supports mark/reset (buffering it in memory if necessary) and mark it.
            InputStream contentStream = smartsheetResponse.getEntity().getContent();
            if (!contentStream.markSupported()) {
                // wrap the response stream in an input-stream that does support mark/reset
                contentStream = new ByteArrayInputStream(StreamUtil.readBytesFromStream(contentStream));
                // close the old stream (just to be tidy) and then replace it with a reset-able stream
                smartsheetResponse.getEntity().getContent().close();
                smartsheetResponse.getEntity().setContent(contentStream);
            }
            try {
                contentStream.mark((int) smartsheetResponse.getEntity().getContentLength());
                long timeSpent = System.currentTimeMillis() - start;
                if (!shouldRetry(++attempt, timeSpent, smartsheetResponse)) {
                    // Should not retry, or retry time exceeded; exit the retry loop.
                    break;
                }
            } finally {
                // Rewind both streams so the retried request sees them from the start.
                if (bodyStream != null) {
                    bodyStream.reset();
                }
                contentStream.reset();
            }
            // Moving this to finally causes issues because the socket is closed
            // (which means the response stream is closed).
            this.releaseConnection();
        } catch (ClientProtocolException e) {
            try {
                logger.warn("ClientProtocolException " + e.getMessage());
                logger.warn("{}", RequestAndResponseData.of(apacheHttpRequest, requestEntityCopy,
                        smartsheetResponse, responseEntityCopy, REQUEST_RESPONSE_SUMMARY));
                // If this is a PUT and was retried by the http client, the body content
                // stream is at the end and is a NonRepeatableRequest. If we marked the
                // body content stream prior to execute, reset and retry.
                if (canRetryRequest && e.getCause() instanceof NonRepeatableRequestException) {
                    if (smartsheetRequest.getEntity() != null) {
                        smartsheetRequest.getEntity().getContent().reset();
                    }
                    continue;
                }
            } catch (IOException ignore) {
                // Logging/reset failed; fall through to wrapping the original error.
            }
            throw new HttpClientException("Error occurred.", e);
        } catch (NoHttpResponseException e) {
            try {
                logger.warn("NoHttpResponseException " + e.getMessage());
                logger.warn("{}", RequestAndResponseData.of(apacheHttpRequest, requestEntityCopy,
                        smartsheetResponse, responseEntityCopy, REQUEST_RESPONSE_SUMMARY));
                // Check to see if the response was empty and this was a POST. All other
                // HTTP methods will be automatically retried by the http client.
                // (POST is non-idempotent and is not retried automatically, but is safe
                // for us to retry.)
                if (canRetryRequest && smartsheetRequest.getMethod() == HttpMethod.POST) {
                    if (smartsheetRequest.getEntity() != null) {
                        smartsheetRequest.getEntity().getContent().reset();
                    }
                    continue;
                }
            } catch (IOException ignore) {
                // Logging/reset failed; fall through to wrapping the original error.
            }
            throw new HttpClientException("Error occurred.", e);
        } catch (IOException e) {
            try {
                logger.warn("{}", RequestAndResponseData.of(apacheHttpRequest, requestEntityCopy,
                        smartsheetResponse, responseEntityCopy, REQUEST_RESPONSE_SUMMARY));
            } catch (IOException ignore) {
                // Logging failed; fall through to wrapping the original error.
            }
            throw new HttpClientException("Error occurred.", e);
        }
    }
    return smartsheetResponse;
}
From source file:crawlercommons.fetcher.SimpleHttpFetcher.java
/**
 * Executes the prepared HTTP request for {@code url} and returns the fetched content,
 * mapping HttpClient failures onto crawler-commons fetch exceptions.
 *
 * Handles: redirect tracking (via context attributes), mime-type filtering, content
 * truncation at a per-mime-type maximum size, a minimum-response-rate abort, and
 * transparent gzip decompression.
 *
 * @param request pre-built request (URI is set here from {@code url})
 * @param url     the URL to fetch
 * @param payload opaque caller payload, passed through to the result
 * @throws BaseFetchException subclass describing the specific failure mode
 */
private FetchedResult doRequest(HttpRequestBase request, String url, Payload payload) throws BaseFetchException {
    LOGGER.trace("Fetching " + url);

    HttpResponse response;
    long readStartTime;
    Metadata headerMap = new Metadata();
    String redirectedUrl = null;
    String newBaseUrl = null;
    int numRedirects = 0;
    boolean needAbort = true;
    String contentType = "";
    String mimeType = "";
    String hostAddress = null;

    // Create a local instance of cookie store, and bind to local context.
    // Without this we get killed w/lots of threads, due to sync() on single cookie store.
    HttpContext localContext = new BasicHttpContext();
    CookieStore cookieStore = new BasicCookieStore();
    localContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore);

    StringBuilder fetchTrace = null;
    if (LOGGER.isTraceEnabled()) {
        fetchTrace = new StringBuilder("Fetched url: " + url);
    }

    try {
        request.setURI(new URI(url));

        readStartTime = System.currentTimeMillis();
        response = _httpClient.execute(request, localContext);

        Header[] headers = response.getAllHeaders();
        for (Header header : headers) {
            headerMap.add(header.getName(), header.getValue());
        }

        int httpStatus = response.getStatusLine().getStatusCode();
        if (LOGGER.isTraceEnabled()) {
            fetchTrace.append("; status code: " + httpStatus);
            if (headerMap.get(HttpHeaders.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + headerMap.get(HttpHeaders.CONTENT_LENGTH));
            }
            if (headerMap.get(HttpHeaders.LOCATION) != null) {
                fetchTrace.append("; Location: " + headerMap.get(HttpHeaders.LOCATION));
            }
        }

        if ((httpStatus < 200) || (httpStatus >= 300)) {
            // We can't just check against SC_OK, as some wackos return 201, 202, etc.
            throw new HttpFetchException(url, "Error fetching " + url, httpStatus, headerMap);
        }

        // Redirect bookkeeping saved into the context by the redirect strategy.
        redirectedUrl = extractRedirectedUrl(url, localContext);

        URI permRedirectUri = (URI) localContext.getAttribute(PERM_REDIRECT_CONTEXT_KEY);
        if (permRedirectUri != null) {
            newBaseUrl = permRedirectUri.toURL().toExternalForm();
        }

        Integer redirects = (Integer) localContext.getAttribute(REDIRECT_COUNT_CONTEXT_KEY);
        if (redirects != null) {
            numRedirects = redirects.intValue();
        }

        hostAddress = (String) (localContext.getAttribute(HOST_ADDRESS));
        if (hostAddress == null) {
            throw new UrlFetchException(url, "Host address not saved in context");
        }

        Header cth = response.getFirstHeader(HttpHeaders.CONTENT_TYPE);
        if (cth != null) {
            contentType = cth.getValue();
        }

        // Check if we should abort due to mime-type filtering. Note that this will fail
        // if the server doesn't report a mime-type, but that's how we want it, as this
        // configuration is typically used when only a subset of parsers are
        // installed/enabled — we don't want the auto-detect code in Tika to get
        // triggered & try to process an unsupported type. If you want unknown mime-types
        // from the server to be processed, set "" as one of the valid mime-types in
        // FetcherPolicy.
        mimeType = getMimeTypeFromContentType(contentType);
        Set<String> mimeTypes = getValidMimeTypes();
        if ((mimeTypes != null) && (mimeTypes.size() > 0)) {
            if (!mimeTypes.contains(mimeType)) {
                throw new AbortedFetchException(url, "Invalid mime-type: " + mimeType,
                        AbortedFetchReason.INVALID_MIMETYPE);
            }
        }

        needAbort = false;
    } catch (ClientProtocolException e) {
        // Oleg guarantees that no abort is needed in the case of an IOException
        // (which this is a subclass of).
        needAbort = false;

        // If the root cause was a "too many redirects" error, we want to map this to a
        // specific exception that contains the final redirect.
        if (e.getCause() instanceof MyRedirectException) {
            MyRedirectException mre = (MyRedirectException) e.getCause();
            String redirectUrl = url;
            try {
                redirectUrl = mre.getUri().toURL().toExternalForm();
            } catch (MalformedURLException e2) {
                LOGGER.warn("Invalid URI saved during redirect handling: " + mre.getUri());
            }
            throw new RedirectFetchException(url, redirectUrl, mre.getReason());
        } else if (e.getCause() instanceof RedirectException) {
            throw new RedirectFetchException(url, extractRedirectedUrl(url, localContext),
                    RedirectExceptionReason.TOO_MANY_REDIRECTS);
        } else {
            throw new IOFetchException(url, e);
        }
    } catch (IOException e) {
        // Oleg guarantees that no abort is needed in the case of an IOException.
        needAbort = false;

        if (e instanceof ConnectionPoolTimeoutException) {
            // Should never happen, so let's dump some info about the connection pool.
            ThreadSafeClientConnManager cm = (ThreadSafeClientConnManager) _httpClient.getConnectionManager();
            int numConnections = cm.getConnectionsInPool();
            cm.closeIdleConnections(0, TimeUnit.MILLISECONDS);
            LOGGER.error(String.format(
                    "Got ConnectionPoolTimeoutException: %d connections before, %d after idle close",
                    numConnections, cm.getConnectionsInPool()));
        }

        throw new IOFetchException(url, e);
    } catch (URISyntaxException e) {
        throw new UrlFetchException(url, e.getMessage());
    } catch (IllegalStateException e) {
        throw new UrlFetchException(url, e.getMessage());
    } catch (BaseFetchException e) {
        throw e;
    } catch (Exception e) {
        // Map anything else to a generic IOFetchException.
        // TODO KKr - create generic fetch exception
        throw new IOFetchException(url, new IOException(e));
    } finally {
        safeAbort(needAbort, request);
    }

    // Figure out how much data we want to try to fetch.
    int maxContentSize = getMaxContentSize(mimeType);
    int targetLength = maxContentSize;
    boolean truncated = false;
    String contentLengthStr = headerMap.get(HttpHeaders.CONTENT_LENGTH);
    if (contentLengthStr != null) {
        try {
            int contentLength = Integer.parseInt(contentLengthStr);
            if (contentLength > targetLength) {
                truncated = true;
            } else {
                targetLength = contentLength;
            }
        } catch (NumberFormatException e) {
            // Ignore (and log) invalid content length values.
            LOGGER.warn("Invalid content length in header: " + contentLengthStr);
        }
    }

    // Now finally read in the response body, up to targetLength bytes.
    // Note that entity might be null, for zero length responses.
    byte[] content = new byte[0];
    long readRate = 0;
    HttpEntity entity = response.getEntity();
    needAbort = true;

    if (entity != null) {
        InputStream in = null;

        try {
            in = entity.getContent();
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead = 0;
            int totalRead = 0;
            ByteArrayOutputStream out = new ByteArrayOutputStream(DEFAULT_BYTEARRAY_SIZE);

            int readRequests = 0;
            int minResponseRate = getMinResponseRate();
            // TODO KKr - we need to monitor the rate while reading a single block. Look
            // at HttpClient metrics support for how to do this. Once we fix this, fix
            // the test to read a smaller (< 20K) chunk of data.
            while ((totalRead < targetLength)
                    && ((bytesRead = in.read(buffer, 0, Math.min(buffer.length, targetLength - totalRead))) != -1)) {
                readRequests += 1;
                totalRead += bytesRead;
                out.write(buffer, 0, bytesRead);

                // Assume read time is at least one millisecond, to avoid DBZ exception.
                long totalReadTime = Math.max(1, System.currentTimeMillis() - readStartTime);
                readRate = (totalRead * 1000L) / totalReadTime;

                // Don't bail on the first read cycle, as we can get a hiccup starting out.
                // Also don't bail if we've read everything we need.
                if ((readRequests > 1) && (totalRead < targetLength) && (readRate < minResponseRate)) {
                    throw new AbortedFetchException(url, "Slow response rate of " + readRate + " bytes/sec",
                            AbortedFetchReason.SLOW_RESPONSE_RATE);
                }

                // Check to see if we got interrupted.
                if (Thread.interrupted()) {
                    throw new AbortedFetchException(url, AbortedFetchReason.INTERRUPTED);
                }
            }

            content = out.toByteArray();
            needAbort = truncated || (in.available() > 0);
        } catch (IOException e) {
            // We don't need to abort if there's an IOException.
            throw new IOFetchException(url, e);
        } finally {
            safeAbort(needAbort, request);
            safeClose(in);
        }
    }

    // Toss truncated image content.
    if ((truncated) && (!isTextMimeType(mimeType))) {
        throw new AbortedFetchException(url, "Truncated image", AbortedFetchReason.CONTENT_SIZE);
    }

    // Now see if we need to uncompress the content.
    String contentEncoding = headerMap.get(HttpHeaders.CONTENT_ENCODING);
    if (contentEncoding != null) {
        if (LOGGER.isTraceEnabled()) {
            fetchTrace.append("; Content-Encoding: " + contentEncoding);
        }

        // TODO KKr We might want to just decompress a truncated gzip containing text
        // (since we have a max content size to save us from any gzip corruption). We
        // might want to break the following out into a separate method, by the way
        // (if not refactor this entire monolithic method).
        // NOTE(review): "deflate" encoding was once handled here but is currently
        // unsupported — only gzip/x-gzip are decompressed.
        try {
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                if (truncated) {
                    throw new AbortedFetchException(url, "Truncated compressed data",
                            AbortedFetchReason.CONTENT_SIZE);
                } else {
                    ExpandedResult expandedResult = EncodingUtils.processGzipEncoded(content, maxContentSize);
                    truncated = expandedResult.isTruncated();
                    if ((truncated) && (!isTextMimeType(mimeType))) {
                        throw new AbortedFetchException(url, "Truncated decompressed image",
                                AbortedFetchReason.CONTENT_SIZE);
                    } else {
                        content = expandedResult.getExpanded();
                        if (LOGGER.isTraceEnabled()) {
                            fetchTrace.append("; unzipped to " + content.length + " bytes");
                        }
                    }
                }
            }
        } catch (IOException e) {
            throw new IOFetchException(url, e);
        }
    }

    // Finally dump out the trace msg we've been building.
    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(fetchTrace.toString());
    }

    // TODO KKr - Save truncated flag in FetchedResult/FetchedDatum.
    return new FetchedResult(url, redirectedUrl, System.currentTimeMillis(), headerMap, content, contentType,
            (int) readRate, payload, newBaseUrl, numRedirects, hostAddress);
}
From source file:crawlercommons.fetcher.http.SimpleHttpFetcher.java
/**
 * Executes the prepared HTTP request for {@code url} and returns the fetched content,
 * mapping HttpClient failures onto crawler-commons fetch exceptions.
 *
 * Later variant of the fetcher: also propagates the response status code and reason
 * phrase into the {@link FetchedResult}, uses a thread-local cookie store, and checks
 * interruption without clearing the thread's interrupted flag.
 *
 * @param request pre-built request (URI is set here from {@code url})
 * @param url     the URL to fetch
 * @param payload opaque caller payload, passed through to the result
 * @throws BaseFetchException subclass describing the specific failure mode
 */
private FetchedResult doRequest(HttpRequestBase request, String url, Payload payload) throws BaseFetchException {
    LOGGER.trace("Fetching " + url);

    HttpResponse response;
    long readStartTime;
    Metadata headerMap = new Metadata();
    String redirectedUrl = null;
    String newBaseUrl = null;
    int numRedirects = 0;
    boolean needAbort = true;
    String contentType = "";
    String mimeType = "";
    String hostAddress = null;
    // Defaults reported when execute() throws before a status line is available.
    int statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
    String reasonPhrase = null;

    // Create a local instance of cookie store, and bind to local context.
    // Without this we get killed w/lots of threads, due to sync() on single
    // cookie store.
    HttpContext localContext = new BasicHttpContext();
    CookieStore cookieStore = localCookieStore.get();
    localContext.setAttribute(HttpClientContext.COOKIE_STORE, cookieStore);

    StringBuilder fetchTrace = null;
    if (LOGGER.isTraceEnabled()) {
        fetchTrace = new StringBuilder("Fetched url: " + url);
    }

    try {
        request.setURI(new URI(url));

        readStartTime = System.currentTimeMillis();
        response = _httpClient.execute(request, localContext);

        Header[] headers = response.getAllHeaders();
        for (Header header : headers) {
            headerMap.add(header.getName(), header.getValue());
        }

        statusCode = response.getStatusLine().getStatusCode();
        reasonPhrase = response.getStatusLine().getReasonPhrase();
        if (LOGGER.isTraceEnabled()) {
            fetchTrace.append("; status code: " + statusCode);
            if (headerMap.get(HttpHeaders.CONTENT_LENGTH) != null) {
                fetchTrace.append("; Content-Length: " + headerMap.get(HttpHeaders.CONTENT_LENGTH));
            }
            if (headerMap.get(HttpHeaders.LOCATION) != null) {
                fetchTrace.append("; Location: " + headerMap.get(HttpHeaders.LOCATION));
            }
        }

        if ((statusCode < 200) || (statusCode >= 300)) {
            // We can't just check against SC_OK, as some wackos return 201, 202, etc.
            throw new HttpFetchException(url, "Error fetching " + url + " due to \"" + reasonPhrase + "\"",
                    statusCode, headerMap);
        }

        // Redirect bookkeeping saved into the context by the redirect strategy.
        redirectedUrl = extractRedirectedUrl(url, localContext);

        URI permRedirectUri = (URI) localContext.getAttribute(PERM_REDIRECT_CONTEXT_KEY);
        if (permRedirectUri != null) {
            newBaseUrl = permRedirectUri.toURL().toExternalForm();
        }

        Integer redirects = (Integer) localContext.getAttribute(REDIRECT_COUNT_CONTEXT_KEY);
        if (redirects != null) {
            numRedirects = redirects.intValue();
        }

        hostAddress = (String) (localContext.getAttribute(HOST_ADDRESS));
        if (hostAddress == null) {
            throw new UrlFetchException(url, "Host address not saved in context");
        }

        Header cth = response.getFirstHeader(HttpHeaders.CONTENT_TYPE);
        if (cth != null) {
            contentType = cth.getValue();
        }

        // Check if we should abort due to mime-type filtering. Note that this will fail
        // if the server doesn't report a mime-type, but that's how we want it, as this
        // configuration is typically used when only a subset of parsers are
        // installed/enabled — we don't want the auto-detect code in Tika to get
        // triggered & try to process an unsupported type. If you want unknown mime-types
        // from the server to be processed, set "" as one of the valid mime-types in
        // FetcherPolicy.
        mimeType = getMimeTypeFromContentType(contentType);
        Set<String> mimeTypes = getValidMimeTypes();
        if ((mimeTypes != null) && (mimeTypes.size() > 0)) {
            if (!mimeTypes.contains(mimeType)) {
                throw new AbortedFetchException(url, "Invalid mime-type: " + mimeType,
                        AbortedFetchReason.INVALID_MIMETYPE);
            }
        }

        needAbort = false;
    } catch (ClientProtocolException e) {
        // Oleg guarantees that no abort is needed in the case of an IOException
        // (which this is a subclass of).
        needAbort = false;

        // If the root cause was a "too many redirects" error, we want to map this to a
        // specific exception that contains the final redirect.
        if (e.getCause() instanceof MyRedirectException) {
            MyRedirectException mre = (MyRedirectException) e.getCause();
            String redirectUrl = url;
            try {
                redirectUrl = mre.getUri().toURL().toExternalForm();
            } catch (MalformedURLException e2) {
                LOGGER.warn("Invalid URI saved during redirect handling: " + mre.getUri());
            }
            throw new RedirectFetchException(url, redirectUrl, mre.getReason());
        } else if (e.getCause() instanceof RedirectException) {
            LOGGER.error(e.getMessage());
            throw new RedirectFetchException(url, extractRedirectedUrl(url, localContext),
                    RedirectExceptionReason.TOO_MANY_REDIRECTS);
        } else {
            throw new IOFetchException(url, e);
        }
    } catch (IOException e) {
        // Oleg guarantees that no abort is needed in the case of an IOException.
        needAbort = false;

        throw new IOFetchException(url, e);
    } catch (URISyntaxException e) {
        throw new UrlFetchException(url, e.getMessage());
    } catch (IllegalStateException e) {
        throw new UrlFetchException(url, e.getMessage());
    } catch (BaseFetchException e) {
        throw e;
    } catch (Exception e) {
        // Map anything else to a generic IOFetchException.
        // TODO KKr - create generic fetch exception
        throw new IOFetchException(url, new IOException(e));
    } finally {
        safeAbort(needAbort, request);
    }

    // Figure out how much data we want to try to fetch.
    int maxContentSize = getMaxContentSize(mimeType);
    int targetLength = maxContentSize;
    boolean truncated = false;
    String contentLengthStr = headerMap.get(HttpHeaders.CONTENT_LENGTH);
    if (contentLengthStr != null) {
        try {
            int contentLength = Integer.parseInt(contentLengthStr);
            if (contentLength > targetLength) {
                truncated = true;
            } else {
                targetLength = contentLength;
            }
        } catch (NumberFormatException e) {
            // Ignore (and log) invalid content length values.
            LOGGER.warn("Invalid content length in header: " + contentLengthStr);
        }
    }

    // Now finally read in the response body, up to targetLength bytes.
    // Note that entity might be null, for zero length responses.
    byte[] content = new byte[0];
    long readRate = 0;
    HttpEntity entity = response.getEntity();
    needAbort = true;

    if (entity != null) {
        InputStream in = null;

        try {
            in = entity.getContent();
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead = 0;
            int totalRead = 0;
            ByteArrayOutputStream out = new ByteArrayOutputStream(DEFAULT_BYTEARRAY_SIZE);

            int readRequests = 0;
            int minResponseRate = getMinResponseRate();
            // TODO KKr - we need to monitor the rate while reading a single block. Look
            // at HttpClient metrics support for how to do this. Once we fix this, fix
            // the test to read a smaller (< 20K) chunk of data.
            while ((totalRead < targetLength)
                    && ((bytesRead = in.read(buffer, 0, Math.min(buffer.length, targetLength - totalRead))) != -1)) {
                readRequests += 1;
                totalRead += bytesRead;
                out.write(buffer, 0, bytesRead);

                // Assume read time is at least one millisecond, to avoid DBZ exception.
                long totalReadTime = Math.max(1, System.currentTimeMillis() - readStartTime);
                readRate = (totalRead * 1000L) / totalReadTime;

                // Don't bail on the first read cycle, as we can get a hiccup starting out.
                // Also don't bail if we've read everything we need.
                if ((readRequests > 1) && (totalRead < targetLength) && (readRate < minResponseRate)) {
                    throw new AbortedFetchException(url, "Slow response rate of " + readRate + " bytes/sec",
                            AbortedFetchReason.SLOW_RESPONSE_RATE);
                }

                // Check to see if we got interrupted, but don't clear the interrupted flag.
                if (Thread.currentThread().isInterrupted()) {
                    throw new AbortedFetchException(url, AbortedFetchReason.INTERRUPTED);
                }
            }

            content = out.toByteArray();
            needAbort = truncated || (in.available() > 0);
        } catch (IOException e) {
            // We don't need to abort if there's an IOException.
            throw new IOFetchException(url, e);
        } finally {
            safeAbort(needAbort, request);
            safeClose(in);
        }
    }

    // Toss truncated image content.
    if ((truncated) && (!isTextMimeType(mimeType))) {
        throw new AbortedFetchException(url, "Truncated image", AbortedFetchReason.CONTENT_SIZE);
    }

    // Now see if we need to uncompress the content.
    String contentEncoding = headerMap.get(HttpHeaders.CONTENT_ENCODING);
    if (contentEncoding != null) {
        if (LOGGER.isTraceEnabled()) {
            fetchTrace.append("; Content-Encoding: " + contentEncoding);
        }

        // TODO KKr We might want to just decompress a truncated gzip containing text
        // (since we have a max content size to save us from any gzip corruption). We
        // might want to break the following out into a separate method, by the way
        // (if not refactor this entire monolithic method).
        // NOTE(review): "deflate" encoding was once handled here but is currently
        // unsupported — only gzip/x-gzip are decompressed.
        try {
            if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
                if (truncated) {
                    throw new AbortedFetchException(url, "Truncated compressed data",
                            AbortedFetchReason.CONTENT_SIZE);
                } else {
                    ExpandedResult expandedResult = EncodingUtils.processGzipEncoded(content, maxContentSize);
                    truncated = expandedResult.isTruncated();
                    if ((truncated) && (!isTextMimeType(mimeType))) {
                        throw new AbortedFetchException(url, "Truncated decompressed image",
                                AbortedFetchReason.CONTENT_SIZE);
                    } else {
                        content = expandedResult.getExpanded();
                        if (LOGGER.isTraceEnabled()) {
                            fetchTrace.append("; unzipped to " + content.length + " bytes");
                        }
                    }
                }
            }
        } catch (IOException e) {
            throw new IOFetchException(url, e);
        }
    }

    // Finally dump out the trace msg we've been building.
    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(fetchTrace.toString());
    }

    // TODO KKr - Save truncated flag in FetchedResult/FetchedDatum.
    return new FetchedResult(url, redirectedUrl, System.currentTimeMillis(), headerMap, content, contentType,
            (int) readRate, payload, newBaseUrl, numRedirects, hostAddress, statusCode, reasonPhrase);
}