Example usage for org.apache.commons.httpclient HttpMethodBase isAborted

List of usage examples for org.apache.commons.httpclient HttpMethodBase isAborted

Introduction

In this page you can find the example usage for org.apache.commons.httpclient HttpMethodBase isAborted.

Prototype

public boolean isAborted() 

Source Link

Document

Tests whether the execution of this method has been aborted

Usage

From source file:com.cyberway.issue.crawler.fetcher.FetchHTTP.java

protected void innerProcess(final CrawlURI curi) throws InterruptedException {
    if (!canFetch(curi)) {
        // Cannot fetch this, due to protocol, retries, or other problems
        return;/*from w  w w.  j a  va2 s.  c om*/
    }

    this.curisHandled++;

    // Note begin time
    curi.putLong(A_FETCH_BEGAN_TIME, System.currentTimeMillis());

    // Get a reference to the HttpRecorder that is set into this ToeThread.
    HttpRecorder rec = HttpRecorder.getHttpRecorder();

    // Shall we get a digest on the content downloaded?
    boolean digestContent = ((Boolean) getUncheckedAttribute(curi, ATTR_DIGEST_CONTENT)).booleanValue();
    String algorithm = null;
    if (digestContent) {
        algorithm = ((String) getUncheckedAttribute(curi, ATTR_DIGEST_ALGORITHM));
        rec.getRecordedInput().setDigest(algorithm);
    } else {
        // clear
        rec.getRecordedInput().setDigest((MessageDigest) null);
    }

    // Below we do two inner classes that add check of midfetch
    // filters just as we're about to receive the response body.
    String curiString = curi.getUURI().toString();
    HttpMethodBase method = null;
    if (curi.isPost()) {
        method = new HttpRecorderPostMethod(curiString, rec) {
            protected void readResponseBody(HttpState state, HttpConnection conn)
                    throws IOException, HttpException {
                addResponseContent(this, curi);
                if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) {
                    doAbort(curi, this, MIDFETCH_ABORT_LOG);
                } else {
                    super.readResponseBody(state, conn);
                }
            }
        };
    } else {
        method = new HttpRecorderGetMethod(curiString, rec) {
            protected void readResponseBody(HttpState state, HttpConnection conn)
                    throws IOException, HttpException {
                addResponseContent(this, curi);
                if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) {
                    doAbort(curi, this, MIDFETCH_ABORT_LOG);
                } else {
                    super.readResponseBody(state, conn);
                }
            }
        };
    }

    HostConfiguration customConfigOrNull = configureMethod(curi, method);

    // Set httpRecorder into curi. Subsequent code both here and later
    // in extractors expects to find the HttpRecorder in the CrawlURI.
    curi.setHttpRecorder(rec);

    // Populate credentials. Set config so auth. is not automatic.
    boolean addedCredentials = populateCredentials(curi, method);
    method.setDoAuthentication(addedCredentials);

    // set hardMax on bytes (if set by operator)
    long hardMax = getMaxLength(curi);
    // set overall timeout (if set by operator)
    long timeoutMs = 1000 * getTimeout(curi);
    // Get max fetch rate (bytes/ms). It comes in in KB/sec
    long maxRateKBps = getMaxFetchRate(curi);
    rec.getRecordedInput().setLimits(hardMax, timeoutMs, maxRateKBps);

    try {
        this.http.executeMethod(customConfigOrNull, method);
    } catch (RecorderTooMuchHeaderException ex) {
        // when too much header material, abort like other truncations
        doAbort(curi, method, HEADER_TRUNC);
    } catch (IOException e) {
        failedExecuteCleanup(method, curi, e);
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        // For weird windows-only ArrayIndex exceptions in native
        // code... see
        // http://forum.java.sun.com/thread.jsp?forum=11&thread=378356
        // treating as if it were an IOException
        failedExecuteCleanup(method, curi, e);
        return;
    }

    // set softMax on bytes to get (if implied by content-length) 
    long softMax = method.getResponseContentLength();

    try {
        if (!method.isAborted()) {
            // Force read-to-end, so that any socket hangs occur here,
            // not in later modules.
            rec.getRecordedInput().readFullyOrUntil(softMax);
        }
    } catch (RecorderTimeoutException ex) {
        doAbort(curi, method, TIMER_TRUNC);
    } catch (RecorderLengthExceededException ex) {
        doAbort(curi, method, LENGTH_TRUNC);
    } catch (IOException e) {
        cleanup(curi, e, "readFully", S_CONNECT_LOST);
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        // For weird windows-only ArrayIndex exceptions from native code
        // see http://forum.java.sun.com/thread.jsp?forum=11&thread=378356
        // treating as if it were an IOException
        cleanup(curi, e, "readFully", S_CONNECT_LOST);
        return;
    } finally {
        // ensure recording has stopped
        rec.closeRecorders();
        if (!method.isAborted()) {
            method.releaseConnection();
        }
        // Note completion time
        curi.putLong(A_FETCH_COMPLETED_TIME, System.currentTimeMillis());
        // Set the response charset into the HttpRecord if available.
        setCharacterEncoding(rec, method);
        setSizes(curi, rec);
    }

    if (digestContent) {
        curi.setContentDigest(algorithm, rec.getRecordedInput().getDigestValue());
    }
    if (logger.isLoggable(Level.INFO)) {
        logger.info((curi.isPost() ? "POST" : "GET") + " " + curi.getUURI().toString() + " "
                + method.getStatusCode() + " " + rec.getRecordedInput().getSize() + " "
                + curi.getContentType());
    }

    if (curi.isSuccess() && addedCredentials) {
        // Promote the credentials from the CrawlURI to the CrawlServer
        // so they are available for all subsequent CrawlURIs on this
        // server.
        promoteCredentials(curi);
        if (logger.isLoggable(Level.FINE)) {
            // Print out the cookie.  Might help with the debugging.
            Header setCookie = method.getResponseHeader("set-cookie");
            if (setCookie != null) {
                logger.fine(setCookie.toString().trim());
            }
        }
    } else if (method.getStatusCode() == HttpStatus.SC_UNAUTHORIZED) {
        // 401 is not 'success'.
        handle401(method, curi);
    }

    if (rec.getRecordedInput().isOpen()) {
        logger.severe(curi.toString() + " RIS still open. Should have" + " been closed by method release: "
                + Thread.currentThread().getName());
        try {
            rec.getRecordedInput().close();
        } catch (IOException e) {
            logger.log(Level.SEVERE, "second-chance RIS close failed", e);
        }
    }
}

From source file:org.archive.crawler.fetcher.OptimizeFetchHTTP.java

protected void innerProcess(final CrawlURI curi) throws InterruptedException {
    if (!canFetch(curi)) {
        // Cannot fetch this, due to protocol, retries, or other problems
        return;//from   ww w . j  a va  2  s . co  m
    }

    HttpClient http = this.getClient();
    setLocalIP(http);

    this.curisHandled++;

    // Note begin time
    curi.putLong(A_FETCH_BEGAN_TIME, System.currentTimeMillis());

    // Get a reference to the HttpRecorder that is set into this ToeThread.
    HttpRecorder rec = HttpRecorder.getHttpRecorder();

    // Shall we get a digest on the content downloaded?
    boolean digestContent = ((Boolean) getUncheckedAttribute(curi, ATTR_DIGEST_CONTENT)).booleanValue();
    String algorithm = null;
    if (digestContent) {
        algorithm = ((String) getUncheckedAttribute(curi, ATTR_DIGEST_ALGORITHM));
        rec.getRecordedInput().setDigest(algorithm);
    } else {
        // clear
        rec.getRecordedInput().setDigest((MessageDigest) null);
    }

    // Below we do two inner classes that add check of midfetch
    // filters just as we're about to receive the response body.
    String curiString = curi.getUURI().toString();
    HttpMethodBase method = null;
    if (curi.isPost()) {
        method = new HttpRecorderPostMethod(curiString, rec) {
            protected void readResponseBody(HttpState state, HttpConnection conn)
                    throws IOException, HttpException {
                addResponseContent(this, curi);
                if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) {
                    doAbort(curi, this, MIDFETCH_ABORT_LOG);
                } else {
                    super.readResponseBody(state, conn);
                }
            }
        };
    } else {
        method = new HttpRecorderGetMethod(curiString, rec) {
            protected void readResponseBody(HttpState state, HttpConnection conn)
                    throws IOException, HttpException {
                addResponseContent(this, curi);
                if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) {
                    doAbort(curi, this, MIDFETCH_ABORT_LOG);
                } else {
                    super.readResponseBody(state, conn);
                }
            }
        };
    }

    HostConfiguration customConfigOrNull = configureMethod(curi, method);

    // Set httpRecorder into curi. Subsequent code both here and later
    // in extractors expects to find the HttpRecorder in the CrawlURI.
    curi.setHttpRecorder(rec);

    // Populate credentials. Set config so auth. is not automatic.
    boolean addedCredentials = populateCredentials(curi, method);
    method.setDoAuthentication(addedCredentials);

    // set hardMax on bytes (if set by operator)
    long hardMax = getMaxLength(curi);
    // set overall timeout (if set by operator)
    long timeoutMs = 1000 * getTimeout(curi);
    // Get max fetch rate (bytes/ms). It comes in in KB/sec
    long maxRateKBps = getMaxFetchRate(curi);
    rec.getRecordedInput().setLimits(hardMax, timeoutMs, maxRateKBps);

    try {
        http.executeMethod(customConfigOrNull, method);
    } catch (RecorderTooMuchHeaderException ex) {
        // when too much header material, abort like other truncations
        doAbort(curi, method, HEADER_TRUNC);
    } catch (IOException e) {
        failedExecuteCleanup(method, curi, e);
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        // For weird windows-only ArrayIndex exceptions in native
        // code... see
        // http://forum.java.sun.com/thread.jsp?forum=11&thread=378356
        // treating as if it were an IOException
        failedExecuteCleanup(method, curi, e);
        return;
    }

    // set softMax on bytes to get (if implied by content-length) 
    long softMax = method.getResponseContentLength();

    try {
        if (!curi.isSeed() && curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED) {
            logger.debug(curi.getUURI().toString() + " is not modify");
            curi.skipToProcessorChain(getController().getPostprocessorChain());
        } else if (!method.isAborted()) {
            // Force read-to-end, so that any socket hangs occur here,
            // not in later modules.
            rec.getRecordedInput().readFullyOrUntil(softMax);
        }
    } catch (RecorderTimeoutException ex) {
        doAbort(curi, method, TIMER_TRUNC);
    } catch (RecorderLengthExceededException ex) {
        doAbort(curi, method, LENGTH_TRUNC);
    } catch (IOException e) {
        cleanup(curi, e, "readFully", S_CONNECT_LOST);
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        // For weird windows-only ArrayIndex exceptions from native code
        // see http://forum.java.sun.com/thread.jsp?forum=11&thread=378356
        // treating as if it were an IOException
        cleanup(curi, e, "readFully", S_CONNECT_LOST);
        return;
    } finally {
        // ensure recording has stopped
        rec.closeRecorders();
        logger.debug("cloase backup file.&uri= " + curi.getCrawlURIString());
        if (!method.isAborted()) {
            method.releaseConnection();
        }
        // Note completion time
        curi.putLong(A_FETCH_COMPLETED_TIME, System.currentTimeMillis());
        // Set the response charset into the HttpRecord if available.
        setCharacterEncoding(rec, method);
        setSizes(curi, rec);
    }

    if (digestContent) {
        curi.setContentDigest(algorithm, rec.getRecordedInput().getDigestValue());
    }

    logger.info((curi.isPost() ? "POST" : "GET") + " " + curi.getUURI().toString() + " "
            + method.getStatusCode() + " " + rec.getRecordedInput().getSize() + " " + curi.getContentType());

    if (curi.isSuccess() && addedCredentials) {
        // Promote the credentials from the CrawlURI to the CrawlServer
        // so they are available for all subsequent CrawlURIs on this
        // server.
        promoteCredentials(curi);
        if (logger.isDebugEnabled()) {
            // Print out the cookie.  Might help with the debugging.
            Header setCookie = method.getResponseHeader("set-cookie");
            if (setCookie != null) {
                logger.debug(setCookie.toString().trim());
            }
        }
    } else if (method.getStatusCode() == HttpStatus.SC_UNAUTHORIZED) {
        // 401 is not 'success'.
        handle401(method, curi);
    }

    if (rec.getRecordedInput().isOpen()) {
        logger.error(curi.toString() + " RIS still open. Should have" + " been closed by method release: "
                + Thread.currentThread().getName());
        try {
            rec.getRecordedInput().close();
        } catch (IOException e) {
            logger.error("second-chance RIS close failed", e);
        }
    }
}