Example usage for org.apache.hadoop.record Buffer set

Introduction

This page shows example usage for org.apache.hadoop.record.Buffer#set.

Prototype

public void set(byte[] bytes) 

Document

Use the specified bytes array as underlying sequence.
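
For orientation before the full example below, here is a minimal, self-contained sketch of the call. Per the documentation line above, set(byte[]) uses the supplied array as the underlying sequence rather than copying it; the get() and getCount() accessors used to demonstrate this are the standard read methods on the same Buffer class.

import org.apache.hadoop.record.Buffer;

public class BufferSetSketch {
    public static void main(String[] args) {
        byte[] payload = "hello".getBytes();

        Buffer buffer = new Buffer();
        // Use payload as the buffer's underlying sequence; no copy is made.
        buffer.set(payload);

        System.out.println(buffer.getCount());       // 5, the length of the wrapped data
        System.out.println(buffer.get() == payload); // true: set() keeps the same backing array
    }
}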

Usage

From source file: org.commoncrawl.service.crawler.CrawlTarget.java

License: Open Source License

public void fetchSucceeded(NIOHttpConnection connection, NIOHttpHeaders httpHeaders,
        NIOBufferList nioContentBuffer) {

    boolean failure = false;
    int failureReason = CrawlURL.FailureReason.UNKNOWN;
    Exception failureException = null;
    String failureDescription = "";

    // revalidate ip address here ...
    if (getRedirectCount() == 0) {
        // check to see if the ip address got re-resolved ...
        if (connection.getResolvedAddress() != null) {

            InetAddress address = connection.getResolvedAddress();

            int ipAddress = 0;

            if (address.getAddress() != null) {
                // if so, update url data information ...
                ipAddress = IPAddressUtils.IPV4AddressToInteger(address.getAddress());
            } else {
                LOG.error("### BUG int Address getAddress returned Null for target:" + getActiveURL());
            }

            // LOG.info("IP Address for URL:" + getActiveURL() + " is:" + ipAddress
            // + " ttl is:" + connection.getResolvedAddressTTL());
            setServerIP(ipAddress);
            setServerIPTTL(connection.getResolvedAddressTTL());
        }
    }

    Buffer contentBuffer = new Buffer();
    byte[] data = new byte[nioContentBuffer.available()];

    int responseCode = -1;

    try {
        responseCode = NIOHttpConnection.getHttpResponseCode(httpHeaders);

        if (!isAcceptableSuccessResponseCode(responseCode)) {
            failure = true;
            failureReason = CrawlURL.FailureReason.InvalidResponseCode;
            failureDescription = "URL:" + getOriginalURL() + " returned invalid responseCode:" + responseCode;
        }
    } catch (Exception e) {
        failure = true;
        failureReason = CrawlURL.FailureReason.RuntimeError;
        failureException = e;
        failureDescription = "getHTTPResponse Threw:" + StringUtils.stringifyException(e) + " for URL:"
                + getOriginalURL();
    }

    if (!failure) {
        // populate a conventional buffer object with content data ...

        try {
            // read data from nio buffer into byte array
            nioContentBuffer.read(data);
            // and reset the source buffer (releasing its memory) ...
            nioContentBuffer.reset();
            // set byte buffer into buffer object ...
            contentBuffer.set(data);

        } catch (IOException e) {

            failure = true;
            failureReason = CrawlURL.FailureReason.IOException;
            failureException = e;
            failureDescription = "Unable to read Content Buffer from successfull Fetch for URL:"
                    + getOriginalURL();
        }
    }

    if (!failure) {
        // populate crawl url data
        _activeRequestHeaders = httpHeaders.toString();
        // reuse the response code already parsed above instead of re-parsing the headers
        _activeRequestResultCode = (short) responseCode;
    }

    if (failure) {
        if (failureException != null) {
            if (Environment.detailLogEnabled())
                LOG.error(StringUtils.stringifyException(failureException));
        }
        fetchFailed(failureReason, failureDescription);
    } else {

        // call host ...
        _sourceList.fetchSucceeded(this, connection.getDownloadTime(), httpHeaders, contentBuffer);

        // Add to CrawlLog for both content gets and robots gets
        // create a crawl url object
        CrawlURL urlData = createCrawlURLObject(CrawlURL.CrawlResult.SUCCESS, contentBuffer);
        // set truncation flag if content truncation during download
        if (connection.isContentTruncated()) {
            urlData.setFlags(urlData.getFlags() | CrawlURL.Flags.TruncatedDuringDownload);
        }
        // and update segment progress logs ...
        getEngine().crawlComplete(connection, urlData, this, true);

        /*
         * if ((getFlags() & CrawlURL.Flags.IsRobotsURL) != 0) {
         * getEngine().logSuccessfulRobotsGET(connection, this); }
         */
    }
}
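
The buffer handoff in the example above is worth distilling: the content is copied out of the NIO buffer exactly once, the NIO side is reset to release its memory, and contentBuffer.set(data) then wraps that same array instead of copying it a second time. Below is a condensed sketch of those steps, using the NIOBufferList calls exactly as they appear above; drainToBuffer is a hypothetical helper name, and the NIOBufferList import path is an assumption, not taken from the original source.

import java.io.IOException;
import org.apache.hadoop.record.Buffer;
import org.commoncrawl.io.NIOBufferList; // assumed package for the commoncrawl class

final class ContentBufferHelper {
    // Hypothetical helper, condensed from fetchSucceeded above.
    static Buffer drainToBuffer(NIOBufferList nioContentBuffer) throws IOException {
        byte[] data = new byte[nioContentBuffer.available()];
        nioContentBuffer.read(data);  // single copy out of the NIO buffer
        nioContentBuffer.reset();     // release the NIO-side memory
        Buffer contentBuffer = new Buffer();
        contentBuffer.set(data);      // wrap the array; no second copy
        return contentBuffer;
    }
}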