List of usage examples for the org.apache.hadoop.record.Buffer set(byte[]) method
public void set(byte[] bytes)
From source file: org.commoncrawl.service.crawler.CrawlTarget.java
License:Open Source License
public void fetchSucceeded(NIOHttpConnection connection, NIOHttpHeaders httpHeaders, NIOBufferList nioContentBuffer) { boolean failure = false; int failureReason = CrawlURL.FailureReason.UNKNOWN; Exception failureException = null; String failureDescription = ""; // revalidate ip address here ... if (getRedirectCount() == 0) { // check to see if ip address go reresolved ... if (connection.getResolvedAddress() != null) { InetAddress address = connection.getResolvedAddress(); int ipAddress = 0; if (address.getAddress() != null) { // if so, update url data information ... ipAddress = IPAddressUtils.IPV4AddressToInteger(address.getAddress()); } else { LOG.error("### BUG int Address getAddress returned Null for target:" + getActiveURL()); }//from www. j a v a 2 s. com // LOG.info("IP Address for URL:" + getActiveURL() + " is:" + ipAddress // + " ttl is:" + connection.getResolvedAddressTTL()); setServerIP(ipAddress); setServerIPTTL(connection.getResolvedAddressTTL()); } } Buffer contentBuffer = new Buffer(); byte data[] = new byte[nioContentBuffer.available()]; int responseCode = -1; try { responseCode = NIOHttpConnection.getHttpResponseCode(httpHeaders); if (!isAcceptableSuccessResponseCode(responseCode)) { failure = true; failureReason = CrawlURL.FailureReason.InvalidResponseCode; failureDescription = "URL:" + getOriginalURL() + " returned invalid responseCode:" + responseCode; } } catch (Exception e) { failure = true; failureReason = CrawlURL.FailureReason.RuntimeError; failureException = e; failureDescription = "getHTTPResponse Threw:" + StringUtils.stringifyException(e) + " for URL:" + getOriginalURL(); } if (!failure) { // populate a conventional buffer object with content data ... try { // read data from nio buffer into byte array nioContentBuffer.read(data); // and reset source buffer .... (releasing memory )... nioContentBuffer.reset(); // set byte buffer into buffer object ... 
contentBuffer.set(data); } catch (IOException e) { failure = true; failureReason = CrawlURL.FailureReason.IOException; failureException = e; failureDescription = "Unable to read Content Buffer from successfull Fetch for URL:" + getOriginalURL(); } } if (!failure) { // populate crawl url data _activeRequestHeaders = httpHeaders.toString(); _activeRequestResultCode = (short) NIOHttpConnection.getHttpResponseCode(httpHeaders); ; } if (failure) { if (failureException != null) { if (Environment.detailLogEnabled()) LOG.error(StringUtils.stringifyException(failureException)); } fetchFailed(failureReason, failureDescription); } else { // call host ... _sourceList.fetchSucceeded(this, connection.getDownloadTime(), httpHeaders, contentBuffer); // Add to CrawlLog for both content gets and robots gets // create a crawl url object CrawlURL urlData = createCrawlURLObject(CrawlURL.CrawlResult.SUCCESS, contentBuffer); // set truncation flag if content truncation during download if (connection.isContentTruncated()) { urlData.setFlags(urlData.getFlags() | CrawlURL.Flags.TruncatedDuringDownload); } // and update segment progress logs ... getEngine().crawlComplete(connection, urlData, this, true); /* * if ((getFlags() & CrawlURL.Flags.IsRobotsURL) != 0) { * getEngine().logSuccessfulRobotsGET(connection, this); } */ } }