List of usage examples for org.apache.hadoop.record.Buffer — constructor:
public Buffer()
From source file:com.jfolson.hive.serde.RTypedBytesInput.java
License:Apache License
/** * Reads the raw bytes following a <code>Type.VECTOR</code> code. * /*from ww w.j a v a2 s . co m*/ * @return the obtained bytes sequence * @throws IOException */ public byte[] readRawVector() throws IOException { Buffer buffer = new Buffer(); int length = readVectorHeader(); buffer.append(new byte[] { (byte) RType.VECTOR.code, (byte) (0xff & (length >> 24)), (byte) (0xff & (length >> 16)), (byte) (0xff & (length >> 8)), (byte) (0xff & length) }); for (int i = 0; i < length; i++) { buffer.append(readRaw()); } return buffer.get(); }
From source file:com.jfolson.hive.serde.RTypedBytesInput.java
License:Apache License
/** * Reads the raw bytes following a <code>Type.MAP</code> code. * /*from w w w . j ava 2 s . c o m*/ * @return the obtained bytes sequence * @throws IOException */ public byte[] readRawMap() throws IOException { Buffer buffer = new Buffer(); int length = readMapHeader(); buffer.append(new byte[] { (byte) RType.MAP.code, (byte) (0xff & (length >> 24)), (byte) (0xff & (length >> 16)), (byte) (0xff & (length >> 8)), (byte) (0xff & length) }); for (int i = 0; i < length; i++) { buffer.append(readRaw()); buffer.append(readRaw()); } return buffer.get(); }
From source file:org.commoncrawl.service.crawler.CrawlTarget.java
License:Open Source License
public void fetchSucceeded(NIOHttpConnection connection, NIOHttpHeaders httpHeaders, NIOBufferList nioContentBuffer) { boolean failure = false; int failureReason = CrawlURL.FailureReason.UNKNOWN; Exception failureException = null; String failureDescription = ""; // revalidate ip address here ... if (getRedirectCount() == 0) { // check to see if ip address go reresolved ... if (connection.getResolvedAddress() != null) { InetAddress address = connection.getResolvedAddress(); int ipAddress = 0; if (address.getAddress() != null) { // if so, update url data information ... ipAddress = IPAddressUtils.IPV4AddressToInteger(address.getAddress()); } else { LOG.error("### BUG int Address getAddress returned Null for target:" + getActiveURL()); }/*from w w w . ja v a 2s . c o m*/ // LOG.info("IP Address for URL:" + getActiveURL() + " is:" + ipAddress // + " ttl is:" + connection.getResolvedAddressTTL()); setServerIP(ipAddress); setServerIPTTL(connection.getResolvedAddressTTL()); } } Buffer contentBuffer = new Buffer(); byte data[] = new byte[nioContentBuffer.available()]; int responseCode = -1; try { responseCode = NIOHttpConnection.getHttpResponseCode(httpHeaders); if (!isAcceptableSuccessResponseCode(responseCode)) { failure = true; failureReason = CrawlURL.FailureReason.InvalidResponseCode; failureDescription = "URL:" + getOriginalURL() + " returned invalid responseCode:" + responseCode; } } catch (Exception e) { failure = true; failureReason = CrawlURL.FailureReason.RuntimeError; failureException = e; failureDescription = "getHTTPResponse Threw:" + StringUtils.stringifyException(e) + " for URL:" + getOriginalURL(); } if (!failure) { // populate a conventional buffer object with content data ... try { // read data from nio buffer into byte array nioContentBuffer.read(data); // and reset source buffer .... (releasing memory )... nioContentBuffer.reset(); // set byte buffer into buffer object ... 
contentBuffer.set(data); } catch (IOException e) { failure = true; failureReason = CrawlURL.FailureReason.IOException; failureException = e; failureDescription = "Unable to read Content Buffer from successfull Fetch for URL:" + getOriginalURL(); } } if (!failure) { // populate crawl url data _activeRequestHeaders = httpHeaders.toString(); _activeRequestResultCode = (short) NIOHttpConnection.getHttpResponseCode(httpHeaders); ; } if (failure) { if (failureException != null) { if (Environment.detailLogEnabled()) LOG.error(StringUtils.stringifyException(failureException)); } fetchFailed(failureReason, failureDescription); } else { // call host ... _sourceList.fetchSucceeded(this, connection.getDownloadTime(), httpHeaders, contentBuffer); // Add to CrawlLog for both content gets and robots gets // create a crawl url object CrawlURL urlData = createCrawlURLObject(CrawlURL.CrawlResult.SUCCESS, contentBuffer); // set truncation flag if content truncation during download if (connection.isContentTruncated()) { urlData.setFlags(urlData.getFlags() | CrawlURL.Flags.TruncatedDuringDownload); } // and update segment progress logs ... getEngine().crawlComplete(connection, urlData, this, true); /* * if ((getFlags() & CrawlURL.Flags.IsRobotsURL) != 0) { * getEngine().logSuccessfulRobotsGET(connection, this); } */ } }
From source file:org.commoncrawl.util.TimeSeriesDataFile.java
License:Open Source License
private void doCommonRead(ArrayList<KeyValueTuple<Long, ValueType>> valuesOut, RandomAccessFile file, long headerOffset, long endOfPrevRecord, int currentRecordLength, int recordsToRead, long optionalMinKeyValue) throws IOException { Buffer recordBuffer = new Buffer(); DataInputBuffer inputBuffer = new DataInputBuffer(); // ok start walking backwards ... while (recordsToRead != 0) { // setup new previous record pos pointer endOfPrevRecord = endOfPrevRecord - currentRecordLength - 4; // and seek to it endOfLastRecord - 4 file.seek(endOfPrevRecord - 4);// ww w .j a v a 2 s . c o m recordBuffer.setCapacity(currentRecordLength + 8); // read in proper amount of data ... file.read(recordBuffer.get(), 0, currentRecordLength + 8); // ok initialize input buffer ... inputBuffer.reset(recordBuffer.get(), currentRecordLength + 8); // now read next record length first ... int nextRecordLength = inputBuffer.readInt(); // next read sync bytes ... int syncBytes = inputBuffer.readInt(); // validate if (syncBytes != SyncBytes) { throw new IOException("Corrupt Record Detected!"); } // ok read real record bytes ... int realRecordBytes = inputBuffer.readInt(); // read crc ... long crcValue = inputBuffer.readLong(); // ok validate crc ... crc.reset(); crc.update(inputBuffer.getData(), inputBuffer.getPosition(), realRecordBytes - 8); if (crcValue != crc.getValue()) { throw new IOException("CRC Mismatch!"); } // ok now read key and value try { long key = WritableUtils.readVLong(inputBuffer); if (optionalMinKeyValue != -1 && key < optionalMinKeyValue) { break; } ValueType value = (ValueType) valueClass.newInstance(); value.readFields(inputBuffer); KeyValueTuple tuple = new KeyValueTuple<Long, ValueType>(key, value); tuple.recordPos = endOfPrevRecord; valuesOut.add(0, tuple); } catch (Exception e) { throw new IOException(e); } currentRecordLength = nextRecordLength; recordsToRead--; if (endOfPrevRecord == headerOffset) break; } }
From source file:org.commoncrawl.util.TimeSeriesDataFile.java
License:Open Source License
/** * get the key value of the last record in the file * @return record key as a long or -1 if zero records in file * @throws IOException//from w w w .ja v a 2s. com */ public synchronized long getLastRecordKey() throws IOException { LogFileHeader header = new LogFileHeader(); if (fileName.exists()) { RandomAccessFile file = new RandomAccessFile(fileName, "r"); Buffer recordBuffer = new Buffer(); DataInputBuffer inputBuffer = new DataInputBuffer(); try { //read header ... long headerOffset = readLogFileHeader(file, header); return header._lastRecordKey; } finally { if (file != null) { file.close(); } } } return -1; }
From source file:org.commoncrawl.util.TimeSeriesDataFile.java
License:Open Source License
/** * get the number of records in the file * /* w ww.j av a 2 s .c om*/ * @return record count in file * @throws IOException */ public synchronized int getRecordCount() throws IOException { LogFileHeader header = new LogFileHeader(); if (fileName.exists()) { RandomAccessFile file = new RandomAccessFile(fileName, "r"); Buffer recordBuffer = new Buffer(); DataInputBuffer inputBuffer = new DataInputBuffer(); try { //read header ... long headerOffset = readLogFileHeader(file, header); return header._itemCount; } finally { if (file != null) { file.close(); } } } return 0; }