Example usage for org.apache.hadoop.record Buffer Buffer

List of usage examples for org.apache.hadoop.record Buffer Buffer

Introduction

On this page you can find example usage of the org.apache.hadoop.record Buffer() constructor.

Prototype

public Buffer() 

Source Link

Document

Create a zero-count sequence.

Usage

From source file:com.jfolson.hive.serde.RTypedBytesInput.java

License:Apache License

/**
 * Reads the raw bytes following a <code>Type.VECTOR</code> code.
 * /*from   ww w.j a  v  a2  s .  co  m*/
 * @return the obtained bytes sequence
 * @throws IOException
 */
public byte[] readRawVector() throws IOException {
    Buffer buffer = new Buffer();
    int length = readVectorHeader();
    buffer.append(new byte[] { (byte) RType.VECTOR.code, (byte) (0xff & (length >> 24)),
            (byte) (0xff & (length >> 16)), (byte) (0xff & (length >> 8)), (byte) (0xff & length) });
    for (int i = 0; i < length; i++) {
        buffer.append(readRaw());
    }
    return buffer.get();
}

From source file:com.jfolson.hive.serde.RTypedBytesInput.java

License:Apache License

/**
 * Reads the raw bytes following a <code>Type.MAP</code> code.
 * /*from   w w w  . j  ava  2 s  . c o m*/
 * @return the obtained bytes sequence
 * @throws IOException
 */
public byte[] readRawMap() throws IOException {
    Buffer buffer = new Buffer();
    int length = readMapHeader();
    buffer.append(new byte[] { (byte) RType.MAP.code, (byte) (0xff & (length >> 24)),
            (byte) (0xff & (length >> 16)), (byte) (0xff & (length >> 8)), (byte) (0xff & length) });
    for (int i = 0; i < length; i++) {
        buffer.append(readRaw());
        buffer.append(readRaw());
    }
    return buffer.get();
}

From source file:org.commoncrawl.service.crawler.CrawlTarget.java

License:Open Source License

/**
 * Callback invoked when an HTTP fetch completes successfully at the
 * transport level. Validates the response code, copies the downloaded
 * content out of the NIO buffer list, and routes the result either to
 * the failure path or to the source list / crawl log.
 *
 * @param connection       the connection the fetch ran on
 * @param httpHeaders      the response headers received
 * @param nioContentBuffer the downloaded content; drained and reset here
 */
public void fetchSucceeded(NIOHttpConnection connection, NIOHttpHeaders httpHeaders,
        NIOBufferList nioContentBuffer) {

    boolean failure = false;
    int failureReason = CrawlURL.FailureReason.UNKNOWN;
    Exception failureException = null;
    String failureDescription = "";

    // Revalidate the IP address, but only for the original (non-redirected)
    // fetch: check whether the address got re-resolved during the fetch.
    if (getRedirectCount() == 0) {
        if (connection.getResolvedAddress() != null) {

            InetAddress address = connection.getResolvedAddress();

            int ipAddress = 0;

            if (address.getAddress() != null) {
                // Update the url data with the freshly resolved address.
                ipAddress = IPAddressUtils.IPV4AddressToInteger(address.getAddress());
            } else {
                LOG.error("### BUG int Address getAddress returned Null for target:" + getActiveURL());
            }

            setServerIP(ipAddress);
            setServerIPTTL(connection.getResolvedAddressTTL());
        }
    }

    Buffer contentBuffer = new Buffer();
    byte[] data = new byte[nioContentBuffer.available()];

    int responseCode = -1;

    try {
        responseCode = NIOHttpConnection.getHttpResponseCode(httpHeaders);

        if (!isAcceptableSuccessResponseCode(responseCode)) {
            failure = true;
            failureReason = CrawlURL.FailureReason.InvalidResponseCode;
            failureDescription = "URL:" + getOriginalURL() + " returned invalid responseCode:" + responseCode;
        }
    } catch (Exception e) {
        failure = true;
        failureReason = CrawlURL.FailureReason.RuntimeError;
        failureException = e;
        failureDescription = "getHTTPResponse Threw:" + StringUtils.stringifyException(e) + " for URL:"
                + getOriginalURL();
    }

    if (!failure) {
        // Populate a conventional buffer object with the content data.
        try {
            // Read data from the nio buffer into the byte array ...
            nioContentBuffer.read(data);
            // ... and reset the source buffer (releasing its memory).
            nioContentBuffer.reset();
            // Hand the byte array to the buffer object.
            contentBuffer.set(data);

        } catch (IOException e) {

            failure = true;
            failureReason = CrawlURL.FailureReason.IOException;
            failureException = e;
            failureDescription = "Unable to read Content Buffer from successfull Fetch for URL:"
                    + getOriginalURL();
        }
    }

    if (!failure) {
        // Populate crawl url data. Reuse the response code parsed above
        // (valid on this path, since a parse failure sets `failure`) instead
        // of re-parsing the headers.
        _activeRequestHeaders = httpHeaders.toString();
        _activeRequestResultCode = (short) responseCode;
    }

    if (failure) {
        if (failureException != null) {
            if (Environment.detailLogEnabled())
                LOG.error(StringUtils.stringifyException(failureException));
        }
        fetchFailed(failureReason, failureDescription);
    } else {

        // Notify the owning host/source list ...
        _sourceList.fetchSucceeded(this, connection.getDownloadTime(), httpHeaders, contentBuffer);

        // Add to CrawlLog for both content gets and robots gets:
        // create a crawl url object ...
        CrawlURL urlData = createCrawlURLObject(CrawlURL.CrawlResult.SUCCESS, contentBuffer);
        // ... set the truncation flag if content was truncated during download ...
        if (connection.isContentTruncated()) {
            urlData.setFlags(urlData.getFlags() | CrawlURL.Flags.TruncatedDuringDownload);
        }
        // ... and update segment progress logs.
        getEngine().crawlComplete(connection, urlData, this, true);

        /*
         * if ((getFlags() & CrawlURL.Flags.IsRobotsURL) != 0) {
         * getEngine().logSuccessfulRobotsGET(connection, this); }
         */
    }
}

From source file:org.commoncrawl.util.TimeSeriesDataFile.java

License:Open Source License

/**
 * Walks the file backwards from {@code endOfPrevRecord}, decoding up to
 * {@code recordsToRead} records and inserting each (key, value) tuple at the
 * front of {@code valuesOut}, so the list ends up in ascending record order.
 *
 * NOTE(review): the exact on-disk record layout (the meaning of the -4 seek
 * adjustment and the +8 read padding below) is defined by the writer, which
 * is not visible here — confirm against the corresponding append code.
 *
 * @param valuesOut           receives decoded tuples; entries are inserted at index 0
 * @param file                open file; this method seeks explicitly before each read
 * @param headerOffset        offset of the first record; the walk stops on reaching it
 * @param endOfPrevRecord     offset just past the record to read first
 * @param currentRecordLength length of the record to read first
 * @param recordsToRead       maximum number of records to decode
 * @param optionalMinKeyValue stop early once a key below this value is seen, or -1 to disable
 * @throws IOException on corrupt sync bytes, CRC mismatch, or value instantiation failure
 */
private void doCommonRead(ArrayList<KeyValueTuple<Long, ValueType>> valuesOut, RandomAccessFile file,
        long headerOffset, long endOfPrevRecord, int currentRecordLength, int recordsToRead,
        long optionalMinKeyValue) throws IOException {

    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();

    // ok start walking backwards ... 
    while (recordsToRead != 0) {
        // setup new previous record pos pointer  
        endOfPrevRecord = endOfPrevRecord - currentRecordLength - 4;
        // and seek to it (endOfPrevRecord - 4)
        file.seek(endOfPrevRecord - 4);

        recordBuffer.setCapacity(currentRecordLength + 8);
        // read in proper amount of data ...
        file.read(recordBuffer.get(), 0, currentRecordLength + 8);
        // ok initialize input buffer ... 
        inputBuffer.reset(recordBuffer.get(), currentRecordLength + 8);
        // now read next record length first ... 
        int nextRecordLength = inputBuffer.readInt();
        // next read sync bytes ... 
        int syncBytes = inputBuffer.readInt();
        // validate against the expected sync marker
        if (syncBytes != SyncBytes) {
            throw new IOException("Corrupt Record Detected!");
        }
        // ok read real record bytes ... 
        int realRecordBytes = inputBuffer.readInt();
        // read crc ... 
        long crcValue = inputBuffer.readLong();
        // ok validate crc over the payload (payload excludes the 8-byte crc itself)
        crc.reset();
        crc.update(inputBuffer.getData(), inputBuffer.getPosition(), realRecordBytes - 8);
        if (crcValue != crc.getValue()) {
            throw new IOException("CRC Mismatch!");
        }
        // ok now read key and value 
        try {
            long key = WritableUtils.readVLong(inputBuffer);

            // keys decrease as we walk backwards, so stop once below the floor
            if (optionalMinKeyValue != -1 && key < optionalMinKeyValue) {
                break;
            }

            ValueType value = (ValueType) valueClass.newInstance();
            value.readFields(inputBuffer);
            KeyValueTuple tuple = new KeyValueTuple<Long, ValueType>(key, value);
            tuple.recordPos = endOfPrevRecord;
            // prepend so valuesOut stays in ascending record order
            valuesOut.add(0, tuple);

        } catch (Exception e) {
            throw new IOException(e);
        }

        // the length we just read describes the record preceding this one
        currentRecordLength = nextRecordLength;

        recordsToRead--;

        // reached the first record — nothing further back to read
        if (endOfPrevRecord == headerOffset)
            break;
    }
}

From source file:org.commoncrawl.util.TimeSeriesDataFile.java

License:Open Source License

/**
 * Gets the key of the last record in the file.
 *
 * @return the last record's key as a long, or -1 if the file does not exist
 * @throws IOException if the file header cannot be read
 */
public synchronized long getLastRecordKey() throws IOException {
    LogFileHeader header = new LogFileHeader();

    if (fileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(fileName, "r");

        try {
            // The last key is stored in the header, so reading the header
            // is sufficient — no record scan needed.
            readLogFileHeader(file, header);

            return header._lastRecordKey;
        } finally {
            file.close();
        }
    }
    return -1;
}

From source file:org.commoncrawl.util.TimeSeriesDataFile.java

License:Open Source License

/**
 * get the number of records in the file 
 * /*  w  ww.j av  a 2  s .c  om*/
 * @return record count in file 
 * @throws IOException
 */
public synchronized int getRecordCount() throws IOException {

    LogFileHeader header = new LogFileHeader();

    if (fileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(fileName, "r");

        Buffer recordBuffer = new Buffer();
        DataInputBuffer inputBuffer = new DataInputBuffer();
        try {

            //read header ... 
            long headerOffset = readLogFileHeader(file, header);

            return header._itemCount;
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return 0;
}