Example usage for org.apache.hadoop.record Buffer Buffer

Introduction

This page collects example usages of the org.apache.hadoop.record Buffer constructor Buffer(byte[] bytes, int offset, int length).

Prototype

public Buffer(byte[] bytes, int offset, int length) 

Document

Create a Buffer using the byte range as the initial value.
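As a minimal sketch (the class name BufferRangeExample is hypothetical), the constructor copies the given byte range into a fresh backing array, so the resulting Buffer is independent of the source array:

import org.apache.hadoop.record.Buffer;

public class BufferRangeExample {
    public static void main(String[] args) {
        byte[] raw = new byte[] { 1, 2, 3, 4, 5 };
        // Copy 3 bytes starting at offset 1; the constructor copies the
        // range, so later edits to raw do not affect buf.
        Buffer buf = new Buffer(raw, 1, 3);
        System.out.println(buf.getCount()); // prints 3
    }
}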

Usage

From source file:com.jfolson.hive.serde.RBaseSerDe.java

License:Apache License

protected void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }

            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE:
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        // Don't use array (typecode: 144) until everything supports NA values in typedbytes
        if (false) {//(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex objects, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;

        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}
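Two Buffer-related details in this example are worth noting. The reuse parameter lets the caller hand back a previously allocated Writable, which is cast and refilled via set(...) instead of allocating a new object per row. And in the BINARY case, when the typedbytes encoding fails validation, the value is rewrapped as new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()); the explicit length matters because BytesWritable.getBytes() returns the full backing array, which can be longer than the valid data.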

From source file:com.jfolson.hive.serde.RTypedBytesOutput.java

License:Apache License

public void writeTypedBytes(TypedBytesWritable tb) throws IOException {
    //LOG.info("Writing as typedbytes bytes");
    writeRaw(new Buffer(tb.getBytes(), 0, tb.getLength()).get());
}
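Here the Buffer serves purely to trim: tb.getBytes() may return a backing array longer than the valid data, so wrapping it as new Buffer(tb.getBytes(), 0, tb.getLength()) and calling get() yields a byte[] of exactly the valid length for writeRaw().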

From source file:com.jfolson.hive.serde.RTypedBytesSerDe.java

License:Apache License

private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    //LOG.info("Serializing hive type: "+oi.getTypeName());
    //LOG.info("Serializing category: "+oi.getCategory().toString());
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        //LOG.info("Serializing primitive: "+poi.getPrimitiveCategory().toString());
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                //LOG.info("Writing binary primitive with class: "+bytes.getClass().getName());
                tbOut.write(bytes);
            }

            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE:
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        // Don't use array (typecode: 144) until everything supports NA values in typedbytes
        if (false) {//(elemOI.getCategory()==ObjectInspector.Category.PRIMITIVE){
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For complex objects, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;

        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}
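This RTypedBytesSerDe version is nearly identical to the RBaseSerDe example above, and it uses the Buffer constructor the same way: to rewrap only the valid byte range of a BytesWritable whose typedbytes encoding failed validation.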

From source file:org.commoncrawl.service.crawler.CrawlSegmentLog.java

License:Open Source License

/** sync the incoming segment against the local crawl log and then send it up to the history server **/
public int syncToLog(CrawlSegmentFPMap segmentDetail) throws IOException {
    if (Environment.detailLogEnabled())
        LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Syncing Progress Log");

    int itemsProcessed = 0;

    // and construct a path to the local crawl segment directory ... 
    File activeLogPath = buildActivePath(_rootDataDir, _listId, _segmentId);
    File checkpointLogPath = buildCheckpointPath(_rootDataDir, _listId, _segmentId);

    // check if it exists ... 
    if (checkpointLogPath.exists()) {
        // log it ... 
        if (Environment.detailLogEnabled())
            LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Checkpoint Log Found");
        // rename it as the active log ... 
        checkpointLogPath.renameTo(activeLogPath);
    }

    if (activeLogPath.exists()) {
        // reconcile against active log (if it exists) ...
        _localLogItemCount = reconcileLogFile(FileSystem.getLocal(CrawlEnvironment.getHadoopConfig()),
                new Path(activeLogPath.getAbsolutePath()), _listId, _segmentId, segmentDetail, null);
        if (Environment.detailLogEnabled())
            LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
                    + " Reconciled Local Log File with ProcessedItemCount:" + _localLogItemCount);
        itemsProcessed += _localLogItemCount;
    }

    FileSystem hdfs = CrawlEnvironment.getDefaultFileSystem();

    // first things first ... check to see if special completion log file exists in hdfs 
    Path hdfsSegmentCompletionLogPath = new Path(
            CrawlEnvironment.getCrawlSegmentDataDirectory() + "/" + getListId() + "/" + getSegmentId() + "/"
                    + CrawlEnvironment.buildCrawlSegmentCompletionLogFileName(getNodeName()));

    if (hdfs.exists(hdfsSegmentCompletionLogPath)) {
        if (Environment.detailLogEnabled())
            LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
                    + " Completion File Found. Marking Segment Complete");
        // if the file exists then this segment has been crawled and uploaded already ... 
        // if active log file exists ... delete it ... 
        if (activeLogPath.exists())
            activeLogPath.delete();
        //reset local log item count ... 
        _localLogItemCount = 0;
        itemsProcessed = -1;

        // remove all hosts from segment
        segmentDetail._urlsComplete = segmentDetail._urlCount;
    } else {

        if (segmentDetail != null) {
            if (Environment.detailLogEnabled())
                LOG.info("### SYNC: Building BulkItem History Query for List:" + _listId + " Segment:"
                        + _segmentId);
            BulkItemHistoryQuery query = buildHistoryQueryBufferFromMap(segmentDetail);

            if (query != null) {
                // create blocking semaphore ... 
                final Semaphore semaphore = new Semaphore(1);
                semaphore.acquireUninterruptibly();
                if (Environment.detailLogEnabled())
                    LOG.info("### SYNC: Dispatching query to history server");
                //create an outer response object we can pass the async response to ...
                final BulkItemHistoryQueryResponse outerResponse = new BulkItemHistoryQueryResponse();

                CrawlerServer.getServer().getHistoryServiceStub().bulkItemQuery(query,
                        new Callback<BulkItemHistoryQuery, BulkItemHistoryQueryResponse>() {

                            @Override
                            public void requestComplete(
                                    final AsyncRequest<BulkItemHistoryQuery, BulkItemHistoryQueryResponse> request) {
                                // response returns in async thread context ... 
                                if (request.getStatus() == Status.Success) {
                                    if (Environment.detailLogEnabled())
                                        LOG.info(
                                                "###SYNC: bulk Query to history server succeeded. setting out resposne");
                                    ImmutableBuffer buffer = request.getOutput().getResponseList();
                                    outerResponse.setResponseList(
                                            new Buffer(buffer.getReadOnlyBytes(), 0, buffer.getCount()));
                                } else {
                                    LOG.error("###SYNC: bulk Query to history server failed.");

                                }
                                // release semaphore
                                semaphore.release();
                            }
                        });
                LOG.info("###SYNC: Loader thread blocked waiting for bulk query response");
                semaphore.acquireUninterruptibly();
                LOG.info("###SYNC: Loader thread received response from history server");

                if (outerResponse.getResponseList().getCount() == 0) {
                    LOG.error("###SYNC: History Server Bulk Query Returned NULL!!! for List:" + _listId
                            + " Segment:" + _segmentId);
                } else {
                    // ok time to process the response and integrate the results into the fp list 
                    updateFPMapFromBulkQueryResponse(segmentDetail, outerResponse);
                }
            } else {
                if (Environment.detailLogEnabled())
                    LOG.warn("### SYNC: No fingerprints found when processing segment detail for List:"
                            + _listId + " Segment:" + _segmentId);
                segmentDetail._urlsComplete = segmentDetail._urlCount;
            }
        }
        /*
        // and now walk hdfs looking for any checkpointed logs ...
        // scan based on checkpoint filename ... 
        FileStatus[] remoteCheckpointFiles = hdfs.globStatus(new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/" + getListId() + "/"
            + getSegmentId() + "/" + CrawlEnvironment.buildCrawlSegmentLogCheckpointWildcardString(getNodeName())));
                
        if (remoteCheckpointFiles != null) {
                
          LOG.info("### SYNC: List:"+ _listId + " Segment:" + _segmentId +" Found Remote Checkpoint Files");
                  
          // create a temp file to hold the reconciled log ... 
          File consolidatedLogFile = null;
                  
          if (remoteCheckpointFiles.length > 1) { 
            // create temp log file ... 
            consolidatedLogFile = File.createTempFile("SegmentLog", Long.toString(System.currentTimeMillis()));
            // write out header ... 
            CrawlSegmentLog.writeHeader(consolidatedLogFile,0);
          }
          // walk the files 
          for(FileStatus checkpointFilePath : remoteCheckpointFiles) {
            // and reconcile them against segment ... 
            itemsProcessed += reconcileLogFile(hdfs,checkpointFilePath.getPath(),getListId(),getSegmentId(),segmentDetail,consolidatedLogFile);
            LOG.info("### SYNC: List:"+ _listId + " Segment:" + _segmentId +" Processed Checkpoint File:" + checkpointFilePath.getPath() + " Items Processed:" + itemsProcessed);          
          }
                  
          // finally ... if consolidatedLogFile is not null 
          if (consolidatedLogFile != null) { 
            // build a new hdfs file name ... 
            Path consolidatedHDFSPath = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/" + getListId() + "/" + getSegmentId() + "/" + CrawlEnvironment.buildCrawlSegmentLogCheckpointFileName(getNodeName(), System.currentTimeMillis()));
            LOG.info("### SYNC: List:"+ _listId + " Segment:" + _segmentId +" Writing Consolidated Log File:" + consolidatedHDFSPath + " to HDFS");         
            // and copy local file to log ... 
            hdfs.copyFromLocalFile(new Path(consolidatedLogFile.getAbsolutePath()),consolidatedHDFSPath);
            // and delete all previous log file entries ... 
            for (FileStatus oldCheckPointFile : remoteCheckpointFiles) { 
              hdfs.delete(oldCheckPointFile.getPath());
            }
            consolidatedLogFile.delete();
          }
        }
        */
    }

    if (segmentDetail != null) {
        _remainingURLS += (segmentDetail._urlCount - segmentDetail._urlsComplete);
        // mark url count as valid now ...
        _urlCountValid = true;

        // now if remaining url count is zero ... then mark the segment as complete ... 
        if (_remainingURLS == 0 && _localLogItemCount == 0) {
            _segmentComplete = true;
        }
    }
    if (Environment.detailLogEnabled())
        LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
                + " Done Syncing Progress Log TotalURLS:" + segmentDetail._urlCount + " RemainingURLS:"
                + _remainingURLS + " LocalLogItemCount:" + _localLogItemCount);

    return itemsProcessed;
}
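In the bulk-query callback above, the response arrives on an async RPC thread as an ImmutableBuffer. Copying it with new Buffer(buffer.getReadOnlyBytes(), 0, buffer.getCount()) gives outerResponse its own copy of the byte range, so the loader thread blocked on the semaphore can read it safely after the semaphore is released.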

From source file:org.commoncrawl.service.crawlhistory.CrawlHistoryServer.java

License:Open Source License

@Override
public void bulkItemQuery(AsyncContext<BulkItemHistoryQuery, BulkItemHistoryQueryResponse> rpcContext)
        throws RPCException {
    LOG.info("Received BulkItemQueryRequest");
    ImmutableBuffer inputBuffer = rpcContext.getInput().getFingerprintList();

    if (inputBuffer.getCount() != 0) {
        try {

            if (_bloomFilter == null) {
                throw new IOException("BloomFilter Not Initilized. Invalid Server State!");
            }

            DataInputStream inputStream = new DataInputStream(
                    new ByteArrayInputStream(inputBuffer.getReadOnlyBytes(), 0, inputBuffer.getCount()));

            BitStream bitStreamOut = new BitStream();

            URLFPV2 fingerprint = new URLFPV2();

            int itemsPresent = 0;
            while (inputStream.available() != 0) {
                fingerprint.setDomainHash(WritableUtils.readVLong(inputStream));
                fingerprint.setUrlHash(WritableUtils.readVLong(inputStream));
                if (_bloomFilter.isPresent(fingerprint)) {
                    bitStreamOut.addbit(1);
                    ++itemsPresent;
                } else {
                    bitStreamOut.addbit(0);
                }
            }

            LOG.info("Received BulkItemQueryRequest Completed with " + itemsPresent + " items found");

            rpcContext.getOutput()
                    .setResponseList(new Buffer(bitStreamOut.bits, 0, (bitStreamOut.nbits + 7) / 8));
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            rpcContext.setStatus(Status.Error_RequestFailed);
            rpcContext.setErrorDesc(CCStringUtils.stringifyException(e));
        }
        rpcContext.completeRequest();
    }

}
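Note the (bitStreamOut.nbits + 7) / 8 expression: it rounds the bit count up to whole bytes, so the Buffer passed to setResponseList covers every answer bit, including a partially filled final byte.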

From source file:org.commoncrawl.service.listcrawler.ProxyServlet.java

License:Open Source License

private static void cacheS3ItemResult(ArcFileItem itemResult, String targetURL, long fingerprint) {
    CacheItem cacheItem = new CacheItem();

    cacheItem.setUrlFingerprint(fingerprint);
    cacheItem.setUrl(targetURL);
    cacheItem.setSource((byte) CacheItem.Source.S3Cache);
    cacheItem.setHeaderItems(itemResult.getHeaderItems());
    cacheItem.setFieldDirty(CacheItem.Field_HEADERITEMS);
    cacheItem.setContent(
            new Buffer(itemResult.getContent().getReadOnlyBytes(), 0, itemResult.getContent().getCount()));
    if ((itemResult.getFlags() & ArcFileItem.Flags.TruncatedInDownload) != 0) {
        cacheItem.setFlags(cacheItem.getFlags() | CacheItem.Flags.Flag_WasTruncatedDuringDownload);
    }
    if ((itemResult.getFlags() & ArcFileItem.Flags.TruncatedInInflate) != 0) {
        cacheItem.setFlags(cacheItem.getFlags() | CacheItem.Flags.Flag_WasTruncatedDuringInflate);
    }

    ProxyServer.getSingleton().getCache().cacheItem(cacheItem, null);
}

From source file:org.commoncrawl.service.listcrawler.ProxyServlet.java

License:Open Source License

private static void sendS3ItemResponse(final HttpServletRequest req, final HttpServletResponse response,
        ArcFileItem responseItem, String renderAs, AsyncResponse responseObject, long requestStartTime)
        throws IOException {

    CacheItem cacheItem = new CacheItem();

    // populate a cache item object ... 
    cacheItem.setHeaderItems(responseItem.getHeaderItems());
    cacheItem.setFieldDirty(CacheItem.Field_HEADERITEMS);
    cacheItem.setUrl(responseItem.getUri());
    cacheItem.setUrlFingerprint(URLUtils.getCanonicalURLFingerprint(responseItem.getUri(), true));
    cacheItem.setSource((byte) CacheItem.Source.S3Cache);
    cacheItem.setContent(
            new Buffer(responseItem.getContent().getReadOnlyBytes(), 0, responseItem.getContent().getCount()));

    sendCacheItemResponse(req, response, cacheItem, true, renderAs, responseObject, requestStartTime);

}
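Both ProxyServlet helpers use the same idiom: new Buffer(content.getReadOnlyBytes(), 0, content.getCount()) copies the valid range of the ArcFileItem's immutable content into a mutable Buffer that the CacheItem can own.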