List of usage examples for the org.apache.hadoop.record.Buffer constructor
public Buffer(byte[] bytes, int offset, int length)
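Every example below passes the same pattern of arguments: the backing byte array of a Hadoop Writable, a zero offset, and the Writable's logical length, so that only the valid region of the (possibly over-allocated) backing array is copied into the Buffer. A minimal, self-contained sketch of that pattern (the BytesWritable value here is illustrative, not taken from the examples below):

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.record.Buffer;

public class BufferConstructorExample {
    public static void main(String[] args) {
        // A BytesWritable's backing array can be larger than its logical length,
        // so pass an explicit (bytes, offset, length) range to copy only the valid part.
        BytesWritable writable = new BytesWritable("hello".getBytes());
        Buffer buffer = new Buffer(writable.getBytes(), 0, writable.getLength());

        System.out.println(buffer.getCount()); // 5 -- length of the copied range
        byte[] copy = buffer.get();            // the Buffer's backing byte array
    }
}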
From source file: com.jfolson.hive.serde.RBaseSerDe.java
License: Apache License
protected void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                tbOut.write(bytes);
            }
            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE: {
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        }
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        // Don't use array (typecode: 144) until everything supports NA values in typedbytes
        if (false) { // (elemOI.getCategory() == ObjectInspector.Category.PRIMITIVE) {
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For a complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;
        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}
From source file: com.jfolson.hive.serde.RTypedBytesOutput.java
License: Apache License
public void writeTypedBytes(TypedBytesWritable tb) throws IOException {
    writeRaw(new Buffer(tb.getBytes(), 0, tb.getLength()).get());
}
From source file: com.jfolson.hive.serde.RTypedBytesSerDe.java
License: Apache License
private void serializeField(Object o, ObjectInspector oi, Object reuse) throws IOException {
    if (o == null) {
        tbOut.writeNull();
        return;
    }
    switch (oi.getCategory()) {
    case PRIMITIVE: {
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            return;
        }
        case BINARY: {
            BinaryObjectInspector boi = (BinaryObjectInspector) poi;
            TypedBytesWritable bytes = reuse == null ? new TypedBytesWritable() : (TypedBytesWritable) reuse;
            BytesWritable bytesWrite = boi.getPrimitiveWritableObject(o);
            if (bytesWrite != null) {
                bytes.set(bytesWrite);
                if (!RType.isValid(bytes)) {
                    LOG.error("Invalid typedbytes detected with type: " + RType.getType(bytes).code);
                    bytes.setValue(new Buffer(bytesWrite.getBytes(), 0, bytesWrite.getLength()));
                }
                tbOut.write(bytes);
            }
            return;
        }
        case BOOLEAN: {
            BooleanObjectInspector boi = (BooleanObjectInspector) poi;
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set(boi.get(o));
            tbOut.write(r);
            return;
        }
        case SHORT: {
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            r.set(spoi.get(o));
            tbOut.write(r);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(ioi.get(o));
            tbOut.write(r);
            return;
        }
        case LONG: {
            LongObjectInspector loi = (LongObjectInspector) poi;
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            r.set(loi.get(o));
            tbOut.write(r);
            return;
        }
        case FLOAT: {
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            r.set(foi.get(o));
            tbOut.write(r);
            return;
        }
        case DOUBLE: {
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            r.set(doi.get(o));
            tbOut.write(r);
            return;
        }
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            tbOut.write(t);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector elemOI = loi.getListElementObjectInspector();
        List l = loi.getList(o);
        if (false) { // (elemOI.getCategory() == ObjectInspector.Category.PRIMITIVE) {
            tbOut.writeArray(l, (PrimitiveObjectInspector) elemOI);
        } else {
            tbOut.writeVector(l, (PrimitiveObjectInspector) elemOI);
        }
        return;
    }
    case MAP:
    case STRUCT: {
        // For a complex object, serialize to JSON format
        String s = SerDeUtils.getJSONString(o, oi);
        Text t = reuse == null ? new Text() : (Text) reuse;
        // convert to Text and write it
        t.set(s);
        tbOut.write(t);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }
}
From source file: org.commoncrawl.service.crawler.CrawlSegmentLog.java
License: Open Source License
/** Sync the incoming segment against the local crawl log and then send it up to the history server. **/
public int syncToLog(CrawlSegmentFPMap segmentDetail) throws IOException {
    if (Environment.detailLogEnabled())
        LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Syncing Progress Log");

    int itemsProcessed = 0;

    // construct a path to the local crawl segment directory ...
    File activeLogPath = buildActivePath(_rootDataDir, _listId, _segmentId);
    File checkpointLogPath = buildCheckpointPath(_rootDataDir, _listId, _segmentId);

    // check if it exists ...
    if (checkpointLogPath.exists()) {
        // log it ...
        if (Environment.detailLogEnabled())
            LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Checkpoint Log Found");
        // rename it as the active log ...
        checkpointLogPath.renameTo(activeLogPath);
    }

    if (activeLogPath.exists()) {
        // reconcile against the active log (if it exists) ...
        _localLogItemCount = reconcileLogFile(FileSystem.getLocal(CrawlEnvironment.getHadoopConfig()),
                new Path(activeLogPath.getAbsolutePath()), _listId, _segmentId, segmentDetail, null);
        if (Environment.detailLogEnabled())
            LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
                    + " Reconciled Local Log File with ProcessedItemCount:" + _localLogItemCount);
        itemsProcessed += _localLogItemCount;
    }

    FileSystem hdfs = CrawlEnvironment.getDefaultFileSystem();

    // first things first ... check to see if the special completion log file exists in hdfs
    Path hdfsSegmentCompletionLogPath = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/"
            + getListId() + "/" + getSegmentId() + "/"
            + CrawlEnvironment.buildCrawlSegmentCompletionLogFileName(getNodeName()));

    if (hdfs.exists(hdfsSegmentCompletionLogPath)) {
        if (Environment.detailLogEnabled())
            LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
                    + " Completion File Found. Marking Segment Complete");
        // if the file exists then this segment has been crawled and uploaded already ...
        // if the active log file exists ... delete it ...
        if (activeLogPath.exists())
            activeLogPath.delete();
        // reset local log item count ...
        _localLogItemCount = 0;
        itemsProcessed = -1;
        // remove all hosts from segment
        segmentDetail._urlsComplete = segmentDetail._urlCount;
    } else {
        if (segmentDetail != null) {
            if (Environment.detailLogEnabled())
                LOG.info("### SYNC: Building BulkItem History Query for List:" + _listId + " Segment:"
                        + _segmentId);
            BulkItemHistoryQuery query = buildHistoryQueryBufferFromMap(segmentDetail);

            if (query != null) {
                // create blocking semaphore ...
                final Semaphore semaphore = new Semaphore(1);
                semaphore.acquireUninterruptibly();
                if (Environment.detailLogEnabled())
                    LOG.info("### SYNC: Dispatching query to history server");
                // create an outer response object we can pass the async response to ...
                final BulkItemHistoryQueryResponse outerResponse = new BulkItemHistoryQueryResponse();

                CrawlerServer.getServer().getHistoryServiceStub().bulkItemQuery(query,
                        new Callback<BulkItemHistoryQuery, BulkItemHistoryQueryResponse>() {
                            @Override
                            public void requestComplete(
                                    final AsyncRequest<BulkItemHistoryQuery, BulkItemHistoryQueryResponse> request) {
                                // response returns in async thread context ...
                                if (request.getStatus() == Status.Success) {
                                    if (Environment.detailLogEnabled())
                                        LOG.info("###SYNC: bulk Query to history server succeeded. Setting outer response");
                                    ImmutableBuffer buffer = request.getOutput().getResponseList();
                                    outerResponse.setResponseList(
                                            new Buffer(buffer.getReadOnlyBytes(), 0, buffer.getCount()));
                                } else {
                                    LOG.error("###SYNC: bulk Query to history server failed.");
                                }
                                // release semaphore
                                semaphore.release();
                            }
                        });
                LOG.info("###SYNC: Loader thread blocked waiting for bulk query response");
                semaphore.acquireUninterruptibly();
                LOG.info("###SYNC: Loader thread received response from history server");

                if (outerResponse.getResponseList().getCount() == 0) {
                    LOG.error("###SYNC: History Server Bulk Query Returned NULL!!! for List:" + _listId
                            + " Segment:" + _segmentId);
                } else {
                    // ok, time to process the response and integrate the results into the fp list
                    updateFPMapFromBulkQueryResponse(segmentDetail, outerResponse);
                }
            } else {
                if (Environment.detailLogEnabled())
                    LOG.warn("### SYNC: No fingerprints found when processing segment detail for List:"
                            + _listId + " Segment:" + _segmentId);
                segmentDetail._urlsComplete = segmentDetail._urlCount;
            }
        }
        /*
        // and now walk hdfs looking for any checkpointed logs ...
        // scan based on checkpoint filename ...
        FileStatus[] remoteCheckpointFiles = hdfs.globStatus(new Path(
                CrawlEnvironment.getCrawlSegmentDataDirectory() + "/" + getListId() + "/" + getSegmentId() + "/"
                        + CrawlEnvironment.buildCrawlSegmentLogCheckpointWildcardString(getNodeName())));

        if (remoteCheckpointFiles != null) {
            LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Found Remote Checkpoint Files");

            // create a temp file to hold the reconciled log ...
            File consolidatedLogFile = null;

            if (remoteCheckpointFiles.length > 1) {
                // create temp log file ...
                consolidatedLogFile = File.createTempFile("SegmentLog", Long.toString(System.currentTimeMillis()));
                // write out header ...
                CrawlSegmentLog.writeHeader(consolidatedLogFile, 0);
            }
            // walk the files
            for (FileStatus checkpointFilePath : remoteCheckpointFiles) {
                // and reconcile them against segment ...
                itemsProcessed += reconcileLogFile(hdfs, checkpointFilePath.getPath(), getListId(), getSegmentId(),
                        segmentDetail, consolidatedLogFile);
                LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Processed Checkpoint File:"
                        + checkpointFilePath.getPath() + " Items Processed:" + itemsProcessed);
            }

            // finally ... if consolidatedLogFile is not null
            if (consolidatedLogFile != null) {
                // build a new hdfs file name ...
                Path consolidatedHDFSPath = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/"
                        + getListId() + "/" + getSegmentId() + "/"
                        + CrawlEnvironment.buildCrawlSegmentLogCheckpointFileName(getNodeName(),
                                System.currentTimeMillis()));

                LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId
                        + " Writing Consolidated Log File:" + consolidatedHDFSPath + " to HDFS");

                // and copy local file to log ...
                hdfs.copyFromLocalFile(new Path(consolidatedLogFile.getAbsolutePath()), consolidatedHDFSPath);

                // and delete all previous log file entries ...
                for (FileStatus oldCheckPointFile : remoteCheckpointFiles) {
                    hdfs.delete(oldCheckPointFile.getPath());
                }
                consolidatedLogFile.delete();
            }
        }
        */
    }

    if (segmentDetail != null) {
        _remainingURLS += (segmentDetail._urlCount - segmentDetail._urlsComplete);
        // mark url count as valid now ...
        _urlCountValid = true;
        // now, if the remaining url count is zero ... mark the segment as complete ...
        if (_remainingURLS == 0 && _localLogItemCount == 0) {
            _segmentComplete = true;
        }
    }

    if (Environment.detailLogEnabled())
        LOG.info("### SYNC: List:" + _listId + " Segment:" + _segmentId + " Done Syncing Progress Log TotalURLS:"
                + segmentDetail._urlCount + " RemainingURLS:" + _remainingURLS + " LocalLogItemCount:"
                + _localLogItemCount);

    return itemsProcessed;
}
From source file: org.commoncrawl.service.crawlhistory.CrawlHistoryServer.java
License: Open Source License
@Override
public void bulkItemQuery(AsyncContext<BulkItemHistoryQuery, BulkItemHistoryQueryResponse> rpcContext)
        throws RPCException {
    LOG.info("Received BulkItemQueryRequest");
    ImmutableBuffer inputBuffer = rpcContext.getInput().getFingerprintList();

    if (inputBuffer.getCount() != 0) {
        try {
            if (_bloomFilter == null) {
                throw new IOException("BloomFilter Not Initialized. Invalid Server State!");
            }
            DataInputStream inputStream = new DataInputStream(
                    new ByteArrayInputStream(inputBuffer.getReadOnlyBytes(), 0, inputBuffer.getCount()));
            BitStream bitStreamOut = new BitStream();
            URLFPV2 fingerprint = new URLFPV2();

            int itemsPresent = 0;
            while (inputStream.available() != 0) {
                fingerprint.setDomainHash(WritableUtils.readVLong(inputStream));
                fingerprint.setUrlHash(WritableUtils.readVLong(inputStream));
                if (_bloomFilter.isPresent(fingerprint)) {
                    bitStreamOut.addbit(1);
                    ++itemsPresent;
                } else {
                    bitStreamOut.addbit(0);
                }
            }
            LOG.info("Received BulkItemQueryRequest Completed with " + itemsPresent + " items found");

            rpcContext.getOutput()
                    .setResponseList(new Buffer(bitStreamOut.bits, 0, (bitStreamOut.nbits + 7) / 8));
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            rpcContext.setStatus(Status.Error_RequestFailed);
            rpcContext.setErrorDesc(CCStringUtils.stringifyException(e));
        }
        rpcContext.completeRequest();
    }
}
From source file: org.commoncrawl.service.listcrawler.ProxyServlet.java
License: Open Source License
private static void cacheS3ItemResult(ArcFileItem itemResult, String targetURL, long fingerprint) {
    CacheItem cacheItem = new CacheItem();

    cacheItem.setUrlFingerprint(fingerprint);
    cacheItem.setUrl(targetURL);
    cacheItem.setSource((byte) CacheItem.Source.S3Cache);
    cacheItem.setHeaderItems(itemResult.getHeaderItems());
    cacheItem.setFieldDirty(CacheItem.Field_HEADERITEMS);
    cacheItem.setContent(
            new Buffer(itemResult.getContent().getReadOnlyBytes(), 0, itemResult.getContent().getCount()));

    if ((itemResult.getFlags() & ArcFileItem.Flags.TruncatedInDownload) != 0) {
        cacheItem.setFlags(cacheItem.getFlags() | CacheItem.Flags.Flag_WasTruncatedDuringDownload);
    }
    if ((itemResult.getFlags() & ArcFileItem.Flags.TruncatedInInflate) != 0) {
        cacheItem.setFlags(cacheItem.getFlags() | CacheItem.Flags.Flag_WasTruncatedDuringInflate);
    }

    ProxyServer.getSingleton().getCache().cacheItem(cacheItem, null);
}
From source file: org.commoncrawl.service.listcrawler.ProxyServlet.java
License: Open Source License
private static void sendS3ItemResponse(final HttpServletRequest req, final HttpServletResponse response,
        ArcFileItem responseItem, String renderAs, AsyncResponse responseObject, long requestStartTime)
        throws IOException {

    CacheItem cacheItem = new CacheItem();

    // populate a cache item object ...
    cacheItem.setHeaderItems(responseItem.getHeaderItems());
    cacheItem.setFieldDirty(CacheItem.Field_HEADERITEMS);
    cacheItem.setUrl(responseItem.getUri());
    cacheItem.setUrlFingerprint(URLUtils.getCanonicalURLFingerprint(responseItem.getUri(), true));
    cacheItem.setSource((byte) CacheItem.Source.S3Cache);
    cacheItem.setContent(
            new Buffer(responseItem.getContent().getReadOnlyBytes(), 0, responseItem.getContent().getCount()));

    sendCacheItemResponse(req, response, cacheItem, true, renderAs, responseObject, requestStartTime);
}