Example usage of org.apache.hadoop.io.DataInputBuffer.reset

Introduction

This page collects example usages of org.apache.hadoop.io.DataInputBuffer.reset(byte[], int, int).

Prototype

public void reset(byte[] input, int start, int length)

Source Link

Document

Resets the data that the buffer reads.

Usage

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

License:Open Source License

/**
 * Looks up the crawl history record whose fingerprint matches {@code fingerprint} by
 * binary search over fixed-size records of {@code OnDiskCrawlHistoryItem.ON_DISK_SIZE} bytes.
 *
 * <p>If {@code _tempFixedDataBuffer} is set, only that in-memory copy is searched (while
 * holding this object's monitor); the data file is not consulted, even on a miss.
 * Otherwise {@code _fixedDataFile} is opened and searched record by record.
 *
 * <p>The binary search is only meaningful if the records are stored in the order defined
 * by {@code OnDiskCrawlHistoryItem.compareFingerprints}.
 *
 * @param fingerprint the URL fingerprint to search for
 * @return the matching item with {@code _fileOffset} set to the byte offset of its record,
 *         or {@code null} if no record matches
 * @throws IOException if the data file cannot be opened or read, or a record fails to deserialize
 */
private OnDiskCrawlHistoryItem loadOnDiskItemForURLFP(URLFP fingerprint) throws IOException {

    // see if state is cached in memory ...
    boolean loadedFromMemory = false;

    synchronized (this) {
        if (_tempFixedDataBuffer != null) {

            loadedFromMemory = true;

            int low = 0;
            int high = (int) (_tempFixedDataBufferSize / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;

            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();
            DataInputBuffer inputBuffer = new DataInputBuffer();

            while (low <= high) {

                int mid = low + ((high - low) / 2);

                // rewind to the start of the buffer, then skip forward to record #mid
                inputBuffer.reset(_tempFixedDataBuffer, 0, _tempFixedDataBufferSize);
                inputBuffer.skip(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);

                // deserialize
                itemOut.deserialize(inputBuffer);

                // now compare it against desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);

                if (comparisonResult > 0) {
                    // record at mid sorts after the target: continue in the lower half
                    high = mid - 1;
                } else if (comparisonResult < 0) {
                    // record at mid sorts before the target: continue in the upper half
                    low = mid + 1;
                } else {
                    // cache offset
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;
                    // and return item
                    return itemOut;
                }
            }
        }
    }

    if (!loadedFromMemory) {
        // load from disk
        // NOTE(review): the file is opened "rw" although this method only reads from it.
        RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw");

        // allocate buffer upfront
        byte[] onDiskItemBuffer = new byte[OnDiskCrawlHistoryItem.ON_DISK_SIZE];
        DataInputBuffer inputStream = new DataInputBuffer();

        try {

            int low = 0;
            int high = (int) (file.length() / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;

            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();

            while (low <= high) {

                int mid = low + ((high - low) / 2);

                // Seek to the record. The offset is computed in long arithmetic: an int
                // product wraps around once the data file reaches 2 GiB.
                file.seek((long) mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                // read the data structure
                file.readFully(onDiskItemBuffer, 0, onDiskItemBuffer.length);
                inputStream.reset(onDiskItemBuffer, 0, OnDiskCrawlHistoryItem.ON_DISK_SIZE);

                // deserialize
                itemOut.deserialize(inputStream);

                // now compare it against desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);

                if (comparisonResult > 0) {
                    high = mid - 1;
                } else if (comparisonResult < 0) {
                    low = mid + 1;
                } else {
                    // cache offset
                    // NOTE(review): still int arithmetic because the declared type of
                    // _fileOffset is not visible here; widen this too if the field is a long.
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;
                    // and return item
                    return itemOut;
                }
            }
        } finally {
            file.close();
        }
    }
    return null;
}

From source file:org.commoncrawl.service.queryserver.query.InverseLinksByDomainQuery.java

License:Open Source License

/**
 * Collects every inverse-link record whose key starts with {@code targetRootDomainFP},
 * across all database shards, and merge-sorts them into a sequence file at
 * {@code finalOutputPath}.
 *
 * <p>For each shard the method looks the root domain up in the phase 3 index file to get
 * that shard's starting item index, positions a reader on the shard's phase 2 data file
 * at that index, and spills records to the merger until the leading long of the key no
 * longer equals {@code targetRootDomainFP}.
 *
 * <p>An {@link IOException} raised during the scan is logged and swallowed (best effort),
 * so callers are not told about a partial result. The scratch directory under
 * {@code /tmp} is removed on every exit path.
 *
 * @param fs                 file system holding the inverse link database
 * @param conf               Hadoop configuration
 * @param databaseId         id used to build the database paths
 * @param targetRootDomainFP root domain fingerprint to collect records for
 * @param outputFileSystem   file system the result (and its index) is written to
 * @param finalOutputPath    path of the resulting sequence file
 * @throws IOException if removing the scratch directory fails
 */
static void collectAllTopLevelDomainRecordsByDomain(FileSystem fs, Configuration conf, long databaseId,
        long targetRootDomainFP, FileSystem outputFileSystem, Path finalOutputPath) throws IOException {

    // scratch directory passed to the merge sorter together with the local file system
    File tempFile = new File("/tmp/inverseLinksReport-" + System.currentTimeMillis());
    tempFile.mkdir();

    try {
        // create the final output spill writer ...
        SequenceFileSpillWriter<FlexBuffer, URLFPV2> spillwriter = new SequenceFileSpillWriter<FlexBuffer, URLFPV2>(
                outputFileSystem, conf, finalOutputPath, FlexBuffer.class, URLFPV2.class,
                new PositionBasedSequenceFileIndex.PositionBasedIndexWriter(outputFileSystem,
                        PositionBasedSequenceFileIndex.getIndexNameFromBaseName(finalOutputPath)),
                true);

        try {

            MergeSortSpillWriter<FlexBuffer, URLFPV2> finalMerger = new MergeSortSpillWriter<FlexBuffer, URLFPV2>(
                    conf, spillwriter, FileSystem.getLocal(conf), new Path(tempFile.getAbsolutePath()), null,
                    new ComplexKeyComparator(), FlexBuffer.class, URLFPV2.class, true, null);

            try {
                // The index shard depends only on the target fingerprint, so it (and the
                // index file path) is derived once rather than on every pass of the loop.
                final int indexShardId = (int) ((targetRootDomainFP & Integer.MAX_VALUE)
                        % CrawlEnvironment.NUM_DB_SHARDS);
                final Path indexFilePath = new Path("crawl/inverseLinkDB_ByDomain/" + databaseId
                        + "/phase3Data/part-" + NUMBER_FORMAT.format(indexShardId));

                for (int targetShardId = 0; targetShardId < CrawlEnvironment.NUM_DB_SHARDS; ++targetShardId) {
                    LOG.info("rootDomain is:" + targetRootDomainFP + " ShardId:" + indexShardId + " Index Path:"
                            + indexFilePath);
                    // 1. scan domainFP to index file first
                    // 2. given index, scan index->pos file to find scan start position
                    // 3. given scan start position, scan forward until fp match is found.
                    // 4. collect all matching entries and spill them to the merger

                    FSDataInputStream indexDataInputStream = fs.open(indexFilePath);
                    try {
                        TFile.Reader reader = new TFile.Reader(indexDataInputStream,
                                fs.getLength(indexFilePath), conf);
                        try {
                            TFile.Reader.Scanner scanner = reader.createScanner();

                            try {
                                // the index key is the root domain fingerprint written as a long
                                DataOutputBuffer keyBuffer = new DataOutputBuffer();
                                keyBuffer.writeLong(targetRootDomainFP);
                                if (scanner.seekTo(keyBuffer.getData(), 0, keyBuffer.getLength())) {
                                    // the value is a run of (shard index, data offset) int pairs
                                    DataInputStream valueStream = scanner.entry().getValueStream();
                                    int dataOffsetOut = -1;
                                    while (valueStream.available() > 0) {
                                        // read entries looking for our specific entry
                                        int shardIdx = valueStream.readInt();
                                        int dataOffset = valueStream.readInt();
                                        if (shardIdx == targetShardId) {
                                            dataOffsetOut = dataOffset;
                                            break;
                                        }
                                    }
                                    LOG.info("Index Search Yielded:" + dataOffsetOut);
                                    if (dataOffsetOut != -1) {
                                        // ok create a data path
                                        Path finalDataPath = new Path("crawl/inverseLinkDB_ByDomain/"
                                                + databaseId + "/phase2Data/data-"
                                                + NUMBER_FORMAT.format(targetShardId));
                                        Path finalDataIndexPath = new Path("crawl/inverseLinkDB_ByDomain/"
                                                + databaseId + "/phase2Data/data-"
                                                + NUMBER_FORMAT.format(targetShardId) + ".index");
                                        // check to see if index is already loaded ...
                                        PositionBasedSequenceFileIndex<FlexBuffer, TextBytes> index = null;
                                        synchronized (_shardToIndexMap) {
                                            index = _shardToIndexMap.get(targetShardId);
                                        }
                                        if (index == null) {
                                            LOG.info("Loading Index from Path:" + finalDataIndexPath);
                                            // load index (outside the lock, so two threads may
                                            // both load it; the later put wins)
                                            index = new PositionBasedSequenceFileIndex<FlexBuffer, TextBytes>(
                                                    fs, finalDataIndexPath, FlexBuffer.class, TextBytes.class);
                                            // put in cache
                                            synchronized (_shardToIndexMap) {
                                                _shardToIndexMap.put(targetShardId, index);
                                            }
                                        }

                                        LOG.info("Initializing Data Reader at Path:" + finalDataPath);
                                        // ok time to create a reader
                                        SequenceFile.Reader dataReader = new SequenceFile.Reader(fs,
                                                finalDataPath, conf);

                                        try {
                                            LOG.info("Seeking Reader to Index Position:" + dataOffsetOut);
                                            index.seekReaderToItemAtIndex(dataReader, dataOffsetOut);

                                            FlexBuffer keyBytes = new FlexBuffer();
                                            URLFPV2 sourceFP = new URLFPV2();
                                            DataInputBuffer keyReader = new DataInputBuffer();

                                            // ok ready to go ...
                                            while (dataReader.next(keyBytes, sourceFP)) {
                                                // point the reader at the key bytes just read
                                                keyReader.reset(keyBytes.get(), keyBytes.getOffset(),
                                                        keyBytes.getCount());

                                                // the key starts with the target fingerprint
                                                long targetFP = keyReader.readLong();

                                                if (targetRootDomainFP == targetFP) {
                                                    finalMerger.spillRecord(keyBytes, sourceFP);
                                                } else {
                                                    LOG.info("FP:" + targetFP + " > TargetFP:"
                                                            + targetRootDomainFP + " Exiting Iteration Loop");
                                                    break;
                                                }
                                            }
                                        } finally {
                                            LOG.info("Closing Reader");
                                            dataReader.close();
                                        }
                                    }
                                }
                            } finally {
                                LOG.info("Closing Scanner");
                                scanner.close();
                            }

                        } finally {
                            LOG.info("Closing TFile Reader");
                            reader.close();
                        }
                    } finally {
                        LOG.info("Closing InputStream");
                        indexDataInputStream.close();
                    }
                }
            } finally {
                finalMerger.close();
            }
        } finally {
            spillwriter.close();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    } finally {
        // The merger is closed by now, so the scratch directory is no longer in use.
        // Deleting it here (not only on IOException) stops successful runs and unchecked
        // failures from leaving a directory behind in /tmp.
        FileUtils.recursivelyDeleteFile(tempFile);
    }

}

From source file:org.commoncrawl.service.queryserver.query.InverseLinksByDomainQuery.java

License:Open Source License

/**
 * Command-line smoke test: collects the inverse-link records for the root domain of a URL
 * into a temporary local sequence file, then logs the page rank and URL stored in each key.
 *
 * @param args {@code args[0]} is the URL to look up; {@code args[1]} is a shard id that is
 *             only echoed to the log
 */
public static void main(String[] args) {
    // both arguments are read below; fail with a usage message instead of an
    // ArrayIndexOutOfBoundsException when they are missing
    if (args == null || args.length < 2) {
        LOG.error("Usage: InverseLinksByDomainQuery <url> <shardId>");
        return;
    }

    // initialize ...
    Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("core-site.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("mapred-site.xml");

    LOG.info("URL:" + args[0] + " ShardId:" + args[1]);

    try {
        // NOTE: the temp file is intentionally left in place after the run
        File tempFile = File.createTempFile("inverseLinksReportTest", "seq");
        Path outputPath = new Path(tempFile.getAbsolutePath());

        FileSystem fs = FileSystem.get(conf);
        FileSystem localFileSystem = FileSystem.getLocal(conf);

        URLFPV2 fp = URLUtils.getURLFPV2FromURL(args[0]);
        if (fp != null) {
            collectAllTopLevelDomainRecordsByDomain(fs, conf, 1282844121161L, fp.getRootDomainHash(),
                    localFileSystem, outputPath);

            SequenceFile.Reader reader = new SequenceFile.Reader(localFileSystem, outputPath, conf);
            try {
                FlexBuffer key = new FlexBuffer();
                URLFPV2 src = new URLFPV2();
                TextBytes url = new TextBytes();

                DataInputBuffer inputBuffer = new DataInputBuffer();

                while (reader.next(key, src)) {
                    // key layout as read here: long fingerprint, float page rank,
                    // vint text length, then the text bytes
                    inputBuffer.reset(key.get(), key.getOffset(), key.getCount());
                    // skip over the target fingerprint
                    inputBuffer.readLong();
                    float pageRank = inputBuffer.readFloat();
                    int textLen = WritableUtils.readVInt(inputBuffer);
                    // wrap the url bytes in place, starting at the reader's current position
                    url.set(key.get(), inputBuffer.getPosition(), textLen);
                    LOG.info("PR:" + pageRank + " URL:" + url.toString());
                }
            } finally {
                reader.close();
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}

From source file:org.commoncrawl.service.queryserver.query.URLLinksQuery.java

License:Open Source License

/**
 * Reads one page of {@code InlinkingDomainInfo} records from an index file into
 * {@code resultOut}.
 *
 * <p>File layout as read here: an int record count, then one int per record giving the
 * record's offset relative to the end of that header, then the record data itself.
 *
 * <p>For ascending order the page covers records
 * {@code [pageNumber * pageSize, min(start + pageSize, total))}. For any other sort order
 * the page is counted from the end of the index; records are inserted at the front of the
 * result list only when {@code sortOrder} is {@code DESCENDING}.
 *
 * @param masterIndex     not used by this method
 * @param indexFileSystem file system holding the index file
 * @param indexPath       path of the index file
 * @param detailPath      not used by this method
 * @param sortOrder       one of the {@code ClientQueryInfo.SortOrder} constants
 * @param pageNumber      zero-based page number
 * @param pageSize        number of records per page
 * @param resultOut       receives the page; its existing results are cleared first
 * @throws IOException if the index file cannot be opened or read
 */
private static void readPaginatedInlinkingDomainInfo(final DatabaseIndexV2.MasterDatabaseIndex masterIndex,
        FileSystem indexFileSystem, Path indexPath, Path detailPath, int sortOrder, int pageNumber,
        int pageSize, QueryResult<Writable, Writable> resultOut) throws IOException {
    long startPos;
    long endPos;

    FSDataInputStream indexStream = indexFileSystem.open(indexPath);

    try {

        // read in the total record count ...
        int totalRecordCount = indexStream.readInt();

        LOG.info("***RecordCount:" + totalRecordCount + " Allocating Buffer Of:" + (totalRecordCount * 4)
                + " bytes. FileLength:" + indexFileSystem.getFileStatus(indexPath).getLen());
        // read in index header data upfront (one 4-byte offset per record)
        byte indexHeaderData[] = new byte[totalRecordCount * 4];
        indexStream.readFully(indexHeaderData);
        // record offsets in the header are relative to this position
        long detailStartPos = indexStream.getPos();
        // initialize index header reader stream
        DataInputBuffer indexHeaderStream = new DataInputBuffer();
        indexHeaderStream.reset(indexHeaderData, 0, indexHeaderData.length);

        resultOut.getResults().clear();
        resultOut.setPageNumber(pageNumber);
        resultOut.setTotalRecordCount(totalRecordCount);

        // Page bounds are computed in long arithmetic; the int products used previously
        // could overflow before being widened.
        if (sortOrder == ClientQueryInfo.SortOrder.ASCENDING) {
            startPos = (long) pageNumber * pageSize;
            endPos = Math.min(startPos + pageSize, totalRecordCount);
        } else {
            // count the page back from the end; the last page may be short, so clamp the
            // start at zero only after deriving the end position
            long pageStart = totalRecordCount - ((pageNumber + 1L) * pageSize);
            endPos = pageStart + pageSize;
            startPos = Math.max(0, pageStart);
        }

        if (startPos < totalRecordCount) {

            // position the header reader on the first offset of the page
            indexHeaderStream.skip(startPos * 4);

            for (long i = startPos; i < endPos; ++i) {

                // read data offset ...
                int domainDataPos = indexHeaderStream.readInt();
                // seek to it
                indexStream.seek(detailStartPos + domainDataPos);
                // read the detail data
                InlinkingDomainInfo domainInfo = new InlinkingDomainInfo();
                domainInfo.readFields(indexStream);
                // ok extract name
                String domainName = domainInfo.getDomainName();
                if (domainName.length() == 0) {
                    //TODO: NEED TO TRACK THIS DOWN
                    domainName = "<<OOPS-NULL>>";
                }
                Text key = new Text(domainName);
                domainInfo.setFieldClean(InlinkingDomainInfo.Field_DOMAINNAME);

                if (sortOrder == ClientQueryInfo.SortOrder.DESCENDING) {
                    // prepend so the page comes out in reverse file order
                    resultOut.getResults().add(0, new QueryResultRecord<Writable, Writable>(key, domainInfo));
                } else {
                    resultOut.getResults().add(new QueryResultRecord<Writable, Writable>(key, domainInfo));
                }
            }
        }
    } finally {
        indexStream.close();
    }
}

From source file:org.commoncrawl.util.CharsetUtils.java

License:Open Source License

/**
 * Last resort - detect the character encoding of a byte range using a charset detector.
 *
 * <p>With {@code EncodingDetector.MOZILLA} the nsDetector (jchardet) is used: content
 * reported as ASCII yields {@code null}; otherwise the charset recorded in the detector
 * state is returned, falling back to the first probable charset. With any other detector
 * type the ICU {@code CharsetDetector} is used and the name of the highest-confidence
 * match is returned.
 *
 * @param contentBytes buffer holding the content; may be {@code null}
 * @param offset       index of the first content byte in {@code contentBytes}
 * @param length       number of content bytes
 * @param detectorType which detector implementation to use
 * @return the detected charset name, or {@code null} if the input is empty, is ASCII
 *         (MOZILLA detector only), or no charset could be determined
 */
public static String detectCharacterEncoding(byte[] contentBytes, int offset, int length,
        EncodingDetector detectorType) {

    if (contentBytes == null || length == 0) {
        return null;
    }

    if (detectorType == EncodingDetector.MOZILLA) {
        DetectorState state = new DetectorState();

        nsDetector detector = new nsDetector(nsPSMDetector.ALL);

        if (offset != 0) {
            // the detector calls below take no offset, so copy the range to a
            // zero-based buffer (capped at MAX_CHARS_TO_DETECT bytes)
            int tempBufferLen = Math.min(length, MAX_CHARS_TO_DETECT);
            byte[] tempBuffer = new byte[tempBufferLen];
            System.arraycopy(contentBytes, offset, tempBuffer, 0, tempBufferLen);
            contentBytes = tempBuffer;
            offset = 0;
            length = tempBufferLen;
        }

        detector.Init(state);

        boolean isAscii = detector.isAscii(contentBytes, length);

        if (!isAscii) {
            // DoIt's return value signals that detection has finished, not that the
            // content is ASCII. It used to be stored in isAscii, which made a confident
            // detection return null instead of the detected charset.
            detector.DoIt(contentBytes, Math.min(length, MAX_CHARS_TO_DETECT), false);
        }
        detector.DataEnd();

        if (isAscii) {
            return null;
        }
        // presumably filled in by the detector's callback into DetectorState - verify
        if (state._detectedCharset != null) {
            return state._detectedCharset;
        }
        String prob[] = detector.getProbableCharsets();
        if (prob != null && prob.length != 0) {
            return prob[0];
        }
        return null;
    }

    // instantiate icu charset detector ...
    CharsetDetector detector = new CharsetDetector();
    DataInputBuffer buffer = new DataInputBuffer();
    buffer.reset(contentBytes, offset, length);
    try {
        detector.setText(buffer);
        CharsetMatch matches[] = detector.detectAll();
        if (matches != null && matches.length != 0) {
            // pick the match with the highest confidence; the first one wins ties
            CharsetMatch bestMatch = matches[0];
            for (int i = 1; i < matches.length; ++i) {
                if (matches[i].getConfidence() > bestMatch.getConfidence()) {
                    bestMatch = matches[i];
                }
            }
            return bestMatch.getName();
        }
    } catch (Exception e) {
        // detection is best effort: log and report "unknown"
        LOG.error(CCStringUtils.stringifyException(e));
    }
    return null;
}

From source file:org.commoncrawl.util.MultiFileMergeUtils.java

License:Open Source License

/**
 * Scans a sequence file of {@code URLFPV2} keys for the first key equal to
 * {@code targetItem} and logs the key that immediately follows it.
 *
 * <p>Nothing is logged if the target is not found. If the target is the last key in the
 * file, that fact is logged instead of a following key.
 *
 * @param fs         file system holding the sequence file
 * @param path       path of the sequence file
 * @param conf       Hadoop configuration
 * @param targetItem the key to search for
 * @throws IOException if the file cannot be opened or read
 */
static void scanToItemThenDisplayNext(FileSystem fs, Path path, Configuration conf, URLFPV2 targetItem)
        throws IOException {
    DataOutputBuffer rawKey = new DataOutputBuffer();
    DataInputBuffer keyDataStream = new DataInputBuffer();

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    // close the reader on every exit path, including read failures
    try {
        ValueBytes valueBytes = reader.createValueBytes();

        while (reader.nextRawKey(rawKey) != -1) {
            URLFPV2 keyObject = new URLFPV2();
            keyDataStream.reset(rawKey.getData(), 0, rawKey.getLength());
            keyObject.readFields(keyDataStream);
            // rawKey is appended to by nextRawKey, so clear it before the next read
            rawKey.reset();
            // the value must be consumed to advance to the next record
            reader.nextRawValue(valueBytes);

            if (keyObject.compareTo(targetItem) == 0) {

                if (reader.nextRawKey(rawKey) == -1) {
                    // The target is the last record. rawKey is empty here, so decoding
                    // it (as the code used to do unconditionally) would fail.
                    LOG.info("Target Domain:" + targetItem.getDomainHash() + " FP:" + targetItem.getUrlHash()
                            + " is the last item in the file");
                } else {
                    URLFPV2 nextKeyObject = new URLFPV2();
                    keyDataStream.reset(rawKey.getData(), 0, rawKey.getLength());
                    nextKeyObject.readFields(keyDataStream);
                    LOG.info("Target Domain:" + targetItem.getDomainHash() + " FP:" + targetItem.getUrlHash()
                            + " NextDomain:" + nextKeyObject.getDomainHash() + " NextHash:"
                            + nextKeyObject.getUrlHash());
                }
                break;
            }
        }
    } finally {
        reader.close();
    }
}

From source file:org.commoncrawl.util.MultiFileMergeUtils.java

License:Open Source License

/**
 * Reads keys from the start of a sequence file of {@code URLFPV2} keys and adds them to
 * {@code outputSet}, stopping after {@code nItems} keys or at end of file.
 *
 * <p>The stop test is an exact match on the running count, so a non-positive
 * {@code nItems} never matches and every key in the file is added.
 *
 * @param fs        file system holding the sequence file
 * @param path      path of the sequence file
 * @param conf      Hadoop configuration
 * @param outputSet set that receives the decoded keys
 * @param nItems    number of keys to read
 * @throws IOException if the file cannot be opened or read
 */
static void addFirstNFPItemsToSet(FileSystem fs, Path path, Configuration conf, Set<URLFPV2> outputSet,
        int nItems) throws IOException {
    DataOutputBuffer rawKey = new DataOutputBuffer();
    DataInputBuffer keyDataStream = new DataInputBuffer();

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    // close the reader on every exit path, including read failures
    try {
        ValueBytes valueBytes = reader.createValueBytes();

        int i = 0;
        while (reader.nextRawKey(rawKey) != -1) {
            // a fresh key object per record, since the set keeps a reference to it
            URLFPV2 keyObject = new URLFPV2();
            keyDataStream.reset(rawKey.getData(), 0, rawKey.getLength());
            keyObject.readFields(keyDataStream);
            outputSet.add(keyObject);
            // rawKey is appended to by nextRawKey, so clear it before the next read
            rawKey.reset();
            // the value must be consumed to advance to the next record
            reader.nextRawValue(valueBytes);

            if (++i == nItems) {
                break;
            }
        }
    } finally {
        reader.close();
    }
}

From source file:org.commoncrawl.util.TextBytes.java

License:Open Source License

/**
 * Self-test for TextBytes: checks string conversion from an offset backing array, the
 * serialized form (vint length followed by the raw bytes), and that readFields stops
 * using the caller-supplied backing array.
 */
public static void main(String[] args) {
    String expectedText = "A Test Strnig";
    byte[] expectedBytes = expectedText.getBytes();
    int textLength = expectedBytes.length;

    // backing store twice as large as needed, with the text placed in its upper half
    byte[] backingStore = new byte[textLength * 2];
    System.arraycopy(expectedBytes, 0, backingStore, textLength, textLength);

    // wrap the upper half of the backing store
    TextBytes textBytes = new TextBytes();
    textBytes.set(backingStore, textLength, textLength);

    // the string view must match the original text
    String roundTripped = textBytes.toString();
    Assert.assertTrue(expectedText.equals(roundTripped));

    DataOutputBuffer serialized = new DataOutputBuffer();
    try {
        textBytes.write(serialized);

        // the serialized form starts with the byte count as a vint ...
        DataInputBuffer input = new DataInputBuffer();
        input.reset(serialized.getData(), 0, serialized.size());
        int encodedLength = WritableUtils.readVInt(input);
        Assert.assertTrue(encodedLength == textLength);

        // ... and the rest must be exactly the original bytes
        int payloadStart = input.getPosition();
        int payloadDiff = WritableComparator.compareBytes(expectedBytes, 0, textLength, serialized.getData(),
                payloadStart, serialized.getLength() - payloadStart);
        Assert.assertEquals(payloadDiff, 0);

        // rewind and deserialize back into the same object
        input.reset(serialized.getData(), 0, serialized.size());
        textBytes.readFields(input);

        // the object must no longer point at the original backing store
        Assert.assertTrue(textBytes.getBytes() != backingStore);

        // and its content must still equal the original bytes
        int contentDiff = WritableComparator.compareBytes(expectedBytes, 0, textLength, textBytes.getBytes(),
                textBytes.getOffset(), textBytes.getLength());
        Assert.assertEquals(contentDiff, 0);

    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:org.commoncrawl.util.Tuples.java

License:Open Source License

/**
 * Self-test for IntAndTwoTextByteTuples: checks the comparison results for differences
 * in each field, then checks that two tuples written back to back can be read back and
 * compare equal to the originals.
 */
static void validateTextTuple() {
    IntAndTwoTextByteTuples first = new IntAndTwoTextByteTuples();
    IntAndTwoTextByteTuples second = new IntAndTwoTextByteTuples();

    // same int, same first text, second text differs only in its last character
    first.setIntValue(1);
    second.setIntValue(1);
    first.setTextValueBytes(new TextBytes("AAAAA"));
    second.setTextValueBytes(new TextBytes("AAAAA"));
    first.setSecondTextValueBytes(new TextBytes("AAAAA"));
    second.setSecondTextValueBytes(new TextBytes("AAAAB"));

    // only the second text differs
    Assert.assertTrue(first.compareTo(second) == -1);

    // a larger first text outweighs the smaller second text
    first.setTextValueBytes(new TextBytes("BAAAA"));
    Assert.assertTrue(first.compareTo(second) == 1);

    // a larger int on the other side outweighs both texts
    second.setIntValue(2);
    Assert.assertTrue(first.compareTo(second) == -1);

    // put the text fields back (the int values stay at 1 and 2)
    first.setTextValueBytes(new TextBytes("AAAAA"));
    second.setTextValueBytes(new TextBytes("AAAAA"));
    first.setSecondTextValueBytes(new TextBytes("AAAAA"));
    second.setSecondTextValueBytes(new TextBytes("AAAAB"));

    DataOutputBuffer serialized = new DataOutputBuffer();

    try {
        // write both tuples back to back into one buffer
        first.write(serialized);
        second.write(serialized);

        DataInputBuffer input = new DataInputBuffer();
        input.reset(serialized.getData(), 0, serialized.getLength());

        // read them back in the same order into fresh instances
        IntAndTwoTextByteTuples firstCopy = new IntAndTwoTextByteTuples();
        IntAndTwoTextByteTuples secondCopy = new IntAndTwoTextByteTuples();
        firstCopy.readFields(input);
        secondCopy.readFields(input);

        Assert.assertTrue(firstCopy.compareTo(first) == 0);
        Assert.assertTrue(secondCopy.compareTo(second) == 0);

    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:org.commoncrawl.util.Tuples.java

License:Open Source License

/**
 * Self-test for IntBufferTuple: two tuples backed by the same offset region of an
 * over-allocated array must compare equal, still compare equal after a write/read round
 * trip, and serialize to the same bytes again afterwards.
 */
static void validateBufferTuple() {

    String sampleText = "A Test Strnig";
    byte[] sampleBytes = sampleText.getBytes();
    int sampleLength = sampleBytes.length;

    // backing array twice as large as needed, with the payload in its upper half
    byte[] backingStore = new byte[sampleLength * 2];
    System.arraycopy(sampleBytes, 0, backingStore, sampleLength, sampleLength);

    IntBufferTuple first = new IntBufferTuple();
    IntBufferTuple second = new IntBufferTuple();

    // identical int value and identical buffer region for both tuples
    first.setIntValue(1);
    second.setIntValue(1);
    first.getBuffer().set(backingStore, sampleLength, sampleLength);
    second.getBuffer().set(backingStore, sampleLength, sampleLength);

    Assert.assertTrue(first.compareTo(second) == 0);

    DataOutputBuffer firstPass = new DataOutputBuffer();

    try {
        // serialize both tuples back to back ...
        first.write(firstPass);
        second.write(firstPass);

        // ... and read them back into the same instances
        DataInputBuffer input = new DataInputBuffer();
        input.reset(firstPass.getData(), 0, firstPass.getLength());
        first.readFields(input);
        second.readFields(input);

        Assert.assertTrue(first.compareTo(second) == 0);

        // serializing the re-read tuples must reproduce the first serialization exactly
        DataOutputBuffer secondPass = new DataOutputBuffer();
        first.write(secondPass);
        second.write(secondPass);

        int serializedDiff = WritableComparator.compareBytes(firstPass.getData(), 0, firstPass.getLength(),
                secondPass.getData(), 0, secondPass.getLength());
        Assert.assertTrue(serializedDiff == 0);
    } catch (IOException e) {
        e.printStackTrace();
    }

}