Example usage for java.io DataOutput writeInt

List of usage examples for java.io DataOutput writeInt

Introduction

On this page you can find example usage for java.io DataOutput writeInt.

Prototype

void writeInt(int v) throws IOException;

Source Link

Document

Writes an int value, which is comprised of four bytes, to the output stream.

Usage

From source file: org.commoncrawl.service.listcrawler.HDFSFileIndex.java

/**
 * Builds an index over the supplied fingerprint/offset tuples and writes it to
 * {@code indexFileOut}.
 *
 * <p>The data is written to {@code indexFileOut} in this order:
 * <ol>
 *   <li>the serialized bloom filter bytes;</li>
 *   <li>the hint buffer: an int hint count followed by one record per hint
 *       (long fingerprint, int data offset, int position of the hint's
 *       sub-index inside the data buffer);</li>
 *   <li>an int holding the number of bytes of rice-coded data;</li>
 *   <li>the rice-coded sub-index data itself.</li>
 * </ol>
 *
 * <p>A hint is recorded for every {@code INDEX_HINT_RECORD_INTERVAL}-th tuple
 * (after sorting) and for the last tuple. For every hint except the last one,
 * the tuples that follow it are stored as two rice-coded runs: fingerprint
 * deltas (relative to the previous tuple) and absolute offsets.
 *
 * <p>Side effect: {@code offsetInfo} is sorted in place by ascending fingerprint.
 *
 * <p>NOTE(review): {@code offsetInfo.get(0)} is read unconditionally, so an
 * empty vector fails with an index-out-of-bounds exception before anything is
 * written -- confirm callers never pass an empty vector.
 *
 * @param offsetInfo   tuples to index; reordered by this call
 * @param indexFileOut destination for the serialized index
 * @throws IOException      if serializing the bloom filter or writing to
 *                          {@code indexFileOut} fails
 * @throws RuntimeException if a rice-coded fingerprint run is larger than 64
 *                          bits per item, or if the bloom filter does not
 *                          report the first (pre-sort) fingerprint as present
 */
public static void writeIndex(Vector<FingerprintAndOffsetTuple> offsetInfo, DataOutput indexFileOut)
        throws IOException {

    // captured BEFORE sorting; only used at the end as a sanity probe against the bloom filter
    long firstFingerprint = offsetInfo.get(0)._fingerprint;

    // NOTE(review): second argument is presumably the target false-positive rate -- confirm against BloomFilter
    BloomFilter bloomFilter = new BloomFilter(offsetInfo.size(), 0.001201);

    // sort the offset list by ascending fingerprint (mutates the caller's vector)
    Collections.sort(offsetInfo, new Comparator<FingerprintAndOffsetTuple>() {

        @Override
        public int compare(FingerprintAndOffsetTuple o1, FingerprintAndOffsetTuple o2) {
            return (o1._fingerprint < o2._fingerprint) ? -1 : o1._fingerprint > o2._fingerprint ? 1 : 0;
        }

    });
    // now we need to write the index out

    // allocate working set buffers ...
    // NOTE(review): the data buffer is a fixed 16 bytes per tuple; nothing here checks that the
    // rice-coded runs plus their per-run headers fit, so an overflowing put would throw -- confirm
    // the budget is always sufficient.
    ByteBuffer indexDataBuffer = ByteBuffer.allocate(offsetInfo.size() * 16);
    // hint buffer: room for the hint records plus 4 bytes for the leading int hint count
    ByteBuffer indexHintsBuffer = ByteBuffer
            .allocate(((((offsetInfo.size() + INDEX_HINT_RECORD_INTERVAL) / INDEX_HINT_RECORD_INTERVAL) + 1)
                    * INDEX_HINT_SIZE) + 4);

    // build index hints placeholder
    Vector<HDFSFileIndex.IndexItem> hints = new Vector<HDFSFileIndex.IndexItem>();
    // illustration (assuming an interval of 100): hints land on indices 0 100 200 300 400 500
    for (int i = 0; i < offsetInfo.size(); ++i) {

        // every INDEX_HINT_RECORD_INTERVAL-th item, and always the very last item, becomes a hint
        if (i % INDEX_HINT_RECORD_INTERVAL == 0 || (i == (offsetInfo.size() - 1))) {
            // NOTE(review): narrowing cast of _offset to int -- confirm offsets always fit in 32 bits
            HDFSFileIndex.IndexItem hint = new IndexItem(offsetInfo.get(i)._fingerprint,
                    (int) offsetInfo.get(i)._offset);
            hints.add(hint);
            // add fingerprint to bloom filter
            bloomFilter.add(hint.fingerprint);
        }
    }
    // start off the index hints buffer with the number of hint records that follow
    indexHintsBuffer.putInt(hints.size());

    // track total bits used ...
    // NOTE(review): these three counters are accumulated below but never read in this method
    int bitsUsedForHints = 0;
    int bitsUsedForFingerprints = 0;
    int bitsUsedForOffsets = 0;

    // now start populating index data ...
    for (int hintIdx = 0; hintIdx < hints.size(); ++hintIdx) {

        HDFSFileIndex.IndexItem hint = hints.get(hintIdx);

        LOG.info("IndexWriter FP:" + hint.fingerprint);
        // hint record: fingerprint, data offset, then where this hint's sub-index starts in the data buffer
        indexHintsBuffer.putLong(hint.fingerprint);
        indexHintsBuffer.putInt(hint.dataOffset);
        indexHintsBuffer.putInt(indexDataBuffer.position());

        // update stats
        bitsUsedForHints += INDEX_HINT_SIZE * 8;

        // the last hint has no sub-index; every other hint gets one
        if (hintIdx < hints.size() - 1) {
            // track cumulative delta and offset values (for average calc later)
            double cumilativeDelta = 0;
            long cumilativeOffset = 0;

            int subIndexItemCount = 0;
            int nonZeroDeltaCount = 0;

            Vector<HDFSFileIndex.IndexItem> subHints = new Vector<HDFSFileIndex.IndexItem>();

            // initialize last fingerprint to indexed value ...
            long lastFingerprint = hint.fingerprint;

            // first collect values in between index hints: items after this hint's position up to
            // (but excluding) the next interval boundary, clipped to the end of the list. When the
            // final item is not on an interval boundary it is therefore collected here as well as
            // being emitted as the final hint.
            for (int nonIndexItem = (hintIdx * INDEX_HINT_RECORD_INTERVAL) + 1; nonIndexItem < ((hintIdx + 1)
                    * INDEX_HINT_RECORD_INTERVAL); ++nonIndexItem) {
                if (nonIndexItem >= offsetInfo.size())
                    break;

                // calculate fingerprint delta relative to the previous item ...
                long fingerprintDelta = offsetInfo.get(nonIndexItem)._fingerprint - lastFingerprint;
                LOG.info("IndexWriter FP:" + offsetInfo.get(nonIndexItem)._fingerprint + " Delta:"
                        + fingerprintDelta);

                // only non-zero deltas (i.e. non-duplicate fingerprints) contribute to the average
                if (fingerprintDelta != 0) {

                    cumilativeDelta += (double) fingerprintDelta;
                    LOG.info("Cumilative Delta is:" + cumilativeDelta);
                    nonZeroDeltaCount++;
                }

                // offsets are accumulated (and stored below) as absolute values, not deltas
                cumilativeOffset += offsetInfo.get(nonIndexItem)._offset;

                ++subIndexItemCount;

                // add to collection vector: IndexItem.fingerprint carries the DELTA here
                subHints.add(new IndexItem(fingerprintDelta, (int) offsetInfo.get(nonIndexItem)._offset));

                // remember the last fingerprint ...
                lastFingerprint = offsetInfo.get(nonIndexItem)._fingerprint;

                // add item to bloom filter
                bloomFilter.add(lastFingerprint);
            }

            // calculate average delta value
            // NOTE(review): if every collected item is a duplicate (nonZeroDeltaCount == 0) this is
            // 0.0 / 0.0 = NaN; the resulting m then depends on how lg() treats NaN -- confirm.
            double averageDeltaValue = (double) cumilativeDelta / (double) nonZeroDeltaCount;
            // calculate m (the rice coding parameter) for fingerprint deltas
            int mForFingerprints = (int) Math.floor(lg(averageDeltaValue));
            LOG.info("Average Delta Value is:" + averageDeltaValue + " m is:" + mForFingerprints);
            // calculate average offset value
            // NOTE(review): an average of 0 (all collected offsets zero) would be passed to lg() --
            // confirm lg() and RiceCoding cope with that.
            double averageOffsetValue = (double) cumilativeOffset / (double) subIndexItemCount;
            // calculate m for offsets
            int mForOffsets = (int) Math.floor(lg(averageOffsetValue));

            // create one rice coder per run
            RiceCoding riceCodeFP = new RiceCoding(mForFingerprints);
            RiceCoding riceCodeOffsets = new RiceCoding(mForOffsets);

            // populate bits
            for (HDFSFileIndex.IndexItem subItemHint : subHints) {
                if (subItemHint.fingerprint == 0) {
                    LOG.warn("Zero Delta for Fingerprint Detected.There are two duplicate entires in log!");
                }
                // values are stored biased by +1 (presumably so that zero is never encoded -- confirm)
                riceCodeFP.addItem(subItemHint.fingerprint + 1);
                riceCodeOffsets.addItem(subItemHint.dataOffset + 1);
            }
            // now track bits used ...
            bitsUsedForFingerprints += riceCodeFP.getNumBits();
            bitsUsedForOffsets += riceCodeOffsets.getNumBits();

            // write out metadata

            // save the current position (start of this sub-index; used for validation below)
            int currentPosition = indexDataBuffer.position();

            // fingerprint data: m as a single byte, bit count as a vlong, then the bits rounded up to whole bytes
            indexDataBuffer.put((byte) mForFingerprints);
            CacheManager.writeVLongToByteBuffer(indexDataBuffer, riceCodeFP.getNumBits());
            indexDataBuffer.put(riceCodeFP.getBits(), 0, (riceCodeFP.getNumBits() + 7) / 8);

            // offset data: same layout as the fingerprint run
            indexDataBuffer.put((byte) mForOffsets);
            CacheManager.writeVLongToByteBuffer(indexDataBuffer, riceCodeOffsets.getNumBits());
            indexDataBuffer.put(riceCodeOffsets.getBits(), 0, (riceCodeOffsets.getNumBits() + 7) / 8);

            // NOTE(review): this stdout line duplicates the LOG.info message that follows
            System.out.println("Item Count:" + subIndexItemCount + "FP Bits:" + subIndexItemCount * 64
                    + " Compressed:" + riceCodeFP.getNumBits() + " Offset Bits:" + subIndexItemCount * 32
                    + " Compressed:" + riceCodeOffsets.getNumBits());

            LOG.info("Item Count:" + subIndexItemCount + "FP Bits:" + subIndexItemCount * 64 + " Compressed:"
                    + riceCodeFP.getNumBits() + " Offset Bits:" + subIndexItemCount * 32 + " Compressed:"
                    + riceCodeOffsets.getNumBits());

            // sanity check: the coded fingerprint run must not exceed 64 bits per item
            if ((subIndexItemCount * 64) < riceCodeFP.getNumBits()) {
                throw new RuntimeException("Compressed Size > UnCompressed Size!!!!");
            }

            // hand the freshly written sub-index (starting at currentPosition) to the validator
            validateIndexData(indexDataBuffer.array(), currentPosition, hint.fingerprint, subHints,
                    bloomFilter);
        }

    }

    // sanity probe: the fingerprint captured before sorting must be reported by the bloom filter
    if (!bloomFilter.isPresent(firstFingerprint)) {
        throw new RuntimeException("Test Failed!");
    }

    // serialize bloom filter into an in-memory stream
    ByteStream baos = new ByteStream(1 << 12);
    BloomFilter.serializer().serialize(bloomFilter, new DataOutputStream(baos));

    // spit out final stats
    System.out.println(" Bloomfilter Size:" + baos.size() + " IndexHintBuffer Size:"
            + indexHintsBuffer.position() + " IndexDataBuffer Size:" + indexDataBuffer.position());

    // now write out the final index file ...

    // bloom filter data ...
    indexFileOut.write(baos.getBuffer(), 0, baos.size());
    // write hint data (only the bytes actually filled, not the whole backing array)
    indexFileOut.write(indexHintsBuffer.array(), 0, indexHintsBuffer.position());
    // write out rice code data size
    indexFileOut.writeInt(indexDataBuffer.position());
    // finally rice coded sub-index data
    indexFileOut.write(indexDataBuffer.array(), 0, indexDataBuffer.position());
}