Example usage for org.apache.lucene.util ArrayUtil oversize

List of usage examples for org.apache.lucene.util ArrayUtil oversize

Introduction

On this page you can find example usage of org.apache.lucene.util ArrayUtil oversize.

Prototype


public static int oversize(int minTargetSize, int bytesPerElement) 

Source Link

Document

Returns an array size >= minTargetSize, generally over-allocating exponentially to achieve amortized linear-time cost as the array grows.

Usage

From source file:com.lucure.core.codec.CompressingStoredFieldsIndexReader.java

License:Apache License

/**
 * Decodes the stored-fields index stream: a sequence of blocks, each holding
 * per-chunk doc bases and file start pointers encoded as an average value
 * plus packed-int deltas. The block list is terminated by a block whose
 * chunk count is 0.
 *
 * NOTE(review): the this.* fields assigned at the end are declared outside
 * this view; assumed to be matching array fields on the enclosing class.
 */
CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
    maxDoc = si.getDocCount();
    // Scratch arrays, grown on demand and trimmed to blockCount at the end.
    int[] docBases = new int[16];
    long[] startPointers = new long[16];
    int[] avgChunkDocs = new int[16];
    long[] avgChunkSizes = new long[16];
    PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
    PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

    final int packedIntsVersion = fieldsIndexIn.readVInt();

    int blockCount = 0;

    for (;;) {
        final int numChunks = fieldsIndexIn.readVInt();
        if (numChunks == 0) {
            // A zero chunk count terminates the block list.
            break;
        }
        if (blockCount == docBases.length) {
            // All six parallel arrays grow together to the same capacity.
            final int newSize = ArrayUtil.oversize(blockCount + 1, 8);
            docBases = Arrays.copyOf(docBases, newSize);
            startPointers = Arrays.copyOf(startPointers, newSize);
            avgChunkDocs = Arrays.copyOf(avgChunkDocs, newSize);
            avgChunkSizes = Arrays.copyOf(avgChunkSizes, newSize);
            docBasesDeltas = Arrays.copyOf(docBasesDeltas, newSize);
            startPointersDeltas = Arrays.copyOf(startPointersDeltas, newSize);
        }

        // doc bases: first doc of the block, average docs per chunk, then
        // packed per-chunk deltas relative to that average.
        docBases[blockCount] = fieldsIndexIn.readVInt();
        avgChunkDocs[blockCount] = fieldsIndexIn.readVInt();
        final int bitsPerDocBase = fieldsIndexIn.readVInt();
        if (bitsPerDocBase > 32) {
            // A doc base fits in an int; more than 32 bits means corruption.
            throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
        }
        docBasesDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED,
                packedIntsVersion, numChunks, bitsPerDocBase);

        // start pointers: same layout as doc bases, with long file offsets.
        startPointers[blockCount] = fieldsIndexIn.readVLong();
        avgChunkSizes[blockCount] = fieldsIndexIn.readVLong();
        final int bitsPerStartPointer = fieldsIndexIn.readVInt();
        if (bitsPerStartPointer > 64) {
            // A start pointer fits in a long; more than 64 bits is corrupt.
            throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
        }
        startPointersDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED,
                packedIntsVersion, numChunks, bitsPerStartPointer);

        ++blockCount;
    }

    // Trim the scratch arrays to the exact number of blocks actually read.
    this.docBases = Arrays.copyOf(docBases, blockCount);
    this.startPointers = Arrays.copyOf(startPointers, blockCount);
    this.avgChunkDocs = Arrays.copyOf(avgChunkDocs, blockCount);
    this.avgChunkSizes = Arrays.copyOf(avgChunkSizes, blockCount);
    this.docBasesDeltas = Arrays.copyOf(docBasesDeltas, blockCount);
    this.startPointersDeltas = Arrays.copyOf(startPointersDeltas, blockCount);
}

From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java

License:Apache License

/**
 * Records the just-finished document's stored-field count and end offset in
 * the per-document parallel buffers, growing them when full, then flushes
 * the pending chunk if the flush threshold has been reached.
 */
@Override
public void finishDocument() throws IOException {
    final boolean buffersFull = numBufferedDocs == this.numStoredFields.length;
    if (buffersFull) {
        // Grow both parallel arrays to the same oversized capacity.
        final int capacity = ArrayUtil.oversize(numBufferedDocs + 1, 4);
        this.numStoredFields = Arrays.copyOf(this.numStoredFields, capacity);
        endOffsets = Arrays.copyOf(endOffsets, capacity);
    }
    this.numStoredFields[numBufferedDocs] = numStoredFieldsInDoc;
    numStoredFieldsInDoc = 0;
    endOffsets[numBufferedDocs] = bufferedDocs.length;
    numBufferedDocs++;
    if (triggerFlush()) {
        flush();
    }
}

From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java

License:Apache License

/**
 * Merges stored fields from the given readers into this writer. When the
 * matching reader is a CompressingStoredFieldsReader with the same version,
 * compression mode and chunk size, whole chunks are bulk-copied via the
 * chunk iterator; otherwise documents are re-added one by one.
 *
 * <p>Fix: the corruption message previously string-concatenated the last
 * start offset and the last length ("size=" + a + b), printing two numbers
 * glued together instead of their sum; the addition is now parenthesized so
 * the message reports the actual expected chunk size.
 *
 * @return the number of documents merged
 */
@Override
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    int idx = 0;

    AccessFilteredDocsAndPositionsEnum.enableMergeAuthorizations();

    for (AtomicReader reader : mergeState.readers) {
        final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
        CompressingStoredFieldsReader matchingFieldsReader = null;
        if (matchingSegmentReader != null) {
            final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
            // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
            if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
                matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
            }
        }

        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();

        if (matchingFieldsReader == null || matchingFieldsReader.getVersion() != VERSION_CURRENT // means reader version is not the same as the writer version
                || matchingFieldsReader.getCompressionMode() != compressionMode
                || matchingFieldsReader.getChunkSize() != chunkSize) { // the way data is decompressed depends on the chunk size
            // naive merge: decode every live doc and re-add it.
            for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs,
                    maxDoc)) {
                Document doc = reader.document(i);
                addDocument(doc, mergeState.fieldInfos);
                ++docCount;
                mergeState.checkAbort.work(300);
            }
        } else {
            int docID = nextLiveDoc(0, liveDocs, maxDoc);
            if (docID < maxDoc) {
                // not all docs were deleted
                final CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader
                        .chunkIterator(docID);
                int[] startOffsets = new int[0];
                do {
                    // go to the next chunk that contains docID
                    it.next(docID);
                    // transform lengths into offsets
                    if (startOffsets.length < it.chunkDocs) {
                        startOffsets = new int[ArrayUtil.oversize(it.chunkDocs, 4)];
                    }
                    for (int i = 1; i < it.chunkDocs; ++i) {
                        startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
                    }

                    // decompress
                    it.decompress();
                    // The decompressed chunk must be exactly the sum of all
                    // per-document lengths; anything else is corruption.
                    if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) {
                        throw new CorruptIndexException("Corrupted: expected chunk size="
                                + (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1])
                                + ", got " + it.bytes.length);
                    }
                    // copy non-deleted docs
                    for (; docID < it.docBase + it.chunkDocs; docID = nextLiveDoc(docID + 1, liveDocs,
                            maxDoc)) {
                        final int diff = docID - it.docBase;
                        startDocument();
                        bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff],
                                it.lengths[diff]);
                        numStoredFieldsInDoc = it.numStoredFields[diff];
                        finishDocument();
                        ++docCount;
                        mergeState.checkAbort.work(300);
                    }
                } while (docID < maxDoc);

                it.checkIntegrity();
            }
        }
    }

    AccessFilteredDocsAndPositionsEnum.disableMergeAuthorizations();

    finish(mergeState.fieldInfos, docCount);
    return docCount;
}

From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnum.java

License:Apache License

/**
 * Returns the frame for {@code ord}, growing the frame stack with
 * pre-allocated frames whenever it is not yet large enough.
 */
private RocanaIntersectTermsEnumFrame getFrame(int ord) throws IOException {
    if (ord >= stack.length) {
        final int capacity = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        final RocanaIntersectTermsEnumFrame[] grown = new RocanaIntersectTermsEnumFrame[capacity];
        System.arraycopy(stack, 0, grown, 0, stack.length);
        // Eagerly fill the new tail so every slot is always usable.
        for (int i = stack.length; i < grown.length; i++) {
            grown[i] = new RocanaIntersectTermsEnumFrame(this, i);
        }
        stack = grown;
    }
    assert stack[ord].ord == ord;
    return stack[ord];
}

From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnum.java

License:Apache License

/**
 * Returns the FST arc cached for {@code ord}, growing the arc cache with
 * freshly allocated arcs when it is too small.
 */
private FST.Arc<BytesRef> getArc(int ord) {
    if (ord >= arcs.length) {
        final int capacity = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        @SuppressWarnings({ "rawtypes", "unchecked" })
        final FST.Arc<BytesRef>[] grown = new FST.Arc[capacity];
        System.arraycopy(arcs, 0, grown, 0, arcs.length);
        // Pre-fill the new tail so callers never observe a null slot.
        for (int i = arcs.length; i < grown.length; i++) {
            grown[i] = new FST.Arc<>();
        }
        arcs = grown;
    }
    return arcs[ord];
}

From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnumFrame.java

License:Apache License

/**
 * Loads this frame's block from the terms dictionary: optionally consumes
 * floor-block metadata from {@code frameIndexData}, then reads the entry
 * count, term suffixes, stats and metadata sections from {@code ite.in}.
 * The reads are strictly ordered to match the on-disk block layout.
 *
 * @param frameIndexData floor/index data for this frame, or null when the
 *                       frame has none
 */
void load(BytesRef frameIndexData) throws IOException {
    if (frameIndexData != null) {
        floorDataReader.reset(frameIndexData.bytes, frameIndexData.offset, frameIndexData.length);
        // Skip first long -- has redundant fp, hasTerms
        // flag, isFloor flag
        final long code = floorDataReader.readVLong();
        if ((code & RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) {
            // Floor frame
            numFollowFloorBlocks = floorDataReader.readVInt();
            nextFloorLabel = floorDataReader.readByte() & 0xff;

            // If current state is not accept, and has transitions, we must process
            // first block in case it has empty suffix:
            if (ite.runAutomaton.isAccept(state) == false && transitionCount != 0) {
                // Maybe skip floor blocks:
                assert transitionIndex == 0 : "transitionIndex=" + transitionIndex;
                while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
                    // Low bit of the floor entry is a flag; the rest is a
                    // file-pointer delta from the original fp.
                    fp = fpOrig + (floorDataReader.readVLong() >>> 1);
                    numFollowFloorBlocks--;
                    if (numFollowFloorBlocks != 0) {
                        nextFloorLabel = floorDataReader.readByte() & 0xff;
                    } else {
                        // 256 is past any byte value: no more floor blocks.
                        nextFloorLabel = 256;
                    }
                }
            }
        }
    }

    ite.in.seek(fp);
    // Header VInt: entry count in the high bits, last-in-floor flag in bit 0.
    int code = ite.in.readVInt();
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;

    // term suffixes: byte count in the high bits, leaf-block flag in bit 0.
    code = ite.in.readVInt();
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    if (suffixBytes.length < numBytes) {
        suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(suffixBytes, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);

    // stats
    numBytes = ite.in.readVInt();
    if (statBytes.length < numBytes) {
        statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(statBytes, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    metaDataUpto = 0;

    termState.termBlockOrd = 0;
    nextEnt = 0;

    // metadata
    numBytes = ite.in.readVInt();
    if (bytes.length < numBytes) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);

    if (!isLastInFloor) {
        // Sub-blocks of a single floor block are always
        // written one after another -- tail recurse:
        fpEnd = ite.in.getFilePointer();
    }

    // Necessary in case this ord previously was an auto-prefix
    // term but now we recurse to a new leaf block
    isAutoPrefixTerm = false;
}

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java

License:Apache License

/**
 * Returns the frame for {@code ord}; if the stack is too small it is grown
 * and every newly added slot is populated with a fresh frame up front.
 */
private RocanaSegmentTermsEnumFrame getFrame(int ord) throws IOException {
    if (ord >= stack.length) {
        final int capacity = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        final RocanaSegmentTermsEnumFrame[] grown = new RocanaSegmentTermsEnumFrame[capacity];
        System.arraycopy(stack, 0, grown, 0, stack.length);
        // Populate the new tail eagerly so no slot is ever null.
        for (int slot = stack.length; slot < grown.length; slot++) {
            grown[slot] = new RocanaSegmentTermsEnumFrame(this, slot);
        }
        stack = grown;
    }
    assert stack[ord].ord == ord;
    return stack[ord];
}

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnumFrame.java

License:Apache License

/**
 * Copies the remaining bytes of {@code source} (everything past the current
 * position of {@code in}) into the floor-data buffer, then reads the floor
 * header from it: the number of follow-on floor blocks and the first floor
 * label.
 */
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
    final int consumed = in.getPosition() - source.offset;
    final int remaining = source.length - consumed;
    if (remaining > floorData.length) {
        floorData = new byte[ArrayUtil.oversize(remaining, 1)];
    }
    System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, remaining);
    floorDataReader.reset(floorData, 0, remaining);
    numFollowFloorBlocks = floorDataReader.readVInt();
    nextFloorLabel = floorDataReader.readByte() & 0xff;
}

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnumFrame.java

License:Apache License

/**
 * Loads the block this frame points to (at file pointer {@code fp}) from
 * the terms dictionary: entry count, term suffixes, stats and metadata, in
 * that fixed on-disk order. A no-op if the block is already loaded
 * (nextEnt != -1).
 */
void loadBlock() throws IOException {

    // Clone the IndexInput lazily, so that consumers
    // that just pull a TermsEnum to
    // seekExact(TermState) don't pay this cost:
    ste.initIndexInput();

    if (nextEnt != -1) {
        // Already loaded
        return;
    }

    ste.in.seek(fp);
    // Header VInt: entry count in the high bits, last-in-floor flag in bit 0.
    int code = ste.in.readVInt();
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;

    assert arc == null || (isLastInFloor || isFloor) : "fp=" + fp + " arc=" + arc + " isFloor=" + isFloor
            + " isLastInFloor=" + isLastInFloor;

    // TODO: if suffixes were stored in random-access
    // array structure, then we could do binary search
    // instead of linear scan to find target term; eg
    // we could have simple array of offsets

    // term suffixes: byte count in the high bits, leaf-block flag in bit 0.
    code = ste.in.readVInt();
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    if (suffixBytes.length < numBytes) {
        suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(suffixBytes, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);

    // stats
    numBytes = ste.in.readVInt();
    if (statBytes.length < numBytes) {
        statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(statBytes, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    metaDataUpto = 0;

    state.termBlockOrd = 0;
    nextEnt = 0;
    lastSubFP = -1;

    // TODO: we could skip this if !hasTerms; but
    // that's rare so won't help much
    // metadata
    numBytes = ste.in.readVInt();
    if (bytes.length < numBytes) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);

    // Sub-blocks of a single floor block are always
    // written one after another -- tail recurse:
    fpEnd = ste.in.getFilePointer();
}

From source file:com.sindicetech.siren.analysis.filter.ASCIIFoldingExpansionFilter.java

License:Open Source License

/**
 * Returns a buffer with capacity of at least {@code newSize}, preserving
 * the contents of {@code buffer}. If the buffer is already large enough it
 * is returned unchanged.
 *
 * <p>Fix: the original method reassigned the {@code buffer} parameter,
 * which has no effect on the caller in Java (parameters are passed by
 * value) — the method was effectively a no-op, and the replacement array
 * also discarded the old contents. The (possibly new) buffer is now
 * returned; existing call sites that ignored the old void result compile
 * unchanged but must assign the return value to actually grow.
 *
 * @param buffer  the current buffer
 * @param newSize the minimum required capacity
 * @return {@code buffer} itself, or a larger copy containing its contents
 */
private char[] growBuffer(char[] buffer, final int newSize) {
    if (buffer.length >= newSize) {
        return buffer;
    }
    // Not big enough; create a new array with slight over-allocation and
    // carry the previously buffered characters over.
    final char[] grown = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
    System.arraycopy(buffer, 0, grown, 0, buffer.length);
    return grown;
}