Usage examples for `org.apache.lucene.util.ArrayUtil.oversize`:
public static int oversize(int minTargetSize, int bytesPerElement)
From source file:com.lucure.core.codec.CompressingStoredFieldsIndexReader.java
License:Apache License
/**
 * Reads the stored-fields index from {@code fieldsIndexIn} into parallel
 * per-block arrays (doc bases, start pointers, average chunk docs/sizes, and
 * packed delta readers), then trims each array to the actual block count.
 *
 * <p>The on-disk layout is consumed strictly sequentially: a VInt packed-ints
 * version, then repeated blocks of (numChunks VInt, doc-base section,
 * start-pointer section) terminated by a numChunks of 0.
 *
 * @param fieldsIndexIn positioned input for the fields index file
 * @param si segment info; only used for {@code getDocCount()}
 * @throws IOException on read failure
 * @throws CorruptIndexException if a bit width exceeds its legal maximum
 */
CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
    maxDoc = si.getDocCount();
    // Parallel arrays, grown together; 16 is just the initial capacity.
    int[] docBases = new int[16];
    long[] startPointers = new long[16];
    int[] avgChunkDocs = new int[16];
    long[] avgChunkSizes = new long[16];
    PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
    PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];
    final int packedIntsVersion = fieldsIndexIn.readVInt();
    int blockCount = 0;
    for (;;) {
        final int numChunks = fieldsIndexIn.readVInt();
        if (numChunks == 0) {
            // A block with zero chunks marks the end of the index.
            break;
        }
        if (blockCount == docBases.length) {
            // Grow all parallel arrays in lockstep; 8 = bytes per element of
            // the widest array (long), used for over-allocation sizing.
            final int newSize = ArrayUtil.oversize(blockCount + 1, 8);
            docBases = Arrays.copyOf(docBases, newSize);
            startPointers = Arrays.copyOf(startPointers, newSize);
            avgChunkDocs = Arrays.copyOf(avgChunkDocs, newSize);
            avgChunkSizes = Arrays.copyOf(avgChunkSizes, newSize);
            docBasesDeltas = Arrays.copyOf(docBasesDeltas, newSize);
            startPointersDeltas = Arrays.copyOf(startPointersDeltas, newSize);
        }
        // doc bases
        docBases[blockCount] = fieldsIndexIn.readVInt();
        avgChunkDocs[blockCount] = fieldsIndexIn.readVInt();
        final int bitsPerDocBase = fieldsIndexIn.readVInt();
        if (bitsPerDocBase > 32) {
            // Doc bases are ints; more than 32 bits means a corrupt file.
            throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
        }
        docBasesDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED,
                packedIntsVersion, numChunks, bitsPerDocBase);
        // start pointers
        startPointers[blockCount] = fieldsIndexIn.readVLong();
        avgChunkSizes[blockCount] = fieldsIndexIn.readVLong();
        final int bitsPerStartPointer = fieldsIndexIn.readVInt();
        if (bitsPerStartPointer > 64) {
            // Start pointers are longs; more than 64 bits means a corrupt file.
            throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
        }
        startPointersDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED,
                packedIntsVersion, numChunks, bitsPerStartPointer);
        ++blockCount;
    }
    // Trim the over-allocated arrays to the exact number of blocks read.
    this.docBases = Arrays.copyOf(docBases, blockCount);
    this.startPointers = Arrays.copyOf(startPointers, blockCount);
    this.avgChunkDocs = Arrays.copyOf(avgChunkDocs, blockCount);
    this.avgChunkSizes = Arrays.copyOf(avgChunkSizes, blockCount);
    this.docBasesDeltas = Arrays.copyOf(docBasesDeltas, blockCount);
    this.startPointersDeltas = Arrays.copyOf(startPointersDeltas, blockCount);
}
From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java
License:Apache License
/**
 * Records the per-document bookkeeping (stored-field count and end offset in
 * the doc buffer) for the document just written, then flushes the chunk if
 * the flush trigger fires.
 *
 * @throws IOException if the flush fails
 */
@Override
public void finishDocument() throws IOException {
    final boolean full = numBufferedDocs == this.numStoredFields.length;
    if (full) {
        // Grow both parallel arrays together; 4 = bytes per int element.
        final int grownLength = ArrayUtil.oversize(numBufferedDocs + 1, 4);
        this.numStoredFields = Arrays.copyOf(this.numStoredFields, grownLength);
        endOffsets = Arrays.copyOf(endOffsets, grownLength);
    }
    this.numStoredFields[numBufferedDocs] = numStoredFieldsInDoc;
    numStoredFieldsInDoc = 0;
    endOffsets[numBufferedDocs] = bufferedDocs.length;
    ++numBufferedDocs;
    if (triggerFlush()) {
        flush();
    }
}
From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java
License:Apache License
/**
 * Merges stored fields from all readers in {@code mergeState} into this
 * writer. For readers backed by a compatible
 * {@link CompressingStoredFieldsReader} (same version, compression mode and
 * chunk size), compressed chunks are bulk-copied; otherwise each live
 * document is re-added field by field.
 *
 * @return the number of documents merged
 * @throws IOException on read/write failure
 * @throws CorruptIndexException if a decompressed chunk's size does not
 *         match the recorded lengths
 */
@Override
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    int idx = 0;
    AccessFilteredDocsAndPositionsEnum.enableMergeAuthorizations();
    for (AtomicReader reader : mergeState.readers) {
        final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
        CompressingStoredFieldsReader matchingFieldsReader = null;
        if (matchingSegmentReader != null) {
            final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
            // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
            if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
                matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
            }
        }
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        if (matchingFieldsReader == null
                || matchingFieldsReader.getVersion() != VERSION_CURRENT // means reader version is not the same as the writer version
                || matchingFieldsReader.getCompressionMode() != compressionMode
                || matchingFieldsReader.getChunkSize() != chunkSize) { // the way data is decompressed depends on the chunk size
            // naive merge: read each live document and re-add it
            for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
                Document doc = reader.document(i);
                addDocument(doc, mergeState.fieldInfos);
                ++docCount;
                mergeState.checkAbort.work(300);
            }
        } else {
            int docID = nextLiveDoc(0, liveDocs, maxDoc);
            if (docID < maxDoc) {
                // not all docs were deleted
                final CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.chunkIterator(docID);
                int[] startOffsets = new int[0];
                do {
                    // go to the next chunk that contains docID
                    it.next(docID);
                    // transform lengths into offsets (prefix sums)
                    if (startOffsets.length < it.chunkDocs) {
                        startOffsets = new int[ArrayUtil.oversize(it.chunkDocs, 4)];
                    }
                    for (int i = 1; i < it.chunkDocs; ++i) {
                        startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
                    }
                    // decompress
                    it.decompress();
                    if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) {
                        // NOTE(review): in the message below, '+' between the two int
                        // operands performs STRING concatenation (the left operand is a
                        // String by then), so the "expected chunk size" printed is the two
                        // numbers glued together, not their sum. The comparison above is
                        // correct; only the message is misleading. Consider parenthesizing.
                        throw new CorruptIndexException(
                                "Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1]
                                        + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.length);
                    }
                    // copy non-deleted docs from the decompressed chunk
                    for (; docID < it.docBase + it.chunkDocs; docID = nextLiveDoc(docID + 1, liveDocs, maxDoc)) {
                        final int diff = docID - it.docBase;
                        startDocument();
                        bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff],
                                it.lengths[diff]);
                        numStoredFieldsInDoc = it.numStoredFields[diff];
                        finishDocument();
                        ++docCount;
                        mergeState.checkAbort.work(300);
                    }
                } while (docID < maxDoc);
                it.checkIntegrity();
            }
        }
    }
    AccessFilteredDocsAndPositionsEnum.disableMergeAuthorizations();
    finish(mergeState.fieldInfos, docCount);
    return docCount;
}
From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnum.java
License:Apache License
private RocanaIntersectTermsEnumFrame getFrame(int ord) throws IOException { if (ord >= stack.length) { final RocanaIntersectTermsEnumFrame[] next = new RocanaIntersectTermsEnumFrame[ArrayUtil .oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; System.arraycopy(stack, 0, next, 0, stack.length); for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) { next[stackOrd] = new RocanaIntersectTermsEnumFrame(this, stackOrd); }//from w w w . j av a2s . c o m stack = next; } assert stack[ord].ord == ord; return stack[ord]; }
From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnum.java
License:Apache License
/**
 * Returns the FST arc cached at position {@code ord}, growing the arc array
 * lazily and pre-allocating an arc object for every newly added slot.
 *
 * @param ord index of the requested arc
 */
private FST.Arc<BytesRef> getArc(int ord) {
    if (ord >= arcs.length) {
        // Over-allocate to amortize growth; each slot holds an object reference.
        final int capacity = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        @SuppressWarnings({ "rawtypes", "unchecked" })
        final FST.Arc<BytesRef>[] grown = new FST.Arc[capacity];
        System.arraycopy(arcs, 0, grown, 0, arcs.length);
        // Eagerly fill every new slot so arcs[i] is never null.
        for (int i = arcs.length; i < grown.length; i++) {
            grown[i] = new FST.Arc<>();
        }
        arcs = grown;
    }
    return arcs[ord];
}
From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnumFrame.java
License:Apache License
/**
 * Loads this frame's block from the terms dictionary input: optionally
 * decodes floor data from {@code frameIndexData} (possibly skipping ahead
 * over floor blocks whose labels precede the automaton transition minimum),
 * then reads the entry count, term suffixes, stats, and metadata sections.
 *
 * @param frameIndexData floor/index data for this frame, or {@code null}
 *        when there is none to decode
 * @throws IOException on read failure
 */
void load(BytesRef frameIndexData) throws IOException {
    if (frameIndexData != null) {
        floorDataReader.reset(frameIndexData.bytes, frameIndexData.offset, frameIndexData.length);
        // Skip first long -- has redundant fp, hasTerms
        // flag, isFloor flag
        final long code = floorDataReader.readVLong();
        if ((code & RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) {
            // Floor frame
            numFollowFloorBlocks = floorDataReader.readVInt();
            nextFloorLabel = floorDataReader.readByte() & 0xff;
            // If current state is not accept, and has transitions, we must process
            // first block in case it has empty suffix:
            if (ite.runAutomaton.isAccept(state) == false && transitionCount != 0) {
                // Maybe skip floor blocks:
                assert transitionIndex == 0 : "transitionIndex=" + transitionIndex;
                while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
                    // Advance fp past floor blocks that cannot match the automaton.
                    fp = fpOrig + (floorDataReader.readVLong() >>> 1);
                    numFollowFloorBlocks--;
                    if (numFollowFloorBlocks != 0) {
                        nextFloorLabel = floorDataReader.readByte() & 0xff;
                    } else {
                        // 256 is past any byte value: no further floor blocks.
                        nextFloorLabel = 256;
                    }
                }
            }
        }
    }
    ite.in.seek(fp);
    // Low bit flags "last in floor"; remaining bits are the entry count.
    int code = ite.in.readVInt();
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;
    // term suffixes: low bit flags "leaf block"; remaining bits are byte length.
    code = ite.in.readVInt();
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    if (suffixBytes.length < numBytes) {
        suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(suffixBytes, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);
    // stats
    numBytes = ite.in.readVInt();
    if (statBytes.length < numBytes) {
        statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(statBytes, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    metaDataUpto = 0;
    termState.termBlockOrd = 0;
    nextEnt = 0;
    // metadata
    numBytes = ite.in.readVInt();
    if (bytes.length < numBytes) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);
    if (!isLastInFloor) {
        // Sub-blocks of a single floor block are always
        // written one after another -- tail recurse:
        fpEnd = ite.in.getFilePointer();
    }
    // Necessary in case this ord previously was an auto-prefix
    // term but now we recurse to a new leaf block
    isAutoPrefixTerm = false;
}
From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java
License:Apache License
/**
 * Returns the frame at position {@code ord}, growing the frame stack lazily
 * and pre-constructing a frame for every newly added slot.
 *
 * @param ord stack ordinal of the requested frame
 * @throws IOException if frame construction fails
 */
private RocanaSegmentTermsEnumFrame getFrame(int ord) throws IOException {
    if (ord >= stack.length) {
        // Over-allocate to amortize growth; each slot holds an object reference.
        final int capacity = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        final RocanaSegmentTermsEnumFrame[] grown = new RocanaSegmentTermsEnumFrame[capacity];
        System.arraycopy(stack, 0, grown, 0, stack.length);
        // Eagerly fill every new slot so stack[i] is never null.
        for (int i = stack.length; i < grown.length; i++) {
            grown[i] = new RocanaSegmentTermsEnumFrame(this, i);
        }
        stack = grown;
    }
    assert stack[ord].ord == ord;
    return stack[ord];
}
From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnumFrame.java
License:Apache License
public void setFloorData(ByteArrayDataInput in, BytesRef source) { final int numBytes = source.length - (in.getPosition() - source.offset); if (numBytes > floorData.length) { floorData = new byte[ArrayUtil.oversize(numBytes, 1)]; }//from w w w .j a va2 s. co m System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes); floorDataReader.reset(floorData, 0, numBytes); numFollowFloorBlocks = floorDataReader.readVInt(); nextFloorLabel = floorDataReader.readByte() & 0xff; //if (DEBUG) { //System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel)); //} }
From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnumFrame.java
License:Apache License
/**
 * Loads this frame's on-disk block if it is not already loaded: seeks to
 * {@code fp} and sequentially reads the entry-count header, the term-suffix
 * bytes, the stats bytes, and the metadata bytes, resetting the associated
 * readers and per-block counters.
 *
 * @throws IOException on read failure
 */
void loadBlock() throws IOException {
    // Clone the IndexInput lazily, so that consumers
    // that just pull a TermsEnum to
    // seekExact(TermState) don't pay this cost:
    ste.initIndexInput();
    if (nextEnt != -1) {
        // Already loaded
        return;
    }
    ste.in.seek(fp);
    // Low bit flags "last in floor"; remaining bits are the entry count.
    int code = ste.in.readVInt();
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;
    assert arc == null || (isLastInFloor || isFloor) : "fp=" + fp + " arc=" + arc + " isFloor=" + isFloor
            + " isLastInFloor=" + isLastInFloor;
    // TODO: if suffixes were stored in random-access
    // array structure, then we could do binary search
    // instead of linear scan to find target term; eg
    // we could have simple array of offsets
    // term suffixes: low bit flags "leaf block"; remaining bits are byte length.
    code = ste.in.readVInt();
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    if (suffixBytes.length < numBytes) {
        suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(suffixBytes, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);
    // stats
    numBytes = ste.in.readVInt();
    if (statBytes.length < numBytes) {
        statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(statBytes, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    metaDataUpto = 0;
    state.termBlockOrd = 0;
    nextEnt = 0;
    lastSubFP = -1;
    // TODO: we could skip this if !hasTerms; but
    // that's rare so won't help much
    // metadata
    numBytes = ste.in.readVInt();
    if (bytes.length < numBytes) {
        bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ste.in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);
    // Sub-blocks of a single floor block are always
    // written one after another -- tail recurse:
    fpEnd = ste.in.getFilePointer();
}
From source file:com.sindicetech.siren.analysis.filter.ASCIIFoldingExpansionFilter.java
License:Open Source License
private void growBuffer(char[] buffer, final int newSize) { if (buffer.length < newSize) { // Not big enough; create a new array with slight // over allocation: buffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; }/*from w ww .j a v a2 s. c o m*/ }