Usage examples for the org.apache.lucene.store.ByteArrayDataInput constructor:
public ByteArrayDataInput(byte[] bytes, int offset, int len)
From source file:com.lucure.core.codec.CompressingStoredFieldsReader.java
License:Apache License
@Override public void visitDocument(int docID, StoredFieldVisitor visitor) throws IOException { fieldsStream.seek(indexReader.getStartPointer(docID)); final int docBase = fieldsStream.readVInt(); final int chunkDocs = fieldsStream.readVInt(); if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs) { throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")"); }/*from w ww .j ava 2 s . c o m*/ final int numStoredFields, offset, length, totalLength; if (chunkDocs == 1) { numStoredFields = fieldsStream.readVInt(); offset = 0; length = fieldsStream.readVInt(); totalLength = length; } else { final int bitsPerStoredFields = fieldsStream.readVInt(); if (bitsPerStoredFields == 0) { numStoredFields = fieldsStream.readVInt(); } else if (bitsPerStoredFields > 31) { throw new CorruptIndexException( "bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")"); } else { final long filePointer = fieldsStream.getFilePointer(); final PackedInts.Reader reader = PackedInts.getDirectReaderNoHeader(fieldsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields); numStoredFields = (int) (reader.get(docID - docBase)); fieldsStream.seek(filePointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields)); } final int bitsPerLength = fieldsStream.readVInt(); if (bitsPerLength == 0) { length = fieldsStream.readVInt(); offset = (docID - docBase) * length; totalLength = chunkDocs * length; } else if (bitsPerStoredFields > 31) { throw new CorruptIndexException( "bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")"); } else { final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(fieldsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1); int off = 0; for (int i = 0; i < docID - docBase; ++i) { off += 
it.next(); } offset = off; length = (int) it.next(); off += length; for (int i = docID - docBase + 1; i < chunkDocs; ++i) { off += it.next(); } totalLength = off; } } if ((length == 0) != (numStoredFields == 0)) { throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")"); } if (numStoredFields == 0) { // nothing to do return; } final DataInput documentInput; if (version >= VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize) { assert chunkSize > 0; assert offset < chunkSize; decompressor.decompress(fieldsStream, chunkSize, offset, Math.min(length, chunkSize - offset), bytes); documentInput = new DataInput() { int decompressed = bytes.length; void fillBuffer() throws IOException { assert decompressed <= length; if (decompressed == length) { throw new EOFException(); } final int toDecompress = Math.min(length - decompressed, chunkSize); decompressor.decompress(fieldsStream, toDecompress, 0, toDecompress, bytes); decompressed += toDecompress; } @Override public byte readByte() throws IOException { if (bytes.length == 0) { fillBuffer(); } --bytes.length; return bytes.bytes[bytes.offset++]; } @Override public void readBytes(byte[] b, int offset, int len) throws IOException { while (len > bytes.length) { System.arraycopy(bytes.bytes, bytes.offset, b, offset, bytes.length); len -= bytes.length; offset += bytes.length; fillBuffer(); } System.arraycopy(bytes.bytes, bytes.offset, b, offset, len); bytes.offset += len; bytes.length -= len; } }; } else { final BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? 
this.bytes : new BytesRef(); decompressor.decompress(fieldsStream, totalLength, offset, length, bytes); assert bytes.length == length; documentInput = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length); } for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++) { final long infoAndBits = documentInput.readVLong(); final int fieldNumber = (int) (infoAndBits >>> TYPE_BITS); final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); final int bits = (int) (infoAndBits & TYPE_MASK); assert bits <= NUMERIC_DOUBLE : "bits=" + Integer.toHexString(bits); //get restricted FieldVisibility cv = RestrictedStoredFieldVisitor.EMPTY; boolean isRestricted = documentInput.readByte() == 1; if (isRestricted) { int cv_length = documentInput.readVInt(); byte[] cv_bytes = new byte[cv_length]; documentInput.readBytes(cv_bytes, 0, cv_length); cv = new FieldVisibility(cv_bytes); } RestrictedStoredFieldVisitor restrictedStoredFieldVisitor = DelegatingRestrictedFieldVisitor .wrap(visitor); if (evaluate(cv)) { switch (restrictedStoredFieldVisitor.needsField(fieldInfo, cv)) { case YES: readField(documentInput, restrictedStoredFieldVisitor, fieldInfo, bits, cv); break; case NO: skipField(documentInput, bits, cv); break; case STOP: return; } } else { skipField(documentInput, bits, cv); } } }
From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java
License:Apache License
RocanaFieldReader(RocanaBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException { assert numTerms > 0; this.fieldInfo = fieldInfo; //DEBUG = RocanaBlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id"); this.parent = parent; this.numTerms = numTerms; this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; this.indexStartFP = indexStartFP; this.rootCode = rootCode; this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; // if (DEBUG) { // System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor); // }// www . j a va2 s . com rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)) .readVLong() >>> RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS; if (indexIn != null) { final IndexInput clone = indexIn.clone(); //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name); clone.seek(indexStartFP); index = new FST<>(clone, ByteSequenceOutputs.getSingleton()); /* if (false) { final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); Util.toDot(index, w, false, false); System.out.println("FST INDEX: SAVED to " + dotFileName); w.close(); } */ } else { index = null; } }
From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java
License:Apache License
/**
 * Debug helper: prints the chain of seek frames from the root down to
 * {@code currentFrame} and cross-checks each seek frame against the terms-index FST,
 * throwing {@link RuntimeException} if the recorded seek state is inconsistent.
 *
 * @param out destination for the dump
 * @throws IOException if reading the FST index fails
 */
@SuppressWarnings("unused")
private void printSeekState(PrintStream out) throws IOException {
    if (currentFrame == staticFrame) {
        // No seek has happened yet; only the static sentinel frame exists.
        out.println(" no prior seek");
    } else {
        out.println(" prior seek state:");
        int ord = 0;
        // Frames reached via the last seek are "seek" frames until we pass
        // validIndexPrefix; after that they were produced by next().
        boolean isSeekFrame = true;
        while (true) {
            RocanaSegmentTermsEnumFrame f = getFrame(ord);
            assert f != null;
            // The term prefix this frame corresponds to.
            final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefix);
            if (f.nextEnt == -1) {
                // Frame's block has not been loaded yet.
                out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord
                        + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
                        + " prefixLen=" + f.prefix + " prefix=" + prefix
                        + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                        + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code="
                        + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                                + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                                + (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
                        + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto
                        + " tbOrd=" + f.getTermBlockOrd());
            } else {
                // Frame's block is loaded; also report nextEnt/entCount and lastSubFP.
                out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord="
                        + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
                        + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt
                        + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                        + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code="
                        + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                                + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                                + (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
                        + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor
                        + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
            }
            if (fr.index != null) {
                // Every seek frame must carry the FST arc it was reached through.
                assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
                // The arc's label must match the last byte of this frame's prefix.
                if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
                    out.println(" broken seek state: arc.label=" + (char) f.arc.label
                            + " vs term byte=" + (char) (term.byteAt(f.prefix - 1) & 0xFF));
                    throw new RuntimeException("seek state is broken");
                }
                BytesRef output = Util.get(fr.index, prefix);
                if (output == null) {
                    out.println(" broken seek state: prefix is not final in index");
                    throw new RuntimeException("seek state is broken");
                } else if (isSeekFrame && !f.isFloor) {
                    // Recompute the frame's packed output code (fp + flags) and compare it
                    // with the code the index actually stores for this prefix.
                    final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes,
                            output.offset, output.length);
                    final long codeOrig = reader.readVLong();
                    final long code = (f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                            | (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                            | (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
                    if (codeOrig != code) {
                        out.println(" broken seek state: output code=" + codeOrig
                                + " doesn't match frame code=" + code);
                        throw new RuntimeException("seek state is broken");
                    }
                }
            }
            if (f == currentFrame) {
                break;
            }
            // Past validIndexPrefix the remaining frames came from next(), not seek.
            if (f.prefix == validIndexPrefix) {
                isSeekFrame = false;
            }
            ord++;
        }
    }
}