Example usage for the org.apache.lucene.store.ByteArrayDataInput constructor ByteArrayDataInput(byte[], int, int)

List of usage examples for the org.apache.lucene.store.ByteArrayDataInput constructor ByteArrayDataInput(byte[], int, int)

Introduction

On this page you can find example usages of the ByteArrayDataInput(byte[] bytes, int offset, int len) constructor of org.apache.lucene.store.ByteArrayDataInput.

Prototype

public ByteArrayDataInput(byte[] bytes, int offset, int len) 

Source Link

Usage

From source file:com.lucure.core.codec.CompressingStoredFieldsReader.java

License:Apache License

/**
 * Reads the stored fields of document {@code docID} from {@code fieldsStream} and hands them to
 * {@code visitor}.
 *
 * <p>The method seeks to the chunk that holds the document, parses the chunk header to find the
 * document's number of stored fields and its byte range inside the chunk, decompresses that range,
 * and then iterates over the fields. Each field may carry a visibility payload; fields whose
 * visibility fails {@code evaluate(cv)} are skipped without consulting the visitor.
 *
 * @param docID   the document to load
 * @param visitor receives the fields; it is wrapped with
 *                {@code DelegatingRestrictedFieldVisitor.wrap} before being consulted
 * @throws CorruptIndexException if the chunk header is inconsistent (document range, bit widths,
 *                               or a length / field-count mismatch)
 * @throws IOException           if reading or decompressing the underlying stream fails
 */
@Override
public void visitDocument(int docID, StoredFieldVisitor visitor) throws IOException {
    fieldsStream.seek(indexReader.getStartPointer(docID));

    // Chunk header: first doc of the chunk and number of docs it contains.
    final int docBase = fieldsStream.readVInt();
    final int chunkDocs = fieldsStream.readVInt();
    if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
        throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs="
                + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
    }

    // numStoredFields: field count of this doc; offset/length: its byte range in the
    // decompressed chunk; totalLength: decompressed size of the whole chunk.
    final int numStoredFields, offset, length, totalLength;
    if (chunkDocs == 1) {
        numStoredFields = fieldsStream.readVInt();
        offset = 0;
        length = fieldsStream.readVInt();
        totalLength = length;
    } else {
        final int bitsPerStoredFields = fieldsStream.readVInt();
        if (bitsPerStoredFields == 0) {
            // 0 bits per value: a single shared value follows.
            numStoredFields = fieldsStream.readVInt();
        } else if (bitsPerStoredFields > 31) {
            throw new CorruptIndexException(
                    "bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
        } else {
            final long filePointer = fieldsStream.getFilePointer();
            final PackedInts.Reader reader = PackedInts.getDirectReaderNoHeader(fieldsStream,
                    PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
            numStoredFields = (int) (reader.get(docID - docBase));
            // Reposition explicitly right after the packed block before reading on.
            fieldsStream.seek(filePointer
                    + PackedInts.Format.PACKED.byteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields));
        }

        final int bitsPerLength = fieldsStream.readVInt();
        if (bitsPerLength == 0) {
            // 0 bits per value: every doc in the chunk has the same length.
            length = fieldsStream.readVInt();
            offset = (docID - docBase) * length;
            totalLength = chunkDocs * length;
        } else if (bitsPerLength > 31) {
            // Fixed: this branch previously re-tested bitsPerStoredFields, so a corrupt
            // bitsPerLength was never rejected here.
            throw new CorruptIndexException(
                    "bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
        } else {
            final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(fieldsStream,
                    PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
            // Sum the lengths of the preceding docs to get this doc's offset ...
            int off = 0;
            for (int i = 0; i < docID - docBase; ++i) {
                off += it.next();
            }
            offset = off;
            length = (int) it.next();
            off += length;
            // ... and consume the remaining lengths to get the chunk's total size.
            for (int i = docID - docBase + 1; i < chunkDocs; ++i) {
                off += it.next();
            }
            totalLength = off;
        }
    }

    // A document has zero bytes if and only if it has zero stored fields.
    if ((length == 0) != (numStoredFields == 0)) {
        throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields
                + " (resource=" + fieldsStream + ")");
    }
    if (numStoredFields == 0) {
        // nothing to do
        return;
    }

    final DataInput documentInput;
    if (version >= VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize) {
        assert chunkSize > 0;
        assert offset < chunkSize;

        // Large chunk: decompress only the first slice of the document now and pull the
        // remainder lazily, at most chunkSize bytes at a time, through fillBuffer().
        decompressor.decompress(fieldsStream, chunkSize, offset, Math.min(length, chunkSize - offset), bytes);
        documentInput = new DataInput() {

            // Number of bytes of this document decompressed so far.
            int decompressed = bytes.length;

            void fillBuffer() throws IOException {
                assert decompressed <= length;
                if (decompressed == length) {
                    throw new EOFException();
                }
                final int toDecompress = Math.min(length - decompressed, chunkSize);
                decompressor.decompress(fieldsStream, toDecompress, 0, toDecompress, bytes);
                decompressed += toDecompress;
            }

            @Override
            public byte readByte() throws IOException {
                if (bytes.length == 0) {
                    fillBuffer();
                }
                --bytes.length;
                return bytes.bytes[bytes.offset++];
            }

            @Override
            public void readBytes(byte[] b, int offset, int len) throws IOException {
                // Drain the current buffer and refill until the rest fits in one buffer.
                while (len > bytes.length) {
                    System.arraycopy(bytes.bytes, bytes.offset, b, offset, bytes.length);
                    len -= bytes.length;
                    offset += bytes.length;
                    fillBuffer();
                }
                System.arraycopy(bytes.bytes, bytes.offset, b, offset, len);
                bytes.offset += len;
                bytes.length -= len;
            }

        };
    } else {
        // Small chunk: decompress the document in one go; reuse the shared buffer only
        // when the chunk is at most BUFFER_REUSE_THRESHOLD bytes.
        final BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
        decompressor.decompress(fieldsStream, totalLength, offset, length, bytes);
        assert bytes.length == length;
        documentInput = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length);
    }

    for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++) {
        // Field header: field number in the high bits, value type in the low TYPE_BITS bits.
        final long infoAndBits = documentInput.readVLong();
        final int fieldNumber = (int) (infoAndBits >>> TYPE_BITS);
        final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);

        final int bits = (int) (infoAndBits & TYPE_MASK);
        assert bits <= NUMERIC_DOUBLE : "bits=" + Integer.toHexString(bits);

        // Optional visibility payload: a flag byte, then (if flag == 1) a length-prefixed byte array.
        FieldVisibility cv = RestrictedStoredFieldVisitor.EMPTY;
        final boolean isRestricted = documentInput.readByte() == 1;
        if (isRestricted) {
            final int cvLength = documentInput.readVInt();
            final byte[] cvBytes = new byte[cvLength];
            documentInput.readBytes(cvBytes, 0, cvLength);
            cv = new FieldVisibility(cvBytes);
        }

        RestrictedStoredFieldVisitor restrictedStoredFieldVisitor = DelegatingRestrictedFieldVisitor
                .wrap(visitor);
        if (evaluate(cv)) {
            switch (restrictedStoredFieldVisitor.needsField(fieldInfo, cv)) {
            case YES:
                readField(documentInput, restrictedStoredFieldVisitor, fieldInfo, bits, cv);
                break;
            case NO:
                skipField(documentInput, bits, cv);
                break;
            case STOP:
                return;
            }
        } else {
            // Visibility check failed: skip the value without consulting the visitor.
            skipField(documentInput, bits, cv);
        }
    }
}

From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java

License:Apache License

/**
 * Creates the reader for a single field of the terms dictionary.
 *
 * <p>Stores the supplied per-field statistics, decodes the root block file pointer from
 * {@code rootCode}, and, when {@code indexIn} is non-null, loads the terms index FST from a
 * clone of {@code indexIn} positioned at {@code indexStartFP}. With a null {@code indexIn}
 * the index is left {@code null}.
 *
 * @throws IOException if reading the index input fails
 */
RocanaFieldReader(RocanaBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode,
        long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize,
        IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
    assert numTerms > 0;

    // Owning reader and field identity.
    this.parent = parent;
    this.fieldInfo = fieldInfo;

    // Per-field statistics.
    this.numTerms = numTerms;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
    this.minTerm = minTerm;
    this.maxTerm = maxTerm;

    // Layout information.
    this.longsSize = longsSize;
    this.indexStartFP = indexStartFP;
    this.rootCode = rootCode;

    // The root code is a vLong; shifting off the low flag bits leaves the root block file pointer.
    final ByteArrayDataInput rootCodeIn = new ByteArrayDataInput(rootCode.bytes, rootCode.offset,
            rootCode.length);
    rootBlockFP = rootCodeIn.readVLong() >>> RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

    if (indexIn == null) {
        index = null;
    } else {
        // Read from a clone so the caller's input position is left untouched.
        final IndexInput indexClone = indexIn.clone();
        indexClone.seek(indexStartFP);
        index = new FST<>(indexClone, ByteSequenceOutputs.getSingleton());
    }
}

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java

License:Apache License

/**
 * Debugging aid: dumps the current seek state to {@code out}, one line per frame from ord 0 up
 * to and including {@code currentFrame}.
 *
 * <p>When a terms index is available ({@code fr.index != null}) each frame is also checked
 * against it; any inconsistency is printed and then raised as a {@link RuntimeException}.
 *
 * @param out destination for the dump
 * @throws IOException      if looking up a prefix in the index fails
 * @throws RuntimeException if the seek state is inconsistent with the index
 */
@SuppressWarnings("unused")
private void printSeekState(PrintStream out) throws IOException {
    if (currentFrame == staticFrame) {
        out.println("  no prior seek");
        return;
    }

    out.println("  prior seek state:");
    // True while walking frames that belong to the last seek; cleared once the frame whose
    // prefix length equals validIndexPrefix has been passed.
    boolean onSeekPath = true;
    for (int ord = 0;; ord++) {
        final RocanaSegmentTermsEnumFrame frame = getFrame(ord);
        assert frame != null;
        final BytesRef prefix = new BytesRef(term.get().bytes, 0, frame.prefix);

        // nextEnt == -1 marks a frame that is reported without the "loaded" details.
        final boolean loaded = frame.nextEnt != -1;
        final String kind;
        if (loaded) {
            kind = onSeekPath ? "(seek, loaded)" : "(next, loaded)";
        } else {
            kind = onSeekPath ? "(seek)" : "(next)";
        }

        String line = "    frame " + kind + " ord=" + ord + " fp=" + frame.fp;
        if (frame.isFloor) {
            line += " (fpOrig=" + frame.fpOrig + ")";
        }
        line += " prefixLen=" + frame.prefix + " prefix=" + prefix;
        if (loaded) {
            line += " nextEnt=" + frame.nextEnt + " (of " + frame.entCount + ")";
        }
        line += " hasTerms=" + frame.hasTerms + " isFloor=" + frame.isFloor + " code="
                + ((frame.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                        + (frame.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                        + (frame.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0));
        if (loaded) {
            line += " lastSubFP=" + frame.lastSubFP;
        }
        line += " isLastInFloor=" + frame.isLastInFloor + " mdUpto=" + frame.metaDataUpto + " tbOrd="
                + frame.getTermBlockOrd();
        out.println(line);

        if (fr.index != null) {
            assert !onSeekPath || frame.arc != null : "isSeekFrame=" + onSeekPath + " f.arc=" + frame.arc;

            // The arc label of a seek frame must equal the last byte of its prefix.
            if (frame.prefix > 0 && onSeekPath
                    && frame.arc.label != (term.byteAt(frame.prefix - 1) & 0xFF)) {
                out.println("      broken seek state: arc.label=" + (char) frame.arc.label + " vs term byte="
                        + (char) (term.byteAt(frame.prefix - 1) & 0xFF));
                throw new RuntimeException("seek state is broken");
            }

            final BytesRef output = Util.get(fr.index, prefix);
            if (output == null) {
                out.println("      broken seek state: prefix is not final in index");
                throw new RuntimeException("seek state is broken");
            }

            if (onSeekPath && !frame.isFloor) {
                // Re-derive the block code from the frame and compare it with the index output.
                final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset,
                        output.length);
                final long codeOrig = reader.readVLong();
                final long code = (frame.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                        | (frame.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                        | (frame.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
                if (codeOrig != code) {
                    out.println("      broken seek state: output code=" + codeOrig
                            + " doesn't match frame code=" + code);
                    throw new RuntimeException("seek state is broken");
                }
            }
        }

        if (frame == currentFrame) {
            break;
        }
        if (frame.prefix == validIndexPrefix) {
            onSeekPath = false;
        }
    }
}