com.lucure.core.codec.CompressingStoredFieldsReader.java Source code

Introduction

Here is the source code for com.lucure.core.codec.CompressingStoredFieldsReader.java. It is Lucure's adaptation of Lucene's compressing stored-fields reader: in addition to decompressing chunked stored fields, it decodes a per-field visibility label and only surfaces fields that the current thread's authorizations are allowed to see.

Source

package com.lucure.core.codec;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.lucure.core.AuthorizationsHolder;
import com.lucure.core.index.visitor.DelegatingRestrictedFieldVisitor;
import com.lucure.core.index.visitor.RestrictedStoredFieldVisitor;
import com.lucure.core.security.FieldVisibility;
import com.lucure.core.security.VisibilityParseException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.compressing.Decompressor;
import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;

import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;

import static com.lucure.core.codec.AccessFilteredDocsAndPositionsEnum.AllAuthorizationsHolder.ALLAUTHSHOLDER;
import static com.lucure.core.codec.CompressingStoredFieldsWriter.*;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION;

/**
 * {@link StoredFieldsReader} impl for {@link CompressingStoredFieldsFormat}.
 * @lucene.experimental
 */
public final class CompressingStoredFieldsReader extends StoredFieldsReader {

    // Do not reuse the decompression buffer when there is more than 32kb to decompress
    private static final int BUFFER_REUSE_THRESHOLD = 1 << 15;

    private final int version;
    private final FieldInfos fieldInfos;
    private final CompressingStoredFieldsIndexReader indexReader;
    private final long maxPointer;
    private final IndexInput fieldsStream;
    private final int chunkSize;
    private final int packedIntsVersion;
    private final CompressionMode compressionMode;
    private final Decompressor decompressor;
    private final BytesRef bytes;
    private final int numDocs;
    private boolean closed;

    // used by clone
    private CompressingStoredFieldsReader(CompressingStoredFieldsReader reader) {
        this.version = reader.version;
        this.fieldInfos = reader.fieldInfos;
        this.fieldsStream = reader.fieldsStream.clone();
        this.indexReader = reader.indexReader.clone();
        this.maxPointer = reader.maxPointer;
        this.chunkSize = reader.chunkSize;
        this.packedIntsVersion = reader.packedIntsVersion;
        this.compressionMode = reader.compressionMode;
        this.decompressor = reader.decompressor.clone();
        this.numDocs = reader.numDocs;
        this.bytes = new BytesRef(reader.bytes.bytes.length);
        this.closed = false;
    }

    /** Sole constructor. */
    public CompressingStoredFieldsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn,
            IOContext context, String formatName, CompressionMode compressionMode) throws IOException {
        this.compressionMode = compressionMode;
        final String segment = si.name;
        boolean success = false;
        fieldInfos = fn;
        numDocs = si.getDocCount();
        ChecksumIndexInput indexStream = null;
        try {
            final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix,
                    FIELDS_INDEX_EXTENSION);
            final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
            // Load the index into memory
            indexStream = d.openChecksumInput(indexStreamFN, context);
            final String codecNameIdx = formatName + CODEC_SFX_IDX;
            version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
            assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
            indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

            long maxPointer = -1;

            if (version >= VERSION_CHECKSUM) {
                maxPointer = indexStream.readVLong();
                CodecUtil.checkFooter(indexStream);
            } else {
                CodecUtil.checkEOF(indexStream);
            }
            indexStream.close();
            indexStream = null;

            // Open the data file and read metadata
            fieldsStream = d.openInput(fieldsStreamFN, context);
            if (version >= VERSION_CHECKSUM) {
                if (maxPointer + CodecUtil.footerLength() != fieldsStream.length()) {
                    throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer="
                            + maxPointer + ", length=" + fieldsStream.length());
                }
            } else {
                maxPointer = fieldsStream.length();
            }
            this.maxPointer = maxPointer;
            final String codecNameDat = formatName + CODEC_SFX_DAT;
            final int fieldsVersion = CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START,
                    VERSION_CURRENT);
            if (version != fieldsVersion) {
                throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version
                        + " != " + fieldsVersion);
            }
            assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();

            if (version >= VERSION_BIG_CHUNKS) {
                chunkSize = fieldsStream.readVInt();
            } else {
                chunkSize = -1;
            }
            packedIntsVersion = fieldsStream.readVInt();
            decompressor = compressionMode.newDecompressor();
            this.bytes = new BytesRef();

            if (version >= VERSION_CHECKSUM) {
                // NOTE: data file is too costly to verify checksum against all the bytes on open,
                // but for now we at least verify proper structure of the checksum footer: which looks
                // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
                // such as file truncation.
                CodecUtil.retrieveChecksum(fieldsStream);
            }

            success = true;
        } finally {
            if (!success) {
                IOUtils.closeWhileHandlingException(this, indexStream);
            }
        }
    }

    /**
     * @throws AlreadyClosedException if this FieldsReader is closed
     */
    private void ensureOpen() throws AlreadyClosedException {
        if (closed) {
            throw new AlreadyClosedException("this FieldsReader is closed");
        }
    }

    /** 
     * Close the underlying {@link IndexInput}s.
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            IOUtils.close(fieldsStream);
            closed = true;
        }
    }

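    // Decodes one stored field value according to its type bits and hands it,
    // together with the field's visibility label, to the visitor.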
    private static void readField(DataInput in, RestrictedStoredFieldVisitor visitor, FieldInfo info, int bits,
            FieldVisibility fieldVisibility) throws IOException {

        switch (bits & TYPE_MASK) {
        case BYTE_ARR:
            int length = in.readVInt();
            byte[] data = new byte[length];
            in.readBytes(data, 0, length);
            visitor.binaryField(info, data, fieldVisibility);
            break;
        case STRING:
            length = in.readVInt();
            data = new byte[length];
            in.readBytes(data, 0, length);
            visitor.stringField(info, new String(data, IOUtils.CHARSET_UTF_8), fieldVisibility);
            break;
        case NUMERIC_INT:
            int intValue = in.readInt();
            visitor.intField(info, intValue, fieldVisibility);
            break;
        case NUMERIC_FLOAT:
            float floatValue = Float.intBitsToFloat(in.readInt());
            visitor.floatField(info, floatValue, fieldVisibility);
            break;
        case NUMERIC_LONG:
            long longValue = in.readLong();
            visitor.longField(info, longValue, fieldVisibility);
            break;
        case NUMERIC_DOUBLE:
            double doubleValue = Double.longBitsToDouble(in.readLong());
            visitor.doubleField(info, doubleValue, fieldVisibility);
            break;
        default:
            throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
        }
    }

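    // Advances the input past one field value, using the same type bits as
    // readField, without materializing the value.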
    private static void skipField(DataInput in, int bits, FieldVisibility cv) throws IOException {
        switch (bits & TYPE_MASK) {
        case BYTE_ARR:
        case STRING:
            final int length = in.readVInt();
            in.skipBytes(length);
            break;
        case NUMERIC_INT:
        case NUMERIC_FLOAT:
            in.readInt();
            break;
        case NUMERIC_LONG:
        case NUMERIC_DOUBLE:
            in.readLong();
            break;
        default:
            throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
        }
    }

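    // Documents are stored in compressed chunks. Visiting a document means
    // seeking to its chunk, decoding the per-document field counts and lengths
    // (stored packed when they vary across the chunk), and decompressing just
    // enough of the chunk to reach the requested document.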
    @Override
    public void visitDocument(int docID, StoredFieldVisitor visitor) throws IOException {
        fieldsStream.seek(indexReader.getStartPointer(docID));

        final int docBase = fieldsStream.readVInt();
        final int chunkDocs = fieldsStream.readVInt();
        if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
            throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs="
                    + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
        }

        final int numStoredFields, offset, length, totalLength;
        if (chunkDocs == 1) {
            numStoredFields = fieldsStream.readVInt();
            offset = 0;
            length = fieldsStream.readVInt();
            totalLength = length;
        } else {
            final int bitsPerStoredFields = fieldsStream.readVInt();
            if (bitsPerStoredFields == 0) {
                numStoredFields = fieldsStream.readVInt();
            } else if (bitsPerStoredFields > 31) {
                throw new CorruptIndexException(
                        "bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
            } else {
                final long filePointer = fieldsStream.getFilePointer();
                final PackedInts.Reader reader = PackedInts.getDirectReaderNoHeader(fieldsStream,
                        PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
                numStoredFields = (int) (reader.get(docID - docBase));
                fieldsStream.seek(filePointer
                        + PackedInts.Format.PACKED.byteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields));
            }

            final int bitsPerLength = fieldsStream.readVInt();
            if (bitsPerLength == 0) {
                length = fieldsStream.readVInt();
                offset = (docID - docBase) * length;
                totalLength = chunkDocs * length;
            } else if (bitsPerLength > 31) {
                throw new CorruptIndexException(
                        "bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
            } else {
                final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(fieldsStream,
                        PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
                int off = 0;
                for (int i = 0; i < docID - docBase; ++i) {
                    off += it.next();
                }
                offset = off;
                length = (int) it.next();
                off += length;
                for (int i = docID - docBase + 1; i < chunkDocs; ++i) {
                    off += it.next();
                }
                totalLength = off;
            }
        }

        if ((length == 0) != (numStoredFields == 0)) {
            throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields
                    + " (resource=" + fieldsStream + ")");
        }
        if (numStoredFields == 0) {
            // nothing to do
            return;
        }

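        // Two decompression paths: when the chunk's total decompressed length
        // is at least twice the chunk size, the document is streamed through a
        // buffer refilled on demand; otherwise the whole chunk is decompressed
        // in one shot (reusing this.bytes for chunks up to
        // BUFFER_REUSE_THRESHOLD).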
        final DataInput documentInput;
        if (version >= VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize) {
            assert chunkSize > 0;
            assert offset < chunkSize;

            decompressor.decompress(fieldsStream, chunkSize, offset, Math.min(length, chunkSize - offset), bytes);
            documentInput = new DataInput() {

                int decompressed = bytes.length;

                void fillBuffer() throws IOException {
                    assert decompressed <= length;
                    if (decompressed == length) {
                        throw new EOFException();
                    }
                    final int toDecompress = Math.min(length - decompressed, chunkSize);
                    decompressor.decompress(fieldsStream, toDecompress, 0, toDecompress, bytes);
                    decompressed += toDecompress;
                }

                @Override
                public byte readByte() throws IOException {
                    if (bytes.length == 0) {
                        fillBuffer();
                    }
                    --bytes.length;
                    return bytes.bytes[bytes.offset++];
                }

                @Override
                public void readBytes(byte[] b, int offset, int len) throws IOException {
                    while (len > bytes.length) {
                        System.arraycopy(bytes.bytes, bytes.offset, b, offset, bytes.length);
                        len -= bytes.length;
                        offset += bytes.length;
                        fillBuffer();
                    }
                    System.arraycopy(bytes.bytes, bytes.offset, b, offset, len);
                    bytes.offset += len;
                    bytes.length -= len;
                }

            };
        } else {
            final BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
            decompressor.decompress(fieldsStream, totalLength, offset, length, bytes);
            assert bytes.length == length;
            documentInput = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length);
        }

        for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++) {
            final long infoAndBits = documentInput.readVLong();
            final int fieldNumber = (int) (infoAndBits >>> TYPE_BITS);
            final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);

            final int bits = (int) (infoAndBits & TYPE_MASK);
            assert bits <= NUMERIC_DOUBLE : "bits=" + Integer.toHexString(bits);

            // read the field's visibility: a marker byte, then, if the field
            // is restricted, the serialized visibility expression
            FieldVisibility cv = RestrictedStoredFieldVisitor.EMPTY;
            boolean isRestricted = documentInput.readByte() == 1;
            if (isRestricted) {
                int cv_length = documentInput.readVInt();
                byte[] cv_bytes = new byte[cv_length];
                documentInput.readBytes(cv_bytes, 0, cv_length);
                cv = new FieldVisibility(cv_bytes);
            }

            RestrictedStoredFieldVisitor restrictedStoredFieldVisitor = DelegatingRestrictedFieldVisitor
                    .wrap(visitor);
            if (evaluate(cv)) {
                switch (restrictedStoredFieldVisitor.needsField(fieldInfo, cv)) {
                case YES:
                    readField(documentInput, restrictedStoredFieldVisitor, fieldInfo, bits, cv);
                    break;
                case NO:
                    skipField(documentInput, bits, cv);
                    break;
                case STOP:
                    return;
                }
            } else {
                skipField(documentInput, bits, cv);
            }
        }
    }

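    // A field is readable when the special all-authorizations holder is
    // installed on the thread, or when the thread's visibility evaluator
    // accepts the field's visibility expression.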
    private boolean evaluate(FieldVisibility cv) {
        try {
            final AuthorizationsHolder authorizationsHolder = AuthorizationsHolder.threadAuthorizations.get();
            return ALLAUTHSHOLDER.equals(authorizationsHolder)
                    || authorizationsHolder.getVisibilityEvaluator().evaluate(cv);
        } catch (VisibilityParseException e) {
            // an unparsable visibility expression is treated as not visible
        }
        return false;
    }

    @Override
    public StoredFieldsReader clone() {
        ensureOpen();
        return new CompressingStoredFieldsReader(this);
    }

    int getVersion() {
        return version;
    }

    CompressionMode getCompressionMode() {
        return compressionMode;
    }

    int getChunkSize() {
        return chunkSize;
    }

    ChunkIterator chunkIterator(int startDocID) throws IOException {
        ensureOpen();
        fieldsStream.seek(indexReader.getStartPointer(startDocID));
        return new ChunkIterator(startDocID);
    }

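    /**
     * Iterates over whole compressed chunks in document order; obtained via
     * {@link #chunkIterator(int)}.
     */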
    final class ChunkIterator {

        final ChecksumIndexInput fieldsStream;
        final BytesRef spare;
        final BytesRef bytes;
        int docBase;
        int chunkDocs;
        int[] numStoredFields;
        int[] lengths;

        private ChunkIterator(int startDocId) throws IOException {
            this.docBase = -1;
            bytes = new BytesRef();
            spare = new BytesRef();
            numStoredFields = new int[1];
            lengths = new int[1];

            IndexInput in = CompressingStoredFieldsReader.this.fieldsStream;
            in.seek(0);
            fieldsStream = new BufferedChecksumIndexInput(in);
            fieldsStream.seek(indexReader.getStartPointer(startDocId));
        }

        /**
         * Return the decompressed size of the chunk
         */
        int chunkSize() {
            int sum = 0;
            for (int i = 0; i < chunkDocs; ++i) {
                sum += lengths[i];
            }
            return sum;
        }

        /**
         * Go to the chunk containing the provided doc ID.
         */
        void next(int doc) throws IOException {
            assert doc >= docBase + chunkDocs : doc + " " + docBase + " " + chunkDocs;
            fieldsStream.seek(indexReader.getStartPointer(doc));

            final int docBase = fieldsStream.readVInt();
            final int chunkDocs = fieldsStream.readVInt();
            if (docBase < this.docBase + this.chunkDocs || docBase + chunkDocs > numDocs) {
                throw new CorruptIndexException("Corrupted: current docBase=" + this.docBase + ", current numDocs="
                        + this.chunkDocs + ", new docBase=" + docBase + ", new numDocs=" + chunkDocs + " (resource="
                        + fieldsStream + ")");
            }
            this.docBase = docBase;
            this.chunkDocs = chunkDocs;

            if (chunkDocs > numStoredFields.length) {
                final int newLength = ArrayUtil.oversize(chunkDocs, 4);
                numStoredFields = new int[newLength];
                lengths = new int[newLength];
            }

            if (chunkDocs == 1) {
                numStoredFields[0] = fieldsStream.readVInt();
                lengths[0] = fieldsStream.readVInt();
            } else {
                final int bitsPerStoredFields = fieldsStream.readVInt();
                if (bitsPerStoredFields == 0) {
                    Arrays.fill(numStoredFields, 0, chunkDocs, fieldsStream.readVInt());
                } else if (bitsPerStoredFields > 31) {
                    throw new CorruptIndexException(
                            "bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
                } else {
                    final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(fieldsStream,
                            PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields, 1);
                    for (int i = 0; i < chunkDocs; ++i) {
                        numStoredFields[i] = (int) it.next();
                    }
                }

                final int bitsPerLength = fieldsStream.readVInt();
                if (bitsPerLength == 0) {
                    Arrays.fill(lengths, 0, chunkDocs, fieldsStream.readVInt());
                } else if (bitsPerLength > 31) {
                    throw new CorruptIndexException("bitsPerLength=" + bitsPerLength);
                } else {
                    final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(fieldsStream,
                            PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
                    for (int i = 0; i < chunkDocs; ++i) {
                        lengths[i] = (int) it.next();
                    }
                }
            }
        }

        /**
         * Decompress the chunk.
         */
        void decompress() throws IOException {
            // decompress data
            final int chunkSize = chunkSize();
            if (version >= VERSION_BIG_CHUNKS && chunkSize >= 2 * CompressingStoredFieldsReader.this.chunkSize) {
                bytes.offset = bytes.length = 0;
                for (int decompressed = 0; decompressed < chunkSize;) {
                    final int toDecompress = Math.min(chunkSize - decompressed,
                            CompressingStoredFieldsReader.this.chunkSize);
                    decompressor.decompress(fieldsStream, toDecompress, 0, toDecompress, spare);
                    bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + spare.length);
                    System.arraycopy(spare.bytes, spare.offset, bytes.bytes, bytes.length, spare.length);
                    bytes.length += spare.length;
                    decompressed += toDecompress;
                }
            } else {
                decompressor.decompress(fieldsStream, chunkSize, 0, chunkSize, bytes);
            }
            if (bytes.length != chunkSize) {
                throw new CorruptIndexException("Corrupted: expected chunk size = " + chunkSize() + ", got "
                        + bytes.length + " (resource=" + fieldsStream + ")");
            }
        }

        /**
         * Check integrity of the data. The iterator is not usable after this method has been called.
         */
        void checkIntegrity() throws IOException {
            if (version >= VERSION_CHECKSUM) {
                fieldsStream.seek(fieldsStream.length() - CodecUtil.footerLength());
                CodecUtil.checkFooter(fieldsStream);
            }
        }

    }

    @Override
    public long ramBytesUsed() {
        return indexReader.ramBytesUsed();
    }

    @Override
    public void checkIntegrity() throws IOException {
        if (version >= VERSION_CHECKSUM) {
            CodecUtil.checksumEntireFile(fieldsStream);
        }
    }

}
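
For context, here is a minimal usage sketch. A StoredFieldsReader is normally obtained through the codec rather than constructed directly, and the AuthorizationsHolder constructor shown below is an assumption: only the threadAuthorizations ThreadLocal is actually referenced in the source above.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;

// reader is a CompressingStoredFieldsReader and docID a valid document number.
// The holder construction is hypothetical; only the ThreadLocal appears in the
// source above.
AuthorizationsHolder holder = new AuthorizationsHolder(authorizations);
AuthorizationsHolder.threadAuthorizations.set(holder);
try {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    reader.visitDocument(docID, visitor);
    // Only fields whose visibility evaluated to true were collected.
    Document doc = visitor.getDocument();
} finally {
    AuthorizationsHolder.threadAuthorizations.remove();
}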