com.rocana.lucene.codec.v1.RocanaFieldReader.java Source code

Introduction

Here is the source code for com.rocana.lucene.codec.v1.RocanaFieldReader.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.rocana.lucene.codec.v1;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;

/**
 * Fork of Lucene's {@link org.apache.lucene.codecs.blocktree.FieldReader}
 * from Lucene's git repository, tag: releases/lucene-solr/5.5.0
 *
 * Why we forked:
 *   - To use the other forked classes, like {@link RocanaBlockTreeTermsReader}.
 *
 * What changed in the fork?
 *   - Use the other forked classes.
 *   - Removed trailing whitespace.
 *   - Changed these javadocs.
 *   - Renamed class to have 'Rocana' in the name.
 *   - Moved to a different package.
 *
 * This is one of the forked classes where no logic changed, but to get
 * the fork to compile we had to fork this class too. That happened with
 * several classes because they had a hard reference to another class we
 * forked. Ideally, our forked classes would extend the original Lucene
 * class and override just the methods we need to change. Unfortunately
 * in most cases that wasn't an option because many Lucene classes are final.
 *
 * To see a full diff of changes in our fork: compare this version to the very first
 * commit in git history. That first commit is the exact file from Lucene with no
 * modifications.
 *
 * Original Lucene documentation:
 * BlockTree's implementation of {@link Terms}.
 *
 * @see RocanaSearchCodecV1
 * @lucene.internal
 */
public final class RocanaFieldReader extends Terms implements Accountable {

    // private final boolean DEBUG = BlockTreeTermsWriter.DEBUG;

    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(RocanaFieldReader.class)
            + 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);

    final long numTerms;
    final FieldInfo fieldInfo;
    final long sumTotalTermFreq;
    final long sumDocFreq;
    final int docCount;
    final long indexStartFP;
    final long rootBlockFP;
    final BytesRef rootCode;
    final BytesRef minTerm;
    final BytesRef maxTerm;
    final int longsSize;
    final RocanaBlockTreeTermsReader parent;

    final FST<BytesRef> index;
    //private boolean DEBUG;

    RocanaFieldReader(RocanaBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode,
            long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize,
            IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
        assert numTerms > 0;
        this.fieldInfo = fieldInfo;
        //DEBUG = RocanaBlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
        this.parent = parent;
        this.numTerms = numTerms;
        this.sumTotalTermFreq = sumTotalTermFreq;
        this.sumDocFreq = sumDocFreq;
        this.docCount = docCount;
        this.indexStartFP = indexStartFP;
        this.rootCode = rootCode;
        this.longsSize = longsSize;
        this.minTerm = minTerm;
        this.maxTerm = maxTerm;
        // if (DEBUG) {
        //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
        // }

        rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length))
                .readVLong() >>> RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

        if (indexIn != null) {
            final IndexInput clone = indexIn.clone();
            //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
            clone.seek(indexStartFP);
            index = new FST<>(clone, ByteSequenceOutputs.getSingleton());

            /*
              if (false) {
              final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
              Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
              Util.toDot(index, w, false, false);
              System.out.println("FST INDEX: SAVED to " + dotFileName);
              w.close();
              }
            */
        } else {
            index = null;
        }
    }

    @Override
    public BytesRef getMin() throws IOException {
        if (minTerm == null) {
            // Older index that didn't store min/maxTerm
            return super.getMin();
        } else {
            return minTerm;
        }
    }

    @Override
    public BytesRef getMax() throws IOException {
        if (maxTerm == null) {
            // Older index that didn't store min/maxTerm
            return super.getMax();
        } else {
            return maxTerm;
        }
    }

    /** For debugging -- used by CheckIndex too*/
    @Override
    public RocanaStats getStats() throws IOException {
        // TODO: add auto-prefix terms into stats
        return new RocanaSegmentTermsEnum(this).computeBlockStats();
    }

    @Override
    public boolean hasFreqs() {
        return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    }

    @Override
    public boolean hasOffsets() {
        return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    }

    @Override
    public boolean hasPositions() {
        return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    }

    @Override
    public boolean hasPayloads() {
        return fieldInfo.hasPayloads();
    }

    @Override
    public TermsEnum iterator() throws IOException {
        return new RocanaSegmentTermsEnum(this);
    }

    @Override
    public long size() {
        return numTerms;
    }

    @Override
    public long getSumTotalTermFreq() {
        return sumTotalTermFreq;
    }

    @Override
    public long getSumDocFreq() {
        return sumDocFreq;
    }

    @Override
    public int getDocCount() {
        return docCount;
    }

    @Override
    public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
        // if (DEBUG) System.out.println("  RocanaFieldReader.intersect startTerm=" + BlockTreeTermsWriter.brToString(startTerm));
        //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
        // TODO: we could push "it's a range" or "it's a prefix" down into RocanaIntersectTermsEnum?
        // can we optimize knowing that...?
        return new RocanaIntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton,
                compiled.commonSuffixRef, startTerm, compiled.sinkState);
    }

    @Override
    public long ramBytesUsed() {
        return BASE_RAM_BYTES_USED + ((index != null) ? index.ramBytesUsed() : 0);
    }

    @Override
    public Collection<Accountable> getChildResources() {
        if (index == null) {
            return Collections.emptyList();
        } else {
            return Collections.singleton(Accountables.namedAccountable("term index", index));
        }
    }

    @Override
    public String toString() {
        return "BlockTreeTerms(terms=" + numTerms + ",postings=" + sumDocFreq + ",positions=" + sumTotalTermFreq
                + ",docs=" + docCount + ")";
    }
}