// NOTE(review): stray non-Java text ("Java tutorial") removed from the top of the
// file — it was not part of the source and would not compile.
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.rocana.lucene.codec.v1;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;

/**
 * Fork of Lucene's {@link org.apache.lucene.codecs.blocktree.FieldReader}
 * from Lucene's git repository, tag: releases/lucene-solr/5.5.0
 *
 * Why we forked:
 * - To use the other forked classes, like {@link RocanaBlockTreeTermsReader}.
 *
 * What changed in the fork?
 * - Use the other forked classes.
 * - Removed trailing whitespace.
 * - Changed these javadocs.
 * - Renamed class to have 'Rocana' in the name.
 * - Moved to a different package.
 *
 * This is one of the forked classes where no logic changed, but to get
 * the fork to compile we had to fork this class too. That happened with
 * several classes because they had a hard reference to another class we
 * forked. Ideally, our forked classes would extend the original Lucene
 * class and override just the methods we need to change. Unfortunately
 * in most cases that wasn't an option because many Lucene classes are final.
 *
 * To see a full diff of changes in our fork: compare this version to the very first
 * commit in git history. That first commit is the exact file from Lucene with no
 * modifications.
 *
 * @see RocanaSearchCodecV1
 *
 * Original Lucene documentation:
 * BlockTree's implementation of {@link Terms}.
 * @lucene.internal
 */
public final class RocanaFieldReader extends Terms implements Accountable {

  // private final boolean DEBUG = BlockTreeTermsWriter.DEBUG;

  // Shallow size of this object plus the three BytesRef fields (rootCode,
  // minTerm, maxTerm); used as the fixed part of ramBytesUsed().
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(RocanaFieldReader.class)
      + 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);

  // Per-field statistics and metadata, read by the Terms accessors below.
  final long numTerms;
  final FieldInfo fieldInfo;
  final long sumTotalTermFreq;
  final long sumDocFreq;
  final int docCount;
  // File pointer where the FST index for this field starts in the index input.
  final long indexStartFP;
  // File pointer of the root block, decoded from rootCode in the constructor.
  final long rootBlockFP;
  final BytesRef rootCode;
  // May be null for older indexes that didn't store min/max term (see getMin/getMax).
  final BytesRef minTerm;
  final BytesRef maxTerm;
  final int longsSize;
  final RocanaBlockTreeTermsReader parent;

  // Terms index FST; null when no index input was supplied.
  final FST<BytesRef> index;

  //private boolean DEBUG;

  /**
   * Reads the terms-index FST for one field. If {@code indexIn} is null, no
   * index is loaded and {@link #index} stays null.
   *
   * @throws IOException if seeking or reading the FST from {@code indexIn} fails
   */
  RocanaFieldReader(RocanaBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode,
                    long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP,
                    int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
    assert numTerms > 0;
    this.fieldInfo = fieldInfo;
    //DEBUG = RocanaBlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
    this.parent = parent;
    this.numTerms = numTerms;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
    this.indexStartFP = indexStartFP;
    this.rootCode = rootCode;
    this.longsSize = longsSize;
    this.minTerm = minTerm;
    this.maxTerm = maxTerm;
    // if (DEBUG) {
    //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
    // }

    // The root code packs the root block's file pointer with flag bits in the
    // low OUTPUT_FLAGS_NUM_BITS; shift them off to recover the raw FP.
    rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length))
        .readVLong() >>> RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

    if (indexIn != null) {
      // Clone so this reader's position is independent of the shared input.
      final IndexInput clone = indexIn.clone();
      //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
      clone.seek(indexStartFP);
      index = new FST<>(clone, ByteSequenceOutputs.getSingleton());

      /*
       if (false) {
       final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
       Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
       Util.toDot(index, w, false, false);
       System.out.println("FST INDEX: SAVED to " + dotFileName);
       w.close();
       }
       */
    } else {
      index = null;
    }
  }

  @Override
  public BytesRef getMin() throws IOException {
    if (minTerm == null) {
      // Older index that didn't store min/maxTerm
      return super.getMin();
    } else {
      return minTerm;
    }
  }

  @Override
  public BytesRef getMax() throws IOException {
    if (maxTerm == null) {
      // Older index that didn't store min/maxTerm
      return super.getMax();
    } else {
      return maxTerm;
    }
  }

  /** For debugging -- used by CheckIndex too*/
  @Override
  public RocanaStats getStats() throws IOException {
    // TODO: add auto-prefix terms into stats
    return new RocanaSegmentTermsEnum(this).computeBlockStats();
  }

  @Override
  public boolean hasFreqs() {
    return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
  }

  @Override
  public boolean hasOffsets() {
    return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  }

  @Override
  public boolean hasPositions() {
    return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  }

  @Override
  public boolean hasPayloads() {
    return fieldInfo.hasPayloads();
  }

  @Override
  public TermsEnum iterator() throws IOException {
    return new RocanaSegmentTermsEnum(this);
  }

  @Override
  public long size() {
    return numTerms;
  }

  @Override
  public long getSumTotalTermFreq() {
    return sumTotalTermFreq;
  }

  @Override
  public long getSumDocFreq() {
    return sumDocFreq;
  }

  @Override
  public int getDocCount() {
    return docCount;
  }

  @Override
  public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
    // if (DEBUG) System.out.println("  RocanaFieldReader.intersect startTerm=" + BlockTreeTermsWriter.brToString(startTerm));
    //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
    // TODO: we could push "it's a range" or "it's a prefix" down into RocanaIntersectTermsEnum?
    // can we optimize knowing that...?
    return new RocanaIntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
  }

  @Override
  public long ramBytesUsed() {
    // Fixed shallow size plus the FST index, when one was loaded.
    return BASE_RAM_BYTES_USED + ((index != null) ? index.ramBytesUsed() : 0);
  }

  @Override
  public Collection<Accountable> getChildResources() {
    if (index == null) {
      return Collections.emptyList();
    } else {
      return Collections.singleton(Accountables.namedAccountable("term index", index));
    }
  }

  @Override
  public String toString() {
    return "BlockTreeTerms(terms=" + numTerms + ",postings=" + sumDocFreq + ",positions=" + sumTotalTermFreq + ",docs=" + docCount + ")";
  }
}