Java tutorial: MultiValueWithWeightFacetDataCache
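This tutorial walks through MultiValueWithWeightFacetDataCache, a class from the Bobo-Browse faceted search library open-sourced by LinkedIn. It extends MultiValueFacetDataCache: in addition to the per-document value ids kept by its parent class, it maintains a parallel BigNestedIntArray of integer weights, one weight per value per document, which it decodes from the indexed terms while loading the facet data.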
/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * 2012 LinkedIn Corp. All Rights Reserved.
 */
package com.browseengine.bobo.facets.data;

import it.unimi.dsi.fastutil.ints.IntArrayList;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;

import com.browseengine.bobo.api.BoboIndexReader.WorkArea;
import com.browseengine.bobo.util.BigNestedIntArray;
import com.browseengine.bobo.util.BigNestedIntArray.BufferedLoader;

public class MultiValueWithWeightFacetDataCache<T> extends MultiValueFacetDataCache<T> {
  private static final long serialVersionUID = 1L;

  public final BigNestedIntArray _weightArray;

  public MultiValueWithWeightFacetDataCache() {
    super();
    _weightArray = new BigNestedIntArray();
  }

  /**
   * Loads multi-value facet data along with per-document weights. This method
   * uses a work area to prepare loading.
   *
   * @param fieldName   the facet field to load
   * @param reader      the index reader to scan terms from
   * @param listFactory factory for the term value list; a TermStringList is used when null
   * @param workArea    reusable buffers for loading
   * @throws IOException if reading the index fails
   */
  public void load(String fieldName, IndexReader reader, TermListFactory<T> listFactory,
      WorkArea workArea) throws IOException {
    int maxdoc = reader.maxDoc();
    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);
    BufferedLoader weightLoader = getBufferedLoader(maxdoc, null);

    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = getNegativeValueCount(reader, fieldName.intern());
    int t = 0; // current term number

    // Reserve slot 0 for documents that carry no value for this field.
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;

    _overflow = false;

    String pre = null;
    int df = 0;
    int minID = -1;
    int maxID = -1;
    int valId = 0;

    try {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null) {
        do {
          Term term = tenum.term();
          if (term == null || !fieldName.equals(term.field())) break;

          String val = term.text();
          if (val != null) {
            // Terms are encoded as "<value>\u0000<weight>"; strip the weight suffix.
            int weight = 0;
            String[] split = val.split("\u0000");
            if (split.length > 1) {
              val = split[0];
              weight = Integer.parseInt(split[split.length - 1]);
            }

            // A new value starts: flush the stats collected for the previous one.
            if (pre == null || !val.equals(pre)) {
              if (pre != null) {
                freqList.add(df);
                minIDList.add(minID);
                maxIDList.add(maxID);
              }
              list.add(val);
              df = 0;
              minID = -1;
              maxID = -1;
              // Negative values were indexed in reverse order, so remap their ids.
              valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
              t++;
            }

            tdoc.seek(tenum);
            if (tdoc.next()) {
              df++;
              int docid = tdoc.doc();
              if (!loader.add(docid, valId)) logOverflow(fieldName);
              else weightLoader.add(docid, weight);
              // Record the lowest docid for this value; another weight-variant
              // term of the same value may start with a lower docid.
              if (minID < 0 || docid < minID) minID = docid;
              bitset.fastSet(docid);
              while (tdoc.next()) {
                df++;
                docid = tdoc.doc();
                if (!loader.add(docid, valId)) logOverflow(fieldName);
                else weightLoader.add(docid, weight);
                bitset.fastSet(docid);
              }
              // Postings arrive in docid order, so the last docid is the largest
              // for this term; keep the maximum across weight-variant terms.
              if (docid > maxID) maxID = docid;
            }
            pre = val;
          }
        } while (tenum.next());

        // Flush the stats for the final value.
        if (pre != null) {
          freqList.add(df);
          minIDList.add(minID);
          maxIDList.add(maxID);
        }
      }
    } finally {
      try {
        if (tdoc != null) {
          tdoc.close();
        }
      } finally {
        if (tenum != null) {
          tenum.close();
        }
      }
    }

    list.seal();

    try {
      _nestedArray.load(maxdoc + 1, loader);
      _weightArray.load(maxdoc + 1, weightLoader);
    } catch (IOException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }

    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    // Slot 0 covers documents without a value: find the first and last such
    // document, and count them via the complement of the bitset.
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.contains(doc, 0, true)) {
      ++doc;
    }
    if (doc <= maxdoc) {
      this.minIDs[0] = doc;
      doc = maxdoc;
      while (doc > 0 && !_nestedArray.contains(doc, 0, true)) {
        --doc;
      }
      if (doc > 0) {
        this.maxIDs[0] = doc;
      }
    }
    this.freqs[0] = maxdoc + 1 - (int) bitset.cardinality();
  }
}
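The loader expects each weighted value to have been indexed as a single un-analyzed term of the form value + '\u0000' + weight, since load() splits each term's text on '\u0000' and parses the trailing integer as the weight. Below is a minimal sketch of how such a field might be written and loaded with the Lucene 3.x API; the field name "tags", the values, and the weights are hypothetical, and in a typical Bobo setup the cache would be populated by a facet handler when the BoboIndexReader is built rather than by calling load() by hand.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import com.browseengine.bobo.api.BoboIndexReader.WorkArea;
import com.browseengine.bobo.facets.data.MultiValueWithWeightFacetDataCache;

public class WeightedFacetExample {
  public static void main(String[] args) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));

    // One un-analyzed term per (value, weight) pair, joined by '\u0000',
    // which is exactly the separator load() splits on.
    Document doc = new Document();
    doc.add(new Field("tags", "sports\u00007", Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(new Field("tags", "soccer\u00003", Field.Store.NO, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    try {
      MultiValueWithWeightFacetDataCache<String> cache =
          new MultiValueWithWeightFacetDataCache<String>();
      // Passing a null listFactory makes load() fall back to a TermStringList.
      cache.load("tags", reader, null, new WorkArea());
    } finally {
      reader.close();
    }
  }
}

Encoding the weight into the term itself means a value and its weights arrive together during the sequential TermEnum scan, so the value ids and the weights can be filled into the two BigNestedIntArray structures in a single pass over the index.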