List of usage examples for org.apache.lucene.util.PriorityQueue#size()
Method signature: int size()
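Before the per-project examples, here is a minimal, self-contained sketch of the pattern they all share: subclass org.apache.lucene.util.PriorityQueue with a lessThan() ordering, fill it with insertWithOverflow(), and use size() as the loop bound when draining it with pop(). The Scored helper class, the capacity of 3, and the sample data are made up for illustration; only the PriorityQueue constructor, lessThan(), insertWithOverflow(), size(), and pop() calls belong to the Lucene API (the variant used by these examples, whose constructor takes a maximum size).

import org.apache.lucene.util.PriorityQueue;

public class TopScoresExample {

    // Illustrative value holder; not part of Lucene.
    static class Scored {
        final String name;
        final double score;

        Scored(String name, double score) {
            this.name = name;
            this.score = score;
        }
    }

    public static void main(String[] args) {
        // Bounded queue that keeps the 3 highest scores. lessThan() decides which
        // element sits at the top of the heap (the weakest element kept so far).
        PriorityQueue<Scored> queue = new PriorityQueue<Scored>(3) {
            @Override
            protected boolean lessThan(Scored a, Scored b) {
                return a.score < b.score;
            }
        };

        String[] names = { "a", "b", "c", "d" };
        double[] scores = { 0.4, 0.9, 0.1, 0.7 };
        for (int i = 0; i < names.length; i++) {
            // Once size() reaches the capacity, insertWithOverflow() evicts the current top.
            queue.insertWithOverflow(new Scored(names[i], scores[i]));
        }

        // size() is the number of elements currently held (at most the capacity),
        // so it is the natural loop bound for draining the queue; pop() returns
        // the smallest remaining element, so filling the array from the back
        // produces descending score order.
        Scored[] ordered = new Scored[queue.size()];
        for (int i = queue.size() - 1; i >= 0; i--) {
            ordered[i] = queue.pop();
        }
        for (Scored s : ordered) {
            System.out.println(s.name + " " + s.score);
        }
    }
}

Because pop() always removes the smallest element according to lessThan(), filling an array of length size() from the back, as buildDocHits() and createQuery() below do, yields the elements in descending order.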
From source file:io.ssc.relationdiscovery.KMeans.java
License:Open Source License
public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) {
    PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) {
        @Override
        protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) {
            return a.distance < b.distance;
        }
    };
    Vector centroid = centroids[centroidIndex];
    for (MatrixSlice rowSlice : A) {
        Vector row = rowSlice.vector();
        double distance = distanceMeasure.distance(centroid, row);
        queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index())));
    }
    while (queue.size() > 0) {
        System.out.println("\t" + queue.pop());
    }
}
From source file:net.dataninja.ee.textEngine.facet.GroupCounts.java
License:Open Source License
/** Construct the array of doc hits for the hit group. */
private void buildDocHits(int group, ResultGroup resultGroup) {
    PriorityQueue queue = hitQueue[group];
    int nFound = queue.size();
    DocHitImpl[] hitArray = new DocHitImpl[nFound];
    for (int i = 0; i < nFound; i++) {
        int index = nFound - i - 1;
        hitArray[index] = (DocHitImpl) queue.pop();
    }
    int start = startDoc[group];
    int max = maxDocs[group];
    int nHits = Math.max(0, Math.min(nFound - start, max));
    resultGroup.docHits = new DocHit[nHits];
    resultGroup.totalDocs = nDocHits(group);
    resultGroup.startDoc = start;
    resultGroup.endDoc = start + nHits;
    for (int i = startDoc[group]; i < nFound; i++)
        resultGroup.docHits[i - start] = hitArray[i];
}
From source file:net.dataninja.ee.textEngine.MoreLikeThisQuery.java
License:Apache License
/**
 * Create the More like query from a PriorityQueue
 */
private Query createQuery(IndexReader indexReader, PriorityQueue q) throws IOException {
    // Pop everything from the queue.
    QueryWord[] queryWords = new QueryWord[q.size()];
    for (int i = q.size() - 1; i >= 0; i--)
        queryWords[i] = (QueryWord) q.pop();

    BooleanQuery query = new BooleanQuery(true /*disable coord*/);

    // At the moment, there's no need to scale by the best score. It simply
    // clouds the query explanation. It doesn't affect the scores, since
    // Lucene applies a query normalization factor anyway.
    //
    //float bestScore = (queryWords.length > 0) ? queryWords[0].score : 0.0f;

    for (int i = 0; i < fieldNames.length; i++) {
        ArrayList fieldClauses = new ArrayList();
        for (int j = 0; j < queryWords.length; j++) {
            QueryWord qw = queryWords[j];
            Term term = new Term(fieldNames[i], qw.word);

            // Skip words not present in this field.
            int docFreq = indexReader.docFreq(term);
            if (docFreq == 0)
                continue;

            // Add it to the query.
            SpanTermQuery tq = new SpanTermQuery(term);
            if (boost)
                tq.setBoost(qw.score);
            fieldClauses.add(tq);
        } // for j

        // If no terms for this field, skip it.
        if (fieldClauses.isEmpty())
            continue;

        SpanQuery[] clauses = (SpanQuery[]) fieldClauses.toArray(new SpanQuery[fieldClauses.size()]);

        // Now make a special Or-Near query out of the clauses.
        SpanOrNearQuery fieldQuery = new SpanOrNearQuery(clauses, 10, false);

        // Boost if necessary.
        if (fieldBoosts != null)
            fieldQuery.setBoost(fieldBoosts[i]);

        // We currently don't support more-like-this queries on the full text.
        // It would involve de-chunking, and also fancier logic to pick the
        // "most interesting" terms in the first place.
        //
        if (fieldNames[i].equals("text"))
            throw new RuntimeException("MoreLikeThisQuery does not support 'text' field.");

        // And add to the main query.
        query.add(fieldQuery, BooleanClause.Occur.SHOULD);
    } // for i

    // All done.
    return query;
}
From source file:org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java
License:Apache License
protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) {
    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        StringBuffer excerpt = new StringBuffer(excerptStart);
        excerpt.append(fragmentStart);
        int min = excerpt.length();
        excerpt.append(text.substring(0, Math.min(text.length(), surround * 2)));
        if (text.length() > excerpt.length()) {
            for (int i = excerpt.length() - 1; i > min; i--) {
                if (Character.isWhitespace(excerpt.charAt(i))) {
                    excerpt.delete(i, excerpt.length());
                    excerpt.append(" ...");
                    break;
                }
            }
        }
        excerpt.append(fragmentEnd).append(excerptEnd);
        return excerpt.toString();
    }

    PriorityQueue bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
        for (int j = i + 1; j < offsets.length; j++) {
            if (!fi.add(offsets[j], text)) {
                break;
            }
        }
        bestFragments.insert(fi);
    }

    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first
    List infos = new LinkedList();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }

    Map offsetInfos = new IdentityHashMap();
    // remove overlapping fragment infos
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        boolean overlap = false;
        for (Iterator fit = fi.iterator(); fit.hasNext() && !overlap;) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            for (Iterator oit = fi.iterator(); oit.hasNext();) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }

    // create excerpts
    StringBuffer sb = new StringBuffer(excerptStart);
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        sb.append(fragmentStart);
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        for (Iterator fIt = fi.iterator(); fIt.hasNext();) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset()));
            }
            sb.append(hlStart);
            sb.append(text.substring(oi.getStartOffset(), oi.getEndOffset()));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}
From source file:org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java
License:Apache License
@Override
protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) throws IOException {
    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }

    PriorityQueue<FragmentInfo> bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        if (offsets[i].getEndOffset() <= text.length()) {
            FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
            for (int j = i + 1; j < offsets.length; j++) {
                if (offsets[j].getEndOffset() > text.length()) {
                    break;
                }
                if (!fi.add(offsets[j], text)) {
                    break;
                }
            }
            bestFragments.insertWithOverflow(fi);
        }
    }

    if (bestFragments.size() == 0) {
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }

    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first
    List<FragmentInfo> infos = new LinkedList<FragmentInfo>();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }

    Map<TermVectorOffsetInfo, Object> offsetInfos = new IdentityHashMap<TermVectorOffsetInfo, Object>();
    // remove overlapping fragment infos
    Iterator<FragmentInfo> it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        boolean overlap = false;
        Iterator<TermVectorOffsetInfo> fit = fi.iterator();
        while (fit.hasNext() && !overlap) {
            TermVectorOffsetInfo oi = fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            Iterator<TermVectorOffsetInfo> oit = fi.iterator();
            while (oit.hasNext()) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }

    // create excerpts
    StringBuffer sb = new StringBuffer(excerptStart);
    it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        sb.append(fragmentStart);
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        Iterator<TermVectorOffsetInfo> fIt = fi.iterator();
        while (fIt.hasNext()) {
            TermVectorOffsetInfo oi = fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(escape(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
            }
            sb.append(hlStart);
            sb.append(escape(text.substring(oi.getStartOffset(), oi.getEndOffset())));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}
From source file:org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java
License:Apache License
private PriorityQueue<WeightedThing<Vector>> searchInternal(Vector query) {
    long queryHash = HashedVector.computeHash64(query, projection);

    // We keep an approximation of the closest vectors here.
    PriorityQueue<WeightedThing<Vector>> top = Searcher.getCandidateQueue(getSearchSize());

    // We scan the vectors using bit counts as an approximation of the dot product so we can do as few
    // full distance computations as possible. Our goal is to only do full distance computations for
    // vectors with hash distance at most as large as the searchSize biggest hash distance seen so far.
    OnlineSummarizer[] distribution = new OnlineSummarizer[BITS + 1];
    for (int i = 0; i < BITS + 1; i++) {
        distribution[i] = new OnlineSummarizer();
    }

    distanceEvaluations = 0;

    // We keep the counts of the hash distances here. This lets us accurately
    // judge what hash distance cutoff we should use.
    int[] hashCounts = new int[BITS + 1];

    // Maximum number of different bits to still consider a vector a candidate for nearest neighbor.
    // Starts at the maximum number of bits, but decreases and can increase.
    int hashLimit = BITS;
    int limitCount = 0;
    double distanceLimit = Double.POSITIVE_INFINITY;

    // In this loop, we have the invariants that:
    //
    // limitCount = sum_{i<hashLimit} hashCount[i]
    // and
    // limitCount >= searchSize && limitCount - hashCount[hashLimit-1] < searchSize
    for (HashedVector vector : trainingVectors) {
        // This computes the Hamming Distance between the vector's hash and the query's hash.
        // The result is correlated with the angle between the vectors.
        int bitDot = vector.hammingDistance(queryHash);
        if (bitDot <= hashLimit) {
            distanceEvaluations++;
            double distance = distanceMeasure.distance(query, vector);
            distribution[bitDot].add(distance);
            if (distance < distanceLimit) {
                top.insertWithOverflow(new WeightedThing<Vector>(vector, distance));
                if (top.size() == searchSize) {
                    distanceLimit = top.top().getWeight();
                }
                hashCounts[bitDot]++;
                limitCount++;
                while (hashLimit > 0 && limitCount - hashCounts[hashLimit - 1] > searchSize) {
                    hashLimit--;
                    limitCount -= hashCounts[hashLimit];
                }
                if (hashLimitStrategy >= 0) {
                    while (hashLimit < MAX_HASH_LIMIT
                            && distribution[hashLimit].getCount() > MIN_DISTRIBUTION_COUNT
                            && ((1 - hashLimitStrategy) * distribution[hashLimit].getQuartile(0)
                                    + hashLimitStrategy * distribution[hashLimit].getQuartile(1)) < distanceLimit) {
                        limitCount += hashCounts[hashLimit];
                        hashLimit++;
                    }
                }
            }
        }
    }
    return top;
}
From source file:org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java
License:Apache License
@Override
public List<WeightedThing<Vector>> search(Vector query, int limit) {
    PriorityQueue<WeightedThing<Vector>> top = searchInternal(query);
    List<WeightedThing<Vector>> results = Lists.newArrayListWithExpectedSize(top.size());
    while (top.size() != 0) {
        WeightedThing<Vector> wv = top.pop();
        results.add(new WeightedThing<Vector>(((HashedVector) wv.getValue()).getVector(), wv.getWeight()));
    }
    Collections.reverse(results);
    if (limit < results.size()) {
        results = results.subList(0, limit);
    }
    return results;
}
From source file:org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java
License:Apache License
/**
 * Returns the closest vector to the query.
 * When only the nearest vector is needed, use this method, NOT search(query, limit), because
 * it's faster (less overhead).
 * This is nearly the same as search().
 *
 * @param query the vector to search for
 * @param differentThanQuery if true, returns the closest vector different than the query (this
 *                           only matters if the query is among the searched vectors), otherwise,
 *                           returns the closest vector to the query (even the same vector).
 * @return the weighted vector closest to the query
 */
@Override
public WeightedThing<Vector> searchFirst(Vector query, boolean differentThanQuery) {
    // We get the top searchSize neighbors.
    PriorityQueue<WeightedThing<Vector>> top = searchInternal(query);
    // We then cut the number down to just the best 2.
    while (top.size() > 2) {
        top.pop();
    }
    // If there are fewer than 2 results, we just return the one we have.
    if (top.size() < 2) {
        return removeHash(top.pop());
    }
    // There are exactly 2 results.
    WeightedThing<Vector> secondBest = top.pop();
    WeightedThing<Vector> best = top.pop();
    // If the best result is the same as the query but we don't want to return the query,
    // fall back to the second best.
    if (differentThanQuery && best.getValue().equals(query)) {
        best = secondBest;
    }
    return removeHash(best);
}
From source file:org.apache.solr.request.NumericFacets.java
License:Apache License
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    final boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumericType numericType = ft.getNumericType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();

    // 1. accumulate
    final HashTable hashTable = new HashTable();
    final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
    AtomicReaderContext ctx = null;
    FieldCache.Longs longs = null;
    Bits docsWithField = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            switch (numericType) {
            case LONG:
                longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
                break;
            case INT:
                final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return ints.get(docID);
                    }
                };
                break;
            case FLOAT:
                final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.floatToSortableInt(floats.get(docID));
                    }
                };
                break;
            case DOUBLE:
                final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.doubleToSortableLong(doubles.get(docID));
                    }
                };
                break;
            default:
                throw new AssertionError();
            }
            docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
        }
        long v = longs.get(doc - ctx.docBase);
        if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
            hashTable.add(doc, v, 1);
        } else {
            ++missingCount;
        }
    }

    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
        }
    }

    // 4. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<Integer>();

    // This stuff is complicated because if facet.mincount=0, the counts needs
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<Entry>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }

        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }

        if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
            if (!sf.indexed()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
                        + sf.getName() + " which is not indexed");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<String>();
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getAtomicReader().terms(fieldName);
            if (terms != null) {
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator(null);
                BytesRef term;
                switch (termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
                }
                final CharsRef spare = new CharsRef();
                for (int skipped = hashTable.size; skipped < offset && term != null
                        && StringHelper.startsWith(term, prefix);) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                for (; term != null && StringHelper.startsWith(term, prefix)
                        && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // sort=index, mincount=0 and we have less than limit items
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
                    + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        final Map<String, Integer> counts = new HashMap<String, Integer>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator(null);
            BytesRef term;
            switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
                term = termsEnum.term();
                break;
            case END:
                term = null;
                break;
            default:
                throw new AssertionError();
            }
            final CharsRef spare = new CharsRef();
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix)
                    && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }

    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}
From source file:org.apache.solr.request.PerSegmentSingleValuedFaceting.java
License:Apache License
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
    CompletionService<SegFacet> completionService = new ExecutorCompletionService<SegFacet>(executor);

    // reuse the translation logic to go from top level set to per-segment set
    baseSet = docs.getTopFilter();

    final List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();

    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<Callable<SegFacet>>();

    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;

    for (final AtomicReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);

        Callable<SegFacet> task = new Callable<SegFacet>() {
            @Override
            public SegFacet call() throws Exception {
                segFacet.countTerms();
                return segFacet;
            }
        };

        // TODO: if limiting threads, submit by largest segment first?
        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }

    // now merge the per-segment results
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };

    boolean hasMissingCount = false;
    int missingCount = 0;
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;

        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }

        if (seg.startTermIndex < seg.endTermIndex) {
            if (seg.startTermIndex == -1) {
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            if (seg.pos < seg.endTermIndex) {
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }

    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }

    BytesRef val = new BytesRef();

    while (queue.size() > 0) {
        SegFacet seg = queue.top();

        // we will normally end up advancing the term enum for this segment
        // while still using "val", so we need to make a copy since the BytesRef
        // may be shared across calls.
        val.copyBytes(seg.tempBR);

        int count = 0;

        do {
            count += seg.counts[seg.pos - seg.startTermIndex];

            // TODO: OPTIMIZATION...
            // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
            seg.pos++;
            if (seg.pos >= seg.endTermIndex) {
                queue.pop();
                seg = queue.top();
            } else {
                seg.tempBR = seg.tenum.next();
                seg = queue.updateTop();
            }
        } while (seg != null && val.compareTo(seg.tempBR) == 0);

        boolean stop = collector.collect(val, count);
        if (stop)
            break;
    }

    NamedList<Integer> res = collector.getFacetCounts();

    // convert labels to readable form
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }

    if (missing) {
        if (!hasMissingCount) {
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }

    return res;
}