List of usage examples for org.apache.lucene.util PriorityQueue add
public final T add(T element)
From source file:com.browseengine.bobo.facets.CombinedFacetAccessible.java
License:Apache License
public List<BrowseFacet> getFacets() { if (_closed) { throw new IllegalStateException("This instance of count collector was already closed"); }// w w w . jav a2 s .c om int maxCnt = _fspec.getMaxCount(); if (maxCnt <= 0) maxCnt = Integer.MAX_VALUE; int minHits = _fspec.getMinHitCount(); LinkedList<BrowseFacet> list = new LinkedList<BrowseFacet>(); int cnt = 0; Comparable facet = null; FacetIterator iter = (FacetIterator) this.iterator(); Comparator<BrowseFacet> comparator; if (FacetSortSpec.OrderValueAsc.equals(_fspec.getOrderBy())) { while ((facet = iter.next(minHits)) != null) { // find the next facet whose combined hit count obeys minHits list.add(new BrowseFacet(String.valueOf(facet), iter.count)); if (++cnt >= maxCnt) break; } } else if (FacetSortSpec.OrderHitsDesc.equals(_fspec.getOrderBy())) { comparator = new Comparator<BrowseFacet>() { public int compare(BrowseFacet f1, BrowseFacet f2) { int val = f2.getHitCount() - f1.getHitCount(); if (val == 0) { val = (f1.getValue().compareTo(f2.getValue())); } return val; } }; if (maxCnt != Integer.MAX_VALUE) { // we will maintain a min heap of size maxCnt // Order by hits in descending order and max count is supplied PriorityQueue queue = createPQ(maxCnt, comparator); int qsize = 0; while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) { queue.add(new BrowseFacet(String.valueOf(facet), iter.count)); qsize++; } if (facet != null) { BrowseFacet rootFacet = (BrowseFacet) queue.top(); minHits = rootFacet.getHitCount() + 1; // facet count less than top of min heap, it will never be added while (((facet = iter.next(minHits)) != null)) { rootFacet.setValue(String.valueOf(facet)); rootFacet.setHitCount(iter.count); rootFacet = (BrowseFacet) queue.updateTop(); minHits = rootFacet.getHitCount() + 1; } } // at this point, queue contains top maxCnt facets that have hitcount >= minHits while (qsize-- > 0) { // append each entry to the beginning of the facet list to order facets by hits descending 
list.addFirst((BrowseFacet) queue.pop()); } } else { // no maxCnt specified. So fetch all facets according to minHits and sort them later while ((facet = iter.next(minHits)) != null) list.add(new BrowseFacet(String.valueOf(facet), iter.count)); Collections.sort(list, comparator); } } else // FacetSortSpec.OrderByCustom.equals(_fspec.getOrderBy() { comparator = _fspec.getCustomComparatorFactory().newComparator(); if (maxCnt != Integer.MAX_VALUE) { PriorityQueue queue = createPQ(maxCnt, comparator); BrowseFacet browseFacet = new BrowseFacet(); int qsize = 0; while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) { queue.add(new BrowseFacet(String.valueOf(facet), iter.count)); qsize++; } if (facet != null) { while ((facet = iter.next(minHits)) != null) { // check with the top of min heap browseFacet.setHitCount(iter.count); browseFacet.setValue(String.valueOf(facet)); browseFacet = (BrowseFacet) queue.insertWithOverflow(browseFacet); } } // remove from queue and add to the list while (qsize-- > 0) list.addFirst((BrowseFacet) queue.pop()); } else { // order by custom but no max count supplied while ((facet = iter.next(minHits)) != null) list.add(new BrowseFacet(String.valueOf(facet), iter.count)); Collections.sort(list, comparator); } } return list; }
From source file:org.apache.solr.request.PerSegmentSingleValuedFaceting.java
License:Apache License
/**
 * Computes facet counts for {@code fieldName} by counting terms on each index
 * segment independently — submitting at most {@code nThreads} counting tasks to
 * {@code executor} at a time — and then merging the per-segment sorted term
 * streams with a priority queue.
 *
 * @param executor executor the per-segment counting tasks run on
 * @return facet counts with labels converted to readable form, plus a trailing
 *         null-keyed missing count when {@code missing} is set
 * @throws IOException from term enumeration on a segment
 */
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
    CompletionService<SegFacet> completionService = new ExecutorCompletionService<SegFacet>(executor);

    // reuse the translation logic to go from top level set to per-segment set
    baseSet = docs.getTopFilter();

    final List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<Callable<SegFacet>>();

    // nThreads <= 0 means "no limit": submit every segment task up front.
    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;

    for (final AtomicReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);

        Callable<SegFacet> task = new Callable<SegFacet>() {
            @Override
            public SegFacet call() throws Exception {
                segFacet.countTerms();
                return segFacet;
            }
        };

        // TODO: if limiting threads, submit by largest segment first?
        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }

    // now merge the per-segment results.
    // Min-heap ordered by each segment's current term, so top() always holds the
    // smallest term not yet merged.
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };

    boolean hasMissingCount = false;
    int missingCount = 0;
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;
        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            // A task finished: top up the executor from the pending backlog.
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            // Re-interrupt so callers still observe the interruption.
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            // Unwrap the task failure, preserving the original cause.
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }

        if (seg.startTermIndex < seg.endTermIndex) {
            if (seg.startTermIndex == -1) {
                // startTermIndex == -1: counts[0] holds this segment's "missing" bucket.
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            if (seg.pos < seg.endTermIndex) {
                // Position the term enum at the segment's first term and enqueue it.
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }

    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }

    BytesRef val = new BytesRef();

    while (queue.size() > 0) {
        SegFacet seg = queue.top();

        // we will normally end up advancing the term enum for this segment
        // while still using "val", so we need to make a copy since the BytesRef
        // may be shared across calls.
        val.copyBytes(seg.tempBR);

        int count = 0;
        do {
            count += seg.counts[seg.pos - seg.startTermIndex];

            // TODO: OPTIMIZATION...
            // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
            seg.pos++;
            if (seg.pos >= seg.endTermIndex) {
                queue.pop();
                seg = queue.top(); // may be null when the queue empties; loop guard below handles it
            } else {
                seg.tempBR = seg.tenum.next();
                seg = queue.updateTop();
            }
            // Keep accumulating while the next-smallest term equals the current one.
        } while (seg != null && val.compareTo(seg.tempBR) == 0);

        boolean stop = collector.collect(val, count);
        if (stop) break;
    }

    NamedList<Integer> res = collector.getFacetCounts();

    // convert labels to readable form
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }

    if (missing) {
        if (!hasMissingCount) {
            // No segment reported a missing bucket; fall back to a direct count.
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }
    return res;
}
From source file:org.apache.solr.search.facet.FacetFieldProcessor.java
License:Apache License
/** Processes the collected data to find the top slots, and composes it in the response NamedList. */
SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int slotCardinality,
        IntFunction<Comparable> bucketValFromSlotNumFunc, Function<Comparable, String> fieldQueryValFunc)
        throws IOException {
    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        // Shard requests collect up to 100 bucket values (returned under "numBuckets"
        // below) — presumably for merge-time cardinality estimation; verify against merger.
        bucketVals = new ArrayList<>(100);
    }

    // Shards return from rank 0; only a non-shard (merged) request applies the offset here.
    final int off = fcontext.isShard() ? 0 : (int) freq.offset;

    long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
    if (freq.limit >= 0) {
        effectiveLimit = freq.limit;
        if (fcontext.isShard()) {
            // add over-request if this is a shard request
            if (freq.overrequest == -1) {
                // default: add 10% plus 4 (to overrequest for very small limits)
                effectiveLimit = (long) (effectiveLimit * 1.1 + 4);
            } else {
                effectiveLimit += freq.overrequest;
            }
        }
    }

    final int sortMul = freq.sortDirection.getMultiplier();

    int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(off + effectiveLimit, Integer.MAX_VALUE - 1)
            : Integer.MAX_VALUE - 1);
    maxTopVals = Math.min(maxTopVals, slotCardinality);

    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    // orderPredicate.test(a, b) == true means a ranks below b in heap order
    // (ties broken by index order / slot number), i.e. a is the worse bucket.
    final BiPredicate<Slot, Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        };
    } else {
        // No separate index-order accumulator: fall back to raw slot number for ties.
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? b.slot < a.slot : cmp < 0;
        };
    }
    final PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxTopVals) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            return orderPredicate.test(a, b);
        }
    };

    // note: We avoid object allocation by having a Slot and re-using the 'bottom'.
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {
        // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
        if (effectiveMincount > 0 && countAcc.getCount(slotNum) < effectiveMincount) {
            continue;
        }

        numBuckets++;
        if (bucketVals != null && bucketVals.size() < 100) {
            Object val = bucketValFromSlotNumFunc.apply(slotNum);
            bucketVals.add(val);
        }

        if (bottom != null) {
            // Queue is full: replace the worst entry only if this slot beats it.
            scratchSlot.slot = slotNum; // scratchSlot is only used to hold this slotNum for the following line
            if (orderPredicate.test(bottom, scratchSlot)) {
                bottom.slot = slotNum;
                bottom = queue.updateTop();
            }
        } else if (effectiveLimit > 0) {
            // queue not full
            Slot s = new Slot();
            s.slot = slotNum;
            queue.add(s);
            if (queue.size() >= maxTopVals) {
                bottom = queue.top();
            }
        }
    }

    assert queue.size() <= numBuckets;

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            // Shard response: bundle the count with the sampled values.
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null) fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        allBuckets.add("count", allBucketsAcc.getSpecialCount());
        allBucketsAcc.setValues(allBuckets, -1); // -1 slotNum is unused for SpecialSlotAcc
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= effectiveLimit;
    // Pop worst-first from the heap into the tail of the array => best-first order.
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    // Sub-facets or deferred aggregations need a per-bucket filter query.
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = bucketValFromSlotNumFunc.apply(slotNum);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, fieldQueryValFunc.apply(val)) : null;

        fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
        bucketList.add(bucket);
    }

    return res;
}
From source file:org.codelibs.bench.core.action.BenchmarkExecutor.java
License:Apache License
private CompetitionIteration.SlowRequest[] getTopN(long[] buckets, List<SearchRequest> requests, int multiplier, int topN) { final int numRequests = requests.size(); // collect the top N final PriorityQueue<IndexAndTime> topNQueue = new PriorityQueue<IndexAndTime>(topN) { @Override/*from ww w. j a v a2s . c om*/ protected boolean lessThan(IndexAndTime a, IndexAndTime b) { return a.avgTime < b.avgTime; } }; assert multiplier > 0; for (int i = 0; i < numRequests; i++) { long sum = 0; long max = Long.MIN_VALUE; for (int j = 0; j < multiplier; j++) { final int base = (numRequests * j); sum += buckets[i + base]; max = Math.max(buckets[i + base], max); } final long avg = sum / multiplier; if (topNQueue.size() < topN) { topNQueue.add(new IndexAndTime(i, max, avg)); } else if (topNQueue.top().avgTime < max) { topNQueue.top().update(i, max, avg); topNQueue.updateTop(); } } final CompetitionIteration.SlowRequest[] slowRequests = new CompetitionIteration.SlowRequest[topNQueue .size()]; int i = topNQueue.size() - 1; while (topNQueue.size() > 0) { IndexAndTime pop = topNQueue.pop(); CompetitionIteration.SlowRequest slow = new CompetitionIteration.SlowRequest(pop.avgTime, pop.maxTime, requests.get(pop.index)); slowRequests[i--] = slow; } return slowRequests; }
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram.java
License:Apache License
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) { final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) { @Override//w w w . j av a 2s . com protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) { return a.current.key < b.current.key; } }; for (InternalAggregation aggregation : aggregations) { InternalDateHistogram histogram = (InternalDateHistogram) aggregation; if (histogram.buckets.isEmpty() == false) { pq.add(new IteratorAndCurrent(histogram.buckets.iterator())); } } List<Bucket> reducedBuckets = new ArrayList<>(); if (pq.size() > 0) { // list of buckets coming from different shards that have the same key List<Bucket> currentBuckets = new ArrayList<>(); double key = pq.top().current.key; do { final IteratorAndCurrent top = pq.top(); if (top.current.key != key) { // the key changes, reduce what we already buffered and reset the buffer for current buckets final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext); if (reduced.getDocCount() >= minDocCount) { reducedBuckets.add(reduced); } currentBuckets.clear(); key = top.current.key; } currentBuckets.add(top.current); if (top.iterator.hasNext()) { final Bucket next = top.iterator.next(); assert next.key > top.current.key : "shards must return data sorted by key"; top.current = next; pq.updateTop(); } else { pq.pop(); } } while (pq.size() > 0); if (currentBuckets.isEmpty() == false) { final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext); if (reduced.getDocCount() >= minDocCount) { reducedBuckets.add(reduced); } } } return reducedBuckets; }
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram.java
License:Apache License
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) { final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) { @Override//w w w .j a va 2 s. c o m protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) { return a.current.key < b.current.key; } }; for (InternalAggregation aggregation : aggregations) { InternalHistogram histogram = (InternalHistogram) aggregation; if (histogram.buckets.isEmpty() == false) { pq.add(new IteratorAndCurrent(histogram.buckets.iterator())); } } List<Bucket> reducedBuckets = new ArrayList<>(); if (pq.size() > 0) { // list of buckets coming from different shards that have the same key List<Bucket> currentBuckets = new ArrayList<>(); double key = pq.top().current.key; do { final IteratorAndCurrent top = pq.top(); if (top.current.key != key) { // the key changes, reduce what we already buffered and reset the buffer for current buckets final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext); if (reduced.getDocCount() >= minDocCount) { reducedBuckets.add(reduced); } currentBuckets.clear(); key = top.current.key; } currentBuckets.add(top.current); if (top.iterator.hasNext()) { final Bucket next = top.iterator.next(); assert next.key > top.current.key : "shards must return data sorted by key"; top.current = next; pq.updateTop(); } else { pq.pop(); } } while (pq.size() > 0); if (currentBuckets.isEmpty() == false) { final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext); if (reduced.getDocCount() >= minDocCount) { reducedBuckets.add(reduced); } } } return reducedBuckets; }
From source file:org.codelibs.elasticsearch.search.suggest.phrase.CandidateScorer.java
License:Apache License
/**
 * Offers a candidate correction path to the bounded corrections queue.
 * The incoming {@code score} is in log space and is exponentiated first;
 * paths at or below {@code cutoffScore} are discarded outright.
 */
private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue<Correction> corrections,
        double cutoffScore, double score) throws IOException {
    score = Math.exp(score);
    assert Math.abs(score - score(path, candidates)) < 0.00001;
    // Negated comparison (rather than <=) keeps NaN handling identical to the
    // positive-form check.
    if (!(score > cutoffScore)) {
        return;
    }
    if (corrections.size() < maxNumCorrections) {
        // Queue not yet full: snapshot the path into a fresh correction.
        Candidate[] copy = new Candidate[candidates.length];
        System.arraycopy(path, 0, copy, 0, path.length);
        corrections.add(new Correction(score, copy));
    } else if (corrections.top().compareTo(score, path) < 0) {
        // Queue full and the weakest entry loses: overwrite it in place and re-sift.
        Correction weakest = corrections.top();
        System.arraycopy(path, 0, weakest.candidates, 0, path.length);
        weakest.score = score;
        corrections.updateTop();
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.histogram.InternalAutoDateHistogram.java
License:Apache License
/** * This method works almost exactly the same as * InternalDateHistogram#reduceBuckets(List, ReduceContext), the different * here is that we need to round all the keys we see using the highest level * rounding returned across all the shards so the resolution of the buckets * is the same and they can be reduced together. *///w ww .j a v a2 s . c o m private BucketReduceResult reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) { // First we need to find the highest level rounding used across all the // shards int reduceRoundingIdx = 0; for (InternalAggregation aggregation : aggregations) { int aggRoundingIdx = ((InternalAutoDateHistogram) aggregation).bucketInfo.roundingIdx; if (aggRoundingIdx > reduceRoundingIdx) { reduceRoundingIdx = aggRoundingIdx; } } // This rounding will be used to reduce all the buckets RoundingInfo reduceRoundingInfo = bucketInfo.roundingInfos[reduceRoundingIdx]; Rounding reduceRounding = reduceRoundingInfo.rounding; final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) { @Override protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) { return a.current.key < b.current.key; } }; for (InternalAggregation aggregation : aggregations) { InternalAutoDateHistogram histogram = (InternalAutoDateHistogram) aggregation; if (histogram.buckets.isEmpty() == false) { pq.add(new IteratorAndCurrent(histogram.buckets.iterator())); } } List<Bucket> reducedBuckets = new ArrayList<>(); if (pq.size() > 0) { // list of buckets coming from different shards that have the same key List<Bucket> currentBuckets = new ArrayList<>(); double key = reduceRounding.round(pq.top().current.key); do { final IteratorAndCurrent top = pq.top(); if (reduceRounding.round(top.current.key) != key) { // the key changes, reduce what we already buffered and reset the buffer for current buckets final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceRounding, 
reduceContext); reduceContext.consumeBucketsAndMaybeBreak(1); reducedBuckets.add(reduced); currentBuckets.clear(); key = reduceRounding.round(top.current.key); } currentBuckets.add(top.current); if (top.iterator.hasNext()) { final Bucket next = top.iterator.next(); assert next.key > top.current.key : "shards must return data sorted by key"; top.current = next; pq.updateTop(); } else { pq.pop(); } } while (pq.size() > 0); if (currentBuckets.isEmpty() == false) { final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceRounding, reduceContext); reduceContext.consumeBucketsAndMaybeBreak(1); reducedBuckets.add(reduced); } } return mergeBucketsIfNeeded(reducedBuckets, reduceRoundingIdx, reduceRoundingInfo, reduceContext); }
From source file:org.elasticsearch.search.suggest.phrase.CandidateScorer.java
License:Apache License
private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue<Correction> corrections, double cutoffScore, double score) throws IOException { score = Math.exp(score);//from w w w . ja va2 s. c om assert Math.abs(score - score(path, candidates)) < 0.00001; if (score > cutoffScore) { if (corrections.size() < maxNumCorrections) { Candidate[] c = new Candidate[candidates.length]; System.arraycopy(path, 0, c, 0, path.length); corrections.add(new Correction(score, c)); } else if (corrections.top().score < score) { Correction top = corrections.top(); System.arraycopy(path, 0, top.candidates, 0, path.length); top.score = score; corrections.updateTop(); } } }
From source file:uk.co.flax.luwak.termextractor.querytree.ConjunctionNode.java
License:Apache License
private PriorityQueue<QueryTree> buildPriorityQueue(final TreeWeightor weightor) { PriorityQueue<QueryTree> pq = new PriorityQueue<QueryTree>(children.size()) { @Override//from w w w . j av a 2 s . c o m protected boolean lessThan(QueryTree a, QueryTree b) { return a.weight(weightor) > b.weight(weightor); } }; for (QueryTree child : children) { pq.add(child); } return pq; }