List of usage examples for the org.apache.lucene.util.PriorityQueue(int maxSize) constructor
public PriorityQueue(int maxSize)
From source file:io.ssc.relationdiscovery.KMeans.java
License:Open Source License
/**
 * Prints the patterns whose rows lie closest (by {@code distanceMeasure}) to the
 * centroid at {@code centroidIndex}, one per line, prefixed with a tab.
 *
 * @param centroidIndex index into {@code centroids}
 * @param howMany       maximum number of nearest patterns to print
 * @param patterns      mapping from row index to the pattern label for that row
 */
public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) {
    // Lucene's PriorityQueue treats the element for which lessThan() holds as the
    // "least" one, and insertWithOverflow() evicts that least element when full.
    // To retain the *closest* points the largest distance must rank as least,
    // i.e. the comparison has to be reversed. (The original 'a.distance <
    // b.distance' retained the farthest points instead.)
    PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) {
        @Override
        protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) {
            return a.distance > b.distance;
        }
    };
    Vector centroid = centroids[centroidIndex];
    for (MatrixSlice rowSlice : A) {
        Vector row = rowSlice.vector();
        double distance = distanceMeasure.distance(centroid, row);
        queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index())));
    }
    // pop() yields the least element first: farthest of the kept set first,
    // closest printed last.
    while (queue.size() > 0) {
        System.out.println("\t" + queue.pop());
    }
}
From source file:org.apache.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java
License:Apache License
/**
 * Returns the smallest among the {@code maxPrefsPerUserConsidered} largest
 * absolute preference values of the given user vector.
 *
 * NOTE(review): if the vector has no non-zero elements the queue stays empty and
 * top() presumably returns null, causing an NPE on unboxing — confirm callers
 * guarantee at least one non-zero preference.
 *
 * @param userVector vector of a user's preference values
 * @return the smallest of the top-k absolute values
 */
private float findSmallestLargeValue(Vector userVector) {
    // Bounded min-heap: overflow evicts the smallest value, so after the scan
    // the queue holds the k largest magnitudes and its top is the least of them.
    PriorityQueue<Float> largestMagnitudes = new PriorityQueue<Float>(maxPrefsPerUserConsidered) {
        @Override
        protected boolean lessThan(Float first, Float second) {
            return first < second;
        }
    };
    for (Element element : userVector.nonZeroes()) {
        largestMagnitudes.insertWithOverflow(Math.abs((float) element.get()));
    }
    return largestMagnitudes.top();
}
From source file:org.apache.mahout.math.neighborhood.Searcher.java
License:Apache License
/**
 * Builds a bounded priority queue, in reverse order, for tracking the best
 * (smallest-weight) nearest-neighbor candidates: the worst candidate ranks as
 * least, sits on top, and is the first evicted on overflow.
 *
 * @param limit maximum size of the heap.
 * @return the priority queue.
 */
public static PriorityQueue<WeightedThing<Vector>> getCandidateQueue(int limit) {
    return new PriorityQueue<WeightedThing<Vector>>(limit) {
        @Override
        protected boolean lessThan(WeightedThing<Vector> first, WeightedThing<Vector> second) {
            // reversed comparison: the heavier (worse) candidate is "less"
            boolean firstIsWorse = first.getWeight() > second.getWeight();
            return firstIsWorse;
        }
    };
}
From source file:org.apache.solr.cloud.SizeLimitedDistributedMap.java
License:Apache License
@Override public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException { if (this.size() >= maxSize) { // Bring down the size List<String> children = zookeeper.getChildren(dir, null, true); int cleanupSize = maxSize / 10; final PriorityQueue priorityQueue = new PriorityQueue<Long>(cleanupSize) { @Override//from w ww.ja v a 2 s . c om protected boolean lessThan(Long a, Long b) { return (a > b); } }; for (String child : children) { Stat stat = zookeeper.exists(dir + "/" + child, null, true); priorityQueue.insertWithOverflow(stat.getMzxid()); } long topElementMzxId = (Long) priorityQueue.top(); for (String child : children) { Stat stat = zookeeper.exists(dir + "/" + child, null, true); if (stat.getMzxid() <= topElementMzxId) zookeeper.delete(dir + "/" + child, -1, true); } } super.put(trackingId, data); }
From source file:org.apache.solr.request.NumericFacets.java
License:Apache License
/**
 * Computes facet counts for a numeric (trie) field over the given doc set.
 * Values are accumulated as their sortable-long bit patterns in a hash table,
 * the top-k entries are selected with a bounded priority queue, and labels are
 * resolved back to strings via the field's ValueSource (and, for mincount=0,
 * merged with the terms dictionary so zero-count terms appear too).
 *
 * @param mincount minimum count for a value to be returned; <= 0 requests zeros
 * @param missing  whether to append a null-keyed entry with the missing count
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    // Remember whether zero-count terms were requested, then clamp mincount to 1
    // for the accumulation phase (the hash table only ever sees matching docs).
    final boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumericType numericType = ft.getNumericType();
    if (numericType == null) {
        // callers are expected to route non-numeric fields elsewhere
        throw new IllegalStateException();
    }
    final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();

    // 1. accumulate: count each distinct value (as sortable bits) over the set
    final HashTable hashTable = new HashTable();
    final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
    AtomicReaderContext ctx = null;
    FieldCache.Longs longs = null;
    Bits docsWithField = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        // advance to the segment containing this doc (doc IDs arrive in order)
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            // adapt every numeric type to a Longs view of its sortable bits
            switch (numericType) {
            case LONG:
                longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
                break;
            case INT:
                final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return ints.get(docID);
                    }
                };
                break;
            case FLOAT:
                final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.floatToSortableInt(floats.get(docID));
                    }
                };
                break;
            case DOUBLE:
                final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.doubleToSortableLong(doubles.get(docID));
                    }
                };
                break;
            default:
                throw new AssertionError();
            }
            docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
        }
        long v = longs.get(doc - ctx.docBase);
        // a raw 0 is ambiguous with "no value"; docsWithField disambiguates
        if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
            hashTable.add(doc, v, 1);
        } else {
            ++missingCount;
        }
    }

    // 2. select top-k facet values with a bounded priority queue
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // order by count descending, ties broken by value (bits) ascending
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        // order by index (value) order
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            // insertWithOverflow hands back the evicted entry; reuse it to avoid
            // allocating one object per hash-table slot
            e = pq.insertWithOverflow(e);
        }
    }

    // 4. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<Integer>();

    // This stuff is complicated because if facet.mincount=0, the counts needs
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<Entry>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }

        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }

        if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
            if (!sf.indexed()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
                        + sf.getName() + " which is not indexed");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<String>();
            // everything already emitted (or skipped by offset) must not reappear
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getAtomicReader().terms(fieldName);
            if (terms != null) {
                // trie fields index extra precision terms; restrict to the
                // main-value prefix so only full-precision terms are walked
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator(null);
                BytesRef term;
                switch (termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
                }
                final CharsRef spare = new CharsRef();
                // consume the remainder of the offset with unseen terms
                for (int skipped = hashTable.size; skipped < offset && term != null
                        && StringHelper.startsWith(term, prefix);) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                // then append zero-count terms until the limit is reached
                for (; term != null && StringHelper.startsWith(term, prefix)
                        && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // sort=index, mincount=0 and we have less than limit items
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
                    + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        // drain the PQ into a lookup of term -> count
        final Map<String, Integer> counts = new HashMap<String, Integer>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator(null);
            BytesRef term;
            switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
                term = termsEnum.term();
                break;
            case END:
                term = null;
                break;
            default:
                throw new AssertionError();
            }
            final CharsRef spare = new CharsRef();
            // skip 'offset' terms, then emit terms in index order, filling in
            // counts from the lookup (0 when the term never matched)
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix)
                    && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }

    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}
From source file:org.apache.solr.request.PerSegmentSingleValuedFaceting.java
License:Apache License
/**
 * Computes facet counts by counting each segment (possibly concurrently on the
 * given executor) and then merging the per-segment results with a priority
 * queue ordered by the segments' current term, so equal terms across segments
 * are summed in term order.
 */
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
    CompletionService<SegFacet> completionService = new ExecutorCompletionService<SegFacet>(executor);

    // reuse the translation logic to go from top level set to per-segment set
    baseSet = docs.getTopFilter();

    final List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();

    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<Callable<SegFacet>>();

    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;

    for (final AtomicReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);

        Callable<SegFacet> task = new Callable<SegFacet>() {
            @Override
            public SegFacet call() throws Exception {
                segFacet.countTerms();
                return segFacet;
            }
        };

        // TODO: if limiting threads, submit by largest segment first?
        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }

    // now merge the per-segment results
    // min-heap on each segment's current term bytes: the top segment always
    // holds the lexicographically smallest un-merged term
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };

    boolean hasMissingCount = false;
    int missingCount = 0;
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;
        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            // a slot freed up: submit the next deferred task, if any
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            // re-interrupt per convention, then surface as a server error
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            // unwrap and rethrow the task's real failure, preserving the cause
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }

        if (seg.startTermIndex < seg.endTermIndex) {
            // startTermIndex == -1 means ordinal -1 (docs with no value) is
            // included; fold its count into missingCount instead of merging it
            if (seg.startTermIndex == -1) {
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            if (seg.pos < seg.endTermIndex) {
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }

    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }

    BytesRef val = new BytesRef();
    while (queue.size() > 0) {
        SegFacet seg = queue.top();

        // we will normally end up advancing the term enum for this segment
        // while still using "val", so we need to make a copy since the BytesRef
        // may be shared across calls.
        val.copyBytes(seg.tempBR);

        int count = 0;

        // sum this term's count across every segment currently positioned on it
        do {
            count += seg.counts[seg.pos - seg.startTermIndex];

            // TODO: OPTIMIZATION...
            // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
            seg.pos++;
            if (seg.pos >= seg.endTermIndex) {
                // segment exhausted: drop it and look at the new top
                queue.pop();
                seg = queue.top();
            } else {
                // advance this segment to its next term and re-sift the heap
                seg.tempBR = seg.tenum.next();
                seg = queue.updateTop();
            }
        } while (seg != null && val.compareTo(seg.tempBR) == 0);

        boolean stop = collector.collect(val, count);
        if (stop)
            break;
    }

    NamedList<Integer> res = collector.getFacetCounts();

    // convert labels to readable form
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }

    if (missing) {
        // only compute the missing count separately if no segment reported it
        if (!hasMissingCount) {
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }

    return res;
}
From source file:org.apache.solr.search.facet.FacetFieldProcessor.java
License:Apache License
/** Processes the collected data to find the top slots, and composes it in the response NamedList. */
SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int slotCardinality,
        IntFunction<Comparable> bucketValFromSlotNumFunc, Function<Comparable, String> fieldQueryValFunc)
        throws IOException {
    int numBuckets = 0;
    List<Object> bucketVals = null;
    // on a shard, sample up to 100 bucket values so the coordinator can
    // estimate the total bucket count
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }

    // shards return everything from 0; only the final (non-shard) response
    // applies the requested offset
    final int off = fcontext.isShard() ? 0 : (int) freq.offset;

    long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
    if (freq.limit >= 0) {
        effectiveLimit = freq.limit;
        if (fcontext.isShard()) {
            // add over-request if this is a shard request
            if (freq.overrequest == -1) {
                effectiveLimit = (long) (effectiveLimit * 1.1 + 4); // default: add 10% plus 4 (to overrequest for very small limits)
            } else {
                effectiveLimit += freq.overrequest;
            }
        }
    }

    final int sortMul = freq.sortDirection.getMultiplier();

    int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(off + effectiveLimit, Integer.MAX_VALUE - 1)
            : Integer.MAX_VALUE - 1);
    // never size the heap beyond the number of distinct slots that can exist
    maxTopVals = Math.min(maxTopVals, slotCardinality);

    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    final BiPredicate<Slot, Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
        // primary sort via sortAcc; ties broken by the index-order accumulator
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        };
    } else {
        // ties broken by slot number (higher slot ranks as "less")
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? b.slot < a.slot : cmp < 0;
        };
    }
    final PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxTopVals) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            return orderPredicate.test(a, b);
        }
    };

    // note: We avoid object allocation by having a Slot and re-using the 'bottom'.
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {
        // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
        if (effectiveMincount > 0 && countAcc.getCount(slotNum) < effectiveMincount) {
            continue;
        }

        numBuckets++;
        if (bucketVals != null && bucketVals.size() < 100) {
            Object val = bucketValFromSlotNumFunc.apply(slotNum);
            bucketVals.add(val);
        }

        if (bottom != null) {
            // heap is full: only replace the current worst (bottom) if this
            // slot ranks higher
            scratchSlot.slot = slotNum; // scratchSlot is only used to hold this slotNum for the following line
            if (orderPredicate.test(bottom, scratchSlot)) {
                bottom.slot = slotNum;
                bottom = queue.updateTop();
            }
        } else if (effectiveLimit > 0) {
            // queue not full
            Slot s = new Slot();
            s.slot = slotNum;
            queue.add(s);
            if (queue.size() >= maxTopVals) {
                bottom = queue.top();
            }
        }
    }

    assert queue.size() <= numBuckets;

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            // shard responses also carry the sampled values for merging
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", allBucketsAcc.getSpecialCount());
        allBucketsAcc.setValues(allBuckets, -1); // -1 slotNum is unused for SpecialSlotAcc
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= effectiveLimit;
    // pop() yields worst-first, so filling the array backwards leaves the best
    // bucket at index 0
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    // sub-facets / deferred aggregations need a filter query to re-derive the
    // bucket's domain
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = bucketValFromSlotNumFunc.apply(slotNum);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, fieldQueryValFunc.apply(val)) : null;

        fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}
From source file:org.apache.solr.search.facet.FacetFieldProcessorByHashNumeric.java
License:Apache License
/**
 * Facets a numeric field by hashing each doc's value into a long->count table,
 * then selecting the top buckets with a bounded priority queue and composing
 * the response buckets (with optional numBuckets/allBuckets/missing sections).
 */
private SimpleOrderedMap<Object> calcFacets() throws IOException {
    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);

    // TODO: it would be really nice to know the number of unique values!!!!
    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            // remap all accumulator slots to the table's new slot layout
            doRehash(this);
            oldToNewMapping = null; // allow for gc
        }
    };

    int numSlots = currHashSize;

    int numMissing = 0; // NOTE(review): never read in this method

    if (freq.allBuckets) {
        // reserve one extra slot past the table for the allBuckets pseudo-bucket
        allBucketsSlot = numSlots++;
    }

    // compares slots by their decoded value, for index-order tie-breaking;
    // collection-related callbacks are deliberate no-ops
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };

    // virtual count accumulator: reads counts straight out of the hash table;
    // mutating operations are unsupported because the table itself counts
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };

    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);

    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }

    NumericDocValues values = null;
    Bits docsWithField = null;

    // TODO: factor this code out so it can be shared...
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        // advance to the segment containing this doc (doc IDs arrive in order)
        if (doc >= adjustedMax) {
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);

            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }

        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // a raw 0 is ambiguous with "no value"; docsWithField disambiguates
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash

            // countAcc.incrementCount(slot, 1); // our countAcc is virtual, so this is not needed
            collectFirstPhase(segDoc, slot);
        }
    }

    //
    // collection done, time to find the top slots
    //

    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }

    // shards return everything from 0; only the final response applies offset
    int off = fcontext.isShard() ? 0 : (int) freq.offset;

    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;

    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);

    final int sortMul = freq.sortDirection.getMultiplier();

    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };

    // TODO: create a countAcc that wrapps the table so we can reuse more code?

    // 'bottom' is reused across insertions: insertWithOverflow returns the
    // evicted Slot, which becomes the container for the next candidate
    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1

        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }

        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;

        bottom = queue.insertWithOverflow(bottom);
    }

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    // pop() yields worst-first; fill backwards so the best bucket is first
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;

        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}
From source file:org.apache.solr.search.facet.FacetFieldProcessorNumeric.java
License:Apache License
public SimpleOrderedMap<Object> calcFacets() throws IOException { final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf); // TODO: it would be really nice to know the number of unique values!!!! int possibleValues = fcontext.base.size(); // size smaller tables so that no resize will be necessary int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1)); currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE); final LongCounts table = new LongCounts(currHashSize) { @Override//from ww w . j a v a 2s . c o m protected void rehash() { super.rehash(); doRehash(this); oldToNewMapping = null; // allow for gc } }; int numSlots = currHashSize; int numMissing = 0; if (freq.allBuckets) { allBucketsSlot = numSlots++; } indexOrderAcc = new SlotAcc(fcontext) { @Override public void collect(int doc, int slot) throws IOException { } @Override public int compare(int slotA, int slotB) { long s1 = calc.bitsToSortableBits(table.vals[slotA]); long s2 = calc.bitsToSortableBits(table.vals[slotB]); return Long.compare(s1, s2); } @Override public Object getValue(int slotNum) throws IOException { return null; } @Override public void reset() { } @Override public void resize(Resizer resizer) { } }; countAcc = new CountSlotAcc(fcontext) { @Override public void incrementCount(int slot, int count) { throw new UnsupportedOperationException(); } @Override public int getCount(int slot) { return table.counts[slot]; } @Override public Object getValue(int slotNum) { return getCount(slotNum); } @Override public void reset() { throw new UnsupportedOperationException(); } @Override public void collect(int doc, int slot) throws IOException { throw new UnsupportedOperationException(); } @Override public int compare(int slotA, int slotB) { return Integer.compare(table.counts[slotA], table.counts[slotB]); } @Override public void resize(Resizer resizer) { throw new UnsupportedOperationException(); } }; // we set the countAcc & 
indexAcc first so generic ones won't be created for us. createCollectAcc(fcontext.base.size(), numSlots); if (freq.allBuckets) { allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0); } NumericDocValues values = null; Bits docsWithField = null; // TODO: factor this code out so it can be shared... final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves(); final Iterator<LeafReaderContext> ctxIt = leaves.iterator(); LeafReaderContext ctx = null; int segBase = 0; int segMax; int adjustedMax = 0; for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) { final int doc = docsIt.nextDoc(); if (doc >= adjustedMax) { do { ctx = ctxIt.next(); segBase = ctx.docBase; segMax = ctx.reader().maxDoc(); adjustedMax = segBase + segMax; } while (doc >= adjustedMax); assert doc >= ctx.docBase; setNextReaderFirstPhase(ctx); values = DocValues.getNumeric(ctx.reader(), sf.getName()); docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName()); } int segDoc = doc - segBase; long val = values.get(segDoc); if (val != 0 || docsWithField.get(segDoc)) { int slot = table.add(val); // this can trigger a rehash rehash // countAcc.incrementCount(slot, 1); // our countAcc is virtual, so this is not needed collectFirstPhase(segDoc, slot); } } // // collection done, time to find the top slots // int numBuckets = 0; List<Object> bucketVals = null; if (freq.numBuckets && fcontext.isShard()) { bucketVals = new ArrayList(100); } int off = fcontext.isShard() ? 0 : (int) freq.offset; // add a modest amount of over-request if this is a shard request int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit) : Integer.MAX_VALUE; int maxsize = (int) (freq.limit >= 0 ? 
freq.offset + lim : Integer.MAX_VALUE - 1); maxsize = Math.min(maxsize, table.cardinality); final int sortMul = freq.sortDirection.getMultiplier(); PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) { @Override protected boolean lessThan(Slot a, Slot b) { // TODO: sort-by-index-order int cmp = sortAcc.compare(a.slot, b.slot) * sortMul; return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0; } }; // TODO: create a countAcc that wrapps the table so we can reuse more code? Slot bottom = null; for (int i = 0; i < table.counts.length; i++) { int count = table.counts[i]; if (count < effectiveMincount) { // either not a valid slot, or count not high enough continue; } numBuckets++; // can be different from the table cardinality if mincount > 1 long val = table.vals[i]; if (bucketVals != null && bucketVals.size() < 100) { bucketVals.add(calc.bitsToValue(val)); } if (bottom == null) { bottom = new Slot(); } bottom.slot = i; bottom = queue.insertWithOverflow(bottom); } SimpleOrderedMap res = new SimpleOrderedMap(); if (freq.numBuckets) { if (!fcontext.isShard()) { res.add("numBuckets", numBuckets); } else { SimpleOrderedMap map = new SimpleOrderedMap(2); map.add("numBuckets", numBuckets); map.add("vals", bucketVals); res.add("numBuckets", map); } } FacetDebugInfo fdebug = fcontext.getDebugInfo(); if (fdebug != null) fdebug.putInfoItem("numBuckets", new Long(numBuckets)); if (freq.allBuckets) { SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>(); // countAcc.setValues(allBuckets, allBucketsSlot); allBuckets.add("count", table.numAdds); allBucketsAcc.setValues(allBuckets, -1); // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?) res.add("allBuckets", allBuckets); } if (freq.missing) { // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway. 
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>(); fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null); res.add("missing", missingBucket); } // if we are deep paging, we don't have to order the highest "offset" counts. int collectCount = Math.max(0, queue.size() - off); assert collectCount <= lim; int[] sortedSlots = new int[collectCount]; for (int i = collectCount - 1; i >= 0; i--) { sortedSlots[i] = queue.pop().slot; } ArrayList bucketList = new ArrayList(collectCount); res.add("buckets", bucketList); boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0; for (int slotNum : sortedSlots) { SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>(); Comparable val = calc.bitsToValue(table.vals[slotNum]); bucket.add("val", val); Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null; fillBucket(bucket, table.counts[slotNum], slotNum, null, filter); bucketList.add(bucket); } return res; }
From source file:org.codelibs.bench.core.action.BenchmarkExecutor.java
License:Apache License
/**
 * Selects the {@code topN} slowest requests from the timing buckets, returning
 * them slowest-first with their max and average times.
 *
 * @param buckets    timings laid out as {@code multiplier} consecutive rounds
 *                   of {@code requests.size()} entries each
 * @param requests   the benchmarked requests, indexed as in {@code buckets}
 * @param multiplier number of rounds recorded per request (must be > 0)
 * @param topN       number of slow requests to report
 */
private CompetitionIteration.SlowRequest[] getTopN(long[] buckets, List<SearchRequest> requests, int multiplier,
        int topN) {

    final int numRequests = requests.size();
    // collect the top N
    // min-heap on average time: the entry with the smallest avgTime sits on
    // top and is the one displaced when a slower request arrives
    final PriorityQueue<IndexAndTime> topNQueue = new PriorityQueue<IndexAndTime>(topN) {
        @Override
        protected boolean lessThan(IndexAndTime a, IndexAndTime b) {
            return a.avgTime < b.avgTime;
        }
    };
    assert multiplier > 0;
    for (int i = 0; i < numRequests; i++) {
        long sum = 0;
        long max = Long.MIN_VALUE;
        // aggregate request i across all rounds
        for (int j = 0; j < multiplier; j++) {
            final int base = (numRequests * j);
            sum += buckets[i + base];
            max = Math.max(buckets[i + base], max);
        }
        final long avg = sum / multiplier;
        if (topNQueue.size() < topN) {
            topNQueue.add(new IndexAndTime(i, max, avg));
        } else if (topNQueue.top().avgTime < max) {
            // NOTE(review): the heap orders by avgTime, yet this admission test
            // compares the current top's avgTime against 'max' (the per-request
            // maximum), not 'avg' — looks inconsistent; confirm whether 'avg'
            // was intended here.
            topNQueue.top().update(i, max, avg);
            topNQueue.updateTop();
        }
    }

    // drain smallest-avgTime first, filling the array from the back so the
    // result ends up ordered slowest-first
    final CompetitionIteration.SlowRequest[] slowRequests = new CompetitionIteration.SlowRequest[topNQueue
            .size()];
    int i = topNQueue.size() - 1;

    while (topNQueue.size() > 0) {
        IndexAndTime pop = topNQueue.pop();
        CompetitionIteration.SlowRequest slow = new CompetitionIteration.SlowRequest(pop.avgTime, pop.maxTime,
                requests.get(pop.index));
        slowRequests[i--] = slow;
    }

    return slowRequests;
}