List of usage examples for org.apache.lucene.util PriorityQueue insertWithOverflow
public T insertWithOverflow(T element)
From source file: com.browseengine.bobo.facets.CombinedFacetAccessible.java
License: Apache License
/**
 * Returns the facets accumulated by this combined collector, ordered and
 * truncated according to the facet spec ({@code _fspec}).
 *
 * Three orderings are handled:
 * - OrderValueAsc: stream facets in iterator order, stopping at maxCount.
 * - OrderHitsDesc: with a max count, keep a min-heap of size maxCount and
 *   tighten minHits to the heap top's count + 1 so facets that can never
 *   enter the heap are skipped early; without a max count, collect all and sort.
 * - custom order: same heap-or-sort split using the spec's custom comparator.
 *
 * @return facets satisfying the spec's min hit count, ordered per the spec
 * @throws IllegalStateException if this collector was already closed
 */
public List<BrowseFacet> getFacets() {
    if (_closed) {
        throw new IllegalStateException("This instance of count collector was already closed");
    }
    int maxCnt = _fspec.getMaxCount();
    // non-positive max count means "no limit"
    if (maxCnt <= 0)
        maxCnt = Integer.MAX_VALUE;
    int minHits = _fspec.getMinHitCount();
    LinkedList<BrowseFacet> list = new LinkedList<BrowseFacet>();
    int cnt = 0;
    Comparable facet = null;
    FacetIterator iter = (FacetIterator) this.iterator();
    Comparator<BrowseFacet> comparator;
    if (FacetSortSpec.OrderValueAsc.equals(_fspec.getOrderBy())) {
        // find the next facet whose combined hit count obeys minHits
        while ((facet = iter.next(minHits)) != null) {
            list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            if (++cnt >= maxCnt)
                break;
        }
    } else if (FacetSortSpec.OrderHitsDesc.equals(_fspec.getOrderBy())) {
        comparator = new Comparator<BrowseFacet>() {
            public int compare(BrowseFacet f1, BrowseFacet f2) {
                // NOTE(review): int subtraction can overflow for extreme hit
                // counts; Integer.compare would be safer — confirm counts are bounded.
                int val = f2.getHitCount() - f1.getHitCount();
                if (val == 0) {
                    // tie-break on value so ordering is deterministic
                    val = (f1.getValue().compareTo(f2.getValue()));
                }
                return val;
            }
        };
        if (maxCnt != Integer.MAX_VALUE) {
            // we will maintain a min heap of size maxCnt
            // Order by hits in descending order and max count is supplied
            PriorityQueue queue = createPQ(maxCnt, comparator);
            int qsize = 0;
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            if (facet != null) {
                // heap is full; any facet with count <= the heap top can never
                // be added, so raise minHits to prune at the iterator level
                BrowseFacet rootFacet = (BrowseFacet) queue.top();
                minHits = rootFacet.getHitCount() + 1;
                // facet count less than top of min heap, it will never be added
                while (((facet = iter.next(minHits)) != null)) {
                    // overwrite the current root in place, then restore heap order
                    rootFacet.setValue(String.valueOf(facet));
                    rootFacet.setHitCount(iter.count);
                    rootFacet = (BrowseFacet) queue.updateTop();
                    minHits = rootFacet.getHitCount() + 1;
                }
            }
            // at this point, queue contains top maxCnt facets that have hitcount >= minHits
            while (qsize-- > 0) {
                // append each entry to the beginning of the facet list to order facets by hits descending
                list.addFirst((BrowseFacet) queue.pop());
            }
        } else {
            // no maxCnt specified. So fetch all facets according to minHits and sort them later
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    } else // FacetSortSpec.OrderByCustom.equals(_fspec.getOrderBy())
    {
        comparator = _fspec.getCustomComparatorFactory().newComparator();
        if (maxCnt != Integer.MAX_VALUE) {
            PriorityQueue queue = createPQ(maxCnt, comparator);
            // reusable scratch facet: insertWithOverflow hands back the evicted
            // element, which is then recycled for the next candidate
            BrowseFacet browseFacet = new BrowseFacet();
            int qsize = 0;
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            if (facet != null) {
                // check with the top of min heap
                while ((facet = iter.next(minHits)) != null) {
                    browseFacet.setHitCount(iter.count);
                    browseFacet.setValue(String.valueOf(facet));
                    browseFacet = (BrowseFacet) queue.insertWithOverflow(browseFacet);
                }
            }
            // remove from queue and add to the list
            while (qsize-- > 0)
                list.addFirst((BrowseFacet) queue.pop());
        } else {
            // order by custom but no max count supplied
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    }
    return list;
}
From source file: io.ssc.relationdiscovery.KMeans.java
License: Open Source License
public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) { PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) { @Override/*from ww w . j a va2 s . c o m*/ protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) { return a.distance < b.distance; } }; Vector centroid = centroids[centroidIndex]; for (MatrixSlice rowSlice : A) { Vector row = rowSlice.vector(); double distance = distanceMeasure.distance(centroid, row); queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index()))); } while (queue.size() > 0) { System.out.println("\t" + queue.pop()); } }
From source file: org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java
License: Apache License
/**
 * Builds a highlighted excerpt from term-offset hits: selects the best
 * (at most {@code maxFragments}) fragments via a priority queue, drops
 * fragments that share an offset with an already-accepted one, then renders
 * each fragment with the supplied start/end and highlight markers.
 *
 * @param offsets       term offsets to highlight; may be null/empty
 * @param text          the full source text the offsets index into
 * @param excerptStart  markup emitted before the whole excerpt
 * @param excerptEnd    markup emitted after the whole excerpt
 * @param fragmentStart markup emitted before each fragment
 * @param fragmentEnd   markup emitted after each fragment
 * @param hlStart       markup emitted before each highlighted term
 * @param hlEnd         markup emitted after each highlighted term
 * @param maxFragments  maximum number of fragments to keep
 * @param surround      number of context characters around a term
 * @return the assembled excerpt markup
 * @throws IOException propagated from excerpt creation
 */
@Override
protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) throws IOException {
    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }
    PriorityQueue<FragmentInfo> bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        // ignore offsets that point past the end of the text
        if (offsets[i].getEndOffset() <= text.length()) {
            FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
            // greedily grow the fragment with following offsets while they fit
            for (int j = i + 1; j < offsets.length; j++) {
                if (offsets[j].getEndOffset() > text.length()) {
                    break;
                }
                if (!fi.add(offsets[j], text)) {
                    break;
                }
            }
            bestFragments.insertWithOverflow(fi);
        }
    }
    if (bestFragments.size() == 0) {
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }
    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first
    List<FragmentInfo> infos = new LinkedList<FragmentInfo>();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }
    // identity map used as a set of offsets already claimed by a kept fragment
    Map<TermVectorOffsetInfo, Object> offsetInfos = new IdentityHashMap<TermVectorOffsetInfo, Object>();
    // remove overlapping fragment infos
    Iterator<FragmentInfo> it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        boolean overlap = false;
        Iterator<TermVectorOffsetInfo> fit = fi.iterator();
        while (fit.hasNext() && !overlap) {
            TermVectorOffsetInfo oi = fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            // claim this fragment's offsets so later fragments can't reuse them
            Iterator<TermVectorOffsetInfo> oit = fi.iterator();
            while (oit.hasNext()) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }
    // create excerpts
    StringBuffer sb = new StringBuffer(excerptStart);
    it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        sb.append(fragmentStart);
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        Iterator<TermVectorOffsetInfo> fIt = fi.iterator();
        while (fIt.hasNext()) {
            TermVectorOffsetInfo oi = fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(escape(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
            }
            sb.append(hlStart);
            sb.append(escape(text.substring(oi.getStartOffset(), oi.getEndOffset())));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}
From source file: org.apache.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java
License: Apache License
private float findSmallestLargeValue(Vector userVector) { PriorityQueue<Float> topPrefValues = new PriorityQueue<Float>(maxPrefsPerUserConsidered) { @Override/*w w w . j a va 2 s . c om*/ protected boolean lessThan(Float f1, Float f2) { return f1 < f2; } }; for (Element e : userVector.nonZeroes()) { float absValue = Math.abs((float) e.get()); topPrefValues.insertWithOverflow(absValue); } return topPrefValues.top(); }
From source file: org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java
License: Apache License
/**
 * Scans all training vectors for approximate nearest neighbors of
 * {@code query}, using the Hamming distance between 64-bit LSH hashes as a
 * cheap pre-filter so that the expensive full distance is only computed for
 * promising candidates. Maintains an adaptive hash-distance cutoff
 * ({@code hashLimit}) that tightens as good candidates accumulate and can be
 * relaxed again by the {@code hashLimitStrategy} heuristic.
 *
 * @param query the vector to search near
 * @return priority queue of the best candidates found (with their distances)
 */
private PriorityQueue<WeightedThing<Vector>> searchInternal(Vector query) {
    long queryHash = HashedVector.computeHash64(query, projection);
    // We keep an approximation of the closest vectors here.
    PriorityQueue<WeightedThing<Vector>> top = Searcher.getCandidateQueue(getSearchSize());
    // We scan the vectors using bit counts as an approximation of the dot product so we can do as few
    // full distance computations as possible. Our goal is to only do full distance computations for
    // vectors with hash distance at most as large as the searchSize biggest hash distance seen so far.
    OnlineSummarizer[] distribution = new OnlineSummarizer[BITS + 1];
    for (int i = 0; i < BITS + 1; i++) {
        distribution[i] = new OnlineSummarizer();
    }
    distanceEvaluations = 0;
    // We keep the counts of the hash distances here. This lets us accurately
    // judge what hash distance cutoff we should use.
    int[] hashCounts = new int[BITS + 1];
    // Maximum number of different bits to still consider a vector a candidate for nearest neighbor.
    // Starts at the maximum number of bits, but decreases and can increase.
    int hashLimit = BITS;
    int limitCount = 0;
    double distanceLimit = Double.POSITIVE_INFINITY;
    // In this loop, we have the invariants that:
    //
    // limitCount = sum_{i<hashLimit} hashCount[i]
    // and
    // limitCount >= searchSize && limitCount - hashCount[hashLimit-1] < searchSize
    for (HashedVector vector : trainingVectors) {
        // This computes the Hamming Distance between the vector's hash and the query's hash.
        // The result is correlated with the angle between the vectors.
        int bitDot = vector.hammingDistance(queryHash);
        if (bitDot <= hashLimit) {
            distanceEvaluations++;
            double distance = distanceMeasure.distance(query, vector);
            distribution[bitDot].add(distance);
            if (distance < distanceLimit) {
                top.insertWithOverflow(new WeightedThing<Vector>(vector, distance));
                if (top.size() == searchSize) {
                    // queue is full: the current worst candidate sets the new cutoff
                    distanceLimit = top.top().getWeight();
                }
                hashCounts[bitDot]++;
                limitCount++;
                // tighten hashLimit while we can drop a bucket and still keep
                // at least searchSize candidates below the limit
                while (hashLimit > 0 && limitCount - hashCounts[hashLimit - 1] > searchSize) {
                    hashLimit--;
                    limitCount -= hashCounts[hashLimit];
                }
                if (hashLimitStrategy >= 0) {
                    // optionally relax hashLimit when the observed distance
                    // distribution at the cutoff still looks promising; the
                    // strategy interpolates between two quartiles of that bucket
                    while (hashLimit < MAX_HASH_LIMIT && distribution[hashLimit].getCount() > MIN_DISTRIBUTION_COUNT
                            && ((1 - hashLimitStrategy) * distribution[hashLimit].getQuartile(0)
                                    + hashLimitStrategy * distribution[hashLimit].getQuartile(1)) < distanceLimit) {
                        limitCount += hashCounts[hashLimit];
                        hashLimit++;
                    }
                }
            }
        }
    }
    return top;
}
From source file: org.apache.mahout.utils.vectors.VectorHelper.java
License: Apache License
public static List<Pair<Integer, Double>> topEntries(Vector vector, int maxEntries) { // Get the size of nonZero elements in the input vector int sizeOfNonZeroElementsInVector = Iterables.size(vector.nonZeroes()); // If the sizeOfNonZeroElementsInVector < maxEntries then set maxEntries = sizeOfNonZeroElementsInVector // otherwise the call to queue.pop() returns a Pair(null, null) and the subsequent call // to pair.getFirst() throws a NullPointerException if (sizeOfNonZeroElementsInVector < maxEntries) { maxEntries = sizeOfNonZeroElementsInVector; }/*from ww w . ja va 2 s . c o m*/ PriorityQueue<Pair<Integer, Double>> queue = new TDoublePQ<Integer>(-1, maxEntries); for (Element e : vector.nonZeroes()) { queue.insertWithOverflow(Pair.of(e.index(), e.get())); } List<Pair<Integer, Double>> entries = Lists.newArrayList(); Pair<Integer, Double> pair; while ((pair = queue.pop()) != null) { if (pair.getFirst() > -1) { entries.add(pair); } } Collections.sort(entries, new Comparator<Pair<Integer, Double>>() { @Override public int compare(Pair<Integer, Double> a, Pair<Integer, Double> b) { return b.getSecond().compareTo(a.getSecond()); } }); return entries; }
From source file: org.apache.solr.cloud.SizeLimitedDistributedMap.java
License: Apache License
@Override public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException { if (this.size() >= maxSize) { // Bring down the size List<String> children = zookeeper.getChildren(dir, null, true); int cleanupSize = maxSize / 10; final PriorityQueue priorityQueue = new PriorityQueue<Long>(cleanupSize) { @Override//w ww. j a va2 s. c om protected boolean lessThan(Long a, Long b) { return (a > b); } }; for (String child : children) { Stat stat = zookeeper.exists(dir + "/" + child, null, true); priorityQueue.insertWithOverflow(stat.getMzxid()); } long topElementMzxId = (Long) priorityQueue.top(); for (String child : children) { Stat stat = zookeeper.exists(dir + "/" + child, null, true); if (stat.getMzxid() <= topElementMzxId) zookeeper.delete(dir + "/" + child, -1, true); } } super.put(trackingId, data); }
From source file: org.apache.solr.request.NumericFacets.java
License: Apache License
/**
 * Computes facet counts for a numeric field. Works in phases:
 * 1) accumulate per-value counts into a specialized hash table by walking the
 *    doc set segment by segment through the FieldCache;
 * 2) select the top values with a bounded priority queue (ordered by count or
 *    by value depending on {@code sort});
 * 3) build the result list, optionally merging in zero-count terms from the
 *    terms dictionary when {@code mincount <= 0}.
 *
 * @param searcher  the index searcher
 * @param docs      the documents to facet over
 * @param fieldName the numeric field to facet on
 * @param offset    number of leading facet values to skip
 * @param limit     maximum number of facet values to return (negative = all)
 * @param mincount  minimum count for a value to be returned (<= 0 enables
 *                  zero-count merging from the terms dict)
 * @param missing   whether to append a null entry with the missing-value count
 * @param sort      facet sort mode (count or index)
 * @return facet values with their counts
 * @throws IOException from index access
 * @throws IllegalStateException if the field is not numeric, or not indexed
 *         when zero-count merging requires the terms dictionary
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    final boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumericType numericType = ft.getNumericType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();
    // 1. accumulate
    final HashTable hashTable = new HashTable();
    final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
    AtomicReaderContext ctx = null;
    FieldCache.Longs longs = null;
    Bits docsWithField = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        // advance to the segment containing this doc (doc ids arrive in order)
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            // widen every numeric type to a sortable long view
            switch (numericType) {
            case LONG:
                longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
                break;
            case INT:
                final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return ints.get(docID);
                    }
                };
                break;
            case FLOAT:
                final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.floatToSortableInt(floats.get(docID));
                    }
                };
                break;
            case DOUBLE:
                final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.doubleToSortableLong(doubles.get(docID));
                    }
                };
                break;
            default:
                throw new AssertionError();
            }
            docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
        }
        long v = longs.get(doc - ctx.docBase);
        // 0 is ambiguous: it may mean "missing", so consult docsWithField
        if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
            hashTable.add(doc, v, 1);
        } else {
            ++missingCount;
        }
    }
    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                // lower count is "less"; ties broken by value so that larger
                // bits (later index order) are evicted first
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    // reuse a single Entry; insertWithOverflow returns the evicted one for recycling
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
        }
    }
    // 4. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<Integer>();
    // This stuff is complicated because if facet.mincount=0, the counts needs
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<Entry>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }
        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        if (zeros && (limit < 0 || result.size() < limit)) {
            // need to merge with the term dict
            if (!sf.indexed()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
                        + sf.getName() + " which is not indexed");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<String>();
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getAtomicReader().terms(fieldName);
            if (terms != null) {
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator(null);
                BytesRef term;
                switch (termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
                }
                final CharsRef spare = new CharsRef();
                // skip terms covered by the offset (only unseen terms count)
                for (int skipped = hashTable.size; skipped < offset && term != null
                        && StringHelper.startsWith(term, prefix);) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                for (; term != null && StringHelper.startsWith(term, prefix)
                        && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // sort=index, mincount=0 and we have less than limit items
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
                    + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        final Map<String, Integer> counts = new HashMap<String, Integer>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator(null);
            BytesRef term;
            switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
                term = termsEnum.term();
                break;
            case END:
                term = null;
                break;
            default:
                throw new AssertionError();
            }
            final CharsRef spare = new CharsRef();
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix)
                    && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }
    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}
From source file: org.apache.solr.search.facet.FacetFieldProcessorByHashNumeric.java
License: Apache License
/**
 * Computes numeric field facets by hashing values into a LongCounts table,
 * then selecting the top slots with a bounded priority queue ordered by the
 * configured sort accumulator. Also handles numBuckets, allBuckets and
 * missing sub-results, and over-requests slightly on shard requests.
 *
 * @return the facet response map ("buckets" plus optional extras)
 * @throws IOException from index access
 */
private SimpleOrderedMap<Object> calcFacets() throws IOException {
    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);
    // TODO: it would be really nice to know the number of unique values!!!!
    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            doRehash(this);
            oldToNewMapping = null; // allow for gc
        }
    };
    int numSlots = currHashSize;
    int numMissing = 0;
    if (freq.allBuckets) {
        allBucketsSlot = numSlots++;
    }
    // accumulator that compares slots by their (sortable) numeric value
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };
    // "virtual" count accumulator backed directly by the hash table's counts
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };
    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);
    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }
    NumericDocValues values = null;
    Bits docsWithField = null;
    // TODO: factor this code out so it can be shared...
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        // advance to the segment containing this doc (ids arrive in order)
        if (doc >= adjustedMax) {
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);
            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }
        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // 0 is ambiguous (may mean "missing"), so consult docsWithField
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash
            // countAcc.incrementCount(slot, 1); // our countAcc is virtual, so this is not needed
            collectFirstPhase(segDoc, slot);
        }
    }
    //
    // collection done, time to find the top slots
    //
    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }
    int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;
    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);
    final int sortMul = freq.sortDirection.getMultiplier();
    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };
    // TODO: create a countAcc that wrapps the table so we can reuse more code?
    // reuse a single Slot; insertWithOverflow returns the evicted one for recycling
    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1
        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }
        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;
        bottom = queue.insertWithOverflow(bottom);
    }
    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }
    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);
    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }
    if (freq.missing) {
        // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }
    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }
    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);
        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;
        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);
        bucketList.add(bucket);
    }
    return res;
}
From source file: org.apache.solr.search.facet.FacetFieldProcessorNumeric.java
License: Apache License
/**
 * Computes numeric field facets by hashing values into a LongCounts table,
 * then selecting the top slots with a bounded priority queue ordered by the
 * configured sort accumulator. Also handles numBuckets, allBuckets and
 * missing sub-results, and over-requests slightly on shard requests.
 * (Older variant of FacetFieldProcessorByHashNumeric.calcFacets; note the
 * raw-typed locals it still uses.)
 *
 * @return the facet response map ("buckets" plus optional extras)
 * @throws IOException from index access
 */
public SimpleOrderedMap<Object> calcFacets() throws IOException {
    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);
    // TODO: it would be really nice to know the number of unique values!!!!
    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            doRehash(this);
            oldToNewMapping = null; // allow for gc
        }
    };
    int numSlots = currHashSize;
    int numMissing = 0;
    if (freq.allBuckets) {
        allBucketsSlot = numSlots++;
    }
    // accumulator that compares slots by their (sortable) numeric value
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };
    // "virtual" count accumulator backed directly by the hash table's counts
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };
    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);
    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }
    NumericDocValues values = null;
    Bits docsWithField = null;
    // TODO: factor this code out so it can be shared...
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        // advance to the segment containing this doc (ids arrive in order)
        if (doc >= adjustedMax) {
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);
            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }
        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // 0 is ambiguous (may mean "missing"), so consult docsWithField
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash
            // countAcc.incrementCount(slot, 1); // our countAcc is virtual, so this is not needed
            collectFirstPhase(segDoc, slot);
        }
    }
    //
    // collection done, time to find the top slots
    //
    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList(100);
    }
    int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;
    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);
    final int sortMul = freq.sortDirection.getMultiplier();
    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };
    // TODO: create a countAcc that wrapps the table so we can reuse more code?
    // reuse a single Slot; insertWithOverflow returns the evicted one for recycling
    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1
        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }
        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;
        bottom = queue.insertWithOverflow(bottom);
    }
    SimpleOrderedMap res = new SimpleOrderedMap();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            SimpleOrderedMap map = new SimpleOrderedMap(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }
    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", new Long(numBuckets));
    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }
    if (freq.missing) {
        // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }
    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }
    ArrayList bucketList = new ArrayList(collectCount);
    res.add("buckets", bucketList);
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);
        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;
        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);
        bucketList.add(bucket);
    }
    return res;
}