Example usage for org.apache.lucene.util PriorityQueue insertWithOverflow

List of usage examples for org.apache.lucene.util PriorityQueue insertWithOverflow

Introduction

In this page you can find the example usage for org.apache.lucene.util PriorityQueue insertWithOverflow.

Prototype

public T insertWithOverflow(T element) 

Source Link

Document

Adds an Object to a PriorityQueue in log(size) time.

Usage

From source file:com.browseengine.bobo.facets.CombinedFacetAccessible.java

License:Apache License

/**
 * Collects the combined facet counts into a list, honoring the facet spec's
 * ordering (value ascending, hits descending, or a custom comparator),
 * its minimum hit count, and its maximum facet count.
 *
 * @return at most maxCount facets, ordered per the spec
 * @throws IllegalStateException if this collector was already closed
 */
public List<BrowseFacet> getFacets() {
    if (_closed) {
        throw new IllegalStateException("This instance of count collector was already closed");
    }
    int maxCnt = _fspec.getMaxCount();
    if (maxCnt <= 0)
        maxCnt = Integer.MAX_VALUE; // non-positive max count means "no limit"
    int minHits = _fspec.getMinHitCount();
    LinkedList<BrowseFacet> list = new LinkedList<BrowseFacet>();

    int cnt = 0;
    Comparable facet = null;
    FacetIterator iter = (FacetIterator) this.iterator();
    Comparator<BrowseFacet> comparator;
    if (FacetSortSpec.OrderValueAsc.equals(_fspec.getOrderBy())) {
        // The iterator already yields facets in value order; take the first maxCnt.
        while ((facet = iter.next(minHits)) != null) {
            // find the next facet whose combined hit count obeys minHits
            list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            if (++cnt >= maxCnt)
                break;
        }
    } else if (FacetSortSpec.OrderHitsDesc.equals(_fspec.getOrderBy())) {
        // Order by hit count descending; ties broken by value ascending.
        comparator = new Comparator<BrowseFacet>() {
            public int compare(BrowseFacet f1, BrowseFacet f2) {
                // NOTE(review): int subtraction can overflow for extreme hit counts;
                // assumes counts stay small enough for this to be safe — confirm.
                int val = f2.getHitCount() - f1.getHitCount();
                if (val == 0) {
                    val = (f1.getValue().compareTo(f2.getValue()));
                }
                return val;
            }
        };
        if (maxCnt != Integer.MAX_VALUE) {
            // we will maintain a min heap of size maxCnt
            // Order by hits in descending order and max count is supplied
            PriorityQueue queue = createPQ(maxCnt, comparator);
            int qsize = 0;
            // Phase 1: fill the heap with the first maxCnt qualifying facets.
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            // facet != null means the iterator was not exhausted: more candidates remain.
            if (facet != null) {
                BrowseFacet rootFacet = (BrowseFacet) queue.top();
                // Raise the bar: a candidate must beat the current heap minimum to matter.
                minHits = rootFacet.getHitCount() + 1;
                // facet count less than top of min heap, it will never be added 
                while (((facet = iter.next(minHits)) != null)) {
                    // Overwrite the heap's minimum in place, then restore heap order.
                    rootFacet.setValue(String.valueOf(facet));
                    rootFacet.setHitCount(iter.count);
                    rootFacet = (BrowseFacet) queue.updateTop();
                    minHits = rootFacet.getHitCount() + 1;
                }
            }
            // at this point, queue contains top maxCnt facets that have hitcount >= minHits
            while (qsize-- > 0) {
                // append each entry to the beginning of the facet list to order facets by hits descending
                list.addFirst((BrowseFacet) queue.pop());
            }
        } else {
            // no maxCnt specified. So fetch all facets according to minHits and sort them later
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    } else // FacetSortSpec.OrderByCustom.equals(_fspec.getOrderBy()
    {
        comparator = _fspec.getCustomComparatorFactory().newComparator();
        if (maxCnt != Integer.MAX_VALUE) {
            PriorityQueue queue = createPQ(maxCnt, comparator);
            // Reusable scratch facet: insertWithOverflow returns the evicted element,
            // which we recycle for the next candidate to avoid allocations.
            BrowseFacet browseFacet = new BrowseFacet();
            int qsize = 0;
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            if (facet != null) {
                while ((facet = iter.next(minHits)) != null) {
                    // check with the top of min heap
                    browseFacet.setHitCount(iter.count);
                    browseFacet.setValue(String.valueOf(facet));
                    browseFacet = (BrowseFacet) queue.insertWithOverflow(browseFacet);
                }
            }
            // remove from queue and add to the list
            while (qsize-- > 0)
                list.addFirst((BrowseFacet) queue.pop());
        } else {
            // order by custom but no max count supplied
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    }
    return list;
}

From source file:io.ssc.relationdiscovery.KMeans.java

License:Open Source License

/**
 * Prints the {@code howMany} patterns whose row vectors in {@code A} are
 * nearest (per {@code distanceMeasure}) to the chosen centroid.
 *
 * @param centroidIndex index into {@code centroids} of the reference centroid
 * @param howMany       maximum number of closest points to print
 * @param patterns      maps a row index of {@code A} to its pattern
 */
public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) {

    // Bounded heap of size howMany. Lucene's insertWithOverflow evicts the
    // "least" element (the top), so lessThan must rank the FARTHEST point as
    // least in order to retain the closest points. The original comparison
    // (a.distance < b.distance) evicted the closest point instead, keeping
    // the farthest ones — inverted here to match the method's purpose.
    PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) {
        @Override
        protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) {
            return a.distance > b.distance;
        }
    };

    Vector centroid = centroids[centroidIndex];

    for (MatrixSlice rowSlice : A) {
        Vector row = rowSlice.vector();
        double distance = distanceMeasure.distance(centroid, row);
        queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index())));
    }

    // pop() yields least-first: farthest of the retained set first, nearest last.
    while (queue.size() > 0) {
        System.out.println("\t" + queue.pop());
    }

}

From source file:org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java

License:Apache License

/**
 * Builds a highlighted excerpt from the best-scoring, non-overlapping text
 * fragments around the given term offsets.
 *
 * @param offsets       term offsets to highlight, may be null/empty
 * @param text          the full document text
 * @param excerptStart  markup emitted before the whole excerpt
 * @param excerptEnd    markup emitted after the whole excerpt
 * @param fragmentStart markup emitted before each fragment
 * @param fragmentEnd   markup emitted after each fragment
 * @param hlStart       markup emitted before each highlighted term
 * @param hlEnd         markup emitted after each highlighted term
 * @param maxFragments  maximum number of fragments to keep
 * @param surround      number of context characters on each side of a fragment
 * @return the assembled excerpt markup
 * @throws IOException propagated from excerpt creation
 */
@Override
protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) throws IOException {
    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }

    // Keep only the maxFragments best fragments; the queue evicts the weakest.
    PriorityQueue<FragmentInfo> bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        if (offsets[i].getEndOffset() <= text.length()) {
            FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
            // Greedily absorb subsequent offsets that still fit in this fragment.
            for (int j = i + 1; j < offsets.length; j++) {
                if (offsets[j].getEndOffset() > text.length()) {
                    break;
                }
                if (!fi.add(offsets[j], text)) {
                    break;
                }
            }
            bestFragments.insertWithOverflow(fi);
        }
    }

    if (bestFragments.size() == 0) {
        // All offsets were out of range; fall back to a default excerpt.
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }

    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first
    List<FragmentInfo> infos = new LinkedList<FragmentInfo>();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }

    // Identity map used as a set of offsets already claimed by kept fragments.
    Map<TermVectorOffsetInfo, Object> offsetInfos = new IdentityHashMap<TermVectorOffsetInfo, Object>();
    // remove overlapping fragment infos
    Iterator<FragmentInfo> it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        boolean overlap = false;
        Iterator<TermVectorOffsetInfo> fit = fi.iterator();
        while (fit.hasNext() && !overlap) {
            TermVectorOffsetInfo oi = fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            // Claim this fragment's offsets so later fragments can't reuse them.
            Iterator<TermVectorOffsetInfo> oit = fi.iterator();
            while (oit.hasNext()) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }

    // create excerpts
    StringBuffer sb = new StringBuffer(excerptStart);
    it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        sb.append(fragmentStart);
        // Leading context: start roughly `surround` chars before the fragment's midpoint.
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        Iterator<TermVectorOffsetInfo> fIt = fi.iterator();
        while (fIt.hasNext()) {
            TermVectorOffsetInfo oi = fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(escape(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
            }
            sb.append(hlStart);
            sb.append(escape(text.substring(oi.getStartOffset(), oi.getEndOffset())));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        // Trailing context, clamped to the end of the document text.
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}

From source file:org.apache.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java

License:Apache License

/**
 * Returns the smallest magnitude among the {@code maxPrefsPerUserConsidered}
 * largest absolute preference values of the given user vector.
 */
private float findSmallestLargeValue(Vector userVector) {

    // Min-heap capped at maxPrefsPerUserConsidered: overflow evicts the smallest
    // element, so the heap retains only the largest magnitudes seen so far.
    PriorityQueue<Float> largestMagnitudes = new PriorityQueue<Float>(maxPrefsPerUserConsidered) {
        @Override
        protected boolean lessThan(Float left, Float right) {
            return left < right;
        }
    };

    for (Element element : userVector.nonZeroes()) {
        largestMagnitudes.insertWithOverflow(Math.abs((float) element.get()));
    }

    // top() is the least retained element, i.e. the smallest of the "large" values.
    // NOTE(review): assumes the vector has at least one non-zero element; on an
    // empty heap top() yields null and unboxing would NPE — TODO confirm callers.
    return largestMagnitudes.top();
}

From source file:org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java

License:Apache License

/**
 * Scans the training vectors for approximate nearest neighbors of the query,
 * using Hamming distance between 64-bit hashes as a cheap pre-filter so that
 * full distance computations are only done for promising candidates.
 *
 * @param query the query vector
 * @return a bounded priority queue of the closest candidates found
 */
private PriorityQueue<WeightedThing<Vector>> searchInternal(Vector query) {
    long queryHash = HashedVector.computeHash64(query, projection);

    // We keep an approximation of the closest vectors here.
    PriorityQueue<WeightedThing<Vector>> top = Searcher.getCandidateQueue(getSearchSize());

    // We scan the vectors using bit counts as an approximation of the dot product so we can do as few
    // full distance computations as possible.  Our goal is to only do full distance computations for
    // vectors with hash distance at most as large as the searchSize biggest hash distance seen so far.

    // Per-hash-distance summary of observed true distances, used to adapt hashLimit.
    OnlineSummarizer[] distribution = new OnlineSummarizer[BITS + 1];
    for (int i = 0; i < BITS + 1; i++) {
        distribution[i] = new OnlineSummarizer();
    }

    distanceEvaluations = 0;

    // We keep the counts of the hash distances here.  This lets us accurately
    // judge what hash distance cutoff we should use.
    int[] hashCounts = new int[BITS + 1];

    // Maximum number of different bits to still consider a vector a candidate for nearest neighbor.
    // Starts at the maximum number of bits, but decreases and can increase.
    int hashLimit = BITS;
    int limitCount = 0;
    double distanceLimit = Double.POSITIVE_INFINITY;

    // In this loop, we have the invariants that:
    //
    // limitCount = sum_{i<hashLimit} hashCount[i]
    // and
    // limitCount >= searchSize && limitCount - hashCount[hashLimit-1] < searchSize
    for (HashedVector vector : trainingVectors) {
        // This computes the Hamming Distance between the vector's hash and the query's hash.
        // The result is correlated with the angle between the vectors.
        int bitDot = vector.hammingDistance(queryHash);
        if (bitDot <= hashLimit) {
            distanceEvaluations++;

            double distance = distanceMeasure.distance(query, vector);
            distribution[bitDot].add(distance);

            if (distance < distanceLimit) {
                top.insertWithOverflow(new WeightedThing<Vector>(vector, distance));
                if (top.size() == searchSize) {
                    // Queue is full: the worst retained distance becomes the cutoff.
                    distanceLimit = top.top().getWeight();
                }

                hashCounts[bitDot]++;
                limitCount++;
                // Tighten hashLimit while we can drop a bucket and still keep >= searchSize candidates.
                while (hashLimit > 0 && limitCount - hashCounts[hashLimit - 1] > searchSize) {
                    hashLimit--;
                    limitCount -= hashCounts[hashLimit];
                }

                if (hashLimitStrategy >= 0) {
                    // Optionally relax hashLimit when the distance distribution at the
                    // boundary suggests useful candidates would otherwise be skipped.
                    while (hashLimit < MAX_HASH_LIMIT
                            && distribution[hashLimit].getCount() > MIN_DISTRIBUTION_COUNT
                            && ((1 - hashLimitStrategy) * distribution[hashLimit].getQuartile(0)
                                    + hashLimitStrategy
                                            * distribution[hashLimit].getQuartile(1)) < distanceLimit) {
                        limitCount += hashCounts[hashLimit];
                        hashLimit++;
                    }
                }
            }
        }
    }
    return top;
}

From source file:org.apache.mahout.utils.vectors.VectorHelper.java

License:Apache License

/**
 * Returns up to {@code maxEntries} (index, value) pairs for the largest
 * non-zero entries of the vector, sorted by value descending.
 *
 * @param vector     the vector to inspect
 * @param maxEntries maximum number of entries to return
 * @return the top entries, highest value first
 */
public static List<Pair<Integer, Double>> topEntries(Vector vector, int maxEntries) {

    // How many non-zero elements the vector actually holds.
    int nonZeroCount = Iterables.size(vector.nonZeroes());

    // Clamp maxEntries to the number of non-zero elements: popping an
    // under-filled TDoublePQ would yield a Pair(null, null) and the later
    // pair.getFirst() call would throw a NullPointerException.
    if (nonZeroCount < maxEntries) {
        maxEntries = nonZeroCount;
    }

    // Bounded heap seeded with sentinel index -1; overflow keeps the largest values.
    PriorityQueue<Pair<Integer, Double>> heap = new TDoublePQ<Integer>(-1, maxEntries);
    for (Element element : vector.nonZeroes()) {
        heap.insertWithOverflow(Pair.of(element.index(), element.get()));
    }

    // Drain the heap, discarding any leftover sentinel entries.
    List<Pair<Integer, Double>> result = Lists.newArrayList();
    for (Pair<Integer, Double> entry = heap.pop(); entry != null; entry = heap.pop()) {
        if (entry.getFirst() > -1) {
            result.add(entry);
        }
    }

    // The heap drains smallest-first; re-sort by value descending.
    Collections.sort(result, new Comparator<Pair<Integer, Double>>() {
        @Override
        public int compare(Pair<Integer, Double> a, Pair<Integer, Double> b) {
            return b.getSecond().compareTo(a.getSecond());
        }
    });
    return result;
}

From source file:org.apache.solr.cloud.SizeLimitedDistributedMap.java

License:Apache License

/**
 * Stores the entry, first evicting roughly the oldest 10% of existing
 * entries (by creation mzxid) when the map has reached its size limit.
 *
 * @param trackingId the key under which to store the data
 * @param data       the payload bytes
 * @throws KeeperException      on ZooKeeper errors
 * @throws InterruptedException if the ZooKeeper call is interrupted
 */
@Override
public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
    if (this.size() >= maxSize) {
        // Bring down the size
        List<String> children = zookeeper.getChildren(dir, null, true);

        int cleanupSize = maxSize / 10;

        // Parameterized queue (was a raw type, forcing a cast on top()).
        // The inverted lessThan makes the LARGEST mzxid the "least" element,
        // so overflow evicts it and the queue retains the cleanupSize
        // smallest (oldest) mzxids; top() is then the eviction cutoff.
        final PriorityQueue<Long> priorityQueue = new PriorityQueue<Long>(cleanupSize) {
            @Override
            protected boolean lessThan(Long a, Long b) {
                return (a > b);
            }
        };

        for (String child : children) {
            Stat stat = zookeeper.exists(dir + "/" + child, null, true);
            priorityQueue.insertWithOverflow(stat.getMzxid());
        }

        long topElementMzxId = priorityQueue.top();

        // Delete every child at or below the cutoff, i.e. the oldest ~10%.
        for (String child : children) {
            Stat stat = zookeeper.exists(dir + "/" + child, null, true);
            if (stat.getMzxid() <= topElementMzxId)
                zookeeper.delete(dir + "/" + child, -1, true);
        }
    }

    super.put(trackingId, data);
}

From source file:org.apache.solr.request.NumericFacets.java

License:Apache License

/**
 * Computes facet counts for a numeric field by hashing field values per
 * document, then selecting the top values with a bounded priority queue and,
 * when mincount=0, merging in zero-count terms from the terms dictionary.
 *
 * @param searcher  the index searcher
 * @param docs      the documents to facet over
 * @param fieldName the numeric field to facet on
 * @param offset    number of leading facet values to skip
 * @param limit     maximum number of facet values to return (negative = unlimited)
 * @param mincount  minimum count for a value to be returned (<= 0 enables zeros)
 * @param missing   whether to append the missing-value count
 * @param sort      facet sort mode (count or index)
 * @return facet values with their counts
 * @throws IOException on index access errors
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    final boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1); // accumulation always needs at least one hit
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumericType numericType = ft.getNumericType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();

    // 1. accumulate
    final HashTable hashTable = new HashTable();
    final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
    AtomicReaderContext ctx = null;
    FieldCache.Longs longs = null;
    Bits docsWithField = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            // Advance to the segment containing this doc (docs come in order).
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            // All numeric types are normalized to sortable long bits.
            switch (numericType) {
            case LONG:
                longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
                break;
            case INT:
                final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return ints.get(docID);
                    }
                };
                break;
            case FLOAT:
                final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.floatToSortableInt(floats.get(docID));
                    }
                };
                break;
            case DOUBLE:
                final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.doubleToSortableLong(doubles.get(docID));
                    }
                };
                break;
            default:
                throw new AssertionError();
            }
            docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
        }
        long v = longs.get(doc - ctx.docBase);
        // A value of 0 is ambiguous: check docsWithField to distinguish "0" from "missing".
        if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
            hashTable.add(doc, v, 1);
        } else {
            ++missingCount;
        }
    }

    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Count descending, ties broken by value (sortable bits) ascending.
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        // Index order: value (sortable bits) ascending.
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    // Reuse the Entry object insertWithOverflow hands back to avoid allocations.
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
        }
    }

    // 3. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<Integer>();

    // This stuff is complicated because if facet.mincount=0, the counts need
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort)
            || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<Entry>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }

        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            // Render the stored value via the value source of its own segment.
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }

        if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
            if (!sf.indexed()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
                        + sf.getName() + " which is not indexed");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<String>();
            // Values already emitted (or skipped via offset) must not be re-added with count 0.
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getAtomicReader().terms(fieldName);
            if (terms != null) {
                // Numeric fields index multiple precisions; only the main prefix holds full values.
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator(null);
                BytesRef term;
                switch (termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
                }
                final CharsRef spare = new CharsRef();
                // Skip the remainder of the requested offset among unseen (zero-count) terms.
                for (int skipped = hashTable.size; skipped < offset && term != null
                        && StringHelper.startsWith(term, prefix);) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                for (; term != null && StringHelper.startsWith(term, prefix)
                        && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // sort=index, mincount=0 and we have less than limit items
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
                    + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        // Drain the PQ into a lookup of value -> count, then walk terms in index order.
        final Map<String, Integer> counts = new HashMap<String, Integer>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator(null);
            BytesRef term;
            switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
                term = termsEnum.term();
                break;
            case END:
                term = null;
                break;
            default:
                throw new AssertionError();
            }
            final CharsRef spare = new CharsRef();
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix)
                    && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }

    if (missing) {
        // Report docs with no value for this field under the null key.
        result.add(null, missingCount);
    }
    return result;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByHashNumeric.java

License:Apache License

/**
 * Computes facet buckets for a numeric field by hashing per-document values
 * into a long-keyed count table, then selecting the top slots with a bounded
 * priority queue and filling each resulting bucket.
 *
 * <p>Changes from the original: removed the unused local {@code numMissing}
 * and fixed comment typos; all executable logic is unchanged.</p>
 *
 * @return the facet response map (buckets plus optional numBuckets/allBuckets/missing)
 * @throws IOException on index access errors
 */
private SimpleOrderedMap<Object> calcFacets() throws IOException {

    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);

    // TODO: it would be really nice to know the number of unique values!!!!

    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            // Propagate the slot remapping to the accumulators, then release it.
            doRehash(this);
            oldToNewMapping = null; // allow for gc
        }
    };

    int numSlots = currHashSize;

    if (freq.allBuckets) {
        allBucketsSlot = numSlots++;
    }

    // Index-order comparison is derived from the hashed values; collect is a no-op.
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };

    // Counts live in the hash table itself; this accumulator is a read-only view.
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };

    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);

    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }

    NumericDocValues values = null;
    Bits docsWithField = null;

    // TODO: factor this code out so it can be shared...
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (doc >= adjustedMax) {
            // Advance to the segment containing this doc (docs come in order).
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);

            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }

        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // A value of 0 is ambiguous: check docsWithField to distinguish "0" from "missing".
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash

            // countAcc.incrementCount(slot, 1);
            // our countAcc is virtual, so this is not needed

            collectFirstPhase(segDoc, slot);
        }
    }

    //
    // collection done, time to find the top slots
    //

    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }

    int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;

    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);

    final int sortMul = freq.sortDirection.getMultiplier();

    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };

    // TODO: create a countAcc that wraps the table so we can reuse more code?

    // Reuse the Slot object insertWithOverflow hands back to avoid allocations.
    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1

        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }

        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;

        bottom = queue.insertWithOverflow(bottom);
    }

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            // Shards also report a sample of bucket values for refinement.
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.

        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;

        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorNumeric.java

License:Apache License

/**
 * Computes facet buckets for a numeric field over {@code fcontext.base}.
 * <p>
 * Collection happens in two stages: first every matching doc's numeric value is
 * hashed into a {@link LongCounts} table (one slot per distinct value), then the
 * slots whose counts reach {@code effectiveMincount} are ranked through a bounded
 * {@code PriorityQueue} and turned into response buckets.
 *
 * @return the facet response map ("numBuckets" / "allBuckets" / "missing" / "buckets"
 *         entries added according to the request flags in {@code freq})
 * @throws IOException if reading doc values from the index fails
 */
public SimpleOrderedMap<Object> calcFacets() throws IOException {

    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);

    // TODO: it would be really nice to know the number of unique values!!!!

    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            // slot numbers changed; remap any accumulators that track per-slot state
            doRehash(this);
            oldToNewMapping = null; // allow for gc
        }
    };

    int numSlots = currHashSize;

    if (freq.allBuckets) {
        // reserve one extra slot past the hash table for the "allBuckets" aggregate
        allBucketsSlot = numSlots++;
    }

    // Sorts slots by the index (value) order of the hashed values; collects nothing itself.
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };

    // Virtual count accumulator: reads counts straight from the hash table instead of
    // maintaining its own array, so the mutating operations are unsupported.
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };

    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);

    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }

    NumericDocValues values = null;
    Bits docsWithField = null;

    // TODO: factor this code out so it can be shared...
    // Walk the base doc set in docid order, advancing through the index segments as needed.
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (doc >= adjustedMax) {
            // doc belongs to a later segment: advance to the segment containing it
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);

            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }

        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // val==0 is ambiguous (missing values also read as 0), so consult docsWithField
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash

            // countAcc.incrementCount(slot, 1);
            // our countAcc is virtual, so this is not needed

            collectFirstPhase(segDoc, slot);
        }
    }

    //
    // collection done, time to find the top slots
    //

    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }

    int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;

    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);

    final int sortMul = freq.sortDirection.getMultiplier();

    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            // ties fall back to index order so results are stable
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };

    // TODO: create a countAcc that wraps the table so we can reuse more code?

    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1

        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }

        // reuse the evicted Slot object (insertWithOverflow returns the overflowed element)
        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;

        bottom = queue.insertWithOverflow(bottom);
    }

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            // shards also return a sample of values so the merger can estimate cardinality
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.

        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    int[] sortedSlots = new int[collectCount];
    // pop from the min-heap fills the array back-to-front in descending sort order
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    // only build a per-bucket filter query when sub-facets or deferred aggs must run on it
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;

        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}