Example usage for org.apache.lucene.util PriorityQueue size

List of usage examples for org.apache.lucene.util PriorityQueue size

Introduction

In this page you can find the example usage for org.apache.lucene.util PriorityQueue size.

Prototype

public final int size()

To view the source code for org.apache.lucene.util PriorityQueue size, click the Source Link below.

Click Source Link

Usage

From source file:io.ssc.relationdiscovery.KMeans.java

License:Open Source License

/**
 * Prints the {@code howMany} patterns whose row vectors in {@code A} lie
 * closest to the centroid at {@code centroidIndex}, one per line on stdout.
 *
 * @param centroidIndex index into {@code centroids} of the centroid to compare against
 * @param howMany       maximum number of nearby patterns to print
 * @param patterns      maps a row index of {@code A} to its pattern label
 */
public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) {

    // Bounded queue of the closest patterns seen so far. Lucene's
    // PriorityQueue.insertWithOverflow evicts the "least" element once the
    // queue is full, so the LARGEST distance must rank as least here.
    // (The original comparator used '<', which made the smallest distance
    // least and therefore evicted the closest points, keeping the farthest
    // howMany instead — the opposite of what the method name promises.)
    PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) {
        @Override
        protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) {
            return a.distance > b.distance;
        }
    };

    Vector centroid = centroids[centroidIndex];

    // Score every row of A against the centroid; overflow keeps only the
    // howMany nearest patterns.
    for (MatrixSlice rowSlice : A) {
        Vector row = rowSlice.vector();
        double distance = distanceMeasure.distance(centroid, row);
        queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index())));
    }

    // pop() yields the least element first, i.e. the farthest of the kept set.
    while (queue.size() > 0) {
        System.out.println("\t" + queue.pop());
    }

}

From source file:net.dataninja.ee.textEngine.facet.GroupCounts.java

License:Open Source License

/**
 * Constructs the array of doc hits for one hit group and records the
 * paging window ({@code startDoc}/{@code maxDocs}) on {@code resultGroup}.
 *
 * @param group       index of the group whose queue should be drained
 * @param resultGroup receives the doc hits and window bookkeeping
 */
private void buildDocHits(int group, ResultGroup resultGroup) {
    // Drain this group's queue. pop() yields the least hit first, so filling
    // the array from the end leaves the best hit at index 0.
    PriorityQueue queue = hitQueue[group];
    int nFound = queue.size();
    DocHitImpl[] hitArray = new DocHitImpl[nFound];
    for (int i = 0; i < nFound; i++) {
        int index = nFound - i - 1;
        hitArray[index] = (DocHitImpl) queue.pop();
    }

    int start = startDoc[group];
    int max = maxDocs[group];

    // Clamp to the requested window: at most 'max' hits beginning at 'start'.
    int nHits = Math.max(0, Math.min(nFound - start, max));
    resultGroup.docHits = new DocHit[nHits];

    resultGroup.totalDocs = nDocHits(group);
    resultGroup.startDoc = start;
    resultGroup.endDoc = start + nHits;

    // Copy only the window [start, start + nHits). The original loop ran all
    // the way to nFound, which writes past docHits whenever more than 'max'
    // hits remain after 'start' (ArrayIndexOutOfBoundsException).
    for (int i = start; i < start + nHits; i++)
        resultGroup.docHits[i - start] = hitArray[i];
}

From source file:net.dataninja.ee.textEngine.MoreLikeThisQuery.java

License:Apache License

/**
 * Builds the final "more like this" query from a priority queue of
 * best-scoring terms.
 *
 * For each configured field, every queued word that actually occurs in that
 * field becomes a {@code SpanTermQuery} (optionally boosted by its score);
 * the per-field clauses are combined into a {@code SpanOrNearQuery}, which
 * is OR-ed (SHOULD) into the resulting {@code BooleanQuery}.
 *
 * @param indexReader used to check each candidate term's document frequency
 * @param q           queue of {@code QueryWord} entries; pop() yields least first
 * @return the assembled query
 * @throws IOException if the docFreq lookup fails
 */
private Query createQuery(IndexReader indexReader, PriorityQueue q) throws IOException {
    // Pop everything from the queue. Popping yields least-first, so filling
    // the array back-to-front leaves the best-scoring word at index 0.
    QueryWord[] queryWords = new QueryWord[q.size()];
    for (int i = q.size() - 1; i >= 0; i--)
        queryWords[i] = (QueryWord) q.pop();

    BooleanQuery query = new BooleanQuery(true /*disable coord*/);

    // At the moment, there's no need to scale by the best score. It simply
    // clouds the query explanation. It doesn't affect the scores, since
    // Lucene applies a query normalization factor anyway.
    //
    //float bestScore = (queryWords.length > 0) ? queryWords[0].score : 0.0f;
    for (int i = 0; i < fieldNames.length; i++) {
        ArrayList fieldClauses = new ArrayList();

        for (int j = 0; j < queryWords.length; j++) {
            QueryWord qw = queryWords[j];
            Term term = new Term(fieldNames[i], qw.word);

            // Skip words not present in this field.
            int docFreq = indexReader.docFreq(term);
            if (docFreq == 0)
                continue;

            // Add it to the query.
            SpanTermQuery tq = new SpanTermQuery(term);
            if (boost)
                tq.setBoost(qw.score);
            fieldClauses.add(tq);
        } // for j

        // If no terms for this field, skip it.
        if (fieldClauses.isEmpty())
            continue;

        SpanQuery[] clauses = (SpanQuery[]) fieldClauses.toArray(new SpanQuery[fieldClauses.size()]);

        // Now make a special Or-Near query out of the clauses.
        SpanOrNearQuery fieldQuery = new SpanOrNearQuery(clauses, 10, false);

        // Boost if necessary.
        if (fieldBoosts != null)
            fieldQuery.setBoost(fieldBoosts[i]);

        // We currently don't support more-like-this queries on the full text.
        // It would involve de-chunking, and also fancier logic to pick the
        // "most interesting" terms in the first place.
        //
        if (fieldNames[i].equals("text"))
            throw new RuntimeException("MoreLikeThisQuery does not support 'text' field.");

        // And add to the main query.
        query.add(fieldQuery, BooleanClause.Occur.SHOULD);
    } // for i

    // All done.
    return query;
}

From source file:org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java

License:Apache License

/**
 * Builds an excerpt of {@code text} by selecting up to {@code maxFragments}
 * of the best-scoring highlight fragments and concatenating them, wrapping
 * highlighted term ranges in {@code hlStart}/{@code hlEnd}.
 *
 * @param offsets       term offsets to highlight; null/empty yields a plain excerpt
 * @param text          the full source text
 * @param excerptStart  markup emitted before the whole excerpt
 * @param excerptEnd    markup emitted after the whole excerpt
 * @param fragmentStart markup emitted before each fragment
 * @param fragmentEnd   markup emitted after each fragment
 * @param hlStart       markup emitted before each highlighted term
 * @param hlEnd         markup emitted after each highlighted term
 * @param maxFragments  maximum number of fragments to include
 * @param surround      number of context characters around highlights
 * @return the assembled excerpt string
 */
protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) {

    if (offsets == null || offsets.length == 0) {
        // nothing to highlight: emit the first surround*2 characters as a
        // single fragment.
        StringBuffer excerpt = new StringBuffer(excerptStart);
        excerpt.append(fragmentStart);
        int min = excerpt.length();
        excerpt.append(text.substring(0, Math.min(text.length(), surround * 2)));
        if (text.length() > excerpt.length()) {
            // Trim back to the last whitespace and append an ellipsis so the
            // excerpt does not end mid-word.
            for (int i = excerpt.length() - 1; i > min; i--) {
                if (Character.isWhitespace(excerpt.charAt(i))) {
                    excerpt.delete(i, excerpt.length());
                    excerpt.append(" ...");
                    break;
                }
            }
        }
        excerpt.append(fragmentEnd).append(excerptEnd);
        return excerpt.toString();
    }

    // Grow a fragment from each starting offset, absorbing following offsets
    // while they still fit, and keep the best ones in the priority queue.
    PriorityQueue bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
        for (int j = i + 1; j < offsets.length; j++) {
            if (!fi.add(offsets[j], text)) {
                break;
            }
        }
        bestFragments.insert(fi);
    }

    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first; prepending at index 0 restores best-first order
    List infos = new LinkedList();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }

    // Identity map used as a set of offsets already claimed by a kept fragment.
    Map offsetInfos = new IdentityHashMap();
    // remove overlapping fragment infos: a fragment sharing any offset with an
    // earlier (better) fragment is dropped.
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        boolean overlap = false;
        for (Iterator fit = fi.iterator(); fit.hasNext() && !overlap;) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            for (Iterator oit = fi.iterator(); oit.hasNext();) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }

    // create excerpts: leading context, highlighted terms with the text
    // between them, then trailing context.
    StringBuffer sb = new StringBuffer(excerptStart);
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        sb.append(fragmentStart);
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        for (Iterator fIt = fi.iterator(); fIt.hasNext();) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset()));
            }
            sb.append(hlStart);
            sb.append(text.substring(oi.getStartOffset(), oi.getEndOffset()));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}

From source file:org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java

License:Apache License

/**
 * Generic variant of fragment merging: selects up to {@code maxFragments}
 * best-scoring fragments (skipping offsets that fall outside {@code text})
 * and concatenates them, escaping all copied text via {@code escape}.
 * Falls back to {@code createDefaultExcerpt} when nothing can be highlighted.
 */
@Override
protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) throws IOException {
    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }

    // Grow a fragment from each in-bounds offset, absorbing following offsets
    // while they fit; insertWithOverflow keeps only the maxFragments best.
    PriorityQueue<FragmentInfo> bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        if (offsets[i].getEndOffset() <= text.length()) {
            FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
            for (int j = i + 1; j < offsets.length; j++) {
                // Stop at the first offset that extends past the text.
                if (offsets[j].getEndOffset() > text.length()) {
                    break;
                }
                if (!fi.add(offsets[j], text)) {
                    break;
                }
            }
            bestFragments.insertWithOverflow(fi);
        }
    }

    if (bestFragments.size() == 0) {
        return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
    }

    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first; prepending at index 0 restores best-first order
    List<FragmentInfo> infos = new LinkedList<FragmentInfo>();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }

    // Identity map used as a set of offsets already claimed by a kept fragment.
    Map<TermVectorOffsetInfo, Object> offsetInfos = new IdentityHashMap<TermVectorOffsetInfo, Object>();
    // remove overlapping fragment infos: a fragment sharing any offset with an
    // earlier (better) fragment is dropped.
    Iterator<FragmentInfo> it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        boolean overlap = false;
        Iterator<TermVectorOffsetInfo> fit = fi.iterator();
        while (fit.hasNext() && !overlap) {
            TermVectorOffsetInfo oi = fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            Iterator<TermVectorOffsetInfo> oit = fi.iterator();
            while (oit.hasNext()) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }

    // create excerpts: leading context, highlighted terms with the escaped
    // text between them, then trailing context.
    StringBuffer sb = new StringBuffer(excerptStart);
    it = infos.iterator();
    while (it.hasNext()) {
        FragmentInfo fi = it.next();
        sb.append(fragmentStart);
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        Iterator<TermVectorOffsetInfo> fIt = fi.iterator();
        while (fIt.hasNext()) {
            TermVectorOffsetInfo oi = fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(escape(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
            }
            sb.append(hlStart);
            sb.append(escape(text.substring(oi.getStartOffset(), oi.getEndOffset())));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}

From source file:org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java

License:Apache License

/**
 * Core LSH search: scans all training vectors, using the Hamming distance
 * between 64-bit hashes as a cheap pre-filter so that full distance
 * computations are only done for promising candidates.
 *
 * @param query the vector to search near
 * @return a bounded priority queue of candidate neighbors with their
 *         distances; the queue's top is the worst retained candidate
 */
private PriorityQueue<WeightedThing<Vector>> searchInternal(Vector query) {
    long queryHash = HashedVector.computeHash64(query, projection);

    // We keep an approximation of the closest vectors here.
    PriorityQueue<WeightedThing<Vector>> top = Searcher.getCandidateQueue(getSearchSize());

    // We scan the vectors using bit counts as an approximation of the dot product so we can do as few
    // full distance computations as possible.  Our goal is to only do full distance computations for
    // vectors with hash distance at most as large as the searchSize biggest hash distance seen so far.

    // distribution[d] summarizes the true distances observed at hash distance d.
    OnlineSummarizer[] distribution = new OnlineSummarizer[BITS + 1];
    for (int i = 0; i < BITS + 1; i++) {
        distribution[i] = new OnlineSummarizer();
    }

    distanceEvaluations = 0;

    // We keep the counts of the hash distances here.  This lets us accurately
    // judge what hash distance cutoff we should use.
    int[] hashCounts = new int[BITS + 1];

    // Maximum number of different bits to still consider a vector a candidate for nearest neighbor.
    // Starts at the maximum number of bits, but decreases and can increase.
    int hashLimit = BITS;
    int limitCount = 0;
    double distanceLimit = Double.POSITIVE_INFINITY;

    // In this loop, we have the invariants that:
    //
    // limitCount = sum_{i<hashLimit} hashCount[i]
    // and
    // limitCount >= searchSize && limitCount - hashCount[hashLimit-1] < searchSize
    for (HashedVector vector : trainingVectors) {
        // This computes the Hamming Distance between the vector's hash and the query's hash.
        // The result is correlated with the angle between the vectors.
        int bitDot = vector.hammingDistance(queryHash);
        if (bitDot <= hashLimit) {
            distanceEvaluations++;

            double distance = distanceMeasure.distance(query, vector);
            distribution[bitDot].add(distance);

            if (distance < distanceLimit) {
                top.insertWithOverflow(new WeightedThing<Vector>(vector, distance));
                if (top.size() == searchSize) {
                    // Once the queue is full, the worst retained distance
                    // becomes the cutoff for future candidates.
                    distanceLimit = top.top().getWeight();
                }

                hashCounts[bitDot]++;
                limitCount++;
                // Shrink the hash limit while we can still drop a bucket and
                // keep at least searchSize candidates below it.
                while (hashLimit > 0 && limitCount - hashCounts[hashLimit - 1] > searchSize) {
                    hashLimit--;
                    limitCount -= hashCounts[hashLimit];
                }

                // Optionally re-grow the limit while the distance statistics at
                // the current bucket suggest useful candidates are still there.
                if (hashLimitStrategy >= 0) {
                    while (hashLimit < MAX_HASH_LIMIT
                            && distribution[hashLimit].getCount() > MIN_DISTRIBUTION_COUNT
                            && ((1 - hashLimitStrategy) * distribution[hashLimit].getQuartile(0)
                                    + hashLimitStrategy
                                            * distribution[hashLimit].getQuartile(1)) < distanceLimit) {
                        limitCount += hashCounts[hashLimit];
                        hashLimit++;
                    }
                }
            }
        }
    }
    return top;
}

From source file:org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java

License:Apache License

/**
 * Returns up to {@code limit} nearest neighbors of {@code query}, closest
 * first, with the hash wrappers stripped from the result vectors.
 */
@Override
public List<WeightedThing<Vector>> search(Vector query, int limit) {
    PriorityQueue<WeightedThing<Vector>> candidates = searchInternal(query);
    List<WeightedThing<Vector>> ordered = Lists.newArrayListWithExpectedSize(candidates.size());
    // Drain the queue (worst candidate pops first), unwrapping each
    // HashedVector back to the caller-visible plain vector.
    while (candidates.size() != 0) {
        WeightedThing<Vector> entry = candidates.pop();
        Vector unwrapped = ((HashedVector) entry.getValue()).getVector();
        ordered.add(new WeightedThing<Vector>(unwrapped, entry.getWeight()));
    }
    // Popping produced worst-first order; flip to best-first.
    Collections.reverse(ordered);
    if (limit < ordered.size()) {
        return ordered.subList(0, limit);
    }
    return ordered;
}

From source file:org.apache.mahout.math.neighborhood.LocalitySensitiveHashSearch.java

License:Apache License

/**
 * Returns the closest vector to the query.
 * When only one the nearest vector is needed, use this method, NOT search(query, limit) because
 * it's faster (less overhead)./*  w  w w  . j a v  a2s  .co  m*/
 * This is nearly the same as search().
 *
 * @param query the vector to search for
 * @param differentThanQuery if true, returns the closest vector different than the query (this
 *                           only matters if the query is among the searched vectors), otherwise,
 *                           returns the closest vector to the query (even the same vector).
 * @return the weighted vector closest to the query
 */
@Override
public WeightedThing<Vector> searchFirst(Vector query, boolean differentThanQuery) {
    // We get the top searchSize neighbors.
    PriorityQueue<WeightedThing<Vector>> top = searchInternal(query);
    // We then cut the number down to just the best 2.
    while (top.size() > 2) {
        top.pop();
    }
    // If there are fewer than 2 results, we just return the one we have.
    if (top.size() < 2) {
        return removeHash(top.pop());
    }
    // There are exactly 2 results.
    WeightedThing<Vector> secondBest = top.pop();
    WeightedThing<Vector> best = top.pop();
    // If the best result is the same as the query, but we don't want to return the query.
    if (differentThanQuery && best.getValue().equals(query)) {
        best = secondBest;
    }
    return removeHash(best);
}

From source file:org.apache.solr.request.NumericFacets.java

License:Apache License

/**
 * Computes facet counts for a numeric field: accumulates per-value counts
 * into a hash table, selects the top values with a priority queue, and
 * builds the resulting NamedList, optionally merging in zero-count terms
 * from the terms dictionary when mincount &lt;= 0.
 *
 * @param searcher  the index searcher
 * @param docs      the documents to facet over
 * @param fieldName the numeric field to facet on
 * @param offset    number of leading facet values to skip
 * @param limit     maximum number of facet values to return (&lt; 0 = no limit)
 * @param mincount  minimum count for a value to be included
 * @param missing   whether to append a count of documents missing the field
 * @param sort      facet sort order (count or index)
 * @return facet value labels mapped to their counts
 * @throws IOException on index access failure
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    // Remember whether zero counts were requested before clamping mincount.
    final boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumericType numericType = ft.getNumericType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();

    // 1. accumulate: walk the doc set in order, switching FieldCache readers
    // as we cross segment boundaries, and count each (sortable long) value.
    final HashTable hashTable = new HashTable();
    final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
    AtomicReaderContext ctx = null;
    FieldCache.Longs longs = null;
    Bits docsWithField = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            // Advance to the segment containing this doc.
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            // All numeric types are adapted to a Longs view so the hash table
            // can key on a single sortable long representation.
            switch (numericType) {
            case LONG:
                longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
                break;
            case INT:
                final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return ints.get(docID);
                    }
                };
                break;
            case FLOAT:
                final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.floatToSortableInt(floats.get(docID));
                    }
                };
                break;
            case DOUBLE:
                final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.doubleToSortableLong(doubles.get(docID));
                    }
                };
                break;
            default:
                throw new AssertionError();
            }
            docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
        }
        long v = longs.get(doc - ctx.docBase);
        // A zero value is only real if the doc actually has the field.
        if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
            hashTable.add(doc, v, 1);
        } else {
            ++missingCount;
        }
    }

    // 2. select top-k facet values via a bounded priority queue sized to
    // cover offset + limit entries.
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Sort by count desc, ties broken by value asc (larger bits = lesser).
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        // Index order: smaller value ranks higher.
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    // Reuse one Entry object; insertWithOverflow hands back the evicted one.
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
        }
    }

    // 3. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<Integer>();

    // This stuff is complicated because if facet.mincount=0, the counts needs
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort)
            || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<Entry>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }

        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }

        if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
            if (!sf.indexed()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
                        + sf.getName() + " which is not indexed");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<String>();
            // Drain the entries skipped by the offset so they are not
            // re-added as zero-count terms.
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getAtomicReader().terms(fieldName);
            if (terms != null) {
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator(null);
                BytesRef term;
                switch (termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
                }
                final CharsRef spare = new CharsRef();
                // Skip enough unseen terms to honor the remaining offset
                // (hashTable.size entries were already accounted for).
                for (int skipped = hashTable.size; skipped < offset && term != null
                        && StringHelper.startsWith(term, prefix);) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                // Append zero-count terms until the limit is reached.
                for (; term != null && StringHelper.startsWith(term, prefix)
                        && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // sort=index, mincount=0 and we have less than limit items
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
                    + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        // Map readable term -> count for everything the PQ collected.
        final Map<String, Integer> counts = new HashMap<String, Integer>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator(null);
            BytesRef term;
            switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
                term = termsEnum.term();
                break;
            case END:
                term = null;
                break;
            default:
                throw new AssertionError();
            }
            final CharsRef spare = new CharsRef();
            // Skip 'offset' terms, then emit each remaining term with its
            // accumulated count (0 if the PQ never saw it).
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix)
                    && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }

    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}

From source file:org.apache.solr.request.PerSegmentSingleValuedFaceting.java

License:Apache License

/**
 * Computes facet counts by counting terms per index segment (possibly in
 * parallel via the given executor) and then merging the per-segment results
 * with a priority queue ordered by term.
 *
 * @param executor runs the per-segment counting tasks
 * @return facet labels (in readable form) mapped to their merged counts
 * @throws IOException on index access failure
 */
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {

    CompletionService<SegFacet> completionService = new ExecutorCompletionService<SegFacet>(executor);

    // reuse the translation logic to go from top level set to per-segment set
    baseSet = docs.getTopFilter();

    final List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<Callable<SegFacet>>();

    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;

    // Submit one counting task per segment, holding back any beyond the
    // configured thread budget.
    for (final AtomicReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);

        Callable<SegFacet> task = new Callable<SegFacet>() {
            @Override
            public SegFacet call() throws Exception {
                segFacet.countTerms();
                return segFacet;
            }
        };

        // TODO: if limiting threads, submit by largest segment first?

        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }

    // now merge the per-segment results: the queue orders segments by their
    // current term so equal terms across segments can be summed together.
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };

    boolean hasMissingCount = false;
    int missingCount = 0;
    // Collect each finished segment, topping up the executor from the
    // pending list as results come back.
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;

        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag before surfacing the failure.
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }

        if (seg.startTermIndex < seg.endTermIndex) {
            // Term index -1 holds the "missing value" bucket.
            if (seg.startTermIndex == -1) {
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            if (seg.pos < seg.endTermIndex) {
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }

    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }

    BytesRef val = new BytesRef();

    // Merge loop: repeatedly take the smallest current term across segments
    // and sum its counts from every segment positioned on that term.
    while (queue.size() > 0) {
        SegFacet seg = queue.top();

        // we will normally end up advancing the term enum for this segment
        // while still using "val", so we need to make a copy since the BytesRef
        // may be shared across calls.
        val.copyBytes(seg.tempBR);

        int count = 0;

        do {
            count += seg.counts[seg.pos - seg.startTermIndex];

            // TODO: OPTIMIZATION...
            // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
            seg.pos++;
            if (seg.pos >= seg.endTermIndex) {
                // This segment is exhausted; move on to the next-best one.
                queue.pop();
                seg = queue.top();
            } else {
                seg.tempBR = seg.tenum.next();
                seg = queue.updateTop();
            }
        } while (seg != null && val.compareTo(seg.tempBR) == 0);

        boolean stop = collector.collect(val, count);
        if (stop)
            break;
    }

    NamedList<Integer> res = collector.getFacetCounts();

    // convert labels to readable form
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }

    if (missing) {
        if (!hasMissingCount) {
            // No segment reported a missing bucket; compute it directly.
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }

    return res;
}