Example usage for org.apache.lucene.util PriorityQueue pop

List of usage examples for org.apache.lucene.util PriorityQueue pop

Introduction

On this page you can find example usages for org.apache.lucene.util.PriorityQueue#pop.

Prototype

public final T pop() 

Source Link

Document

Removes and returns the least element of the PriorityQueue in log(size) time.

Usage

From source file: alix.lucene.MoreLikeThis.java

License: Apache License

/**
 * Builds the "more like this" query from a queue of scored terms.
 * Each popped term contributes one optional (SHOULD) term clause; when
 * boosting is enabled, boosts are scaled relative to the score of the
 * first term popped from the queue.
 *
 * @param q priority queue of scored terms
 * @return the assembled boolean query
 */
private Query createQuery(PriorityQueue<ScoreTerm> q) {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    float referenceScore = -1;

    for (ScoreTerm st = q.pop(); st != null; st = q.pop()) {
        Query clause = new TermQuery(new Term(st.topField, st.word));

        if (boost) {
            // The first popped term's score becomes the scaling reference.
            if (referenceScore == -1) {
                referenceScore = st.score;
            }
            clause = new BoostQuery(clause, boostFactor * st.score / referenceScore);
        }

        try {
            builder.add(clause, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            // Clause limit reached; return what has been gathered so far.
            break;
        }
    }
    return builder.build();
}

From source file: alix.lucene.MoreLikeThis.java

License: Apache License

/**
 * Returns the most interesting terms for the given document, capped at
 * {@code maxQueryTerms}.
 *
 * @param docNum the Lucene document id to extract terms from
 * @return up to {@code maxQueryTerms} interesting words
 * @see #retrieveInterestingTerms(java.io.Reader, String)
 */
public String[] retrieveInterestingTerms(int docNum) throws IOException {
    ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
    PriorityQueue<ScoreTerm> pq = retrieveTerms(docNum);
    ScoreTerm scoreTerm;
    // retrieveTerms() returns all candidate words; the caller only wants
    // the top maxQueryTerms of them.
    int lim = maxQueryTerms;
    // Check the limit BEFORE popping: the original popped first, which
    // silently discarded one extra term from the queue once the limit hit.
    // NOTE(review): pop() yields terms least-first; confirm retrieveTerms()
    // caps the queue at maxQueryTerms so these really are the top words.
    while (lim-- > 0 && (scoreTerm = pq.pop()) != null) {
        al.add(scoreTerm.word); // the 1st entry is the interesting word
    }
    String[] res = new String[al.size()];
    return al.toArray(res);
}

From source file: alix.lucene.MoreLikeThis.java

License: Apache License

/**
 * Convenience routine to make it easy to return the most interesting words in a document.
 * More advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly.
 *
 * @param r the source document/*w  ww.  ja v  a 2  s.  c o m*/
 * @param fieldName field passed to analyzer to use when analyzing the content
 * @return the most interesting words in the document
 * @see #retrieveTerms(java.io.Reader, String)
 * @see #setMaxQueryTerms
 */
public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
    ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
    PriorityQueue<ScoreTerm> pq = retrieveTerms(r, fieldName);
    ScoreTerm scoreTerm;
    int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((scoreTerm = pq.pop()) != null) && lim-- > 0) {
        al.add(scoreTerm.word); // the 1st entry is the interesting word
    }
    String[] res = new String[al.size()];
    return al.toArray(res);
}

From source file: com.browseengine.bobo.facets.CombinedFacetAccessible.java

License: Apache License

/**
 * Returns the combined facet list ordered per the facet spec: by value
 * ascending, by hit count descending, or by a custom comparator. Honors
 * the spec's maximum facet count and minimum hit count.
 *
 * @return facets in the requested order
 * @throws IllegalStateException if this collector was already closed
 */
public List<BrowseFacet> getFacets() {
    if (_closed) {
        throw new IllegalStateException("This instance of count collector was already closed");
    }
    int maxCnt = _fspec.getMaxCount();
    if (maxCnt <= 0)
        maxCnt = Integer.MAX_VALUE; // non-positive max count means unbounded
    int minHits = _fspec.getMinHitCount();
    LinkedList<BrowseFacet> list = new LinkedList<BrowseFacet>();

    int cnt = 0;
    Comparable facet = null;
    FacetIterator iter = (FacetIterator) this.iterator();
    Comparator<BrowseFacet> comparator;
    if (FacetSortSpec.OrderValueAsc.equals(_fspec.getOrderBy())) {
        while ((facet = iter.next(minHits)) != null) {
            // find the next facet whose combined hit count obeys minHits
            list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            if (++cnt >= maxCnt)
                break;
        }
    } else if (FacetSortSpec.OrderHitsDesc.equals(_fspec.getOrderBy())) {
        // Hits descending; ties broken by value ascending.
        comparator = new Comparator<BrowseFacet>() {
            public int compare(BrowseFacet f1, BrowseFacet f2) {
                int val = f2.getHitCount() - f1.getHitCount();
                if (val == 0) {
                    val = (f1.getValue().compareTo(f2.getValue()));
                }
                return val;
            }
        };
        if (maxCnt != Integer.MAX_VALUE) {
            // we will maintain a min heap of size maxCnt
            // Order by hits in descending order and max count is supplied
            PriorityQueue queue = createPQ(maxCnt, comparator);
            int qsize = 0;
            // Fill the heap with the first maxCnt qualifying facets.
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            if (facet != null) {
                // Heap is full. From here on, minHits is REUSED as the
                // eviction threshold: only facets strictly beating the
                // heap's weakest entry are worth fetching at all.
                BrowseFacet rootFacet = (BrowseFacet) queue.top();
                minHits = rootFacet.getHitCount() + 1;
                // facet count less than top of min heap, it will never be added 
                while (((facet = iter.next(minHits)) != null)) {
                    // Overwrite the weakest entry in place, then re-heapify.
                    rootFacet.setValue(String.valueOf(facet));
                    rootFacet.setHitCount(iter.count);
                    rootFacet = (BrowseFacet) queue.updateTop();
                    minHits = rootFacet.getHitCount() + 1;
                }
            }
            // at this point, queue contains top maxCnt facets that have hitcount >= minHits
            while (qsize-- > 0) {
                // append each entry to the beginning of the facet list to order facets by hits descending
                list.addFirst((BrowseFacet) queue.pop());
            }
        } else {
            // no maxCnt specified. So fetch all facets according to minHits and sort them later
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    } else // FacetSortSpec.OrderByCustom.equals(_fspec.getOrderBy()
    {
        comparator = _fspec.getCustomComparatorFactory().newComparator();
        if (maxCnt != Integer.MAX_VALUE) {
            // Bounded: keep a heap of size maxCnt under the custom order.
            PriorityQueue queue = createPQ(maxCnt, comparator);
            BrowseFacet browseFacet = new BrowseFacet();
            int qsize = 0;
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            if (facet != null) {
                while ((facet = iter.next(minHits)) != null) {
                    // check with the top of min heap
                    browseFacet.setHitCount(iter.count);
                    browseFacet.setValue(String.valueOf(facet));
                    // insertWithOverflow returns the evicted entry, which is
                    // recycled as the scratch object for the next iteration.
                    browseFacet = (BrowseFacet) queue.insertWithOverflow(browseFacet);
                }
            }
            // remove from queue and add to the list
            while (qsize-- > 0)
                list.addFirst((BrowseFacet) queue.pop());
        } else {
            // order by custom but no max count supplied
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    }
    return list;
}

From source file: com.core.nlp.query.MoreLikeThis.java

License: Apache License

/**
 * Builds the "more like this" query from a queue of scored terms.
 * Each popped term becomes one optional (SHOULD) term clause; when
 * boosting is enabled, boosts are scaled relative to the score of the
 * first term popped from the queue.
 *
 * @param q priority queue of scored terms
 * @return the assembled boolean query
 */
private Query createQuery(PriorityQueue<ScoreTerm> q) {
    BooleanQuery result = new BooleanQuery();
    float referenceScore = -1;

    for (ScoreTerm st = q.pop(); st != null; st = q.pop()) {
        TermQuery clause = new TermQuery(new Term(st.topField, st.word));

        if (boost) {
            // The first popped term's score becomes the scaling reference.
            if (referenceScore == -1) {
                referenceScore = st.score;
            }
            clause.setBoost(boostFactor * st.score / referenceScore);
        }

        try {
            result.add(clause, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            // Clause limit reached; return what has been gathered so far.
            break;
        }
    }
    return result;
}

From source file: io.ssc.relationdiscovery.KMeans.java

License: Open Source License

/**
 * Prints the {@code howMany} patterns closest to the given centroid,
 * nearest first.
 *
 * @param centroidIndex index into {@code centroids} of the reference centroid
 * @param howMany       number of closest patterns to print
 * @param patterns      maps row indices of {@code A} to pattern labels
 */
public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) {

    // Heap whose "least" element is the one with the LARGEST distance, so
    // insertWithOverflow evicts far points and retains the howMany closest.
    // (The original compared a.distance < b.distance, which made the heap
    // retain the FARTHEST points — the opposite of the method's name.)
    PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) {
        @Override
        protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) {
            return a.distance > b.distance;
        }
    };

    Vector centroid = centroids[centroidIndex];

    for (MatrixSlice rowSlice : A) {
        Vector row = rowSlice.vector();
        double distance = distanceMeasure.distance(centroid, row);
        queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index())));
    }

    // pop() yields the farthest of the retained points first; fill the
    // array from the back so printing runs nearest-first.
    PatternWithDistance[] closest = new PatternWithDistance[queue.size()];
    for (int i = closest.length - 1; i >= 0; i--) {
        closest[i] = queue.pop();
    }
    for (PatternWithDistance p : closest) {
        System.out.println("\t" + p);
    }
}

From source file: net.dataninja.ee.textEngine.facet.GroupCounts.java

License: Open Source License

/**
 * Construct the array of doc hits for the hit group, honoring the group's
 * start offset and page size.
 *
 * @param group       index of the hit group to build
 * @param resultGroup receives the doc hits and paging metadata
 */
private void buildDocHits(int group, ResultGroup resultGroup) {
    PriorityQueue queue = hitQueue[group];
    int nFound = queue.size();
    DocHitImpl[] hitArray = new DocHitImpl[nFound];
    // pop() yields hits least-first; fill the array from the back so
    // hitArray ends up best-first.
    for (int i = 0; i < nFound; i++) {
        int index = nFound - i - 1;
        hitArray[index] = (DocHitImpl) queue.pop();
    }

    int start = startDoc[group];
    int max = maxDocs[group];

    // Number of hits actually returned for this page of the group.
    int nHits = Math.max(0, Math.min(nFound - start, max));
    resultGroup.docHits = new DocHit[nHits];

    resultGroup.totalDocs = nDocHits(group);
    resultGroup.startDoc = start;
    resultGroup.endDoc = start + nHits;

    // Copy exactly nHits entries. The original looped to nFound, which
    // overruns docHits whenever maxDocs caps the page below nFound-start.
    for (int i = start; i < start + nHits; i++)
        resultGroup.docHits[i - start] = hitArray[i];
}

From source file: net.dataninja.ee.textEngine.MoreLikeThisQuery.java

License: Apache License

/**
 * Create the More like query from a PriorityQueue of scored words.
 *
 * @param indexReader used to skip words absent from each target field
 * @param q           queue of QueryWord entries
 * @return a boolean query with one span-or-near clause per field
 * @throws IOException if reading document frequencies fails
 */
private Query createQuery(IndexReader indexReader, PriorityQueue q) throws IOException {
    // Pop everything from the queue. pop() yields the least element first,
    // so filling the array from the end puts the best word at index 0.
    QueryWord[] queryWords = new QueryWord[q.size()];
    for (int i = q.size() - 1; i >= 0; i--)
        queryWords[i] = (QueryWord) q.pop();

    BooleanQuery query = new BooleanQuery(true /*disable coord*/);

    // At the moment, there's no need to scale by the best score. It simply
    // clouds the query explanation. It doesn't affect the scores, since
    // Lucene applies a query normalization factor anyway.
    //
    //float bestScore = (queryWords.length > 0) ? queryWords[0].score : 0.0f;
    for (int i = 0; i < fieldNames.length; i++) {
        ArrayList fieldClauses = new ArrayList();

        for (int j = 0; j < queryWords.length; j++) {
            QueryWord qw = queryWords[j];
            Term term = new Term(fieldNames[i], qw.word);

            // Skip words not present in this field.
            int docFreq = indexReader.docFreq(term);
            if (docFreq == 0)
                continue;

            // Add it to the query.
            SpanTermQuery tq = new SpanTermQuery(term);
            if (boost)
                tq.setBoost(qw.score);
            fieldClauses.add(tq);
        } // for j

        // If no terms for this field, skip it.
        if (fieldClauses.isEmpty())
            continue;

        SpanQuery[] clauses = (SpanQuery[]) fieldClauses.toArray(new SpanQuery[fieldClauses.size()]);

        // Now make a special Or-Near query out of the clauses.
        SpanOrNearQuery fieldQuery = new SpanOrNearQuery(clauses, 10, false);

        // Boost if necessary.
        if (fieldBoosts != null)
            fieldQuery.setBoost(fieldBoosts[i]);

        // We currently don't support more-like-this queries on the full text.
        // It would involve de-chunking, and also fancier logic to pick the
        // "most interesting" terms in the first place.
        //
        if (fieldNames[i].equals("text"))
            throw new RuntimeException("MoreLikeThisQuery does not support 'text' field.");

        // And add to the main query.
        query.add(fieldQuery, BooleanClause.Occur.SHOULD);
    } // for i

    // All done.
    return query;
}

From source file: org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java

License: Apache License

/**
 * Assembles an excerpt from the best highlight fragments.
 * With no offsets to highlight, returns the leading {@code surround * 2}
 * characters of the text (trimmed back to a whitespace boundary and
 * suffixed with " ...") as a single fragment.
 *
 * @param offsets       term offsets to highlight; may be null or empty
 * @param text          the full source text
 * @param excerptStart  markup opening the whole excerpt
 * @param excerptEnd    markup closing the whole excerpt
 * @param fragmentStart markup opening each fragment
 * @param fragmentEnd   markup closing each fragment
 * @param hlStart       markup opening each highlighted term
 * @param hlEnd         markup closing each highlighted term
 * @param maxFragments  maximum number of fragments kept in the queue
 * @param surround      number of context characters around highlights
 * @return the assembled excerpt markup
 */
protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) {

    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        StringBuffer excerpt = new StringBuffer(excerptStart);
        excerpt.append(fragmentStart);
        int min = excerpt.length();
        excerpt.append(text.substring(0, Math.min(text.length(), surround * 2)));
        if (text.length() > excerpt.length()) {
            // The text was truncated: back up to the last whitespace so the
            // excerpt doesn't end mid-word, and mark the cut with " ...".
            for (int i = excerpt.length() - 1; i > min; i--) {
                if (Character.isWhitespace(excerpt.charAt(i))) {
                    excerpt.delete(i, excerpt.length());
                    excerpt.append(" ...");
                    break;
                }
            }
        }
        excerpt.append(fragmentEnd).append(excerptEnd);
        return excerpt.toString();
    }

    // Greedily grow a fragment from each starting offset, keeping the best
    // maxFragments candidates in the priority queue.
    PriorityQueue bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
        for (int j = i + 1; j < offsets.length; j++) {
            if (!fi.add(offsets[j], text)) {
                break;
            }
        }
        bestFragments.insert(fi);
    }

    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first
    List infos = new LinkedList();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }

    Map offsetInfos = new IdentityHashMap();
    // remove overlapping fragment infos
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        boolean overlap = false;
        // A fragment overlaps if any of its offsets was already claimed by
        // an earlier (better) fragment. Identity map: offsets are compared
        // by instance, not by value.
        for (Iterator fit = fi.iterator(); fit.hasNext() && !overlap;) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            // Claim this fragment's offsets so later fragments can't reuse them.
            for (Iterator oit = fi.iterator(); oit.hasNext();) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }

    // create excerpts
    StringBuffer sb = new StringBuffer(excerptStart);
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        sb.append(fragmentStart);
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        for (Iterator fIt = fi.iterator(); fIt.hasNext();) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset()));
            }
            sb.append(hlStart);
            sb.append(text.substring(oi.getStartOffset(), oi.getEndOffset()));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}

From source file: org.apache.jackrabbit.core.query.lucene.MoreLikeThis.java

License: Apache License

/**
 * Builds the "more like this" query from a queue of ranked entries.
 * Each entry is an {@code Object[]} of {word, field name, score}; every
 * entry becomes one optional (SHOULD) term clause, boosted relative to
 * the score of the first entry popped when boosting is enabled. Stops
 * after {@code maxQueryTerms} clauses (if positive) or when the clause
 * limit is reached.
 *
 * @param q priority queue of {word, field, score} triples
 * @return the assembled boolean query
 */
private Query createQuery(PriorityQueue q) {
    BooleanQuery result = new BooleanQuery();
    int termCount = 0;
    float referenceScore = 0;

    Object popped;
    while ((popped = q.pop()) != null) {
        Object[] entry = (Object[]) popped;
        TermQuery clause = new JackrabbitTermQuery(new Term((String) entry[1], (String) entry[0]));

        if (boost) {
            float score = ((Float) entry[2]).floatValue();
            // The first popped entry's score becomes the scaling reference.
            if (termCount == 0) {
                referenceScore = score;
            }
            clause.setBoost(score / referenceScore);
        }

        try {
            result.add(clause, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            // Clause limit reached; return what has been gathered so far.
            break;
        }

        termCount++;
        if (maxQueryTerms > 0 && termCount >= maxQueryTerms) {
            break;
        }
    }

    return result;
}