List of usage examples for org.apache.lucene.util.PriorityQueue.pop()
public final T pop()
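Removes and returns the least element of the queue, as ordered by the subclass's lessThan method, or null if the queue is empty. Because pop() always yields the current minimum, draining a queue with it produces elements in ascending order; most of the examples below rely on this, either looping until pop() returns null or filling an array from the back to get descending order. Below is a minimal sketch of the typical pattern, assuming a Lucene version where the queue capacity is passed to the constructor (as in the KMeans example further down); the ScoredDoc class and the sample scores are hypothetical, invented for illustration.

import org.apache.lucene.util.PriorityQueue;

public class PopExample {

    // Hypothetical value type for the sketch.
    static class ScoredDoc {
        final int doc;
        final float score;
        ScoredDoc(int doc, float score) { this.doc = doc; this.score = score; }
    }

    public static void main(String[] args) {
        // Bounded queue of the 3 highest-scoring docs: lessThan orders by score,
        // so top() is always the smallest entry and is the one evicted on overflow.
        PriorityQueue<ScoredDoc> pq = new PriorityQueue<ScoredDoc>(3) {
            @Override
            protected boolean lessThan(ScoredDoc a, ScoredDoc b) {
                return a.score < b.score;
            }
        };
        float[] scores = { 0.4f, 2.5f, 1.1f, 0.9f, 3.0f };
        for (int i = 0; i < scores.length; i++) {
            pq.insertWithOverflow(new ScoredDoc(i, scores[i]));
        }
        // pop() returns the least element first: prints scores 1.1, 2.5, 3.0.
        ScoredDoc d;
        while ((d = pq.pop()) != null) {
            System.out.println("doc=" + d.doc + " score=" + d.score);
        }
    }
}

Note that insertWithOverflow() evicts the least element once the queue is full, so a bounded queue ends up holding the greatest entries, and pop() then drains them smallest-first.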
From source file: alix.lucene.MoreLikeThis.java
License: Apache License

/**
 * Create the More like query from a PriorityQueue
 */
private Query createQuery(PriorityQueue<ScoreTerm> q) {
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    ScoreTerm scoreTerm;
    float bestScore = -1;
    while ((scoreTerm = q.pop()) != null) {
        Query tq = new TermQuery(new Term(scoreTerm.topField, scoreTerm.word));
        if (boost) {
            if (bestScore == -1) {
                bestScore = (scoreTerm.score);
            }
            float myScore = (scoreTerm.score);
            tq = new BoostQuery(tq, boostFactor * myScore / bestScore);
        }
        try {
            query.add(tq, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            break;
        }
    }
    return query.build();
}
From source file: alix.lucene.MoreLikeThis.java
License: Apache License

/**
 * @see #retrieveInterestingTerms(java.io.Reader, String)
 */
public String[] retrieveInterestingTerms(int docNum) throws IOException {
    ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
    PriorityQueue<ScoreTerm> pq = retrieveTerms(docNum);
    ScoreTerm scoreTerm;
    int lim = maxQueryTerms;
    // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((scoreTerm = pq.pop()) != null) && lim-- > 0) {
        al.add(scoreTerm.word); // the 1st entry is the interesting word
    }
    String[] res = new String[al.size()];
    return al.toArray(res);
}
From source file: alix.lucene.MoreLikeThis.java
License: Apache License

/**
 * Convenience routine to make it easy to return the most interesting words in a document.
 * More advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly.
 *
 * @param r the source document
 * @param fieldName field passed to analyzer to use when analyzing the content
 * @return the most interesting words in the document
 * @see #retrieveTerms(java.io.Reader, String)
 * @see #setMaxQueryTerms
 */
public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
    ArrayList<Object> al = new ArrayList<>(maxQueryTerms);
    PriorityQueue<ScoreTerm> pq = retrieveTerms(r, fieldName);
    ScoreTerm scoreTerm;
    int lim = maxQueryTerms;
    // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((scoreTerm = pq.pop()) != null) && lim-- > 0) {
        al.add(scoreTerm.word); // the 1st entry is the interesting word
    }
    String[] res = new String[al.size()];
    return al.toArray(res);
}
From source file: com.browseengine.bobo.facets.CombinedFacetAccessible.java
License: Apache License

public List<BrowseFacet> getFacets() {
    if (_closed) {
        throw new IllegalStateException("This instance of count collector was already closed");
    }
    int maxCnt = _fspec.getMaxCount();
    if (maxCnt <= 0)
        maxCnt = Integer.MAX_VALUE;
    int minHits = _fspec.getMinHitCount();
    LinkedList<BrowseFacet> list = new LinkedList<BrowseFacet>();
    int cnt = 0;
    Comparable facet = null;
    FacetIterator iter = (FacetIterator) this.iterator();
    Comparator<BrowseFacet> comparator;
    if (FacetSortSpec.OrderValueAsc.equals(_fspec.getOrderBy())) {
        while ((facet = iter.next(minHits)) != null) {
            // find the next facet whose combined hit count obeys minHits
            list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            if (++cnt >= maxCnt)
                break;
        }
    } else if (FacetSortSpec.OrderHitsDesc.equals(_fspec.getOrderBy())) {
        comparator = new Comparator<BrowseFacet>() {
            public int compare(BrowseFacet f1, BrowseFacet f2) {
                int val = f2.getHitCount() - f1.getHitCount();
                if (val == 0) {
                    val = (f1.getValue().compareTo(f2.getValue()));
                }
                return val;
            }
        };
        if (maxCnt != Integer.MAX_VALUE) {
            // we will maintain a min heap of size maxCnt
            // Order by hits in descending order and max count is supplied
            PriorityQueue queue = createPQ(maxCnt, comparator);
            int qsize = 0;
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            if (facet != null) {
                BrowseFacet rootFacet = (BrowseFacet) queue.top();
                minHits = rootFacet.getHitCount() + 1;
                // facet count less than top of min heap, it will never be added
                while (((facet = iter.next(minHits)) != null)) {
                    rootFacet.setValue(String.valueOf(facet));
                    rootFacet.setHitCount(iter.count);
                    rootFacet = (BrowseFacet) queue.updateTop();
                    minHits = rootFacet.getHitCount() + 1;
                }
            }
            // at this point, queue contains top maxCnt facets that have hitcount >= minHits
            while (qsize-- > 0) {
                // append each entry to the beginning of the facet list to order facets by hits descending
                list.addFirst((BrowseFacet) queue.pop());
            }
        } else {
            // no maxCnt specified. So fetch all facets according to minHits and sort them later
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    } else { // FacetSortSpec.OrderByCustom.equals(_fspec.getOrderBy())
        comparator = _fspec.getCustomComparatorFactory().newComparator();
        if (maxCnt != Integer.MAX_VALUE) {
            PriorityQueue queue = createPQ(maxCnt, comparator);
            BrowseFacet browseFacet = new BrowseFacet();
            int qsize = 0;
            while ((qsize < maxCnt) && ((facet = iter.next(minHits)) != null)) {
                queue.add(new BrowseFacet(String.valueOf(facet), iter.count));
                qsize++;
            }
            if (facet != null) {
                while ((facet = iter.next(minHits)) != null) {
                    // check with the top of min heap
                    browseFacet.setHitCount(iter.count);
                    browseFacet.setValue(String.valueOf(facet));
                    browseFacet = (BrowseFacet) queue.insertWithOverflow(browseFacet);
                }
            }
            // remove from queue and add to the list
            while (qsize-- > 0)
                list.addFirst((BrowseFacet) queue.pop());
        } else {
            // order by custom but no max count supplied
            while ((facet = iter.next(minHits)) != null)
                list.add(new BrowseFacet(String.valueOf(facet), iter.count));
            Collections.sort(list, comparator);
        }
    }
    return list;
}
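A note on the idiom above: once the heap is full, the Bobo code reuses top()/updateTop() instead of pop() followed by add(). Mutating the least element in place and re-heapifying avoids allocating a new entry per candidate. Here is a self-contained sketch of the same pattern, with a hypothetical MutableCount holder standing in for BrowseFacet and hard-coded candidate counts in place of the facet iterator:

import org.apache.lucene.util.PriorityQueue;

public class UpdateTopExample {

    // Hypothetical mutable holder, in the spirit of BrowseFacet above.
    static class MutableCount {
        int count;
        MutableCount(int count) { this.count = count; }
    }

    public static void main(String[] args) {
        int maxCnt = 3;
        int[] candidates = { 5, 1, 9, 7, 3, 8, 2 };
        PriorityQueue<MutableCount> queue = new PriorityQueue<MutableCount>(maxCnt) {
            @Override
            protected boolean lessThan(MutableCount a, MutableCount b) {
                return a.count < b.count;
            }
        };
        int i = 0;
        for (; i < maxCnt; i++) {
            queue.add(new MutableCount(candidates[i])); // fill to capacity first
        }
        MutableCount root = queue.top(); // least element in the heap
        for (; i < candidates.length; i++) {
            if (candidates[i] > root.count) {
                root.count = candidates[i]; // overwrite the least element in place
                root = queue.updateTop();   // re-heapify; returns the new least
            }
        }
        // pop() drains in ascending order: prints 7, 8, 9.
        MutableCount m;
        while ((m = queue.pop()) != null) {
            System.out.println(m.count);
        }
    }
}

As in getFacets(), addFirst-style insertion of each popped element would turn the ascending pop order into a descending result list.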
From source file: com.core.nlp.query.MoreLikeThis.java
License: Apache License

/**
 * Create the More like query from a PriorityQueue
 */
private Query createQuery(PriorityQueue<ScoreTerm> q) {
    BooleanQuery query = new BooleanQuery();
    ScoreTerm scoreTerm;
    float bestScore = -1;
    while ((scoreTerm = q.pop()) != null) {
        TermQuery tq = new TermQuery(new Term(scoreTerm.topField, scoreTerm.word));
        if (boost) {
            if (bestScore == -1) {
                bestScore = (scoreTerm.score);
            }
            float myScore = (scoreTerm.score);
            tq.setBoost(boostFactor * myScore / bestScore);
        }
        try {
            query.add(tq, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            break;
        }
    }
    return query;
}
From source file: io.ssc.relationdiscovery.KMeans.java
License: Open Source License

public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) {
    PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) {
        @Override
        protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) {
            return a.distance < b.distance;
        }
    };
    Vector centroid = centroids[centroidIndex];
    for (MatrixSlice rowSlice : A) {
        Vector row = rowSlice.vector();
        double distance = distanceMeasure.distance(centroid, row);
        queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index())));
    }
    while (queue.size() > 0) {
        System.out.println("\t" + queue.pop());
    }
}
From source file: net.dataninja.ee.textEngine.facet.GroupCounts.java
License: Open Source License

/** Construct the array of doc hits for the hit group. */
private void buildDocHits(int group, ResultGroup resultGroup) {
    PriorityQueue queue = hitQueue[group];
    int nFound = queue.size();
    DocHitImpl[] hitArray = new DocHitImpl[nFound];
    for (int i = 0; i < nFound; i++) {
        int index = nFound - i - 1;
        hitArray[index] = (DocHitImpl) queue.pop();
    }
    int start = startDoc[group];
    int max = maxDocs[group];
    int nHits = Math.max(0, Math.min(nFound - start, max));
    resultGroup.docHits = new DocHit[nHits];
    resultGroup.totalDocs = nDocHits(group);
    resultGroup.startDoc = start;
    resultGroup.endDoc = start + nHits;
    for (int i = startDoc[group]; i < nFound; i++)
        resultGroup.docHits[i - start] = hitArray[i];
}
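Because pop() yields the least hit first, filling hitArray from the back (index nFound - i - 1) leaves the array ordered greatest-to-least without a separate sort. The MoreLikeThisQuery and WeightedHighlighter examples below use the same trick, popping into a reverse-indexed array and into position 0 of a list, respectively.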
From source file: net.dataninja.ee.textEngine.MoreLikeThisQuery.java
License: Apache License

/**
 * Create the More like query from a PriorityQueue
 */
private Query createQuery(IndexReader indexReader, PriorityQueue q) throws IOException {
    // Pop everything from the queue.
    QueryWord[] queryWords = new QueryWord[q.size()];
    for (int i = q.size() - 1; i >= 0; i--)
        queryWords[i] = (QueryWord) q.pop();

    BooleanQuery query = new BooleanQuery(true /*disable coord*/);

    // At the moment, there's no need to scale by the best score. It simply
    // clouds the query explanation. It doesn't affect the scores, since
    // Lucene applies a query normalization factor anyway.
    //
    //float bestScore = (queryWords.length > 0) ? queryWords[0].score : 0.0f;

    for (int i = 0; i < fieldNames.length; i++) {
        ArrayList fieldClauses = new ArrayList();
        for (int j = 0; j < queryWords.length; j++) {
            QueryWord qw = queryWords[j];
            Term term = new Term(fieldNames[i], qw.word);

            // Skip words not present in this field.
            int docFreq = indexReader.docFreq(term);
            if (docFreq == 0)
                continue;

            // Add it to the query.
            SpanTermQuery tq = new SpanTermQuery(term);
            if (boost)
                tq.setBoost(qw.score);
            fieldClauses.add(tq);
        } // for j

        // If no terms for this field, skip it.
        if (fieldClauses.isEmpty())
            continue;

        SpanQuery[] clauses = (SpanQuery[]) fieldClauses.toArray(new SpanQuery[fieldClauses.size()]);

        // Now make a special Or-Near query out of the clauses.
        SpanOrNearQuery fieldQuery = new SpanOrNearQuery(clauses, 10, false);

        // Boost if necessary.
        if (fieldBoosts != null)
            fieldQuery.setBoost(fieldBoosts[i]);

        // We currently don't support more-like-this queries on the full text.
        // It would involve de-chunking, and also fancier logic to pick the
        // "most interesting" terms in the first place.
        //
        if (fieldNames[i].equals("text"))
            throw new RuntimeException("MoreLikeThisQuery does not support 'text' field.");

        // And add to the main query.
        query.add(fieldQuery, BooleanClause.Occur.SHOULD);
    } // for i

    // All done.
    return query;
}
From source file: org.apache.jackrabbit.core.query.lucene.WeightedHighlighter.java
License: Apache License

protected String mergeFragments(TermVectorOffsetInfo[] offsets, String text, String excerptStart,
        String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd,
        int maxFragments, int surround) {
    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        StringBuffer excerpt = new StringBuffer(excerptStart);
        excerpt.append(fragmentStart);
        int min = excerpt.length();
        excerpt.append(text.substring(0, Math.min(text.length(), surround * 2)));
        if (text.length() > excerpt.length()) {
            for (int i = excerpt.length() - 1; i > min; i--) {
                if (Character.isWhitespace(excerpt.charAt(i))) {
                    excerpt.delete(i, excerpt.length());
                    excerpt.append(" ...");
                    break;
                }
            }
        }
        excerpt.append(fragmentEnd).append(excerptEnd);
        return excerpt.toString();
    }

    PriorityQueue bestFragments = new FragmentInfoPriorityQueue(maxFragments);
    for (int i = 0; i < offsets.length; i++) {
        FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
        for (int j = i + 1; j < offsets.length; j++) {
            if (!fi.add(offsets[j], text)) {
                break;
            }
        }
        bestFragments.insert(fi);
    }

    // retrieve fragment infos from queue and fill into list, least
    // fragment comes out first
    List infos = new LinkedList();
    while (bestFragments.size() > 0) {
        FragmentInfo fi = (FragmentInfo) bestFragments.pop();
        infos.add(0, fi);
    }

    Map offsetInfos = new IdentityHashMap();
    // remove overlapping fragment infos
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        boolean overlap = false;
        for (Iterator fit = fi.iterator(); fit.hasNext() && !overlap;) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fit.next();
            if (offsetInfos.containsKey(oi)) {
                overlap = true;
            }
        }
        if (overlap) {
            it.remove();
        } else {
            for (Iterator oit = fi.iterator(); oit.hasNext();) {
                offsetInfos.put(oit.next(), null);
            }
        }
    }

    // create excerpts
    StringBuffer sb = new StringBuffer(excerptStart);
    for (Iterator it = infos.iterator(); it.hasNext();) {
        FragmentInfo fi = (FragmentInfo) it.next();
        sb.append(fragmentStart);
        int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
        int len = startFragment(sb, text, fi.getStartOffset(), limit);
        TermVectorOffsetInfo lastOffsetInfo = null;
        for (Iterator fIt = fi.iterator(); fIt.hasNext();) {
            TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fIt.next();
            if (lastOffsetInfo != null) {
                // fill in text between terms
                sb.append(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset()));
            }
            sb.append(hlStart);
            sb.append(text.substring(oi.getStartOffset(), oi.getEndOffset()));
            sb.append(hlEnd);
            lastOffsetInfo = oi;
        }
        limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
        endFragment(sb, text, fi.getEndOffset(), limit);
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}
From source file: org.apache.jackrabbit.core.query.lucene.MoreLikeThis.java
License: Apache License

/**
 * Create the More like query from a PriorityQueue
 */
private Query createQuery(PriorityQueue q) {
    BooleanQuery query = new BooleanQuery();
    Object cur;
    int qterms = 0;
    float bestScore = 0;
    while (((cur = q.pop()) != null)) {
        Object[] ar = (Object[]) cur;
        TermQuery tq = new JackrabbitTermQuery(new Term((String) ar[1], (String) ar[0]));
        if (boost) {
            if (qterms == 0) {
                bestScore = ((Float) ar[2]).floatValue();
            }
            float myScore = ((Float) ar[2]).floatValue();
            tq.setBoost(myScore / bestScore);
        }
        try {
            query.add(tq, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            break;
        }
        qterms++;
        if (maxQueryTerms > 0 && qterms >= maxQueryTerms) {
            break;
        }
    }
    return query;
}