List of usage examples for the org.apache.commons.collections15.comparators.ReverseComparator constructor
public ReverseComparator(Comparator<T> comparator)
From source file: net.sf.jtmt.summarizers.LuceneSummarizer.java
/** * Compute top term query./*from ww w .ja v a 2 s. co m*/ * * @param ramdir the ramdir * @return the query * @throws Exception the exception */ private Query computeTopTermQuery(Directory ramdir) throws Exception { final Map<String, Integer> frequencyMap = new HashMap<String, Integer>(); List<String> termlist = new ArrayList<String>(); IndexReader reader = IndexReader.open(ramdir, true); TermEnum terms = reader.terms(); while (terms.next()) { Term term = terms.term(); String termText = term.text(); int frequency = reader.docFreq(term); frequencyMap.put(termText, frequency); termlist.add(termText); } reader.close(); // sort the term map by frequency descending Collections.sort(termlist, new ReverseComparator<String>(new ByValueComparator<String, Integer>(frequencyMap))); // retrieve the top terms based on topTermCutoff List<String> topTerms = new ArrayList<String>(); float topFreq = -1.0F; for (String term : termlist) { if (topFreq < 0.0F) { // first term, capture the value topFreq = (float) frequencyMap.get(term); topTerms.add(term); } else { // not the first term, compute the ratio and discard if below // topTermCutoff score float ratio = (float) ((float) frequencyMap.get(term) / topFreq); if (ratio >= topTermCutoff) { topTerms.add(term); } else { break; } } } StringBuilder termBuf = new StringBuilder(); BooleanQuery q = new BooleanQuery(); for (String topTerm : topTerms) { termBuf.append(topTerm).append("(").append(frequencyMap.get(topTerm)).append(");"); q.add(new TermQuery(new Term("text", topTerm)), Occur.SHOULD); } System.out.println(">>> top terms: " + termBuf.toString()); System.out.println(">>> query: " + q.toString()); return q; }