List of usage examples for org.apache.lucene.analysis.CharArraySet iterator()
@Override @SuppressWarnings("unchecked") public Iterator<Object> iterator()
From source file: org.tallison.lucene.contrast.QueryToCorpusContraster.java
License: Apache License
public List<TermIDF> contrast(Query query, String fieldName, int numResults) throws IOException { TopScoreDocCollector results = TopScoreDocCollector.create(maxDocs, maxDocs + 10000); searcher.search(query, results);/*from www .ja v a 2 s.c o m*/ ScoreDoc[] scoreDocs = results.topDocs().scoreDocs; //if there are fewer documents than minTermFreq //return empty list now if (scoreDocs.length < minTermFreq) { return new ArrayList<TermIDF>(); } //total hack int initialSize = scoreDocs.length * 100; CharArrayMap<MutableValueInt> map = new CharArrayMap<MutableValueInt>(initialSize, ignoreCase); CharArraySet tmpSet = new CharArraySet(100, ignoreCase); Set<String> selector = new HashSet<String>(); selector.add(fieldName); for (ScoreDoc scoreDoc : scoreDocs) { //get terms from doc processDoc(scoreDoc.doc, fieldName, selector, tmpSet); //now update global doc freqs Iterator<Object> it = tmpSet.iterator(); while (it.hasNext()) { char[] token = (char[]) it.next(); MutableValueInt docCount = map.get(token, 0, token.length); if (docCount == null) { docCount = new MutableValueInt(); docCount.value = 1; } else { docCount.value++; } map.put(token, docCount); } tmpSet.clear(); } return getResults(fieldName, map, numResults); }