List of usage examples for org.apache.lucene.search.TermStatistics.term()
BytesRef term
To view the source code for org.apache.lucene.search.TermStatistics.term(), click the Source Link.
From source file:com.xiaomi.linden.lucene.similarity.LindenSimilarity.java
License:Apache License
/**
 * Produces the IDF explanation for a single term.
 *
 * <p>The IDF value itself is looked up in {@code idfManager} by the term's
 * UTF-8 decoded text; the document frequency and max-doc count are read only
 * to be reported in the explanation's description.
 *
 * @param collectionStats collection-level statistics; supplies {@code maxDoc()}
 * @param termStats       term-level statistics; supplies the term and {@code docFreq()}
 * @return an explanation whose value is the looked-up IDF
 */
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
    final long docFreq = termStats.docFreq();
    final long maxDocs = collectionStats.maxDoc();

    final String termText = termStats.term().utf8ToString();
    final float idfValue = idfManager.getIDF(termText);

    final StringBuilder description = new StringBuilder("idf(docFreq=")
            .append(docFreq)
            .append(", maxDocs=")
            .append(maxDocs)
            .append(")");
    return new Explanation(idfValue, description.toString());
}
From source file:org.apache.solr.search.stats.TermStats.java
License:Apache License
public TermStats(String field, TermStatistics stats) { this.term = field + ":" + stats.term().utf8ToString(); this.t = new Term(field, stats.term()); this.docFreq = stats.docFreq(); this.totalTermFreq = stats.totalTermFreq(); }
From source file:org.elasticsearch.action.search.SearchPhaseController.java
License:Apache License
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) { ObjectObjectHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap(); ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap(); long aggMaxDoc = 0; for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) { final Term[] terms = lEntry.value.terms(); final TermStatistics[] stats = lEntry.value.termStatistics(); assert terms.length == stats.length; for (int i = 0; i < terms.length; i++) { assert terms[i] != null; TermStatistics existing = termStatistics.get(terms[i]); if (existing != null) { assert terms[i].bytes().equals(existing.term()); // totalTermFrequency is an optional statistic we need to check if either one or both // are set to -1 which means not present and then set it globally to -1 termStatistics.put(terms[i], new TermStatistics(existing.term(), existing.docFreq() + stats[i].docFreq(), optionalSum(existing.totalTermFreq(), stats[i].totalTermFreq()))); } else { termStatistics.put(terms[i], stats[i]); }//from w w w .j a v a 2 s. 
c o m } assert !lEntry.value.fieldStatistics().containsKey(null); final Object[] keys = lEntry.value.fieldStatistics().keys; final Object[] values = lEntry.value.fieldStatistics().values; for (int i = 0; i < keys.length; i++) { if (keys[i] != null) { String key = (String) keys[i]; CollectionStatistics value = (CollectionStatistics) values[i]; assert key != null; CollectionStatistics existing = fieldStatistics.get(key); if (existing != null) { CollectionStatistics merged = new CollectionStatistics(key, existing.maxDoc() + value.maxDoc(), optionalSum(existing.docCount(), value.docCount()), optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()), optionalSum(existing.sumDocFreq(), value.sumDocFreq())); fieldStatistics.put(key, merged); } else { fieldStatistics.put(key, value); } } } aggMaxDoc += lEntry.value.maxDoc(); } return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc); }
From source file:org.elasticsearch.index.similarity.ClosedSimilarity.java
/**
 * Computes the similarity weight for a field by summing the IDF of every
 * supplied term, logging each intermediate value at info level.
 *
 * @param collectionStats collection-level statistics; supplies the field name and {@code maxDoc()}
 * @param termStats       statistics of the terms that contribute to the weight
 * @return a {@code ClosedSimWeight} built from the field, the summed IDF and the term statistics
 */
@Override
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
    float idf = 0.0f;
    // A StringBuilder avoids re-copying the whole description for every term.
    final StringBuilder desc = new StringBuilder("Field: ")
            .append(collectionStats.field())
            .append(" Terms: ");
    final long max = collectionStats.maxDoc();
    for (final TermStatistics stat : termStats) {
        final long df = stat.docFreq();
        idf += idf(df, max);
        // Decode the term bytes once; the text is used for both the log line and the description.
        final String termText = stat.term().utf8ToString();
        // The value logged here is the document frequency (df).
        log.info("Calculating term frequency: " + termText + " Value: " + df);
        desc.append(termText).append(" ");
    }
    log.info("Calculating term idf: " + desc + " Value: " + idf);
    ClosedSimWeight csw = new ClosedSimWeight(collectionStats.field(), idf, termStats);
    log.info("Calculating sim weight for field: " + csw.desc);
    return csw;
}
From source file:org.elasticsearch.search.controller.SearchPhaseController.java
License:Apache License
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) { ObjectObjectOpenHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap(); ObjectObjectOpenHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap(); long aggMaxDoc = 0; for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) { final Term[] terms = lEntry.value.terms(); final TermStatistics[] stats = lEntry.value.termStatistics(); assert terms.length == stats.length; for (int i = 0; i < terms.length; i++) { assert terms[i] != null; TermStatistics existing = termStatistics.get(terms[i]); if (existing != null) { assert terms[i].bytes().equals(existing.term()); // totalTermFrequency is an optional statistic we need to check if either one or both // are set to -1 which means not present and then set it globally to -1 termStatistics.put(terms[i], new TermStatistics(existing.term(), existing.docFreq() + stats[i].docFreq(), optionalSum(existing.totalTermFreq(), stats[i].totalTermFreq()))); } else { termStatistics.put(terms[i], stats[i]); }//ww w.ja va 2 s . 
com } final boolean[] states = lEntry.value.fieldStatistics().allocated; final Object[] keys = lEntry.value.fieldStatistics().keys; final Object[] values = lEntry.value.fieldStatistics().values; for (int i = 0; i < states.length; i++) { if (states[i]) { String key = (String) keys[i]; CollectionStatistics value = (CollectionStatistics) values[i]; assert key != null; CollectionStatistics existing = fieldStatistics.get(key); if (existing != null) { CollectionStatistics merged = new CollectionStatistics(key, existing.maxDoc() + value.maxDoc(), optionalSum(existing.docCount(), value.docCount()), optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()), optionalSum(existing.sumDocFreq(), value.sumDocFreq())); fieldStatistics.put(key, merged); } else { fieldStatistics.put(key, value); } } } aggMaxDoc += lEntry.value.maxDoc(); } return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc); }
From source file:org.elasticsearch.search.dfs.AggregatedDfs.java
License:Apache License
/**
 * Serializes the aggregated statistics to {@code out}.
 *
 * <p>Layout: the number of term entries; then, for every occupied slot of the
 * term map, the term's field, the term's bytes, the statistics' term bytes,
 * the document frequency and the total term frequency (passed through
 * {@code DfsSearchResult.addOne} first); then the field statistics; finally
 * {@code maxDoc}.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(final StreamOutput out) throws IOException {
    out.writeVInt(termStatistics.size());

    final boolean[] occupied = termStatistics.allocated;
    final Object[] termKeys = termStatistics.keys;
    final Object[] termValues = termStatistics.values;
    for (int slot = 0; slot < occupied.length; slot++) {
        if (!occupied[slot]) {
            continue;
        }
        final Term term = (Term) termKeys[slot];
        out.writeString(term.field());
        out.writeBytesRef(term.bytes());

        final TermStatistics stats = (TermStatistics) termValues[slot];
        out.writeBytesRef(stats.term());
        out.writeVLong(stats.docFreq());
        // NOTE(review): addOne presumably shifts the -1 "not present" marker to 0 so it fits a VLong; confirm in DfsSearchResult.
        out.writeVLong(DfsSearchResult.addOne(stats.totalTermFreq()));
    }

    DfsSearchResult.writeFieldStats(out, fieldStatistics);
    out.writeVLong(maxDoc);
}