Example usage for org.apache.lucene.search TermStatistics term

List of usage examples for org.apache.lucene.search TermStatistics term

Introduction

On this page you can find example usages of org.apache.lucene.search.TermStatistics.term.

Prototype

BytesRef term

To view the source code for org.apache.lucene.search.TermStatistics.term, click the Source Link.

Usage

From source file:com.xiaomi.linden.lucene.similarity.LindenSimilarity.java

License:Apache License

/**
 * Builds the IDF explanation for a single term.
 *
 * <p>The IDF value itself is looked up from {@code idfManager} by the term's UTF-8 text; the
 * document frequency and max-doc count are only used in the explanation's description.
 *
 * @param collectionStats collection-level statistics; supplies {@code maxDoc}
 * @param termStats       term-level statistics; supplies {@code docFreq} and the term bytes
 * @return an explanation carrying the looked-up IDF and a {@code idf(docFreq=…, maxDocs=…)} label
 */
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
    final long docFreq = termStats.docFreq();
    final long maxDocs = collectionStats.maxDoc();

    final String termText = termStats.term().utf8ToString();
    final float idf = idfManager.getIDF(termText);

    final String description = new StringBuilder("idf(docFreq=")
            .append(docFreq)
            .append(", maxDocs=")
            .append(maxDocs)
            .append(")")
            .toString();
    return new Explanation(idf, description);
}

From source file:org.apache.solr.search.stats.TermStats.java

License:Apache License

/**
 * Captures the statistics of one term within a field.
 *
 * @param field name of the field the term belongs to
 * @param stats Lucene statistics for the term; its bytes, document frequency and total term
 *              frequency are copied into this instance
 */
public TermStats(String field, TermStatistics stats) {
    // Numeric statistics are copied verbatim.
    this.docFreq = stats.docFreq();
    this.totalTermFreq = stats.totalTermFreq();

    // The term is kept both as a Lucene Term and as a "field:text" label.
    this.t = new Term(field, stats.term());
    this.term = new StringBuilder()
            .append(field)
            .append(":")
            .append(stats.term().utf8ToString())
            .toString();
}

From source file:org.elasticsearch.action.search.SearchPhaseController.java

License:Apache License

/**
 * Merges the term and field statistics of every {@link DfsSearchResult} into one
 * {@link AggregatedDfs}.
 *
 * <p>Statistics for a term or field that appears in more than one result are combined:
 * {@code docFreq} and {@code maxDoc} are added directly, while the remaining counters go through
 * {@code optionalSum}. The per-result {@code maxDoc} values are summed into the aggregate.
 *
 * @param results the DFS results to merge
 * @return the aggregated term statistics, field statistics and total max-doc count
 */
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
    ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
    long aggMaxDoc = 0;

    for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
        final DfsSearchResult result = lEntry.value;

        // --- term statistics: terms[] and stats[] are parallel arrays ---
        final Term[] terms = result.terms();
        final TermStatistics[] stats = result.termStatistics();
        assert terms.length == stats.length;
        for (int idx = 0; idx < terms.length; idx++) {
            final Term term = terms[idx];
            assert term != null;
            final TermStatistics known = termStatistics.get(term);
            final TermStatistics merged;
            if (known == null) {
                // First time this term is seen: take the statistics as they are.
                merged = stats[idx];
            } else {
                assert term.bytes().equals(known.term());
                // totalTermFreq is an optional statistic: -1 means "not present", and if
                // either side is -1 the merged value is meant to be -1 as well.
                merged = new TermStatistics(known.term(),
                        known.docFreq() + stats[idx].docFreq(),
                        optionalSum(known.totalTermFreq(), stats[idx].totalTermFreq()));
            }
            termStatistics.put(term, merged);
        }

        // --- field statistics: walk the map's backing arrays, skipping empty slots ---
        assert !result.fieldStatistics().containsKey(null);
        final Object[] slotKeys = result.fieldStatistics().keys;
        final Object[] slotValues = result.fieldStatistics().values;
        for (int slot = 0; slot < slotKeys.length; slot++) {
            if (slotKeys[slot] == null) {
                continue;
            }
            final String fieldName = (String) slotKeys[slot];
            final CollectionStatistics incoming = (CollectionStatistics) slotValues[slot];
            assert fieldName != null;
            final CollectionStatistics known = fieldStatistics.get(fieldName);
            final CollectionStatistics combined = known == null
                    ? incoming
                    : new CollectionStatistics(fieldName,
                            known.maxDoc() + incoming.maxDoc(),
                            optionalSum(known.docCount(), incoming.docCount()),
                            optionalSum(known.sumTotalTermFreq(), incoming.sumTotalTermFreq()),
                            optionalSum(known.sumDocFreq(), incoming.sumDocFreq()));
            fieldStatistics.put(fieldName, combined);
        }

        aggMaxDoc += result.maxDoc();
    }
    return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}

From source file:org.elasticsearch.index.similarity.ClosedSimilarity.java

/**
 * Computes the similarity weight for the given terms of one field.
 *
 * <p>The weight's idf is the sum of {@code idf(docFreq, maxDoc)} over all supplied terms. Each
 * term's document frequency, the summed idf and the resulting weight's description are logged
 * at info level.
 *
 * @param collectionStats collection-level statistics; supplies the field name and {@code maxDoc}
 * @param termStats       statistics of every term contributing to the weight
 * @return a {@code ClosedSimWeight} for the field, the summed idf and the term statistics
 */
@Override
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
    float idf = 0.0f;
    // Accumulate the description in a builder instead of re-copying a String on every term.
    final StringBuilder desc = new StringBuilder("Field: ")
            .append(collectionStats.field())
            .append(" Terms: ");
    final long max = collectionStats.maxDoc();
    for (final TermStatistics stat : termStats) {
        final long df = stat.docFreq();
        final float termIdf = idf(df, max);
        idf += termIdf;
        // Decode the term bytes once; the text is used for both the log line and the description.
        final String termText = stat.term().utf8ToString();
        log.info("Calculating term frequency: " + termText + " Value: " + df);
        desc.append(termText).append(" ");
    }
    log.info("Calculating term idf: " + desc + " Value: " + idf);
    ClosedSimWeight csw = new ClosedSimWeight(collectionStats.field(), idf, termStats);
    log.info("Calculating sim weight for field: " + csw.desc);
    return csw;
}

From source file:org.elasticsearch.search.controller.SearchPhaseController.java

License:Apache License

/**
 * Merges the term and field statistics of every {@link DfsSearchResult} into one
 * {@link AggregatedDfs}.
 *
 * <p>Statistics for a term or field that appears in more than one result are combined:
 * {@code docFreq} and {@code maxDoc} are added directly, while the remaining counters go through
 * {@code optionalSum}. The per-result {@code maxDoc} values are summed into the aggregate.
 *
 * @param results the DFS results to merge
 * @return the aggregated term statistics, field statistics and total max-doc count
 */
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectOpenHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
    ObjectObjectOpenHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
    long aggMaxDoc = 0;

    for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
        final DfsSearchResult result = lEntry.value;

        // --- term statistics: terms[] and stats[] are parallel arrays ---
        final Term[] terms = result.terms();
        final TermStatistics[] stats = result.termStatistics();
        assert terms.length == stats.length;
        for (int idx = 0; idx < terms.length; idx++) {
            final Term term = terms[idx];
            assert term != null;
            final TermStatistics known = termStatistics.get(term);
            final TermStatistics merged;
            if (known == null) {
                // First time this term is seen: take the statistics as they are.
                merged = stats[idx];
            } else {
                assert term.bytes().equals(known.term());
                // totalTermFreq is an optional statistic: -1 means "not present", and if
                // either side is -1 the merged value is meant to be -1 as well.
                merged = new TermStatistics(known.term(),
                        known.docFreq() + stats[idx].docFreq(),
                        optionalSum(known.totalTermFreq(), stats[idx].totalTermFreq()));
            }
            termStatistics.put(term, merged);
        }

        // --- field statistics: walk the map's backing arrays, visiting allocated slots only ---
        final boolean[] occupied = result.fieldStatistics().allocated;
        final Object[] slotKeys = result.fieldStatistics().keys;
        final Object[] slotValues = result.fieldStatistics().values;
        for (int slot = 0; slot < occupied.length; slot++) {
            if (!occupied[slot]) {
                continue;
            }
            final String fieldName = (String) slotKeys[slot];
            final CollectionStatistics incoming = (CollectionStatistics) slotValues[slot];
            assert fieldName != null;
            final CollectionStatistics known = fieldStatistics.get(fieldName);
            final CollectionStatistics combined = known == null
                    ? incoming
                    : new CollectionStatistics(fieldName,
                            known.maxDoc() + incoming.maxDoc(),
                            optionalSum(known.docCount(), incoming.docCount()),
                            optionalSum(known.sumTotalTermFreq(), incoming.sumTotalTermFreq()),
                            optionalSum(known.sumDocFreq(), incoming.sumDocFreq()));
            fieldStatistics.put(fieldName, combined);
        }

        aggMaxDoc += result.maxDoc();
    }
    return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}

From source file:org.elasticsearch.search.dfs.AggregatedDfs.java

License:Apache License

/**
 * Serializes the aggregated statistics to the given stream.
 *
 * <p>Layout: the number of term entries, then for every allocated slot of the term map the
 * term's field, the term bytes, the statistics' term bytes, the document frequency and the
 * total term frequency (passed through {@code DfsSearchResult.addOne}); after that the field
 * statistics via {@code DfsSearchResult.writeFieldStats}, and finally {@code maxDoc}.
 *
 * @param out stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(final StreamOutput out) throws IOException {
    out.writeVInt(termStatistics.size());

    // Iterate the map's backing arrays directly; only allocated slots hold an entry.
    final boolean[] occupied = termStatistics.allocated;
    final Object[] slotKeys = termStatistics.keys;
    final Object[] slotValues = termStatistics.values;
    for (int slot = 0; slot < occupied.length; slot++) {
        if (!occupied[slot]) {
            continue;
        }
        final Term key = (Term) slotKeys[slot];
        out.writeString(key.field());
        out.writeBytesRef(key.bytes());

        final TermStatistics entry = (TermStatistics) slotValues[slot];
        out.writeBytesRef(entry.term());
        out.writeVLong(entry.docFreq());
        // NOTE(review): addOne presumably shifts the "-1 = not present" marker to a
        // non-negative value so it can be written as a VLong — confirm in DfsSearchResult.
        out.writeVLong(DfsSearchResult.addOne(entry.totalTermFreq()));
    }

    DfsSearchResult.writeFieldStats(out, fieldStatistics);
    out.writeVLong(maxDoc);
}