Example usage for org.apache.lucene.search CollectionStatistics sumTotalTermFreq

List of usage examples for org.apache.lucene.search CollectionStatistics sumTotalTermFreq

Introduction

On this page you can find example usages of org.apache.lucene.search CollectionStatistics sumTotalTermFreq.

Prototype

long sumTotalTermFreq

To view the source code for org.apache.lucene.search CollectionStatistics sumTotalTermFreq, click the Source Link.

Usage

From source file:BM25LSimilarity.java

License:Apache License

/**
 * The default implementation computes the average as
 * <code>sumTotalTermFreq / maxDoc</code>, or returns <code>1</code> if the
 * index does not store sumTotalTermFreq: any field that omits frequency
 * information)./*from  w w w  .  j  a  v  a  2s. com*/
 */
protected float avgFieldLength(CollectionStatistics collectionStats) {
    final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
    if (sumTotalTermFreq <= 0) {
        return 1f; // field does not exist, or stat is unsupported
    } else {
        return (float) (sumTotalTermFreq / (double) collectionStats.maxDoc());
    }
}

From source file:com.o19s.bm25f.BM25FSimilarity.java

License:Apache License

/**
 * Computes the average field length as <code>sumTotalTermFreq / docCount</code>.
 *
 * <p>Returns <code>1</code> when <code>sumTotalTermFreq</code> is not positive
 * (for example a field that omits frequency information). When
 * <code>docCount</code> is <code>-1</code>, <code>maxDoc</code> is used as the
 * divisor instead.
 *
 * @param collectionStats collection-level statistics of the field
 * @return the average field length, or <code>1</code> if it cannot be computed
 */
protected float avgFieldLength(CollectionStatistics collectionStats) {
    final long totalTermFreq = collectionStats.sumTotalTermFreq();
    if (totalTermFreq <= 0) {
        // field does not exist, or stat is unsupported
        return 1f;
    }
    final long reportedDocCount = collectionStats.docCount();
    final long divisor;
    if (reportedDocCount == -1) {
        divisor = collectionStats.maxDoc();
    } else {
        divisor = reportedDocCount;
    }
    return (float) (totalTermFreq / (double) divisor);
}

From source file:eu.europeana.ranking.bm25f.similarity.BM25FSimilarity.java

License:Apache License

/**
 * Computes the average length of a field from its collection statistics.
 *
 * <p>Returns <code>1</code> when <code>sumTotalTermFreq</code> is not positive
 * (missing or empty field, or the statistic is unavailable) rather than
 * dividing, which would otherwise produce NaN (0/0) or a negative average.
 * When <code>docCount</code> is not positive, <code>maxDoc</code> is used as
 * the divisor.
 *
 * @param stats
 *            the length statistics of a field.
 * @return the average length of the field, or <code>1</code> if it cannot be
 *         computed.
 */
private float avgFieldLength(CollectionStatistics stats) {
    final long sumTotalTermFreq = stats.sumTotalTermFreq();
    if (sumTotalTermFreq <= 0) {
        // field does not exist, is empty, or the stat is unsupported
        return 1f;
    }
    final long docCount = stats.docCount();
    // NOTE(review): -1 is taken to mean "docCount not available"; confirm
    // against the Lucene version in use.
    final long divisor = docCount > 0 ? docCount : stats.maxDoc();
    if (divisor <= 0) {
        return 1f;
    }
    // Same float division as before for valid statistics.
    return (float) sumTotalTermFreq / (float) divisor;
}

From source file:org.apache.solr.search.stats.CollectionStats.java

License:Apache License

/**
 * Creates a snapshot by copying every statistic exposed by the given
 * {@link CollectionStatistics}.
 *
 * @param stats the statistics to copy the values from
 */
public CollectionStats(CollectionStatistics stats) {
    // Field identity first, then the four numeric statistics.
    field = stats.field();
    maxDoc = stats.maxDoc();
    docCount = stats.docCount();
    sumTotalTermFreq = stats.sumTotalTermFreq();
    sumDocFreq = stats.sumDocFreq();
}

From source file:org.elasticsearch.action.search.SearchPhaseController.java

License:Apache License

/**
 * Merges the term statistics, field statistics and maxDoc of every entry in
 * {@code results} into a single {@link AggregatedDfs}.
 *
 * <p>Statistics for a term or field seen in more than one entry are combined:
 * {@code docFreq} and {@code maxDoc} are added directly, the remaining values
 * are combined through {@code optionalSum}.
 *
 * @param results the DFS results to aggregate
 * @return the aggregated statistics
 */
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
    ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
    long aggMaxDoc = 0;
    for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
        final DfsSearchResult shardResult = lEntry.value;

        // --- term statistics ---
        final Term[] shardTerms = shardResult.terms();
        final TermStatistics[] shardTermStats = shardResult.termStatistics();
        assert shardTerms.length == shardTermStats.length;
        for (int t = 0; t < shardTerms.length; t++) {
            final Term term = shardTerms[t];
            assert term != null;
            final TermStatistics known = termStatistics.get(term);
            if (known == null) {
                // first time this term is seen: take the statistics as they are
                termStatistics.put(term, shardTermStats[t]);
                continue;
            }
            assert term.bytes().equals(known.term());
            final TermStatistics incoming = shardTermStats[t];
            // totalTermFrequency is an optional statistic we need to check if either one or both
            // are set to -1 which means not present and then set it globally to -1
            termStatistics.put(term,
                    new TermStatistics(known.term(), known.docFreq() + incoming.docFreq(),
                            optionalSum(known.totalTermFreq(), incoming.totalTermFreq())));
        }

        // --- field statistics: walk the map's backing arrays directly ---
        assert !shardResult.fieldStatistics().containsKey(null);
        final Object[] keys = shardResult.fieldStatistics().keys;
        final Object[] values = shardResult.fieldStatistics().values;
        for (int slot = 0; slot < keys.length; slot++) {
            if (keys[slot] == null) {
                continue; // nothing stored in this slot
            }
            final String key = (String) keys[slot];
            final CollectionStatistics value = (CollectionStatistics) values[slot];
            assert key != null;
            final CollectionStatistics known = fieldStatistics.get(key);
            if (known == null) {
                fieldStatistics.put(key, value);
                continue;
            }
            final CollectionStatistics merged = new CollectionStatistics(key,
                    known.maxDoc() + value.maxDoc(),
                    optionalSum(known.docCount(), value.docCount()),
                    optionalSum(known.sumTotalTermFreq(), value.sumTotalTermFreq()),
                    optionalSum(known.sumDocFreq(), value.sumDocFreq()));
            fieldStatistics.put(key, merged);
        }

        aggMaxDoc += shardResult.maxDoc();
    }
    return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}

From source file:org.elasticsearch.action.termvectors.TermVectorsWriter.java

License:Apache License

/**
 * Writes the statistics of one field in this order: sumTotalTermFreq,
 * sumDocFreq, docCount.
 *
 * <p>Each value is asserted to be at least {@code -1} and is written with the
 * "potentially negative" variable-length writers. The doc count is narrowed to
 * {@code int} before it is written.
 *
 * @param fieldStats the statistics of the field to write
 * @throws IOException if writing fails
 */
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    final long sumTotalTermFreq = fieldStats.sumTotalTermFreq();
    assert sumTotalTermFreq >= -1;
    writePotentiallyNegativeVLong(sumTotalTermFreq);

    final long sumDocFreq = fieldStats.sumDocFreq();
    assert sumDocFreq >= -1;
    writePotentiallyNegativeVLong(sumDocFreq);

    final int docCount = (int) fieldStats.docCount();
    assert docCount >= -1;
    writePotentiallyNegativeVInt(docCount);
}

From source file:org.elasticsearch.index.similarity.ScriptedSimilarity.java

License:Apache License

/**
 * Builds the weight for a query from the collection-level and term-level
 * statistics.
 *
 * <p>If the collection reports a doc count of {@code -1}, {@code maxDoc} is
 * used in its place. One {@code Term} is created per entry of
 * {@code termStats}, in the same order.
 *
 * @param boost           the query boost
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of each query term
 * @return the weight wrapping the field name, query, field and term data
 */
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    final Query query = new Query(boost);

    final long reportedDocCount = collectionStats.docCount();
    final long docCount = reportedDocCount == -1 ? collectionStats.maxDoc() : reportedDocCount;
    final Field field = new Field(docCount, collectionStats.sumDocFreq(), collectionStats.sumTotalTermFreq());

    final Term[] terms = new Term[termStats.length];
    int next = 0;
    for (TermStatistics stat : termStats) {
        terms[next++] = new Term(stat.docFreq(), stat.totalTermFreq());
    }
    return new Weight(collectionStats.field(), query, field, terms);
}

From source file:org.elasticsearch.search.controller.SearchPhaseController.java

License:Apache License

/**
 * Merges the term statistics, field statistics and maxDoc of every entry in
 * {@code results} into a single {@link AggregatedDfs}.
 *
 * <p>Statistics for a term or field seen in more than one entry are combined:
 * {@code docFreq} and {@code maxDoc} are added directly, the remaining values
 * are combined through {@code optionalSum}.
 *
 * @param results the DFS results to aggregate
 * @return the aggregated statistics
 */
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectOpenHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
    ObjectObjectOpenHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
    long aggMaxDoc = 0;
    for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
        final DfsSearchResult shardResult = lEntry.value;

        // --- term statistics ---
        final Term[] shardTerms = shardResult.terms();
        final TermStatistics[] shardTermStats = shardResult.termStatistics();
        assert shardTerms.length == shardTermStats.length;
        for (int t = 0; t < shardTerms.length; t++) {
            final Term term = shardTerms[t];
            assert term != null;
            final TermStatistics known = termStatistics.get(term);
            if (known == null) {
                // first time this term is seen: take the statistics as they are
                termStatistics.put(term, shardTermStats[t]);
                continue;
            }
            assert term.bytes().equals(known.term());
            final TermStatistics incoming = shardTermStats[t];
            // totalTermFrequency is an optional statistic we need to check if either one or both
            // are set to -1 which means not present and then set it globally to -1
            termStatistics.put(term,
                    new TermStatistics(known.term(), known.docFreq() + incoming.docFreq(),
                            optionalSum(known.totalTermFreq(), incoming.totalTermFreq())));
        }

        // --- field statistics: walk the map's backing arrays directly ---
        final boolean[] allocated = shardResult.fieldStatistics().allocated;
        final Object[] keys = shardResult.fieldStatistics().keys;
        final Object[] values = shardResult.fieldStatistics().values;
        for (int slot = 0; slot < allocated.length; slot++) {
            if (!allocated[slot]) {
                continue; // slot not in use
            }
            final String key = (String) keys[slot];
            final CollectionStatistics value = (CollectionStatistics) values[slot];
            assert key != null;
            final CollectionStatistics known = fieldStatistics.get(key);
            if (known == null) {
                fieldStatistics.put(key, value);
                continue;
            }
            final CollectionStatistics merged = new CollectionStatistics(key,
                    known.maxDoc() + value.maxDoc(),
                    optionalSum(known.docCount(), value.docCount()),
                    optionalSum(known.sumTotalTermFreq(), value.sumTotalTermFreq()),
                    optionalSum(known.sumDocFreq(), value.sumDocFreq()));
            fieldStatistics.put(key, merged);
        }

        aggMaxDoc += shardResult.maxDoc();
    }
    return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}

From source file:org.elasticsearch.search.dfs.DfsSearchResult.java

License:Apache License

/**
 * Serializes a map of per-field collection statistics to {@code out}.
 *
 * <p>Writes the number of entries, then for every allocated slot of the map:
 * the field name, {@code maxDoc}, and the results of {@code addOne} applied to
 * {@code docCount}, {@code sumTotalTermFreq} and {@code sumDocFreq}.
 *
 * @param out             the stream to write to
 * @param fieldStatistics the per-field statistics, keyed by field name
 * @throws IOException if writing to the stream fails
 */
public static void writeFieldStats(StreamOutput out,
        ObjectObjectOpenHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());
    // Walk the map's backing arrays directly; only allocated slots hold an entry.
    final boolean[] allocated = fieldStatistics.allocated;
    final Object[] keys = fieldStatistics.keys;
    final Object[] values = fieldStatistics.values;
    for (int slot = 0; slot < allocated.length; slot++) {
        if (!allocated[slot]) {
            continue;
        }
        out.writeString((String) keys[slot]);
        final CollectionStatistics fieldStats = (CollectionStatistics) values[slot];
        assert fieldStats.maxDoc() >= 0;
        out.writeVLong(fieldStats.maxDoc());
        // NOTE(review): addOne presumably shifts a -1 "not available" value to 0
        // so it can be written as a VLong; confirm against addOne.
        out.writeVLong(addOne(fieldStats.docCount()));
        out.writeVLong(addOne(fieldStats.sumTotalTermFreq()));
        out.writeVLong(addOne(fieldStats.sumDocFreq()));
    }
}