Example usage for org.apache.lucene.search CollectionStatistics field

List of usage examples for org.apache.lucene.search CollectionStatistics field

Introduction

On this page you can find example usages of the org.apache.lucene.search CollectionStatistics field.

Prototype

String field()

To view the source code for org.apache.lucene.search CollectionStatistics field, click the Source Link.

Usage

From source file:BM25LSimilarity.java

License:Apache License

/**
 * Builds the weight for the given collection and term statistics.
 *
 * <p>Computes the idf explanation (single-term or multi-term overload of
 * {@code idfExplain}, depending on how many term statistics were passed), the
 * average field length, and a 256-entry table holding the freq-independent
 * part of the BM25 equation for every possible norm byte.
 *
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return a {@code BM25Stats} built from the field name, idf, average field
 *         length and the norm table
 */
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
    final Explanation idf;
    if (termStats.length == 1) {
        idf = idfExplain(collectionStats, termStats[0]);
    } else {
        idf = idfExplain(collectionStats, termStats);
    }

    final float avgdl = avgFieldLength(collectionStats);

    // One slot per norm byte value: k1 * ((1 - b) + b * dl / avgdl + delta).
    final float[] normTable = new float[256];
    for (int norm = 0; norm < normTable.length; norm++) {
        final float lengthNorm = (1 - b) + b * decodeNormValue((byte) norm) / avgdl;
        normTable[norm] = k1 * (lengthNorm + delta);
    }
    return new BM25Stats(collectionStats.field(), idf, avgdl, normTable);
}

From source file:com.core.nlp.similarity.TFIDFSimilarity.java

License:Apache License

/**
 * Builds the weight for the given query boost, collection statistics and
 * term statistics.
 *
 * <p>The idf explanation comes from the single-term overload of
 * {@code idfExplain} when exactly one term statistic is supplied, otherwise
 * from the multi-term overload.
 *
 * @param queryBoost      boost forwarded unchanged to the returned stats
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return an {@code IDFStats} for the field, idf explanation and query boost
 */
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats,
        TermStatistics... termStats) {
    final Explanation idf;
    if (termStats.length == 1) {
        idf = idfExplain(collectionStats, termStats[0]);
    } else {
        idf = idfExplain(collectionStats, termStats);
    }
    final String field = collectionStats.field();
    return new IDFStats(field, idf, queryBoost);
}

From source file:com.o19s.bm25f.BM25FSimilarity.java

License:Apache License

/**
 * Builds the weight for the given collection and term statistics.
 *
 * <p>Computes the idf explanation, the average field length, and a table
 * with the freq-independent part of the BM25 equation for each of the 256
 * possible norm byte values.
 *
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return a {@code BM25Stats} built from the field name, idf, average field
 *         length and the norm table
 */
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
    final boolean singleTerm = termStats.length == 1;
    final Explanation idf = singleTerm
            ? idfExplain(collectionStats, termStats[0])
            : idfExplain(collectionStats, termStats);

    final float avgdl = avgFieldLength(collectionStats);

    // One slot per norm byte value: k1 * ((1 - b) + b * dl / avgdl).
    final float[] normTable = new float[256];
    int norm = 0;
    while (norm < normTable.length) {
        normTable[norm] = k1 * ((1 - b) + b * decodeNormValue((byte) norm) / avgdl);
        norm++;
    }
    return new BM25Stats(collectionStats.field(), idf, avgdl, normTable);
}

From source file:eu.europeana.ranking.bm25f.similarity.BM25FSimilarity.java

License:Apache License

/**
 * Builds the BM25F weight for one field.
 *
 * <p>Side effect: reloads {@code boosts}, {@code lengthBoosts} and {@code k1}
 * on this instance from {@code params} on every call.
 *
 * @param queryBoost      not used; the per-field boost from {@code boosts}
 *                        (default 1) is applied instead
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return a {@code BM25FSimWeight} for the field
 */
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats,
        TermStatistics... termStats) {
    final Explanation idf;
    if (termStats.length == 1) {
        idf = idfExplain(collectionStats, termStats[0]);
    } else {
        idf = idfExplain(collectionStats, termStats);
    }

    // Refresh the scoring parameters held on this instance.
    boosts = params.getBoosts();
    lengthBoosts = params.getbParams();
    k1 = params.getK1();

    final String field = collectionStats.field();
    final float avgdl = avgFieldLength(collectionStats);

    // Ignore the query boost and use the BM25F per-field boost (1 if none is configured).
    final float fieldBoost = boosts.containsKey(field) ? boosts.get(field) : 1;

    // No per-norm cache is precomputed here; null is passed in the fifth argument.
    return new BM25FSimWeight(field, idf, fieldBoost, avgdl, null, k1);
}

From source file:io.anserini.search.similarity.F2LogSimilarity.java

License:Apache License

/**
 * Builds the weight for the given collection and term statistics.
 *
 * <p>Computes the idf explanation, the average field length, and a table
 * with the freq-independent part of the f2log equation for each of the 256
 * possible norm byte values.
 *
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return an {@code F2LogStats} built from the field name, idf, average
 *         field length and the norm table
 */
@Override
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
    final Explanation idf;
    if (termStats.length != 1) {
        idf = idfExplain(collectionStats, termStats);
    } else {
        idf = idfExplain(collectionStats, termStats[0]);
    }

    final float avgdl = avgFieldLength(collectionStats);

    // One slot per norm byte value: s + s * dl / avgdl.
    final float[] normTable = new float[256];
    for (int norm = 0; norm < normTable.length; ++norm) {
        normTable[norm] = s + s * decodeNormValue((byte) norm) / avgdl;
    }
    return new F2LogStats(collectionStats.field(), idf, avgdl, normTable);
}

From source file:main.BM25VASimilarity.java

License:Apache License

/**
 * Builds the weight for the given collection and term statistics.
 *
 * <p>Computes the idf explanation, the average field length, and a 256-entry
 * table holding the decoded norm value for every possible norm byte.
 *
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return a {@code BM25Stats} built from the field name, idf, average field
 *         length and the decoded-norm table
 */
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
    final Explanation idf;
    if (termStats.length == 1) {
        idf = idfExplain(collectionStats, termStats[0]);
    } else {
        idf = idfExplain(collectionStats, termStats);
    }

    final float avgdl = avgFieldLength(collectionStats);

    // The cache should only contain the length of doc d: the k1/b/avgdl
    // normalisation (and the B_VA variant of it) is deliberately not folded
    // into the table here.
    final float[] decodedNorms = new float[256];
    for (int norm = 0; norm < decodedNorms.length; norm++) {
        decodedNorms[norm] = decodeNormValue((byte) norm);
    }
    return new BM25Stats(collectionStats.field(), idf, avgdl, decodedNorms);
}

From source file:org.apache.solr.search.stats.CollectionStats.java

License:Apache License

/**
 * Creates a snapshot by copying every value exposed by the given
 * {@code CollectionStatistics} into the fields of this object.
 *
 * @param stats the statistics to copy from
 */
public CollectionStats(CollectionStatistics stats) {
    // Field identity.
    this.field = stats.field();

    // Document-level counts.
    this.docCount = stats.docCount();
    this.maxDoc = stats.maxDoc();

    // Aggregated frequency sums.
    this.sumDocFreq = stats.sumDocFreq();
    this.sumTotalTermFreq = stats.sumTotalTermFreq();
}

From source file:org.elasticsearch.index.similarity.ClosedSimilarity.java

/**
 * Builds the weight for the given collection and term statistics.
 *
 * <p>The idf of the weight is the sum of {@code idf(docFreq, maxDoc)} over
 * all supplied terms. Each term's document frequency, the accumulated idf
 * and the resulting weight description are logged at info level.
 *
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return a {@code ClosedSimWeight} for the field, summed idf and terms
 */
@Override
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
    final String field = collectionStats.field();
    final long max = collectionStats.maxDoc();

    // Accumulate the description in a builder instead of re-copying a String per term.
    final StringBuilder desc = new StringBuilder("Field: ").append(field).append(" Terms: ");
    float idfSum = 0.0f;
    for (final TermStatistics stat : termStats) {
        final long df = stat.docFreq();
        idfSum += idf(df, max);
        // Decode the term bytes once and reuse the text for both the log line and the description.
        final String termText = stat.term().utf8ToString();
        // NOTE(review): the message says "term frequency" but the value logged is the document frequency.
        log.info("Calculating term frequency: " + termText + " Value: " + df);
        desc.append(termText).append(' ');
    }
    log.info("Calculating term idf: " + desc + " Value: " + idfSum);

    final ClosedSimWeight csw = new ClosedSimWeight(field, idfSum, termStats);
    log.info("Calculating sim weight for field: " + csw.desc);
    return csw;
}

From source file:org.elasticsearch.index.similarity.PositionSimilarity.java

License:Apache License

/**
 * Builds the weight for the given collection and term statistics.
 *
 * @param collectionStatistics statistics of the field being scored; only its
 *                             field name is read here
 * @param termStatisticses     statistics of the query term(s), passed through
 *                             unchanged
 * @return a {@code PositionStats} for the field name and term statistics
 */
@Override
public SimWeight computeWeight(CollectionStatistics collectionStatistics, TermStatistics... termStatisticses) {
    final String fieldName = collectionStatistics.field();
    return new PositionStats(fieldName, termStatisticses);
}

From source file:org.elasticsearch.index.similarity.ScriptedSimilarity.java

License:Apache License

/**
 * Builds the weight by repackaging the query boost, field-level statistics
 * and per-term statistics into this class's {@code Query}, {@code Field} and
 * {@code Term} holders.
 *
 * @param boost           query boost, wrapped in a {@code Query}
 * @param collectionStats statistics of the field being scored
 * @param termStats       statistics of the query term(s)
 * @return a {@code Weight} for the field name, query, field stats and terms
 */
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    final Query query = new Query(boost);

    // A docCount of -1 is replaced by maxDoc.
    final long reportedDocCount = collectionStats.docCount();
    final long docCount = reportedDocCount == -1 ? collectionStats.maxDoc() : reportedDocCount;
    final Field field = new Field(docCount, collectionStats.sumDocFreq(), collectionStats.sumTotalTermFreq());

    final Term[] terms = new Term[termStats.length];
    int slot = 0;
    for (final TermStatistics stat : termStats) {
        terms[slot++] = new Term(stat.docFreq(), stat.totalTermFreq());
    }
    return new Weight(collectionStats.field(), query, field, terms);
}