Example usage for org.apache.lucene.search Explanation match

List of usage examples for org.apache.lucene.search Explanation match

Introduction

This page collects example usages of org.apache.lucene.search.Explanation.match.

Prototype

boolean match

To view the source code for org.apache.lucene.search.Explanation.match, click Source Link.

Usage

From source file:BM25LSimilarity.java

License:Apache License

/**
 * Builds the explanation of the idf score factor for a single term.
 *
 * <p>
 * The factor is obtained from:
 *
 * <pre class="prettyprint">
 * idf(termStats.docFreq(), collectionStats.maxDoc());
 * </pre>
 *
 * {@link CollectionStatistics#maxDoc()} is used rather than
 * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()}
 * because {@link TermStatistics#docFreq()} is used as well: when the latter
 * is inaccurate, so is {@link CollectionStatistics#maxDoc()}, and in the
 * same direction. In addition, {@link CollectionStatistics#maxDoc()} is
 * more efficient to compute.
 *
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the term
 * @return an explanation whose value is the idf score factor and whose
 * description records the document frequency and maxDoc it was derived from
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
    final long docFreq = termStats.docFreq();
    final long maxDocs = collectionStats.maxDoc();
    final float factor = idf(docFreq, maxDocs);
    final String description = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDocs + ")";
    return Explanation.match(factor, description);
}

From source file:BM25LSimilarity.java

License:Apache License

/**
 * Computes a score factor for a phrase.
 *
 * <p>/*from   ww  w .j  a v  a2s.  com*/
 * The default implementation sums the idf factor for each term in the
 * phrase.
 *
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf score factor for the
 * phrase and an explanation for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
    final long max = collectionStats.maxDoc();
    float idf = 0.0f;
    List<Explanation> details = new ArrayList<>();
    for (final TermStatistics stat : termStats) {
        final long df = stat.docFreq();
        final float termIdf = idf(df, max);
        details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
        idf += termIdf;
    }
    return Explanation.match(idf, "idf(), sum of:", details);
}

From source file:BM25LSimilarity.java

License:Apache License

/**
 * Explains the term-frequency normalization factor for one document.
 *
 * <p>
 * The details always start with the frequency explanation and the
 * {@code k1} parameter. When {@code norms} is {@code null}, {@code b} is
 * reported as 0 and the factor is
 * {@code freq * (k1 + 1) / (freq + k1)}. Otherwise the document length is
 * decoded from the norm of {@code doc} and the factor is
 * {@code freq * (k1 + 1) / (freq + k1 * (1 - b + b * doclen / avgdl))}.
 *
 * @param doc document whose norm is read when norms are available
 * @param freq explanation of the term frequency; its value feeds the formula
 * @param stats statistics supplying {@code avgdl}
 * @param norms per-document norms, or {@code null} when omitted for the field
 * @return an explanation of the tfNorm value together with its inputs
 */
private Explanation explainTFNorm(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) {
    final List<Explanation> details = new ArrayList<>();
    details.add(freq);
    details.add(Explanation.match(k1, "parameter k1"));

    if (norms != null) {
        // Length normalization applies: decode this document's field length.
        final float fieldLength = decodeNormValue((byte) norms.get(doc));
        details.add(Explanation.match(b, "parameter b"));
        details.add(Explanation.match(stats.avgdl, "avgFieldLength"));
        details.add(Explanation.match(fieldLength, "fieldLength"));
        return Explanation.match(
                (freq.getValue() * (k1 + 1))
                        / (freq.getValue() + k1 * (1 - b + b * fieldLength / stats.avgdl)),
                "tfNorm, computed from:", details);
    }

    // No norms for this field: length normalization is switched off.
    details.add(Explanation.match(0, "parameter b (norms omitted for field)"));
    return Explanation.match((freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
            "tfNorm, computed from:", details);
}

From source file:BM25LSimilarity.java

License:Apache License

/**
 * Explains the score of a document as the product of boost, idf and tfNorm.
 *
 * <p>
 * The boost is listed among the details only when it differs from 1; the
 * idf explanation from {@code stats} and the tfNorm explanation are always
 * listed. The returned value always multiplies all three factors.
 *
 * @param doc document being explained
 * @param freq explanation of the term frequency in {@code doc}
 * @param stats statistics supplying the boost and the idf explanation
 * @param norms per-document norms, passed through to the tfNorm explanation
 * @return an explanation of the score with its contributing factors
 */
private Explanation explainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) {
    Explanation boostExpl = Explanation.match(stats.boost, "boost");
    Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms);

    List<Explanation> subs = new ArrayList<>();
    // A boost of exactly 1 does not change the product, so it is left out of the details.
    if (boostExpl.getValue() != 1.0f) {
        subs.add(boostExpl);
    }
    subs.add(stats.idf);
    subs.add(tfNormExpl);

    // Report the numeric frequency: concatenating the Explanation object itself
    // would embed its entire string form inside the description.
    return Explanation.match(boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue(),
            "score(doc=" + doc + ",freq=" + freq.getValue() + "), product of:", subs);
}

From source file:com.core.nlp.similarity.TFIDFSimilarity.java

License:Apache License

/**
 * Computes a score factor for a phrase.
 * <p/>/*from   ww w  .  ja  v a  2s .c  o m*/
 * <p/>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 *
 * @param collectionStats collection-level statistics
 * @param termStats       term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf
 * score factor for the phrase and an explanation
 * for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
    final long max = collectionStats.maxDoc();
    float idf = 0.0f;
    List<Explanation> subs = new ArrayList<>();
    for (final TermStatistics stat : termStats) {
        final long df = stat.docFreq();
        final float termIdf = idf(df, max);
        subs.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
        idf += termIdf;
    }
    return Explanation.match(idf, "idf(), sum of:", subs);
}

From source file:com.core.nlp.similarity.TFIDFSimilarity.java

License:Apache License

/**
 * Explains the query weight as the product of boost, idf and queryNorm.
 *
 * <p>
 * The boost is listed among the details only when the query boost differs
 * from 1; the idf and queryNorm explanations are always listed. The
 * returned value always multiplies all three factors.
 *
 * @param stats statistics supplying the query boost, idf explanation and query norm
 * @return an explanation of the query weight with its contributing factors
 */
private Explanation explainQuery(IDFStats stats) {
    final Explanation boost = Explanation.match(stats.queryBoost, "boost");
    final Explanation queryNorm = Explanation.match(stats.queryNorm, "queryNorm");

    final List<Explanation> details = new ArrayList<>();
    if (stats.queryBoost != 1.0f) {
        details.add(boost);
    }
    details.add(stats.idf);
    details.add(queryNorm);

    return Explanation.match(boost.getValue() * stats.idf.getValue() * queryNorm.getValue(),
            "queryWeight, product of:", details);
}

From source file:com.core.nlp.similarity.TFIDFSimilarity.java

License:Apache License

/**
 * Explains the field weight as the product of tf, idf and the field norm.
 *
 * <p>
 * The field norm is decoded from the norm of {@code doc} when norms are
 * available and is 1 otherwise.
 *
 * @param doc document being explained
 * @param freq explanation of the term frequency in {@code doc}
 * @param stats statistics supplying the idf explanation
 * @param norms per-document norms, or {@code null} when none are available
 * @return an explanation of the field weight with tf, idf and fieldNorm as details
 */
private Explanation explainField(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) {
    final float freqValue = freq.getValue();
    final Explanation tfPart = Explanation.match(tf(freqValue),
            "tf(freq=" + freqValue + "), with freq of:", freq);

    final float normValue;
    if (norms != null) {
        normValue = decodeNormValue(norms.get(doc));
    } else {
        normValue = 1.0f;
    }
    final Explanation normPart = Explanation.match(normValue, "fieldNorm(doc=" + doc + ")");

    return Explanation.match(tfPart.getValue() * stats.idf.getValue() * normPart.getValue(),
            "fieldWeight in " + doc + ", product of:", tfPart, stats.idf, normPart);
}

From source file:com.core.nlp.similarity.TFIDFSimilarity.java

License:Apache License

/**
 * Explains the score of a document as query weight times field weight.
 *
 * <p>
 * When the query weight is exactly 1 the field-weight explanation is
 * returned on its own, since the product would add nothing.
 *
 * @param doc document being explained
 * @param freq explanation of the term frequency in {@code doc}
 * @param stats statistics used by both the query and field explanations
 * @param norms per-document norms, passed through to the field explanation
 * @return the field explanation alone, or the product of query and field explanations
 */
private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) {
    final Explanation queryPart = explainQuery(stats);
    final Explanation fieldPart = explainField(doc, freq, stats, norms);
    if (queryPart.getValue() != 1f) {
        final String description = "score(doc=" + doc + ",freq=" + freq.getValue() + "), product of:";
        return Explanation.match(queryPart.getValue() * fieldPart.getValue(), description,
                queryPart, fieldPart);
    }
    return fieldPart;
}

From source file:com.o19s.bm25f.BM25FSimilarity.java

License:Apache License

/**
 * Builds the explanation of the idf score factor for a single term.
 *
 * <p>
 * The factor is obtained from:
 *
 * <pre class="prettyprint">
 * idf(docFreq, docCount);
 * </pre>
 *
 * where {@code docCount} is {@link CollectionStatistics#docCount()}, or
 * {@link CollectionStatistics#maxDoc()} when the former returns -1.
 *
 * <p>
 * {@link CollectionStatistics#docCount()} is used rather than
 * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()} because
 * {@link TermStatistics#docFreq()} is used as well: when the latter is inaccurate,
 * so is {@link CollectionStatistics#docCount()}, and in the same direction.
 * In addition, {@link CollectionStatistics#docCount()} does not skew when fields are sparse.
 *
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the term
 * @return an explanation whose value is the idf score factor and whose
 *         description records the document frequency and document count used
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
    final long docFreq = termStats.docFreq();
    final long docCount;
    if (collectionStats.docCount() == -1) {
        // -1 is the fallback case: use maxDoc instead.
        docCount = collectionStats.maxDoc();
    } else {
        docCount = collectionStats.docCount();
    }
    final float factor = idf(docFreq, docCount);
    final String description = "idf(docFreq=" + docFreq + ", docCount=" + docCount + ")";
    return Explanation.match(factor, description);
}

From source file:com.o19s.bm25f.BM25FSimilarity.java

License:Apache License

/**
 * Computes a score factor for a phrase.
 *
 * <p>/*from   w w  w.j  a  v  a2  s  .co  m*/
 * The default implementation sums the idf factor for
 * each term in the phrase.
 *
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf
 *         score factor for the phrase and an explanation
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc()
            : collectionStats.docCount();
    float idf = 0.0f;
    List<Explanation> details = new ArrayList<>();
    for (final TermStatistics stat : termStats) {
        final long df = stat.docFreq();
        final float termIdf = idf(df, docCount);
        details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
        idf += termIdf;
    }
    return Explanation.match(idf, "idf(), sum of:", details);
}