List of usage examples for org.apache.lucene.search Explanation match
boolean match
To view the source code for org.apache.lucene.search Explanation match.
Click Source Link
From source file:BM25LSimilarity.java
License:Apache License
/**
 * Computes the idf score factor for a single term and wraps it in an
 * explanation.
 *
 * <p>
 * The factor is obtained from:
 *
 * <pre class="prettyprint">
 * idf(docFreq, maxDoc);
 * </pre>
 *
 * Note that {@link CollectionStatistics#maxDoc()} is used instead of
 * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()}
 * because {@link TermStatistics#docFreq()} is also used, and when the latter
 * is inaccurate, so is {@link CollectionStatistics#maxDoc()}, and in the same
 * direction. In addition, {@link CollectionStatistics#maxDoc()} is more
 * efficient to compute.
 *
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the term
 * @return an explanation whose value is the idf score factor and whose
 *         description records the docFreq and maxDocs it was computed from
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
    final long docFreq = termStats.docFreq();
    final long maxDocs = collectionStats.maxDoc();
    final float factor = idf(docFreq, maxDocs);
    final String description = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDocs + ")";
    return Explanation.match(factor, description);
}
From source file:BM25LSimilarity.java
License:Apache License
/** * Computes a score factor for a phrase. * * <p>/*from ww w .j a v a2s. com*/ * The default implementation sums the idf factor for each term in the * phrase. * * @param collectionStats collection-level statistics * @param termStats term-level statistics for the terms in the phrase * @return an Explain object that includes both an idf score factor for the * phrase and an explanation for each term. */ public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) { final long max = collectionStats.maxDoc(); float idf = 0.0f; List<Explanation> details = new ArrayList<>(); for (final TermStatistics stat : termStats) { final long df = stat.docFreq(); final float termIdf = idf(df, max); details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")")); idf += termIdf; } return Explanation.match(idf, "idf(), sum of:", details); }
From source file:BM25LSimilarity.java
License:Apache License
/**
 * Explains the term-frequency normalization component of the score.
 *
 * <p>
 * When {@code norms} is {@code null} the length normalization is left out of
 * the formula and parameter b is reported as 0. Otherwise the document's norm
 * is decoded via {@code decodeNormValue} and reported as "fieldLength",
 * together with {@code b} and {@code stats.avgdl}.
 *
 * @param doc   document id, used to look up the norm
 * @param freq  explanation carrying the term frequency as its value
 * @param stats statistics supplying {@code avgdl}
 * @param norms per-document norms, or {@code null} if omitted for the field
 * @return an explanation of the tfNorm value and the inputs it was computed from
 */
private Explanation explainTFNorm(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) {
    final List<Explanation> inputs = new ArrayList<>();
    inputs.add(freq);
    inputs.add(Explanation.match(k1, "parameter k1"));

    // No norms for this field: report b as 0 and skip length normalization.
    if (norms == null) {
        inputs.add(Explanation.match(0, "parameter b (norms omitted for field)"));
        return Explanation.match(
                (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
                "tfNorm, computed from:",
                inputs);
    }

    final float doclen = decodeNormValue((byte) norms.get(doc));
    inputs.add(Explanation.match(b, "parameter b"));
    inputs.add(Explanation.match(stats.avgdl, "avgFieldLength"));
    inputs.add(Explanation.match(doclen, "fieldLength"));
    return Explanation.match(
            (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen / stats.avgdl)),
            "tfNorm, computed from:",
            inputs);
}
From source file:BM25LSimilarity.java
License:Apache License
private Explanation explainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) { Explanation boostExpl = Explanation.match(stats.boost, "boost"); List<Explanation> subs = new ArrayList<>(); if (boostExpl.getValue() != 1.0f) { subs.add(boostExpl);//from w ww. j av a 2 s.c om } subs.add(stats.idf); Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms); subs.add(tfNormExpl); return Explanation.match(boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue(), "score(doc=" + doc + ",freq=" + freq + "), product of:", subs); }
From source file:com.core.nlp.similarity.TFIDFSimilarity.java
License:Apache License
/** * Computes a score factor for a phrase. * <p/>/*from ww w . ja v a 2s .c o m*/ * <p/> * The default implementation sums the idf factor for * each term in the phrase. * * @param collectionStats collection-level statistics * @param termStats term-level statistics for the terms in the phrase * @return an Explain object that includes both an idf * score factor for the phrase and an explanation * for each term. */ public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) { final long max = collectionStats.maxDoc(); float idf = 0.0f; List<Explanation> subs = new ArrayList<>(); for (final TermStatistics stat : termStats) { final long df = stat.docFreq(); final float termIdf = idf(df, max); subs.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")")); idf += termIdf; } return Explanation.match(idf, "idf(), sum of:", subs); }
From source file:com.core.nlp.similarity.TFIDFSimilarity.java
License:Apache License
private Explanation explainQuery(IDFStats stats) { List<Explanation> subs = new ArrayList<>(); Explanation boostExpl = Explanation.match(stats.queryBoost, "boost"); if (stats.queryBoost != 1.0f) subs.add(boostExpl);//from w w w. j av a2 s.co m subs.add(stats.idf); Explanation queryNormExpl = Explanation.match(stats.queryNorm, "queryNorm"); subs.add(queryNormExpl); return Explanation.match(boostExpl.getValue() * stats.idf.getValue() * queryNormExpl.getValue(), "queryWeight, product of:", subs); }
From source file:com.core.nlp.similarity.TFIDFSimilarity.java
License:Apache License
/**
 * Explains the field weight as the product of tf, idf and the field norm.
 *
 * <p>
 * When {@code norms} is {@code null} the field norm is taken to be 1.
 *
 * @param doc   document id, used to look up the norm
 * @param freq  explanation carrying the term frequency as its value
 * @param stats statistics supplying the idf explanation
 * @param norms per-document norms, or {@code null}
 * @return an explanation of the field weight with tf, idf and fieldNorm as details
 */
private Explanation explainField(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) {
    final Explanation tfPart = Explanation.match(
            tf(freq.getValue()),
            "tf(freq=" + freq.getValue() + "), with freq of:",
            freq);

    final float fieldNorm;
    if (norms != null) {
        fieldNorm = decodeNormValue(norms.get(doc));
    } else {
        fieldNorm = 1.0f;
    }
    final Explanation normPart = Explanation.match(fieldNorm, "fieldNorm(doc=" + doc + ")");

    return Explanation.match(
            tfPart.getValue() * stats.idf.getValue() * normPart.getValue(),
            "fieldWeight in " + doc + ", product of:",
            tfPart, stats.idf, normPart);
}
From source file:com.core.nlp.similarity.TFIDFSimilarity.java
License:Apache License
private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms) { Explanation queryExpl = explainQuery(stats); Explanation fieldExpl = explainField(doc, freq, stats, norms); if (queryExpl.getValue() == 1f) { return fieldExpl; }//from w w w . j a v a 2s . c o m return Explanation.match(queryExpl.getValue() * fieldExpl.getValue(), "score(doc=" + doc + ",freq=" + freq.getValue() + "), product of:", queryExpl, fieldExpl); }
From source file:com.o19s.bm25f.BM25FSimilarity.java
License:Apache License
/**
 * Computes the idf score factor for a single term and wraps it in an
 * explanation.
 *
 * <p>
 * The factor is obtained from:
 *
 * <pre class="prettyprint">
 * idf(docFreq, docCount);
 * </pre>
 *
 * Note that {@link CollectionStatistics#docCount()} is used instead of
 * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()}
 * because {@link TermStatistics#docFreq()} is also used, and when the latter
 * is inaccurate, so is {@link CollectionStatistics#docCount()}, and in the
 * same direction. In addition, {@link CollectionStatistics#docCount()} does
 * not skew when fields are sparse. When {@code docCount()} reports -1,
 * {@link CollectionStatistics#maxDoc()} is used in its place.
 *
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the term
 * @return an explanation whose value is the idf score factor and whose
 *         description records the docFreq and docCount it was computed from
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
    final long docFreq = termStats.docFreq();

    long docs = collectionStats.docCount();
    if (docs == -1) {
        // docCount unavailable: fall back to maxDoc.
        docs = collectionStats.maxDoc();
    }

    final float factor = idf(docFreq, docs);
    return Explanation.match(factor, "idf(docFreq=" + docFreq + ", docCount=" + docs + ")");
}
From source file:com.o19s.bm25f.BM25FSimilarity.java
License:Apache License
/** * Computes a score factor for a phrase. * * <p>/*from w w w.j a v a2 s .co m*/ * The default implementation sums the idf factor for * each term in the phrase. * * @param collectionStats collection-level statistics * @param termStats term-level statistics for the terms in the phrase * @return an Explain object that includes both an idf * score factor for the phrase and an explanation * for each term. */ public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) { final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); float idf = 0.0f; List<Explanation> details = new ArrayList<>(); for (final TermStatistics stat : termStats) { final long df = stat.docFreq(); final float termIdf = idf(df, docCount); details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")")); idf += termIdf; } return Explanation.match(idf, "idf(), sum of:", details); }