List of usage examples for org.apache.lucene.search CollectionStatistics field
String field
To view the source code for the org.apache.lucene.search CollectionStatistics field, click the Source Link.
From source file:BM25LSimilarity.java
License:Apache License
@Override public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); float avgdl = avgFieldLength(collectionStats); // compute freq-independent part of bm25 equation across all norm values float cache[] = new float[256]; for (int i = 0; i < cache.length; i++) { cache[i] = k1 * (((1 - b) + b * decodeNormValue((byte) i) / avgdl) + delta); }/* w w w.j a va 2s . c om*/ return new BM25Stats(collectionStats.field(), idf, avgdl, cache); }
From source file:com.core.nlp.similarity.TFIDFSimilarity.java
License:Apache License
/**
 * Builds the TF-IDF weight for a query.
 *
 * @param queryBoost boost applied to the query, forwarded unchanged to {@code IDFStats}
 * @param collectionStats statistics of the field being searched
 * @param termStats statistics of the query term(s)
 * @return an {@code IDFStats} built from the field name, the IDF explanation and the boost
 */
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats,
        TermStatistics... termStats) {
    final Explanation idf;
    if (termStats.length == 1) {
        // Single term: use the single-term overload.
        idf = idfExplain(collectionStats, termStats[0]);
    } else {
        // Zero or several terms: use the array overload.
        idf = idfExplain(collectionStats, termStats);
    }
    return new IDFStats(collectionStats.field(), idf, queryBoost);
}
From source file:com.o19s.bm25f.BM25FSimilarity.java
License:Apache License
@Override public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); float avgdl = avgFieldLength(collectionStats); // compute freq-independent part of bm25 equation across all norm values float cache[] = new float[256]; for (int i = 0; i < cache.length; i++) { cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte) i) / avgdl); }//from w w w . j a v a 2 s .c om return new BM25Stats(collectionStats.field(), idf, avgdl, cache); }
From source file:eu.europeana.ranking.bm25f.similarity.BM25FSimilarity.java
License:Apache License
@Override public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {/*from www.ja v a2s.co m*/ Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); boosts = params.getBoosts(); lengthBoosts = params.getbParams(); k1 = params.getK1(); String field = collectionStats.field(); float avgdl = avgFieldLength(collectionStats); // ignoring query boost, using bm25f query boost float boost = 1; if (boosts.containsKey(field)) { boost = boosts.get(field); } // compute freq-independent part of bm25 equation across all norm values // float cache[] = new float[256]; // for (int i = 0; i < cache.length; i++) { // cache[i] = ((1 - bField) + bField * decodeNormValue((byte) i) // / avgdl); // System.out.println("cache " + i + "\t" + cache[i]); // } return new BM25FSimWeight(field, idf, boost, avgdl, null, k1); }
From source file:io.anserini.search.similarity.F2LogSimilarity.java
License:Apache License
@Override public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); float avgdl = avgFieldLength(collectionStats); // compute freq-independent part of f2log equation across all norm values float cache[] = new float[256]; for (int i = 0; i < cache.length; i++) { cache[i] = s + s * decodeNormValue((byte) i) / avgdl; }/*from w w w.jav a 2 s. co m*/ return new F2LogStats(collectionStats.field(), idf, avgdl, cache); }
From source file:main.BM25VASimilarity.java
License:Apache License
@Override public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); float avgdl = avgFieldLength(collectionStats); // compute freq-independent part of bm25 equation across all norm values float cache[] = new float[256]; for (int i = 0; i < cache.length; i++) { //cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte) i) / avgdl); //cache becomes cachePrime = B //cache[i] = ((1 - b) + b * decodeNormValue((byte) i) / avgdl); // B_VA/* w w w .java2 s . c om*/ //cache[i] = (1/(mavgtf * mavgtf) * decodeNormValue((byte) i) / Td) + // ((1 - 1/mavgtf)*decodeNormValue((byte) i) / avgdl); //Cache should now only contain the length of doc d. cache[i] = decodeNormValue((byte) i); } return new BM25Stats(collectionStats.field(), idf, avgdl, cache); }
From source file:org.apache.solr.search.stats.CollectionStats.java
License:Apache License
/**
 * Creates a snapshot of the given Lucene collection statistics by copying its field name,
 * maxDoc, docCount, sumTotalTermFreq and sumDocFreq values.
 *
 * @param stats the statistics to copy from
 */
public CollectionStats(CollectionStatistics stats) {
    field = stats.field();
    maxDoc = stats.maxDoc();
    docCount = stats.docCount();
    sumTotalTermFreq = stats.sumTotalTermFreq();
    sumDocFreq = stats.sumDocFreq();
}
From source file:org.elasticsearch.index.similarity.ClosedSimilarity.java
@Override public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { float idf = 0.0f; String desc = "Field: " + collectionStats.field() + " Terms: "; final long max = collectionStats.maxDoc(); for (final TermStatistics stat : termStats) { final long df = stat.docFreq(); final float termIdf = idf(df, max); idf += termIdf;//from ww w. j a v a 2 s .c om log.info("Calculating term frequency: " + stat.term().utf8ToString() + " Value: " + df); desc += stat.term().utf8ToString() + " "; } log.info("Calculating term idf: " + desc + " Value: " + idf); ClosedSimWeight csw = new ClosedSimWeight(collectionStats.field(), idf, termStats); log.info("Calculating sim weight for field: " + csw.desc); return csw; }
From source file:org.elasticsearch.index.similarity.PositionSimilarity.java
License:Apache License
/**
 * Builds the weight for a query by wrapping the field name and the term statistics
 * in a {@code PositionStats}.
 *
 * @param collectionStatistics statistics of the field being searched; only the field name is read
 * @param termStatisticses statistics of the query term(s), passed through unchanged
 * @return a new {@code PositionStats}
 */
@Override
public SimWeight computeWeight(CollectionStatistics collectionStatistics,
        TermStatistics... termStatisticses) {
    final String fieldName = collectionStatistics.field();
    return new PositionStats(fieldName, termStatisticses);
}
From source file:org.elasticsearch.index.similarity.ScriptedSimilarity.java
License:Apache License
/**
 * Builds the weight for a query by packaging the query boost, the field-level statistics
 * and the per-term statistics into {@code Query}, {@code Field} and {@code Term} objects.
 *
 * @param boost the query boost, wrapped in a {@code Query}
 * @param collectionStats statistics of the field being searched
 * @param termStats statistics of the query term(s)
 * @return a {@code Weight} built from the field name, query, field and term objects
 */
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats,
        TermStatistics... termStats) {
    final Query query = new Query(boost);

    // A docCount of -1 is replaced by maxDoc.
    final long reportedDocCount = collectionStats.docCount();
    final long docCount = reportedDocCount == -1 ? collectionStats.maxDoc() : reportedDocCount;

    final Field field = new Field(docCount, collectionStats.sumDocFreq(),
            collectionStats.sumTotalTermFreq());

    // One Term per input TermStatistics, in the same order.
    final Term[] terms = new Term[termStats.length];
    for (int idx = 0; idx < terms.length; ++idx) {
        final TermStatistics stat = termStats[idx];
        terms[idx] = new Term(stat.docFreq(), stat.totalTermFreq());
    }
    return new Weight(collectionStats.field(), query, field, terms);
}