List of usage examples for org.apache.lucene.search.IndexSearcher#termStatistics
@Deprecated public final TermStatistics termStatistics(Term term, TermStates context) throws IOException
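Before the full examples, here is a minimal, self-contained sketch of a typical call. It assumes the Lucene 7.x-era API used by the examples below, where the per-segment term state class is named TermContext (renamed TermStates in Lucene 8); the Directory argument, field name, and term value are illustrative only. Later 8.x releases deprecate this overload in favor of a variant that takes docFreq and totalTermFreq explicitly.

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.store.Directory;

public class TermStatisticsExample {
    // Prints index-wide statistics for a single term. The field name "body"
    // and term "lucene" are illustrative, not taken from the examples below.
    public static void printTermStats(Directory dir) throws IOException {
        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Term term = new Term("body", "lucene");
            // Build the per-segment term state once, then ask the searcher
            // for statistics aggregated across the whole index.
            TermContext context = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics stats = searcher.termStatistics(term, context);
            // Depending on the Lucene version, an absent term yields null or zero counts.
            if (stats != null) {
                System.out.println("docFreq=" + stats.docFreq()
                        + ", totalTermFreq=" + stats.totalTermFreq());
            }
        }
    }
}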
From source file: com.o19s.es.explore.ExplorerQuery.java
License: Apache License
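In this example, termStatistics feeds per-term aggregations: for each term extracted from the wrapped query, the weight collects raw document frequency, classic IDF, and raw total term frequency, then reduces them (sum/mean/max/min/stddev) into a single constant score according to the requested stat type.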
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper df_stats = new StatisticsHelper();
        StatisticsHelper idf_stats = new StatisticsHelper();
        StatisticsHelper ttf_stats = new StatisticsHelper();

        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            df_stats.add(tStats.docFreq());
            idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttf_stats.add(tStats.totalTermFreq());
        }

        /*
         * If no terms are parsed in the query we opt for returning 0
         * instead of throwing an exception that could break various
         * pipelines.
         */
        float constantScore;
        if (terms.size() > 0) {
            switch (type) {
            case "sum_classic_idf":
                constantScore = idf_stats.getSum();
                break;
            case "mean_classic_idf":
                constantScore = idf_stats.getMean();
                break;
            case "max_classic_idf":
                constantScore = idf_stats.getMax();
                break;
            case "min_classic_idf":
                constantScore = idf_stats.getMin();
                break;
            case "stddev_classic_idf":
                constantScore = idf_stats.getStdDev();
                break;
            case "sum_raw_df":
                constantScore = df_stats.getSum();
                break;
            case "min_raw_df":
                constantScore = df_stats.getMin();
                break;
            case "max_raw_df":
                constantScore = df_stats.getMax();
                break;
            case "mean_raw_df":
                constantScore = df_stats.getMean();
                break;
            case "stddev_raw_df":
                constantScore = df_stats.getStdDev();
                break;
            case "sum_raw_ttf":
                constantScore = ttf_stats.getSum();
                break;
            case "min_raw_ttf":
                constantScore = ttf_stats.getMin();
                break;
            case "max_raw_ttf":
                constantScore = ttf_stats.getMax();
                break;
            case "mean_raw_ttf":
                constantScore = ttf_stats.getMean();
                break;
            case "stddev_raw_ttf":
                constantScore = ttf_stats.getStdDev();
                break;
            case "unique_terms_count":
                constantScore = terms.size();
                break;
            default:
                throw new RuntimeException("Invalid stat type specified.");
            }
        } else {
            constantScore = 0.0f;
        }

        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {
            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                return new ConstantScoreScorer(this, constantScore,
                        DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }
        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery.
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class to accept a list of terms directly
        // instead of an arbitrary query. Rewriting at this point is wrong, and we
        // almost certainly build the TermContext twice for every term; the problem
        // is that we rely on extractTerms, which happens too late in the process.
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}
From source file: com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java
License: Apache License
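This weight builds a TermContext for every entry of a field/term matrix, caching them in a map so a term shared across fields is built only once. With global IDF enabled it substitutes per-term maxima across fields for the real statistics; otherwise it calls searcher.termStatistics directly before computing the similarity weight.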
public FlexibleWeight(FlexibleQuery query, IndexSearcher searcher) throws IOException {
    this.query = query;
    this.similarity = searcher.getSimilarity();
    final IndexReaderContext context = searcher.getTopReaderContext();

    int[] maxDocFreqs = null;
    long[] maxTotalTermFreqs = null;
    // Cache built TermContexts so a term shared across fields is built only once.
    Map<Term, TermContext> builtTermMap = new HashMap<>();

    if (query.enableGlobalIDF()) {
        FlexibleQuery.FlexibleTerm[][] globalTerms = query.getGlobalTerms();
        TermContext[][] globalStates = new TermContext[globalTerms.length][];
        for (int i = 0; i < globalTerms.length; ++i) {
            globalStates[i] = new TermContext[globalTerms[i].length];
            for (int j = 0; j < globalTerms[i].length; ++j) {
                Term term = globalTerms[i][j].term;
                TermContext termContext = builtTermMap.get(term);
                if (termContext != null) {
                    globalStates[i][j] = termContext;
                } else {
                    globalStates[i][j] = TermContext.build(context, globalTerms[i][j].term);
                    builtTermMap.put(term, globalStates[i][j]);
                }
            }
        }
        // For each term position, take the maximum docFreq/totalTermFreq across all fields.
        maxDocFreqs = new int[globalTerms[0].length];
        maxTotalTermFreqs = new long[globalTerms[0].length];
        int fieldLength = globalTerms.length;
        int termLength = globalTerms[0].length;
        for (int i = 0; i < termLength; ++i) {
            int maxDocFreq = 0;
            long maxTotalTermFreq = 0;
            for (int j = 0; j < fieldLength; ++j) {
                maxDocFreq = Math.max(globalStates[j][i].docFreq(), maxDocFreq);
                maxTotalTermFreq = Math.max(globalStates[j][i].totalTermFreq(), maxTotalTermFreq);
            }
            maxDocFreqs[i] = maxDocFreq;
            maxTotalTermFreqs[i] = maxTotalTermFreq;
        }
    }

    FlexibleQuery.FlexibleTerm[][] terms = query.getTerms();
    TermContext[][] states = new TermContext[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        states[i] = new TermContext[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            Term term = terms[i][j].term;
            TermContext termContext = builtTermMap.get(term);
            if (termContext != null) {
                states[i][j] = termContext;
            } else {
                states[i][j] = TermContext.build(context, terms[i][j].term);
                builtTermMap.put(term, states[i][j]);
            }
        }
    }

    termStatsMatrix = new TermStats[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        termStatsMatrix[i] = new TermStats[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            FlexibleQuery.FlexibleTerm term = terms[i][j];
            TermContext state = states[i][j];
            TermStatistics termStats;
            if (query.enableGlobalIDF()) {
                termStats = new TermStatistics(term.term.bytes(), maxDocFreqs[j], maxTotalTermFreqs[j]);
            } else {
                termStats = searcher.termStatistics(term.term, state);
            }
            Similarity.SimWeight stats = similarity.computeWeight(term.boost,
                    searcher.collectionStatistics(term.term.field()), termStats);
            TermStats termStatsInfo = new TermStats();
            termStatsInfo.stats = stats;
            termStatsInfo.term = term.term;
            termStatsInfo.termContext = state;
            termStatsMatrix[i][j] = termStatsInfo;
        }
    }
}
From source file: org.codelibs.elasticsearch.common.lucene.all.AllTermQuery.java
License: Apache License
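Here the term's TermContext, the field's collection statistics, and the termStatistics result are combined into a Similarity.SimWeight that backs a scorer applying a per-document payload boost on top of the term score.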
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (needsScores == false) {
        return new TermQuery(term).createWeight(searcher, needsScores);
    }
    final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, termStates);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return stats.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            stats.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            AllTermScorer scorer = scorer(context);
            if (scorer != null) {
                int newDoc = scorer.iterator().advance(doc);
                if (newDoc == doc) {
                    float score = scorer.score();
                    float freq = scorer.freq();
                    SimScorer docScorer = similarity.simScorer(stats, context);
                    Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
                    Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
                    Explanation payloadBoostExplanation =
                            Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
                    return Explanation.match(score,
                            "weight(" + getQuery() + " in " + doc + ") ["
                                    + similarity.getClass().getSimpleName() + "], product of:",
                            termScoreExplanation, payloadBoostExplanation);
                }
            }
            return Explanation.noMatch("no matching term");
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                return null;
            }
            final TermsEnum termsEnum = terms.iterator();
            if (termsEnum == null) {
                return null;
            }
            final TermState state = termStates.get(context.ord);
            if (state == null) {
                // Term does not exist in this segment
                return null;
            }
            termsEnum.seekExact(term.bytes(), state);
            PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert docs != null;
            return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
        }
    };
}
From source file: org.frontcache.cache.impl.LuceneIndexManager.java
License: Apache License
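This utility counts the documents indexed for a domain by opening a reader from the index writer and reading docFreq() from the TermStatistics of a term on the domain field.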
public long getDocumentsCount(String domain) {
    long count = -1;
    IndexWriter iWriter = null;
    try {
        iWriter = getIndexWriter();
        if (iWriter == null) {
            return count;
        }
    } catch (Exception e1) {
        logger.debug("Error during getting indexWriter. " + e1.getMessage());
        return count;
    }

    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(iWriter);
        Term domainTerm = new Term(DOMAIN_FIELD, domain);
        IndexSearcher searcher = new IndexSearcher(reader);
        TermStatistics termStat = searcher.termStatistics(domainTerm,
                TermContext.build(searcher.getIndexReader().getContext(), domainTerm));
        count = termStat.docFreq();
    } catch (Exception e1) {
        logger.debug("Error during searcher.termStatistics(domainTerm). " + e1.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return count;
}