Example usage for org.apache.lucene.search IndexSearcher termStatistics

Introduction

On this page you can find example usages of org.apache.lucene.search.IndexSearcher.termStatistics.

Prototype

@Deprecated
public final TermStatistics termStatistics(Term term, TermStates context) throws IOException 

Document

Returns TermStatistics for a term, or null if the term does not exist.
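Note that this TermStates overload is marked deprecated in recent Lucene releases, in favor of an overload that takes precomputed docFreq and totalTermFreq values. The usage examples below were written against older Lucene versions, where the same class was named TermContext (it was renamed to TermStates in Lucene 8.0).

For orientation before the full source files, here is a minimal, self-contained sketch of the call. It assumes Lucene 8.x and an existing index; the directory name, field name, and term are illustrative only.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.store.FSDirectory;

public class TermStatisticsExample {
    public static void main(String[] args) throws IOException {
        // Open a searcher over an existing index (the directory name is illustrative).
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Term term = new Term("body", "lucene");
            // Build the per-segment term states once, then ask the searcher for index-wide stats.
            TermStates states = TermStates.build(searcher.getTopReaderContext(), term, true);
            TermStatistics stats = searcher.termStatistics(term, states);
            if (stats == null) {
                System.out.println("term does not exist");
            } else {
                System.out.println("docFreq=" + stats.docFreq() + ", totalTermFreq=" + stats.totalTermFreq());
            }
        }
    }
}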

Usage

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper df_stats = new StatisticsHelper();
        StatisticsHelper idf_stats = new StatisticsHelper();
        StatisticsHelper ttf_stats = new StatisticsHelper();

        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            df_stats.add(tStats.docFreq());
            idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttf_stats.add(tStats.totalTermFreq());
        }

        /*
        If no terms are parsed in the query we opt for returning 0
        instead of throwing an exception that could break various
        pipelines.
         */
        float constantScore;

        if (terms.size() > 0) {
            switch (type) {
            case ("sum_classic_idf"):
                constantScore = idf_stats.getSum();
                break;
            case ("mean_classic_idf"):
                constantScore = idf_stats.getMean();
                break;
            case ("max_classic_idf"):
                constantScore = idf_stats.getMax();
                break;
            case ("min_classic_idf"):
                constantScore = idf_stats.getMin();
                break;
            case ("stddev_classic_idf"):
                constantScore = idf_stats.getStdDev();
                break;
            case "sum_raw_df":
                constantScore = df_stats.getSum();
                break;
            case "min_raw_df":
                constantScore = df_stats.getMin();
                break;
            case "max_raw_df":
                constantScore = df_stats.getMax();
                break;
            case "mean_raw_df":
                constantScore = df_stats.getMean();
                break;
            case "stddev_raw_df":
                constantScore = df_stats.getStdDev();
                break;
            case "sum_raw_ttf":
                constantScore = ttf_stats.getSum();
                break;
            case "min_raw_ttf":
                constantScore = ttf_stats.getMin();
                break;
            case "max_raw_ttf":
                constantScore = ttf_stats.getMax();
                break;
            case "mean_raw_ttf":
                constantScore = ttf_stats.getMean();
                break;
            case "stddev_raw_ttf":
                constantScore = ttf_stats.getStdDev();
                break;
            case "unique_terms_count":
                constantScore = terms.size();
                break;

            default:
                throw new RuntimeException("Invalid stat type specified.");
            }
        } else {
            constantScore = 0.0f;
        }

        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {

            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                return new ConstantScoreScorer(this, constantScore,
                        DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }

        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class to accept a list of terms directly instead of an arbitrary query;
        // rewriting at this point is wrong, and we almost certainly build the TermContext twice for every term.
        // The problem is that we rely on extractTerms, which happens too late in the process.
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}
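In the collection-scoped branch above, each *_classic_idf variant feeds per-term termStatistics into ClassicSimilarity. A minimal sketch of that per-term computation pulled out on its own (assuming Lucene 7.x, where the TermContext-based API and ClassicSimilarity.idf(docFreq, docCount) are available; the class name is illustrative):

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.ClassicSimilarity;

public class ClassicIdf {
    // Classic Lucene IDF for one term: 1 + log((docCount + 1) / (docFreq + 1)).
    public static float idf(IndexSearcher searcher, Term term) throws IOException {
        ClassicSimilarity sim = new ClassicSimilarity();
        TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
        TermStatistics tStats = searcher.termStatistics(term, ctx);
        return sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs());
    }
}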

From source file:com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java

License:Apache License

public FlexibleWeight(FlexibleQuery query, IndexSearcher searcher) throws IOException {
    this.query = query;
    this.similarity = searcher.getSimilarity();
    final IndexReaderContext context = searcher.getTopReaderContext();

    int[] maxDocFreqs = null;
    long[] maxTotalTermFreqs = null;
    Map<Term, TermContext> builtTermMap = new HashMap<>();
    if (query.enableGlobalIDF()) {
        FlexibleQuery.FlexibleTerm[][] globalTerms = query.getGlobalTerms();
        TermContext[][] globalStates = new TermContext[globalTerms.length][];
        for (int i = 0; i < globalTerms.length; ++i) {
            globalStates[i] = new TermContext[globalTerms[i].length];
            for (int j = 0; j < globalTerms[i].length; ++j) {
                Term term = globalTerms[i][j].term;
                TermContext termContext = builtTermMap.get(term);
                if (termContext != null) {
                    globalStates[i][j] = termContext;
                } else {
                    globalStates[i][j] = TermContext.build(context, globalTerms[i][j].term);
                    builtTermMap.put(term, globalStates[i][j]);
                }
            }
        }
        maxDocFreqs = new int[globalTerms[0].length];
        maxTotalTermFreqs = new long[globalTerms[0].length];
        int fieldLength = globalTerms.length;
        int termLength = globalTerms[0].length;
        for (int i = 0; i < termLength; ++i) {
            int maxDocFreq = 0;
            long maxTotalTermFreq = 0;
            for (int j = 0; j < fieldLength; ++j) {
                maxDocFreq = Math.max(globalStates[j][i].docFreq(), maxDocFreq);
                maxTotalTermFreq = Math.max(globalStates[j][i].totalTermFreq(), maxTotalTermFreq);
            }
            maxDocFreqs[i] = maxDocFreq;
            maxTotalTermFreqs[i] = maxTotalTermFreq;
        }
    }

    FlexibleQuery.FlexibleTerm[][] terms = query.getTerms();
    TermContext[][] states = new TermContext[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        states[i] = new TermContext[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            Term term = terms[i][j].term;
            TermContext termContext = builtTermMap.get(term);
            if (termContext != null) {
                states[i][j] = termContext;
            } else {
                states[i][j] = TermContext.build(context, terms[i][j].term);
                builtTermMap.put(term, states[i][j]);
            }
        }
    }
    termStatsMatrix = new TermStats[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        termStatsMatrix[i] = new TermStats[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            FlexibleQuery.FlexibleTerm term = terms[i][j];
            TermContext state = states[i][j];
            TermStatistics termStats;
            if (query.enableGlobalIDF()) {
                termStats = new TermStatistics(term.term.bytes(), maxDocFreqs[j], maxTotalTermFreqs[j]);
            } else {
                termStats = searcher.termStatistics(term.term, state);
            }
            Similarity.SimWeight stats = similarity.computeWeight(term.boost,
                    searcher.collectionStatistics(term.term.field()), termStats);
            TermStats termStatsInfo = new TermStats();
            termStatsInfo.stats = stats;
            termStatsInfo.term = term.term;
            termStatsInfo.termContext = state;
            termStatsMatrix[i][j] = termStatsInfo;
        }
    }
}
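Note the design of the global-IDF branch above: rather than asking the searcher for per-field statistics, it synthesizes one TermStatistics per term position from the maximum docFreq and totalTermFreq observed across all fields, so a term receives the same weight regardless of which field it matched in.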

From source file:org.codelibs.elasticsearch.common.lucene.all.AllTermQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (needsScores == false) {
        return new TermQuery(term).createWeight(searcher, needsScores);
    }
    final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, termStates);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return stats.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            stats.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            AllTermScorer scorer = scorer(context);
            if (scorer != null) {
                int newDoc = scorer.iterator().advance(doc);
                if (newDoc == doc) {
                    float score = scorer.score();
                    float freq = scorer.freq();
                    SimScorer docScorer = similarity.simScorer(stats, context);
                    Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
                    Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
                    Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(),
                            "payloadBoost=" + scorer.payloadBoost());
                    return Explanation.match(score,
                            "weight(" + getQuery() + " in " + doc + ") ["
                                    + similarity.getClass().getSimpleName() + "], product of:",
                            termScoreExplanation, payloadBoostExplanation);
                }
            }
            return Explanation.noMatch("no matching term");
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                return null;
            }
            final TermsEnum termsEnum = terms.iterator();
            if (termsEnum == null) {
                return null;
            }
            final TermState state = termStates.get(context.ord);
            if (state == null) {
                // Term does not exist in this segment
                return null;
            }
            termsEnum.seekExact(term.bytes(), state);
            PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert docs != null;
            return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
        }

    };
}
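The scorer above illustrates the standard pattern for reusing a prebuilt TermContext: termStates.get(context.ord) retrieves the per-segment TermState, a null state means the term is absent from that segment, and seekExact(term.bytes(), state) positions the TermsEnum without repeating the term-dictionary lookup.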

From source file:org.frontcache.cache.impl.LuceneIndexManager.java

License:Apache License

public long getDocumentsCount(String domain) {

    long count = -1;

    IndexWriter iWriter = null;
    try {
        iWriter = getIndexWriter();
        if (iWriter == null) {
            return count;
        }
    } catch (Exception e1) {
        logger.debug("Error during getting indexWriter. " + e1.getMessage());
        return count;
    }

    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(iWriter);
        Term domainTerm = new Term(DOMAIN_FIELD, domain);
        IndexSearcher searcher = new IndexSearcher(reader);
        TermStatistics termStat = searcher.termStatistics(domainTerm,
                TermContext.build(searcher.getIndexReader().getContext(), domainTerm));
        count = termStat.docFreq();
    } catch (Exception e1) {
        logger.debug("Error during reader.totalTermFreq(domainTerm). " + e1.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                // Nothing sensible to recover from here; report the failed close and continue.
                e.printStackTrace();
            }
        }
    }

    return count;
}
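Since this method only needs the document frequency of a single term, the TermContext/termStatistics round trip can be avoided: IndexReader exposes the same number directly via docFreq. A minimal alternative sketch of the core of the method, under the same assumptions (an IndexWriter over the cache index and a domain field; the class and parameter names are illustrative):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

public final class DomainCount {
    // Number of cached documents for a domain, counted as the document frequency of the domain term.
    static long getDocumentsCount(IndexWriter iWriter, String domainField, String domain) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(iWriter)) {
            return reader.docFreq(new Term(domainField, domain));
        }
    }
}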