Example usage for org.apache.lucene.search Weight extractTerms

Introduction

On this page you can find example usage for org.apache.lucene.search Weight.extractTerms.

Prototype

@Deprecated
public abstract void extractTerms(Set<Term> terms);

Document

Expert: adds all terms occurring in this query to the terms set.
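
As a quick orientation, the examples below share one call pattern: create a Weight for the (rewritten) query, then hand it a set to fill. A minimal sketch, assuming a Lucene version around 7.x where Weight.extractTerms still exists and createWeight takes a needsScores flag; "searcher" and "query" are placeholders, not part of this page:

Query rewritten = query.rewrite(searcher.getIndexReader());
Weight weight = searcher.createWeight(rewritten, false, 1.0f); // no scores needed just to collect terms
Set<Term> terms = new HashSet<>();
weight.extractTerms(terms); // fills "terms" with every term the query matches on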

Usage

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper df_stats = new StatisticsHelper();
        StatisticsHelper idf_stats = new StatisticsHelper();
        StatisticsHelper ttf_stats = new StatisticsHelper();

        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            df_stats.add(tStats.docFreq());
            idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttf_stats.add(tStats.totalTermFreq());
        }

        /*
        If no terms are parsed in the query we opt for returning 0
        instead of throwing an exception that could break various
        pipelines.
         */
        float constantScore;

        if (terms.size() > 0) {
            switch (type) {
            case ("sum_classic_idf"):
                constantScore = idf_stats.getSum();
                break;
            case ("mean_classic_idf"):
                constantScore = idf_stats.getMean();
                break;
            case ("max_classic_idf"):
                constantScore = idf_stats.getMax();
                break;
            case ("min_classic_idf"):
                constantScore = idf_stats.getMin();
                break;
            case ("stddev_classic_idf"):
                constantScore = idf_stats.getStdDev();
                break;
            case "sum_raw_df":
                constantScore = df_stats.getSum();
                break;
            case "min_raw_df":
                constantScore = df_stats.getMin();
                break;
            case "max_raw_df":
                constantScore = df_stats.getMax();
                break;
            case "mean_raw_df":
                constantScore = df_stats.getMean();
                break;
            case "stddev_raw_df":
                constantScore = df_stats.getStdDev();
                break;
            case "sum_raw_ttf":
                constantScore = ttf_stats.getSum();
                break;
            case "min_raw_ttf":
                constantScore = ttf_stats.getMin();
                break;
            case "max_raw_ttf":
                constantScore = ttf_stats.getMax();
                break;
            case "mean_raw_ttf":
                constantScore = ttf_stats.getMean();
                break;
            case "stddev_raw_ttf":
                constantScore = ttf_stats.getStdDev();
                break;
            case "unique_terms_count":
                constantScore = terms.size();
                break;
            default:
                throw new RuntimeException("Invalid stat type specified.");
            }
        } else {
            constantScore = 0.0f;
        }

        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {

            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                return new ConstantScoreScorer(this, constantScore,
                        DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }

        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class to accept a list of terms directly instead of an arbitrary query;
        // rewriting at this point is wrong, and we almost certainly build the TermContext twice for every term.
        // The problem is that we rely on extractTerms, which happens too late in the process.
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}
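
Note the two paths above: the collection-scoped statistic types collapse the extracted terms into a single constant score applied to every document, while the "_raw_tf" types rewrite into a BooleanQuery of PostingsExplorerQuery clauses so the score can vary per document.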

From source file:org.hibernate.search.filter.impl.CachingWrapperQuery.java

License:LGPL

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    final Weight weight = query.createWeight(searcher, needsScores);
    if (needsScores) {
        // our cache is not sufficient, we need scores too
        return weight;
    }

    return new ConstantScoreWeight(weight.getQuery()) {

        @Override
        public void extractTerms(Set<Term> terms) {
            weight.extractTerms(terms);
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            DocIdSet docIdSet = getDocIdSet(context);

            assert docIdSet != null;
            if (docIdSet == DocIdSet.EMPTY) {
                return null;
            }
            final DocIdSetIterator disi = docIdSet.iterator();
            if (disi == null) {
                return null;
            }

            return new ConstantScoreScorer(this, 0f, disi);
        }

        private DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
            final LeafReader reader = context.reader();
            final Object key = reader.getCoreCacheKey();
            Object cached = cache.get(key);
            if (cached != null) {
                return (DocIdSet) cached;
            }
            synchronized (cache) {
                cached = cache.get(key);
                if (cached != null) {
                    return (DocIdSet) cached;
                }
                final DocIdSet docIdSet;
                final Scorer scorer = weight.scorer(context);
                if (scorer == null) {
                    docIdSet = DocIdSet.EMPTY;
                } else {
                    docIdSet = cacheImpl(scorer.iterator(), reader);
                }
                cache.put(key, docIdSet);
                return docIdSet;
            }
        }
    };
}
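
The wrapper above preserves the delegate's term extraction by forwarding extractTerms to the inner Weight, while getDocIdSet uses double-checked locking keyed on the reader's core cache key, so each segment's DocIdSet is built at most once and reused across scorers.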

From source file:org.tallison.lucene.search.concordance.util.SimpleTargetCounter.java

License:Apache License

/**
 * Simple utility class to perform basic term frequency/document frequency
 * counts on the individual terms within a query.  This relies on
 * IndexReader and does not perform any concordance search/retrieval;
 * it is, therefore, very fast.
 * <p>
 * If you want to visit more than basic terms (e.g. SpanNear),
 * see {@link TargetVisitor}
 *
 * @param query query
 * @param searcher searcher
 * @return target term results
 * @throws java.io.IOException if there is an IOException from the searcher
 */
public SimpleTargetTermResults searchSingleTerms(Query query, IndexSearcher searcher) throws IOException {
    Query tmpQ = query.rewrite(searcher.getIndexReader());
    Set<Term> terms = new HashSet<>();
    Weight weight = tmpQ.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    weight.extractTerms(terms);

    Map<String, Integer> dfs = new HashMap<>();
    Map<String, Integer> tfs = new HashMap<>();

    for (Term t : terms) {
        String targ = t.text();
        int docFreq = searcher.getIndexReader().docFreq(t);
        if (docFreq == 0) {
            continue;
        }
        dfs.put(targ, docFreq);

        long tf = searcher.getIndexReader().totalTermFreq(t);
        tfs.put(targ, (int) tf);
    }

    return new SimpleTargetTermResults(dfs, tfs);
}
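
A hypothetical invocation, for illustration only (the field name, term, "searcher", and the no-argument constructor are assumptions, not from the source):

Query q = new TermQuery(new Term("content", "lucene"));
SimpleTargetCounter counter = new SimpleTargetCounter();
SimpleTargetTermResults results = counter.searchSingleTerms(q, searcher);
// "results" now associates each matched term's text with its document
// frequency and total term frequency across the index.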

From source file:org.tallison.lucene.search.queries.SpanQueryConverter.java

License:Apache License

@Override
protected SpanQuery convertUnknownQuery(String field, Query query) {
    if (query instanceof CommonTermsQuery) {

        // specialized since rewriting would change the result query
        // this query is TermContext sensitive.
        CommonTermsQuery ctq = (CommonTermsQuery) query;

        Set<Term> terms = new HashSet<>();
        try {
            Weight w = ctq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
            w.extractTerms(terms);
        } catch (IOException e) {
            throw new RuntimeException("IOException on searcher!!!", e);
        }
        List<SpanQuery> spanQs = new LinkedList<SpanQuery>();

        for (Term term : terms) {
            if (term.field().equals(field)) {
                spanQs.add(new SpanTermQuery(term));
            }
        }
        if (spanQs.size() == 0) {
            return getEmptySpanQuery();
        } else if (spanQs.size() == 1) {
            return spanQs.get(0);
        } else {
            return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()]));
        }
    }
    super.convertUnknownQuery(field, query);
    return null;
}
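
Because CommonTermsQuery rewrites differently per segment, the converter avoids rewriting it; instead it extracts the concrete terms through a throwaway Weight and ORs the ones on the requested field together as a SpanOrQuery.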

From source file:uk.co.flax.luwak.util.ForceNoBulkScoringQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {

    final Weight innerWeight = inner.createWeight(searcher, needsScores);

    return new Weight(ForceNoBulkScoringQuery.this) {
        @Override
        public void extractTerms(Set<Term> set) {
            innerWeight.extractTerms(set);
        }

        @Override
        public Explanation explain(LeafReaderContext leafReaderContext, int i) throws IOException {
            return innerWeight.explain(leafReaderContext, i);
        }

        @Override
        public float getValueForNormalization() throws IOException {
            return innerWeight.getValueForNormalization();
        }

        @Override
        public void normalize(float v, float v1) {
            innerWeight.normalize(v, v1);
        }

        @Override
        public Scorer scorer(LeafReaderContext leafReaderContext) throws IOException {
            return innerWeight.scorer(leafReaderContext);
        }
    };
}
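
Finally, since the prototype above is marked deprecated: in Lucene 8.1 and later, term collection moved to the QueryVisitor API, so a Weight is no longer needed for this purpose. A minimal sketch of the modern equivalent:

Set<Term> terms = new HashSet<>();
query.visit(QueryVisitor.termCollector(terms)); // collects the query's terms without creating a Weight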