Example usage for org.apache.lucene.search MultiTermQuery rewrite

List of usage examples for org.apache.lucene.search MultiTermQuery rewrite

Introduction

On this page you can find example usages of org.apache.lucene.search.MultiTermQuery.rewrite.

Prototype

@Override
public final Query rewrite(IndexReader reader) throws IOException 

Source Link

Document

To rewrite to a simpler form, instead return a simpler enum from getTermsEnum(Terms, AttributeSource).

Usage

From source file:com.o19s.solr.swan.highlight.SpanAwareFieldQuery.java

License:Apache License

/**
 * Recursively breaks {@code sourceQuery} down into simpler queries and appends them to
 * {@code flatQueries}.
 *
 * <ul>
 *   <li>{@code BooleanQuery}: every non-prohibited clause is flattened.</li>
 *   <li>{@code DisjunctionMaxQuery}: every disjunct is flattened.</li>
 *   <li>{@code TermQuery}: added as-is unless already present.</li>
 *   <li>{@code MultiTermQuery}: a clone is rewritten against {@code reader} (limited by
 *       {@code MAX_MTQ_TERMS}) and the rewritten query is flattened.</li>
 *   <li>{@code PhraseQuery}: added as-is when it has more than one term; a single-term
 *       phrase is added as a {@code TermQuery}; an empty phrase is dropped.</li>
 *   <li>{@code SpanQuery}: its spans are handed to {@code addSpansPositions} for every
 *       leaf of {@code reader}, then each extracted term is added as a
 *       {@code SpanTermQuery} unless already present.</li>
 * </ul>
 *
 * Any other query type is discarded. Multi-term and span queries need an index reader and
 * are discarded as well when {@code reader} is {@code null}.
 *
 * @param sourceQuery the query to decompose
 * @param reader      index reader used for rewriting and span enumeration; may be {@code null}
 * @param flatQueries receives the flattened queries
 * @throws IOException if rewriting the query or reading spans from the index fails
 */
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
    if (sourceQuery instanceof BooleanQuery) {
        BooleanQuery bq = (BooleanQuery) sourceQuery;
        for (BooleanClause clause : bq.getClauses()) {
            if (!clause.isProhibited()) {
                flatten(clause.getQuery(), reader, flatQueries);
            }
        }
    } else if (sourceQuery instanceof DisjunctionMaxQuery) {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
        for (Query query : dmq) {
            flatten(query, reader, flatQueries);
        }
    } else if (sourceQuery instanceof TermQuery) {
        if (!flatQueries.contains(sourceQuery)) {
            flatQueries.add(sourceQuery);
        }
    } else if (sourceQuery instanceof MultiTermQuery && reader != null) {
        // Work on a clone so the caller's query keeps its own rewrite method.
        MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
        copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
        // No downcast: whatever the rewrite yields is dispatched by the recursive call.
        flatten(copy.rewrite(reader), reader, flatQueries);
    } else if (sourceQuery instanceof PhraseQuery) {
        if (!flatQueries.contains(sourceQuery)) {
            PhraseQuery pq = (PhraseQuery) sourceQuery;
            if (pq.getTerms().length > 1) {
                flatQueries.add(pq);
            } else if (pq.getTerms().length == 1) {
                flatQueries.add(new TermQuery(pq.getTerms()[0]));
            }
        }
    } else if (sourceQuery instanceof SpanQuery && reader != null) {
        //TODO Note that the way we are doing phrases, they become SpanQueries - thus we lose
        //all of the corner case fixes for the phrases already in highlighting - the result will be
        //phrases that have different color highlights for each term
        List<AtomicReaderContext> readerContexts = reader.getContext().leaves();
        if (readerContexts.size() < 1) {
            return;
        }

        // For a SpanNotQuery only the spans of its include clause are collected.
        SpanQuery spanSource = (sourceQuery instanceof SpanNotQuery)
                ? ((SpanNotQuery) sourceQuery).getInclude()
                : (SpanQuery) sourceQuery;
        String field = ((SpanQuery) sourceQuery).getField();

        // Running sum of maxDoc() over the leaves already visited; passed along with each leaf's spans.
        int offset = 0;
        for (AtomicReaderContext arc : readerContexts) {
            addSpansPositions(offset, field, spanSource.getSpans(arc, null, new HashMap<Term, TermContext>()));
            offset += arc.reader().maxDoc();
        }

        //TODO it is necessary to call getSpans first so that if there is a MultiTerm query it gets rewritten by com.o19s.solr.swan.nodes.SwanTermNode.SwanSpanMultiTermQueryWrapper
        //no easy way around this
        Set<Term> terms = new LinkedHashSet<Term>();
        sourceQuery.extractTerms(terms);
        for (Term t : terms) {
            // Same duplicate check the TermQuery and PhraseQuery branches apply.
            Query spanTerm = new SpanTermQuery(t);
            if (!flatQueries.contains(spanTerm)) {
                flatQueries.add(spanTerm);
            }
        }
    }
    // else discard queries
}

From source file:perf.AutoPrefixPerf.java

License:Apache License

/**
 * Prints diagnostic information about a multi-term query to standard output: the query
 * itself, the hit count reported by {@code searcher}, every term the query enumerates in
 * each index segment together with its document frequency, and the resulting totals.
 *
 * <p>The terms are enumerated by temporarily installing a rewrite method on {@code mtq}
 * that walks the terms instead of producing a query. The original rewrite method is always
 * restored before this method returns, even when enumeration fails.
 *
 * @param mtq      the query to inspect; its rewrite method is swapped for the duration of the call
 * @param searcher the searcher whose index reader is inspected
 * @throws IOException if searching or reading terms from the index fails
 */
private static void printQueryTerms(final MultiTermQuery mtq, final IndexSearcher searcher) throws IOException {
    final AtomicInteger termCount = new AtomicInteger();
    final AtomicInteger docCount = new AtomicInteger();
    // TODO: is there an easier way to see terms an MTQ matches?  this is awkward
    MultiTermQuery.RewriteMethod rewriter = mtq.getRewriteMethod();
    if (mtq instanceof TermRangeQuery) {
        TermRangeQuery trq = (TermRangeQuery) mtq;
        BytesRef lowerTerm = trq.getLowerTerm();
        BytesRef upperTerm = trq.getUpperTerm();
        System.out.println("query: " + bytesToLong(lowerTerm) + " " + lowerTerm + " - " + bytesToLong(upperTerm)
                + " " + upperTerm);
    } else {
        System.out.println("query: " + mtq);
    }
    // Runs with the query's own rewrite method, before the diagnostic one is installed.
    System.out.println("  query matches " + searcher.search(mtq, 1).totalHits + " docs");
    mtq.setRewriteMethod(new MultiTermQuery.RewriteMethod() {
        @Override
        public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
            for (AtomicReaderContext ctx : reader.leaves()) {
                // NOTE(review): terms(...) may be null for a segment that lacks the field - confirm
                TermsEnum termsEnum = getTermsEnum(query, ctx.reader().fields().terms(query.getField()), null);
                System.out.println("  reader=" + ctx.reader());
                BytesRef term;
                while ((term = termsEnum.next()) != null) {
                    System.out.println(
                            "    term: len=" + term.length + " " + term + " dF=" + termsEnum.docFreq());
                    termCount.incrementAndGet();
                    docCount.addAndGet(termsEnum.docFreq());
                }
            }

            // Only the printing side effect is wanted; the caller below discards the result.
            return null;
        }
    });
    try {
        mtq.rewrite(searcher.getIndexReader());
    } finally {
        // Never leave the caller's query with the null-returning diagnostic rewrite method.
        mtq.setRewriteMethod(rewriter);
    }
    System.out.println("  total terms: " + termCount);
    System.out.println("  total docs: " + docCount);
}