Example usage for org.apache.lucene.search MultiTermQuery getRewriteMethod

List of usage examples for org.apache.lucene.search.MultiTermQuery.getRewriteMethod()

Introduction

On this page you can find example usages of org.apache.lucene.search.MultiTermQuery.getRewriteMethod().

Prototype

public RewriteMethod getRewriteMethod() 

Source Link

Usage

From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java

License:Apache License

/**
 * Populates a <code>Map</code> with {@link WeightedSpanTerm}s derived from the supplied
 * <code>Query</code>, dispatching on the concrete query type. Composite queries are unwrapped
 * recursively; phrase-like queries are converted to a <code>SpanNearQuery</code> first. Query
 * types not listed in the dispatch chain are ignored.
 *
 * @param query
 *          Query to extract Terms from
 * @param terms
 *          Map to place created WeightedSpanTerms in
 * @throws IOException
 */
private void extract(Query query, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BooleanQuery) {
        // Descend into every clause except the prohibited ones.
        for (BooleanClause clause : ((BooleanQuery) query).getClauses()) {
            if (clause.isProhibited()) {
                continue;
            }
            extract(clause.getQuery(), terms);
        }
    } else if (query instanceof PhraseQuery) {
        final PhraseQuery phrase = (PhraseQuery) query;

        // One span term clause per phrase term, in phrase order.
        final Term[] phraseTerms = phrase.getTerms();
        final SpanQuery[] spanClauses = new SpanQuery[phraseTerms.length];
        int filled = 0;
        for (Term phraseTerm : phraseTerms) {
            spanClauses[filled++] = new SpanTermQuery(phraseTerm);
        }

        int slop = phrase.getSlop();
        final int[] positions = phrase.getPositions();

        // Widen the slop by the largest increment between consecutive positions,
        // but only when that increment exceeds 1.
        int largestGap = 0;
        for (int i = 1; i < positions.length; i++) {
            largestGap = Math.max(largestGap, positions[i] - positions[i - 1]);
        }
        if (largestGap > 1) {
            slop += largestGap;
        }

        // Span matching is ordered only when the final slop is zero.
        final boolean inorder = (slop == 0);

        SpanNearQuery near = new SpanNearQuery(spanClauses, slop, inorder);
        near.setBoost(query.getBoost());
        extractWeightedSpanTerms(terms, near);
    } else if (query instanceof TermQuery) {
        extractWeightedTerms(terms, query);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query);
    } else if (query instanceof FilteredQuery) {
        // Only the wrapped query is inspected here.
        extract(((FilteredQuery) query).getQuery(), terms);
    } else if (query instanceof DisjunctionMaxQuery) {
        Iterator<Query> disjuncts = ((DisjunctionMaxQuery) query).iterator();
        while (disjuncts.hasNext()) {
            extract(disjuncts.next(), terms);
        }
    } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
        MultiTermQuery multiTerm = (MultiTermQuery) query;
        if (multiTerm.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
            // Work on a clone so the caller's query keeps its own rewrite method.
            multiTerm = (MultiTermQuery) multiTerm.clone();
            multiTerm.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            query = multiTerm;
        }
        // Rewrite once against a FakeReader; continue only if it reports a field afterwards.
        FakeReader fakeReader = new FakeReader();
        MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.rewrite(fakeReader, multiTerm);
        if (fakeReader.field != null) {
            extract(query.rewrite(getReaderForField(fakeReader.field)), terms);
        }
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) query;
        final List<Term[]> termArrays = multiPhrase.getTermArrays();
        final int[] positions = multiPhrase.getPositions();
        if (positions.length > 0) {

            // Highest position determines how many per-position buckets are needed.
            int highestPosition = positions[positions.length - 1];
            for (int candidate : positions) {
                highestPosition = Math.max(highestPosition, candidate);
            }

            @SuppressWarnings("unchecked")
            final List<SpanQuery>[] byPosition = new List[highestPosition + 1];
            int occupiedPositions = 0;

            // Group the span term queries of each term array under that array's position.
            for (int i = 0; i < termArrays.size(); ++i) {
                final Term[] alternatives = termArrays.get(i);
                final int slot = positions[i];
                if (byPosition[slot] == null) {
                    byPosition[slot] = new ArrayList<SpanQuery>(alternatives.length);
                    ++occupiedPositions;
                }
                for (Term alternative : alternatives) {
                    byPosition[slot].add(new SpanTermQuery(alternative));
                }
            }

            // Each occupied position becomes one SpanOrQuery clause; empty positions are
            // counted so they can be added to the slop below.
            final SpanQuery[] clauses = new SpanQuery[occupiedPositions];
            int nextClause = 0;
            int emptyPositions = 0;
            for (List<SpanQuery> bucket : byPosition) {
                if (bucket == null) {
                    ++emptyPositions;
                    continue;
                }
                clauses[nextClause++] = new SpanOrQuery(bucket.toArray(new SpanQuery[bucket.size()]));
            }

            final int slop = multiPhrase.getSlop();
            final boolean inorder = (slop == 0);

            SpanNearQuery near = new SpanNearQuery(clauses, slop + emptyPositions, inorder);
            near.setBoost(query.getBoost());
            extractWeightedSpanTerms(terms, near);
        }
    }
}

From source file:org.elasticsearch.search.highlight.PostingsHighlighter.java

License:Apache License

/**
 * Walks the given query tree and, for every {@link MultiTermQuery} whose rewrite method is
 * rejected by <code>allowsForTermExtraction</code>, installs a
 * <code>TopTermsScoringBooleanQueryRewrite(50)</code> instead. Each changed query is recorded
 * together with its previous rewrite method in <code>modifiedMultiTermQueries</code>.
 *
 * @param query
 *          root of the query tree to inspect
 * @param modifiedMultiTermQueries
 *          receives one (query, previous rewrite method) tuple per modified query
 */
private static void overrideMultiTermRewriteMethod(Query query,
        List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries) {

    if (query instanceof MultiTermQuery) {
        MultiTermQuery multiTermQuery = (MultiTermQuery) query;
        MultiTermQuery.RewriteMethod previousRewriteMethod = multiTermQuery.getRewriteMethod();
        if (!allowsForTermExtraction(previousRewriteMethod)) {
            // The query must be rewritten again because its current rewrite method was
            // rejected by allowsForTermExtraction; remember the previous method alongside it.
            multiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
            modifiedMultiTermQueries.add(Tuple.tuple(multiTermQuery, previousRewriteMethod));
        }
    }

    if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            overrideMultiTermRewriteMethod(clause.getQuery(), modifiedMultiTermQueries);
        }
    }

    // The wrapper types below each expose a single inner query; the checks are kept
    // independent of one another rather than chained.
    if (query instanceof XFilteredQuery) {
        XFilteredQuery wrapper = (XFilteredQuery) query;
        overrideMultiTermRewriteMethod(wrapper.getQuery(), modifiedMultiTermQueries);
    }

    if (query instanceof FilteredQuery) {
        FilteredQuery wrapper = (FilteredQuery) query;
        overrideMultiTermRewriteMethod(wrapper.getQuery(), modifiedMultiTermQueries);
    }

    if (query instanceof ConstantScoreQuery) {
        ConstantScoreQuery wrapper = (ConstantScoreQuery) query;
        overrideMultiTermRewriteMethod(wrapper.getQuery(), modifiedMultiTermQueries);
    }
}

From source file:perf.AutoPrefixPerf.java

License:Apache License

/**
 * Prints diagnostic information about a multi-term query to standard output: the query itself,
 * the hit count reported by the searcher, every term the query enumerates in each leaf reader
 * (with its length and document frequency), and the totals.
 *
 * <p>The query's rewrite method is temporarily replaced with one that only enumerates and prints
 * terms. The original rewrite method is always restored before this method returns, including
 * when enumeration fails with an exception.
 *
 * <p>Note that "total docs" is the sum of the per-term document frequencies over all leaves.
 *
 * @param mtq
 *          the query to inspect; its rewrite method is swapped during the call and restored after
 * @param searcher
 *          searcher used to count hits and to supply the index reader whose leaves are scanned
 * @throws IOException
 *           if searching or term enumeration fails
 */
private static void printQueryTerms(final MultiTermQuery mtq, final IndexSearcher searcher) throws IOException {
    final AtomicInteger termCount = new AtomicInteger();
    final AtomicInteger docCount = new AtomicInteger();
    // TODO: is there an easier way to see terms an MTQ matches?  this is awkward
    // Remember the caller's rewrite method so it can be put back afterwards.
    final MultiTermQuery.RewriteMethod rewriter = mtq.getRewriteMethod();
    if (mtq instanceof TermRangeQuery) {
        TermRangeQuery trq = (TermRangeQuery) mtq;
        BytesRef lowerTerm = trq.getLowerTerm();
        BytesRef upperTerm = trq.getUpperTerm();
        System.out.println("query: " + bytesToLong(lowerTerm) + " " + lowerTerm + " - " + bytesToLong(upperTerm)
                + " " + upperTerm);
    } else {
        System.out.println("query: " + mtq);
    }
    System.out.println("  query matches " + searcher.search(mtq, 1).totalHits + " docs");
    // Install a rewrite method that does no rewriting: it just walks the terms of every leaf.
    mtq.setRewriteMethod(new MultiTermQuery.RewriteMethod() {
        @Override
        public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
            for (AtomicReaderContext ctx : searcher.getIndexReader().leaves()) {
                TermsEnum termsEnum = getTermsEnum(mtq, ctx.reader().fields().terms(mtq.getField()), null);
                System.out.println("  reader=" + ctx.reader());
                BytesRef term;
                while ((term = termsEnum.next()) != null) {
                    System.out.println(
                            "    term: len=" + term.length + " " + term + " dF=" + termsEnum.docFreq());
                    termCount.incrementAndGet();
                    docCount.addAndGet(termsEnum.docFreq());
                }
            }

            // No rewritten query is produced; the result of rewrite() is ignored below.
            return null;
        }
    });
    try {
        mtq.rewrite(searcher.getIndexReader());
        System.out.println("  total terms: " + termCount);
        System.out.println("  total docs: " + docCount);
    } finally {
        // Restore even on failure; otherwise the caller's query would keep the debugging
        // rewrite method, which returns null from rewrite().
        mtq.setRewriteMethod(rewriter);
    }
}