List of usage examples for org.apache.lucene.search.MultiTermQuery#getRewriteMethod
public RewriteMethod getRewriteMethod()
From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java
License:Apache License
/** * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>. * // w w w . j a v a 2s . c om * @param query * Query to extract Terms from * @param terms * Map to place created WeightedSpanTerms in * @throws IOException */ private void extract(Query query, Map<String, WeightedSpanTerm> terms) throws IOException { if (query instanceof BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses(); for (int i = 0; i < queryClauses.length; i++) { if (!queryClauses[i].isProhibited()) { extract(queryClauses[i].getQuery(), terms); } } } else if (query instanceof PhraseQuery) { PhraseQuery phraseQuery = ((PhraseQuery) query); Term[] phraseQueryTerms = phraseQuery.getTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = phraseQuery.getSlop(); int[] positions = phraseQuery.getPositions(); // add largest position increment to slop if (positions.length > 0) { int lastPos = positions[0]; int largestInc = 0; int sz = positions.length; for (int i = 1; i < sz; i++) { int pos = positions[i]; int inc = pos - lastPos; if (inc > largestInc) { largestInc = inc; } lastPos = pos; } if (largestInc > 1) { slop += largestInc; } } boolean inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); sp.setBoost(query.getBoost()); extractWeightedSpanTerms(terms, sp); } else if (query instanceof TermQuery) { extractWeightedTerms(terms, query); } else if (query instanceof SpanQuery) { extractWeightedSpanTerms(terms, (SpanQuery) query); } else if (query instanceof FilteredQuery) { extract(((FilteredQuery) query).getQuery(), terms); } else if (query instanceof DisjunctionMaxQuery) { for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { extract(iterator.next(), terms); } } else if (query instanceof 
MultiTermQuery && expandMultiTermQuery) { MultiTermQuery mtq = ((MultiTermQuery) query); if (mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { mtq = (MultiTermQuery) mtq.clone(); mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = mtq; } FakeReader fReader = new FakeReader(); MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.rewrite(fReader, mtq); if (fReader.field != null) { IndexReader ir = getReaderForField(fReader.field); extract(query.rewrite(ir), terms); } } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final List<Term[]> termArrays = mpq.getTermArrays(); final int[] positions = mpq.getPositions(); if (positions.length > 0) { int maxPosition = positions[positions.length - 1]; for (int i = 0; i < positions.length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } @SuppressWarnings("unchecked") final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.size(); ++i) { final Term[] termArray = termArrays.get(i); List<SpanQuery> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length)); ++distinctPositions; } for (int j = 0; j < termArray.length; ++j) { disjuncts.add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { List<SpanQuery> disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()])); } else { ++positionGaps; } } final int slop = mpq.getSlop(); final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); sp.setBoost(query.getBoost()); extractWeightedSpanTerms(terms, sp); } } }
From source file:org.elasticsearch.search.highlight.PostingsHighlighter.java
License:Apache License
private static void overrideMultiTermRewriteMethod(Query query, List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries) { if (query instanceof MultiTermQuery) { MultiTermQuery originalMultiTermQuery = (MultiTermQuery) query; if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) { MultiTermQuery.RewriteMethod originalRewriteMethod = originalMultiTermQuery.getRewriteMethod(); originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)); //we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method modifiedMultiTermQueries.add(Tuple.tuple(originalMultiTermQuery, originalRewriteMethod)); }//from w w w. j a va 2 s . c o m } if (query instanceof BooleanQuery) { BooleanQuery booleanQuery = (BooleanQuery) query; for (BooleanClause booleanClause : booleanQuery) { overrideMultiTermRewriteMethod(booleanClause.getQuery(), modifiedMultiTermQueries); } } if (query instanceof XFilteredQuery) { overrideMultiTermRewriteMethod(((XFilteredQuery) query).getQuery(), modifiedMultiTermQueries); } if (query instanceof FilteredQuery) { overrideMultiTermRewriteMethod(((FilteredQuery) query).getQuery(), modifiedMultiTermQueries); } if (query instanceof ConstantScoreQuery) { overrideMultiTermRewriteMethod(((ConstantScoreQuery) query).getQuery(), modifiedMultiTermQueries); } }
From source file:perf.AutoPrefixPerf.java
License:Apache License
private static void printQueryTerms(final MultiTermQuery mtq, final IndexSearcher searcher) throws IOException { final AtomicInteger termCount = new AtomicInteger(); final AtomicInteger docCount = new AtomicInteger(); // TODO: is there an easier way to see terms an MTQ matches? this is awkward MultiTermQuery.RewriteMethod rewriter = mtq.getRewriteMethod(); if (mtq instanceof TermRangeQuery) { TermRangeQuery trq = (TermRangeQuery) mtq; BytesRef lowerTerm = trq.getLowerTerm(); BytesRef upperTerm = trq.getUpperTerm(); System.out.println("query: " + bytesToLong(lowerTerm) + " " + lowerTerm + " - " + bytesToLong(upperTerm) + " " + upperTerm); } else {//from ww w.ja va 2 s. c o m System.out.println("query: " + mtq); } System.out.println(" query matches " + searcher.search(mtq, 1).totalHits + " docs"); mtq.setRewriteMethod(new MultiTermQuery.RewriteMethod() { @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { for (AtomicReaderContext ctx : searcher.getIndexReader().leaves()) { TermsEnum termsEnum = getTermsEnum(mtq, ctx.reader().fields().terms(mtq.getField()), null); System.out.println(" reader=" + ctx.reader()); BytesRef term; while ((term = termsEnum.next()) != null) { System.out.println( " term: len=" + term.length + " " + term + " dF=" + termsEnum.docFreq()); termCount.incrementAndGet(); docCount.addAndGet(termsEnum.docFreq()); } } return null; } }); mtq.rewrite(searcher.getIndexReader()); System.out.println(" total terms: " + termCount); System.out.println(" total docs: " + docCount); mtq.setRewriteMethod(rewriter); }