List of usage examples for org.apache.lucene.search MultiTermQuery setRewriteMethod
public void setRewriteMethod(RewriteMethod method)
From source file:com.o19s.solr.swan.highlight.SpanAwareFieldQuery.java
License:Apache License
/**
 * Recursively decomposes {@code sourceQuery} into the primitive queries the highlighter can
 * work with, accumulating them in {@code flatQueries}.
 *
 * Handled cases: BooleanQuery (non-prohibited clauses recursed), DisjunctionMaxQuery
 * (disjuncts recursed), TermQuery (added once), MultiTermQuery (rewritten to a BooleanQuery
 * of top terms, then recursed), PhraseQuery (kept whole when multi-term, demoted to a
 * TermQuery when single-term), and SpanQuery (positions recorded per leaf reader, then the
 * extracted terms added as SpanTermQueries). Anything else is discarded.
 *
 * @param sourceQuery the query to flatten
 * @param reader      used to rewrite multi-term queries and to walk index leaves for spans;
 *                    multi-term rewriting is skipped when null
 * @param flatQueries output collection of flattened queries (deduplicated for term/phrase)
 * @throws IOException if rewriting or span enumeration fails
 */
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
    if (sourceQuery instanceof BooleanQuery) {
        BooleanQuery bq = (BooleanQuery) sourceQuery;
        for (BooleanClause clause : bq.getClauses()) {
            // MUST_NOT clauses never produce highlights, so skip them.
            if (!clause.isProhibited())
                flatten(clause.getQuery(), reader, flatQueries);
        }
    } else if (sourceQuery instanceof DisjunctionMaxQuery) {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
        for (Query query : dmq) {
            flatten(query, reader, flatQueries);
        }
    } else if (sourceQuery instanceof TermQuery) {
        // Deduplicate: identical term queries are added only once.
        if (!flatQueries.contains(sourceQuery))
            flatQueries.add(sourceQuery);
    } else if (sourceQuery instanceof MultiTermQuery && reader != null) {
        // Clone before changing the rewrite method so the caller's query is untouched.
        MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
        // Rewrite to at most MAX_MTQ_TERMS concrete terms, then flatten the result.
        copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
        BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
        flatten(mtqTerms, reader, flatQueries);
    } else if (sourceQuery instanceof PhraseQuery) {
        if (!flatQueries.contains(sourceQuery)) {
            PhraseQuery pq = (PhraseQuery) sourceQuery;
            if (pq.getTerms().length > 1)
                flatQueries.add(pq);
            else if (pq.getTerms().length == 1) {
                // A one-term "phrase" is just a term query.
                flatQueries.add(new TermQuery(pq.getTerms()[0]));
            }
        }
    } else if (sourceQuery instanceof SpanQuery) {
        // TODO Note that the way we are doing phrases, they become SpanQueries - thus we lose
        // all of the corner case fixes for the phrases already in highlighting - the result will
        // be phrases that have different color highlights for each term.
        Set<Term> terms = new LinkedHashSet<Term>();
        List<AtomicReaderContext> readerContexts = reader.getContext().leaves();
        // Running doc-id base so span positions from each segment map into the composite reader.
        int offset = 0;
        if (readerContexts.size() < 1) {
            return;
        }
        for (AtomicReaderContext arc : readerContexts) {
            if (sourceQuery instanceof SpanNotQuery) {
                // For SpanNot, only the included side contributes highlightable positions.
                SpanNotQuery query = (SpanNotQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getInclude().getSpans(arc, null, new HashMap<Term, TermContext>()));
            } else {
                SpanQuery query = (SpanQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getSpans(arc, null, new HashMap<Term, TermContext>()));
            }
            offset += arc.reader().maxDoc();
        }
        // TODO it is necessary to call getSpans first so that if there is a MultiTerm query it
        // gets rewritten by com.o19s.solr.swan.nodes.SwanTermNode.SwanSpanMultiTermQueryWrapper;
        // no easy way around this.
        sourceQuery.extractTerms(terms);
        for (Term t : terms) {
            // TODO need to check that this isn't already in the flatQueries (see example above)
            flatQueries.add(new SpanTermQuery(t));
        }
    }
    // else discard queries
}
From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java
License:Apache License
/**
 * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied
 * <code>Query</code>.
 *
 * Recursively walks the query tree: boolean clauses (except prohibited ones), filtered and
 * disjunction-max sub-queries are recursed; term and span queries are handed to the weighted
 * extractors; phrase and multi-phrase queries are converted to equivalent SpanNearQueries so
 * their position constraints survive; multi-term queries are (optionally) rewritten against a
 * per-field reader and the rewritten form recursed.
 *
 * @param query Query to extract Terms from
 * @param terms Map to place created WeightedSpanTerms in
 * @throws IOException if rewriting or span extraction against the index fails
 */
private void extract(Query query, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BooleanQuery) {
        BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
        for (int i = 0; i < queryClauses.length; i++) {
            // MUST_NOT clauses cannot match, so they contribute no highlight terms.
            if (!queryClauses[i].isProhibited()) {
                extract(queryClauses[i].getQuery(), terms);
            }
        }
    } else if (query instanceof PhraseQuery) {
        // Convert the phrase into a SpanNearQuery over its terms so positional matching
        // is preserved during highlighting.
        PhraseQuery phraseQuery = ((PhraseQuery) query);
        Term[] phraseQueryTerms = phraseQuery.getTerms();
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
        for (int i = 0; i < phraseQueryTerms.length; i++) {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }
        int slop = phraseQuery.getSlop();
        int[] positions = phraseQuery.getPositions();
        // add largest position increment to slop: explicit position gaps in the phrase
        // (e.g. from stopword removal) must widen the span's allowed distance.
        if (positions.length > 0) {
            int lastPos = positions[0];
            int largestInc = 0;
            int sz = positions.length;
            for (int i = 1; i < sz; i++) {
                int pos = positions[i];
                int inc = pos - lastPos;
                if (inc > largestInc) {
                    largestInc = inc;
                }
                lastPos = pos;
            }
            if (largestInc > 1) {
                slop += largestInc;
            }
        }
        // An exact phrase (slop 0) requires the terms in order.
        boolean inorder = false;
        if (slop == 0) {
            inorder = true;
        }
        SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
        sp.setBoost(query.getBoost());
        extractWeightedSpanTerms(terms, sp);
    } else if (query instanceof TermQuery) {
        extractWeightedTerms(terms, query);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query);
    } else if (query instanceof FilteredQuery) {
        // The filter itself does not contribute terms; only the wrapped query does.
        extract(((FilteredQuery) query).getQuery(), terms);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
            extract(iterator.next(), terms);
        }
    } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
        MultiTermQuery mtq = ((MultiTermQuery) query);
        if (mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
            // Clone before switching the rewrite method so the caller's query is untouched.
            mtq = (MultiTermQuery) mtq.clone();
            mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            query = mtq;
        }
        // First rewrite against a FakeReader purely to discover which field the query targets.
        FakeReader fReader = new FakeReader();
        MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.rewrite(fReader, mtq);
        if (fReader.field != null) {
            // Then rewrite for real against that field's reader and recurse on the expansion.
            IndexReader ir = getReaderForField(fReader.field);
            extract(query.rewrite(ir), terms);
        }
    } else if (query instanceof MultiPhraseQuery) {
        // Convert a multi-phrase query into a SpanNearQuery of per-position SpanOrQueries.
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final List<Term[]> termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            // Find the highest position so the disjunct table can be sized.
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            // disjunctLists[p] holds the alternative span terms occurring at phrase position p.
            @SuppressWarnings("unchecked")
            final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.size(); ++i) {
                final Term[] termArray = termArrays.get(i);
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length));
                    ++distinctPositions;
                }
                for (int j = 0; j < termArray.length; ++j) {
                    disjuncts.add(new SpanTermQuery(termArray[j]));
                }
            }
            // Collapse the table into one SpanOrQuery clause per occupied position;
            // empty positions become extra slop (positionGaps).
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.length; ++i) {
                List<SpanQuery> disjuncts = disjunctLists[i];
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            final int slop = mpq.getSlop();
            final boolean inorder = (slop == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            sp.setBoost(query.getBoost());
            extractWeightedSpanTerms(terms, sp);
        }
    }
}
From source file:org.apache.solr.schema.FieldType.java
License:Apache License
/**
 * Returns a Query instance for doing range searches on this field type.
 * {@link org.apache.solr.search.SolrQueryParser} currently passes part1 and part2 as null if
 * they are '*' respectively. minInclusive and maxInclusive are both true currently by
 * SolrQueryParser but that may change in the future. Also, other QueryParser implementations
 * may have different semantics.
 * <p/>
 * Sub-classes should override this method to provide their own range query implementation.
 * They should strive to handle nulls in part1 and/or part2 as well as unequal minInclusive
 * and maxInclusive parameters gracefully.
 *
 * @param parser       the {@link org.apache.solr.search.QParser} calling the method
 * @param field        the schema field
 * @param part1        the lower boundary of the range, nulls are allowed
 * @param part2        the upper boundary of the range, nulls are allowed
 * @param minInclusive whether the minimum of the range is inclusive or not
 * @param maxInclusive whether the maximum of the range is inclusive or not
 * @return a Query instance to perform range search according to given parameters
 */
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2,
        boolean minInclusive, boolean maxInclusive) {
    // TODO: change these all to use readableToIndexed/bytes instead (e.g. for unicode collation)
    if (field.hasDocValues() && !field.indexed()) {
        // Field has docValues but no inverted index: answer the range from the
        // docValues structures via a constant-score filter instead of a term range.
        if (field.multiValued()) {
            return new ConstantScoreQuery(DocTermOrdsRangeFilter.newBytesRefRange(field.getName(),
                    part1 == null ? null : new BytesRef(toInternal(part1)),
                    part2 == null ? null : new BytesRef(toInternal(part2)), minInclusive, maxInclusive));
        } else {
            return new ConstantScoreQuery(FieldCacheRangeFilter.newStringRange(field.getName(),
                    part1 == null ? null : toInternal(part1), part2 == null ? null : toInternal(part2),
                    minInclusive, maxInclusive));
        }
    } else {
        // Indexed field: use a term range over the internal (indexed) representation,
        // letting the parser/field configuration pick the rewrite method.
        MultiTermQuery rangeQuery = TermRangeQuery.newStringRange(field.getName(),
                part1 == null ? null : toInternal(part1), part2 == null ?
                        null : toInternal(part2), minInclusive, maxInclusive);
        rangeQuery.setRewriteMethod(getRewriteMethod(parser, field));
        return rangeQuery;
    }
}
From source file:org.codelibs.elasticsearch.index.query.support.QueryParsers.java
License:Apache License
public static void setRewriteMethod(MultiTermQuery query, @Nullable MultiTermQuery.RewriteMethod rewriteMethod) { if (rewriteMethod == null) { return;//from w w w. ja v a 2 s . c o m } query.setRewriteMethod(rewriteMethod); }
From source file:org.elasticsearch.index.query.support.QueryParsers.java
License:Apache License
public static void setRewriteMethod(MultiTermQuery query, @Nullable String rewriteMethod) { if (rewriteMethod == null) { return;//www . j a v a 2 s.c om } query.setRewriteMethod(parseRewriteMethod(rewriteMethod)); }
From source file:org.elasticsearch.search.highlight.PostingsHighlighter.java
License:Apache License
private static void overrideMultiTermRewriteMethod(Query query, List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries) { if (query instanceof MultiTermQuery) { MultiTermQuery originalMultiTermQuery = (MultiTermQuery) query; if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) { MultiTermQuery.RewriteMethod originalRewriteMethod = originalMultiTermQuery.getRewriteMethod(); originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)); //we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method modifiedMultiTermQueries.add(Tuple.tuple(originalMultiTermQuery, originalRewriteMethod)); }/*w w w. ja va2 s .co m*/ } if (query instanceof BooleanQuery) { BooleanQuery booleanQuery = (BooleanQuery) query; for (BooleanClause booleanClause : booleanQuery) { overrideMultiTermRewriteMethod(booleanClause.getQuery(), modifiedMultiTermQueries); } } if (query instanceof XFilteredQuery) { overrideMultiTermRewriteMethod(((XFilteredQuery) query).getQuery(), modifiedMultiTermQueries); } if (query instanceof FilteredQuery) { overrideMultiTermRewriteMethod(((FilteredQuery) query).getQuery(), modifiedMultiTermQueries); } if (query instanceof ConstantScoreQuery) { overrideMultiTermRewriteMethod(((ConstantScoreQuery) query).getQuery(), modifiedMultiTermQueries); } }
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private void setRewriteMethod(MultiTermQuery query, Element node, Properties options) { String option = node.getAttribute("filter-rewrite"); if (option == null) option = "yes"; if (options != null) option = options.getProperty(LuceneIndexWorker.OPTION_FILTER_REWRITE, "yes"); if (option.equalsIgnoreCase("yes")) query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); else//from w w w .ja va 2s . com query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); }
From source file:org.sindice.siren.search.TestSirenWildcardQuery.java
License:Open Source License
/** * Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single * TermQuery. The boost should be preserved, and the rewrite should return * a ConstantScoreQuery if the WildcardQuery had a ConstantScore rewriteMethod. *///from w ww . j a v a 2 s .c o m public void testTermWithoutWildcard2() throws IOException { final Directory indexStore = this.getIndexStore("field", new String[] { "nowildcard", "nowildcardx" }); final IndexSearcher searcher = new IndexSearcher(indexStore, true); final MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard")); this.assertMatches(searcher, wq, 1); wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); wq.setBoost(0.1F); final Query q = searcher.rewrite(wq); assertTrue(q instanceof TermQuery); assertEquals(q.getBoost(), wq.getBoost()); searcher.close(); indexStore.close(); }
From source file:org.tallison.lucene.queryparser.spans.SpanQueryParserBase.java
License:Apache License
/**
 * Installs the rewrite method configured for the query's field onto the multi-term
 * query and returns the same (now configured) query instance.
 *
 * @param mtq the multi-term query to configure
 * @return {@code mtq}, with its rewrite method set
 */
private Query wrapMultiTermRewrite(MultiTermQuery mtq) {
    MultiTermQuery.RewriteMethod method = getMultiTermRewriteMethod(mtq.getField());
    mtq.setRewriteMethod(method);
    return mtq;
}
From source file:perf.AutoPrefixPerf.java
License:Apache License
private static void printQueryTerms(final MultiTermQuery mtq, final IndexSearcher searcher) throws IOException { final AtomicInteger termCount = new AtomicInteger(); final AtomicInteger docCount = new AtomicInteger(); // TODO: is there an easier way to see terms an MTQ matches? this is awkward MultiTermQuery.RewriteMethod rewriter = mtq.getRewriteMethod(); if (mtq instanceof TermRangeQuery) { TermRangeQuery trq = (TermRangeQuery) mtq; BytesRef lowerTerm = trq.getLowerTerm(); BytesRef upperTerm = trq.getUpperTerm(); System.out.println("query: " + bytesToLong(lowerTerm) + " " + lowerTerm + " - " + bytesToLong(upperTerm) + " " + upperTerm); } else {/* w ww . j av a2s . c o m*/ System.out.println("query: " + mtq); } System.out.println(" query matches " + searcher.search(mtq, 1).totalHits + " docs"); mtq.setRewriteMethod(new MultiTermQuery.RewriteMethod() { @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { for (AtomicReaderContext ctx : searcher.getIndexReader().leaves()) { TermsEnum termsEnum = getTermsEnum(mtq, ctx.reader().fields().terms(mtq.getField()), null); System.out.println(" reader=" + ctx.reader()); BytesRef term; while ((term = termsEnum.next()) != null) { System.out.println( " term: len=" + term.length + " " + term + " dF=" + termsEnum.docFreq()); termCount.incrementAndGet(); docCount.addAndGet(termsEnum.docFreq()); } } return null; } }); mtq.rewrite(searcher.getIndexReader()); System.out.println(" total terms: " + termCount); System.out.println(" total docs: " + docCount); mtq.setRewriteMethod(rewriter); }