List of usage examples for org.apache.lucene.search MultiPhraseQuery getSlop
public int getSlop()
From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java
License:Apache License
/** * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>. * /*w ww .jav a2 s.c om*/ * @param query * Query to extract Terms from * @param terms * Map to place created WeightedSpanTerms in * @throws IOException */ private void extract(Query query, Map<String, WeightedSpanTerm> terms) throws IOException { if (query instanceof BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses(); for (int i = 0; i < queryClauses.length; i++) { if (!queryClauses[i].isProhibited()) { extract(queryClauses[i].getQuery(), terms); } } } else if (query instanceof PhraseQuery) { PhraseQuery phraseQuery = ((PhraseQuery) query); Term[] phraseQueryTerms = phraseQuery.getTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = phraseQuery.getSlop(); int[] positions = phraseQuery.getPositions(); // add largest position increment to slop if (positions.length > 0) { int lastPos = positions[0]; int largestInc = 0; int sz = positions.length; for (int i = 1; i < sz; i++) { int pos = positions[i]; int inc = pos - lastPos; if (inc > largestInc) { largestInc = inc; } lastPos = pos; } if (largestInc > 1) { slop += largestInc; } } boolean inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); sp.setBoost(query.getBoost()); extractWeightedSpanTerms(terms, sp); } else if (query instanceof TermQuery) { extractWeightedTerms(terms, query); } else if (query instanceof SpanQuery) { extractWeightedSpanTerms(terms, (SpanQuery) query); } else if (query instanceof FilteredQuery) { extract(((FilteredQuery) query).getQuery(), terms); } else if (query instanceof DisjunctionMaxQuery) { for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { extract(iterator.next(), terms); } } else if (query instanceof 
MultiTermQuery && expandMultiTermQuery) { MultiTermQuery mtq = ((MultiTermQuery) query); if (mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { mtq = (MultiTermQuery) mtq.clone(); mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = mtq; } FakeReader fReader = new FakeReader(); MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.rewrite(fReader, mtq); if (fReader.field != null) { IndexReader ir = getReaderForField(fReader.field); extract(query.rewrite(ir), terms); } } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final List<Term[]> termArrays = mpq.getTermArrays(); final int[] positions = mpq.getPositions(); if (positions.length > 0) { int maxPosition = positions[positions.length - 1]; for (int i = 0; i < positions.length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } @SuppressWarnings("unchecked") final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.size(); ++i) { final Term[] termArray = termArrays.get(i); List<SpanQuery> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length)); ++distinctPositions; } for (int j = 0; j < termArray.length; ++j) { disjuncts.add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { List<SpanQuery> disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()])); } else { ++positionGaps; } } final int slop = mpq.getSlop(); final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); sp.setBoost(query.getBoost()); extractWeightedSpanTerms(terms, sp); } } }
From source file:org.apache.blur.utils.HighlightHelper.java
License:Apache License
/**
 * Returns a copy of {@code query} whose terms are moved to the field {@code name} when the
 * query currently targets the field-less placeholder field; otherwise returns {@code query}
 * itself.
 * <p>
 * TermQuery, WildcardQuery and MultiPhraseQuery are compared against
 * {@code fieldLessFieldName}; PhraseQuery, PrefixQuery and TermRangeQuery are compared against
 * {@code BlurConstants.SUPER}. Slop is copied for phrase and multi-phrase queries. Any other
 * query type is returned unchanged.
 *
 * @param query
 *          the query to inspect
 * @param name
 *          the field name to move the query to; when {@code null} the query is returned as is
 * @param fieldLessFieldName
 *          the field name that marks a term, wildcard or multi-phrase query as field-less
 * @return a new query on field {@code name}, or {@code query} when no change applies
 */
private static Query setFieldIfNeeded(Query query, String name, String fieldLessFieldName) {
    if (name == null) {
        return query;
    }
    if (query instanceof TermQuery) {
        TermQuery tq = (TermQuery) query;
        Term term = tq.getTerm();
        if (term.field().equals(fieldLessFieldName)) {
            return new TermQuery(new Term(name, term.bytes()));
        }
    } else if (query instanceof WildcardQuery) {
        WildcardQuery wq = (WildcardQuery) query;
        Term term = wq.getTerm();
        if (term.field().equals(fieldLessFieldName)) {
            return new WildcardQuery(new Term(name, term.bytes()));
        }
    } else if (query instanceof MultiPhraseQuery) {
        MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        int[] positions = mpq.getPositions();
        List<Term[]> termArrays = mpq.getTermArrays();
        if (isTermField(termArrays, fieldLessFieldName)) {
            MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
            multiPhraseQuery.setSlop(mpq.getSlop());
            for (int i = 0; i < termArrays.size(); i++) {
                multiPhraseQuery.add(changeFields(termArrays.get(i), name), positions[i]);
            }
            return multiPhraseQuery;
        }
    } else if (query instanceof PhraseQuery) {
        // NOTE(review): this branch and the two below compare against BlurConstants.SUPER
        // rather than fieldLessFieldName like the branches above -- confirm this is intended.
        PhraseQuery pq = (PhraseQuery) query;
        Term[] terms = pq.getTerms();
        int[] positions = pq.getPositions();
        // An empty phrase has no term to read a field from; leave it untouched instead of
        // failing on terms[0].
        if (terms.length > 0 && terms[0].field().equals(BlurConstants.SUPER)) {
            PhraseQuery phraseQuery = new PhraseQuery();
            for (int i = 0; i < terms.length; i++) {
                phraseQuery.add(new Term(name, terms[i].bytes()), positions[i]);
            }
            phraseQuery.setSlop(pq.getSlop());
            return phraseQuery;
        }
    } else if (query instanceof PrefixQuery) {
        PrefixQuery pq = (PrefixQuery) query;
        Term term = pq.getPrefix();
        if (term.field().equals(BlurConstants.SUPER)) {
            return new PrefixQuery(new Term(name, term.bytes()));
        }
    } else if (query instanceof TermRangeQuery) {
        TermRangeQuery trq = (TermRangeQuery) query;
        BytesRef lowerTerm = trq.getLowerTerm();
        BytesRef upperTerm = trq.getUpperTerm();
        boolean includeUpper = trq.includesUpper();
        boolean includeLower = trq.includesLower();
        String field = trq.getField();
        if (field.equals(BlurConstants.SUPER)) {
            return new TermRangeQuery(name, lowerTerm, upperTerm, includeLower, includeUpper);
        }
    }
    return query;
}
From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java
License:Apache License
/** * THIS IS AN UNHOLY ABOMINATION: this exists here and in LUCENE-5317 and * in the highlighter package. Please, please forgive me. * * We need to factor this out into a standalone helper class until * the difference between Query and SpanQueries disappears. * * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a highlighter. * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and {@link org.apache.lucene.search.Query}, this * {@link org.apache.lucene.search.spans.SpanQuery} will not necessarily return the same documents as the * initial Query. For example, the generated SpanQuery will not include * clauses of type BooleanClause.Occur.MUST_NOT. Also, the * {@link org.apache.lucene.search.spans.SpanQuery} will only cover a single field, whereas the {@link org.apache.lucene.search.Query} * might contain multiple fields.// w w w . ja va 2 s. c o m * <p> * Returns an empty SpanQuery if the {@link org.apache.lucene.search.Query} is a class that * is handled, but for some reason can't be converted from a {@link org.apache.lucene.search.Query} to a * {@link org.apache.lucene.search.spans.SpanQuery}. This can happen for many reasons: e.g. if the Query * contains no terms in the requested "field" or the Query is a MatchAllDocsQuery. * <p> * Throws IllegalArgumentException if the Query is a class that is * is not yet handled. * <p> * This class does not rewrite the SpanQuery before returning it. * Clients are required to rewrite if necessary. * <p> * Much of this code is copied directly from * oal.search.highlight.WeightedSpanTermExtractor. There are some subtle * differences. 
* * @param field single field to extract SpanQueries for * @param query query to convert * @return SpanQuery for use in highlighting; can return empty SpanQuery * @throws java.io.IOException for an underlying * IOException in the IndexReader or an IllegalArgumentException if the query type is not recognized */ public SpanQuery convert(String field, Query query) throws IOException { /* * copied nearly verbatim from * org.apache.lucene.search.highlight.WeightedSpanTermExtractor * TODO:refactor to avoid duplication of code if possible. * Beware: there are some subtle differences. */ if (query instanceof SpanQuery) { SpanQuery sq = (SpanQuery) query; if (sq.getField() != null && sq.getField().equals(field)) { return (SpanQuery) query; } else { return getEmptySpanQuery(); } } else if (query instanceof BooleanQuery) { List<BooleanClause> queryClauses = ((BooleanQuery) query).clauses(); List<SpanQuery> spanQs = new ArrayList<SpanQuery>(); for (int i = 0; i < queryClauses.size(); i++) { if (!queryClauses.get(i).isProhibited()) { tryToAdd(field, convert(field, queryClauses.get(i).getQuery()), spanQs); } } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return spanQs.get(0); } else { return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])); } } else if (query instanceof PhraseQuery) { PhraseQuery phraseQuery = ((PhraseQuery) query); Term[] phraseQueryTerms = phraseQuery.getTerms(); if (phraseQueryTerms.length == 0) { return getEmptySpanQuery(); } else if (!phraseQueryTerms[0].field().equals(field)) { return getEmptySpanQuery(); } SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = phraseQuery.getSlop(); int[] positions = phraseQuery.getPositions(); // sum position increments (>1) and add to slop if (positions.length > 0) { int lastPos = positions[0]; int sz = positions.length; for (int i = 1; i < 
sz; i++) { int pos = positions[i]; int inc = pos - lastPos - 1; slop += inc; lastPos = pos; } } boolean inorder = false; if (phraseQuery.getSlop() == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); if (query instanceof BoostQuery) { return new SpanBoostQuery(sp, ((BoostQuery) query).getBoost()); } else { return sp; } } else if (query instanceof TermQuery) { TermQuery tq = (TermQuery) query; if (tq.getTerm().field().equals(field)) { return new SpanTermQuery(tq.getTerm()); } else { return getEmptySpanQuery(); } } else if (query instanceof ConstantScoreQuery) { return convert(field, ((ConstantScoreQuery) query).getQuery()); } else if (query instanceof DisjunctionMaxQuery) { List<SpanQuery> spanQs = new ArrayList<SpanQuery>(); for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { tryToAdd(field, convert(field, iterator.next()), spanQs); } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return spanQs.get(0); } else { return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])); } } else if (query instanceof MatchAllDocsQuery) { return getEmptySpanQuery(); } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final Term[][] termArrays = mpq.getTermArrays(); //test for empty or wrong field if (termArrays.length == 0) { return getEmptySpanQuery(); } else if (termArrays.length > 1) { Term[] ts = termArrays[0]; if (ts.length > 0) { Term t = ts[0]; if (!t.field().equals(field)) { return getEmptySpanQuery(); } } } final int[] positions = mpq.getPositions(); if (positions.length > 0) { int maxPosition = positions[positions.length - 1]; for (int i = 0; i < positions.length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } @SuppressWarnings("unchecked") final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < 
termArrays.length; ++i) { final Term[] termArray = termArrays[i]; List<SpanQuery> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length)); ++distinctPositions; } for (int j = 0; j < termArray.length; ++j) { disjuncts.add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { List<SpanQuery> disjuncts = disjunctLists[i]; if (disjuncts != null) { if (disjuncts.size() == 1) { clauses[position++] = disjuncts.get(0); } else { clauses[position++] = new SpanOrQuery( disjuncts.toArray(new SpanQuery[disjuncts.size()])); } } else { ++positionGaps; } } final int slop = mpq.getSlop(); final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); if (query instanceof BoostQuery) { return new SpanBoostQuery(sp, ((BoostQuery) query).getBoost()); } else { return sp; } } } else if (query instanceof MultiTermQuery) { return new SpanMultiTermQueryWrapper<>((MultiTermQuery) query); } else if (query instanceof SynonymQuery) { List<SpanQuery> clauses = new ArrayList<>(); for (Term term : ((SynonymQuery) query).getTerms()) { clauses.add(new SpanTermQuery(term)); } return new SpanOrQuery(clauses.toArray(new SpanQuery[clauses.size()])); } throw new IllegalArgumentException("Can't convert query of type: " + query.getClass()); }
From source file:org.tallison.lucene.search.spans.SimpleSpanQueryConverter.java
License:Apache License
/** * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a highlighter. * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and {@link org.apache.lucene.search.Query}, this * {@link org.apache.lucene.search.spans.SpanQuery} will not necessarily return the same documents as the * initial Query. For example, the generated SpanQuery will not include * clauses of type BooleanClause.Occur.MUST_NOT. Also, the * {@link org.apache.lucene.search.spans.SpanQuery} will only cover a single field, whereas the {@link org.apache.lucene.search.Query} * might contain multiple fields./*from w ww. j a va 2 s . c o m*/ * <p> * Returns an empty SpanQuery if the {@link org.apache.lucene.search.Query} is a class that * is handled, but for some reason can't be converted from a {@link org.apache.lucene.search.Query} to a * {@link org.apache.lucene.search.spans.SpanQuery}. This can happen for many reasons: e.g. if the Query * contains no terms in the requested "field" or the Query is a MatchAllDocsQuery. * <p> * Throws IllegalArgumentException if the Query is a class that is * is not yet handled. * <p> * This class does not rewrite the SpanQuery before returning it. * Clients are required to rewrite if necessary. * <p> * Much of this code is copied directly from * oal.search.highlight.WeightedSpanTermExtractor. There are some subtle * differences. * <p> * Throws IllegalArgumentException for unknown query types. 
* * @param field single field to extract SpanQueries for * @param queryToConvert query to convert * @return SpanQuery for use in highlighting; can return empty SpanQuery * @throws java.io.IOException if encountered during parse */ public SpanQuery convert(String field, Query queryToConvert) throws IOException { Float boost = null; Query query = queryToConvert; if (queryToConvert instanceof BoostQuery) { query = ((BoostQuery) query).getQuery(); boost = ((BoostQuery) queryToConvert).getBoost(); } /* * copied nearly verbatim from * org.apache.lucene.search.highlight.WeightedSpanTermExtractor * TODO:refactor to avoid duplication of code if possible. * Beware: there are some subtle differences. */ if (query instanceof SpanQuery) { SpanQuery sq = (SpanQuery) query; if (sq.getField().equals(field)) { return (SpanQuery) query; } else { return getEmptySpanQuery(); } } else if (query instanceof BooleanQuery) { List<BooleanClause> queryClauses = ((BooleanQuery) query).clauses(); List<SpanQuery> spanQs = new ArrayList<SpanQuery>(); for (int i = 0; i < queryClauses.size(); i++) { if (!queryClauses.get(i).isProhibited()) { tryToAdd(field, convert(field, queryClauses.get(i).getQuery()), spanQs); } } return addBoost(buildSpanOr(spanQs), boost); } else if (query instanceof PhraseQuery) { PhraseQuery phraseQuery = ((PhraseQuery) query); Term[] phraseQueryTerms = phraseQuery.getTerms(); if (phraseQueryTerms.length == 0) { return getEmptySpanQuery(); } else if (!phraseQueryTerms[0].field().equals(field)) { return getEmptySpanQuery(); } SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = phraseQuery.getSlop(); int[] positions = phraseQuery.getPositions(); // sum position increments (>1) and add to slop if (positions.length > 0) { int lastPos = positions[0]; int sz = positions.length; for (int i = 1; i < sz; i++) { int pos = positions[i]; int inc = pos - 
lastPos - 1; slop += inc; lastPos = pos; } } boolean inorder = false; if (phraseQuery.getSlop() == 0) { inorder = true; } SpanQuery sp = new SpanNearQuery(clauses, slop, inorder); if (query instanceof BoostQuery) { sp = new SpanBoostQuery(sp, ((BoostQuery) query).getBoost()); } return addBoost(sp, boost); } else if (query instanceof TermQuery) { TermQuery tq = (TermQuery) query; if (tq.getTerm().field().equals(field)) { return addBoost(new SpanTermQuery(tq.getTerm()), boost); } else { return getEmptySpanQuery(); } } else if (query instanceof ConstantScoreQuery) { return convert(field, ((ConstantScoreQuery) query).getQuery()); } else if (query instanceof DisjunctionMaxQuery) { List<SpanQuery> spanQs = new ArrayList<>(); for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { tryToAdd(field, convert(field, iterator.next()), spanQs); } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return addBoost(spanQs.get(0), boost); } else { return addBoost(new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])), boost); } } else if (query instanceof MatchAllDocsQuery) { return getEmptySpanQuery(); } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final Term[][] termArrays = mpq.getTermArrays(); //test for empty or wrong field if (termArrays.length == 0) { return getEmptySpanQuery(); } else if (termArrays.length > 1) { Term[] ts = termArrays[0]; if (ts.length > 0) { Term t = ts[0]; if (!t.field().equals(field)) { return getEmptySpanQuery(); } } } final int[] positions = mpq.getPositions(); if (positions.length > 0) { int maxPosition = positions[positions.length - 1]; for (int i = 0; i < positions.length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } @SuppressWarnings("unchecked") final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.length; 
++i) { final Term[] termArray = termArrays[i]; List<SpanQuery> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length)); ++distinctPositions; } for (int j = 0; j < termArray.length; ++j) { disjuncts.add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { List<SpanQuery> disjuncts = disjunctLists[i]; if (disjuncts != null) { if (disjuncts.size() == 1) { clauses[position++] = disjuncts.get(0); } else { clauses[position++] = new SpanOrQuery( disjuncts.toArray(new SpanQuery[disjuncts.size()])); } } else { ++positionGaps; } } final int slop = mpq.getSlop(); final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); return addBoost(sp, boost); } } else if (query instanceof MultiTermQuery) { MultiTermQuery tq = (MultiTermQuery) query; if (!tq.getField().equals(field)) { return getEmptySpanQuery(); } return addBoost(new SpanMultiTermQueryWrapper<>((MultiTermQuery) query), boost); } else if (query instanceof SynonymQuery) { SynonymQuery sq = (SynonymQuery) query; List<SpanQuery> spanQs = new ArrayList<>(); for (Term t : sq.getTerms()) { spanQs.add(new SpanTermQuery(t)); } return addBoost(buildSpanOr(spanQs), boost); } return convertUnknownQuery(field, queryToConvert); }