Example usage for org.apache.lucene.search MultiPhraseQuery getTermArrays

List of usage examples for org.apache.lucene.search MultiPhraseQuery getTermArrays

Introduction

On this page you can find example usages of org.apache.lucene.search.MultiPhraseQuery.getTermArrays().

Prototype

public Term[][] getTermArrays() 

Source Link

Document

Returns the arrays of arrays of terms in the multi-phrase.

Usage

From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java

License:Apache License

/**
 * Collects {@link WeightedSpanTerm}s for the supplied <code>Query</code> into <code>terms</code>,
 * dispatching on the concrete query type.
 * <p>
 * Compound queries (boolean, filtered, disjunction-max) are unwrapped recursively, with
 * prohibited boolean clauses skipped. Phrase and multi-phrase queries are first re-expressed as
 * a <code>SpanNearQuery</code>. Multi-term queries are only expanded when
 * <code>expandMultiTermQuery</code> is set. Query types that match none of the branches are
 * silently ignored.
 *
 * @param query
 *          Query to extract Terms from
 * @param terms
 *          Map to place created WeightedSpanTerms in
 * @throws IOException
 *          propagated from the extraction and rewrite calls made here
 */
private void extract(Query query, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BooleanQuery) {
        for (BooleanClause clause : ((BooleanQuery) query).getClauses()) {
            // Prohibited clauses contribute no terms.
            if (clause.isProhibited()) {
                continue;
            }
            extract(clause.getQuery(), terms);
        }
    } else if (query instanceof PhraseQuery) {
        final PhraseQuery phrase = (PhraseQuery) query;
        final Term[] phraseTerms = phrase.getTerms();
        final SpanQuery[] spanClauses = new SpanQuery[phraseTerms.length];
        for (int idx = 0; idx < spanClauses.length; idx++) {
            spanClauses[idx] = new SpanTermQuery(phraseTerms[idx]);
        }

        int effectiveSlop = phrase.getSlop();
        final int[] termPositions = phrase.getPositions();
        // Find the widest step between consecutive positions; a step above 1 is added to the slop.
        int widestStep = 0;
        for (int idx = 1; idx < termPositions.length; idx++) {
            widestStep = Math.max(widestStep, termPositions[idx] - termPositions[idx - 1]);
        }
        if (widestStep > 1) {
            effectiveSlop += widestStep;
        }

        // The span query is ordered only when the resulting slop is exactly zero.
        final SpanNearQuery near = new SpanNearQuery(spanClauses, effectiveSlop, effectiveSlop == 0);
        near.setBoost(query.getBoost());
        extractWeightedSpanTerms(terms, near);
    } else if (query instanceof TermQuery) {
        extractWeightedTerms(terms, query);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query);
    } else if (query instanceof FilteredQuery) {
        extract(((FilteredQuery) query).getQuery(), terms);
    } else if (query instanceof DisjunctionMaxQuery) {
        final Iterator<Query> disjuncts = ((DisjunctionMaxQuery) query).iterator();
        while (disjuncts.hasNext()) {
            extract(disjuncts.next(), terms);
        }
    } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
        MultiTermQuery multiTerm = (MultiTermQuery) query;
        if (multiTerm.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
            // Switch the rewrite method on a clone so the caller's query object is left untouched.
            multiTerm = (MultiTermQuery) multiTerm.clone();
            multiTerm.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            query = multiTerm;
        }
        // The rewrite result is discarded; only the field left on the FakeReader is used afterwards.
        final FakeReader probe = new FakeReader();
        MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.rewrite(probe, multiTerm);
        if (probe.field != null) {
            extract(query.rewrite(getReaderForField(probe.field)), terms);
        }
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) query;
        final List<Term[]> alternativesPerSlot = multiPhrase.getTermArrays();
        final int[] slotPositions = multiPhrase.getPositions();
        if (slotPositions.length > 0) {

            // Highest position used by any slot; it sizes the per-position table below.
            int highest = slotPositions[slotPositions.length - 1];
            for (int k = 0; k < slotPositions.length - 1; ++k) {
                highest = Math.max(highest, slotPositions[k]);
            }

            @SuppressWarnings("unchecked")
            final List<SpanQuery>[] byPosition = new List[highest + 1];
            int occupied = 0;

            // Group the alternatives of every slot under the position that slot occupies.
            for (int slot = 0; slot < alternativesPerSlot.size(); ++slot) {
                final Term[] alternatives = alternativesPerSlot.get(slot);
                final int at = slotPositions[slot];
                if (byPosition[at] == null) {
                    byPosition[at] = new ArrayList<SpanQuery>(alternatives.length);
                    ++occupied;
                }
                for (Term alternative : alternatives) {
                    byPosition[at].add(new SpanTermQuery(alternative));
                }
            }

            // One SpanOrQuery per occupied position; unoccupied positions are counted as gaps.
            int gaps = 0;
            int next = 0;
            final SpanQuery[] nearClauses = new SpanQuery[occupied];
            for (List<SpanQuery> bucket : byPosition) {
                if (bucket == null) {
                    ++gaps;
                    continue;
                }
                nearClauses[next++] = new SpanOrQuery(bucket.toArray(new SpanQuery[bucket.size()]));
            }

            // Gaps widen the slop; ordering is decided by the query's own slop alone.
            final int ownSlop = multiPhrase.getSlop();
            final SpanNearQuery near = new SpanNearQuery(nearClauses, ownSlop + gaps, ownSlop == 0);
            near.setBoost(query.getBoost());
            extractWeightedSpanTerms(terms, near);
        }
    }
}

From source file:newseman.TestSemanticTaggerIndexing.java

License:Apache License

public void testSemanticTokenFilter() throws IOException, ParseException {
    final String text = "velk svtov revoluce byla velk jnov revoluce "
            + "s velkou extra jnovou revoluc";

    // Index one document that carries the same text in the fields "foo" and "foox".
    final Directory ramdir = new RAMDirectory();
    final Analyzer analyzer = new TestSemanticAnalyzer();
    final IndexWriter writer = new IndexWriter(ramdir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    final Document doc = new Document();
    doc.add(newField("foo", text, TextField.TYPE_STORED));
    doc.add(newField("foox", text, TextField.TYPE_STORED));
    writer.addDocument(doc);
    writer.close();

    final IndexSearcher ram = new IndexSearcher(DirectoryReader.open(ramdir));
    final QueryParser fooParser = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer);
    final QueryParser fooxParser = new QueryParser(TEST_VERSION_CURRENT, "foox", analyzer);

    // The XXX term is expected to be found in "foo" but not in "foox".
    assertTrue(ram.search(fooParser.parse("foo:XXX"), 10).totalHits == 1);
    assertTrue(ram.search(fooParser.parse("foox:XXX"), 10).totalHits == 0);

    // currently, each token is tokenized by the qparser
    // so we don't see them together
    Query fooPhrase = fooParser.parse("\"velk jnov revoluce\"");
    final Query fooxPhrase = fooxParser.parse("\"velk jnov revoluce\"");

    assertTrue(!fooPhrase.equals(fooxPhrase));
    assertTrue(fooPhrase instanceof MultiPhraseQuery);
    assertTrue(fooxPhrase instanceof PhraseQuery);

    // Render the term arrays as text: terms separated by a space, positions closed by '|'.
    final List<Term[]> termArrays = ((MultiPhraseQuery) fooPhrase).getTermArrays();
    final StringBuilder rendered = new StringBuilder();
    for (Term[] slot : termArrays) {
        for (Term term : slot) {
            rendered.append(term.toString()).append(" ");
        }
        rendered.append("|");
    }
    assertTrue(rendered.toString()
            .equals("foo:velk foo:velk jnov revoluce foo:XXX |foo:jnov |foo:revoluce |"));

    assertTrue(fooPhrase.toString().equals("foo:\"(velk velk jnov revoluce XXX) jnov revoluce\""));
    assertTrue(fooxPhrase.toString().equals("foox:\"velk jnov revoluce\""));

    // Term extraction on the phrase query is exercised here; the collected set is not inspected.
    Set<Term> terms = new HashSet<Term>();
    fooPhrase.extractTerms(terms);

    // extract only the 2nd (semantic) element
    fooPhrase = fooParser.parse("revoluce");
    terms = new HashSet<Term>();
    fooPhrase.extractTerms(terms);

    final Term semanticTerm = (Term) terms.toArray()[1];
    final String semanticText = semanticTerm.text();

    // The semantic term matches through the "foo" parser (bare or field-qualified), not through "foox".
    assertTrue(ram.search(fooParser.parse(semanticText), 10).totalHits == 1);
    assertTrue(ram.search(fooParser.parse(semanticTerm.toString()), 10).totalHits == 1);
    assertTrue(ram.search(fooxParser.parse(semanticText), 10).totalHits == 0);

    ramdir.close();
}

From source file:org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java

License:Open Source License

/**
 * /*w w w .  ja  v a  2 s .  c  o m*/
 * @param mpq MultiPhraseQuery
 * @return boolean
 */
private boolean exceedsTermCount(MultiPhraseQuery mpq) {
    int termCount = 0;
    for (Iterator<?> iter = mpq.getTermArrays().iterator(); iter.hasNext(); /**/) {
        Term[] arr = (Term[]) iter.next();
        termCount += arr.length;
        if (termCount > BooleanQuery.getMaxClauseCount()) {
            return true;
        }
    }
    return false;
}

From source file:org.apache.blur.utils.HighlightHelper.java

License:Apache License

/**
 * Returns a copy of <code>query</code> re-targeted at the field <code>name</code> when the query
 * is currently addressed to the field-less placeholder field; otherwise returns
 * <code>query</code> itself.
 * <p>
 * Handled types are TermQuery, WildcardQuery, MultiPhraseQuery, PhraseQuery, PrefixQuery and
 * TermRangeQuery. Any other type, or a handled type on a different field, is returned unchanged.
 *
 * @param query the query to inspect
 * @param name the field name to substitute; when <code>null</code> the query is returned as is
 * @param fieldLessFieldName the placeholder field name that marks a query as field-less
 * @return the re-targeted copy, or <code>query</code> when no substitution applies
 */
private static Query setFieldIfNeeded(Query query, String name, String fieldLessFieldName) {
    if (name == null) {
        return query;
    }
    // NOTE(review): the TermQuery, WildcardQuery and MultiPhraseQuery branches compare against
    // fieldLessFieldName, while the PhraseQuery, PrefixQuery and TermRangeQuery branches compare
    // against BlurConstants.SUPER. Left as found - confirm whether the difference is intended.
    if (query instanceof TermQuery) {
        TermQuery tq = (TermQuery) query;
        Term term = tq.getTerm();
        if (term.field().equals(fieldLessFieldName)) {
            return new TermQuery(new Term(name, term.bytes()));
        }
    } else if (query instanceof WildcardQuery) {
        WildcardQuery wq = (WildcardQuery) query;
        Term term = wq.getTerm();
        if (term.field().equals(fieldLessFieldName)) {
            return new WildcardQuery(new Term(name, term.bytes()));
        }
    } else if (query instanceof MultiPhraseQuery) {
        MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        int[] positions = mpq.getPositions();
        List<Term[]> termArrays = mpq.getTermArrays();
        if (isTermField(termArrays, fieldLessFieldName)) {
            // Rebuild position by position, keeping slop and positions of the source query.
            MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
            multiPhraseQuery.setSlop(mpq.getSlop());
            for (int i = 0; i < termArrays.size(); i++) {
                multiPhraseQuery.add(changeFields(termArrays.get(i), name), positions[i]);
            }
            return multiPhraseQuery;
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery pq = (PhraseQuery) query;
        Term[] terms = pq.getTerms();
        int[] positions = pq.getPositions();
        // A phrase query without terms has no field to inspect; it is returned unchanged
        // instead of failing on terms[0].
        if (terms.length > 0 && terms[0].field().equals(BlurConstants.SUPER)) {
            PhraseQuery phraseQuery = new PhraseQuery();
            for (int i = 0; i < terms.length; i++) {
                phraseQuery.add(new Term(name, terms[i].bytes()), positions[i]);
            }
            phraseQuery.setSlop(pq.getSlop());
            return phraseQuery;
        }
    } else if (query instanceof PrefixQuery) {
        PrefixQuery pq = (PrefixQuery) query;
        Term term = pq.getPrefix();
        if (term.field().equals(BlurConstants.SUPER)) {
            return new PrefixQuery(new Term(name, term.bytes()));
        }
    } else if (query instanceof TermRangeQuery) {
        TermRangeQuery trq = (TermRangeQuery) query;
        BytesRef lowerTerm = trq.getLowerTerm();
        BytesRef upperTerm = trq.getUpperTerm();
        boolean includeUpper = trq.includesUpper();
        boolean includeLower = trq.includesLower();
        String field = trq.getField();
        if (field.equals(BlurConstants.SUPER)) {
            return new TermRangeQuery(name, lowerTerm, upperTerm, includeLower, includeUpper);
        }
    }
    return query;
}

From source file:org.tallison.lucene.queryparser.spans.SpanQueryParserBase.java

License:Apache License

/**
 * Converts {@link #newFieldQuery(String, String, boolean, int)} to something
 * as close as possible to a SpanQuery./*from   w  w  w.  j a v  a 2s. c o m*/
 * <p>
 * Can return null, e.g. if asked to create newFieldSpanQuery from a stop word.
 * @param fieldName field for query
 * @param termText text for term
 * @param quoted whether or not this is quoted
 * @return a SpanQuery that is as close as possible to the Query created by
 *  {@link #newFieldQuery(String, String, boolean, int)}
 * @throws ParseException if encountered during parse
 */
protected SpanQuery newFieldSpanQuery(String fieldName, String termText, boolean quoted) throws ParseException {

    Analyzer analyzer = getAnalyzer(fieldName);

    if (analyzer == null) {
        throw new ParseException("Need to have non-null analyzer for term queries within a 'near' clause.");
    }
    Query q = newFieldQuery(fieldName, termText, quoted, 0);
    if (q == null) {
        return null;
    }
    //now convert to a SpanQuery
    if (q instanceof TermQuery) {
        SpanTermQuery stq = new SpanTermQuery(((TermQuery) q).getTerm());
        return stq;
    } else if (q instanceof BooleanQuery) {
        //TODO: there are dragons here.  convertBooleanOfBooleanOrTermsToSpan
        //ignores the operators inside of the BooleanQuery
        //and just treats this as big "OR" for now
        return convertBooleanOfBooleanOrTermsToSpan((BooleanQuery) q);
    } else if (q instanceof PhraseQuery) {
        PhraseQuery pq = (PhraseQuery) q;
        Term[] terms = pq.getTerms();
        int[] positions = pq.getPositions();
        List<SpanQuery> spanTerms = new LinkedList<>();
        for (Term t : terms) {
            spanTerms.add(new SpanTermQuery(t));
        }
        int slop = positions[positions.length - 1] - (positions.length - 1);
        return buildSpanNearQuery(spanTerms, slop, true);
    } else if (q instanceof MultiPhraseQuery) {
        MultiPhraseQuery mpq = (MultiPhraseQuery) q;
        int[] positions = mpq.getPositions();
        Term[][] terms = mpq.getTermArrays();
        List<SpanQuery> spanTerms = new LinkedList<>();
        for (Term[] tArr : terms) {
            List<SpanQuery> spans = new LinkedList<>();
            for (Term t : tArr) {
                spans.add(new SpanTermQuery(t));
            }
            SpanQuery spanOr = buildSpanOrQuery(spans);
            spanTerms.add(spanOr);
        }
        int slop = positions[positions.length - 1] - positions.length;
        return buildSpanNearQuery(spanTerms, slop, true);
    } else if (q instanceof SynonymQuery) {
        SynonymQuery synonymQuery = (SynonymQuery) q;
        if (synonymQuery.getTerms().size() == 0) {
            return new SpanOrQuery();
        } else if (synonymQuery.getTerms().size() == 1) {
            return new SpanTermQuery(synonymQuery.getTerms().get(0));
        }
        SpanQuery[] clauses = new SpanQuery[((SynonymQuery) q).getTerms().size()];
        int i = 0;
        for (Term t : ((SynonymQuery) q).getTerms()) {
            clauses[i++] = new SpanTermQuery(t);
        }
        return new SpanOrQuery(clauses);
    }
    throw new IllegalArgumentException("Can't convert class >" + q.getClass() + "< to a SpanQuery");
}

From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java

License:Apache License

/**
 * THIS IS AN UNHOLY ABOMINATION: this exists here and in LUCENE-5317 and
 * in the highlighter package.  Please, please forgive me.
 *
 * We need to factor this out into a standalone helper class until
 * the difference between Query and SpanQueries disappears.
 *
 * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a highlighter.
 * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and {@link org.apache.lucene.search.Query}, this
 * {@link org.apache.lucene.search.spans.SpanQuery} will not necessarily return the same documents as the
 * initial Query. For example, the generated SpanQuery will not include
 * clauses of type BooleanClause.Occur.MUST_NOT. Also, the
 * {@link org.apache.lucene.search.spans.SpanQuery} will only cover a single field, whereas the {@link org.apache.lucene.search.Query}
 * might contain multiple fields.
 * <p>
 * Returns an empty SpanQuery if the {@link org.apache.lucene.search.Query} is a class that
 * is handled, but for some reason can't be converted from a {@link org.apache.lucene.search.Query} to a
 * {@link org.apache.lucene.search.spans.SpanQuery}. This can happen for many reasons: e.g. if the Query
 * contains no terms in the requested "field" or the Query is a MatchAllDocsQuery.
 * <p>
 * Throws IllegalArgumentException if the Query is a class that
 * is not yet handled.
 * <p>
 * This method does not rewrite the SpanQuery before returning it.
 * Clients are required to rewrite if necessary.
 * <p>
 * Much of this code is copied directly from
 * oal.search.highlight.WeightedSpanTermExtractor. There are some subtle
 * differences.
 *
 * @param field single field to extract SpanQueries for
 * @param query query to convert
 * @return SpanQuery for use in highlighting; can return empty SpanQuery
 * @throws java.io.IOException for an underlying IOException in the IndexReader
 * @throws IllegalArgumentException if the query type is not recognized
 */
public SpanQuery convert(String field, Query query) throws IOException {
    /*
     * copied nearly verbatim from
     * org.apache.lucene.search.highlight.WeightedSpanTermExtractor
     * TODO:refactor to avoid duplication of code if possible.
     * Beware: there are some subtle differences.
     *
     * NOTE(review): no branch below unwraps a BoostQuery, so a BoostQuery argument appears to
     * end at the final IllegalArgumentException - confirm whether that is intended.
     */
    if (query instanceof SpanQuery) {
        SpanQuery sq = (SpanQuery) query;
        if (sq.getField() != null && sq.getField().equals(field)) {
            return (SpanQuery) query;
        } else {
            return getEmptySpanQuery();
        }
    } else if (query instanceof BooleanQuery) {
        List<BooleanClause> queryClauses = ((BooleanQuery) query).clauses();
        List<SpanQuery> spanQs = new ArrayList<SpanQuery>();
        for (int i = 0; i < queryClauses.size(); i++) {
            // prohibited clauses are left out of the converted query
            if (!queryClauses.get(i).isProhibited()) {
                tryToAdd(field, convert(field, queryClauses.get(i).getQuery()), spanQs);
            }
        }
        if (spanQs.size() == 0) {
            return getEmptySpanQuery();
        } else if (spanQs.size() == 1) {
            return spanQs.get(0);
        } else {
            return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()]));
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);

        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 0) {
            return getEmptySpanQuery();
        } else if (!phraseQueryTerms[0].field().equals(field)) {
            return getEmptySpanQuery();
        }
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
        for (int i = 0; i < phraseQueryTerms.length; i++) {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }
        int slop = phraseQuery.getSlop();
        int[] positions = phraseQuery.getPositions();
        // add (position difference - 1) of every consecutive pair of positions to the slop
        if (positions.length > 0) {
            int lastPos = positions[0];
            int sz = positions.length;
            for (int i = 1; i < sz; i++) {
                int pos = positions[i];
                int inc = pos - lastPos - 1;
                slop += inc;
                lastPos = pos;
            }
        }

        // ordering depends on the phrase's own slop, not on the gap-adjusted value
        boolean inorder = phraseQuery.getSlop() == 0;

        // query is a PhraseQuery in this branch, so the former BoostQuery test could never pass
        return new SpanNearQuery(clauses, slop, inorder);
    } else if (query instanceof TermQuery) {
        TermQuery tq = (TermQuery) query;
        if (tq.getTerm().field().equals(field)) {
            return new SpanTermQuery(tq.getTerm());
        } else {
            return getEmptySpanQuery();
        }
    } else if (query instanceof ConstantScoreQuery) {
        return convert(field, ((ConstantScoreQuery) query).getQuery());
    } else if (query instanceof DisjunctionMaxQuery) {
        List<SpanQuery> spanQs = new ArrayList<SpanQuery>();
        for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
            tryToAdd(field, convert(field, iterator.next()), spanQs);
        }
        if (spanQs.size() == 0) {
            return getEmptySpanQuery();
        } else if (spanQs.size() == 1) {
            return spanQs.get(0);
        } else {
            return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()]));
        }
    } else if (query instanceof MatchAllDocsQuery) {
        return getEmptySpanQuery();
    } else if (query instanceof MultiPhraseQuery) {

        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        //test for empty or wrong field
        if (termArrays.length == 0) {
            return getEmptySpanQuery();
        }
        // The field is read from the first term of the first position. This now also runs when
        // there is exactly one position; the previous "length > 1" guard skipped that case.
        final Term[] firstTerms = termArrays[0];
        if (firstTerms.length > 0 && !firstTerms[0].field().equals(field)) {
            return getEmptySpanQuery();
        }
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {

            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }

            @SuppressWarnings("unchecked")
            final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;

            // group the alternatives of each term array under the position it occupies
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length));
                    ++distinctPositions;
                }
                for (int j = 0; j < termArray.length; ++j) {
                    disjuncts.add(new SpanTermQuery(termArray[j]));
                }
            }

            // one clause per occupied position; unoccupied positions are counted as gaps
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.length; ++i) {
                List<SpanQuery> disjuncts = disjunctLists[i];
                if (disjuncts != null) {
                    if (disjuncts.size() == 1) {
                        clauses[position++] = disjuncts.get(0);
                    } else {
                        clauses[position++] = new SpanOrQuery(
                                disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                    }
                } else {
                    ++positionGaps;
                }
            }

            final int slop = mpq.getSlop();
            final boolean inorder = (slop == 0);

            // query is a MultiPhraseQuery in this branch, so the former BoostQuery test could
            // never pass
            return new SpanNearQuery(clauses, slop + positionGaps, inorder);
        }
        // with no positions, control falls through to the exception below (unchanged behavior)

    } else if (query instanceof MultiTermQuery) {
        return new SpanMultiTermQueryWrapper<>((MultiTermQuery) query);
    } else if (query instanceof SynonymQuery) {
        List<SpanQuery> clauses = new ArrayList<>();
        for (Term term : ((SynonymQuery) query).getTerms()) {
            clauses.add(new SpanTermQuery(term));
        }
        return new SpanOrQuery(clauses.toArray(new SpanQuery[clauses.size()]));
    }
    throw new IllegalArgumentException("Can't convert query of type: " + query.getClass());
}

From source file:org.tallison.lucene.search.spans.SimpleSpanQueryConverter.java

License:Apache License

/**
 * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a highlighter.
 * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and {@link org.apache.lucene.search.Query}, this
 * {@link org.apache.lucene.search.spans.SpanQuery} will not necessarily return the same documents as the
 * initial Query. For example, the generated SpanQuery will not include
 * clauses of type BooleanClause.Occur.MUST_NOT. Also, the
 * {@link org.apache.lucene.search.spans.SpanQuery} will only cover a single field, whereas the {@link org.apache.lucene.search.Query}
 * might contain multiple fields.
 * <p>
 * Returns an empty SpanQuery if the {@link org.apache.lucene.search.Query} is a class that
 * is handled, but for some reason can't be converted from a {@link org.apache.lucene.search.Query} to a
 * {@link org.apache.lucene.search.spans.SpanQuery}. This can happen for many reasons: e.g. if the Query
 * contains no terms in the requested "field" or the Query is a MatchAllDocsQuery.
 * <p>
 * A top-level BoostQuery is unwrapped first and its boost is passed to <code>addBoost</code>
 * together with the converted query in most branches.
 * <p>
 * This method does not rewrite the SpanQuery before returning it.
 * Clients are required to rewrite if necessary.
 * <p>
 * Much of this code is copied directly from
 * oal.search.highlight.WeightedSpanTermExtractor. There are some subtle
 * differences.
 * <p>
 * Query types not recognized here are passed to <code>convertUnknownQuery</code>; the original
 * documentation states that unknown query types end in an IllegalArgumentException.
 *
 * @param field single field to extract SpanQueries for
 * @param queryToConvert query to convert
 * @return SpanQuery for use in highlighting; can return empty SpanQuery
 * @throws java.io.IOException if encountered during parse
 */
public SpanQuery convert(String field, Query queryToConvert) throws IOException {

    // peel off one level of BoostQuery; boost stays null when there is no wrapper
    Float boost = null;
    Query query = queryToConvert;
    if (queryToConvert instanceof BoostQuery) {
        query = ((BoostQuery) query).getQuery();
        boost = ((BoostQuery) queryToConvert).getBoost();
    }
    /*
     * copied nearly verbatim from
     * org.apache.lucene.search.highlight.WeightedSpanTermExtractor
     * TODO:refactor to avoid duplication of code if possible.
     * Beware: there are some subtle differences.
     */
    if (query instanceof SpanQuery) {
        SpanQuery sq = (SpanQuery) query;
        // a SpanQuery that reports no field is treated as not matching instead of raising
        // a NullPointerException
        if (sq.getField() != null && sq.getField().equals(field)) {
            return (SpanQuery) query;
        } else {
            return getEmptySpanQuery();
        }
    } else if (query instanceof BooleanQuery) {
        List<BooleanClause> queryClauses = ((BooleanQuery) query).clauses();
        List<SpanQuery> spanQs = new ArrayList<SpanQuery>();
        for (int i = 0; i < queryClauses.size(); i++) {
            // prohibited clauses are left out of the converted query
            if (!queryClauses.get(i).isProhibited()) {
                tryToAdd(field, convert(field, queryClauses.get(i).getQuery()), spanQs);
            }
        }
        return addBoost(buildSpanOr(spanQs), boost);
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);

        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 0) {
            return getEmptySpanQuery();
        } else if (!phraseQueryTerms[0].field().equals(field)) {
            return getEmptySpanQuery();
        }
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
        for (int i = 0; i < phraseQueryTerms.length; i++) {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }
        int slop = phraseQuery.getSlop();
        int[] positions = phraseQuery.getPositions();
        // add (position difference - 1) of every consecutive pair of positions to the slop
        if (positions.length > 0) {
            int lastPos = positions[0];
            int sz = positions.length;
            for (int i = 1; i < sz; i++) {
                int pos = positions[i];
                int inc = pos - lastPos - 1;
                slop += inc;
                lastPos = pos;
            }
        }

        // ordering depends on the phrase's own slop, not on the gap-adjusted value
        boolean inorder = phraseQuery.getSlop() == 0;

        // query is a PhraseQuery in this branch, so the former BoostQuery test could never
        // pass; the boost captured at the top is applied by addBoost
        SpanQuery sp = new SpanNearQuery(clauses, slop, inorder);
        return addBoost(sp, boost);
    } else if (query instanceof TermQuery) {
        TermQuery tq = (TermQuery) query;
        if (tq.getTerm().field().equals(field)) {
            return addBoost(new SpanTermQuery(tq.getTerm()), boost);
        } else {
            return getEmptySpanQuery();
        }
    } else if (query instanceof ConstantScoreQuery) {
        // NOTE(review): unlike the other branches, the captured boost is not applied here -
        // confirm whether that is intended
        return convert(field, ((ConstantScoreQuery) query).getQuery());
    } else if (query instanceof DisjunctionMaxQuery) {
        List<SpanQuery> spanQs = new ArrayList<>();
        for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
            tryToAdd(field, convert(field, iterator.next()), spanQs);
        }
        if (spanQs.size() == 0) {
            return getEmptySpanQuery();
        } else if (spanQs.size() == 1) {
            return addBoost(spanQs.get(0), boost);
        } else {
            return addBoost(new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])), boost);
        }
    } else if (query instanceof MatchAllDocsQuery) {
        return getEmptySpanQuery();
    } else if (query instanceof MultiPhraseQuery) {

        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;

        final Term[][] termArrays = mpq.getTermArrays();
        //test for empty or wrong field
        if (termArrays.length == 0) {
            return getEmptySpanQuery();
        }
        // The field is read from the first term of the first position. This now also runs when
        // there is exactly one position; the previous "length > 1" guard skipped that case.
        final Term[] firstTerms = termArrays[0];
        if (firstTerms.length > 0 && !firstTerms[0].field().equals(field)) {
            return getEmptySpanQuery();
        }
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {

            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }

            @SuppressWarnings("unchecked")
            final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;

            // group the alternatives of each term array under the position it occupies
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length));
                    ++distinctPositions;
                }
                for (int j = 0; j < termArray.length; ++j) {
                    disjuncts.add(new SpanTermQuery(termArray[j]));
                }
            }

            // one clause per occupied position; unoccupied positions are counted as gaps
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.length; ++i) {
                List<SpanQuery> disjuncts = disjunctLists[i];
                if (disjuncts != null) {
                    if (disjuncts.size() == 1) {
                        clauses[position++] = disjuncts.get(0);
                    } else {
                        clauses[position++] = new SpanOrQuery(
                                disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                    }
                } else {
                    ++positionGaps;
                }
            }

            final int slop = mpq.getSlop();
            final boolean inorder = (slop == 0);

            SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            return addBoost(sp, boost);
        }
        // with no positions, control falls through to convertUnknownQuery (unchanged behavior)
    } else if (query instanceof MultiTermQuery) {
        MultiTermQuery tq = (MultiTermQuery) query;
        if (!tq.getField().equals(field)) {
            return getEmptySpanQuery();
        }
        return addBoost(new SpanMultiTermQueryWrapper<>((MultiTermQuery) query), boost);
    } else if (query instanceof SynonymQuery) {
        SynonymQuery sq = (SynonymQuery) query;
        List<SpanQuery> spanQs = new ArrayList<>();
        for (Term t : sq.getTerms()) {
            spanQs.add(new SpanTermQuery(t));
        }
        return addBoost(buildSpanOr(spanQs), boost);
    }
    return convertUnknownQuery(field, queryToConvert);
}