org.weborganic.flint.util.Queries.java Source code

Java tutorial

Introduction

Here is the source code for org.weborganic.flint.util.Queries.java

Source

/*
 * This file is part of the Flint library.
 *
 * For licensing information please see the file license.txt included in the release.
 * A copy of this licence can also be found at
 *   http://www.opensource.org/licenses/artistic-license-2.0.php
 */
package org.weborganic.flint.util;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/**
 * A set of utility methods related to query objects in Lucene.
 *
 * <p>This class cannot be instantiated; all methods are static.
 *
 * @author  Christophe Lauret (Weborganic)
 * @version 13 August 2010
 */
public final class Queries {

    /**
     * Text that matches this pattern is considered a phrase: a double quote, one or more
     * characters that are not double quotes, and a closing double quote.
     */
    private static final Pattern IS_A_PHRASE = Pattern.compile("\\\"[^\\\"]+\\\"");

    /**
     * Prevents creation of instances.
     */
    private Queries() {
    }

    /**
     * Returns the term or phrase query corresponding to the specified text.
     *
     * <p>If the text is surrounded by double quotes, this method will return a {@link PhraseQuery}
     * made of the whitespace-separated words found between the quotes; otherwise, it will return
     * a simple {@link TermQuery} using the text verbatim as the term.
     *
     * <p>Note: Quotation marks are thrown away, and no analyzer is applied to the text.
     *
     * @param field the field to construct the terms.
     * @param text  the text to construct the query from.
     *
     * @return the corresponding query.
     *
     * @throws NullPointerException if either the field or the text is <code>null</code>.
     */
    @Beta
    public static Query toTermOrPhraseQuery(String field, String text) {
        if (field == null) {
            throw new NullPointerException("field");
        }
        if (text == null) {
            throw new NullPointerException("text");
        }
        if (!IS_A_PHRASE.matcher(text).matches()) {
            return new TermQuery(new Term(field, text));
        }
        PhraseQuery phrase = new PhraseQuery();
        String unquoted = text.substring(1, text.length() - 1);
        for (String word : unquoted.split("\\s+")) {
            // Whitespace right after the opening quote makes split() produce an empty
            // first token; an empty string is not a meaningful phrase term, so skip it.
            if (word.length() > 0) {
                phrase.add(new Term(field, word));
            }
        }
        return phrase;
    }

    /**
     * Returns the term or phrase queries corresponding to the specified text.
     *
     * <p>If the text is surrounded by double quotes, this method will return a list containing
     * a single {@link PhraseQuery} built from the tokens the analyzer produces for the text between
     * the quotes; otherwise, it will return one {@link TermQuery} for each term returned by
     * <code>Fields.toTerms</code> for the text.
     *
     * <p>Note: Quotation marks are thrown away.
     *
     * @param field    the field to construct the terms.
     * @param text     the text to construct the query from.
     * @param analyzer the analyzer used to extract the terms from the text.
     *
     * @return the corresponding list of queries.
     *
     * @throws NullPointerException if either the field or the text is <code>null</code>.
     */
    @Beta
    public static List<Query> toTermOrPhraseQueries(String field, String text, Analyzer analyzer) {
        if (field == null) {
            throw new NullPointerException("field");
        }
        if (text == null) {
            throw new NullPointerException("text");
        }
        if (IS_A_PHRASE.matcher(text).matches()) {
            PhraseQuery phrase = new PhraseQuery();
            addTermsToPhrase(field, text.substring(1, text.length() - 1), analyzer, phrase);
            return Collections.singletonList((Query) phrase);
        }
        List<Query> q = new ArrayList<Query>();
        for (String t : Fields.toTerms(field, text, analyzer)) {
            q.add(new TermQuery(new Term(field, t)));
        }
        return q;
    }

    /**
     * Analyzes the text and adds each resulting token to the phrase query as a term.
     *
     * <p>Each term is added at the position computed from the token position increments, so that
     * any gap reported by the analyzer is preserved in the phrase.
     *
     * <p>An <code>IOException</code> raised while reading the tokens is not propagated: its stack
     * trace is printed and the phrase keeps the terms added so far.
     *
     * @param field    The field
     * @param text     The text to analyze
     * @param analyzer The analyzer
     * @param phrase   The phrase query to add the terms to
     */
    private static void addTermsToPhrase(String field, String text, Analyzer analyzer, PhraseQuery phrase) {
        StringReader r = new StringReader(text);
        TokenStream stream = analyzer.tokenStream(field, r);
        PositionIncrementAttribute increment = stream.addAttribute(PositionIncrementAttribute.class);
        TermAttribute attribute = stream.addAttribute(TermAttribute.class);
        try {
            // Starts at -1 so that a first increment of 1 puts the first term at position 0
            int position = -1;
            stream.reset();
            while (stream.incrementToken()) {
                position += increment.getPositionIncrement();
                Term term = new Term(field, attribute.term());
                phrase.add(term, position);
            }
            stream.end();
        } catch (IOException ex) {
            // Should not occur since we use a StringReader
            ex.printStackTrace();
        } finally {
            // Always release the token stream, even when reading the tokens failed
            try {
                stream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails; the terms are already collected
            }
        }
    }

    /**
     * Returns a boolean query combining all the specified queries in {@link Occur#MUST} clauses
     * as if it were an AND operator.
     *
     * @param queries the queries to combine with an AND.
     * @return The combined queries.
     */
    public static Query and(Query... queries) {
        BooleanQuery query = new BooleanQuery();
        for (Query q : queries) {
            query.add(q, Occur.MUST);
        }
        return query;
    }

    /**
     * Returns a boolean query combining all the specified queries in {@link Occur#SHOULD} clauses
     * as if it were an OR operator.
     *
     * @param queries the queries to combine with an OR.
     * @return The combined queries.
     */
    public static Query or(Query... queries) {
        BooleanQuery query = new BooleanQuery();
        for (Query q : queries) {
            query.add(q, Occur.SHOULD);
        }
        return query;
    }

    /**
     * Returns the list of similar queries by substituting one term only in the query.
     *
     * <p>For each term extracted from the query, one query is produced for each fuzzy term
     * returned by <code>Terms.fuzzy</code>.
     *
     * @param query  The original query
     * @param reader A reader to extract the similar terms.
     *
     * @return A list of similar queries to the specified one.
     *
     * @throws IOException If thrown by the reader while extracting fuzzy terms.
     */
    @Beta
    public static List<Query> similar(Query query, IndexReader reader) throws IOException {
        List<Query> similar = new ArrayList<Query>();
        // Extract the list of terms used by the query
        Set<Term> terms = new HashSet<Term>();
        query.extractTerms(terms);
        for (Term t : terms) {
            for (Term f : Terms.fuzzy(reader, t)) {
                similar.add(substitute(query, t, f));
            }
        }
        return similar;
    }

    // Substitutions
    // ==============================================================================================

    /**
     * Substitutes one term in the query for another.
     *
     * <p>This method does not modify the given query.
     *
     * <p>This method simply delegates to the appropriate <code>substitute</code> method based
     * on the query class. Only query types for which there is an applicable <code>substitute</code>
     * method can be substituted; any other query is returned as is.
     *
     * @param query       the query where the substitution should occur.
     * @param original    the original term to replace.
     * @param replacement the term it should be replaced with.
     *
     * @return A new query where the term has been substituted;
     *         or the same query if no substitution was required or possible.
     */
    @Beta
    public static Query substitute(Query query, Term original, Term replacement) {
        if (query instanceof TermQuery) {
            return substitute((TermQuery) query, original, replacement);
        } else if (query instanceof PhraseQuery) {
            return substitute((PhraseQuery) query, original, replacement);
        } else if (query instanceof BooleanQuery) {
            return substitute((BooleanQuery) query, original, replacement);
        } else {
            return query;
        }
    }

    /**
     * Substitutes one term in the clauses of the boolean query for another.
     *
     * <p>This method always returns a new boolean query, even when no clause needed a substitution;
     * it does not modify the given query. The new query has the same clause occurrences, boost,
     * coordination setting and minimum number of optional clauses to match as the given one.
     *
     * @param query       the query where the substitution should occur.
     * @param original    the original term to replace.
     * @param replacement the term it should be replaced with.
     *
     * @return A new boolean query where the term has been substituted in each clause.
     */
    @Beta
    public static Query substitute(BooleanQuery query, Term original, Term replacement) {
        BooleanQuery q = new BooleanQuery(query.isCoordDisabled());
        for (BooleanClause clause : query.getClauses()) {
            Query qx = substitute(clause.getQuery(), original, replacement);
            q.add(qx, clause.getOccur());
        }
        q.setMinimumNumberShouldMatch(query.getMinimumNumberShouldMatch());
        q.setBoost(query.getBoost());
        return q;
    }

    /**
     * Substitutes one term in the term query for another.
     *
     * <p>This method only creates new query object if required; it does not modify the given query.
     * When a new query is created, it has the same boost as the given one.
     *
     * @param query       the query where the substitution should occur.
     * @param original    the original term to replace.
     * @param replacement the term it should be replaced with.
     *
     * @return A new term query where the term has been substituted;
     *         or the same query if no substitution was needed.
     */
    @Beta
    public static TermQuery substitute(TermQuery query, Term original, Term replacement) {
        if (!query.getTerm().equals(original)) {
            return query;
        }
        TermQuery q = new TermQuery(replacement);
        // Keep the boost, consistently with the phrase and boolean substitutions
        q.setBoost(query.getBoost());
        return q;
    }

    /**
     * Substitutes one term in the phrase query for another.
     *
     * <p>In a phrase query the replacement term must be on the same field as the original term.
     *
     * <p>This method only creates new query object if required; it does not modify the given query.
     * When a new query is created, it has the same term positions, slop and boost as the given one.
     *
     * @param query       the query where the substitution should occur.
     * @param original    the original term to replace.
     * @param replacement the term it should be replaced with.
     *
     * @return A new phrase query where the term has been substituted;
     *         or the same query if no substitution was needed.
     *
     * @throws IllegalArgumentException if the replacement term is not on the same field as the original term
     *                                  (as reported by the phrase query when adding the term).
     */
    @Beta
    public static PhraseQuery substitute(PhraseQuery query, Term original, Term replacement)
            throws IllegalArgumentException {
        Term[] terms = query.getTerms();
        // Check if we need to substitute
        boolean doSubstitute = false;
        for (Term t : terms) {
            if (t.equals(original)) {
                doSubstitute = true;
                break;
            }
        }
        // No substitution: return the query
        if (!doSubstitute) {
            return query;
        }
        // Rebuild the phrase at the original positions so that gaps between terms are kept
        int[] positions = query.getPositions();
        PhraseQuery q = new PhraseQuery();
        for (int i = 0; i < terms.length; i++) {
            Term t = terms[i];
            q.add(t.equals(original) ? replacement : t, positions[i]);
        }
        q.setSlop(query.getSlop());
        q.setBoost(query.getBoost());
        return q;
    }

}