org.allenai.blacklab.queryParser.lucene.QueryParserBase.java Source code

Java tutorial

Introduction

Here is the source code for org.allenai.blacklab.queryParser.lucene.QueryParserBase.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Adapted from Lucene 4.2 version to produce TextPattern instead of Query.
 *
 * Changed (blocks of) lines are marked with "// BL", except for lines where
 * the only change is to replace "Query" with "TextPattern".
 */

package org.allenai.blacklab.queryParser.lucene; // BL: changed package

import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;

import org.allenai.blacklab.queryParser.lucene.helpers.TPMatchAllDocs;
import org.allenai.blacklab.queryParser.lucene.helpers.TPMultiPhrase;
import org.allenai.blacklab.queryParser.lucene.helpers.TPPhrase;
import org.allenai.blacklab.queryParser.lucene.helpers.TPTermRange;
import org.allenai.blacklab.search.TPBooleanClause;
import org.allenai.blacklab.search.TextPattern;
import org.allenai.blacklab.search.TextPatternBoolean;
import org.allenai.blacklab.search.TextPatternFuzzy;
import org.allenai.blacklab.search.TextPatternPrefix;
import org.allenai.blacklab.search.TextPatternRegex;
import org.allenai.blacklab.search.TextPatternTerm;
import org.allenai.blacklab.search.TextPatternWildcard;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
// BL imports

/** This class is overridden by QueryParser in QueryParser.jj
 * and acts to separate the majority of the Java code from the .jj grammar file.
 */
@SuppressWarnings({ "all" }) // BL: "resource" because of possible resource leak
public abstract class QueryParserBase implements CommonQueryParserConfiguration {

    /**
     * Marker throwable signalling that a removed method was used.
     * <p>
     * Do not catch this in your code: seeing it means you are relying on a
     * method that should no longer be used. Note that it extends
     * {@link Throwable} directly rather than {@link Exception}.
     */
    public static class MethodRemovedUseAnother extends Throwable {
    }

    // Conjunction codes passed as the "conj" argument of addClause().
    static final int CONJ_NONE = 0;
    static final int CONJ_AND = 1;
    static final int CONJ_OR = 2;

    // Modifier codes passed as the "mods" argument of addClause():
    // MOD_NOT marks a clause as prohibited, MOD_REQ as required.
    static final int MOD_NONE = 0;
    static final int MOD_NOT = 10;
    static final int MOD_REQ = 11;

    // make it possible to call setDefaultOperator() without accessing
    // the nested class:
    /** Alternative form of QueryParser.Operator.AND */
    public static final Operator AND_OPERATOR = Operator.AND;
    /** Alternative form of QueryParser.Operator.OR */
    public static final Operator OR_OPERATOR = Operator.OR;

    /** The operator the parser uses to combine query terms that have no explicit operator; defaults to OR. */
    Operator operator = OR_OPERATOR;

    // Whether getRangeQuery() lower-cases its bounds (using the configured locale).
    boolean lowercaseExpandedTerms = true;
    // Stored via setMultiTermRewriteMethod(); see the note on that setter.
    MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
    boolean allowLeadingWildcard = false;
    // Whether phrase patterns built by newFieldQuery() record explicit token positions.
    boolean enablePositionIncrements = true;

    // Analyzer and default field; both are assigned by init().
    Analyzer analyzer;
    String field;
    // Slop applied to phrase patterns built by newFieldQuery().
    int phraseSlop = 0;
    float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
    int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
    // Locale and time zone used by getRangeQuery() for lower-casing and date handling.
    Locale locale = Locale.getDefault();
    TimeZone timeZone = TimeZone.getDefault();

    // the default date resolution (null until setDateResolution(Resolution) is called)
    DateTools.Resolution dateResolution = null;
    // maps field names to date resolutions; created lazily by setDateResolution(String, Resolution)
    Map<String, DateTools.Resolution> fieldToDateResolution = null;

    // Whether or not to analyze range terms when constructing range patterns
    // (for example, analyzing terms into collation keys for locale-sensitive ranges)
    boolean analyzeRangeTerms = false;

    // Whether multi-token unquoted text becomes a phrase; assigned by init() based on the Lucene version.
    boolean autoGeneratePhraseQueries;

    /**
     * No-argument constructor that performs no initialization.
     * <p>
     * It exists so the generated QueryParser(CharStream) constructor won't
     * error out; {@link #init(Version, String, Analyzer)} assigns the analyzer
     * and default field afterwards.
     */
    protected QueryParserBase() {
    }

    /**
     * Initializes a query parser. Called by the QueryParser constructor.
     * <p>
     * Stores the analyzer and default field, and derives the automatic phrase
     * generation setting from the Lucene version: it is switched off when
     * {@code matchVersion} is 3.1 or later and switched on otherwise.
     *
     * @param matchVersion Lucene version to match. See <a href="QueryParser.html#version">here</a>.
     * @param f the default field for query terms.
     * @param a used to find terms in the query text.
     */
    public void init(Version matchVersion, String f, Analyzer a) {
        this.analyzer = a;
        this.field = f;

        // Only versions before 3.1 generate phrase queries automatically.
        final boolean beforeLucene31 = !matchVersion.onOrAfter(Version.LUCENE_31);
        setAutoGeneratePhraseQueries(beforeLucene31);
    }

    // the generated parser will create these in QueryParser
    /**
     * Re-initializes the parser so that it reads from the given stream.
     * {@link #parse(String)} calls this before every parse.
     *
     * @param stream character stream holding the query text
     */
    public abstract void ReInit(CharStream stream);

    /**
     * Parses a complete query (a query followed by end-of-input) from the
     * stream set by {@link #ReInit(CharStream)}.
     *
     * @param field the default field for terms in the query
     * @return the parsed pattern; {@link #parse(String)} treats a null result as an empty boolean pattern
     * @throws ParseException if the parsing fails
     */
    public abstract TextPattern TopLevelQuery(String field) throws ParseException;

    /**
     * Parses a query string, returning a {@link TextPattern}.
     * <p>
     * A null result from the grammar is replaced by an empty boolean pattern.
     * Parse errors, token manager errors and "too many clauses" failures are
     * all rethrown as a {@link ParseException} whose message includes the
     * original query and whose cause is the original failure.
     *
     * @param query the query string to be parsed.
     * @return the parsed pattern, never null
     * @throws ParseException if the parsing fails
     */
    public TextPattern parse(String query) throws ParseException {
        ReInit(new FastCharStream(new StringReader(query)));

        final Throwable failure;
        final String reason;
        try {
            // TopLevelQuery is a Query followed by the end-of-input (EOF)
            final TextPattern parsed = TopLevelQuery(field);
            if (parsed == null) {
                return newBooleanQuery(false);
            }
            return parsed;
        } catch (ParseException parseError) {
            failure = parseError;
            reason = parseError.getMessage();
        } catch (TokenMgrError lexError) {
            failure = lexError;
            reason = lexError.getMessage();
        } catch (BooleanQuery.TooManyClauses tooMany) {
            failure = tooMany;
            reason = "too many boolean clauses";
        }

        // rethrow to include the original query:
        final ParseException wrapped = new ParseException("Cannot parse '" + query + "': " + reason);
        wrapped.initCause(failure);
        throw wrapped;
    }

    /**
     * Returns the analyzer used to find terms in the query text.
     *
     * @return the analyzer assigned by {@link #init(Version, String, Analyzer)}
     */
    @Override
    public Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Returns the default field for query terms.
     *
     * @return the default field assigned by {@link #init(Version, String, Analyzer)}
     */
    public String getField() {
        return field;
    }

    /**
     * Tells whether phrase queries are generated automatically for unquoted
     * text that analyzes to more than one term.
     *
     * @return the current setting
     * @see #setAutoGeneratePhraseQueries(boolean)
     */
    public final boolean getAutoGeneratePhraseQueries() {
        return autoGeneratePhraseQueries;
    }

    /**
     * Set to true if phrase queries will be automatically generated
     * when the analyzer returns more than one term from whitespace
     * delimited text.
     * NOTE: this behavior may not be suitable for all languages.
     * <p>
     * Set to false if phrase queries should only be generated when
     * surrounded by double quotes.
     *
     * @param value the new setting
     */
    public final void setAutoGeneratePhraseQueries(boolean value) {
        this.autoGeneratePhraseQueries = value;
    }

    /**
     * Get the minimal similarity for fuzzy queries.
     *
     * @return the configured minimum similarity
     */
    @Override
    public float getFuzzyMinSim() {
        return fuzzyMinSim;
    }

    /**
     * Set the minimum similarity for fuzzy queries.
     * The default is {@link FuzzyQuery#defaultMinSimilarity}.
     *
     * @param fuzzyMinSim the minimum similarity to use
     */
    @Override
    public void setFuzzyMinSim(float fuzzyMinSim) {
        this.fuzzyMinSim = fuzzyMinSim;
    }

    /**
     * Get the prefix length for fuzzy queries.
     *
     * @return Returns the fuzzyPrefixLength.
     */
    @Override
    public int getFuzzyPrefixLength() {
        return fuzzyPrefixLength;
    }

    /**
     * Set the prefix length for fuzzy queries.
     * The default is {@link FuzzyQuery#defaultPrefixLength}.
     *
     * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
     */
    @Override
    public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
        this.fuzzyPrefixLength = fuzzyPrefixLength;
    }

    /**
     * Sets the default slop for phrases.  If zero, then exact phrase matches
     * are required.  Default value is zero.
     *
     * @param phraseSlop the slop applied to phrase patterns built by this parser
     */
    @Override
    public void setPhraseSlop(int phraseSlop) {
        this.phraseSlop = phraseSlop;
    }

    /**
     * Gets the default slop for phrases.
     *
     * @return the configured phrase slop
     */
    @Override
    public int getPhraseSlop() {
        return phraseSlop;
    }

    /**
     * Set to <code>true</code> to allow leading wildcard characters.
     * <p>
     * When set, <code>*</code> or <code>?</code> are allowed as
     * the first character of a PrefixQuery and WildcardQuery.
     * Note that this can produce very slow
     * queries on big indexes.
     * <p>
     * Default: false.
     *
     * @param allowLeadingWildcard whether leading wildcards are allowed
     */
    @Override
    public void setAllowLeadingWildcard(boolean allowLeadingWildcard) {
        this.allowLeadingWildcard = allowLeadingWildcard;
    }

    /**
     * Tells whether leading wildcard characters are allowed.
     *
     * @return the current setting
     * @see #setAllowLeadingWildcard(boolean)
     */
    @Override
    public boolean getAllowLeadingWildcard() {
        return allowLeadingWildcard;
    }

    /**
     * Set to <code>true</code> to enable position increments in result query.
     * <p>
     * When set, result phrase and multi-phrase queries will
     * be aware of position increments.
     * Useful when e.g. a StopFilter increases the position increment of
     * the token that follows an omitted token.
     * <p>
     * Default: true.
     *
     * @param enable whether phrase patterns should carry explicit token positions
     */
    @Override
    public void setEnablePositionIncrements(boolean enable) {
        this.enablePositionIncrements = enable;
    }

    /**
     * Tells whether position increments are enabled in result queries.
     *
     * @return the current setting
     * @see #setEnablePositionIncrements(boolean)
     */
    @Override
    public boolean getEnablePositionIncrements() {
        return enablePositionIncrements;
    }

    /**
     * Sets the boolean operator of the QueryParser.
     * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
     * are considered optional: for example <code>capital of Hungary</code> is equal to
     * <code>capital OR of OR Hungary</code>.<br/>
     * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
     * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
     *
     * @param op the operator to use, either {@link #AND_OPERATOR} or {@link #OR_OPERATOR}
     */
    public void setDefaultOperator(Operator op) {
        this.operator = op;
    }

    /**
     * Gets implicit operator setting, which will be either AND_OPERATOR
     * or OR_OPERATOR.
     *
     * @return the operator last passed to {@link #setDefaultOperator(Operator)}, or OR_OPERATOR if none was set
     */
    public Operator getDefaultOperator() {
        return operator;
    }

    /**
     * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
     * lower-cased or not.  Default is <code>true</code>.
     *
     * @param lowercaseExpandedTerms true to lower-case such terms
     */
    @Override
    public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
        this.lowercaseExpandedTerms = lowercaseExpandedTerms;
    }

    /**
     * Tells whether expanded terms are automatically lower-cased.
     *
     * @return the current setting
     * @see #setLowercaseExpandedTerms(boolean)
     */
    @Override
    public boolean getLowercaseExpandedTerms() {
        return lowercaseExpandedTerms;
    }

    /**
     * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
     * when creating a {@link PrefixQuery}, {@link WildcardQuery} or {@link TermRangeQuery}. This implementation is generally preferable because it
     * a) Runs faster b) Does not have the scarcity of terms unduly influence score
     * c) avoids any {@link TooManyClauses} exception.
     * However, if your application really needs to use the
     * old-fashioned {@link BooleanQuery} expansion rewriting and the above
     * points are not relevant then use this to change
     * the rewrite method.
     * <p>
     * NOTE(review): in this TextPattern adaptation the <code>setRewriteMethod</code>
     * calls in the prefix and regexp factory methods are commented out, so the value
     * stored here appears to be kept only for configuration compatibility - confirm
     * before relying on it.
     *
     * @param method the rewrite method to store
     */
    @Override
    public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
        multiTermRewriteMethod = method;
    }

    /**
     * Returns the stored multi-term rewrite method.
     *
     * @return the rewrite method last set, or the default
     * @see #setMultiTermRewriteMethod
     */
    @Override
    public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
        return multiTermRewriteMethod;
    }

    /**
     * Set locale used by date range parsing, lowercasing, and other
     * locale-sensitive operations. The default is {@link Locale#getDefault()}.
     *
     * @param locale the locale to use
     */
    @Override
    public void setLocale(Locale locale) {
        this.locale = locale;
    }

    /**
     * Returns current locale, allowing access by subclasses.
     *
     * @return the configured locale
     */
    @Override
    public Locale getLocale() {
        return locale;
    }

    /**
     * Sets the time zone used when an inclusive upper date bound of a range
     * is extended to the end of its day. The default is {@link TimeZone#getDefault()}.
     *
     * @param timeZone the time zone to use
     */
    @Override
    public void setTimeZone(TimeZone timeZone) {
        this.timeZone = timeZone;
    }

    /**
     * Returns the time zone used for date range handling.
     *
     * @return the configured time zone
     */
    @Override
    public TimeZone getTimeZone() {
        return timeZone;
    }

    /**
     * Sets the default date resolution used by range queries for fields for which no
     * specific date resolution has been set. Field specific resolutions can be set
     * with {@link #setDateResolution(String, org.apache.lucene.document.DateTools.Resolution)}.
     *
     * @param dateResolution the default date resolution to set
     */
    @Override
    public void setDateResolution(DateTools.Resolution dateResolution) {
        this.dateResolution = dateResolution;
    }

    /**
     * Sets the date resolution used by range queries for one specific field.
     * The per-field map is created on first use.
     *
     * @param fieldName field for which the date resolution is to be set
     * @param dateResolution date resolution to set
     * @throws IllegalArgumentException if {@code fieldName} is null
     */
    public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) {
        if (fieldName == null) {
            throw new IllegalArgumentException("Field cannot be null.");
        }

        Map<String, DateTools.Resolution> perField = fieldToDateResolution;
        if (perField == null) {
            // first field-specific resolution: create the map now
            perField = new HashMap<String, DateTools.Resolution>();
            fieldToDateResolution = perField;
        }
        perField.put(fieldName, dateResolution);
    }

    /**
     * Returns the date resolution that is used by range queries for the given field.
     * A field-specific resolution wins; otherwise the default resolution is
     * returned, which is null when none has been set.
     *
     * @param fieldName the field to look up
     * @return the resolution for the field, or null if neither a field-specific nor a default one is set
     * @throws IllegalArgumentException if {@code fieldName} is null
     */
    public DateTools.Resolution getDateResolution(String fieldName) {
        if (fieldName == null) {
            throw new IllegalArgumentException("Field cannot be null.");
        }

        // Look for a field-specific setting first (the map is only created on demand).
        final DateTools.Resolution fieldSpecific =
                (fieldToDateResolution == null) ? null : fieldToDateResolution.get(fieldName);

        // Fall back to the default resolution when the field has none of its own.
        return (fieldSpecific != null) ? fieldSpecific : this.dateResolution;
    }

    /**
     * Set whether or not to analyze range terms when constructing range patterns.
     * For example, setting this to true can enable analyzing terms into
     * collation keys for locale-sensitive ranges. Default is false.
     *
     * @param analyzeRangeTerms whether or not terms should be analyzed for range patterns
     */
    public void setAnalyzeRangeTerms(boolean analyzeRangeTerms) {
        this.analyzeRangeTerms = analyzeRangeTerms;
    }

    /**
     * Tells whether range terms are analyzed when constructing range patterns.
     *
     * @return whether or not to analyze range terms
     * @see #setAnalyzeRangeTerms(boolean)
     */
    public boolean getAnalyzeRangeTerms() {
        return analyzeRangeTerms;
    }

    // BL: BooleanClause -> TPBooleanClause
    /**
     * Appends a clause for {@code q} to {@code clauses}, first adjusting the
     * occurrence of the previous clause according to the conjunction that
     * introduces the new one.
     * <p>
     * The previous clause is adjusted even when {@code q} is null; in that case
     * nothing is appended.
     *
     * @param clauses clause list being built; its last element may be modified
     * @param conj one of CONJ_NONE, CONJ_AND, CONJ_OR
     * @param mods one of MOD_NONE, MOD_NOT, MOD_REQ
     * @param q the pattern to add, or null if the analyzer filtered the term away
     */
    protected void addClause(List<TPBooleanClause> clauses, int conj, int mods, TextPattern q) {
        final boolean hasPrevious = !clauses.isEmpty();

        // "a AND b": the preceding term becomes required, unless it is prohibited.
        if (hasPrevious && conj == CONJ_AND) {
            final TPBooleanClause previous = clauses.get(clauses.size() - 1);
            if (!previous.isProhibited()) {
                previous.setOccur(BooleanClause.Occur.MUST);
            }
        }

        // "a OR b" under the AND default operator: the preceding term was parsed
        // as required, so make it optional again unless it is prohibited
        // (-a OR b stays as is, +a OR b becomes a OR b).
        if (hasPrevious && operator == AND_OPERATOR && conj == CONJ_OR) {
            final TPBooleanClause previous = clauses.get(clauses.size() - 1);
            if (!previous.isProhibited()) {
                previous.setOccur(BooleanClause.Occur.SHOULD);
            }
        }

        // The term might have been filtered away by the analyzer.
        if (q == null) {
            return;
        }

        // PROHIBITED when introduced by NOT or -, whatever the default operator.
        final boolean prohibited = (mods == MOD_NOT);
        final boolean required;
        if (operator == OR_OPERATOR) {
            // REQUIRED when introduced by +, or by AND as long as not prohibited.
            required = (mods == MOD_REQ) || (conj == CONJ_AND && !prohibited);
        } else {
            // REQUIRED unless prohibited or introduced by OR.
            required = !prohibited && conj != CONJ_OR;
        }

        if (required && prohibited) {
            throw new RuntimeException("Clause cannot be both required and prohibited");
        }

        final BooleanClause.Occur occur;
        if (required) {
            occur = BooleanClause.Occur.MUST;
        } else if (prohibited) {
            occur = BooleanClause.Occur.MUST_NOT;
        } else {
            occur = BooleanClause.Occur.SHOULD;
        }
        clauses.add(newBooleanClause(q, occur));
    }

    /**
     * Builds the pattern for a piece of query text on a field by delegating to
     * {@link #newFieldQuery(Analyzer, String, String, boolean)} with this
     * parser's analyzer.
     *
     * @param field the field the text applies to
     * @param queryText the text to analyze
     * @param quoted true if the text was surrounded by double quotes
     * @return the resulting pattern, or null if the analyzer produced no tokens
     * @throws ParseException throw in overridden method to disallow
     */
    protected TextPattern getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
        return newFieldQuery(analyzer, field, queryText, quoted);
    }

    /**
     * Analyzes {@code queryText} with the given analyzer and builds a pattern
     * from the tokens produced:
     * <ul>
     * <li>no tokens: returns null;</li>
     * <li>one token: a term pattern from {@link #newTermQuery(Term)};</li>
     * <li>several tokens, and either some share a position or the text is
     *     unquoted with automatic phrase generation off: a boolean pattern when
     *     all tokens sit at one position or the text is unquoted without
     *     automatic phrases, otherwise a {@link TPMultiPhrase};</li>
     * <li>otherwise: a {@link TPPhrase}.</li>
     * </ul>
     * The token stream is consumed twice: once to count tokens and positions,
     * then (after rewinding the caching filter) to build the pattern.
     *
     * @param analyzer analyzer used to tokenize the text
     * @param field the field the text applies to
     * @param queryText the text to analyze
     * @param quoted true if the text was surrounded by double quotes
     * @return the resulting pattern, or null if the analyzer produced no tokens
     * @throws ParseException if the token stream cannot be initialized or closed;
     *         may also be thrown in an overridden method to disallow
     */
    protected TextPattern newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted)
            throws ParseException {
        // Use the analyzer to get all the tokens, and then build a TermQuery,
        // PhraseQuery, or nothing based on the term count

        TokenStream source;
        try {
            source = analyzer.tokenStream(field, new StringReader(queryText));
            source.reset();
        } catch (IOException e) {
            ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text");
            p.initCause(e);
            throw p;
        }
        // The caching filter lets the tokens be replayed after the counting pass below.
        CachingTokenFilter buffer = new CachingTokenFilter(source);
        TermToBytesRefAttribute termAtt = null;
        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;

        buffer.reset();

        if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
            termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }

        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;

        // First pass: count the tokens and distinct positions, and note whether
        // any token has a position increment of zero (shares a position).
        boolean hasMoreTokens = false;
        if (termAtt != null) {
            try {
                hasMoreTokens = buffer.incrementToken();
                while (hasMoreTokens) {
                    numTokens++;
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                    if (positionIncrement != 0) {
                        positionCount += positionIncrement;
                    } else {
                        severalTokensAtSamePosition = true;
                    }
                    hasMoreTokens = buffer.incrementToken();
                }
            } catch (IOException e) {
                // ignore: the tokens counted so far are used as they are
            }
        }
        try {
            // rewind the buffer stream
            buffer.reset();

            // close original stream - all tokens buffered
            source.close();
        } catch (IOException e) {
            ParseException p = new ParseException("Cannot close TokenStream analyzing query text");
            p.initCause(e);
            throw p;
        }

        // Shared BytesRef that termAtt.fillBytesRef() refreshes for each token;
        // it is deep-copied every time a Term is created from it.
        BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();

        if (numTokens == 0)
            return null;
        else if (numTokens == 1) {
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                termAtt.fillBytesRef();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
        } else {
            if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
                if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
                    // no phrase query:
                    TextPatternBoolean q = newBooleanQuery(positionCount == 1); // BL: BooleanQuery -> TextPatternBoolean

                    // Tokens spread over several positions follow the default operator;
                    // tokens that all share one position are always optional alternatives.
                    BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR
                            ? BooleanClause.Occur.MUST
                            : BooleanClause.Occur.SHOULD;

                    for (int i = 0; i < numTokens; i++) {
                        try {
                            boolean hasNext = buffer.incrementToken();
                            assert hasNext == true;
                            termAtt.fillBytesRef();
                        } catch (IOException e) {
                            // safe to ignore, because we know the number of tokens
                        }
                        TextPattern currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
                        q.add(currentQuery, occur);
                    }
                    return q;
                } else {
                    // phrase query:
                    TPMultiPhrase mpq = newMultiPhraseQuery(); // BL: MultiPhraseQuery -> TPMultiPhrase
                    mpq.setSlop(phraseSlop);
                    // Terms collected for the current position; flushed when the position advances.
                    List<Term> multiTerms = new ArrayList<Term>();
                    int position = -1;
                    for (int i = 0; i < numTokens; i++) {
                        int positionIncrement = 1;
                        try {
                            boolean hasNext = buffer.incrementToken();
                            assert hasNext == true;
                            termAtt.fillBytesRef();
                            if (posIncrAtt != null) {
                                positionIncrement = posIncrAtt.getPositionIncrement();
                            }
                        } catch (IOException e) {
                            // safe to ignore, because we know the number of tokens
                        }

                        if (positionIncrement > 0 && multiTerms.size() > 0) {
                            if (enablePositionIncrements) {
                                mpq.add(multiTerms.toArray(new Term[0]), position);
                            } else {
                                mpq.add(multiTerms.toArray(new Term[0]));
                            }
                            multiTerms.clear();
                        }
                        position += positionIncrement;
                        multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                    }
                    // Flush the terms of the last position.
                    if (enablePositionIncrements) {
                        mpq.add(multiTerms.toArray(new Term[0]), position);
                    } else {
                        mpq.add(multiTerms.toArray(new Term[0]));
                    }
                    return mpq;
                }
            } else {
                TPPhrase pq = newPhraseQuery(); // BL: PhraseQuery -> TPPhrase
                pq.setSlop(phraseSlop);
                int position = -1;

                for (int i = 0; i < numTokens; i++) {
                    int positionIncrement = 1;

                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        termAtt.fillBytesRef();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (enablePositionIncrements) {
                        position += positionIncrement;
                        pq.add(new Term(field, BytesRef.deepCopyOf(bytes)), position);
                    } else {
                        pq.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                    }
                }
                return pq;
            }
        }
    }

    /**
     * Builds the pattern for quoted text with an explicit slop.
     * <p>
     * The base implementation delegates to
     * {@link #getFieldQuery(String,String,boolean)} with {@code quoted} set to
     * true and then applies {@code slop} to the result if it is a
     * {@link TPPhrase} or a {@link TPMultiPhrase}. This method may be
     * overridden, for example, to return a different kind of proximity pattern.
     *
     * @param field the field the text applies to
     * @param queryText the quoted text to analyze
     * @param slop the slop to set on a resulting phrase pattern
     * @return the resulting pattern, possibly null
     * @throws ParseException throw in overridden method to disallow
     */
    protected TextPattern getFieldQuery(String field, String queryText, int slop) throws ParseException {
        final TextPattern pattern = getFieldQuery(field, queryText, true);

        if (pattern instanceof TPPhrase) { // BL was: PhraseQuery
            final TPPhrase phrase = (TPPhrase) pattern;
            phrase.setSlop(slop);
        }
        if (pattern instanceof TPMultiPhrase) { // BL was: MultiPhraseQuery
            final TPMultiPhrase multiPhrase = (TPMultiPhrase) pattern;
            multiPhrase.setSlop(slop);
        }

        return pattern;
    }

    /**
     * Builds a range pattern after normalizing both bounds.
     * <p>
     * The bounds are lower-cased when {@code lowercaseExpandedTerms} is set.
     * Each bound is then parsed as a short-format date in the configured
     * locale; when that works it is replaced by its
     * {@link DateTools#dateToString} form at the field's date resolution. Any
     * failure in that conversion leaves the bound as it is.
     *
     * @param field the field the range applies to
     * @param part1 lower bound, may be null
     * @param part2 upper bound, may be null
     * @param startInclusive true if the lower bound is inclusive
     * @param endInclusive true if the upper bound is inclusive
     * @return the pattern produced by {@code newRangeQuery}
     * @throws ParseException throw in overridden method to disallow
     */
    protected TextPattern getRangeQuery(String field, String part1, String part2, boolean startInclusive,
            boolean endInclusive) throws ParseException {
        if (lowercaseExpandedTerms) {
            if (part1 != null) {
                part1 = part1.toLowerCase(locale);
            }
            if (part2 != null) {
                part2 = part2.toLowerCase(locale);
            }
        }

        final DateFormat shortDate = DateFormat.getDateInstance(DateFormat.SHORT, locale);
        shortDate.setLenient(true);
        final DateTools.Resolution resolution = getDateResolution(field);

        try {
            final Date lower = shortDate.parse(part1);
            part1 = DateTools.dateToString(lower, resolution);
        } catch (Exception e) {
            // not usable as a date: keep the bound unchanged
        }

        try {
            Date upper = shortDate.parse(part2);
            if (endInclusive) {
                // The user can only specify the date, not the time, so move the
                // bound to the latest possible time of that date to really
                // include all documents:
                final Calendar endOfDay = Calendar.getInstance(timeZone, locale);
                endOfDay.setTime(upper);
                endOfDay.set(Calendar.HOUR_OF_DAY, 23);
                endOfDay.set(Calendar.MINUTE, 59);
                endOfDay.set(Calendar.SECOND, 59);
                endOfDay.set(Calendar.MILLISECOND, 999);
                upper = endOfDay.getTime();
            }
            part2 = DateTools.dateToString(upper, resolution);
        } catch (Exception e) {
            // not usable as a date: keep the bound unchanged
        }

        return newRangeQuery(field, part1, part2, startInclusive, endInclusive);
    }

    /**
     * Builds a new {@link TextPatternBoolean} instance (the TextPattern
     * counterpart of Lucene's BooleanQuery).
     *
     * @param disableCoord disable coord; passed on to the TextPatternBoolean constructor
     * @return new TextPatternBoolean instance
     */
    protected TextPatternBoolean newBooleanQuery(boolean disableCoord) {
        return new TextPatternBoolean(disableCoord); // BL was: BooleanQuery
    }

    /**
     * Builds a new {@link TPBooleanClause} instance (the TextPattern
     * counterpart of Lucene's BooleanClause).
     *
     * @param q sub pattern
     * @param occur how this clause should occur when matching documents
     * @return new TPBooleanClause instance
     */
    protected TPBooleanClause newBooleanClause(TextPattern q, BooleanClause.Occur occur) {
        return new TPBooleanClause(q, occur); // BL was: BooleanClause
    }

    /**
     * Builds a new {@link TextPatternTerm} instance from the text of the given
     * term. Only the term's text is used; its field is not passed on.
     *
     * @param term term
     * @return new TextPatternTerm instance
     */
    protected TextPattern newTermQuery(Term term) {
        return new TextPatternTerm(term.text()); // BL was: TermQuery
    }

    /**
     * Builds a new, empty {@link TPPhrase} instance (the TextPattern
     * counterpart of Lucene's PhraseQuery).
     *
     * @return new TPPhrase instance
     */
    protected TPPhrase newPhraseQuery() {
        return new TPPhrase(); // BL was: PhraseQuery
    }

    /**
     * Builds a new, empty {@link TPMultiPhrase} instance (the TextPattern
     * counterpart of Lucene's MultiPhraseQuery).
     *
     * @return new TPMultiPhrase instance
     */
    protected TPMultiPhrase newMultiPhraseQuery() { // BL: MultiPhraseQuery -> TPMultiPhrase
        return new TPMultiPhrase(); // BL was: MultiPhraseQuery
    }

    /**
     * Builds a new {@link TextPatternPrefix} instance from the text of the
     * given term. Only the term's text is used; its field is not passed on.
     *
     * @param prefix Prefix term
     * @return new TextPatternPrefix instance
     */
    protected TextPattern newPrefixQuery(Term prefix) {
        // BL: the Lucene version applied multiTermRewriteMethod here; that call is disabled.
        return new TextPatternPrefix(prefix.text()); // BL was: PrefixQuery
    }

    /**
     * Builds a new {@link TextPatternRegex} instance from the text of the
     * given term. Only the term's text is used; its field is not passed on.
     *
     * @param regexp Regexp term
     * @return new TextPatternRegex instance
     */
    protected TextPattern newRegexpQuery(Term regexp) {
        // BL: the Lucene version applied multiTermRewriteMethod here; that call is disabled.
        return new TextPatternRegex(regexp.text()); // BL was: RegexpQuery
    }

    /**
     * Builds a new {@link TextPatternFuzzy} instance.
     * <p>
     * The minimum similarity is converted into a number of edits with
     * {@link FuzzyQuery#floatToEdits(float, int)}, using the term's length in
     * Unicode code points.
     *
     * @param term Term; only its text is used
     * @param minimumSimilarity minimum similarity
     * @param prefixLength prefix length
     * @return new TextPatternFuzzy instance
     */
    protected TextPattern newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
        // FuzzyQuery doesn't yet allow constant score rewrite
        final String termText = term.text();
        final int codePoints = termText.codePointCount(0, termText.length());
        final int maxEdits = FuzzyQuery.floatToEdits(minimumSimilarity, codePoints);
        return new TextPatternFuzzy(termText, maxEdits, prefixLength); // BL was: FuzzyQuery
    }

    // TODO: Should this be protected instead?
    /**
     * Analyzes a single multi-term part using this parser's own analyzer.
     *
     * @param field the field the part belongs to
     * @param part the text to analyze
     * @return a copy of the bytes of the single term the analyzer produced
     */
    private BytesRef analyzeMultitermTerm(String field, String part) {
        return analyzeMultitermTerm(field, part, analyzer);
    }

    /**
     * Runs {@code part} through an analyzer and returns the bytes of the single
     * term it produces.
     * <p>
     * The token stream is closed on every path, including when the analyzer
     * yields no term or more than one term, so a failed analysis does not leave
     * the stream open.
     *
     * @param field the field the part belongs to
     * @param part the text to analyze
     * @param analyzerIn analyzer to use; when null this parser's analyzer is used
     * @return an independent copy of the analyzed term's bytes
     * @throws IllegalArgumentException if the analyzer returns no term or more than one term
     * @throws RuntimeException if the token stream fails with an {@link IOException}
     */
    protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
        if (analyzerIn == null)
            analyzerIn = analyzer;

        TokenStream source = null;
        try {
            source = analyzerIn.tokenStream(field, new StringReader(part));
            source.reset();
        } catch (IOException e) {
            if (source != null) {
                // reset() failed after the stream was obtained: release it before reporting
                try {
                    source.close();
                } catch (IOException ignored) {
                    // the initialization failure below is the error worth reporting
                }
            }
            throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
        }

        BytesRef result;
        boolean analyzed = false;
        try {
            TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
            BytesRef bytes = termAtt.getBytesRef();

            try {
                if (!source.incrementToken())
                    throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
                termAtt.fillBytesRef();
                if (source.incrementToken())
                    throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
            } catch (IOException e) {
                throw new RuntimeException("error analyzing range part: " + part, e);
            }

            // Copy before end()/close() so the result never depends on stream state.
            result = BytesRef.deepCopyOf(bytes);
            analyzed = true;
        } finally {
            if (!analyzed) {
                // Failure path: close the stream without masking the exception in flight.
                try {
                    source.close();
                } catch (IOException ignored) {
                    // the original failure is the one the caller needs to see
                }
            }
        }

        try {
            try {
                source.end();
            } finally {
                // close even if end() fails
                source.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part,
                    e);
        }

        return result;
    }

    /**
     * Builds a new {@code TPTermRange} instance (BL: replaces Lucene's TermRangeQuery).
     * <p>
     * Each bound is either analyzed (when {@code analyzeRangeTerms} is set) or
     * converted to bytes as-is, and then passed to the pattern as a string.
     * A {@code null} part denotes an open bound and is passed on as
     * {@code null}; previously an open bound caused a
     * {@link NullPointerException} here.
     *
     * @param field Field
     * @param part1 min, or {@code null} for an open lower bound
     * @param part2 max, or {@code null} for an open upper bound
     * @param startInclusive true if the start of the range is inclusive
     * @param endInclusive true if the end of the range is inclusive
     * @return new {@code TPTermRange} instance
     */
    protected TextPattern newRangeQuery(String field, String part1, String part2, boolean startInclusive,
            boolean endInclusive) {
        final BytesRef start;
        final BytesRef end;

        if (part1 == null) {
            start = null;
        } else {
            start = analyzeRangeTerms ? analyzeMultitermTerm(field, part1) : new BytesRef(part1);
        }

        if (part2 == null) {
            end = null;
        } else {
            end = analyzeRangeTerms ? analyzeMultitermTerm(field, part2) : new BytesRef(part2);
        }

        // Open bounds stay null instead of being dereferenced.
        // NOTE(review): how TPTermRange treats a null bound is not visible here - confirm.
        final String startText = (start == null) ? null : start.utf8ToString();
        final String endText = (end == null) ? null : end.utf8ToString();

        // BL was: TermRangeQuery
        final TPTermRange query = new TPTermRange(field, startText, endText, startInclusive, endInclusive);

        //query.setRewriteMethod(multiTermRewriteMethod); // BL disabled
        return query;
    }

    /**
     * Creates the pattern object that stands for "match all documents".
     *
     * @return a new {@code TPMatchAllDocs} instance
     */
    protected TextPattern newMatchAllDocsQuery() {
        final TextPattern matchAll = new TPMatchAllDocs(); // BL was: MatchAllDocsQuery
        return matchAll;
    }

    /**
     * Creates the pattern object for a wildcard term.
     *
     * @param t term whose text is the wildcard expression
     * @return a new {@code TextPatternWildcard} built from the term text
     */
    protected TextPattern newWildcardQuery(Term t) {
        // BL was: WildcardQuery. Only the text of the term is handed to the pattern.
        // BL disabled -> query.setRewriteMethod(multiTermRewriteMethod);
        final String wildcardText = t.text();
        return new TextPatternWildcard(wildcardText);
    }

    /**
     * Factory method that combines a set of clauses into one pattern.
     * <p>
     * Delegates to {@link #getBooleanQuery(List, boolean)} with
     * {@code disableCoord} set to {@code false}.
     * <p>
     * Extending classes may override this to change the pattern that is
     * returned.
     *
     * @param clauses list of {@code TPBooleanClause} instances to join
     *
     * @return the resulting pattern
     * @throws ParseException may be thrown by an overriding method to disallow
     */
    protected TextPattern getBooleanQuery(List<TPBooleanClause> clauses) throws ParseException { // BL was: BooleanClause
        final boolean disableCoord = false;
        return getBooleanQuery(clauses, disableCoord);
    }

    /**
     * Factory method that combines a set of clauses into one pattern.
     * <p>
     * A boolean pattern is obtained from {@code newBooleanQuery} and every
     * clause is added to it in list order. An empty list yields {@code null}.
     * <p>
     * Extending classes may override this to change the pattern that is
     * returned.
     *
     * @param clauses list of {@code TPBooleanClause} instances to join
     * @param disableCoord passed through to {@code newBooleanQuery}
     *
     * @return the resulting pattern, or {@code null} when there are no clauses
     * @throws ParseException may be thrown by an overriding method to disallow
     */
    protected TextPattern getBooleanQuery(List<TPBooleanClause> clauses, boolean disableCoord) // BL was: BooleanClause
            throws ParseException {
        // Nothing to combine: all clause words were filtered away by the analyzer.
        if (clauses.isEmpty()) {
            return null;
        }

        final TextPatternBoolean combined = newBooleanQuery(disableCoord); // BL was: BooleanQuery
        for (TPBooleanClause each : clauses) { // BL was: BooleanClause
            combined.add(each);
        }
        return combined;
    }

    /**
     * Factory method for a wildcard term: a term token that contains one or
     * more wildcard characters (? and *) but is not a simple prefix term
     * (one with just a single * at the end).
     * <p>
     * The combination of field {@code *} and term {@code *} produces the
     * match-all pattern. Otherwise a leading {@code *} or {@code ?} is rejected
     * unless leading wildcards are allowed, and the term is lower-cased with
     * the parser's locale when {@code lowercaseExpandedTerms} is set. The term
     * is not passed through the analyzer.
     * <p>
     * Extending classes may override this to provide custom handling for
     * wildcard terms.
     *
     * @param field Name of the field the pattern will use.
     * @param termStr Term token that contains one or more wildcard
     *   characters (? or *), but is not a simple prefix term
     *
     * @return the resulting pattern built for the term
     * @throws ParseException if a leading wildcard is present but not allowed;
     *         may also be thrown by an overriding method to disallow
     */
    protected TextPattern getWildcardQuery(String field, String termStr) throws ParseException {
        if ("*".equals(field) && "*".equals(termStr)) {
            return newMatchAllDocsQuery();
        }

        final boolean forbiddenLeadingWildcard =
                !allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?"));
        if (forbiddenLeadingWildcard) {
            throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
        }

        final String wildcardText = lowercaseExpandedTerms ? termStr.toLowerCase(locale) : termStr;
        return newWildcardQuery(new Term(field, wildcardText));
    }

    /**
     * Factory method for a regular expression term.
     * <p>
     * The expression is lower-cased with the parser's locale when
     * {@code lowercaseExpandedTerms} is set. It is not passed through the
     * analyzer.
     * <p>
     * Extending classes may override this to provide custom handling for
     * regular expression terms.
     *
     * @param field Name of the field the pattern will use.
     * @param termStr Term token that contains a regular expression
     *
     * @return the resulting pattern built for the term
     * @throws ParseException may be thrown by an overriding method to disallow
     */
    protected TextPattern getRegexpQuery(String field, String termStr) throws ParseException {
        final String expression = lowercaseExpandedTerms ? termStr.toLowerCase(locale) : termStr;
        return newRegexpQuery(new Term(field, expression));
    }

    /**
     * Factory method for a prefix term (similar to {@link #getWildcardQuery}):
     * a term token whose only wildcard is a single '*' as its last character.
     * <p>
     * A leading {@code *} is rejected unless leading wildcards are allowed, and
     * the term is lower-cased with the parser's locale when
     * {@code lowercaseExpandedTerms} is set. The term is not passed through the
     * analyzer.
     * <p>
     * Extending classes may override this to provide custom handling for
     * prefix terms.
     *
     * @param field Name of the field the pattern will use.
     * @param termStr Term token to use for building the pattern
     *    (<b>without</b> trailing '*' character!)
     *
     * @return the resulting pattern built for the term
     * @throws ParseException if the term starts with '*' while leading
     *         wildcards are not allowed; may also be thrown by an overriding
     *         method to disallow
     */
    protected TextPattern getPrefixQuery(String field, String termStr) throws ParseException {
        final boolean forbiddenLeadingStar = !allowLeadingWildcard && termStr.startsWith("*");
        if (forbiddenLeadingStar) {
            throw new ParseException("'*' not allowed as first character in PrefixQuery");
        }

        final String prefixText = lowercaseExpandedTerms ? termStr.toLowerCase(locale) : termStr;
        return newPrefixQuery(new Term(field, prefixText));
    }

    /**
     * Factory method for a fuzzy term (similar to {@link #getWildcardQuery}):
     * a term token that has the fuzzy suffix (~) appended.
     * <p>
     * The term is lower-cased with the parser's locale when
     * {@code lowercaseExpandedTerms} is set, and the parser's
     * {@code fuzzyPrefixLength} is used as prefix length.
     *
     * @param field Name of the field the pattern will use.
     * @param termStr Term token to use for building the pattern
     * @param minSimilarity minimum similarity passed on to {@link #newFuzzyQuery}
     *
     * @return the resulting pattern built for the term
     * @throws ParseException may be thrown by an overriding method to disallow
     */
    protected TextPattern getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
        final String fuzzyText = lowercaseExpandedTerms ? termStr.toLowerCase(locale) : termStr;
        return newFuzzyQuery(new Term(field, fuzzyText), minSimilarity, fuzzyPrefixLength);
    }

    // extracted from the .jj grammar
    // Dispatches an unquoted term token to the matching factory method. The flags
    // are checked in the order wildcard, prefix, regexp, fuzzy; when none is set
    // the term is handled as a plain field term.
    TextPattern handleBareTokenQuery(String qfield, Token term, Token fuzzySlop, boolean prefix, boolean wildcard,
            boolean fuzzy, boolean regexp) throws ParseException {
        final String rawImage = term.image;

        // Unescaped up front for every kind of term, so malformed escapes are
        // reported even by branches that go on to use the raw image.
        final String unescapedImage = discardEscapeChar(rawImage);

        if (wildcard) {
            return getWildcardQuery(qfield, rawImage);
        }
        if (prefix) {
            // drop the trailing '*' before unescaping
            final String withoutStar = rawImage.substring(0, rawImage.length() - 1);
            return getPrefixQuery(qfield, discardEscapeChar(withoutStar));
        }
        if (regexp) {
            // strip the first and last character (the delimiters) of the token
            final String expression = rawImage.substring(1, rawImage.length() - 1);
            return getRegexpQuery(qfield, expression);
        }
        if (fuzzy) {
            return handleBareFuzzy(qfield, fuzzySlop, unescapedImage);
        }
        return getFieldQuery(qfield, unescapedImage, false);
    }

    // Builds the fuzzy pattern for a bare term. The similarity is read from the
    // slop token (the text after its first character); if that cannot be read,
    // the parser's default fuzzyMinSim is used.
    TextPattern handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException {
        float similarity = fuzzyMinSim;
        try {
            similarity = Float.parseFloat(fuzzySlop.image.substring(1));
        } catch (Exception ignored) {
            // deliberately best-effort: fall back to the default similarity
        }

        if (similarity < 0.0f) {
            throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
        }
        // Values of 1 or more are edit distances and therefore must be whole numbers.
        if (similarity >= 1.0f && similarity != (int) similarity) {
            throw new ParseException("Fractional edit distances are not allowed!");
        }

        return getFuzzyQuery(qfield, termImage, similarity);
    }

    // extracted from the .jj grammar
    // Builds the pattern for a quoted term. The slop is read from the slop token
    // (the text after its first character, truncated to an int); if there is no
    // token or it cannot be read, the parser's default phraseSlop is used.
    TextPattern handleQuotedTerm(String qfield, Token term, Token fuzzySlop) throws ParseException {
        int slop = phraseSlop; // default
        if (fuzzySlop != null) {
            try {
                slop = (int) Float.parseFloat(fuzzySlop.image.substring(1));
            } catch (Exception ignored) {
                // deliberately best-effort: keep the default slop
            }
        }

        // strip the surrounding quotes, then resolve escapes
        final String quoted = term.image;
        final String inner = quoted.substring(1, quoted.length() - 1);
        return getFieldQuery(qfield, discardEscapeChar(inner), slop);
    }

    // extracted from the .jj grammar
    /**
     * Boost hook of the grammar. Boosting is not supported here (BL), so the
     * boost token is ignored and the pattern is returned unchanged.
     *
     * @param q pattern the boost would apply to; may be null
     * @param boost boost token; not used
     * @return {@code q}, unchanged
     */
    TextPattern handleBoost(TextPattern q, Token boost) {
        /*
         Disabled original implementation, kept for reference:

         if (boost != null) {
           float f = (float) 1.0;
           try {
             f = Float.valueOf(boost.image).floatValue();
           }
           catch (Exception ignored) {
             // Should this be handled somehow? (defaults to "no boost", if
             // boost number is invalid)
           }

           // avoid boosting null queries, such as those caused by stop words
           if (q != null) {
             q.setBoost(f);
           }
         }
        */
        return q; // BL: boost not supported
    }

    /**
     * Returns a copy of the input in which escape characters (backslashes)
     * have been removed; a doubled backslash is kept once.
     *
     * Escaped unicode characters of four hex digits are supported, e. g.
     * <code>\\u0041</code> is translated to <code>A</code>.
     *
     * @param input text that may contain backslash escapes
     * @return the unescaped text
     * @throws ParseException if a unicode escape is truncated or contains a
     *         non-hex character, or if the input ends with an escape character
     */
    String discardEscapeChar(String input) throws ParseException {
        final int inputLength = input.length();

        // The result is never longer than the input.
        final StringBuilder unescaped = new StringBuilder(inputLength);

        // True right after an unescaped backslash has been read.
        boolean pendingEscape = false;

        // Weight of the next hex digit of a unicode escape (16^3, 16^2, 16, 1);
        // zero when no unicode escape is being read.
        int digitWeight = 0;

        // Value of the unicode escape collected so far.
        int unicodeValue = 0;

        for (int pos = 0; pos < inputLength; pos++) {
            final char ch = input.charAt(pos);

            if (digitWeight > 0) {
                // inside a unicode escape: consume one hex digit
                unicodeValue += hexToInt(ch) * digitWeight;
                digitWeight >>>= 4;
                if (digitWeight == 0) {
                    unescaped.append((char) unicodeValue);
                    unicodeValue = 0;
                }
                continue;
            }

            if (pendingEscape) {
                pendingEscape = false;
                if (ch == 'u') {
                    // start of an escaped unicode character
                    digitWeight = 0x1000;
                } else {
                    // any other escaped character is taken literally
                    unescaped.append(ch);
                }
                continue;
            }

            if (ch == '\\') {
                pendingEscape = true;
            } else {
                unescaped.append(ch);
            }
        }

        if (digitWeight > 0) {
            throw new ParseException("Truncated unicode escape sequence.");
        }
        if (pendingEscape) {
            throw new ParseException("Term can not end with escape character.");
        }

        return unescaped.toString();
    }

    /**
     * Converts one hexadecimal digit (0-9, a-f or A-F) to its numeric value.
     *
     * @param c character to convert
     * @return value of the digit, between 0 and 15
     * @throws ParseException if {@code c} is not a hexadecimal digit
     */
    static final int hexToInt(char c) throws ParseException {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return 10 + (c - 'a');
        }
        if (c >= 'A' && c <= 'F') {
            return 10 + (c - 'A');
        }
        throw new ParseException("Non-hex character in Unicode escape sequence: " + c);
    }

    /**
     * Escapes the characters that are part of the query syntax by putting a
     * <code>\</code> in front of each of them; all other characters are
     * copied unchanged.
     *
     * @param s text to escape
     * @return the escaped text
     */
    public static String escape(String s) {
        final int inputLength = s.length();
        final StringBuilder escaped = new StringBuilder();
        for (int pos = 0; pos < inputLength; pos++) {
            final char ch = s.charAt(pos);
            switch (ch) {
            // These characters are part of the query syntax and must be escaped
            case '\\':
            case '+':
            case '-':
            case '!':
            case '(':
            case ')':
            case ':':
            case '^':
            case '[':
            case ']':
            case '\"':
            case '{':
            case '}':
            case '~':
            case '*':
            case '?':
            case '|':
            case '&':
            case '/':
                escaped.append('\\');
                break;
            default:
                break;
            }
            escaped.append(ch);
        }
        return escaped.toString();
    }

}