org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser.java Source code

Introduction

Here is the source code for org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser.java.
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.util.Version;

/**
 * <code>JackrabbitQueryParser</code> extends the standard lucene query parser
 * and adds JCR specific customizations:
 * <ul>
 * <li>the query text is rewritten before it is handed to lucene (see
 * {@link #parse(String)}),</li>
 * <li>terms that start with <code>~</code> are expanded into synonym
 * queries,</li>
 * <li>prefix and wildcard terms are turned into Jackrabbit
 * <code>WildcardQuery</code> instances.</li>
 * </ul>
 * Leading wildcards are allowed and the default operator is
 * <code>AND</code>.
 */
public class JackrabbitQueryParser extends QueryParser {

    /**
     * The Jackrabbit synonym provider or <code>null</code> if there is none.
     */
    private final SynonymProvider synonymProvider;

    /**
     * The per query cache that is passed on to every wildcard query created
     * by this parser.
     */
    private final PerQueryCache cache;

    /**
     * Creates a new query parser instance.
     *
     * @param fieldName       the field name.
     * @param analyzer        the analyzer.
     * @param synonymProvider the synonym provider or <code>null</code> if none
     *                        is available.
     * @param cache           the per query cache handed to the wildcard
     *                        queries created by this parser.
     */
    public JackrabbitQueryParser(String fieldName, Analyzer analyzer, SynonymProvider synonymProvider,
            PerQueryCache cache) {
        super(Version.LUCENE_24, fieldName, analyzer);
        this.synonymProvider = synonymProvider;
        this.cache = cache;
        setAllowLeadingWildcard(true);
        setDefaultOperator(Operator.AND);
    }

    /**
     * Rewrites the given text and passes the result to the lucene parser.
     * The rewrite performs the following steps:
     * <ul>
     * <li>every occurrence of <code>AND</code> and <code>NOT</code> is lower
     * cased, because the default lucene query parser recognizes them as
     * keywords,</li>
     * <li>an escaped <code>'</code> is replaced with just <code>'</code>,</li>
     * <li>a <code>~</code> at the start of the text or after whitespace is
     * escaped, which makes it part of the term and lets
     * {@link #getFieldQuery(String, String, boolean)} detect the synonym
     * prefix; a <code>~</code> that was already escaped stays escaped,</li>
     * <li>every <code>:</code> is escaped, because fields as known in lucene
     * are not supported,</li>
     * <li>any other escape sequence is passed through unchanged.</li>
     * </ul>
     * A single backslash at the very end of the text is dropped.
     *
     * @param textsearch the full text search expression.
     * @return the parsed query.
     * @throws ParseException if the rewritten text cannot be parsed.
     */
    public Query parse(String textsearch) throws ParseException {
        // the default lucene query parser recognizes 'AND' and 'NOT' as
        // keywords. Plain literal replacement, no regular expression needed.
        textsearch = textsearch.replace("AND", "and");
        textsearch = textsearch.replace("NOT", "not");

        StringBuilder rewritten = new StringBuilder(textsearch.length() + 8);
        // true while the previous character was an unconsumed backslash
        boolean escaped = false;
        for (int i = 0; i < textsearch.length(); i++) {
            char c = textsearch.charAt(i);
            if (c == '\\') {
                if (escaped) {
                    // escaped backslash, keep it escaped
                    rewritten.append("\\\\");
                    escaped = false;
                } else {
                    escaped = true;
                }
            } else if (c == '\'') {
                // replace escaped ' with just '
                escaped = false;
                rewritten.append(c);
            } else if (c == '~') {
                if (escaped || i == 0 || Character.isWhitespace(textsearch.charAt(i - 1))) {
                    // escape tilde so we can use it for similarity query;
                    // an explicit escape written by the user is preserved
                    rewritten.append('\\');
                }
                // the pending escape is consumed here and must not leak
                // onto the next character
                escaped = false;
                rewritten.append('~');
            } else if (c == ':') {
                // fields as known in lucene are not supported. The colon is
                // always written escaped, which also consumes a pending
                // escape.
                escaped = false;
                rewritten.append("\\:");
            } else {
                if (escaped) {
                    rewritten.append('\\');
                    escaped = false;
                }
                rewritten.append(c);
            }
        }
        return super.parse(rewritten.toString());
    }

    /**
     * Factory method for generating a synonym query.
     * Called when parser parses an input term token that has the synonym
     * prefix (~term) prepended.
     * <p>
     * The result is the field query for the term itself. If a synonym
     * provider is present and returns synonyms, the field queries for the
     * term and its synonyms are combined into a boolean query with optional
     * (<code>SHOULD</code>) clauses.
     *
     * @param field Name of the field query will use.
     * @param termStr Term token to use for building term for the query
     *
     * @return Resulting {@link Query} built for the term
     * @exception ParseException throw in overridden method to disallow
     */
    protected Query getSynonymQuery(String field, String termStr) throws ParseException {
        List<BooleanClause> synonyms = new ArrayList<BooleanClause>();
        synonyms.add(new BooleanClause(getFieldQuery(field, termStr), BooleanClause.Occur.SHOULD));
        if (synonymProvider != null) {
            for (String term : synonymProvider.getSynonyms(termStr)) {
                synonyms.add(new BooleanClause(getFieldQuery(field, term), BooleanClause.Occur.SHOULD));
            }
        }
        if (synonyms.size() == 1) {
            // no synonyms found, no need for a wrapping boolean query
            return synonyms.get(0).getQuery();
        } else {
            return getBooleanQuery(synonyms);
        }
    }

    /**
     * Delegates to {@link #getFieldQuery(String, String, boolean)} and treats
     * the query text as quoted.
     *
     * @param field     the name of the field.
     * @param queryText the text of the query.
     * @return the field query.
     * @throws ParseException if the query cannot be created.
     */
    protected Query getFieldQuery(String field, String queryText) throws ParseException {
        return getFieldQuery(field, queryText, true);
    }

    /**
     * Creates a synonym query when the query text starts with <code>~</code>,
     * otherwise the default lucene field query is created.
     *
     * @param field     the name of the field.
     * @param queryText the text of the query.
     * @param quoted    passed on to the lucene implementation.
     * @return the synonym or field query.
     * @throws ParseException if the query cannot be created.
     */
    protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
        if (queryText.startsWith("~")) {
            // synonym query
            return getSynonymQuery(field, queryText.substring(1));
        } else {
            return super.getFieldQuery(field, queryText, quoted);
        }
    }

    /**
     * Creates the query for a prefix term. The term is run through the
     * analyzer first: when it yields more than one token and the last token
     * has the CJ type of the {@link StandardTokenizer}, a regular field query
     * is created. In every other case a wildcard query for
     * <code>termStr + "*"</code> is returned.
     *
     * @param field   the name of the field.
     * @param termStr the prefix term without the trailing <code>*</code>.
     * @return the field or wildcard query.
     * @throws ParseException if the analyzer fails to tokenize the term or
     *                        the query cannot be created.
     */
    protected Query getPrefixQuery(String field, String termStr) throws ParseException {
        Analyzer a = getAnalyzer();
        TokenStream ts = a.tokenStream(field, new StringReader(termStr));
        int count = 0;
        boolean isCJ = false;
        try {
            TypeAttribute t = ts.addAttribute(TypeAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                count++;
                // overwritten on every token, only the last one counts
                isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type());
            }
            ts.end();
        } catch (IOException e) {
            // keep the original exception as cause for diagnostics
            ParseException pe = new ParseException(e.getMessage());
            pe.initCause(e);
            throw pe;
        } finally {
            try {
                ts.close();
            } catch (IOException e) {
                // ignore, closing the token stream is best effort
            }
        }
        if (count > 1 && isCJ) {
            return getFieldQuery(field, termStr);
        } else {
            return getWildcardQuery(field, termStr + "*");
        }
    }

    /**
     * Creates a Jackrabbit <code>WildcardQuery</code> for the given term. The
     * term is lower cased when {@link #getLowercaseExpandedTerms()} is
     * <code>true</code> and the lucene wildcards are translated with
     * {@link #translateWildcards(String)}.
     *
     * @param field   the name of the field.
     * @param termStr the term including wildcards.
     * @return the wildcard query.
     * @throws ParseException declared by the overridden method.
     */
    protected Query getWildcardQuery(String field, String termStr) throws ParseException {
        if (getLowercaseExpandedTerms()) {
            // NOTE(review): lower casing depends on the default locale of
            // the JVM - confirm this matches how the index was analyzed.
            termStr = termStr.toLowerCase();
        }
        return new WildcardQuery(field, null, translateWildcards(termStr), cache);
    }

    /**
     * Translates unescaped wildcards '*' and '?' into '%' and '_'.
     * Escaped wildcards are written as plain '*' and '?', every literal '%'
     * and '_' is escaped with a backslash. Other escape sequences are copied
     * unchanged and a single backslash at the very end of the input is
     * dropped.
     *
     * @param input the input String.
     * @return the translated String.
     */
    private String translateWildcards(String input) {
        StringBuilder translated = new StringBuilder(input.length());
        // true while the previous character was an unconsumed backslash
        boolean escaped = false;
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c == '\\') {
                if (escaped) {
                    translated.append("\\\\");
                    escaped = false;
                } else {
                    escaped = true;
                }
            } else if (c == '*') {
                if (escaped) {
                    translated.append('*');
                    escaped = false;
                } else {
                    translated.append('%');
                }
            } else if (c == '?') {
                if (escaped) {
                    translated.append('?');
                    escaped = false;
                } else {
                    translated.append('_');
                }
            } else if (c == '%' || c == '_') {
                // escape every occurrence of '%' and '_'
                escaped = false;
                translated.append('\\').append(c);
            } else {
                if (escaped) {
                    translated.append('\\');
                    escaped = false;
                }
                translated.append(c);
            }
        }
        return translated.toString();
    }
}