com.jaeksoft.searchlib.query.QueryParser.java Source code

Java tutorial

Introduction

Here is the source code for com.jaeksoft.searchlib.query.QueryParser.java

Source

/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2013-2015 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/
package com.jaeksoft.searchlib.query;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import com.jaeksoft.searchlib.analysis.CompiledAnalyzer;
import com.jaeksoft.searchlib.query.parser.BooleanQueryBaseListener;
import com.jaeksoft.searchlib.query.parser.BooleanQueryLexer;
import com.jaeksoft.searchlib.query.parser.BooleanQueryParser;
import com.jaeksoft.searchlib.util.StringUtils;

/**
 * Builds a Lucene {@link BooleanQuery} from a query string.
 * <p>
 * The class registers itself as a parse listener on the generated
 * {@link BooleanQueryParser} and reacts to each terminal node: operator tokens
 * select the occurrence of the next clause, field tokens select the field of
 * the next term or phrase, and string / quoted-string tokens become
 * {@link TermQuery} / {@link PhraseQuery} clauses.
 * <p>
 * {@link #parse(String)} resets and mutates instance fields without any
 * synchronization, so one instance must not be used by several threads at the
 * same time.
 */
public class QueryParser extends BooleanQueryBaseListener {

    /** Value of {@link #currentOperator} until the first operand or operator is seen. */
    private static final int NO_OPERATOR = -1;

    /** Field names accepted in front of a term; may be null, in which case none is accepted. */
    private final Set<String> fields;
    /** Field used for terms and phrases that carry no accepted field prefix. */
    private final String defaultField;
    /** Lexer token type (AND / OR / NOT) applied when the query gives no explicit operator. */
    private final int defaultOperator;
    /** Optional analyzer used to split text into terms; when null the text is used as is. */
    private final CompiledAnalyzer analyzer;
    /** Slop applied to every phrase query. */
    private final int phraseSlop;
    /** Boost applied to term queries, or null to leave the boost untouched. */
    private final Double termBoost;
    /** Boost applied to phrase queries, or null to leave the boost untouched. */
    private final Double phraseBoost;

    /** Field that the next term or phrase will be attached to. */
    private String currentField;
    /** Operator (lexer token type) for the next clause, or {@link #NO_OPERATOR}. */
    private int currentOperator;
    /** First operand of the query, held back until the operator that follows it is known. */
    private Query holdQuery;
    /** Query under construction. */
    private BooleanQuery booleanQuery;

    /** Error recorded by a listener callback, rethrown by {@link #parse(String)}. */
    private IOException ioError;

    /**
     * @param defaultField
     *            field used when a term or phrase has no accepted field prefix
     * @param fields
     *            field names accepted as prefix, may be null
     * @param occur
     *            default occurrence; null is treated like {@link Occur#MUST}
     * @param analyzer
     *            analyzer used to extract terms, may be null
     * @param phraseSlop
     *            slop set on each phrase query
     * @param termBoost
     *            boost set on each term query, may be null
     * @param phraseBoost
     *            boost set on each phrase query, may be null
     */
    public QueryParser(final String defaultField, final Set<String> fields, final Occur occur,
            final CompiledAnalyzer analyzer, final int phraseSlop, final Double termBoost,
            final Double phraseBoost) {
        this.defaultField = defaultField;
        this.fields = fields;
        this.defaultOperator = getOperator(occur);
        this.analyzer = analyzer;
        this.phraseSlop = phraseSlop;
        this.termBoost = termBoost;
        this.phraseBoost = phraseBoost;
    }

    /**
     * Maps a Lucene occurrence to the matching lexer token type. A null or
     * unlisted occurrence maps to AND.
     */
    private static int getOperator(final Occur occur) {
        if (occur == null) {
            return BooleanQueryLexer.AND;
        }
        switch (occur) {
        case MUST_NOT:
            return BooleanQueryLexer.NOT;
        case SHOULD:
            return BooleanQueryLexer.OR;
        case MUST:
        default:
            return BooleanQueryLexer.AND;
        }
    }

    /**
     * Maps a lexer token type to the matching Lucene occurrence, or null when
     * the value is none of AND / OR / NOT.
     */
    private static Occur toOccur(final int operator) {
        switch (operator) {
        case BooleanQueryLexer.AND:
            return Occur.MUST;
        case BooleanQueryLexer.OR:
            return Occur.SHOULD;
        case BooleanQueryLexer.NOT:
            return Occur.MUST_NOT;
        default:
            return null;
        }
    }

    /** Appends the query to the boolean query with the occurrence matching the operator. */
    private void addBooleanClause(final Query query, final int operator) {
        booleanQuery.add(new BooleanClause(query, toOccur(operator)));
    }

    /**
     * Adds the held first operand, if any, to the boolean query.
     * <p>
     * The held operand takes the operator that follows it, except for NOT: a
     * NOT is meant for the operand after it, so the held operand falls back to
     * the default operator.
     */
    private void flushHoldQuery() {
        if (holdQuery == null) {
            return;
        }
        final int operator = currentOperator == BooleanQueryLexer.NOT ? defaultOperator : currentOperator;
        addBooleanClause(holdQuery, operator);
        holdQuery = null;
    }

    /**
     * Registers a new operand. The very first operand is only held back,
     * because its occurrence depends on the operator that follows; later
     * operands flush the held one and are added with the current operator.
     * The current operator is then reset to the default.
     */
    private void addBooleanClause(final Query query) {
        if (currentOperator == NO_OPERATOR) {
            holdQuery = query;
        } else {
            flushHoldQuery();
            addBooleanClause(query, currentOperator);
        }
        currentOperator = defaultOperator;
    }

    /**
     * Returns the terms extracted from the text by the analyzer, or the text
     * itself as single term when no analyzer is set.
     */
    private List<String> getWords(final String text) throws IOException {
        final List<String> words = new ArrayList<String>(1);
        if (analyzer == null) {
            words.add(text);
        } else {
            analyzer.extractTerms(text, words);
        }
        return words;
    }

    /**
     * Adds one term query on the current field per extracted word, then
     * switches back to the default field.
     */
    private void addTermQuery(final String text) throws IOException {
        for (String word : getWords(text)) {
            final TermQuery termQuery = new TermQuery(new Term(currentField, word));
            if (termBoost != null) {
                termQuery.setBoost(termBoost.floatValue());
            }
            addBooleanClause(termQuery);
        }
        currentField = defaultField;
    }

    /**
     * Removes one leading and one trailing double quote when present.
     * <p>
     * A text made of a single quote character is returned unchanged: the same
     * character is both prefix and suffix, so there is nothing between them.
     */
    private static String stripQuotes(final String text) {
        final int start = text.startsWith("\"") ? 1 : 0;
        final int end = text.endsWith("\"") ? text.length() - 1 : text.length();
        return end >= start ? text.substring(start, end) : text;
    }

    /**
     * Adds a phrase query on the current field built from the words of the
     * (unquoted) text, then switches back to the default field.
     */
    private void addPhraseQuery(final String text) throws IOException {
        final PhraseQuery phraseQuery = new PhraseQuery();
        phraseQuery.setSlop(phraseSlop);
        if (phraseBoost != null) {
            phraseQuery.setBoost(phraseBoost.floatValue());
        }
        for (String word : getWords(stripQuotes(text))) {
            phraseQuery.add(new Term(currentField, word));
        }
        addBooleanClause(phraseQuery);
        currentField = defaultField;
    }

    /**
     * Handles a field token. When the name (token text without its trailing
     * colon) is an accepted field it becomes the current field; otherwise the
     * raw token text, colon included, is added as a term query.
     */
    private void setCurrentField(final String text) throws IOException {
        final String field = text.endsWith(":") ? text.substring(0, text.length() - 1) : text;
        if (fields != null && fields.contains(field)) {
            currentField = field;
        } else {
            addTermQuery(text);
        }
    }

    /**
     * Listener callback invoked for each terminal node. This callback cannot
     * throw a checked exception, so an {@link IOException} is stored in
     * {@link #ioError} and rethrown later by {@link #parse(String)}.
     */
    @Override
    final public void visitTerminal(final TerminalNode node) {
        try {
            final int type = node.getSymbol().getType();
            switch (type) {
            case BooleanQueryLexer.AND:
            case BooleanQueryLexer.OR:
            case BooleanQueryLexer.NOT:
                currentOperator = type;
                break;
            case BooleanQueryLexer.QSTRING:
                addPhraseQuery(node.getText());
                break;
            case BooleanQueryLexer.STRING:
                addTermQuery(node.getText());
                break;
            case BooleanQueryLexer.FIELD:
                setCurrentField(node.getText());
                break;
            case BooleanQueryLexer.WS:
                currentField = defaultField;
                break;
            default:
                break;
            }
        } catch (IOException e) {
            ioError = e;
        }
    }

    /**
     * Records lexer and parser syntax errors as an {@link IOException} holding
     * the position and message, with the recognition exception (possibly null)
     * as cause.
     */
    private final class ErrorListener extends BaseErrorListener {

        @Override
        public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
                int charPositionInLine, String msg, RecognitionException e) {
            final String message = StringUtils.fastConcat("line: ", Integer.toString(line), " - pos: ",
                    Integer.toString(charPositionInLine), " - ", msg);
            ioError = new IOException(message, e);
        }
    }

    /**
     * Returns the error recorded by a listener when there is one, otherwise a
     * new {@link IOException} wrapping the given parser failure.
     */
    private IOException toIOException(final RuntimeException failure) {
        return ioError != null ? ioError : new IOException(failure);
    }

    /**
     * Parses the query string and returns the resulting boolean query.
     *
     * @param query
     *            the query string to parse
     * @return the boolean query holding one clause per term or phrase
     * @throws IOException
     *             on a syntax error, or when term extraction failed
     */
    public final Query parse(String query) throws IOException {
        try {
            currentOperator = NO_OPERATOR;
            currentField = defaultField;
            holdQuery = null;
            booleanQuery = new BooleanQuery();
            ioError = null;

            final ErrorListener errorListener = new ErrorListener();

            final BooleanQueryLexer lexer = new BooleanQueryLexer(new ANTLRInputStream(query));
            lexer.removeErrorListeners();
            lexer.addErrorListener(errorListener);

            final BooleanQueryParser parser = new BooleanQueryParser(new CommonTokenStream(lexer));
            parser.setErrorHandler(new BailErrorStrategy());
            parser.addParseListener(this);
            parser.removeErrorListeners();
            parser.addErrorListener(errorListener);
            parser.expression();

            if (ioError != null) {
                throw ioError;
            }
            // A query with a single operand never reaches the flush in
            // addBooleanClause(Query), so the held operand is added here.
            flushHoldQuery();
            return booleanQuery;
        } catch (RecognitionException e) {
            throw toIOException(e);
        } catch (org.antlr.v4.runtime.misc.ParseCancellationException e) {
            throw toIOException(e);
        }
    }

    /** Manual smoke test: parses a few sample queries and prints the result. */
    public final static void main(String[] args) throws IOException {
        TreeSet<String> fields = new TreeSet<String>();
        fields.add("field2");
        fields.add("field3");
        QueryParser queryParser = new QueryParser("field", fields, Occur.MUST, null, 1, null, null);

        System.out.println(queryParser.parse("word"));
        System.out.println(
                queryParser.parse("word1 field2:word2 fauxfield\\:fauxword field:field3:\"quoted words\""));
        System.out.println(queryParser.parse("field:\"quoted words\""));
        System.out.println(queryParser.parse("\"quoted words\""));
        System.out.println(queryParser.parse("\"quoted words\" word"));
        System.out.println(queryParser.parse("word OR \"quoted words\""));
        System.out.println(queryParser.parse("word OR \"quoted words"));
        System.out.println(queryParser.parse("word1 word2 AND \"quoted words\""));
        System.out.println(queryParser.parse("word1 OU word2 \"quoted words\" NON unwanted"));
        System.out.println(queryParser.parse("\"\""));
        System.out.println(queryParser.parse("\"non ending quote"));
        System.out.println(queryParser.parse("22\""));
        System.out.println(queryParser.parse("\""));
        System.out.println(queryParser.parse("OU OU"));
        System.out.println(queryParser.parse(" \"\""));
    }
}