com.flaptor.indextank.query.IndexEngineParser.java Source code

Introduction

Here is the source code for com.flaptor.indextank.query.IndexEngineParser.java
Source

/*
 * Copyright (c) 2011 LinkedIn, Inc
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.flaptor.indextank.query;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.util.Version;

import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;

/**
 * Parses query strings and document fields for the index engine.
 *
 * <p>Query strings are first parsed by Lucene's classic {@code QueryParser}
 * (default operator AND) and the resulting Lucene query tree is then
 * translated into this package's {@link QueryNode} tree. Document fields are
 * tokenized with the same {@link Analyzer} that is handed to the query parser.
 */
public class IndexEngineParser {
    private final Analyzer analyzer;
    private final String defaultField;

    /**
     * Creates a parser that uses the given analyzer.
     *
     * @param defaultField field used for query terms that do not name a field
     * @param analyzer     analyzer used both for queries and for document fields
     */
    public IndexEngineParser(String defaultField, Analyzer analyzer) {
        this.defaultField = defaultField;
        this.analyzer = analyzer;
    }

    /**
     * Creates a parser that uses a new {@code IndexEngineAnalyzer}.
     *
     * @param defaultField field used for query terms that do not name a field
     */
    public IndexEngineParser(String defaultField) {
        this(defaultField, new IndexEngineAnalyzer());
    }

    /**
     * Parses a query string into a {@link QueryNode} tree.
     *
     * @param queryStr the query in Lucene query-parser syntax
     * @return the root of the translated query tree
     * @throws ParseException if Lucene cannot parse the string, or if the parsed
     *         query contains a construct this class does not translate
     */
    @SuppressWarnings("deprecation")
    public QueryNode parseQuery(final String queryStr) throws ParseException {
        org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
                Version.LUCENE_CURRENT, defaultField, getAnalyzer());
        qp.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
        org.apache.lucene.search.Query luceneQuery;
        try {
            luceneQuery = qp.parse(queryStr);
        } catch (Exception e) {
            // Every failure inside Lucene's parser, checked or unchecked, is
            // reported uniformly. The cause is embedded through its toString()
            // because only the (String) constructor of ParseException is
            // visible from this file.
            throw new ParseException("lucene failed parsing. " + e);
        }

        return internalParse(luceneQuery, queryStr);
    }

    /**
     * Returns a lucene Analyzer that behaves like the analyzer used
     * internally in this class.
     * Try not to use this method.
     */
    public Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Tokenizes the content of a document field.
     *
     * <p>The returned iterator is lazy: tokens are pulled from the analyzer's
     * token stream on demand, and the stream is ended and closed once it
     * reports no more tokens. Each token's position is the running sum of the
     * position increments seen so far.
     *
     * @param fieldName name of the field, passed to the analyzer
     * @param content   text to tokenize
     * @return an iterator over the tokens produced by the analyzer
     * @throws IllegalStateException (from the iterator) if the token stream
     *         fails with an {@link IOException}
     */
    public Iterator<AToken> parseDocumentField(final String fieldName, String content) {
        final TokenStream tkstream = analyzer.tokenStream(fieldName, new StringReader(content));
        final TermAttribute termAtt = tkstream.addAttribute(TermAttribute.class);
        final PositionIncrementAttribute posIncrAttribute = tkstream.addAttribute(PositionIncrementAttribute.class);
        final OffsetAttribute offsetAtt = tkstream.addAttribute(OffsetAttribute.class);

        return new AbstractIterator<AToken>() {
            int currentPosition = 0;

            @Override
            protected AToken computeNext() {
                try {
                    if (!tkstream.incrementToken()) {
                        tkstream.end();
                        tkstream.close();
                        return endOfData();
                    }
                } catch (IOException e) {
                    // Not expected for a StringReader-backed stream, but if it
                    // does happen we must not fall through: the attributes would
                    // still hold the previous token, and a persistently failing
                    // stream would make this iterator endless.
                    try {
                        tkstream.close();
                    } catch (IOException ignored) {
                        // best effort; the original failure is the one reported
                    }
                    throw new IllegalStateException(
                            "I/O error while tokenizing field '" + fieldName + "'", e);
                }
                currentPosition += posIncrAttribute.getPositionIncrement();
                // Snapshot the attribute values: the attribute objects are
                // overwritten on the next incrementToken() call.
                final int position = currentPosition;
                final int startOffset = offsetAtt.startOffset();
                final int endOffset = offsetAtt.endOffset();
                final String text = termAtt.term();
                return new AToken() {
                    @Override
                    public String getText() {
                        return text;
                    }

                    @Override
                    public int getPosition() {
                        return position;
                    }

                    @Override
                    public int getStartOffset() {
                        return startOffset;
                    }

                    @Override
                    public int getEndOffset() {
                        return endOffset;
                    }
                };
            }
        };

    }

    /**
     * Translates a Lucene query into a {@link QueryNode}, copying its boost.
     *
     * <p>Term, prefix, boolean and phrase queries are handled; any other
     * query type is rejected.
     *
     * @param luceneQuery the Lucene query to translate
     * @param originalStr the user's query string, used only in error messages
     * @throws ParseException for an empty boolean query or an unhandled query type
     */
    private QueryNode internalParse(org.apache.lucene.search.Query luceneQuery, final String originalStr)
            throws ParseException {
        QueryNode node;
        if (luceneQuery instanceof org.apache.lucene.search.TermQuery) {
            Term t = ((org.apache.lucene.search.TermQuery) luceneQuery).getTerm();
            node = new TermQuery(t.field(), t.text());
        } else if (luceneQuery instanceof org.apache.lucene.search.PrefixQuery) {
            Term t = ((org.apache.lucene.search.PrefixQuery) luceneQuery).getPrefix();
            node = new PrefixTermQuery(t.field(), t.text());
        } else if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) {
            List<BooleanClause> clauses = ((org.apache.lucene.search.BooleanQuery) luceneQuery).clauses();
            if (clauses.isEmpty()) {
                throw new ParseException("error parsing: " + originalStr);
            }
            node = internalParseBooleanQuery(clauses, originalStr);
        } else if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery) {
            org.apache.lucene.search.PhraseQuery phraseQuery = (org.apache.lucene.search.PhraseQuery) luceneQuery;
            int[] positions = phraseQuery.getPositions();
            node = internalParsePhraseQuery(phraseQuery.getTerms(), positions, originalStr);
        } else {
            throw new ParseException("unimplemented");
        }
        node.setBoost(luceneQuery.getBoost());
        return node;
    }

    /**
     * Builds a {@code SimplePhraseQuery} from the terms of a Lucene phrase query.
     * The field of the first term is used for the whole phrase.
     *
     * @param terms       the phrase terms; must not be empty
     * @param positions   the term positions as reported by Lucene
     * @param originalStr the user's query string (currently unused)
     * @throws IllegalArgumentException if {@code terms} is empty
     */
    private QueryNode internalParsePhraseQuery(Term[] terms, int[] positions, final String originalStr) {
        Preconditions.checkArgument(terms.length > 0, "too few terms to build a phrase query");
        String[] strs = new String[terms.length];
        for (int i = 0; i < terms.length; i++) {
            strs[i] = terms[i].text();
        }
        return new SimplePhraseQuery(terms[0].field(), strs, positions);
    }

    /**
     * Translates the clauses of a boolean query.
     *
     * <p>Non-prohibited clauses are combined by {@link #internalParsePositive};
     * each prohibited clause is then subtracted from that result with a
     * {@code DifferenceQuery}, in clause order.
     *
     * @param list        the clauses; must not be empty
     * @param originalStr the user's query string, used only in error messages
     * @throws ParseException if every clause is prohibited, or a clause cannot be translated
     */
    private QueryNode internalParseBooleanQuery(List<BooleanClause> list, final String originalStr)
            throws ParseException {
        Preconditions.checkArgument(list.size() > 0, "too few terms to build a boolean query");
        List<BooleanClause> positiveClauses = new ArrayList<BooleanClause>();
        List<BooleanClause> negativeClauses = new ArrayList<BooleanClause>();
        for (BooleanClause clause : list) {
            if (clause.isProhibited()) {
                negativeClauses.add(clause);
            } else {
                positiveClauses.add(clause);
            }
        }
        if (positiveClauses.isEmpty()) {
            throw new ParseException("No positive clauses.");
        }
        QueryNode retVal = internalParsePositive(positiveClauses, originalStr);

        for (BooleanClause clause : negativeClauses) {
            retVal = new DifferenceQuery(retVal, internalParse(clause.getQuery(), originalStr));
        }
        return retVal;
    }

    /**
     * Combines non-prohibited clauses into a right-nested tree.
     *
     * <p>The first clause is joined with the translation of the remaining
     * clauses; the joining operator is chosen by the second clause: AND when
     * it is required, OR otherwise.
     *
     * @param list        non-prohibited clauses; must not be empty
     * @param originalStr the user's query string, used only in error messages
     * @throws ParseException if a clause cannot be translated
     */
    private QueryNode internalParsePositive(List<BooleanClause> list, final String originalStr)
            throws ParseException {
        Preconditions.checkArgument(list.size() > 0, "too few terms to build a boolean query");
        QueryNode firstQuery = internalParse(list.get(0).getQuery(), originalStr);
        if (1 == list.size()) {
            return firstQuery;
        }
        // The caller has already filtered out prohibited clauses, so the rest
        // of the list is all-positive and can be combined by direct recursion.
        QueryNode rest = internalParsePositive(list.subList(1, list.size()), originalStr);
        if (list.get(1).isRequired()) {
            return new AndQuery(firstQuery, rest);
        }
        return new OrQuery(firstQuery, rest);
    }

    /**
     * Command-line helper: parses the first argument as a query against the
     * default field "text" and prints the resulting query tree.
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: IndexEngineParser <query>");
            System.exit(1);
        }
        System.out.println("Parsing \"" + args[0] + "\" ...");
        IndexEngineParser parser = new IndexEngineParser("text");
        QueryNode query = parser.parseQuery(args[0]);
        System.out.println(query);
    }
}