org.sindice.siren.qparser.ntriple.query.processors.ResourceAnalyzerQueryNodeProcessor.java Source code

Introduction

Here is the source code for org.sindice.siren.qparser.ntriple.query.processors.ResourceAnalyzerQueryNodeProcessor.java
Source

/**
 * Copyright 2010, Renaud Delbru
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/**
 * @project siren
 * @author Renaud Delbru [ 21 Jan 2011 ]
 * @link http://renaud.delbru.fr/
 * @copyright Copyright (C) 2010 by Renaud Delbru, All rights reserved.
 */
package org.sindice.siren.qparser.ntriple.query.processors;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.messages.MessageImpl;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode.Modifier;
import org.apache.lucene.queryParser.core.nodes.NoTokenFoundQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
import org.apache.lucene.queryParser.core.nodes.TokenizedPhraseQueryNode;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.standard.config.AnalyzerAttribute;
import org.apache.lucene.queryParser.standard.config.PositionIncrementsAttribute;
import org.apache.lucene.queryParser.standard.nodes.MultiPhraseQueryNode;
import org.apache.lucene.queryParser.standard.nodes.StandardBooleanQueryNode;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;

/**
 * This processor checks whether an {@link AnalyzerAttribute} is defined in the
 * {@link QueryConfigHandler}. If it is and the analyzer is not
 * <code>null</code>, it looks for every {@link FieldQueryNode} that is not a
 * {@link WildcardQueryNode}, {@link FuzzyQueryNode} or
 * {@link ParametricQueryNode} contained in the query node tree, then it applies
 * the analyzer to the text of that {@link FieldQueryNode} object. <br/>
 * <br/>
 * If no term is returned by the analyzer, a {@link NoTokenFoundQueryNode}
 * object is returned. <br/>
 * <br/>
 * If the analyzer returns only one term, the returned term is set on the
 * {@link FieldQueryNode} and that node is returned. <br/>
 * <br/>
 * If the analyzer returns more than one term:
 * <ul>
 * <li>when all the terms are at one single position (and the node is not a
 * {@link QuotedFieldQueryNode}, or several terms share a position), a
 * {@link GroupQueryNode} wrapping a {@link StandardBooleanQueryNode} over the
 * terms is returned (intended as an OR between the terms);</li>
 * <li>when the node is not a {@link QuotedFieldQueryNode} and the terms are not
 * at one single position, a {@link QueryNodeException} is thrown;</li>
 * <li>when the node is a {@link QuotedFieldQueryNode}, a
 * {@link MultiPhraseQueryNode} is returned if several terms share a position,
 * otherwise a {@link TokenizedPhraseQueryNode} is returned.</li>
 * </ul>
 *
 * @see Analyzer
 * @see TokenStream
 */
public class ResourceAnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {

    /** Analyzer read from the {@link AnalyzerAttribute} by {@link #process(QueryNode)}. */
    private Analyzer analyzer;

    /**
     * <code>true</code> when a {@link PositionIncrementsAttribute} is configured
     * and reports position increments as enabled; set by
     * {@link #process(QueryNode)}.
     */
    private boolean positionIncrementsEnabled;

    public ResourceAnalyzerQueryNodeProcessor() {
        // empty constructor
    }

    /**
     * Reads the analyzer and the position increment setting from the query
     * configuration, then processes the tree if an analyzer is available.
     *
     * @param queryTree the root of the query node tree
     * @return the processed tree, or <code>queryTree</code> itself when no
     *         {@link AnalyzerAttribute} is defined or its analyzer is
     *         <code>null</code>
     * @throws QueryNodeException if the processing of a node fails
     */
    @Override
    public QueryNode process(final QueryNode queryTree) throws QueryNodeException {

        final QueryConfigHandler config = this.getQueryConfigHandler();

        if (!config.hasAttribute(AnalyzerAttribute.class)) {
            return queryTree;
        }

        this.analyzer = config.getAttribute(AnalyzerAttribute.class).getAnalyzer();

        this.positionIncrementsEnabled = config.hasAttribute(PositionIncrementsAttribute.class)
                && config.getAttribute(PositionIncrementsAttribute.class).isPositionIncrementsEnabled();

        if (this.analyzer == null) {
            return queryTree;
        }

        return super.process(queryTree);

    }

    /**
     * Analyzes the text of a field node and replaces the node according to the
     * number and the positions of the terms produced by the analyzer.
     *
     * @param node the node being visited
     * @return the node to put in the tree in place of <code>node</code>;
     *         <code>node</code> itself when it is not an analyzable field node
     * @throws QueryNodeException if a node that is not a
     *         {@link QuotedFieldQueryNode} is analyzed into several terms that
     *         are not all at one single position
     */
    @Override
    protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {

        // The node is cast to FieldQueryNode below, so test for that type rather
        // than for the wider TextableQueryNode interface: a textable node that is
        // not a field node is left untouched instead of failing on the cast.
        if (!(node instanceof FieldQueryNode) || node instanceof WildcardQueryNode
                || node instanceof FuzzyQueryNode || node instanceof ParametricQueryNode) {
            return node;
        }

        final FieldQueryNode fieldNode = (FieldQueryNode) node;
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();

        final TokenStream source = this.analyzer.tokenStream(field, new StringReader(text));
        final CachingTokenFilter buffer = new CachingTokenFilter(source);

        final PositionIncrementAttribute posIncrAtt = buffer.hasAttribute(PositionIncrementAttribute.class)
                ? buffer.getAttribute(PositionIncrementAttribute.class)
                : null;

        // First pass: count the tokens and the positions they span.
        int numTokens = 0;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;

        try {
            while (buffer.incrementToken()) {
                numTokens++;
                final int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
            }
        } catch (final IOException ignored) {
            // best effort: the tokens counted so far are used
        }

        try {
            // rewind the buffer stream
            buffer.reset();
        } catch (final IOException ignored) {
            // best effort: nothing more can be done here
        } finally {
            try {
                // close original stream - all tokens buffered. Done in a finally
                // block so that a failing reset cannot skip the close.
                source.close();
            } catch (final IOException ignored) {
                // best effort: nothing more can be done here
            }
        }

        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }

        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        }

        if (numTokens == 1) {
            final boolean advanced = advance(buffer);
            fieldNode.setText(advanced ? termAtt.toString() : null);
            return fieldNode;
        }

        final boolean quoted = node instanceof QuotedFieldQueryNode;

        if (!quoted || severalTokensAtSamePosition) {

            // If multiple terms at one single position, this must be a query
            // expansion. Perform a OR between the terms.
            if (positionCount == 1) {
                return this.buildTermGroup(field, buffer, termAtt, numTokens);
            }

            // Multiple terms over more than one position.
            // Not able to support such a case. Usually, it is the result of a bad
            // use of filters at query time. Better to throw an exception.
            if (!quoted) {
                throw new QueryNodeException(new MessageImpl("Multiple terms over more than one position"));
            }

            // quoted text with several terms at the same position
            return this.buildMultiPhrase(field, buffer, termAtt, posIncrAtt, numTokens);
        }

        // quoted text with at most one term per position
        return this.buildPhrase(field, buffer, termAtt, posIncrAtt, numTokens);

    }

    /**
     * Moves the stream to its next token, swallowing any {@link IOException}.
     * The callers only ask for tokens that were counted beforehand, hence the
     * assertion that a token is available.
     *
     * @return <code>false</code> if an {@link IOException} was raised,
     *         <code>true</code> otherwise
     */
    private static boolean advance(final TokenStream stream) {
        try {
            final boolean hasNext = stream.incrementToken();
            assert hasNext;
            return true;
        } catch (final IOException ignored) {
            // safe to ignore, because we know the number of tokens
            return false;
        }
    }

    /**
     * Builds a group holding a boolean node over one unmodified clause per
     * buffered token.
     */
    private QueryNode buildTermGroup(final String field, final TokenStream buffer,
            final CharTermAttribute termAtt, final int numTokens) {

        final List<QueryNode> clauses = new ArrayList<QueryNode>(numTokens);

        for (int i = 0; i < numTokens; i++) {
            final boolean advanced = advance(buffer);
            final String term = advanced ? termAtt.toString() : null;
            clauses.add(new ModifierQueryNode(new FieldQueryNode(field, term, -1, -1), Modifier.MOD_NONE));
        }

        return new GroupQueryNode(new StandardBooleanQueryNode(clauses, true));
    }

    /**
     * Builds a multi-phrase node: consecutive tokens having a position
     * increment of zero are gathered into one group, and all the terms of a
     * group receive the same position value.
     */
    private MultiPhraseQueryNode buildMultiPhrase(final String field, final TokenStream buffer,
            final CharTermAttribute termAtt, final PositionIncrementAttribute posIncrAtt, final int numTokens) {

        final MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
        final List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
        int position = -1;
        int termGroupCount = 0;

        for (int i = 0; i < numTokens; i++) {
            final boolean advanced = advance(buffer);
            final String term = advanced ? termAtt.toString() : null;
            // the increment defaults to 1 when the stream could not be advanced
            final int positionIncrement = (advanced && posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;

            if (positionIncrement > 0 && multiTerms.size() > 0) {
                // The current token starts a new position: flush the pending group
                // with the position reached before this token.
                this.addTermGroup(mpq, multiTerms, position, termGroupCount);

                // Only increment once for each "group" of
                // terms that were in the same position:
                termGroupCount++;

                multiTerms.clear();
            }

            position += positionIncrement;
            multiTerms.add(new FieldQueryNode(field, term, -1, -1));
        }

        // flush the last group
        this.addTermGroup(mpq, multiTerms, position, termGroupCount);

        return mpq;
    }

    /**
     * Adds every term of a group to the multi-phrase node, after setting its
     * position to <code>position</code> when position increments are enabled
     * and to <code>termGroupCount</code> otherwise.
     */
    private void addTermGroup(final MultiPhraseQueryNode mpq, final List<FieldQueryNode> group,
            final int position, final int termGroupCount) {

        final int value = this.positionIncrementsEnabled ? position : termGroupCount;

        for (final FieldQueryNode termNode : group) {
            termNode.setPositionIncrement(value);
            mpq.add(termNode);
        }
    }

    /**
     * Builds a phrase node holding one field node per buffered token. The
     * position of a term is the sum of the increments (starting from -1) when
     * position increments are enabled, and the index of the token otherwise.
     */
    private TokenizedPhraseQueryNode buildPhrase(final String field, final TokenStream buffer,
            final CharTermAttribute termAtt, final PositionIncrementAttribute posIncrAtt, final int numTokens) {

        final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
        int position = -1;

        for (int i = 0; i < numTokens; i++) {
            final boolean advanced = advance(buffer);
            final String term = advanced ? termAtt.toString() : null;
            // the increment defaults to 1 when the stream could not be advanced
            final int positionIncrement = (advanced && posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;

            final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

            if (this.positionIncrementsEnabled) {
                position += positionIncrement;
                newFieldNode.setPositionIncrement(position);
            } else {
                newFieldNode.setPositionIncrement(i);
            }

            pq.add(newFieldNode);
        }

        return pq;
    }

    /**
     * No pre-processing is done: the node is returned as is.
     */
    @Override
    protected QueryNode preProcessNode(final QueryNode node) throws QueryNodeException {

        return node;

    }

    /**
     * The order of the children is left unchanged.
     */
    @Override
    protected List<QueryNode> setChildrenOrder(final List<QueryNode> children) throws QueryNodeException {

        return children;

    }

}