lux.index.analysis.QNameTokenFilter.java Source code

Java tutorial

Introduction

Here is the source code for lux.index.analysis.QNameTokenFilter.java

Source

package lux.index.analysis;

import java.io.IOException;
import java.util.Map;

import lux.index.attribute.QNameAttribute;
import lux.xml.QName;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.CharsRef;

/**
 * Token filter that expands every incoming term into one token per node name (QName) held in the
 * {@link QNameAttribute}. Each emitted token has the form {@code <qname>:<term>}; the tokens
 * produced after the first one for a given term are given a position increment of 0, so they
 * are stacked on the same position.
 * <p>
 * When processing is namespace-aware the node name is serialized in reverse-Clark format:
 * localname{namespace-uri}. Otherwise it is serialized as a lexical QName, prefix:localname,
 * without regard to any namespace uri binding.
 * </p>
 * TODO: remove the unused namespace-unaware processing, or put it in another class?
 */
final public class QNameTokenFilter extends TokenFilter {

    private final QNameAttribute qnameAtt = addAttribute(QNameAttribute.class);
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);

    /** Private copy of the most recently read input term; appended after each node-name prefix. */
    private final CharsRef term = new CharsRef();

    /** Default visibility handed to the constructor; only stored and exposed through its getter here. */
    private final ElementVisibility defVis;

    /** Per-name visibility map handed to the constructor (may be null); only stored and exposed here. */
    private final Map<String, ElementVisibility> elVis;

    /** Selects the node-name serialization; starts out as true. */
    private boolean namespaceAware = true;

    /**
     * Creates a filter with {@link ElementVisibility#OPAQUE} as the default visibility and no
     * element visibility map.
     *
     * @param input the token stream to wrap
     */
    protected QNameTokenFilter(TokenStream input) {
        this(input, ElementVisibility.OPAQUE, null);
    }

    /**
     * Creates a namespace-aware filter over the given stream.
     *
     * @param input the token stream to wrap
     * @param defVis the value later returned by {@link #getDefaultVisibility()}
     * @param elVis the value later returned by {@link #getElementVisibility()}
     */
    protected QNameTokenFilter(TokenStream input, ElementVisibility defVis, Map<String, ElementVisibility> elVis) {
        super(input);
        this.elVis = elVis;
        this.defVis = defVis;
    }

    /**
     * Sanity check only: asserts that the given stream exposes the very same
     * {@link CharTermAttribute} instance as this filter's input. Nothing is evaluated when
     * assertions are disabled, and no state is changed.
     *
     * @param inputAgain the stream to compare against the wrapped input
     */
    public final void reset(TokenStream inputAgain) {
        assert input.getAttribute(CharTermAttribute.class)
                == inputAgain.getAttribute(CharTermAttribute.class);
    }

    /**
     * Emits the next {@code <qname>:<term>} token.
     *
     * @return false once the wrapped stream has no more tokens, true otherwise
     * @throws IOException if the wrapped stream fails while advancing
     */
    @Override
    public boolean incrementToken() throws IOException {
        if (qnameAtt.hasNext() && !qnameAtt.onFirst()) {
            // more node names remain for the current term: stack this token on the same position
            posAtt.setPositionIncrement(0);
        } else if (input.incrementToken()) {
            // a fresh term arrived; keep a copy since the term attribute is overwritten below
            term.copyChars(termAtt.buffer(), 0, termAtt.length());
        } else {
            return false;
        }

        final QName nodeName = qnameAtt.next();
        termAtt.setEmpty();
        appendNodeName(nodeName);
        termAtt.append(':').append(term);
        return true;
    }

    /** Appends the serialized form of the node name to the term attribute. */
    private void appendNodeName(QName nodeName) {
        if (!namespaceAware) {
            // lexical form: optional "prefix:" followed by the local part
            if (nodeName.getPrefix().length() > 0) {
                termAtt.append(nodeName.getPrefix()).append(':');
            }
            termAtt.append(nodeName.getLocalPart());
            return;
        }
        termAtt.append(nodeName.getEncodedName());
    }

    /**
     * @return if true, indexed QNames include the namespace URI; otherwise they include the prefix.
     */
    public boolean isNamespaceAware() {
        return namespaceAware;
    }

    /**
     * @param namespaceAware if true, node names are written using their encoded name; otherwise
     * as an optional prefix, a colon, and the local part.
     */
    public void setNamespaceAware(boolean namespaceAware) {
        this.namespaceAware = namespaceAware;
    }

    /**
     * @return the default visibility supplied at construction time
     */
    public ElementVisibility getDefaultVisibility() {
        return defVis;
    }

    /**
     * @return the element visibility map supplied at construction time; null when the
     * single-argument constructor was used
     */
    public Map<String, ElementVisibility> getElementVisibility() {
        return elVis;
    }

    /**
     * @return the token stream this filter reads from
     */
    public TokenStream getInput() {
        return input;
    }

}

/*
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 */