Example usage for org.apache.lucene.analysis TokenStream subclass-usage

List of usage examples for subclasses of org.apache.lucene.analysis.TokenStream

Introduction

On this page you can find example usages of subclasses of org.apache.lucene.analysis.TokenStream.

Usage

From source file com.twitter.common.text.token.TwitterTokenStream.java

/**
 * Abstraction to enumerate a sequence of tokens. This class represents the central abstraction in
 * Twitter's text processing library, and is similar to Lucene's TokenStream, with the following
 * exceptions:
 *
 * <ul>

From source file com.zimbra.cs.index.analysis.FieldTokenStream.java

/**
 * {@link TokenStream} for a structured-data field.
 * <p>
 * {@code name:Val1 val2 val3} gets tokenized to {@code name:val1}, {@code name:val2}, {@code name:val3}. If the field
 * only consists of a single integer value, it produces an extra token whose name has '#' appended to distinguish it
 * from text search and the integer value gets encoded by Lucene's {@link NumericUtils}, so that it is also searchable

From source file com.zimbra.cs.index.analysis.MimeTypeTokenStream.java

/**
 * {@code image/jpeg} becomes {@code image/jpeg} and {@code image}
 *
 * @author ysasaki
 */
public final class MimeTypeTokenStream extends TokenStream {

From source file com.zimbra.cs.index.analysis.RFC822AddressTokenStream.java

/**
 * RFC822 address tokenizer.
 * <p>
 * For example:
 * {@literal "Zimbra Japan" <support@zimbra.vmware.co.jp>} is tokenized as:
 * <ul>

From source file de.catma.indexer.PeekableTokenStream.java

public class PeekableTokenStream extends TokenStream {

    private TokenStream tokenStream;

    private CharTermAttribute termAttrBuffer;
    private OffsetAttribute offsetAttrBuffer;

From source file edu.mit.ll.vizlinc.highlight.TokenStreamFromTermPositionVector.java

public final class TokenStreamFromTermPositionVector extends TokenStream {

    private final List<Token> positionedTokens = new ArrayList<Token>();

    private Iterator<Token> tokensAtCurrentPosition;

From source file lucene.security.analysis.DocumentVisibilityTokenStream.java

public class DocumentVisibilityTokenStream extends TokenStream {

    private static final String UTF_8 = "UTF-8";

    private final String _visiblity;
    private final CharTermAttribute _tokenAtt;

From source file lux.index.analysis.EmptyTokenStream.java

final class EmptyTokenStream extends TokenStream {

    /**
     * Creates the stream by handing {@code source} straight to the superclass constructor.
     * <p>
     * NOTE(review): presumably this makes the new stream share {@code source}'s attributes, as
     * Lucene's {@code TokenStream(AttributeSource)} constructor does; confirm against the Lucene
     * version in use.
     *
     * @param source the token stream passed to the superclass constructor
     */
    public EmptyTokenStream(TokenStream source) {
        super(source);
    }

From source file lux.index.analysis.ValueTokenStream.java

public abstract class ValueTokenStream extends TokenStream {

    protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    protected int pos = 0;
    protected char[] value;
    private Iterable<char[]> values;

From source file lux.index.analysis.XmlTokenStreamBase.java

/**
 * <p>
 * This is the root of a set of xml-aware TokenStream classes that work by selecting text
 * a node at a time from an XML document, and then 
 * passing that text to the wrapped TokenStream.  The wrapped TokenStream is re-used for each text node.
 * The outermost link in the chain will be a TokenFilter that applies a sequence of structure-related