List of usage examples for subclasses of org.apache.lucene.analysis.TokenStream
From source file com.twitter.common.text.token.TwitterTokenStream.java
/**
* Abstraction to enumerate a sequence of tokens. This class represents the central abstraction in
* Twitter's text processing library, and is similar to Lucene's TokenStream, with the following
* exceptions:
*
* <ul>
From source file com.zimbra.cs.index.analysis.FieldTokenStream.java
/**
* {@link TokenStream} for structured-data field.
* <p>
* {@code name:Val1 val2 val3} gets tokenized to {@code name:val1}, {@code name:val2}, {@code name:val3}. If the field
* only consists of a single integer value, it produces an extra token whose name has '#' appended, to distinguish it
* from text search and the integer value gets encoded by Lucene's {@link NumericUtils}, so that it is also searchable
From source file com.zimbra.cs.index.analysis.MimeTypeTokenStream.java
/** * {@code image/jpeg} becomes {@code image/jpeg} and {@code image} * * @author ysasaki */ public final class MimeTypeTokenStream extends TokenStream {
From source file com.zimbra.cs.index.analysis.RFC822AddressTokenStream.java
/**
* RFC822 address tokenizer.
* <p>
* For example:
* {@literal "Zimbra Japan" <support@zimbra.vmware.co.jp>} is tokenized as:
* <ul>
From source file de.catma.indexer.PeekableTokenStream.java
public class PeekableTokenStream extends TokenStream { private TokenStream tokenStream; private CharTermAttribute termAttrBuffer; private OffsetAttribute offsetAttrBuffer;
From source file edu.mit.ll.vizlinc.highlight.TokenStreamFromTermPositionVector.java
public final class TokenStreamFromTermPositionVector extends TokenStream { private final List<Token> positionedTokens = new ArrayList<Token>(); private Iterator<Token> tokensAtCurrentPosition;
From source file lucene.security.analysis.DocumentVisibilityTokenStream.java
public class DocumentVisibilityTokenStream extends TokenStream { private static final String UTF_8 = "UTF-8"; private final String _visiblity; private final CharTermAttribute _tokenAtt;
From source file lux.index.analysis.EmptyTokenStream.java
final class EmptyTokenStream extends TokenStream { public EmptyTokenStream(TokenStream source) { super(source); }
From source file lux.index.analysis.ValueTokenStream.java
public abstract class ValueTokenStream extends TokenStream { protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); protected int pos = 0; protected char[] value; private Iterable<char[]> values;
From source file lux.index.analysis.XmlTokenStreamBase.java
/**
* <p>
* This is the root of a set of xml-aware TokenStream classes that work by selecting text
* a node at a time from an XML document, and then
* passing that text to the wrapped TokenStream. The wrapped TokenStream is re-used for each text node.
* The outermost link in the chain will be a TokenFilter that applies a sequence of structure-related