Example usage of org.apache.lucene.analysis.FilteringTokenFilter subclasses

List of usage examples for subclasses of org.apache.lucene.analysis.FilteringTokenFilter

Introduction

On this page you can find usage examples of classes that extend org.apache.lucene.analysis.FilteringTokenFilter.

Usage

From source file com.github.le11.nls.lucene.TypeAwareStopFilter.java

/**
 * Removes stop words from a token stream if they don't have specific types.
 * <p/>
 * <a name="version"/>
 * <p>You must specify the required {@link org.apache.lucene.util.Version}
 * compatibility when creating StopFilter:

From source file com.luc.filters.EmailFilter.java

public final class EmailFilter extends FilteringTokenFilter {

    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);

    private final Pattern pattern = Pattern
            .compile("^[_A-Za-z0-9-]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9]+(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})$");

From source file com.luc.filters.PhoneNumFilter.java

public final class PhoneNumFilter extends FilteringTokenFilter {

    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);

    private final Pattern pattern = Pattern.compile("^\\(?(\\d{3})\\)?[- ]?(\\d{3})[- ]?(\\d{4})$");

From source file com.luc.filters.SSNFilter.java

public final class SSNFilter extends FilteringTokenFilter {

    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);

    private final Pattern pattern = Pattern.compile("^\\d{3}[- ]?\\d{2}[- ]?\\d{4}$");

From source file com.shaie.annots.filter.AnnotatorTokenFilter.java

/** A {@link FilteringTokenFilter} which uses an {@link Annotator} to {@link #accept()} tokens. */
public final class AnnotatorTokenFilter extends FilteringTokenFilter {

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final Annotator annotator;

From source file com.shaie.annots.filter.SimplePreAnnotatedTokenFilter.java

/**
 * A {@link TokenFilter} which keeps only tokens with positions that are covered by a given array of annotation markers
 * and lengths. For example, if you process the text "quick brown fox and a red dog", and you give it the array
 * <code>[0,3,5,2]</code> (two annotations, {@code pos=0,len=3} and {@code pos=5,len=2}), then it will keep only the
 * tokens: "quick", "brown", "fox", "red", "dog".
 */

From source file crawler.util.analyzer.TwitterFilter.java

public class TwitterFilter extends FilteringTokenFilter {
    //private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);

    public TwitterFilter(boolean enablePositionIncrements, TokenStream input) {
        super(enablePositionIncrements, input);

From source file org.apache.pylucene.analysis.PythonFilteringTokenFilter.java

public class PythonFilteringTokenFilter extends FilteringTokenFilter {
    private long pythonObject;

    /**
     * Creates a filter over the given token stream by delegating directly to the
     * superclass constructor; no additional state is initialized here.
     *
     * @param tokenStream the token stream handed to the {@code FilteringTokenFilter} constructor
     */
    public PythonFilteringTokenFilter(TokenStream tokenStream) {
        super(tokenStream);
    }

From source file org.apache.solr.analysis.KeepWordFilter.java

/**
 * A TokenFilter that only keeps tokens with text contained in the
 * required words.  This filter behaves like the inverse of StopFilter.
 * 
 * @version $Id: KeepWordFilter.java 1065324 2011-01-30 17:20:39Z uschindler $
 * @since solr 1.3

From source file org.elasticsearch.index.analysis.Kuromoji2JapaneseKeepPartOfSpeechFilter.java

/**
 * Removes tokens that match a set of part-of-speech tags.
 */
public final class Kuromoji2JapaneseKeepPartOfSpeechFilter extends FilteringTokenFilter {
    private final Set<String> keepTags;
    private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);