List of usage examples for subclasses of org.apache.lucene.analysis.FilteringTokenFilter
From source file com.github.le11.nls.lucene.TypeAwareStopFilter.java
/**
* Removes stop words from a token stream if they don't have specific types.
* <p>
* <a name="version"></a>
* <p>You must specify the required {@link org.apache.lucene.util.Version}
* compatibility when creating StopFilter:
From source file com.luc.filters.EmailFilter.java
public final class EmailFilter extends FilteringTokenFilter { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final Pattern pattern = Pattern .compile("^[_A-Za-z0-9-]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9]+(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})$");
From source file com.luc.filters.PhoneNumFilter.java
public final class PhoneNumFilter extends FilteringTokenFilter { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final Pattern pattern = Pattern.compile("^\\(?(\\d{3})\\)?[- ]?(\\d{3})[- ]?(\\d{4})$");
From source file com.luc.filters.SSNFilter.java
public final class SSNFilter extends FilteringTokenFilter { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final Pattern pattern = Pattern.compile("^\\d{3}[- ]?\\d{2}[- ]?\\d{4}$");
From source file com.shaie.annots.filter.AnnotatorTokenFilter.java
/** A {@link FilteringTokenFilter} which uses an {@link Annotator} to {@link #accept()} tokens. */ public final class AnnotatorTokenFilter extends FilteringTokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final Annotator annotator;
From source file com.shaie.annots.filter.SimplePreAnnotatedTokenFilter.java
/**
* A {@link TokenFilter} which keeps only tokens with positions that are covered by a given array of annotation markers
* and lengths. For example, if you process the text "quick brown fox and a red dog", and you give it the array
* <code>[0,3,5,2]</code> (two annotations, {@code pos=0,len=3} and {@code pos=5,len=2}), then it will keep only the
* tokens: "quick", "brown", "fox", "red", "dog".
*/
From source file crawler.util.analyzer.TwitterFilter.java
public class TwitterFilter extends FilteringTokenFilter { //private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public TwitterFilter(boolean enablePositionIncrements, TokenStream input) { super(enablePositionIncrements, input);
From source file org.apache.pylucene.analysis.PythonFilteringTokenFilter.java
public class PythonFilteringTokenFilter extends FilteringTokenFilter { private long pythonObject; public PythonFilteringTokenFilter(TokenStream tokenStream) { super(tokenStream); }
From source file org.apache.solr.analysis.KeepWordFilter.java
/**
* A TokenFilter that keeps only tokens whose text is contained in the
* set of required words. This filter behaves like the inverse of StopFilter.
*
* @version $Id: KeepWordFilter.java 1065324 2011-01-30 17:20:39Z uschindler $
* @since solr 1.3
From source file org.elasticsearch.index.analysis.Kuromoji2JapaneseKeepPartOfSpeechFilter.java
/** * Removes tokens that match a set of part-of-speech tags. */ public final class Kuromoji2JapaneseKeepPartOfSpeechFilter extends FilteringTokenFilter { private final Set<String> keepTags; private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);