List of usage examples for the org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter constructor
public KeywordRepeatFilter(TokenStream input)
From source file:de.walware.statet.r.internal.core.rhelp.index.DefaultAnalyzer.java
License:Open Source License
/**
 * Assembles the tokenizer and filter chain used for the given field.
 *
 * <p>The reader is first passed through {@code charFilterFactory} when one is
 * configured. Tokens produced by a {@link StandardTokenizer} then flow through,
 * in order: possessive removal, lower-casing, stop-word removal, keyword
 * repetition, English Snowball stemming and duplicate removal.
 *
 * @param fieldName name of the field being analyzed (not consulted here)
 * @param reader    character source for the field's text
 * @return the components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, Reader reader) {
    // Only wrap the reader when a char filter factory has been configured.
    final Reader input = (this.charFilterFactory == null)
            ? reader
            : this.charFilterFactory.create(reader);

    final Tokenizer tokenizer = new StandardTokenizer(input);

    final TokenStream withoutPossessives = new EnglishPossessiveFilter(getVersion(), tokenizer);
    final TokenStream lowerCased = new LowerCaseFilter(withoutPossessives);
    final TokenStream withoutStopwords = new StopFilter(lowerCased, this.stopwords);

    // Repeat-then-stem-then-deduplicate: presumably keeps the unstemmed form
    // next to the stemmed one and drops the copy when both are identical.
    final TokenStream repeated = new KeywordRepeatFilter(withoutStopwords);
    final TokenStream stemmed = new SnowballFilter(repeated, new EnglishStemmer());
    final TokenStream deduplicated = new RemoveDuplicatesTokenFilter(stemmed);

    return new TokenStreamComponents(tokenizer, deduplicated);
}
From source file:org.meresco.lucene.analysis.MerescoDutchStemmingAnalyzer.java
License:Open Source License
@Override public TokenStream post_analyzer(String fieldName, TokenStream tok) { if (stemmingFields != null && stemmingFields.indexOf(fieldName) == -1) return tok; tok = new KeywordRepeatFilter(tok); // repeat every word as term and as keyword tok = new SnowballFilter(tok, new DutchStemmer()); // ignores keywords tok = new RemoveDuplicatesTokenFilter(tok); // removes one if keyword and term are still the same return tok;/*from www . j a va 2 s . c o m*/ }
From source file:uk.co.flax.luwak.presearcher.WildcardNGramPresearcher.java
License:Apache License
@Override protected TokenStream filterInputDocumentTokens(String field, TokenStream ts) throws IOException { TokenStream duped = new KeywordRepeatFilter(ts); TokenStream ngrammed = new SuffixingNGramTokenFilter(duped, ngramSuffix, extractor.getAnyToken(), maxTokenSize);// w ww . j a v a2 s. co m return new DuplicateRemovalTokenFilter(ngrammed); }