Example usage for the org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter constructor

List of usage examples for the org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter constructor

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter constructor.

Prototype

public KeywordRepeatFilter(TokenStream input) 

Source Link

Document

Construct a token stream filtering the given input.

Usage

From source file:de.walware.statet.r.internal.core.rhelp.index.DefaultAnalyzer.java

License:Open Source License

/**
 * Assembles the tokenizer and token-filter chain for a field.
 *
 * <p>The reader is first passed through {@code charFilterFactory} when one is
 * configured. The text is then tokenized with a {@code StandardTokenizer} and
 * run, in order, through an English possessive filter, a lower-case filter,
 * a stop filter using {@code stopwords}, a {@code KeywordRepeatFilter}, an
 * English Snowball stemmer and a {@code RemoveDuplicatesTokenFilter}.
 *
 * @param fieldName name of the field being analyzed (not consulted here)
 * @param reader    source of the text to analyze
 * @return the components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, Reader reader) {
    // Use the char-filtered reader only when a factory has been configured.
    final Reader input = (this.charFilterFactory == null)
            ? reader
            : this.charFilterFactory.create(reader);

    final Tokenizer tokenizer = new StandardTokenizer(input);

    final TokenStream withoutPossessives = new EnglishPossessiveFilter(getVersion(), tokenizer);
    final TokenStream lowerCased = new LowerCaseFilter(withoutPossessives);
    final TokenStream withoutStopwords = new StopFilter(lowerCased, this.stopwords);

    // Repeat, stem, then drop duplicates.
    final TokenStream repeated = new KeywordRepeatFilter(withoutStopwords);
    final TokenStream stemmed = new SnowballFilter(repeated, new EnglishStemmer());
    final TokenStream deduplicated = new RemoveDuplicatesTokenFilter(stemmed);

    return new TokenStreamComponents(tokenizer, deduplicated);
}

From source file:org.meresco.lucene.analysis.MerescoDutchStemmingAnalyzer.java

License:Open Source License

/**
 * Appends Dutch stemming to the token stream for fields that should be stemmed.
 *
 * <p>When {@code stemmingFields} is set and does not contain {@code fieldName},
 * the stream is returned untouched. Otherwise every token is repeated as a
 * keyword and as a plain term, the plain term is stemmed with the Dutch
 * Snowball stemmer (keywords are ignored by the stemmer), and one of the two
 * is removed again when stemming left them identical.
 *
 * @param fieldName name of the field being analyzed
 * @param tok       the incoming token stream
 * @return {@code tok} itself, or {@code tok} wrapped in the stemming chain
 */
@Override
public TokenStream post_analyzer(String fieldName, TokenStream tok) {
    // No restriction configured, or the field is explicitly listed: stem it.
    final boolean stemThisField = stemmingFields == null || stemmingFields.indexOf(fieldName) != -1;
    if (!stemThisField) {
        return tok;
    }

    final TokenStream repeated = new KeywordRepeatFilter(tok);
    final TokenStream stemmed = new SnowballFilter(repeated, new DutchStemmer());
    return new RemoveDuplicatesTokenFilter(stemmed);
}

From source file:uk.co.flax.luwak.presearcher.WildcardNGramPresearcher.java

License:Apache License

/**
 * Wraps the token stream of an input document in the presearcher's filter chain.
 *
 * <p>The stream is passed through a {@code KeywordRepeatFilter}, then a
 * {@code SuffixingNGramTokenFilter} configured with {@code ngramSuffix}, the
 * extractor's "any" token and {@code maxTokenSize}, and finally a
 * {@code DuplicateRemovalTokenFilter}.
 *
 * @param field name of the field the tokens belong to (not consulted here)
 * @param ts    the token stream of the input document
 * @return the wrapped token stream
 * @throws IOException declared by the overridden method
 */
@Override
protected TokenStream filterInputDocumentTokens(String field, TokenStream ts) throws IOException {
    final TokenStream repeated = new KeywordRepeatFilter(ts);
    final TokenStream withNGrams = new SuffixingNGramTokenFilter(
            repeated,
            ngramSuffix,
            extractor.getAnyToken(),
            maxTokenSize);
    final TokenStream deduplicated = new DuplicateRemovalTokenFilter(withNGrams);
    return deduplicated;
}