Usage examples for the org.apache.lucene.analysis.de.GermanStemFilter constructor GermanStemFilter(TokenStream)
public GermanStemFilter(TokenStream in)
From source file:ie.cmrc.smtx.lucene.analysis.EuropeanAnalyzer.java
License:Apache License
/**
 * Returns an aggressive stemming filter suitable for the provided language.
 *
 * <p>The language code is trimmed and lower-cased using {@code Locale.ROOT}
 * before it is compared, so the match does not depend on the JVM's default
 * locale (for example, under a Turkish default locale {@code "IT"} would
 * otherwise lower-case to a dotless-i string and never match).
 *
 * @param language Two-letter code of a language; may be {@code null}
 * @param input {@code org.apache.lucene.analysis.TokenStream} input to
 * filter
 * @return {@code org.apache.lucene.analysis.TokenStream} that filters the
 * provided {@code input}; {@code input} itself is returned unchanged when
 * {@code language} is {@code null}, is not contained in
 * {@code SUPPORTED_LANGUAGES}, or matches none of the branches below
 */
protected TokenStream getStemFilter(String language, TokenStream input) {
    // No language given: nothing to select, pass the stream through untouched.
    if (language == null) {
        return input;
    }

    // Locale.ROOT keeps the normalisation independent of the default locale.
    final String lang = language.trim().toLowerCase(java.util.Locale.ROOT);
    if (!SUPPORTED_LANGUAGES.contains(lang)) {
        return input;
    }

    if (lang.equals(LANG_EN)) {
        return new PorterStemFilter(input);
    }
    if (lang.equals(LANG_FR)) {
        return new SnowballFilter(input, new FrenchStemmer());
    }
    if (lang.equals(LANG_ES)) {
        return new SnowballFilter(input, new SpanishStemmer());
    }
    if (lang.equals(LANG_PT)) {
        return new PortugueseStemFilter(input);
    }
    if (lang.equals(LANG_IT)) {
        return new SnowballFilter(input, new ItalianStemmer());
    }
    if (lang.equals(LANG_DE)) {
        return new GermanStemFilter(input);
    }
    if (lang.equals(LANG_NO)) {
        return new SnowballFilter(input, new NorwegianStemmer());
    }

    // Supported language without a dedicated stemmer branch: leave unfiltered.
    return input;
}
From source file:org.apache.solr.analysis.GermanStemFilterFactory.java
License:Apache License
/**
 * Wraps the given token stream in a new {@code GermanStemFilter}.
 *
 * @param in the token stream to wrap
 * @return a new {@code GermanStemFilter} constructed over {@code in}
 */
public GermanStemFilter create(TokenStream in) {
    final GermanStemFilter stemFilter = new GermanStemFilter(in);
    return stemFilter;
}
From source file:org.crosswire.jsword.index.lucene.analysis.GermanLuceneAnalyzer.java
License:Open Source License
/**
 * Builds a fresh analysis chain over {@code reader}: a
 * {@code LowerCaseTokenizer}, optionally followed by a {@code StopFilter}
 * and a {@code GermanStemFilter}.
 *
 * @param fieldName not used by this implementation
 * @param reader the character source handed to the tokenizer
 * @return the outermost stream of the assembled chain
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream chain = new LowerCaseTokenizer(reader);

    // Stop-word removal needs both the switch and an actual stop set.
    final boolean filterStopWords = doStopWords && stopSet != null;
    if (filterStopWords) {
        // NOTE(review): the first argument is hard-coded to false here, while
        // reusableTokenStream derives it from matchVersion - confirm intended.
        chain = new StopFilter(false, chain, stopSet);
    }

    if (doStemming) {
        chain = new GermanStemFilter(chain);
    }

    return chain;
}
From source file:org.crosswire.jsword.index.lucene.analysis.GermanLuceneAnalyzer.java
License:Open Source License
@Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(new LowerCaseTokenizer(reader)); if (doStopWords && stopSet != null) { streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet)); }//from w ww . j a v a 2 s.c o m if (doStemming) { streams.setResult(new GermanStemFilter(streams.getResult())); } setPreviousTokenStream(streams); } else { streams.getSource().reset(reader); } return streams.getResult(); }
From source file:org.elasticsearch.analysis.common.GermanStemTokenFilterFactory.java
License:Apache License
/**
 * Wraps {@code tokenStream} in a {@code SetKeywordMarkerFilter} built from
 * {@code exclusions}, then in a {@code GermanStemFilter}.
 *
 * <p>NOTE(review): presumably terms in {@code exclusions} are marked as
 * keywords so the stemmer leaves them untouched - confirm against the
 * Lucene documentation.
 *
 * @param tokenStream the stream to wrap
 * @return the {@code GermanStemFilter} at the top of the chain
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final SetKeywordMarkerFilter keywordMarked = new SetKeywordMarkerFilter(tokenStream, exclusions);
    return new GermanStemFilter(keywordMarked);
}