List of usage examples for the org.apache.lucene.analysis.core.DecimalDigitFilter constructor
public DecimalDigitFilter(TokenStream input)
From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzer.java
License:Open Source License
/** * Creates/*ww w . j a v a2 s . c o m*/ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * used to tokenize all the text in the provided {@link Reader}. * * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link LowerCaseFilter}, {@link DecimalDigitFilter}, {@link StopFilter}, * {@link ArabicRootExtractorStemFilter}, {@link SetKeywordMarkerFilter} */ @Override protected TokenStreamComponents createComponents(String fieldName) { final Tokenizer source; if (getVersion().onOrAfter(Version.LATEST)) { source = new StandardTokenizer(); } else { source = new StandardTokenizer40(); } TokenStream result = new LowerCaseFilter(source); if (getVersion().onOrAfter(Version.LUCENE_5_4_0)) { result = new DecimalDigitFilter(result); } // the order here is important: the stopword list is not normalized! result = new StopFilter(result, stopwords); result = new ArabicRootExtractorStemFilter(result); if (!stemExclusionSet.isEmpty()) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } return new TokenStreamComponents(source, result); }
From source file:org.elasticsearch.analysis.common.DecimalDigitFilterFactory.java
License:Apache License
/**
 * Wraps the given stream in a {@link DecimalDigitFilter}.
 *
 * @param tokenStream the upstream token stream to wrap
 * @return a new {@link DecimalDigitFilter} reading from {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream digitFolded = new DecimalDigitFilter(tokenStream);
    return digitFolded;
}