List of usage examples for org.apache.lucene.analysis.fa.PersianNormalizationFilter (constructor)
public PersianNormalizationFilter(TokenStream input)
From source file:org.apache.solr.analysis.PersianNormalizationFilterFactory.java
License:Apache License
/** Wraps the given token stream with Persian normalization. */
public PersianNormalizationFilter create(TokenStream input) {
    return new PersianNormalizationFilter(input);
}
From source file:org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer.java
License:Open Source License
/**
 * Builds the Persian analysis chain for a field: letter tokenization,
 * lower-casing, Arabic normalization, Persian-specific normalization,
 * and (optionally) stop-word removal.
 */
@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream stream = new ArabicLetterTokenizer(reader);
    stream = new LowerCaseFilter(stream);
    stream = new ArabicNormalizationFilter(stream);
    // Persian-specific normalization on top of the Arabic pass.
    stream = new PersianNormalizationFilter(stream);
    // Order matters: the stop set was normalized with the filters above,
    // so stop filtering must come last.
    if (doStopWords && stopSet != null) {
        stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                stream, stopSet);
    }
    return stream;
}
From source file:org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer.java
License:Open Source License
/** * Returns a (possibly reused) {@link TokenStream} which tokenizes all the * text in the provided {@link Reader}./* w w w . j a v a 2 s . c om*/ * * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer} * filtered with {@link LowerCaseFilter}, * {@link ArabicNormalizationFilter}, * {@link PersianNormalizationFilter} and Persian Stop words */ @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(new ArabicLetterTokenizer(reader)); streams.setResult(new LowerCaseFilter(streams.getResult())); streams.setResult(new ArabicNormalizationFilter(streams.getResult())); /* additional persian-specific normalization */ streams.setResult(new PersianNormalizationFilter(streams.getResult())); /* * the order here is important: the stop set is normalized with the * above! */ if (doStopWords && stopSet != null) { streams.setResult(new StopFilter(false, streams.getResult(), stopSet)); } setPreviousTokenStream(streams); } else { streams.getSource().reset(reader); } return streams.getResult(); }
From source file:org.elasticsearch.analysis.common.PersianNormalizationFilterFactory.java
License:Apache License
/** Applies Persian normalization to the incoming token stream. */
@Override
public TokenStream create(TokenStream tokenStream) {
    return new PersianNormalizationFilter(tokenStream);
}