List of usage examples for org.apache.lucene.analysis.ar.ArabicNormalizationFilter
public ArabicNormalizationFilter(TokenStream input)
public ArabicNormalizationFilter(TokenStream input)
From source file:org.apache.solr.analysis.ArabicNormalizationFilterFactory.java
License:Apache License
/**
 * Wraps the given stream in an {@link ArabicNormalizationFilter}.
 *
 * @param input the token stream to normalize
 * @return a new Arabic normalization filter over {@code input}
 */
public ArabicNormalizationFilter create(TokenStream input) {
    return new ArabicNormalizationFilter(input);
}
From source file:org.crosswire.jsword.index.lucene.analysis.ArabicLuceneAnalyzer.java
License:Open Source License
@Override public final TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new ArabicLetterTokenizer(reader); result = new LowerCaseFilter(result); result = new ArabicNormalizationFilter(result); if (doStopWords && stopSet != null) { result = new StopFilter(false, result, stopSet); }//from w w w.j av a 2s .c o m if (doStemming) { result = new ArabicStemFilter(result); } return result; }
From source file:org.crosswire.jsword.index.lucene.analysis.ArabicLuceneAnalyzer.java
License:Open Source License
@Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(new ArabicLetterTokenizer(reader)); streams.setResult(new LowerCaseFilter(streams.getResult())); streams.setResult(new ArabicNormalizationFilter(streams.getResult())); if (doStopWords && stopSet != null) { streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet)); }// ww w .j a v a 2s .c o m if (doStemming) { streams.setResult(new ArabicStemFilter(streams.getResult())); } setPreviousTokenStream(streams); } else { streams.getSource().reset(reader); } return streams.getResult(); }
From source file:org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer.java
License:Open Source License
@Override public final TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new ArabicLetterTokenizer(reader); result = new LowerCaseFilter(result); result = new ArabicNormalizationFilter(result); /* additional persian-specific normalization */ result = new PersianNormalizationFilter(result); /*/*from w w w. ja v a 2 s . c om*/ * the order here is important: the stop set is normalized with the * above! */ if (doStopWords && stopSet != null) { result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); } return result; }
From source file:org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer.java
License:Open Source License
/** * Returns a (possibly reused) {@link TokenStream} which tokenizes all the * text in the provided {@link Reader}./*from w w w .j av a 2 s . com*/ * * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer} * filtered with {@link LowerCaseFilter}, * {@link ArabicNormalizationFilter}, * {@link PersianNormalizationFilter} and Persian Stop words */ @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(new ArabicLetterTokenizer(reader)); streams.setResult(new LowerCaseFilter(streams.getResult())); streams.setResult(new ArabicNormalizationFilter(streams.getResult())); /* additional persian-specific normalization */ streams.setResult(new PersianNormalizationFilter(streams.getResult())); /* * the order here is important: the stop set is normalized with the * above! */ if (doStopWords && stopSet != null) { streams.setResult(new StopFilter(false, streams.getResult(), stopSet)); } setPreviousTokenStream(streams); } else { streams.getSource().reset(reader); } return streams.getResult(); }
From source file:org.elasticsearch.analysis.common.ArabicNormalizationFilterFactory.java
License:Apache License
/**
 * Applies Arabic orthographic normalization to the incoming stream.
 *
 * @param tokenStream the stream to filter
 * @return an {@link ArabicNormalizationFilter} wrapping {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    return new ArabicNormalizationFilter(tokenStream);
}