List of usage examples for the org.apache.lucene.analysis.StopFilter constructor
public StopFilter(TokenStream in, CharArraySet stopWords)
From source file:brazilianStemmer.BrazilianAnalyzer.java
License:Apache License
/**
 * Creates a TokenStream which tokenizes all the text in the provided Reader.
 *
 * @return A TokenStream built from a StandardTokenizer filtered with
 *         LowerCaseFilter, StopFilter (English then Brazilian stop words),
 *         BrazilianAccentsFilter and BrazilianStemFilter.
 */
public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    // Lower-casing is applied BEFORE the stop and stem filters so that the
    // stop-word lookups below see normalized tokens. (An earlier comment here
    // claimed lowercase ran after stemming, which the chain does not do.)
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, englishStopWords);
    result = new BrazilianAccentsFilter(result);
    result = new StopFilter(result, stopWords);
    result = new BrazilianStemFilter(result, stopWords);
    return result;
}
From source file:com.appeligo.lucene.PorterStemAnalyzer.java
License:Apache License
/**
 * Builds the analysis chain: StandardTokenizer -> StandardFilter ->
 * LowerCaseFilter -> StopFilter -> PorterStemFilter.
 * (The previous comment said "LowerCaseTokenizer", but the code below
 * actually starts from a StandardTokenizer.)
 *
 * @param fieldName ignored; the same chain is used for every field
 * @param reader    the character source to tokenize
 * @return the stemmed, stop-word-filtered token stream
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, stopWords);
    result = new PorterStemFilter(result);
    return result;
}
From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyAnalyzer.java
License:Open Source License
/**
 * Builds the analysis chain: LowerCaseTokenizer -> StopFilter ->
 * ISOLatin1AccentFilter.
 *
 * @param fieldName ignored; the same chain is used for every field
 * @param reader    the character source to tokenize
 * @return the accent-folded, stop-word-filtered token stream
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new LowerCaseTokenizer(reader);
    result = new StopFilter(result, stopWords);
    return new ISOLatin1AccentFilter(result);
}
From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyAnalyzer.java
License:Open Source License
/**
 * Returns a reusable token stream for this analyzer. On the first call the
 * chain LowerCaseTokenizer -> StopFilter -> ISOLatin1AccentFilter is built
 * and cached via setPreviousTokenStream; subsequent calls simply reset the
 * cached tokenizer onto the new reader.
 *
 * @param fieldName ignored; the same chain is used for every field
 * @param reader    the character source to tokenize
 * @return the cached, reset token stream
 * @throws IOException if resetting the cached tokenizer fails
 */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams cached = (SavedStreams) getPreviousTokenStream();
    if (cached != null) {
        // Reuse the previously built chain; only the source tokenizer
        // needs to be pointed at the new reader.
        cached.source.reset(reader);
        return cached.result;
    }
    cached = new SavedStreams();
    cached.source = new LowerCaseTokenizer(reader);
    cached.result = new ISOLatin1AccentFilter(new StopFilter(cached.source, stopWords));
    setPreviousTokenStream(cached);
    return cached.result;
}
From source file:com.duroty.lucene.analysis.AnalyzerISOLatin1.java
License:Open Source License
/** * DOCUMENT ME!// w w w . j a v a 2s .c om * * @param fieldName DOCUMENT ME! * @param reader DOCUMENT ME! * * @return DOCUMENT ME! */ public final TokenStream tokenStream(String fieldName, Reader reader) { // The token stream that will be returned. TokenStream result; // Builds the chain... /*result = new StandardTokenizer(reader); result = new StandardFilter(result); result = new LowerCaseFilter(result);*/ result = new RdLowerCaseTokenizer(reader); if (stopTable != null) { result = new StopFilter(result, stopTable); } else { } result = new ISOLatin1AccentFilter(result); result = new SnowballFilter(result, "English"); result = new SnowballFilter(result, "Spanish"); //result = new SnowballFilter(result, "French"); //result = new SnowballFilter(result, "Italian"); return result; }
From source file:com.duroty.lucene.analysis.DefaultAnalyzer.java
License:Open Source License
/**
 * Builds the analysis chain: StandardTokenizer -> StandardFilter ->
 * LowerCaseFilter, with a trailing StopFilter appended only when a stop
 * table has been configured.
 *
 * @param fieldName ignored; the same chain is used for every field
 * @param reader    the character source to tokenize
 * @return the filtered token stream
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    if (stopTable != null) {
        return new StopFilter(result, stopTable);
    } else {
        return result;
    }
}
From source file:com.duroty.lucene.analysis.DictionaryAnalyzer.java
License:Apache License
/** * DOCUMENT ME!//from ww w. jav a2s . co m * * @param fieldName DOCUMENT ME! * @param reader DOCUMENT ME! * * @return DOCUMENT ME! */ public final TokenStream tokenStream(String fieldName, Reader reader) { // The token stream that will be returned. TokenStream result; // Builds the chain... /*result = new StandardTokenizer(reader); result = new StandardFilter(result); result = new LowerCaseFilter(result);*/ result = new RdLowerCaseTokenizer(reader); if (stopTable != null) { result = new StopFilter(result, stopTable); } else { } result = new ISOLatin1AccentFilter(result); return result; }
From source file:com.fdt.sdl.core.analyzer.phonetix.lucene.PhoneticAnalyzer.java
License:Open Source License
/**
 * Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter},
 * a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link PhoneticFilter}.
 *
 * @param fieldname ignored; the same chain is used for every field
 * @param reader    the character source to tokenize
 * @return the phonetically encoded token stream
 */
public TokenStream tokenStream(String fieldname, final Reader reader) {
    // Build the chain inside-out: tokenizer first, phonetic encoding last.
    TokenStream chain = new StandardFilter(new StandardTokenizer(reader));
    chain = new StopFilter(new LowerCaseFilter(chain), stopTable);
    return new PhoneticFilter(chain, encoder);
}
From source file:com.google.ie.common.search.analyzer.IdeaExchangeQueryAnalyzer.java
License:Apache License
@Override public TokenStream tokenStream(String fieldName, Reader reader) { StandardTokenizer tokenStream = new StandardTokenizer(reader, false); tokenStream.setMaxTokenLength(maxTokenLength); TokenStream result = new StandardFilter(tokenStream); result = new LowerCaseFilter(result); result = new StopFilter(result, stopSet); fieldName = DEFAULT_LANGUAGE;//from ww w .j ava2s. com result = new SnowballFilter(result, fieldName); return result; }
From source file:com.google.ie.common.search.analyzer.IdeaExchangeQueryAnalyzer.java
License:Apache License
@Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); setPreviousTokenStream(streams); streams.tokenStream = new StandardTokenizer(reader); streams.filteredTokenStream = new StandardFilter(streams.tokenStream); streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream); streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet); fieldName = DEFAULT_LANGUAGE;// w ww .j a v a 2 s. c om streams.filteredTokenStream = new SnowballFilter(streams.filteredTokenStream, fieldName); } else { streams.tokenStream.reset(reader); } streams.tokenStream.setMaxTokenLength(maxTokenLength); return streams.filteredTokenStream; }