List of usage examples for the LowerCaseFilter constructor of org.apache.lucene.analysis.LowerCaseFilter
public LowerCaseFilter(TokenStream in)
From source file:LogAnalyzer.java
License:Open Source License
/**
 * Builds the analysis chain for this analyzer: a {@code WhitespaceTokenizer}
 * over the reader, wrapped in a {@code LowerCaseFilter}, wrapped in a
 * {@code LogFilter}.
 *
 * @param fieldName name of the field being analyzed (not used by this method)
 * @param reader    source of the text to tokenize
 * @return the outermost stream of the chain, a {@code LogFilter}
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(reader);
    LowerCaseFilter lowerCased = new LowerCaseFilter(tokenizer);
    return new LogFilter(lowerCased);
}
From source file:analysis.StandardAnalyzer.java
License:Apache License
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ @Override//w ww .j av a 2s .c om public TokenStream tokenStream(String fieldName, Reader reader) { StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader); tokenStream.setMaxTokenLength(maxTokenLength); TokenStream result = new StandardFilter(tokenStream); result = new LowerCaseFilter(result); result = new StopFilter(enableStopPositionIncrements, result, stopSet); return result; }
From source file:analysis.StandardAnalyzer.java
License:Apache License
@Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { if (overridesTokenStreamMethod) { // LUCENE-1678: force fallback to tokenStream() if we // have been subclassed and that subclass overrides // tokenStream but not reusableTokenStream return tokenStream(fieldName, reader); }/*w ww .j a v a 2s . c om*/ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); setPreviousTokenStream(streams); streams.tokenStream = new StandardTokenizer(matchVersion, reader); streams.filteredTokenStream = new StandardFilter(streams.tokenStream); streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream); streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet); } else { streams.tokenStream.reset(reader); } streams.tokenStream.setMaxTokenLength(maxTokenLength); streams.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym); return streams.filteredTokenStream; }
From source file:analysis.SynonymAnalyzer.java
License:Apache License
/**
 * Builds the synonym analysis chain: {@code StandardTokenizer} (created with
 * {@code Version.LUCENE_30}), then {@code StandardFilter},
 * {@code LowerCaseFilter}, {@code StopFilter} using
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}, and finally
 * {@code SynonymFilter} backed by this analyzer's {@code engine}.
 *
 * @param fieldName name of the field being analyzed (not used by this method)
 * @param reader    source of the text to tokenize
 * @return the outermost stream of the chain, a {@code SynonymFilter}
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_30, reader);
    StandardFilter standard = new StandardFilter(tokenizer);
    LowerCaseFilter lowerCased = new LowerCaseFilter(standard);
    StopFilter withoutStopWords =
            new StopFilter(true, lowerCased, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    TokenStream withSynonyms = new SynonymFilter(withoutStopWords, engine);
    return withSynonyms;
}
From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzer2.java
License:Apache License
/**
 * Builds the analysis chain: a {@code LetterTokenizer} over the reader,
 * wrapped in a {@code LowerCaseFilter}, wrapped in a {@code StopFilter}
 * that uses this analyzer's {@code stopWords}.
 *
 * @param fieldName name of the field being analyzed (not used by this method)
 * @param reader    source of the text to tokenize
 * @return the outermost stream of the chain, a {@code StopFilter}
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    LetterTokenizer letters = new LetterTokenizer(reader);
    // Lower-casing is placed inside the stop filter, so the stop filter receives lower-cased tokens.
    LowerCaseFilter lowerCased = new LowerCaseFilter(letters);
    return new StopFilter(true, lowerCased, stopWords);
}
From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzerFlawed.java
License:Apache License
/**
 * Builds the analysis chain with an intentional ordering mistake: the
 * {@code StopFilter} wraps the {@code LetterTokenizer} directly and the
 * {@code LowerCaseFilter} is applied only afterwards, so the stop filter
 * receives tokens that have not been lower-cased yet.
 *
 * @param fieldName name of the field being analyzed (not used by this method)
 * @param reader    source of the text to tokenize
 * @return the outermost stream of the chain, a {@code LowerCaseFilter}
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    LetterTokenizer letters = new LetterTokenizer(reader);
    // Ordering mistake here (kept on purpose): stop words are filtered before lower-casing.
    StopFilter withoutStopWords = new StopFilter(true, letters, stopWords);
    return new LowerCaseFilter(withoutStopWords);
}
From source file:aos.lucene.analysis.synonym.SynonymAnalyzer.java
License:Apache License
/**
 * Builds the synonym analysis chain: {@code StandardTokenizer} (created with
 * {@code Version.LUCENE_46}), then {@code StandardFilter},
 * {@code LowerCaseFilter}, {@code StopFilter} using
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}, and finally
 * {@code SynonymFilter} backed by this analyzer's {@code engine}.
 *
 * @param fieldName name of the field being analyzed (not used by this method)
 * @param reader    source of the text to tokenize
 * @return the outermost stream of the chain, a {@code SynonymFilter}
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_46, reader);
    StandardFilter standard = new StandardFilter(tokenizer);
    LowerCaseFilter lowerCased = new LowerCaseFilter(standard);
    StopFilter withoutStopWords =
            new StopFilter(true, lowerCased, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    TokenStream withSynonyms = new SynonymFilter(withoutStopWords, engine);
    return withSynonyms;
}
From source file:brazilianStemmer.BrazilianAnalyzer.java
License:Apache License
/**
 * Creates a TokenStream which tokenizes all the text in the provided Reader.
 *
 * @param fieldName name of the field being analyzed (not used by this method)
 * @param reader    source of the text to tokenize
 * @return A TokenStream built from a StandardTokenizer filtered, in order,
 *         with LowerCaseFilter, StopFilter (englishStopWords),
 *         BrazilianAccentsFilter, StopFilter (stopWords) and
 *         BrazilianStemFilter.
 */
public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream tokens = new StandardTokenizer(reader);
    // Lower-casing is the first filter, so it runs before both stop filters and the stemmer.
    TokenStream lowerCased = new LowerCaseFilter(tokens);
    TokenStream withoutEnglishStops = new StopFilter(lowerCased, englishStopWords);
    TokenStream accentsFiltered = new BrazilianAccentsFilter(withoutEnglishStops);
    TokenStream withoutStops = new StopFilter(accentsFiltered, stopWords);
    // NOTE(review): stopWords is also handed to the stem filter; presumably as an exclusion set - confirm.
    TokenStream stemmed = new BrazilianStemFilter(withoutStops, stopWords);
    return stemmed;
}
From source file:com.appeligo.lucene.PorterStemAnalyzer.java
License:Apache License
/**
 * Builds the analysis chain: a StandardTokenizer filtered, in order, with
 * StandardFilter, LowerCaseFilter, StopFilter (using {@code stopWords}) and
 * PorterStemFilter.
 *
 * @param fieldName name of the field being analyzed (not used by this method)
 * @param reader    source of the text to tokenize
 * @return the outermost stream of the chain, a PorterStemFilter
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream tokens = new StandardTokenizer(reader);
    TokenStream standard = new StandardFilter(tokens);
    TokenStream lowerCased = new LowerCaseFilter(standard);
    TokenStream withoutStops = new StopFilter(lowerCased, stopWords);
    TokenStream stemmed = new PorterStemFilter(withoutStops);
    return stemmed;
}
From source file:com.bigdata.search.FullTextIndex.java
License:Open Source License
/** * Tokenize text using an {@link Analyzer} that is appropriate to the * specified language family.//from w ww.j a v a 2 s . c o m * * @param languageCode * The language code -or- <code>null</code> to use the default * {@link Locale}). * * @param r * A reader on the text to be indexed. * * @param filterStopwords * if true, filter stopwords from the token stream * * @return The extracted token stream. */ protected TokenStream getTokenStream(final String languageCode, final Reader r, final boolean filterStopwords) { /* * Note: This is stripping out stopwords by default. * * @todo is it using a language family specific stopword list? */ final Analyzer a = getAnalyzer(languageCode, filterStopwords); TokenStream tokenStream = a.tokenStream(null/* @todo field? */, r); // force to lower case. tokenStream = new LowerCaseFilter(tokenStream); return tokenStream; }