Example usage for org.apache.lucene.analysis LowerCaseFilter LowerCaseFilter

List of usage examples for org.apache.lucene.analysis LowerCaseFilter LowerCaseFilter

Introduction

On this page you can find example usage for org.apache.lucene.analysis LowerCaseFilter LowerCaseFilter.

Prototype

public LowerCaseFilter(TokenStream in) 

Source Link

Document

Create a new LowerCaseFilter, that normalizes token text to lower case.

Usage

From source file:LogAnalyzer.java

License:Open Source License

/**
 * Builds the analysis chain for a field: whitespace tokenization,
 * lower-casing, then the log-specific filter.
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    // Chain is built inside-out: tokenizer first, LogFilter outermost.
    TokenStream whitespaceTokens = new WhitespaceTokenizer(reader);
    TokenStream lowerCased = new LowerCaseFilter(whitespaceTokens);
    return new LogFilter(lowerCased);
}

From source file:analysis.StandardAnalyzer.java

License:Apache License

/**
 * Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter},
 * a {@link LowerCaseFilter} and a {@link StopFilter}.
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    StandardTokenizer source = new StandardTokenizer(matchVersion, reader);
    source.setMaxTokenLength(maxTokenLength);
    // Filter order: standard normalization -> lower-casing -> stop-word removal.
    return new StopFilter(enableStopPositionIncrements,
            new LowerCaseFilter(new StandardFilter(source)), stopSet);
}

From source file:analysis.StandardAnalyzer.java

License:Apache License

/**
 * Returns a token stream for the given reader, reusing a previously built
 * filter chain when one is available via {@code getPreviousTokenStream()}
 * (presumably per-thread caching — confirm against the Analyzer base class).
 * On reuse, only the tokenizer is reset; the chain is not rebuilt.
 *
 * @throws IOException if resetting the cached tokenizer fails
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    if (overridesTokenStreamMethod) {
        // LUCENE-1678: force fallback to tokenStream() if we
        // have been subclassed and that subclass overrides
        // tokenStream but not reusableTokenStream
        return tokenStream(fieldName, reader);
    }
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        // First call: build the full chain and cache it for later reuse.
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(matchVersion, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream,
                stopSet);
    } else {
        // Cached chain exists: just point the tokenizer at the new reader.
        streams.tokenStream.reset(reader);
    }
    // Re-apply per-call configuration in case settings changed since caching.
    streams.tokenStream.setMaxTokenLength(maxTokenLength);

    streams.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);

    return streams.filteredTokenStream;
}

From source file:analysis.SynonymAnalyzer.java

License:Apache License

/**
 * Builds the synonym-injecting analysis chain: standard tokenization,
 * normalization, lower-casing, stop-word removal, then synonym expansion
 * backed by {@code engine}.
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream chain = new StandardTokenizer(Version.LUCENE_30, reader);
    chain = new StandardFilter(chain);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(true, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return new SynonymFilter(chain, engine);
}

From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzer2.java

License:Apache License

/**
 * Builds the chain: letter tokenization, lower-casing, then stop-word removal.
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream letters = new LetterTokenizer(reader);
    TokenStream lowerCased = new LowerCaseFilter(letters);
    return new StopFilter(true, lowerCased, stopWords);
}

From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzerFlawed.java

License:Apache License

/**
 * Deliberate ordering mistake, kept on purpose as a demonstration: stop-word
 * removal runs BEFORE lower-casing, so tokens whose case differs from the
 * stop list (presumably lower-case entries — verify) slip past the filter.
 * Do not "fix" the order here; the class exists to show the flaw.
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new LowerCaseFilter(new StopFilter(true, new LetterTokenizer(reader), stopWords));
}

From source file:aos.lucene.analysis.synonym.SynonymAnalyzer.java

License:Apache License

/**
 * Builds the synonym-injecting analysis chain: standard tokenization,
 * normalization, lower-casing, English stop-word removal, then synonym
 * expansion backed by {@code engine}.
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream chain = new StandardTokenizer(Version.LUCENE_46, reader);
    chain = new StandardFilter(chain);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(true, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return new SynonymFilter(chain, engine);
}

From source file:brazilianStemmer.BrazilianAnalyzer.java

License:Apache License

/**
 * Creates a TokenStream which tokenizes all the text in the provided
 * Reader.
 *
 * @return A TokenStream built from a StandardTokenizer filtered with
 *         LowerCaseFilter, a StopFilter for English stop words,
 *         BrazilianAccentsFilter, a StopFilter for the configured stop
 *         set, and finally BrazilianStemFilter.
 */
public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);

    /**
     * Lower-case first so the stop-word checks below see normalized tokens.
     * NOTE(review): a previous comment claimed lower-casing happens "after
     * stemming", but stemming is actually applied last — confirm intent.
     */
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, englishStopWords);

    result = new BrazilianAccentsFilter(result);
    result = new StopFilter(result, stopWords);

    // Stemming runs last, after accent normalization and stop removal.
    result = new BrazilianStemFilter(result, stopWords);

    return result;
}

From source file:com.appeligo.lucene.PorterStemAnalyzer.java

License:Apache License

/**
 * Builds the stemming analysis chain: StandardTokenizer -> StandardFilter ->
 * LowerCaseFilter -> StopFilter -> PorterStemFilter.
 * (The previous doc wrongly described a LowerCaseTokenizer; the chain
 * actually starts with a StandardTokenizer.)
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    // Built inside-out: the stemmer is the outermost filter.
    return new PorterStemFilter(
            new StopFilter(
                    new LowerCaseFilter(new StandardFilter(new StandardTokenizer(reader))),
                    stopWords));
}

From source file:com.bigdata.search.FullTextIndex.java

License:Open Source License

/**
 * Tokenize text using an {@link Analyzer} that is appropriate to the
 * specified language family.
 *
 * @param languageCode
 *            The language code -or- <code>null</code> to use the default
 *            {@link Locale}.
 *
 * @param r
 *            A reader on the text to be indexed.
 *
 * @param filterStopwords
 *            if true, filter stopwords from the token stream
 *
 * @return The extracted token stream, with every token forced to lower case.
 */
protected TokenStream getTokenStream(final String languageCode, final Reader r, final boolean filterStopwords) {

    /*
     * Note: This is stripping out stopwords by default.
     *
     * @todo is it using a language family specific stopword list?
     */
    final Analyzer analyzer = getAnalyzer(languageCode, filterStopwords);

    // Wrap the analyzer's stream so all tokens are normalized to lower case.
    return new LowerCaseFilter(analyzer.tokenStream(null/* @todo field? */, r));

}