Example usage for org.apache.lucene.analysis StopFilter StopFilter

List of usage examples for org.apache.lucene.analysis StopFilter StopFilter

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.StopFilter constructor StopFilter(TokenStream, CharArraySet).

Prototype

public StopFilter(TokenStream in, CharArraySet stopWords) 

Source Link

Document

Constructs a filter which removes words from the input TokenStream that are named in the Set.

Usage

From source file:brazilianStemmer.BrazilianAnalyzer.java

License:Apache License

/**
 * Creates a TokenStream which tokenizes all the text in the provided
 * Reader./*w w  w.  jav  a2  s.com*/
 * 
 * @return A TokenStream build from a StandardTokenizer filtered with
 *         StandardFilter, StopFilter, GermanStemFilter and LowerCaseFilter.
 */
public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);

    /** Convert to lowercase after stemming! */
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, englishStopWords);

    result = new BrazilianAccentsFilter(result);
    result = new StopFilter(result, stopWords);

    result = new BrazilianStemFilter(result, stopWords);

    return result;
}

From source file:com.appeligo.lucene.PorterStemAnalyzer.java

License:Apache License

/**
 * Builds the analysis chain: a StandardTokenizer over the reader, followed
 * by StandardFilter, LowerCaseFilter, StopFilter (using {@code stopWords})
 * and PorterStemFilter.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the PorterStemFilter that ends the chain
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream tokens = new StandardTokenizer(reader);
    final TokenStream standardized = new StandardFilter(tokens);
    final TokenStream lowerCased = new LowerCaseFilter(standardized);
    final TokenStream withoutStopWords = new StopFilter(lowerCased, stopWords);
    return new PorterStemFilter(withoutStopWords);
}

From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyAnalyzer.java

License:Open Source License

/**
 * Builds a LowerCaseTokenizer over the reader, wraps it in a StopFilter
 * (using {@code stopWords}) and then in an ISOLatin1AccentFilter.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the ISOLatin1AccentFilter that ends the chain
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    final LowerCaseTokenizer lowerCased = new LowerCaseTokenizer(reader);
    final TokenStream withoutStopWords = new StopFilter(lowerCased, stopWords);
    return new ISOLatin1AccentFilter(withoutStopWords);
}

From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyAnalyzer.java

License:Open Source License

/**
 * Returns the chain previously stored with setPreviousTokenStream, reset to
 * read from the given reader, or builds and stores a new chain when none
 * has been stored yet.
 *
 * <p>A new chain is a LowerCaseTokenizer wrapped in a StopFilter (using
 * {@code stopWords}) and then in an ISOLatin1AccentFilter.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the end of the stored filter chain
 * @throws IOException declared by this method; presumably raised when
 *         resetting the tokenizer fails (confirm against the tokenizer)
 */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams cached = (SavedStreams) getPreviousTokenStream();
    if (cached != null) {
        // A chain already exists: only re-point its tokenizer at the new reader.
        cached.source.reset(reader);
        return cached.result;
    }

    // First use: build the chain, then store it for later calls.
    cached = new SavedStreams();
    cached.source = new LowerCaseTokenizer(reader);
    cached.result = new ISOLatin1AccentFilter(new StopFilter(cached.source, stopWords));
    setPreviousTokenStream(cached);
    return cached.result;
}

From source file:com.duroty.lucene.analysis.AnalyzerISOLatin1.java

License:Open Source License

/**
 * Builds the analysis chain: an RdLowerCaseTokenizer over the reader, a
 * StopFilter when {@code stopTable} is set, an ISOLatin1AccentFilter, and
 * two SnowballFilters ("English" first, then "Spanish").
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader source of the text to tokenize
 *
 * @return the "Spanish" SnowballFilter that ends the chain
 */
public final TokenStream tokenStream(String fieldName, Reader reader) {
    // RdLowerCaseTokenizer stands in for the formerly used
    // StandardTokenizer / StandardFilter / LowerCaseFilter chain.
    TokenStream chain = new RdLowerCaseTokenizer(reader);

    // Stop words are only filtered when a stop table is available.
    if (stopTable != null) {
        chain = new StopFilter(chain, stopTable);
    }

    chain = new ISOLatin1AccentFilter(chain);

    // Only the English and Spanish stemmers are active; the "French" and
    // "Italian" ones are disabled.
    final TokenStream englishStemmed = new SnowballFilter(chain, "English");
    return new SnowballFilter(englishStemmed, "Spanish");
}

From source file:com.duroty.lucene.analysis.DefaultAnalyzer.java

License:Open Source License

/**
 * DOCUMENT ME!/*from w  w  w .  j ava2 s  .  c o  m*/
 *
 * @param fieldName DOCUMENT ME!
 * @param reader DOCUMENT ME!
 *
 * @return DOCUMENT ME!
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);

    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);

    if (stopTable != null) {
        return new StopFilter(result, stopTable);
    } else {
        return result;
    }
}

From source file:com.duroty.lucene.analysis.DictionaryAnalyzer.java

License:Apache License

/**
 * Builds the analysis chain: an RdLowerCaseTokenizer over the reader, a
 * StopFilter when {@code stopTable} is set, and an ISOLatin1AccentFilter.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader source of the text to tokenize
 *
 * @return the ISOLatin1AccentFilter that ends the chain
 */
public final TokenStream tokenStream(String fieldName, Reader reader) {
    // RdLowerCaseTokenizer stands in for the formerly used
    // StandardTokenizer / StandardFilter / LowerCaseFilter chain.
    TokenStream chain = new RdLowerCaseTokenizer(reader);

    // Stop words are only filtered when a stop table is available.
    if (stopTable != null) {
        chain = new StopFilter(chain, stopTable);
    }

    return new ISOLatin1AccentFilter(chain);
}

From source file:com.fdt.sdl.core.analyzer.phonetix.lucene.PhoneticAnalyzer.java

License:Open Source License

/**
 * Builds the analysis chain: a {@link StandardTokenizer} over the reader,
 * wrapped in turn by a {@link StandardFilter}, a {@link LowerCaseFilter},
 * a {@link StopFilter} (using {@code stopTable}) and a
 * {@link PhoneticFilter} (using {@code encoder}).
 *
 * @param fieldname name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the PhoneticFilter that ends the chain
 */
public TokenStream tokenStream(String fieldname, final Reader reader) {
    final TokenStream tokens = new StandardTokenizer(reader);
    final TokenStream standardized = new StandardFilter(tokens);
    final TokenStream lowerCased = new LowerCaseFilter(standardized);
    final TokenStream withoutStopWords = new StopFilter(lowerCased, stopTable);
    return new PhoneticFilter(withoutStopWords, encoder);
}

From source file:com.google.ie.common.search.analyzer.IdeaExchangeQueryAnalyzer.java

License:Apache License

/**
 * Builds the analysis chain: a StandardTokenizer (constructed with
 * {@code false} as its second argument and limited to
 * {@code maxTokenLength}), followed by StandardFilter, LowerCaseFilter,
 * StopFilter (using {@code stopSet}) and a SnowballFilter.
 *
 * @param fieldName name of the field being analyzed; its value is not used,
 *                  the SnowballFilter is always given DEFAULT_LANGUAGE
 * @param reader    source of the text to tokenize
 * @return the SnowballFilter that ends the chain
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    final StandardTokenizer tokenizer = new StandardTokenizer(reader, false);
    tokenizer.setMaxTokenLength(maxTokenLength);

    TokenStream filtered = new StandardFilter(tokenizer);
    filtered = new LowerCaseFilter(filtered);
    filtered = new StopFilter(filtered, stopSet);

    // The stemmer is configured from DEFAULT_LANGUAGE, never from the field name.
    return new SnowballFilter(filtered, DEFAULT_LANGUAGE);
}

From source file:com.google.ie.common.search.analyzer.IdeaExchangeQueryAnalyzer.java

License:Apache License

/**
 * Returns the chain previously stored with setPreviousTokenStream, reset to
 * read from the given reader, or builds and stores a new chain when none
 * has been stored yet. In both cases the tokenizer's maximum token length
 * is set to {@code maxTokenLength} before returning.
 *
 * <p>A new chain is a StandardTokenizer followed by StandardFilter,
 * LowerCaseFilter, StopFilter (using {@code stopSet}) and a SnowballFilter
 * given DEFAULT_LANGUAGE.
 *
 * @param fieldName name of the field being analyzed; its value is not used
 * @param reader    source of the text to tokenize
 * @return the end of the stored filter chain
 * @throws IOException declared by this method; presumably raised when
 *         resetting the tokenizer fails (confirm against the tokenizer)
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams saved = (SavedStreams) getPreviousTokenStream();
    if (saved != null) {
        // A chain already exists: only re-point its tokenizer at the new reader.
        saved.tokenStream.reset(reader);
    } else {
        // First use: the holder is stored before its chain is built.
        saved = new SavedStreams();
        setPreviousTokenStream(saved);
        saved.tokenStream = new StandardTokenizer(reader);
        saved.filteredTokenStream = new SnowballFilter(
                new StopFilter(
                        new LowerCaseFilter(new StandardFilter(saved.tokenStream)),
                        stopSet),
                DEFAULT_LANGUAGE);
    }
    saved.tokenStream.setMaxTokenLength(maxTokenLength);
    return saved.filteredTokenStream;
}