Example usage for org.apache.lucene.analysis.en EnglishPossessiveFilter EnglishPossessiveFilter

List of usage examples for org.apache.lucene.analysis.en EnglishPossessiveFilter EnglishPossessiveFilter

Introduction

On this page you can find example usage for org.apache.lucene.analysis.en EnglishPossessiveFilter EnglishPossessiveFilter.

Prototype

public EnglishPossessiveFilter(TokenStream input) 

Source Link

Usage

From source file:com.hourglassapps.cpi_ii.stem.snowball.lucene.SnowballAnalyzer.java

License:Apache License

/**
 * Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter},
 * an optional {@link EnglishPossessiveFilter}, a language-appropriate lowercase
 * filter, an optional {@link StopFilter}, and a {@link SnowballFilter}.
 *
 * @param fieldName the field being analyzed (not used by this chain)
 * @param reader    the character source to tokenize
 * @return the assembled tokenizer/filter components
 */
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream stream = new StandardFilter(matchVersion, source);

    // Strip the possessive "'s" before English-family stemmers run.
    final boolean englishFamily =
            name.equals("English") || name.equals("Porter") || name.equals("Lovins");
    if (matchVersion.onOrAfter(Version.LUCENE_3_1) && englishFamily) {
        stream = new EnglishPossessiveFilter(stream);
    }

    // Turkish lowercasing differs (dotted/dotless i), so the stemmer expects
    // the Turkish-specific filter.
    if (matchVersion.onOrAfter(Version.LUCENE_3_1) && name.equals("Turkish")) {
        stream = new TurkishLowerCaseFilter(stream);
    } else {
        stream = new LowerCaseFilter(matchVersion, stream);
    }

    if (stopSet != null) {
        stream = new StopFilter(matchVersion, stream, stopSet);
    }

    stream = new SnowballFilter(stream, name);
    return new TokenStreamComponents(source, stream);
}

From source file:com.mozilla.grouperfish.lucene.analysis.en.EnglishAnalyzer.java

License:Apache License

/**
 * Creates a/*from w w  w  .  j  a va2s.  c om*/
 * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 * 
 * @return A
 *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 *         built from an {@link StandardTokenizer} filtered with
 *         {@link StandardFilter}, {@link LowerCaseFilter},
 *         {@link StopFilter} , {@link KeywordMarkerFilter} if a stem
 *         exclusion set is provided and {@link PorterStemFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    // prior to this we get the classic behavior, standardfilter does it for
    // us.
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
        result = new EnglishPossessiveFilter(result);
    }
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (stem) {
        if (!stemExclusionSet.isEmpty())
            result = new KeywordMarkerFilter(result, stemExclusionSet);
        result = new PorterStemFilter(result);
        result = new StopFilter(matchVersion, result, stopwords);
    }
    return new TokenStreamComponents(source, result);
}

From source file:com.mozilla.grouperfish.lucene.analysis.en.NGramEnglishAnalyzer.java

License:Apache License

/**
 * Builds the n-gram token stream: {@link StandardTokenizer} →
 * {@link StandardFilter} → possessive stripping (3.1+) → lowercasing →
 * shingle/stopword filtering → optional keyword-protected Porter stemming.
 *
 * @param fieldName the field being analyzed (not used by this chain)
 * @param reader    the character source to tokenize
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);

    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
        stream = new EnglishPossessiveFilter(stream);
    }
    stream = new LowerCaseFilter(matchVersion, stream);

    // Emit shingles between minNGram and maxNGram tokens wide, applying the
    // stopword set inside the shingle filter itself.
    final ShingleAllStopFilter shingles =
            new ShingleAllStopFilter(stream, minNGram, maxNGram, stopwords);
    shingles.setOutputUnigrams(outputUnigrams);
    if (!outputUnigrams) {
        shingles.setOutputUnigramsIfNoShingles(false);
    }
    stream = shingles;

    if (stem) {
        if (!stemExclusionSet.isEmpty()) {
            stream = new KeywordMarkerFilter(stream, stemExclusionSet);
        }
        stream = new PorterStemFilter(stream);
    }

    return new TokenStreamComponents(tokenizer, stream);
}

From source file:com.romeikat.datamessie.core.processing.service.stemming.text.EnglishAnalyzer.java

License:Open Source License

/**
 * Creates a {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} which tokenizes all
 * the text in the provided {@link Reader}.
 *
 * @return A {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} built from an
 *         {@link StandardTokenizer} filtered with {@link StandardFilter},
 *         {@link EnglishPossessiveFilter}, {@link LowerCaseFilter}, {@link StopFilter} ,
 *         {@link SetKeywordMarkerFilter} if a stem exclusion set is provided and
 *         {@link PorterStemFilter}./* w  ww .j a v a 2s.c om*/
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer source;
    source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);

    // Remove terms that do not contain any alphabetic character
    result = new NumberFilter(result);

    // Remove possessives (trailing 's)
    result = new EnglishPossessiveFilter(result);

    // Converting to lower case is not necessary as this is done before stemming
    // result = new LowerCaseFilter(result);

    // Remove stopwords
    result = new StopFilter(result, stopwords);

    // Mark keywords
    if (!stemExclusionSet.isEmpty()) {
        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    }

    // Stem
    result = new SnowballFilter(result, new German2Stemmer());
    // Alternatives to the SnowballFilter:
    // result = new PorterStemFilter(result);

    return new TokenStreamComponents(source, result);
}

From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzer.java

License:Open Source License

/**
 * Assembles the per-field analysis chain. This configuration must match
 * the configuration used when the index was built!
 *
 * @param fieldName Document field
 * @return Token stream
 */
@SuppressWarnings("resource")
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();
    TokenStream stream = new StandardFilter(tokenizer);
    stream = new EnglishPossessiveFilter(stream);   // drop trailing "'s"
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(stream, getStopwordSet());
    stream = new PorterStemFilter(stream);
    return new TokenStreamComponents(tokenizer, stream);
}

From source file:edu.illinois.cs.cogcomp.bigdata.lucene.ASCIIEnglishAnalyzer.java

License:Open Source License

/**
 * Builds the analysis chain: standard tokenization, ASCII folding,
 * possessive stripping, word splitting on non-alphabetic characters,
 * lowercasing, English stopword removal, and Porter stemming.
 *
 * @param fieldName the field being analyzed (not used by this chain)
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();
    TokenStream stream = new StandardFilter(tokenizer);
    stream = new ASCIIFoldingFilter(stream);        // fold accented chars to ASCII
    stream = new EnglishPossessiveFilter(stream);   // drop trailing "'s"
    stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(stream, EnglishAnalyzer.getDefaultStopSet());
    stream = new PorterStemFilter(stream);
    return new TokenStreamComponents(tokenizer, stream);
}

From source file:edu.illinois.cs.cogcomp.bigdata.lucene.MinimalAnalyzer.java

License:Open Source License

/**
 * Builds a minimal analysis chain: standard tokenization, ASCII folding,
 * lowercasing, possessive stripping, stopword removal, word splitting on
 * non-alphabetic characters, and Porter stemming — in exactly that order.
 *
 * @param fieldName the field being analyzed (not used by this chain)
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();
    TokenStream stream = new StandardFilter(tokenizer);
    stream = new ASCIIFoldingFilter(stream);        // fold accented chars to ASCII
    stream = new LowerCaseFilter(stream);
    stream = new EnglishPossessiveFilter(stream);   // drop trailing "'s"
    stream = new StopFilter(stream, stopwords);
    stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
    stream = new PorterStemFilter(stream);
    return new TokenStreamComponents(tokenizer, stream);
}

From source file:org.apache.solr.analysis.EnglishPossessiveFilterFactory.java

License:Apache License

/**
 * Wraps the given stream with an {@link EnglishPossessiveFilter}, which
 * strips the trailing possessive "'s" from tokens.
 *
 * @param input the upstream token stream to wrap
 * @return the wrapped stream
 */
public TokenStream create(TokenStream input) {
    return new EnglishPossessiveFilter(input);
}

From source file:org.codelibs.elasticsearch.index.analysis.SnowballAnalyzer.java

License:Apache License

/**
 * Constructs a {@link StandardTokenizer} filtered by an optional
 * {@link EnglishPossessiveFilter}, a language-appropriate lowercase filter,
 * an optional {@link StopFilter}, and a {@link SnowballFilter}.
 */
@Override
public TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();
    TokenStream stream = tokenizer;

    // Strip the possessive "'s" before English-family stemmers run.
    final boolean englishFamily =
            name.equals("English") || name.equals("Porter") || name.equals("Lovins");
    if (englishFamily) {
        stream = new EnglishPossessiveFilter(stream);
    }

    // Turkish lowercasing differs (dotted/dotless i); the stemmer expects
    // the Turkish-specific filter for that language.
    stream = name.equals("Turkish")
            ? new TurkishLowerCaseFilter(stream)
            : new LowerCaseFilter(stream);

    if (stopSet != null) {
        stream = new StopFilter(stream, stopSet);
    }

    stream = new SnowballFilter(stream, name);
    return new TokenStreamComponents(tokenizer, stream);
}

From source file:org.elasticsearch.analysis.common.SnowballAnalyzer.java

License:Apache License

/**
 * Constructs a {@link StandardTokenizer} filtered by an optional
 * {@link EnglishPossessiveFilter}, a language-appropriate lowercase filter,
 * an optional {@link StopFilter}, and a {@link SnowballFilter}.
 */
@Override
public TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();
    TokenStream stream = tokenizer;

    // English-family stemmers expect the possessive "'s" to be removed first.
    if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) {
        stream = new EnglishPossessiveFilter(stream);
    }

    // Turkish has its own lowercasing rules (dotted/dotless i).
    if (name.equals("Turkish")) {
        stream = new TurkishLowerCaseFilter(stream);
    } else {
        stream = new LowerCaseFilter(stream);
    }

    if (stopSet != null) {
        stream = new StopFilter(stream, stopSet);
    }

    stream = new SnowballFilter(stream, name);
    return new TokenStreamComponents(tokenizer, stream);
}