Example usage for org.apache.lucene.analysis.shingle ShingleFilter setFillerToken

List of usage examples for org.apache.lucene.analysis.shingle ShingleFilter setFillerToken

Introduction

On this page you can find example usage for the setFillerToken method of org.apache.lucene.analysis.shingle.ShingleFilter.

Prototype

public void setFillerToken(String fillerToken) 

Source Link

Document

Sets the string to insert for each position at which there is no token (i.e., when position increment is greater than one).

Usage

From source file: be/ugent/tiwi/sleroux/newsrec/recommendationstester/EnAnalyzer.java

License:Apache License

/**
 * Assembles the English analysis pipeline used by this analyzer.
 *
 * <p>The input is tokenized with a {@code StandardTokenizer} and then passed,
 * in order, through: {@code StandardFilter}, {@code LowerCaseFilter},
 * {@code TrimFilter}, {@code ASCIIFoldingFilter}, an optional
 * {@code StopFilter} (only when a stop-word set is configured),
 * {@code EnglishPossessiveFilter}, a {@code SnowballFilter} driven by an
 * {@code EnglishStemmer}, and finally a {@code ShingleFilter} configured with
 * shingle sizes 2 through 3.
 *
 * @param fieldName name of the field being analyzed (not used by this implementation)
 * @param reader    character source handed to the tokenizer
 * @return the tokenizer paired with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new StandardTokenizer(Config.LUCENE_VERSION, reader);

    // Normalisation stages: standard clean-up, lower-casing, trimming, ASCII folding.
    TokenStream chain = new StandardFilter(Config.LUCENE_VERSION, source);
    chain = new LowerCaseFilter(Config.LUCENE_VERSION, chain);
    chain = new TrimFilter(Config.LUCENE_VERSION, chain);
    chain = new ASCIIFoldingFilter(chain);

    // Stop-word removal is optional; its absence is reported but not fatal.
    if (stopwords == null) {
        logger.warn("No stopwordsfile provided, no stopword removal");
    } else {
        chain = new StopFilter(Config.LUCENE_VERSION, chain, stopwords);
    }

    // Linguistic stages: drop possessives, then stem with the English Snowball stemmer.
    chain = new EnglishPossessiveFilter(Config.LUCENE_VERSION, chain);
    chain = new SnowballFilter(chain, new EnglishStemmer());

    // Shingles of size 2..3. The filler token is the string inserted at positions
    // that have no token (e.g. where a stop word was removed); null is passed here.
    // NOTE(review): presumably null means "insert nothing" - confirm against the
    // ShingleFilter version in use.
    final ShingleFilter shingles = new ShingleFilter(chain, 2, 3);
    shingles.setFillerToken(null);

    return new TokenStreamComponents(source, shingles);
}