List of usage examples for org.apache.lucene.analysis.shingle.ShingleFilter#setFillerToken
public void setFillerToken(String fillerToken)
From source file: be.ugent.tiwi.sleroux.newsrec.recommendationstester.EnAnalyzer.java
License:Apache License
/** * * @param fieldName//from w w w . jav a 2s . co m * @param reader * @return */ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { //reader = new HTMLStripCharFilter(reader); Tokenizer t = new StandardTokenizer(Config.LUCENE_VERSION, reader); TokenStream result = t; //result = new SynonymFilter(result, synonyms, true); result = new StandardFilter(Config.LUCENE_VERSION, result); result = new LowerCaseFilter(Config.LUCENE_VERSION, result); result = new TrimFilter(Config.LUCENE_VERSION, result); result = new ASCIIFoldingFilter(result); if (stopwords != null) { result = new StopFilter(Config.LUCENE_VERSION, result, stopwords); } else { logger.warn("No stopwordsfile provided, no stopword removal"); } //result = new LowerCaseFilter(Version.LUCENE_46, result); result = new EnglishPossessiveFilter(Config.LUCENE_VERSION, result); //result = new PorterStemFilter(result); result = new SnowballFilter(result, new EnglishStemmer()); ShingleFilter sf = new ShingleFilter(result, 2, 3); sf.setFillerToken(null); return new TokenStreamComponents(t, sf); }