List of usage examples for the org.apache.lucene.analysis.ngram.NGramTokenizer constructor
public NGramTokenizer(AttributeFactory factory, int minGram, int maxGram)
From source file:org.zanata.hibernate.search.ConfigurableNgramAnalyzer.java
License:Open Source License
/**
 * Builds the token stream for this analyzer.
 *
 * <p>An {@code NGramTokenizer} is created over {@code reader} using the
 * configured {@code ngramMinLength} and {@code ngramMaxLength}. When
 * {@code foldCase} is set, the tokenizer is wrapped in a
 * {@code ULowerCaseFilter}; otherwise the tokenizer itself is returned.
 *
 * @param fieldName name of the field being analyzed (not used by this
 *                  implementation)
 * @param reader    source of the text to tokenize
 * @return the n-gram tokenizer, wrapped in a lower-casing filter when
 *         {@code foldCase} is set
 */
@SuppressWarnings("resource") // the tokenizer is returned to the caller, not closed here
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    NGramTokenizer grams =
            new NGramTokenizer(reader, ngramMinLength, ngramMaxLength);
    if (!foldCase) {
        return grams;
    }
    return new ULowerCaseFilter(grams);
}
From source file:org.zanata.hibernate.search.UnigramAnalyzer.java
License:Open Source License
/** * IMPORTANT: make sure this matches the AnalyzerDef in * {@link org.zanata.model.HTextContainer}. *//* w w w.ja v a 2 s.c o m*/ @Override public TokenStream tokenStream(String fieldName, Reader reader) { NGramTokenizer source = new NGramTokenizer(reader, 1, 1); if (fieldName != null && fieldName.contains("content-nocase")) { // TODO should we be using ULowerCaseFilter (also in HTextContainer)? return new LowerCaseFilter(matchVersion, source); } else { return source; } }