Usage examples for the org.apache.lucene.analysis.ngram.NGramTokenFilter constructor
public NGramTokenFilter(TokenStream input, int minGram, int maxGram, boolean preserveOriginal) — note: the examples below use the older, deprecated overload that also takes a leading Version argument.
From source file:com.stripe.ctf.instantcodesearch.CaseSensitiveAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(final String fieldName, Reader reader) { final WhitespaceTokenizer src = new WhitespaceTokenizer(matchVersion, reader); // src.setMaxTokenLength(maxTokenLength); // TokenStream tok = new StandardFilter(matchVersion, src); // tok = new StopFilter(matchVersion, tok, stopwords); TokenStream tok = new NGramTokenFilter(matchVersion, src, 3, 25); return new TokenStreamComponents(src, tok) { @Override//w w w . j a va 2s . c om protected void setReader(final Reader reader) throws IOException { // src.setMaxTokenLength(CaseSensitiveAnalyzer.this.maxTokenLength); super.setReader(reader); } }; }
From source file:org.elasticsearch.index.analysis.NGramTokenFilterFactory.java
License:Apache License
@SuppressWarnings("deprecation") @Override//from w ww . ja va 2 s. c o m public TokenStream create(TokenStream tokenStream) { final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3 return new NGramTokenFilter(version, tokenStream, minGram, maxGram); }
From source file:org.zenoss.zep.index.impl.lucene.LuceneIdentifierAnalyzer.java
License:Open Source License
@Override protected TokenStreamComponents createComponents(String s, Reader reader) { final Tokenizer source = new WhitespaceTokenizer(IndexConstants.LUCENE_VERSION, reader); TokenStream filter = new LowerCaseFilter(IndexConstants.LUCENE_VERSION, source); // Use the 4.3 NGram filter here because it changed a lot >=4.4 filter = new NGramTokenFilter(Version.LUCENE_43, filter, MIN_NGRAM_SIZE, MAX_NGRAM_SIZE); return new TokenStreamComponents(source, filter); }