Example usage for org.apache.lucene.analysis.ngram NGramTokenFilter NGramTokenFilter

List of usage examples for org.apache.lucene.analysis.ngram NGramTokenFilter NGramTokenFilter

Introduction

On this page you can find an example usage of the org.apache.lucene.analysis.ngram NGramTokenFilter constructor.

Prototype

public NGramTokenFilter(TokenStream input, int minGram, int maxGram, boolean preserveOriginal) 

Source Link

Document

Creates an NGramTokenFilter that, for a given input term, produces all contained n-grams with lengths >= minGram and <= maxGram.

Usage

From source file: com.stripe.ctf.instantcodesearch.CaseSensitiveAnalyzer.java

License: Apache License

/**
 * Builds the token-stream pipeline for this analyzer: a whitespace tokenizer
 * (case is preserved — no lower-casing filter) followed by an n-gram filter
 * that emits all n-grams of length 3 through 25 for each token.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, Reader reader) {
    // Split input on whitespace only; original case is kept intact.
    final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(matchVersion, reader);
    // Expand every whitespace-delimited token into its 3..25-length n-grams.
    final TokenStream ngrams = new NGramTokenFilter(matchVersion, tokenizer, 3, 25);
    // No per-reader customization is needed, so the plain components wrapper
    // (whose setReader already delegates to the tokenizer) suffices.
    return new TokenStreamComponents(tokenizer, ngrams);
}

From source file: org.elasticsearch.index.analysis.NGramTokenFilterFactory.java

License: Apache License

/**
 * Wraps the given token stream in an {@code NGramTokenFilter} configured with
 * this factory's {@code minGram}/{@code maxGram} settings.
 */
@SuppressWarnings("deprecation")
@Override
public TokenStream create(TokenStream tokenStream) {
    // The n-gram filter has been supported since 4.3, but for Lucene 4.3 we
    // substitute the 4.4 version constant when constructing the filter.
    final Version effectiveVersion;
    if (this.version == Version.LUCENE_43) {
        effectiveVersion = Version.LUCENE_44;
    } else {
        effectiveVersion = this.version;
    }
    return new NGramTokenFilter(effectiveVersion, tokenStream, minGram, maxGram);
}

From source file: org.zenoss.zep.index.impl.lucene.LuceneIdentifierAnalyzer.java

License: Open Source License

/**
 * Builds the analysis chain for identifier fields: whitespace tokenization,
 * lower-casing, then n-gram expansion using the Lucene 4.3 n-gram filter.
 */
@Override
protected TokenStreamComponents createComponents(String s, Reader reader) {
    // Tokenize on whitespace, then normalize tokens to lower case.
    final Tokenizer tokenizer = new WhitespaceTokenizer(IndexConstants.LUCENE_VERSION, reader);
    TokenStream stream = new LowerCaseFilter(IndexConstants.LUCENE_VERSION, tokenizer);
    // Deliberately pin LUCENE_43 here: NGramTokenFilter's behavior changed
    // significantly in 4.4 and later.
    stream = new NGramTokenFilter(Version.LUCENE_43, stream, MIN_NGRAM_SIZE, MAX_NGRAM_SIZE);
    return new TokenStreamComponents(tokenizer, stream);
}