Example usage for org.apache.lucene.analysis.ngram NGramTokenizer NGramTokenizer

List of usage examples for the org.apache.lucene.analysis.ngram.NGramTokenizer constructor

Introduction

On this page you can find example usages of the NGramTokenizer constructor of the class org.apache.lucene.analysis.ngram.NGramTokenizer.

Prototype

public NGramTokenizer(AttributeFactory factory, int minGram, int maxGram) 

Source Link

Document

Creates NGramTokenizer with given min and max n-grams.

Usage

From source file:drakkar.mast.retrieval.analysis.NGramAnalyzer.java

/**
 *
 * @param fieldName/*  w ww  .  j  ava2 s  .  c  o m*/
 * @param reader
 * @return
 */
public TokenStream tokenStream(String fieldName, Reader reader) {

    TokenStream stream = new NGramTokenizer(reader, 1, 30);
    stream = new LowerCaseFilter(stream);

    return stream;
}

From source file:drakkar.mast.retrieval.analysis.NGramAnalyzer.java

/**
 *
 * @param fieldName/*from   w  w w.  j av  a2s .  c  om*/
 * @param reader
 * @return
 * @throws IOException
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        streams.source = new NGramTokenizer(reader, 1, 30);
        streams.result = new LowerCaseFilter(streams.source);
        setPreviousTokenStream(streams);
    } else {
        streams.source.reset(reader);
    }
    return streams.result;
}

From source file:drakkar.mast.retrieval.analysis.NGramAnalyzerCaseSensitive.java

/**
 *
 * @param fieldName/*from   w w  w .ja  v a  2  s.  c  o m*/
 * @param reader
 * @return
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream stream = new NGramTokenizer(reader, 1, 30);
    return stream;
}

From source file:drakkar.mast.retrieval.analysis.NGramAnalyzerCaseSensitive.java

/**
 *
 * @param fieldName/*w  w w. j av  a 2s . c  o m*/
 * @param reader
 * @return
 * @throws IOException
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
    if (tokenizer == null) {
        tokenizer = new NGramTokenizer(reader, 1, 30);
        setPreviousTokenStream(tokenizer);
    } else {
        tokenizer.reset(reader);
    }
    return tokenizer;
}

From source file:drakkar.mast.retrieval.ngram.NGramAnalyzer.java

/**
 * Builds the full analysis chain for a field. Stages, in order:
 * {@code NGramTokenizer} (min gram 1, max gram 30), {@code LowerCaseFilter},
 * {@code PorterStemFilter}, and {@code StopFilter} configured with this
 * analyzer's {@code stopwords}.
 *
 * @param fieldName name of the field being analyzed; not read by this method
 * @param reader    source of the text to tokenize
 * @return the outermost filter of the chain
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream grams = new NGramTokenizer(reader, 1, 30);
    final TokenStream lowered = new LowerCaseFilter(grams);
    final TokenStream stemmed = new PorterStemFilter(lowered);
    return new StopFilter(false, stemmed, stopwords, true);
}

From source file:drakkar.mast.retrieval.ngram.NGramAnalyzer.java

/**
 * Returns a token stream for {@code reader}, reusing the chain stored via
 * {@code setPreviousTokenStream} when one exists.
 *
 * <p>The chain is the same one {@code tokenStream} builds:
 * {@code NGramTokenizer} (min gram 1, max gram 30), then
 * {@code LowerCaseFilter}, {@code PorterStemFilter}, and {@code StopFilter}
 * with this analyzer's {@code stopwords}. Each filter wraps the previous
 * stage, so all of them take effect.
 *
 * @param fieldName name of the field being analyzed; not read by this method
 * @param reader    source of the text to tokenize
 * @return the saved or newly built filter chain
 * @throws IOException declared by the signature; presumably raised when
 *                     resetting the tokenizer fails
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        streams.source = new NGramTokenizer(reader, 1, 30);
        // Wrap the previous stage (streams.result), not streams.source;
        // wrapping the source would drop every filter but the last.
        streams.result = new LowerCaseFilter(streams.source);
        streams.result = new PorterStemFilter(streams.result);
        streams.result = new StopFilter(false, streams.result, stopwords, true);

        setPreviousTokenStream(streams);
    } else {
        // Reuse path: point the existing tokenizer at the new input.
        streams.source.reset(reader);
    }
    return streams.result;
}

From source file:drakkar.mast.retrieval.ngram.NGramAnalyzerCaseSensitive.java

/**
 * Builds the token stream for a field: a bare {@code NGramTokenizer} over
 * {@code reader} with min gram 1 and max gram 30. No filters are applied.
 *
 * @param fieldName name of the field being analyzed; not read by this method
 * @param reader    source of the text to tokenize
 * @return the n-gram tokenizer
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenizer(reader, 1, 30);
}

From source file:drakkar.mast.retrieval.ngram.NGramAnalyzerCaseSensitive.java

/**
 * Returns a tokenizer for {@code reader}, reusing the one stored via
 * {@code setPreviousTokenStream} when available; otherwise creates and
 * stores a new {@code NGramTokenizer} (min gram 1, max gram 30).
 *
 * @param fieldName name of the field being analyzed; not read by this method
 * @param reader    source of the text to tokenize
 * @return the saved or newly created tokenizer
 * @throws IOException declared by the signature; presumably raised when
 *                     resetting the tokenizer fails
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    final Tokenizer previous = (Tokenizer) getPreviousTokenStream();
    if (previous != null) {
        // Reuse path: re-target the saved tokenizer at the new input.
        previous.reset(reader);
        return previous;
    }

    final Tokenizer created = new NGramTokenizer(reader, 1, 30);
    setPreviousTokenStream(created);
    return created;
}

From source file:org.apache.solr.analysis.NGramTokenizerFactory.java

License:Apache License

/**
 * Creates the {@link TokenStream} of n-grams from the given {@link Reader}.
 *
 * @param input source of the text to tokenize
 * @return a new {@code NGramTokenizer} built with this factory's
 *         {@code minGramSize} and {@code maxGramSize}
 */
public NGramTokenizer create(Reader input) {
    final NGramTokenizer tokenizer = new NGramTokenizer(input, minGramSize, maxGramSize);
    return tokenizer;
}

From source file:org.dbpedia.spotlight.lucene.analysis.NGramAnalyzer.java

License:Apache License

/**
 * Builds the token stream for a field: an {@code NGramTokenizer} over
 * {@code reader} using this analyzer's {@code minGram} and {@code maxGram},
 * wrapped in a {@code PositionFilter}.
 *
 * @param fieldName name of the field being analyzed; not read by this method
 * @param reader    source of the text to tokenize
 * @return the n-gram tokenizer wrapped in a position filter
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new PositionFilter(new NGramTokenizer(reader, minGram, maxGram));
}