List of usage examples for the constructor of org.apache.lucene.analysis.ngram.NGramTokenizer
public NGramTokenizer(AttributeFactory factory, int minGram, int maxGram)
From source file:drakkar.mast.retrieval.analysis.NGramAnalyzer.java
/** * * @param fieldName/* w ww . j ava2 s . c o m*/ * @param reader * @return */ public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream stream = new NGramTokenizer(reader, 1, 30); stream = new LowerCaseFilter(stream); return stream; }
From source file:drakkar.mast.retrieval.analysis.NGramAnalyzer.java
/** * * @param fieldName/*from w w w. j av a2s . c om*/ * @param reader * @return * @throws IOException */ @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); streams.source = new NGramTokenizer(reader, 1, 30); streams.result = new LowerCaseFilter(streams.source); setPreviousTokenStream(streams); } else { streams.source.reset(reader); } return streams.result; }
From source file:drakkar.mast.retrieval.analysis.NGramAnalyzerCaseSensitive.java
/** * * @param fieldName/*from w w w .ja v a 2 s. c o m*/ * @param reader * @return */ public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream stream = new NGramTokenizer(reader, 1, 30); return stream; }
From source file:drakkar.mast.retrieval.analysis.NGramAnalyzerCaseSensitive.java
/** * * @param fieldName/*w w w. j av a 2s . c o m*/ * @param reader * @return * @throws IOException */ @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream(); if (tokenizer == null) { tokenizer = new NGramTokenizer(reader, 1, 30); setPreviousTokenStream(tokenizer); } else { tokenizer.reset(reader); } return tokenizer; }
From source file:drakkar.mast.retrieval.ngram.NGramAnalyzer.java
/**
 * Builds a fresh analysis chain over {@code reader}: n-gram tokenizer
 * (gram sizes 1 through 30), then lower-casing, then Porter stemming, then
 * stop-word removal against the {@code stopwords} field.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the outermost filter of the chain
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream grams = new NGramTokenizer(reader, 1, 30);
    final TokenStream lowered = new LowerCaseFilter(grams);
    final TokenStream stemmed = new PorterStemFilter(lowered);
    return new StopFilter(false, stemmed, stopwords, true);
}
From source file:drakkar.mast.retrieval.ngram.NGramAnalyzer.java
/**
 * Returns the saved tokenizer/filter chain, building it on the first call.
 *
 * <p>The chain is n-gram tokenizer (gram sizes 1 through 30), then
 * {@link LowerCaseFilter}, then {@link PorterStemFilter}, then
 * {@link StopFilter} against the {@code stopwords} field. Each filter wraps
 * the previous stage. Wrapping {@code streams.source} directly in every
 * filter would leave only the last filter in effect, because each assignment
 * to {@code streams.result} replaces the one before it.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the saved {@code result} stream (outermost filter of the chain)
 * @throws IOException declared by the overridden method; presumably raised by
 *                     {@code reset(reader)} (not visible here)
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        streams.source = new NGramTokenizer(reader, 1, 30);
        // Feed each stage into the next so that all three filters apply.
        TokenStream chain = new LowerCaseFilter(streams.source);
        chain = new PorterStemFilter(chain);
        streams.result = new StopFilter(false, chain, stopwords, true);
        setPreviousTokenStream(streams);
    } else {
        // Reuse the existing chain; only the tokenizer needs the new input.
        streams.source.reset(reader);
    }
    return streams.result;
}
From source file:drakkar.mast.retrieval.ngram.NGramAnalyzerCaseSensitive.java
/**
 * Builds a case-preserving analysis chain: a bare {@link NGramTokenizer}
 * over {@code reader} with gram sizes 1 through 30 and no further filters.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the newly created n-gram tokenizer
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenizer(reader, 1, 30);
}
From source file:drakkar.mast.retrieval.ngram.NGramAnalyzerCaseSensitive.java
@Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream(); if (tokenizer == null) { tokenizer = new NGramTokenizer(reader, 1, 30); setPreviousTokenStream(tokenizer); } else {//from ww w . j a v a 2 s .c o m tokenizer.reset(reader); } return tokenizer; }
From source file:org.apache.solr.analysis.NGramTokenizerFactory.java
License:Apache License
/**
 * Creates the {@link TokenStream} of n-grams from the given {@link Reader},
 * using this factory's {@code minGramSize} and {@code maxGramSize}.
 *
 * @param input source of the text to tokenize
 * @return a new tokenizer reading from {@code input}
 */
public NGramTokenizer create(Reader input) {
    final NGramTokenizer tokenizer = new NGramTokenizer(input, minGramSize, maxGramSize);
    return tokenizer;
}
From source file:org.dbpedia.spotlight.lucene.analysis.NGramAnalyzer.java
License:Apache License
/**
 * Builds a fresh analysis chain: an {@link NGramTokenizer} over
 * {@code reader}, sized by the {@code minGram} and {@code maxGram} fields,
 * wrapped in a {@link PositionFilter}.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @param reader    source of the text to tokenize
 * @return the position filter wrapped around the n-gram tokenizer
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream grams = new NGramTokenizer(reader, minGram, maxGram);
    return new PositionFilter(grams);
}