List of usage examples for org.apache.lucene.analysis.standard.StandardTokenizer#setMaxTokenLength(int)
public void setMaxTokenLength(int length)
From source file:mllab_lucene.StandardAnalyzerHtml.java
License:Apache License
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override//from ww w . j a v a2s . c om protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(StandardAnalyzerHtml.this.maxTokenLength); super.setReader(reader); } }; }
From source file:nl.knaw.huygens.timbuctoo.lucene.accentanalyzer.MySearchAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(final String fieldName) { final StandardTokenizer src = new StandardTokenizer(); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(src); return new TokenStreamComponents(src, tok) { @Override//from ww w . jav a2s .co m protected void setReader(final Reader reader) { src.setMaxTokenLength(MySearchAnalyzer.this.maxTokenLength); super.setReader(reader); } }; }
From source file:org.apache.blur.analysis.NoStopWordStandardAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override//from w ww .ja v a2 s. co m protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(NoStopWordStandardAnalyzer.this.maxTokenLength); super.setReader(reader); } }; }
From source file:org.apache.solr.analysis.StandardTokenizerFactory.java
License:Apache License
/**
 * Creates a {@link StandardTokenizer} over the given reader, applying the
 * factory's configured maximum token length.
 */
public StandardTokenizer create(Reader input) {
    final StandardTokenizer result = new StandardTokenizer(luceneMatchVersion, input);
    result.setMaxTokenLength(maxTokenLength);
    return result;
}
From source file:org.apache.vxquery.runtime.functions.index.CaseSensitiveAnalyzer.java
License:Apache License
/**
 * Builds a case-preserving analysis chain: a version-appropriate standard
 * tokenizer (StandardTokenizer on 4.7+, StandardTokenizer40 before that)
 * feeding StandardFilter and StopFilter. No LowerCaseFilter is applied,
 * keeping the analyzer case sensitive.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer src;
    if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
        final StandardTokenizer modern = new StandardTokenizer();
        modern.setMaxTokenLength(maxTokenLength);
        src = modern;
    } else {
        final StandardTokenizer40 legacy = new StandardTokenizer40();
        legacy.setMaxTokenLength(maxTokenLength);
        src = legacy;
    }

    TokenStream chain = new StandardFilter(src);
    chain = new StopFilter(chain, stopwords);

    return new TokenStreamComponents(src, chain) {
        @Override
        protected void setReader(final Reader reader) {
            // Re-apply the current limit to whichever tokenizer variant was built.
            final int limit = CaseSensitiveAnalyzer.this.maxTokenLength;
            if (src instanceof StandardTokenizer) {
                ((StandardTokenizer) src).setMaxTokenLength(limit);
            } else {
                ((StandardTokenizer40) src).setMaxTokenLength(limit);
            }
            super.setReader(reader);
        }
    };
}
From source file:org.elasticsearch.index.analysis.NGram.NGramAnalyzer.java
License:Apache License
/**
 * Builds an n-gram analysis chain: a StandardTokenizer (Lucene 4.3 semantics,
 * capped at the configured maximum token length) feeding an NGramTokenFilter
 * that emits grams of length 3 through 10.
 */
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_43, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);
    final TokenStream ngrams = new NGramTokenFilter(tokenizer, 3, 10);
    return new Analyzer.TokenStreamComponents(tokenizer, ngrams);
}
From source file:org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer.java
License:Apache License
/**
 * Builds the analysis chain: StandardTokenizer feeding StandardFilter and
 * LowerCaseFilter, with a StopFilter appended only when stop words are
 * configured. The token-length cap is deliberately pinned to
 * {@link StandardAnalyzer#DEFAULT_MAX_TOKEN_LENGTH} rather than any
 * per-instance setting.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new LowerCaseFilter(matchVersion, chain);
    if (!stopwords.isEmpty()) {
        chain = new StopFilter(matchVersion, chain, stopwords);
    }

    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            // Keep the default cap on reuse; this analyzer never varies it.
            tokenizer.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
            super.setReader(reader);
        }
    };
}
From source file:org.elasticsearch.index.analysis.StandardTokenizerFactory.java
License:Apache License
/**
 * Creates a {@link StandardTokenizer} over the given reader, applying the
 * factory's configured maximum token length.
 */
@Override
public Tokenizer create(Reader reader) {
    final StandardTokenizer result = new StandardTokenizer(version, reader);
    result.setMaxTokenLength(maxTokenLength);
    return result;
}
From source file:org.karsha.tokenize.DefaultTokenizer.java
License:Open Source License
public TokenStream tokenStream(Reader reader) { //StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym); StandardTokenizer tokenStream = new StandardTokenizer(Version.LUCENE_35, reader); tokenStream.setMaxTokenLength(maxTokenLength); TokenStream result = new StandardFilter(tokenStream); result = new LowerCaseFilter(result); result = new StopFilter(Version.LUCENE_35, result, lu_stop_words); result = new StopFilter(Version.LUCENE_35, result, te_stop_words); //result = new PorterStemFilter(result); return result; }
From source file:org.karsha.tokenize.SimpleTokenizer.java
License:Open Source License
public TokenStream tokenStream(Reader reader) { //StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym); StandardTokenizer tokenStream = new StandardTokenizer(Version.LUCENE_35, reader); tokenStream.setMaxTokenLength(maxTokenLength); TokenStream result = new StandardFilter(tokenStream); result = new LowerCaseFilter(result); //result = new StopFilter(result, TERRIER_STOP_WORDS); //result = new PorterStemFilter(result); return result; }