List of usage examples for org.apache.lucene.analysis.standard StandardTokenizer setMaxTokenLength
public void setMaxTokenLength(int length)
From source file:de.jetwick.es.JetwickAnalyzer.java
License:Apache License
/**
 * Builds the analysis chain: StandardTokenizer -> Jetwick word/number
 * splitting filter -> lowercasing. The anonymous components override
 * re-applies the analyzer's current max token length on each reuse.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);
    tokenizer.setReplaceInvalidAcronym(replaceInvalidAcronym);
    // Project-specific filter that handles word/number splitting and catenation options.
    TokenStream chain = JetwickFilterFactory.myCreate(tokenizer, handleAsChar, handleAsDigit,
            generateWordParts, generateNumberParts, catenateWords, catenateNumbers,
            catenateAll, splitOnCaseChange, preserveOriginal, splitOnNumerics,
            stemEnglishPossessive, protectedWords);
    chain = new LowerCaseFilter(matchVersion, chain);
    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Pick up any change to maxTokenLength made since the components were built.
            tokenizer.setMaxTokenLength(JetwickAnalyzer.this.maxTokenLength);
            return super.reset(reader);
        }
    };
}
From source file:dk.defxws.fgslucene.PhaidraAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); // unipd adding ElisionFilter for apostrophes tok = new ElisionFilter(matchVersion, tok, DEFAULT_ARTICLES); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); // rasta: adding ASCIIFoldingFilter to enable search for accent tok = new ASCIIFoldingFilter(tok); return new TokenStreamComponents(src, tok) { @Override// ww w . j av a 2 s . c o m protected boolean reset(final Reader reader) throws IOException { src.setMaxTokenLength(PhaidraAnalyzer.this.maxTokenLength); return super.reset(reader); } }; }
From source file:edu.harvard.iq.dvn.core.index.DVNSearchAnalyzer.java
License:Apache License
public TokenStream tokenStream(String fieldName, Reader reader) { /* It seems that LUCENE_CURRENT is ultimately what we should use, but there is an ominous warning in the source code about using it -- using LUCENE_29 for now, will look into this further */ // StandardTokenizer tokenStream = new StandardTokenizer(Version.LUCENE_CURRENT,reader); StandardTokenizer tokenStream = new StandardTokenizer(Version.LUCENE_29, reader); tokenStream.setMaxTokenLength(maxTokenLength); TokenStream result = new StandardFilter(tokenStream); result = new LowerCaseFilter(result); result = new PorterStemFilter(result); return result; }
From source file:edu.ur.lucene.analysis.StandardWithACIIFoldingFilter.java
License:Apache License
/**
 * Builds the analysis chain: StandardTokenizer -> StandardFilter ->
 * lowercasing -> stop-word removal -> ICU folding (Unicode-aware
 * accent/case folding).
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);
    tokenizer.setReplaceInvalidAcronym(replaceInvalidAcronym);
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, stopwords);
    chain = new ICUFoldingFilter(chain);
    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Re-apply the analyzer's current max token length on reuse.
            tokenizer.setMaxTokenLength(StandardWithACIIFoldingFilter.this.maxTokenLength);
            return super.reset(reader);
        }
    };
}
From source file:indexing.MyStandardAnalyzer.java
License:Apache License
@Override public TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override// w w w. jav a2 s . c o m public void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(MyStandardAnalyzer.this.maxTokenLength); super.setReader(reader); } }; }
From source file:intelligentWebAlgorithms.algos.search.lucene.analyzer.CustomAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override//from w w w .j a va 2 s .c om protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(CustomAnalyzer.this.maxTokenLength); super.setReader(reader); } }; }
From source file:ivory.tokenize.LuceneTokenizer.java
License:Apache License
/**
 * Builds the legacy analysis chain: StandardTokenizer -> StandardFilter ->
 * lowercasing -> Terrier stop-word removal -> Porter stemming.
 */
private TokenStream tokenStream(Reader reader) {
    StandardTokenizer tokenizer = new StandardTokenizer(reader, replaceInvalidAcronym);
    tokenizer.setMaxTokenLength(maxTokenLength);
    TokenStream chain = new StandardFilter(tokenizer);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, TERRIER_STOP_WORDS);
    chain = new PorterStemFilter(chain);
    return chain;
}
From source file:lucenejavafx.CustomAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); //src.setReplaceInvalidAcronym(replaceInvalidAcronym); TokenStream tok = new StandardFilter(matchVersion, src); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); tok = new PorterStemFilter(tok); return new TokenStreamComponents(src, tok) { protected boolean reset(final Reader reader) throws IOException { src.setMaxTokenLength(CustomAnalyzer.this.maxTokenLength); return reset(reader); }// www .j av a2 s . co m }; }
From source file:luceneprueba.CustomAnalyzers.ReviewAnalyzer.java
@Override protected TokenStreamComponents createComponents(final String fieldName) { final StandardTokenizer src = new StandardTokenizer(); src.setMaxTokenLength(maxTokenLength); TokenStream tokenizer = new StandardFilter(src); tokenizer = new LowerCaseFilter(tokenizer); tokenizer = new StopFilter(tokenizer, stopwords); return new TokenStreamComponents(src, tokenizer) { @Override// w w w.j av a 2s.com protected void setReader(final Reader reader) { src.setMaxTokenLength(ReviewAnalyzer.this.maxTokenLength); super.setReader(reader); } }; }
From source file:mj.ocraptor.database.StandardAnalyzer.java
License:Apache License
/**
 * Builds the analysis chain: StandardTokenizer -> StandardFilter ->
 * lowercasing -> stop-word removal. On reuse, the incoming reader is wrapped
 * in a MappingCharFilter (using the class's `map`) before the components are
 * reset, so character mappings are applied ahead of tokenization.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, stopwords);
    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Wrap the raw reader with the character-mapping filter before
            // resetting; also re-apply the current max token length.
            Reader mapped = new MappingCharFilter(map, CharReader.get(reader));
            tokenizer.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
            return super.reset(mapped);
        }
    };
}