Usage examples for the no-argument constructor of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer
public UAX29URLEmailTokenizer()
From source file:com.qwazr.search.bench.test.MultiField.PayloadAnalyzer.java
License:Apache License
@Override final protected TokenStreamComponents createComponents(final String fieldName) { final Tokenizer tokenizer = new UAX29URLEmailTokenizer(); // Read the payload from the first token final FirstTokenPayloadFilter firstTokenPayloadFilter = new FirstTokenPayloadFilter(tokenizer); TokenStream stream = new WordDelimiterGraphFilter(firstTokenPayloadFilter, WordDelimiterGraphFilter.GENERATE_WORD_PARTS | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterGraphFilter.CATENATE_ALL | WordDelimiterGraphFilter.CATENATE_NUMBERS | WordDelimiterGraphFilter.CATENATE_WORDS | WordDelimiterGraphFilter.PRESERVE_ORIGINAL, CharArraySet.EMPTY_SET);//from w w w . java2 s . c om stream = SmartAnalyzerSet.ascii(stream); // Set the payload to any token stream = firstTokenPayloadFilter.newSetter(stream); return new TokenStreamComponents(tokenizer, stream) { @Override protected void setReader(final Reader reader) { super.setReader(reader); } }; }
From source file:org.elasticsearch.analysis.common.UAX29URLEmailTokenizerFactory.java
License:Apache License
/**
 * Creates a new {@link UAX29URLEmailTokenizer} whose maximum token length is set from this
 * factory's {@code maxTokenLength}.
 *
 * @return the configured tokenizer instance
 */
@Override
public Tokenizer create() {
    final UAX29URLEmailTokenizer urlEmailTokenizer = new UAX29URLEmailTokenizer();
    urlEmailTokenizer.setMaxTokenLength(maxTokenLength);
    return urlEmailTokenizer;
}