List of usage examples for org.apache.lucene.analysis.miscellaneous WordDelimiterGraphFilter CATENATE_ALL
int CATENATE_ALL
To view the source code for org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.CATENATE_ALL, click the Source Link.
Causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
From source file:com.qwazr.search.bench.test.MultiField.PayloadAnalyzer.java
License:Apache License
@Override final protected TokenStreamComponents createComponents(final String fieldName) { final Tokenizer tokenizer = new UAX29URLEmailTokenizer(); // Read the payload from the first token final FirstTokenPayloadFilter firstTokenPayloadFilter = new FirstTokenPayloadFilter(tokenizer); TokenStream stream = new WordDelimiterGraphFilter(firstTokenPayloadFilter, WordDelimiterGraphFilter.GENERATE_WORD_PARTS | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterGraphFilter.CATENATE_ALL | WordDelimiterGraphFilter.CATENATE_NUMBERS | WordDelimiterGraphFilter.CATENATE_WORDS | WordDelimiterGraphFilter.PRESERVE_ORIGINAL, CharArraySet.EMPTY_SET);//from w ww . j av a 2 s. co m stream = SmartAnalyzerSet.ascii(stream); // Set the payload to any token stream = firstTokenPayloadFilter.newSetter(stream); return new TokenStreamComponents(tokenizer, stream) { @Override protected void setReader(final Reader reader) { super.setReader(reader); } }; }