List of usage examples for org.apache.lucene.analysis.miscellaneous WordDelimiterGraphFilter CATENATE_NUMBERS
int CATENATE_NUMBERS
To view the source code for org.apache.lucene.analysis.miscellaneous WordDelimiterGraphFilter CATENATE_NUMBERS, click the Source Link.
"500-42" => "50042"
From source file: com.qwazr.search.bench.test.MultiField.PayloadAnalyzer.java
License: Apache License
@Override final protected TokenStreamComponents createComponents(final String fieldName) { final Tokenizer tokenizer = new UAX29URLEmailTokenizer(); // Read the payload from the first token final FirstTokenPayloadFilter firstTokenPayloadFilter = new FirstTokenPayloadFilter(tokenizer); TokenStream stream = new WordDelimiterGraphFilter(firstTokenPayloadFilter, WordDelimiterGraphFilter.GENERATE_WORD_PARTS | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterGraphFilter.CATENATE_ALL | WordDelimiterGraphFilter.CATENATE_NUMBERS | WordDelimiterGraphFilter.CATENATE_WORDS | WordDelimiterGraphFilter.PRESERVE_ORIGINAL, CharArraySet.EMPTY_SET);//from w w w . ja va2 s .c om stream = SmartAnalyzerSet.ascii(stream); // Set the payload to any token stream = firstTokenPayloadFilter.newSetter(stream); return new TokenStreamComponents(tokenizer, stream) { @Override protected void setReader(final Reader reader) { super.setReader(reader); } }; }