List of usage examples for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter CATENATE_WORDS
int CATENATE_WORDS
To view the source code for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter CATENATE_WORDS, click Source Link.
"wi-fi" => "wifi"
From source file:org.owasp.dependencycheck.data.lucene.FieldAnalyzer.java
License:Apache License
/** * Creates the TokenStreamComponents/*from w w w. j ava 2 s. com*/ * * @param fieldName the field name being analyzed * @param reader the reader containing the input * @return the TokenStreamComponents */ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = new AlphaNumericTokenizer(version, reader); TokenStream stream = source; stream = new WordDelimiterFilter(stream, WordDelimiterFilter.CATENATE_WORDS | WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.PRESERVE_ORIGINAL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null); stream = new LowerCaseFilter(version, stream); stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); return new TokenStreamComponents(source, stream); }