List of usage examples for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter PRESERVE_ORIGINAL
int PRESERVE_ORIGINAL
To view the source code for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter PRESERVE_ORIGINAL, click Source Link.
"500-42" => "500" "42" "500-42"
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.OakAnalyzer.java
License:Apache License
/**
 * Constructs an {@link OakAnalyzer} that can optionally keep the original
 * term being analyzed alongside the parts it is split into.
 *
 * @param matchVersion Lucene version to match See {@link #matchVersion above}
 * @param indexOriginalTerm when {@code true},
 *            {@link WordDelimiterFilter#PRESERVE_ORIGINAL} is recorded as the
 *            word delimiter flag; when {@code false}, {@code 0} is recorded
 */
public OakAnalyzer(Version matchVersion, boolean indexOriginalTerm) {
    this.matchVersion = matchVersion;
    if (indexOriginalTerm) {
        INDEX_ORIGINAL_TERM = WordDelimiterFilter.PRESERVE_ORIGINAL;
    } else {
        INDEX_ORIGINAL_TERM = 0;
    }
}
From source file:org.owasp.dependencycheck.data.lucene.FieldAnalyzer.java
License:Apache License
/** * Creates the TokenStreamComponents/* ww w. ja v a2s.c om*/ * * @param fieldName the field name being analyzed * @param reader the reader containing the input * @return the TokenStreamComponents */ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = new AlphaNumericTokenizer(version, reader); TokenStream stream = source; stream = new WordDelimiterFilter(stream, WordDelimiterFilter.CATENATE_WORDS | WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.PRESERVE_ORIGINAL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null); stream = new LowerCaseFilter(version, stream); stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); return new TokenStreamComponents(source, stream); }
From source file:org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer.java
License:Apache License
/** * Creates a the TokenStreamComponents used to analyze the stream. * * @param fieldName the field that this lucene analyzer will process * @param reader a reader containing the tokens * @return the token stream filter chain *///from w w w .j av a2s .c om @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = new AlphaNumericTokenizer(version, reader); TokenStream stream = source; stream = new WordDelimiterFilter(stream, WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.PRESERVE_ORIGINAL | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null); stream = new LowerCaseFilter(version, stream); stream = new UrlTokenizingFilter(stream); concatenatingFilter = new TokenPairConcatenatingFilter(stream); stream = concatenatingFilter; stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); return new TokenStreamComponents(source, stream); }