Example usage for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter PRESERVE_ORIGINAL

List of usage examples for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter PRESERVE_ORIGINAL

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter PRESERVE_ORIGINAL.

Prototype

int PRESERVE_ORIGINAL

To view the source code for org.apache.lucene.analysis.miscellaneous WordDelimiterFilter PRESERVE_ORIGINAL, click the Source Link below.

Click Source Link

Document

Causes the original words to be preserved and added to the subword list (defaults to false)

"500-42" => "500" "42" "500-42"

Usage

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.OakAnalyzer.java

License:Apache License

/**
 * Creates a new {@link OakAnalyzer} with a configurable flag controlling
 * whether the original term being analyzed is preserved as well.
 *
 * @param matchVersion Lucene version to match; see {@link #matchVersion}
 * @param indexOriginalTerm when {@code true}, the analyzer is set up so that
 *                          {@link WordDelimiterFilter#PRESERVE_ORIGINAL}
 *                          is applied to the word delimiter filter
 */
public OakAnalyzer(Version matchVersion, boolean indexOriginalTerm) {
    this.matchVersion = matchVersion;
    if (indexOriginalTerm) {
        INDEX_ORIGINAL_TERM = WordDelimiterFilter.PRESERVE_ORIGINAL;
    } else {
        INDEX_ORIGINAL_TERM = 0;
    }
}

From source file:org.owasp.dependencycheck.data.lucene.FieldAnalyzer.java

License:Apache License

/**
 * Creates the TokenStreamComponents/* ww w.  ja v a2s.c om*/
 *
 * @param fieldName the field name being analyzed
 * @param reader the reader containing the input
 * @return the TokenStreamComponents
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new AlphaNumericTokenizer(version, reader);

    TokenStream stream = source;

    stream = new WordDelimiterFilter(stream,
            WordDelimiterFilter.CATENATE_WORDS | WordDelimiterFilter.GENERATE_WORD_PARTS
                    | WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.PRESERVE_ORIGINAL
                    | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS
                    | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE,
            null);

    stream = new LowerCaseFilter(version, stream);
    stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    return new TokenStreamComponents(source, stream);
}

From source file:org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer.java

License:Apache License

/**
 * Builds the token stream chain used to analyze the stream: an alphanumeric
 * tokenizer followed by word delimiting, lower-casing, URL tokenizing,
 * token-pair concatenation, and English stop-word removal.
 *
 * @param fieldName the field that this lucene analyzer will process
 * @param reader a reader containing the tokens
 * @return the token stream filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer tokenizer = new AlphaNumericTokenizer(version, reader);

    // Split on delimiters, case changes, and numerics, keeping the original
    // token as well.
    final int delimiterFlags = WordDelimiterFilter.GENERATE_WORD_PARTS
            | WordDelimiterFilter.GENERATE_NUMBER_PARTS
            | WordDelimiterFilter.PRESERVE_ORIGINAL
            | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
            | WordDelimiterFilter.SPLIT_ON_NUMERICS
            | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;

    TokenStream chain = new WordDelimiterFilter(tokenizer, delimiterFlags, null);
    chain = new LowerCaseFilter(version, chain);
    chain = new UrlTokenizingFilter(chain);
    // The concatenating filter is retained in an instance field; presumably
    // accessed elsewhere in the class — do not drop the assignment.
    concatenatingFilter = new TokenPairConcatenatingFilter(chain);
    chain = new StopFilter(version, concatenatingFilter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    return new TokenStreamComponents(tokenizer, chain);
}