Example usage for the org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory constructor

List of usage examples for the org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory constructor

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory constructor.

Prototype

public HTMLStripCharFilterFactory(Map<String, String> args) 

Source Link

Document

Creates a new HTMLStripCharFilterFactory

Usage

From source file: com.basistech.IndexFiles.java

License:Open Source License

/**
 * Builds an {@link Analyzer} whose tokenizer comes from an
 * {@link RLPTokenizerFactory} configured with the arguments set up below.
 *
 * <p>When {@code filters} is {@code true}, the incoming reader is first wrapped
 * by the {@link HTMLStripCharFilterFactory} and the token stream is passed, in
 * order, through the ICU folding, reversed-wildcard and remove-duplicates
 * filter factories. When {@code filters} is {@code false}, the reader is used
 * as-is and the analyzer exposes the bare tokenizer.
 *
 * @param filters whether the char filter and the token filter chain are applied
 * @return a new analyzer; the factories are created once here and shared by
 *         every component the analyzer creates
 */
static Analyzer setupAnalyzer(final boolean filters) {
    final Map<String, String> rlpArgs = Maps.newHashMap();
    rlpArgs.put("rlpContext", "rlp-context.xml");
    rlpArgs.put("lang", "kor");
    rlpArgs.put("postLemma", "true");
    rlpArgs.put("postCompoundComponents", "true");
    rlpArgs.put("postPartOfSpeech", "true");
    final RLPTokenizerFactory rlpFactory = new RLPTokenizerFactory(rlpArgs);

    // A single empty argument map is handed to each of the remaining factories.
    final Map<String, String> noArgs = Maps.newHashMap();
    final HTMLStripCharFilterFactory htmlStrip = new HTMLStripCharFilterFactory(noArgs);
    final ICUFoldingFilterFactory folding = new ICUFoldingFilterFactory(noArgs);
    final ReversedWildcardFilterFactory reversedWildcard = new ReversedWildcardFilterFactory(noArgs);
    final RemoveDuplicatesTokenFilterFactory removeDuplicates =
            new RemoveDuplicatesTokenFilterFactory(noArgs);

    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final Tokenizer tokenizer = rlpFactory.create(reader);
            if (!filters) {
                return new TokenStreamComponents(tokenizer);
            }
            // Chain order: folding -> reversed wildcard -> remove duplicates.
            final TokenStream folded = folding.create(tokenizer);
            final TokenStream reversed = reversedWildcard.create(folded);
            final TokenStream deduplicated = removeDuplicates.create(reversed);
            return new TokenStreamComponents(tokenizer, deduplicated);
        }

        @Override
        protected Reader initReader(String fieldName, Reader reader) {
            // The char filter wraps the reader only when filtering is enabled.
            return filters ? htmlStrip.create(reader) : reader;
        }
    };
}

From source file: de.walware.statet.r.internal.core.rhelp.index.WriteAnalyzer.java

License:Open Source License

public WriteAnalyzer() {
    super(PER_FIELD_REUSE_STRATEGY);
    this.defaultAnalyzer = new DefaultAnalyzer();
    this.htmlAnalyzers = new DefaultAnalyzer(
            new HTMLStripCharFilterFactory(Collections.<String, String>emptyMap()));
}