Example usage for org.apache.lucene.analysis.util TokenizerFactory create

List of usage examples for org.apache.lucene.analysis.util TokenizerFactory create

Introduction

On this page you can find example usage of the create() method of org.apache.lucene.analysis.util.TokenizerFactory.

Prototype

public final Tokenizer create() 

Source Link

Document

Creates a TokenStream of the specified input using the default attribute factory.

Usage

From source file: org.elasticsearch.plugin.ingest.kuromoji_part_of_speech_extract.KuromojiPartOfSpeechExtractProcessor.java

License:Apache License

/**
 * Builds an analyzer that chains a Japanese tokenizer with a part-of-speech keep filter.
 *
 * <p>The tokenizer factory is configured with {@code mode} set to
 * {@code JapaneseTokenizer.Mode.NORMAL}, and a single
 * {@code JapanesePartOfSpeechKeepFilterFactory} is applied on top of its output.
 *
 * <p>NOTE(review): the filter factory is given {@code this.posTags}; the
 * {@code posTags} argument is not read anywhere in this method. Confirm that
 * this is intended.
 *
 * @param posTags not read by this method (see the review note above)
 * @return a new analyzer whose components are created from the factories built here
 */
private Analyzer loadAnalyzer(List<String> posTags) {
    Map<String, String> tokenizerArgs = new HashMap<>();
    tokenizerArgs.put("mode", JapaneseTokenizer.Mode.NORMAL.toString());
    TokenizerFactory kuromojiFactory = new JapaneseTokenizerFactory(tokenizerArgs);

    TokenFilterFactory[] filterFactories = {
        new JapanesePartOfSpeechKeepFilterFactory(new HashMap<>(), this.posTags)
    };

    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            // A fresh tokenizer is created on every call; each filter factory
            // then wraps the stream produced by the previous stage.
            Tokenizer source = kuromojiFactory.create();
            TokenStream chain = source;
            for (TokenFilterFactory factory : filterFactories) {
                chain = factory.create(chain);
            }
            return new TokenStreamComponents(source, chain);
        }
    };
}