Usage examples for org.apache.lucene.analysis.util.TokenizerFactory#create()
public final Tokenizer create()
From source file: org.elasticsearch.plugin.ingest.kuromoji_part_of_speech_extract.KuromojiPartOfSpeechExtractProcessor.java
License:Apache License
/**
 * Builds an {@link Analyzer} that tokenizes with a {@link JapaneseTokenizerFactory}
 * configured for {@code JapaneseTokenizer.Mode.NORMAL} and then passes the stream
 * through a {@link JapanesePartOfSpeechKeepFilterFactory}.
 *
 * @param posTags part-of-speech tags handed to the keep-filter factory
 *                (presumably the tags whose tokens are retained; confirm against
 *                {@code JapanesePartOfSpeechKeepFilterFactory})
 * @return a new analyzer; each call to its {@code createComponents} creates a fresh
 *         tokenizer and filter chain from the factories built here
 */
private Analyzer loadAnalyzer(List<String> posTags) {
    Map<String, String> tokenizerOptions = new HashMap<>();
    tokenizerOptions.put("mode", JapaneseTokenizer.Mode.NORMAL.toString());
    final TokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(tokenizerOptions);

    // Use the method argument rather than the instance field so the result does not
    // depend on whether (or when) the field has been assigned by the caller.
    final TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[] {
        new JapanesePartOfSpeechKeepFilterFactory(new HashMap<>(), posTags)
    };

    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = tokenizerFactory.create();
            // Wrap the tokenizer with each configured filter, in array order.
            TokenStream tokenStream = tokenizer;
            for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
                tokenStream = tokenFilterFactory.create(tokenStream);
            }
            return new TokenStreamComponents(tokenizer, tokenStream);
        }
    };
}