List of usage examples for the org.apache.lucene.analysis.ja.JapaneseAnalyzer constructor
public JapaneseAnalyzer(UserDictionary userDict, Mode mode, CharArraySet stopwords, Set<String> stoptags)
From source file:com.github.riccardove.easyjasub.lucene.LuceneParser.java
License:Apache License
/**
 * Creates the parser's {@link JapaneseAnalyzer}, using no user dictionary and
 * {@code JapaneseTokenizer.Mode.NORMAL}.
 *
 * @param ignoreDefaultWordSet when {@code true}, the analyzer receives
 *        {@code JapaneseAnalyzer}'s default stop set and default stop tags;
 *        when {@code false}, it receives a freshly created empty stop set and
 *        an empty tag set
 * @throws IOException declared by this constructor's signature
 */
public LuceneParser(boolean ignoreDefaultWordSet) throws IOException {
    // NOTE(review): the flag name could be read either way ("ignore the default
    // set" vs. "ignore words in the default set"); the mapping below mirrors the
    // existing behavior exactly -- confirm the intended meaning with callers.
    final CharArraySet filteredWords;
    final Set<String> filteredTags;
    if (ignoreDefaultWordSet) {
        filteredWords = JapaneseAnalyzer.getDefaultStopSet();
        filteredTags = JapaneseAnalyzer.getDefaultStopTags();
    } else {
        filteredWords = new CharArraySet(new ArrayList<String>(), true);
        filteredTags = new HashSet<String>();
    }
    analyzer = new JapaneseAnalyzer(null, JapaneseTokenizer.Mode.NORMAL, filteredWords, filteredTags);
}
From source file:hivemall.nlp.tokenizer.KuromojiUDF.java
License:Apache License
/**
 * Tokenizes the string form of the first argument with a lazily created,
 * cached {@link JapaneseAnalyzer} and returns the collected tokens.
 *
 * @param arguments the UDF arguments; only {@code arguments[0]} is read here
 * @return the tokens gathered by {@code analyzeTokens}, or {@code null} when
 *         the first argument evaluates to {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while
 *         obtaining or consuming the token stream
 */
@Override
public List<Text> evaluate(DeferredObject[] arguments) throws HiveException {
    JapaneseAnalyzer analyzer = _analyzer;
    if (analyzer == null) {
        // Build the analyzer on first use and cache it for subsequent calls.
        CharArraySet stopwords = stopWords(_stopWordsArray);
        analyzer = new JapaneseAnalyzer(null, _mode, stopwords, _stoptags);
        this._analyzer = analyzer;
    }

    Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }
    String line = arg0.toString();

    final List<Text> results = new ArrayList<Text>(32);
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream("", line);
        if (stream != null) {
            analyzeTokens(stream, results);
        }
    } catch (IOException e) {
        IOUtils.closeQuietly(analyzer);
        // The analyzer has just been closed: forget the cached instance so a
        // later call builds a fresh one instead of reusing a closed analyzer.
        this._analyzer = null;
        throw new HiveException(e);
    } finally {
        IOUtils.closeQuietly(stream);
    }
    return results;
}
From source file:org.elasticsearch.index.analysis.Kuromoji2AnalyzerProvider.java
License:Apache License
/**
 * Builds the provider's {@link JapaneseAnalyzer} from the given settings.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords}, with
 * {@code JapaneseAnalyzer}'s default stop set passed as the default; the
 * tokenizer mode and user dictionary are obtained from
 * {@code Kuromoji2TokenizerFactory}; the stop tags are always
 * {@code JapaneseAnalyzer}'s defaults.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env environment passed to the stop-word and user-dictionary lookups
 * @param name forwarded to the superclass constructor
 * @param settings analyzer settings used for stop words, mode and dictionary
 */
public Kuromoji2AnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final Set<?> configuredStopWords =
            Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = Kuromoji2TokenizerFactory.getMode(settings);
    final UserDictionary customDictionary = Kuromoji2TokenizerFactory.getUserDictionary(env, settings);

    // Named locals for the remaining constructor arguments, computed in the
    // same order they would be evaluated as inline arguments.
    final CharArraySet stopWordSet = CharArraySet.copy(configuredStopWords);
    final Set<String> stopTags = JapaneseAnalyzer.getDefaultStopTags();

    analyzer = new JapaneseAnalyzer(customDictionary, tokenizerMode, stopWordSet, stopTags);
}
From source file:org.elasticsearch.index.analysis.KuromojiAnalyzerProvider.java
License:Apache License
/**
 * Builds the provider's {@link JapaneseAnalyzer} from the given settings.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords}, with
 * {@code JapaneseAnalyzer}'s default stop set passed as the default; the
 * tokenizer mode and user dictionary are obtained from
 * {@code KuromojiTokenizerFactory}; the stop tags are always
 * {@code JapaneseAnalyzer}'s defaults.
 *
 * @param index forwarded to the superclass constructor
 * @param indexSettings forwarded to the superclass constructor
 * @param env environment passed to the stop-word and user-dictionary lookups
 * @param name forwarded to the superclass constructor
 * @param settings analyzer settings used for stop words, mode and dictionary
 */
@Inject
public KuromojiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    final Set<?> configuredStopWords =
            Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary customDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings);

    // Named locals for the remaining constructor arguments, computed in the
    // same order they would be evaluated as inline arguments.
    final CharArraySet stopWordSet = CharArraySet.copy(configuredStopWords);
    final Set<String> stopTags = JapaneseAnalyzer.getDefaultStopTags();

    analyzer = new JapaneseAnalyzer(customDictionary, tokenizerMode, stopWordSet, stopTags);
}