Usage examples for org.apache.lucene.analysis.ja.JapaneseAnalyzer.getDefaultStopSet()
public static CharArraySet getDefaultStopSet()
From source file:com.github.riccardove.easyjasub.lucene.LuceneParser.java
License:Apache License
/**
 * Creates a parser whose analyzer runs the Japanese tokenizer in NORMAL mode.
 *
 * @param ignoreDefaultWordSet when {@code true} the analyzer receives
 *     {@link JapaneseAnalyzer#getDefaultStopSet()} and
 *     {@link JapaneseAnalyzer#getDefaultStopTags()}; when {@code false} it receives an
 *     empty case-insensitive stop set and an empty tag set
 * @throws IOException declared by this constructor's signature
 */
public LuceneParser(boolean ignoreDefaultWordSet) throws IOException {
    final CharArraySet stopSet;
    final Set<String> stopTags;
    if (ignoreDefaultWordSet) {
        stopSet = JapaneseAnalyzer.getDefaultStopSet();
        stopTags = JapaneseAnalyzer.getDefaultStopTags();
    } else {
        stopSet = new CharArraySet(new ArrayList<String>(), true);
        stopTags = new HashSet<String>();
    }
    // No user dictionary is supplied (first argument is null).
    analyzer = new JapaneseAnalyzer(null, JapaneseTokenizer.Mode.NORMAL, stopSet, stopTags);
}
From source file:hivemall.nlp.tokenizer.KuromojiUDF.java
License:Apache License
/**
 * Selects the stop-word set for the given word array.
 *
 * @param array the stop words; {@code null} selects
 *     {@link JapaneseAnalyzer#getDefaultStopSet()}, a zero-length array selects
 *     {@link CharArraySet#EMPTY_SET}
 * @return the default set, the shared empty set, or a new case-insensitive set holding
 *     the given words
 * @throws UDFArgumentException declared in the signature; nothing in this body throws it
 */
@Nonnull
private static CharArraySet stopWords(@Nonnull final String[] array) throws UDFArgumentException {
    if (array == null) {
        return JapaneseAnalyzer.getDefaultStopSet();
    }
    return (array.length == 0)
            ? CharArraySet.EMPTY_SET
            : new CharArraySet(Arrays.asList(array), /* ignoreCase */ true);
}
From source file:org.elasticsearch.index.analysis.JapaneseStopTokenFilterFactory.java
License:Apache License
/**
 * Builds the Japanese stop-token filter factory from index and filter settings.
 *
 * <p>Reads {@code ignore_case} (fallback argument {@code false}) and
 * {@code remove_trailing} (fallback argument {@code true}) from {@code settings}, then
 * resolves the {@code stopwords} setting via {@code Analysis.parseWords}, passing
 * {@link JapaneseAnalyzer#getDefaultStopSet()} as the default and registering it under
 * the name {@code _japanese_}.
 */
@Inject
public JapaneseStopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.removeTrailing = settings.getAsBoolean("remove_trailing", true);

    // Named sets that the "stopwords" setting may refer to by key.
    Map<String, Set<?>> namedSets = MapBuilder.<String, Set<?>>newMapBuilder()
            .put("_japanese_", JapaneseAnalyzer.getDefaultStopSet())
            .immutableMap();

    this.stopWords = Analysis.parseWords(
            env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), namedSets, ignoreCase);
}
From source file:org.elasticsearch.index.analysis.Kuromoji2AnalyzerProvider.java
License:Apache License
/**
 * Creates the provider and its {@link JapaneseAnalyzer}.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords} with
 * {@link JapaneseAnalyzer#getDefaultStopSet()} as the default; the tokenizer mode and user
 * dictionary come from {@code Kuromoji2TokenizerFactory}; stop tags are always
 * {@link JapaneseAnalyzer#getDefaultStopTags()}.
 */
public Kuromoji2AnalyzerProvider(IndexSettings indexSettings, Environment env, String name,
        Settings settings) {
    super(indexSettings, name, settings);

    final Set<?> parsedStopWords =
            Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = Kuromoji2TokenizerFactory.getMode(settings);
    final UserDictionary dictionary = Kuromoji2TokenizerFactory.getUserDictionary(env, settings);

    // The analyzer is handed a copy of the parsed stop words, not the parsed set itself.
    final CharArraySet stopSet = CharArraySet.copy(parsedStopWords);
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopSet,
            JapaneseAnalyzer.getDefaultStopTags());
}
From source file:org.elasticsearch.index.analysis.Kuromoji2JapaneseStopTokenFilterFactory.java
License:Apache License
/**
 * Builds the Japanese stop-token filter factory from the filter settings.
 *
 * <p>Reads {@code ignore_case} (fallback argument {@code false}) and
 * {@code remove_trailing} (fallback argument {@code true}) from {@code settings}, then
 * resolves the {@code stopwords} setting via {@code Analysis.parseWords}, with
 * {@link JapaneseAnalyzer#getDefaultStopSet()} as the default and
 * {@code NAMED_STOP_WORDS} as the table of named sets.
 */
public Kuromoji2JapaneseStopTokenFilterFactory(IndexSettings indexSettings, Environment env, String name,
        Settings settings) {
    super(indexSettings, name, settings);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
    this.stopWords = Analysis.parseWords(
            env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
}
From source file:org.elasticsearch.index.analysis.KuromojiAnalyzerProvider.java
License:Apache License
/**
 * Creates the provider and its {@link JapaneseAnalyzer}.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords} with
 * {@link JapaneseAnalyzer#getDefaultStopSet()} as the default; the tokenizer mode and user
 * dictionary come from {@code KuromojiTokenizerFactory}; stop tags are always
 * {@link JapaneseAnalyzer#getDefaultStopTags()}.
 */
@Inject
public KuromojiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    final Set<?> parsedStopWords =
            Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary dictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings);

    // The analyzer is handed a copy of the parsed stop words, not the parsed set itself.
    final CharArraySet stopSet = CharArraySet.copy(parsedStopWords);
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopSet,
            JapaneseAnalyzer.getDefaultStopTags());
}
From source file:org.omegat.tokenizer.LuceneJapaneseTokenizer.java
License:Open Source License
@Override protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed) { if (stemsAllowed) { // Blank out tags when stemming only strOrig = blankOutTags(strOrig); CharArraySet stopWords = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopSet() : new CharArraySet(getBehavior(), 0, false); Set<String> stopTags = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopTags() : Collections.EMPTY_SET; return new JapaneseAnalyzer(getBehavior(), null, Mode.SEARCH, stopWords, stopTags).tokenStream("", new StringReader(strOrig)); } else {/* w ww .jav a2 s .co m*/ return new TagJoiningFilter(new JapaneseTokenizer(new StringReader(strOrig), null, false, Mode.NORMAL)); } }