Example usage for org.apache.lucene.analysis.ja JapaneseAnalyzer getDefaultStopSet

List of usage examples for org.apache.lucene.analysis.ja JapaneseAnalyzer getDefaultStopSet

Introduction

This page collects usage examples for the getDefaultStopSet method of org.apache.lucene.analysis.ja.JapaneseAnalyzer.

Prototype

public static CharArraySet getDefaultStopSet() 

Source Link

Usage

From source file:com.github.riccardove.easyjasub.lucene.LuceneParser.java

License:Apache License

/**
 * Creates the parser and its {@link JapaneseAnalyzer}, configured with no user
 * dictionary ({@code null}) and {@code JapaneseTokenizer.Mode.NORMAL}.
 *
 * @param ignoreDefaultWordSet when {@code true}, the analyzer receives the default
 *        stop set and default stop tags from {@link JapaneseAnalyzer}; when
 *        {@code false}, it receives a newly created empty stop set and an empty
 *        tag set
 * @throws IOException declared by the signature; the visible body does not show
 *         which call, if any, raises it
 */
public LuceneParser(boolean ignoreDefaultWordSet) throws IOException {
    final CharArraySet stopSet;
    final Set<String> stopTags;
    if (ignoreDefaultWordSet) {
        stopSet = JapaneseAnalyzer.getDefaultStopSet();
        stopTags = JapaneseAnalyzer.getDefaultStopTags();
    } else {
        stopSet = new CharArraySet(new ArrayList<String>(), true);
        stopTags = new HashSet<String>();
    }
    analyzer = new JapaneseAnalyzer(null, JapaneseTokenizer.Mode.NORMAL, stopSet, stopTags);
}

From source file:hivemall.nlp.tokenizer.KuromojiUDF.java

License:Apache License

/**
 * Chooses the stop-word set that corresponds to the given word array.
 *
 * <ul>
 * <li>{@code null} array: the default stop set of {@link JapaneseAnalyzer};</li>
 * <li>empty array: {@code CharArraySet.EMPTY_SET};</li>
 * <li>otherwise: a new case-insensitive set built from the array elements.</li>
 * </ul>
 *
 * NOTE(review): the parameter is annotated {@code @Nonnull} yet {@code null} is
 * handled explicitly -- confirm which contract callers rely on.
 *
 * @param array the stop words supplied by the caller
 * @return the stop-word set selected as described above
 * @throws UDFArgumentException declared by the signature; nothing in the visible
 *         body throws it
 */
@Nonnull
private static CharArraySet stopWords(@Nonnull final String[] array) throws UDFArgumentException {
    if (array == null) {
        return JapaneseAnalyzer.getDefaultStopSet();
    }
    return (array.length == 0) ? CharArraySet.EMPTY_SET
            : new CharArraySet(Arrays.asList(array), /* ignoreCase */ true);
}

From source file:org.elasticsearch.index.analysis.JapaneseStopTokenFilterFactory.java

License:Apache License

/**
 * Reads the filter configuration from {@code settings}.
 *
 * <p>{@code ignore_case} defaults to {@code false} and {@code remove_trailing}
 * defaults to {@code true}. The stop words are resolved from the
 * {@code stopwords} setting through {@code Analysis.parseWords}, with the default
 * stop set of {@link JapaneseAnalyzer} passed both as the fallback and as the
 * named set {@code _japanese_}.
 *
 * @param index         forwarded to the superclass constructor
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment handed to {@code Analysis.parseWords}
 * @param name          forwarded to the superclass constructor
 * @param settings      settings of this filter
 */
@Inject
public JapaneseStopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.removeTrailing = settings.getAsBoolean("remove_trailing", true);

    // Register the default Japanese stop set under its symbolic name.
    MapBuilder<String, Set<?>> namedSets = MapBuilder.<String, Set<?>>newMapBuilder();
    namedSets.put("_japanese_", JapaneseAnalyzer.getDefaultStopSet());
    Map<String, Set<?>> namedStopWords = namedSets.immutableMap();

    this.stopWords = Analysis.parseWords(env, settings, "stopwords",
            JapaneseAnalyzer.getDefaultStopSet(), namedStopWords, this.ignoreCase);
}

From source file:org.elasticsearch.index.analysis.Kuromoji2AnalyzerProvider.java

License:Apache License

/**
 * Builds the {@link JapaneseAnalyzer} exposed by this provider.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords} with the analyzer's
 * default stop set as fallback; the tokenizer mode and user dictionary are
 * obtained from {@code Kuromoji2TokenizerFactory}. The analyzer receives a copy
 * of the stop words together with the default stop tags.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment used to resolve stop words and the user dictionary
 * @param name          forwarded to the superclass constructor
 * @param settings      settings of this analyzer
 */
public Kuromoji2AnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final Set<?> configuredStopWords = Analysis.parseStopWords(env, settings,
            JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = Kuromoji2TokenizerFactory.getMode(settings);
    final UserDictionary dictionary = Kuromoji2TokenizerFactory.getUserDictionary(env, settings);
    final CharArraySet stopSet = CharArraySet.copy(configuredStopWords);
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopSet, JapaneseAnalyzer.getDefaultStopTags());
}

From source file:org.elasticsearch.index.analysis.Kuromoji2JapaneseStopTokenFilterFactory.java

License:Apache License

/**
 * Reads the filter configuration from {@code settings}.
 *
 * <p>{@code ignore_case} defaults to {@code false} and {@code remove_trailing}
 * defaults to {@code true}. Stop words are resolved from the {@code stopwords}
 * setting through {@code Analysis.parseWords}, falling back to the default stop
 * set of {@link JapaneseAnalyzer} and using {@code NAMED_STOP_WORDS} for named
 * sets.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment handed to {@code Analysis.parseWords}
 * @param name          forwarded to the superclass constructor
 * @param settings      settings of this filter
 */
public Kuromoji2JapaneseStopTokenFilterFactory(
        IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
    this.stopWords = Analysis.parseWords(
            env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, this.ignoreCase);
}

From source file:org.elasticsearch.index.analysis.KuromojiAnalyzerProvider.java

License:Apache License

/**
 * Builds the {@link JapaneseAnalyzer} exposed by this provider.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords} with the analyzer's
 * default stop set as fallback; the tokenizer mode and user dictionary are
 * obtained from {@code KuromojiTokenizerFactory}. The analyzer receives a copy
 * of the stop words together with the default stop tags.
 *
 * @param index         forwarded to the superclass constructor
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment used to resolve stop words and the user dictionary
 * @param name          forwarded to the superclass constructor
 * @param settings      settings of this analyzer
 */
@Inject
public KuromojiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    final Set<?> configuredStopWords = Analysis.parseStopWords(env, settings,
            JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary dictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings);
    final CharArraySet stopSet = CharArraySet.copy(configuredStopWords);
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopSet, JapaneseAnalyzer.getDefaultStopTags());
}

From source file:org.omegat.tokenizer.LuceneJapaneseTokenizer.java

License:Open Source License

/**
 * Produces the token stream for {@code strOrig}.
 *
 * <p>Without stemming, the text is tokenized by a {@link JapaneseTokenizer} in
 * {@code Mode.NORMAL} wrapped in a {@code TagJoiningFilter}. With stemming, tags
 * are first blanked out via {@code blankOutTags} and the text is analyzed by a
 * {@link JapaneseAnalyzer} in {@code Mode.SEARCH}.
 *
 * @param strOrig          the text to tokenize
 * @param stemsAllowed     selects the analyzer path ({@code true}) or the plain
 *                         tokenizer path ({@code false})
 * @param stopWordsAllowed only consulted when {@code stemsAllowed} is
 *                         {@code true}: if {@code true} the analyzer uses the
 *                         default stop set and stop tags, otherwise empty ones
 * @return the token stream over the given text
 */
@Override
protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed) {
    if (!stemsAllowed) {
        return new TagJoiningFilter(new JapaneseTokenizer(new StringReader(strOrig), null, false, Mode.NORMAL));
    }

    // Blank out tags when stemming only
    strOrig = blankOutTags(strOrig);

    // getBehavior() is passed where Lucene expects its version/compatibility
    // argument -- presumably a Lucene Version; confirm against the superclass.
    CharArraySet stopWords = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopSet()
            : new CharArraySet(getBehavior(), 0, false);
    // Typed empty set instead of the raw Collections.EMPTY_SET, which forced an
    // unchecked conversion to Set<String>.
    Set<String> stopTags = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopTags()
            : Collections.<String>emptySet();
    return new JapaneseAnalyzer(getBehavior(), null, Mode.SEARCH, stopWords, stopTags).tokenStream("",
            new StringReader(strOrig));
}