Example usage of the org.apache.lucene.analysis.ja.JapaneseAnalyzer constructor

List of usage examples for the org.apache.lucene.analysis.ja.JapaneseAnalyzer constructor

Introduction

This page collects example usages of the org.apache.lucene.analysis.ja.JapaneseAnalyzer constructor shown below.

Prototype

public JapaneseAnalyzer(UserDictionary userDict, Mode mode, CharArraySet stopwords, Set<String> stoptags) 

Source Link

Usage

From source file:com.github.riccardove.easyjasub.lucene.LuceneParser.java

License:Apache License

/**
 * Creates the parser's {@link JapaneseAnalyzer} in {@code NORMAL} tokenizer mode
 * without a user dictionary.
 *
 * @param ignoreDefaultWordSet when {@code true}, the analyzer is given the stop set and
 *        stop tags returned by {@link JapaneseAnalyzer#getDefaultStopSet()} and
 *        {@link JapaneseAnalyzer#getDefaultStopTags()}; when {@code false}, it is given
 *        an empty stop set and an empty stop-tag set
 * @throws IOException declared by the constructor; NOTE(review): presumably for
 *         dictionary loading inside the analyzer — confirm
 */
public LuceneParser(boolean ignoreDefaultWordSet) throws IOException {
    final CharArraySet words;
    final Set<String> tags;
    if (ignoreDefaultWordSet) {
        words = JapaneseAnalyzer.getDefaultStopSet();
        tags = JapaneseAnalyzer.getDefaultStopTags();
    } else {
        // Empty sets: nothing is filtered by word or by tag.
        words = new CharArraySet(new ArrayList<String>(), true);
        tags = new HashSet<String>();
    }
    analyzer = new JapaneseAnalyzer(null, JapaneseTokenizer.Mode.NORMAL, words, tags);
}

From source file:hivemall.nlp.tokenizer.KuromojiUDF.java

License:Apache License

/**
 * Tokenizes the first UDF argument with a lazily created, cached {@link JapaneseAnalyzer}.
 *
 * <p>The analyzer is built on first use from {@code _stopWordsArray}, {@code _mode} and
 * {@code _stoptags} and stored in {@code _analyzer} for later calls.
 *
 * @param arguments deferred UDF arguments; only {@code arguments[0]} is read
 * @return the list populated by {@code analyzeTokens}, or {@code null} when the first
 *         argument evaluates to {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while analyzing the input
 */
@Override
public List<Text> evaluate(DeferredObject[] arguments) throws HiveException {
    JapaneseAnalyzer analyzer = _analyzer;
    if (analyzer == null) {
        CharArraySet stopwords = stopWords(_stopWordsArray);
        analyzer = new JapaneseAnalyzer(null, _mode, stopwords, _stoptags);
        this._analyzer = analyzer;
    }

    Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }
    String line = arg0.toString();

    final List<Text> results = new ArrayList<Text>(32);
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream("", line);
        if (stream != null) {
            analyzeTokens(stream, results);
        }
    } catch (IOException e) {
        // The analyzer is closed on failure, so the cached reference must be dropped too;
        // otherwise the next call would pick up the already-closed instance.
        IOUtils.closeQuietly(analyzer);
        this._analyzer = null;
        throw new HiveException(e);
    } finally {
        IOUtils.closeQuietly(stream);
    }
    return results;
}

From source file:org.elasticsearch.index.analysis.Kuromoji2AnalyzerProvider.java

License:Apache License

/**
 * Builds this provider's {@link JapaneseAnalyzer} from the supplied settings.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords}, which is handed the analyzer's
 * default stop set; the tokenizer mode and user dictionary come from
 * {@code Kuromoji2TokenizerFactory}; the stop tags are always the analyzer defaults.
 *
 * @param indexSettings index-level settings forwarded to the superclass
 * @param env environment used when resolving stop words and the user dictionary
 * @param name analyzer name forwarded to the superclass
 * @param settings analyzer settings read for stop words, mode and user dictionary
 */
public Kuromoji2AnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final Set<?> configuredStopWords =
            Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = Kuromoji2TokenizerFactory.getMode(settings);
    final UserDictionary dictionary = Kuromoji2TokenizerFactory.getUserDictionary(env, settings);
    final CharArraySet stopWordSet = CharArraySet.copy(configuredStopWords);
    final Set<String> stopTags = JapaneseAnalyzer.getDefaultStopTags();
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopWordSet, stopTags);
}

From source file:org.elasticsearch.index.analysis.KuromojiAnalyzerProvider.java

License:Apache License

/**
 * Builds this provider's {@link JapaneseAnalyzer} from the supplied settings.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords}, which is handed the analyzer's
 * default stop set; the tokenizer mode and user dictionary come from
 * {@code KuromojiTokenizerFactory}; the stop tags are always the analyzer defaults.
 *
 * @param index index forwarded to the superclass
 * @param indexSettings index-level settings forwarded to the superclass
 * @param env environment used when resolving stop words and the user dictionary
 * @param name analyzer name forwarded to the superclass
 * @param settings analyzer settings read for stop words, mode and user dictionary
 */
@Inject
public KuromojiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    final Set<?> configuredStopWords =
            Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary dictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings);
    final CharArraySet stopWordSet = CharArraySet.copy(configuredStopWords);
    final Set<String> stopTags = JapaneseAnalyzer.getDefaultStopTags();
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopWordSet, stopTags);
}