Example usage for org.apache.lucene.analysis.cn.smart SmartChineseAnalyzer tokenStream

List of usage examples for org.apache.lucene.analysis.cn.smart SmartChineseAnalyzer tokenStream

Introduction

On this page you can find example usages of the tokenStream method of org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer.

Prototype

public final TokenStream tokenStream(final String fieldName, final Reader reader) 

Source Link

Document

Returns a TokenStream suitable for fieldName, tokenizing the contents of reader.

Usage

From source file:hivemall.nlp.tokenizer.SmartcnUDF.java

License:Apache License

/**
 * Tokenizes the string form of the first argument with a {@link SmartChineseAnalyzer}.
 *
 * <p>The analyzer is created on first use from {@code _stopWordsArray} and cached in
 * {@code _analyzer} so later calls reuse it. If tokenization fails with an
 * {@link IOException}, the analyzer is closed and the cached reference is cleared so
 * that the next call builds a fresh instance instead of reusing a closed one.
 *
 * @param arguments deferred arguments; only {@code arguments[0]} is read
 * @return the list filled by {@code analyzeTokens}, or {@code null} when the first
 *         argument evaluates to {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while tokenizing
 */
@Override
public List<Text> evaluate(DeferredObject[] arguments) throws HiveException {
    SmartChineseAnalyzer analyzer = _analyzer;
    if (analyzer == null) {
        CharArraySet stopwords = stopWords(_stopWordsArray);
        analyzer = new SmartChineseAnalyzer(stopwords);
        this._analyzer = analyzer;
    }

    Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }
    String line = arg0.toString();

    final List<Text> results = new ArrayList<Text>(32);
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream("", line);
        if (stream != null) {
            analyzeTokens(stream, results);
        }
    } catch (IOException e) {
        IOUtils.closeQuietly(analyzer);
        // The analyzer was just closed; forget it so the next call does not
        // pick up a closed instance from the cache.
        this._analyzer = null;
        throw new HiveException(e);
    } finally {
        // Always release the stream, whether tokenization succeeded or not.
        IOUtils.closeQuietly(stream);
    }
    return results;
}

From source file:org.omegat.tokenizer.LuceneSmartChineseTokenizer.java

License:Open Source License

/**
 * Builds the token stream used to split {@code strOrig} into tokens.
 *
 * <p>When {@code stemsAllowed} is {@code false}, the text is wrapped in a
 * {@link SentenceTokenizer} feeding a {@link WordTokenFilter}. Otherwise a
 * {@link SmartChineseAnalyzer} is constructed with {@code getBehavior()} and
 * {@code stopWordsAllowed}, and its token stream for an empty field name is returned.
 *
 * @param strOrig          the text to tokenize
 * @param stemsAllowed     selects the analyzer-based path when {@code true}
 * @param stopWordsAllowed passed as the second constructor argument of the analyzer
 * @return a token stream over {@code strOrig}
 */
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) {
    if (!stemsAllowed) {
        return new WordTokenFilter(new SentenceTokenizer(new StringReader(strOrig)));
    }

    final SmartChineseAnalyzer smartcn = new SmartChineseAnalyzer(getBehavior(), stopWordsAllowed);
    return smartcn.tokenStream("", new StringReader(strOrig));
}