Example usage for org.apache.lucene.analysis.ja.tokenattributes BaseFormAttribute getBaseForm

List of usage examples for org.apache.lucene.analysis.ja.tokenattributes BaseFormAttribute getBaseForm

Introduction

On this page you can find example usages of org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute.getBaseForm().

Prototype

public String getBaseForm();

Source Link

Usage

From source file:aak.as.preProcess.japanese.JaSegmenter.java

License:Open Source License

/**
 * Splits the given text into tokens with a {@code JapaneseTokenizer} running in
 * {@code SEARCH} mode and returns the surface form of every token.
 *
 * <p>Side effect: {@code JaStemmer.lemma} is cleared before tokenization and then
 * receives one (surface form, base form) entry for each token whose base form is
 * not {@code null}.
 *
 * <p>An {@link IOException} raised while tokenizing is reported on standard error
 * and the tokens collected up to that point are returned (best effort).
 *
 * @param text the text to segment
 * @return the token surface forms, in the order the tokenizer produced them
 */
@Override
public List<String> segmentWords(String text) {

    List<String> ret = new ArrayList<String>();

    StringReader textreader = new StringReader(text);
    JapaneseTokenizer segmenter = new JapaneseTokenizer(textreader, null, true, JapaneseTokenizer.Mode.SEARCH);

    JaStemmer.lemma.clear();
    CharTermAttribute termAtt = segmenter.getAttribute(CharTermAttribute.class);
    BaseFormAttribute baseAtt = segmenter.getAttribute(BaseFormAttribute.class);
    try {
        segmenter.reset();
        while (segmenter.incrementToken()) {
            // Read each attribute once per token; the attribute instances are reused
            // by the stream, so their values must be captured before the next advance.
            String surface = termAtt.toString();
            ret.add(surface);
            String baseForm = baseAtt.getBaseForm();
            if (baseForm != null) {
                JaStemmer.lemma.put(surface, baseForm);
            }
        }
        // The TokenStream workflow requires end() after the last incrementToken().
        segmenter.end();
    } catch (IOException e) {
        // Best effort: report the failure and return whatever was tokenized so far.
        e.printStackTrace();
    } finally {
        // Always release the tokenizer, even when tokenization failed part-way.
        try {
            segmenter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    return ret;
}

From source file:com.github.riccardove.easyjasub.lucene.LuceneParser.java

License:Apache License

/**
 * Copies the base form reported by the stream's {@code BaseFormAttribute} onto the
 * given token. Nothing is written when the attribute lookup yields {@code null}.
 *
 * @param tokenStream the stream whose current base form is read
 * @param token       the token that receives the base form
 */
private void readBaseForm(TokenStream tokenStream, LuceneToken token) {
    BaseFormAttribute attribute = tokenStream.getAttribute(BaseFormAttribute.class);
    if (attribute == null) {
        return;
    }
    token.setBaseForm(attribute.getBaseForm());
}