List of usage examples for org.apache.lucene.analysis.ja.JapaneseTokenizer#incrementToken()
@Override
public boolean incrementToken() throws IOException
From source file: aak.as.preProcess.japanese.JaSegmenter.java
License: Open Source License
@Override public List<String> segmentWords(String text) { List<String> ret = new ArrayList<String>(); StringReader textreader = new StringReader(text); JapaneseTokenizer segmenter = new JapaneseTokenizer(textreader, null, true, JapaneseTokenizer.Mode.SEARCH); JaStemmer.lemma.clear();/* w w w . jav a2s . c o m*/ CharTermAttribute termAtt = segmenter.getAttribute(CharTermAttribute.class); BaseFormAttribute baseAtt = segmenter.getAttribute(BaseFormAttribute.class); try { segmenter.reset(); while (segmenter.incrementToken()) { //segmenter.clearAttributes(); ret.add(termAtt.toString()); if (baseAtt.getBaseForm() != null) JaStemmer.lemma.put(termAtt.toString(), baseAtt.getBaseForm()); } segmenter.close(); } catch (IOException e) { // TODO Auto-generated catch block. e.printStackTrace(); } return ret; }