Usage examples for the no-argument constructor NGramTokenizer() of org.apache.lucene.analysis.ngram.NGramTokenizer
public NGramTokenizer()
From source file: org.wltea.analyzer.sample.LuceneTokenizerDemo.java
License: Apache License
/**
 * Demonstrates {@code NGramTokenizer} created with its no-argument constructor
 * (noted in the original sample as min:1, max:2).
 *
 * <p>Feeds a fixed sample string to the tokenizer and prints one line per emitted
 * token in the form {@code term->startOffset-endOffset->type}.
 *
 * @throws RuntimeException if configuring, consuming or closing the token stream
 *     fails with an {@link IOException}; the original exception is kept as the cause
 */
public void testNT() {
    // try-with-resources guarantees the tokenizer is closed even when
    // tokenization fails part-way through.
    try (Tokenizer tokenizer = new NGramTokenizer()) {
        tokenizer.setReader(new StringReader(
                "?????IKAnalyer can analysis english text too"));

        TokenStreamComponents tsc = new TokenStreamComponents(tokenizer);
        TokenStream ts = tsc.getTokenStream();

        // Attributes are registered before reset() so they are populated
        // on every incrementToken() call.
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);

        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString() + "->" + offset.startOffset()
                    + "-" + offset.endOffset() + "->" + type.type());
        }
        ts.end();
    } catch (IOException e) {
        // Keep the cause so the underlying I/O failure is not lost.
        throw new RuntimeException("NGramTokenizer demo failed", e);
    }
}