Usage examples for `org.apache.lucene.analysis.TokenStream#reset()`:
public void reset() throws IOException
From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java
License:Apache License
@Test public void testComplex2() throws Exception { Analyzer a = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa/b,bb"); TokenStream stream = a.tokenStream("f", new StringReader("cabca")); stream.reset(); assertTokenStream(stream, "c,0,1,1/a,1,2,1/b,2,3,1/c,3,4,1/a,4,5,1"); a = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("ccabcca")); stream.reset();// w w w . java2 s . c o m assertTokenStream(stream, "cc,0,2,1/a,2,3,1/b,3,4,1/cc,4,6,1/a,6,7,1"); a = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("edcabcdea")); stream.reset(); assertTokenStream(stream, "ed,0,2,1/dc,1,3,1/a,3,4,1/b,4,5,1/cd,5,7,1/de,6,8,1/a"); a = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("fedcabcdefa")); stream.reset(); assertTokenStream(stream, "fe,0,2,1/ed,1,3,1/dc,2,4,1/a,4,5,1/b,5,6,1/cd,6,8,1/de,7,9,1/ef,8,10,1/a,10,11,1"); }
From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java
License:Apache License
@Test public void testComplex3() throws Exception { Analyzer a = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa/b,bb"); TokenStream stream = a.tokenStream("f", new StringReader("cabca")); stream.reset(); assertTokenStream(stream, "c,0,1,1/a,1,2,1/b,2,3,1/c,3,4,1/a,4,5,1"); a = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("ccabcca")); stream.reset();//w ww. j ava2 s. c om assertTokenStream(stream, "cc,0,2,1/a,2,3,1/b,3,4,1/cc,4,6,1/a,6,7,1"); a = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("edcabcdea")); stream.reset(); assertTokenStream(stream, "edc,0,3,1/a,3,4,1/b,4,5,1/cde,5,8,1/a,8,9,1"); a = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("fedcabcdefa")); stream.reset(); assertTokenStream(stream, "fed,0,3,1/edc,1,4,1/a,4,5,1/b,5,6,1/cde,6,9,1/def,7,10,1/a,10,11,1"); a = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("gfedcabcdefga")); stream.reset(); assertTokenStream(stream, "gfe,0,3,1/fed,1,4,1/edc,2,5,1/a,5,6,1/b,6,7,1/cde,7,10,1/def,8,11,1/efg,9,12,1/a,12,13,1"); }
From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java
License:Apache License
@Test public void testComplex4() throws Exception { Analyzer a = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa/b,bb"); TokenStream stream = a.tokenStream("f", new StringReader("cabca")); stream.reset(); assertTokenStream(stream, "c,0,1,1/a,1,2,1/b,2,3,1/c,3,4,1/a,4,5,1"); a = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("ccabcca")); stream.reset();//from www . ja v a 2 s. co m assertTokenStream(stream, "cc,0,2,1/a,2,3,1/b,3,4,1/cc,4,6,1/a,6,7,1"); a = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("edcabcdea")); stream.reset(); assertTokenStream(stream, "edc,0,3,1/a,3,4,1/b,4,5,1/cde,5,8,1/a,8,9,1"); a = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("fedcabcdefa")); stream.reset(); assertTokenStream(stream, "fedc,0,4,1/a,4,5,1/b,5,6,1/cdef,6,10,1/a,10,11,1"); a = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("gfedcabcdefga")); stream.reset(); assertTokenStream(stream, "gfed,0,4,1/fedc,1,5,1/a,5,6,1/b,6,7,1/cdef,7,11,1/defg,8,12,1/a,12,13,1"); }
From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java
License:Apache License
/**
 * 1-gram tokenization with expand=true: each synonym target ("a", "b")
 * also emits its synonym ("aa", "bb") at the same position (posIncr 0).
 * Expected-token format: term,startOffset,endOffset,positionIncrement.
 */
@Test
public void testComplexExpand1() throws Exception {
    Analyzer a = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa/b,bb");
    TokenStream stream = a.tokenStream("f", new StringReader("cabca"));
    stream.reset();
    assertTokenStream(stream, "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0");
    stream.close(); // release the stream per the TokenStream lifecycle contract

    a = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("ccabcca"));
    stream.reset();
    assertTokenStream(stream,
            "c,0,1,1/c,1,2,1/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/c,5,6,1/a,6,7,1/aa,6,7,0");
    stream.close();

    a = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("edcabcdea"));
    stream.reset();
    assertTokenStream(stream,
            "e,0,1,1/d,1,2,1/c,2,3,1/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/d,6,7,1/e,7,8,1/a,8,9,1/aa,8,9,0");
    stream.close();

    a = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("fedcabcdefa"));
    stream.reset();
    assertTokenStream(stream,
            "f,0,1,1/e,1,2,1/d,2,3,1/c,3,4,1/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/d,7,8,1/e,8,9,1/f,9,10,1/a,10,11,1/aa,10,11,0");
    stream.close();
}
From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java
License:Apache License
/**
 * 2-gram tokenization with expand=true: synonym targets emit their
 * synonyms at the same position, and edge n-grams next to a synonym
 * boundary are emitted with posIncr 0.
 * Expected-token format: term,startOffset,endOffset,positionIncrement.
 */
@Test
public void testComplexExpand2() throws Exception {
    Analyzer a = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa/b,bb");
    TokenStream stream = a.tokenStream("f", new StringReader("cabca"));
    stream.reset();
    assertTokenStream(stream, "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0");
    stream.close(); // release the stream per the TokenStream lifecycle contract

    a = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("ccabcca"));
    stream.reset();
    assertTokenStream(stream,
            "cc,0,2,1/c,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/cc,4,6,0/c,5,6,0/a,6,7,1/aa,6,7,0");
    stream.close();

    a = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("edcabcdea"));
    stream.reset();
    assertTokenStream(stream,
            "ed,0,2,1/dc,1,3,1/c,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/cd,5,7,0/de,6,8,1/e,7,8,0/a,8,9,1/aa,8,9,0");
    stream.close();

    a = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("fedcabcdefa"));
    stream.reset();
    assertTokenStream(stream,
            "fe,0,2,1/ed,1,3,1/dc,2,4,1/c,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/cd,6,8,0/de,7,9,1/ef,8,10,1/f,9,10,0/a,10,11,1/aa,10,11,0");
    stream.close();
}
From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java
License:Apache License
@Test public void testComplexExpand3() throws Exception { Analyzer a = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa/b,bb"); TokenStream stream = a.tokenStream("f", new StringReader("cabca")); stream.reset(); assertTokenStream(stream, "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0"); a = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("ccabcca")); stream.reset();//ww w .ja v a2 s.c om assertTokenStream(stream, "cc,0,2,1/c,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/cc,4,6,0/c,5,6,0/a,6,7,1/aa,6,7,0"); a = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("edcabcdea")); stream.reset(); assertTokenStream(stream, "edc,0,3,1/dc,1,3,0/c,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/cd,5,7,0/cde,5,8,0/de,6,8,0/e,7,8,0/a,8,9,1/aa,8,9,0"); a = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("fedcabcdefa")); stream.reset(); assertTokenStream(stream, "fed,0,3,1/edc,1,4,1/dc,2,4,0/c,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/cd,6,8,0/cde,6,9,0/def,7,10,1/ef,8,10,0/f,9,10,0/a,10,11,1/aa,10,11,0"); a = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa/b,bb"); stream = a.tokenStream("f", new StringReader("gfedcabcdefga")); stream.reset(); assertTokenStream(stream, "gfe,0,3,1/fed,1,4,1/edc,2,5,1/dc,3,5,0/c,4,5,0/a,5,6,1/aa,5,6,0/b,6,7,1/bb,6,7,0/c,7,8,1/cd,7,9,0/cde,7,10,0/def,8,11,1/efg,9,12,1/fg,10,12,0/g,11,12,0/a,12,13,1/aa,12,13,0"); }
From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java
License:Apache License
/**
 * 4-gram tokenization with expand=true: synonym targets emit their
 * synonyms at the same position, and shorter edge n-grams next to a
 * synonym boundary are emitted with posIncr 0.
 * Expected-token format: term,startOffset,endOffset,positionIncrement.
 */
@Test
public void testComplexExpand4() throws Exception {
    Analyzer a = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa/b,bb");
    TokenStream stream = a.tokenStream("f", new StringReader("cabca"));
    stream.reset();
    assertTokenStream(stream, "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0");
    stream.close(); // release the stream per the TokenStream lifecycle contract

    a = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("ccabcca"));
    stream.reset();
    assertTokenStream(stream,
            "cc,0,2,1/c,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/cc,4,6,0/c,5,6,0/a,6,7,1/aa,6,7,0");
    stream.close();

    a = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("edcabcdea"));
    stream.reset();
    assertTokenStream(stream,
            "edc,0,3,1/dc,1,3,0/c,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/cd,5,7,0/cde,5,8,0/de,6,8,0/e,7,8,0/a,8,9,1/aa,8,9,0");
    stream.close();

    a = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("fedcabcdefa"));
    stream.reset();
    assertTokenStream(stream,
            "fedc,0,4,1/edc,1,4,0/dc,2,4,0/c,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/cd,6,8,0/cde,6,9,0/cdef,6,10,0/def,7,10,0/ef,8,10,0/f,9,10,0/a,10,11,1/aa,10,11,0");
    stream.close();

    a = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa/b,bb");
    stream = a.tokenStream("f", new StringReader("gfedcabcdefga"));
    stream.reset();
    assertTokenStream(stream,
            "gfed,0,4,1/fedc,1,5,1/edc,2,5,0/dc,3,5,0/c,4,5,0/a,5,6,1/aa,5,6,0/b,6,7,1/bb,6,7,0/c,7,8,1/cd,7,9,0/cde,7,10,0/cdef,7,11,0/defg,8,12,1/efg,9,12,0/fg,10,12,0/g,11,12,0/a,12,13,1/aa,12,13,0");
    stream.close();
}
From source file:kafka.examples.Producer.java
License:Apache License
public void run() { while (true) { String access_token = "2.009F1d9BmHHChD7abcd6de0a0jui5Y"; int count = 20; Timeline tm = new Timeline(access_token); Analyzer analyzer4 = new IKAnalyzer(false);// ? try {//from w w w . j av a 2 s . co m StatusWapper status = tm.getPublicTimeline(count, 0); //------------------------------------------- try { TokenStream tokenstream = analyzer4.tokenStream("", new StringReader(status.toString())); CharTermAttribute termAttribute = tokenstream.addAttribute(CharTermAttribute.class);// token tokenstream.reset();// ? while (tokenstream.incrementToken()) {// ??token String prTxt = new String(termAttribute.buffer(), 0, termAttribute.length()); //producer.send(new KeyedMessage<Integer, String>(topic, ptTxt + " ")); System.out.print(prTxt + " "); } //System.out.println(); tokenstream.close();//TokenStream } catch (IOException e) { e.printStackTrace(); } //------------------------------------------- producer.send(new KeyedMessage<Integer, String>(topic, status.toString())); Log.logInfo(status.toString()); } catch (WeiboException e) { e.printStackTrace(); } } }
From source file:lia.analysis.CopyOfAnalyzerDemo.java
License:Apache License
/**
 * Runs the given text through every analyzer in {@code analyzers} and
 * prints the terms each one produces, labelled with the analyzer's
 * simple class name.
 *
 * @param text the text to tokenize
 * @throws IOException if a tokenizer fails while reading the text
 */
private static void analyze(String text) throws IOException {
    System.out.println("Analyzing \"" + text + "\"");
    for (Analyzer analyzer : analyzers) {
        String name = analyzer.getClass().getSimpleName();
        System.out.println(name);
        // try-with-resources: the original never closed the stream, which
        // violates the TokenStream contract and breaks analyzer reuse on
        // subsequent analyze() calls with the same (static) analyzers.
        try (TokenStream stream = analyzer.tokenStream("dummy", text)) {
            // Attributes must be registered before reset(); the original also
            // registered Offset/Type/PositionIncrement attributes it never
            // read, so those unused locals are dropped here.
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.print("[" + termAttr + "] ");
            }
            stream.end(); // required before close() by the TokenStream contract
        }
        System.out.println("");
    }
}
From source file:lia.chapter4.SimpleAnalyzer.java
License:Apache License
/**
 * Demonstrates the TokenStream consumer workflow with SimpleAnalyzer:
 * addAttribute → reset → incrementToken loop → end → close.
 * Prints each token's start offset, end offset, and term text.
 */
public static void main(String[] args) throws IOException {
    TokenStream tokenStream = new SimpleAnalyzer().tokenStream("text", "The quick brown fox..");
    // Register attributes before reset(), per the TokenStream contract.
    OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
        int startOffset = offsetAttribute.startOffset();
        System.out.println(startOffset);
        int endOffset = offsetAttribute.endOffset();
        System.out.println(endOffset);
        String term = charTermAttribute.toString();
        System.out.println(term);
    }
    // The original never released the stream; end() then close() are
    // required by the TokenStream lifecycle contract.
    tokenStream.end();
    tokenStream.close();
}