Example usage of org.apache.lucene.analysis.TokenStream#reset()

List of usage examples for org.apache.lucene.analysis.TokenStream#reset()

Introduction

On this page you can find usage examples for the reset() method of org.apache.lucene.analysis.TokenStream.

Prototype

public void reset() throws IOException 

Source Link

Document

This method is called by a consumer before it begins consuming tokens with incrementToken().

Usage

From source file:jp.sf.fess.solr.plugin.analysis.ja.TestJapaneseNumberFilter.java

License:Apache License

/**
 * Tokenizes the text supplied by {@code reader} with {@code analyzer} and writes
 * each term to {@code writer}, one term per line (terminated by {@code "\n"}).
 *
 * <p>The token stream is driven through the full consumer workflow
 * (attribute lookup, {@code reset()}, {@code incrementToken()} loop, {@code end()},
 * {@code close()}). The token stream, the reader and the writer are all closed
 * before this method returns, even when tokenizing or writing fails.
 *
 * @param analyzer analyzer used to build the token stream (field name {@code "dummy"})
 * @param reader   source of the text to analyze; closed by this method
 * @param writer   destination for the terms; closed by this method
 * @throws IOException if reading, tokenizing, writing or closing fails
 */
public void analyze(final Analyzer analyzer, final Reader reader, final Writer writer) throws IOException {
    try {
        final TokenStream stream = analyzer.tokenStream("dummy", reader);
        try {
            final CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);

            // reset() must be called before the first incrementToken().
            stream.reset();
            while (stream.incrementToken()) {
                writer.write(termAttr.toString());
                writer.write("\n");
            }
            // Signals end-of-stream so the stream can perform its end-of-stream operations.
            stream.end();
        } finally {
            // Release the stream even on failure; an unclosed stream can break
            // later tokenStream() calls on a reused analyzer.
            stream.close();
        }
    } finally {
        // Close both caller-supplied resources; the writer is closed even if
        // closing the reader throws.
        try {
            reader.close();
        } finally {
            writer.close();
        }
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Runs analyzers constructed with the single int argument 1 through 8 (no synonym
 * argument) over the test input and checks the produced tokens with
 * {@code assertTokenStream}.
 *
 * <p>The analyzer built with 2 is reused for three consecutive streams; the first
 * two of those streams are closed before the next one is requested.
 *
 * <p>NOTE(review): every input here is an empty string although the expectations
 * list several tokens with empty terms - the original input text may have been
 * lost; confirm against the upstream test.
 */
@Test
public void testNullSynonyms() throws Exception {
    // Argument 1: one fresh analyzer, one stream.
    final Analyzer sizeOne = new NGramSynonymTokenizerTestAnalyzer(1);
    TokenStream tokens = sizeOne.tokenStream("f", new StringReader(""));
    tokens.reset();
    assertTokenStream(tokens, ",0,1,1/,1,2,1/,2,3,1/,3,4,1/,4,5,1/,5,6,1");

    // Argument 2: the same analyzer serves three streams in a row. Each earlier
    // stream is closed before the next is requested; the last one is left open.
    final Analyzer sizeTwo = new NGramSynonymTokenizerTestAnalyzer(2);
    final String[] sizeTwoExpected = {
        ",0,2,1/,1,3,1/,2,4,1/,3,5,1/,4,6,1",
        ",0,1,1",
        ",0,2,1"
    };
    for (int round = 0; round < sizeTwoExpected.length; round++) {
        if (round > 0) {
            tokens.close();
        }
        tokens = sizeTwo.tokenStream("f", new StringReader(""));
        tokens.reset();
        assertTokenStream(tokens, sizeTwoExpected[round]);
    }

    // Arguments 3 through 8: a fresh analyzer per value, one stream each.
    final String[] expectedFromThree = {
        ",0,3,1/,1,4,1/,2,5,1/,3,6,1",
        ",0,4,1/,1,5,1/,2,6,1",
        ",0,5,1/,1,6,1",
        ",0,6,1",
        ",0,6,1",
        ",0,6,1"
    };
    for (int offset = 0; offset < expectedFromThree.length; offset++) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3 + offset);
        tokens = analyzer.tokenStream("f", new StringReader(""));
        tokens.reset();
        assertTokenStream(tokens, expectedFromThree[offset]);
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Checks the tokens produced for inputs that are themselves entries of a single
 * synonym definition, using analyzers built with {@code (1, false, synonyms)}.
 * A fresh analyzer is created for every case.
 */
@Test
public void testSingleSynonym() throws Exception {
    // Each row: { synonym definition, input text, expected tokens }.
    final String[][] cases = {
        { "a,aa,aaa", "a", "a,0,1,1" },
        { "a,aa,aaa", "aa", "aa,0,2,1" },
        { "a,aa,aaa", "aaa", "aaa,0,3,1" },
        { "a", "a", "a,0,1,1" }
    };

    for (final String[] row : cases) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, row[0]);
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader(row[1]));
        tokens.reset();
        assertTokenStream(tokens, row[2]);
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Analyzes the lower-case input {@code "aaa"} with an analyzer built from the
 * upper-case synonym definition {@code "A,AA,AAA"} and expects the single token
 * description {@code "aaa,0,3,1"}.
 */
@Test
public void testSingleSynonymIgnoreCase() throws Exception {
    final String synonyms = "A,AA,AAA";
    final String input = "aaa";
    final String expected = "aaa,0,3,1";

    final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, synonyms);
    final TokenStream tokens = analyzer.tokenStream("f", new StringReader(input));
    tokens.reset();
    assertTokenStream(tokens, expected);
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Same inputs as the single-synonym test, but with analyzers built with
 * {@code (1, true, synonyms)}; the expectations additionally list the other
 * entries of the synonym definition. A fresh analyzer is created for every case.
 */
@Test
public void testSingleSynonymExpand() throws Exception {
    // Each row: { synonym definition, input text, expected tokens }.
    final String[][] cases = {
        { "a,aa,aaa", "a", "a,0,1,1/aa,0,1,0/aaa,0,1,0" },
        { "a,aa,aaa", "aa", "aa,0,2,1/a,0,2,0/aaa,0,2,0" },
        { "a,aa,aaa", "aaa", "aaa,0,3,1/a,0,3,0/aa,0,3,0" },
        { "a", "a", "a,0,1,1" }
    };

    for (final String[] row : cases) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, row[0]);
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader(row[1]));
        tokens.reset();
        assertTokenStream(tokens, row[2]);
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Checks the tokens produced when the synonym definition contains several
 * '/'-separated groups, using analyzers built with {@code (1, false, synonyms)}.
 * A fresh analyzer is created for every case.
 */
@Test
public void testMultipleSynonyms() throws Exception {
    // Parallel arrays: index i describes one case.
    final String[] synonyms = { "a,aa/b,bb", "a,aa/b,bb/c,cc" };
    final String[] inputs = { "ababb", "cba" };
    final String[] expected = {
        "a,0,1,1/b,1,2,1/a,2,3,1/bb,3,5,1",
        "c,0,1,1/b,1,2,1/a,2,3,1"
    };

    for (int i = 0; i < inputs.length; i++) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, synonyms[i]);
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader(inputs[i]));
        tokens.reset();
        assertTokenStream(tokens, expected[i]);
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Same inputs as the multiple-synonyms test, but with analyzers built with
 * {@code (1, true, synonyms)}; the expectations additionally list the other
 * entry of each matched synonym group. A fresh analyzer is created for every case.
 */
@Test
public void testMultipleSynonymsExpand() throws Exception {
    // Parallel arrays: index i describes one case.
    final String[] synonyms = { "a,aa/b,bb", "a,aa/b,bb/c,cc" };
    final String[] inputs = { "ababb", "cba" };
    final String[] expected = {
        "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,2,0/a,2,3,1/aa,2,3,0/bb,3,5,1/b,3,5,0",
        "c,0,1,1/cc,0,1,0/b,1,2,1/bb,1,2,0/a,2,3,1/aa,2,3,0"
    };

    for (int i = 0; i < inputs.length; i++) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, synonyms[i]);
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader(inputs[i]));
        tokens.reset();
        assertTokenStream(tokens, expected[i]);
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Checks inputs in which the synonym entry {@code "a"} is preceded by one to four
 * other characters, using analyzers built with {@code (1, false, "a,aa")}.
 * A fresh analyzer is created for every case.
 */
@Test
public void testPrevStrSingleSynonym1() throws Exception {
    // Each row: { input text, expected tokens }.
    final String[][] cases = {
        { "ba", "b,0,1,1/a,1,2,1" },
        { "bba", "b,0,1,1/b,1,2,1/a,2,3,1" },
        { "dcba", "d,0,1,1/c,1,2,1/b,2,3,1/a,3,4,1" },
        { "edcba", "e,0,1,1/d,1,2,1/c,2,3,1/b,3,4,1/a,4,5,1" }
    };

    for (final String[] row : cases) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, "a,aa");
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader(row[0]));
        tokens.reset();
        assertTokenStream(tokens, row[1]);
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Checks inputs in which the synonym entry {@code "a"} is preceded by one to four
 * other characters, using analyzers built with {@code (2, false, "a,aa")}.
 * A fresh analyzer is created for every case.
 */
@Test
public void testPrevStrSingleSynonym2() throws Exception {
    // Each row: { input text, expected tokens }.
    final String[][] cases = {
        { "ba", "b,0,1,1/a,1,2,1" },
        { "bba", "bb,0,2,1/a,2,3,1" },
        { "dcba", "dc,0,2,1/cb,1,3,1/a,3,4,1" },
        { "edcba", "ed,0,2,1/dc,1,3,1/cb,2,4,1/a,4,5,1" }
    };

    for (final String[] row : cases) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa");
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader(row[0]));
        tokens.reset();
        assertTokenStream(tokens, row[1]);
    }
}

From source file:jp.sf.fess.solr.plugin.analysis.synonym.NGramSynonymTokenizerTest.java

License:Apache License

/**
 * Checks inputs in which the synonym entry {@code "a"} is preceded by one to four
 * other characters, using analyzers built with {@code (3, false, "a,aa")}.
 * A fresh analyzer is created for every case.
 */
@Test
public void testPrevStrSingleSynonym3() throws Exception {
    // Each row: { input text, expected tokens }.
    final String[][] cases = {
        { "ba", "b,0,1,1/a,1,2,1" },
        { "bba", "bb,0,2,1/a,2,3,1" },
        { "dcba", "dcb,0,3,1/a,3,4,1" },
        { "edcba", "edc,0,3,1/dcb,1,4,1/a,4,5,1" }
    };

    for (final String[] row : cases) {
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa");
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader(row[0]));
        tokens.reset();
        assertTokenStream(tokens, row[1]);
    }
}