Example usage for org.apache.lucene.analysis BaseTokenStreamTestCase assertAnalyzesTo

List of usage examples for org.apache.lucene.analysis BaseTokenStreamTestCase assertAnalyzesTo

Introduction

On this page you can find example usages of org.apache.lucene.analysis.BaseTokenStreamTestCase.assertAnalyzesTo.

Prototype

public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException 

Source Link

Usage

From source file:org.lexevs.dao.indexer.lucene.analyzers.SnowballAnalyzerTest.java

License:Open Source License

/**
 * Runs a sample sentence through an ad-hoc analyzer chain (StandardTokenizer,
 * StandardFilter, LowerCaseFilter, StopFilter with the StandardAnalyzer stop
 * set, then SnowballFilter configured for "English") and asserts the emitted
 * terms.
 *
 * @throws Exception if the analysis assertion helper fails with a checked exception
 */
@Test
public void testDontKeepOrigional() throws Exception {
    Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final StandardTokenizer source = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
            source.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
            // Each filter wraps the previous one, so order matters:
            // lower-casing happens before stop-word removal and stemming.
            TokenStream filter = new StandardFilter(source);
            filter = new LowerCaseFilter(filter);
            filter = new StopFilter(filter, StandardAnalyzer.STOP_WORDS_SET);
            filter = new SnowballFilter(filter, "English");
            return new TokenStreamComponents(source, filter);
        }
    };

    // Only three terms are expected for the four input words: "The" does not
    // appear in the output, and the remaining words come out lower-cased/stemmed.
    String input = "The trees have Leaves!";
    String[] output = { "tree", "have", "leav" };
    BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, input, output);
}

From source file:org.lexevs.dao.indexer.lucene.analyzers.StringAnalyzerTest.java

License:Open Source License

/**
 * Asserts the terms produced for a {@code "<:>"}-delimited sentence by a
 * StandardAnalyzer whose stop set is built from {@code getList()} with the
 * CharArraySet {@code ignoreCase} flag set to {@code false}.
 *
 * <p>History: an earlier version of this test drove a custom
 * {@code StringAnalyzer("<:>")} and expected the tokens
 * {@code "The", "trees", "have", "Leaves!"} (case and punctuation kept),
 * also checking position increments and start/end offsets per token. The
 * current version passes only the expected terms, which are lower-cased and
 * without the trailing {@code '!'}.
 *
 * @throws Exception if the analysis assertion helper fails with a checked exception
 */
public void testStringAnalyzer() throws Exception {
    String input = "The<:>trees<:>have<:>Leaves!";
    String[] output = { "the", "trees", "have", "leaves" };

    // NOTE(review): "the" is expected in the output, so getList() presumably
    // does not contain it — confirm against the helper's definition.
    CharArraySet stopWords = new CharArraySet(getList(), false);
    BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(stopWords), input, output);
}

From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java

License:Open Source License

/**
 * Asserts that analyzing {@code "A test String foo Foo"} with a
 * StandardAnalyzer, whose stop set is built from {@code getList()} with the
 * CharArraySet {@code ignoreCase} flag set to {@code true}, yields only
 * {@code "a", "test", "string"}.
 *
 * <p>History: an earlier version of this test used a custom
 * {@code WhiteSpaceLowerCaseAnalyzer} constructed with the stop words
 * {@code "foo"} and {@code "bar"} plus the character arrays {@code {','}} and
 * {@code {'-'}}, and additionally checked position increments and start/end
 * offsets of each token. The current version passes only the expected terms.
 *
 * @throws Exception if the analysis assertion helper fails with a checked exception
 */
public void testLowerCaseStopRemoval() throws Exception {
    String input = "A test String foo Foo";
    String[] output = { "a", "test", "string" };

    // NOTE(review): both "foo" and "Foo" are expected to be dropped, so
    // getList() presumably contains "foo" — confirm against the helper.
    CharArraySet stopWords = new CharArraySet(getList(), true);
    BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(stopWords), input, output);
}

From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java

License:Open Source License

/**
 * Asserts that analyzing {@code "foo, test, me"} with a StandardAnalyzer,
 * whose stop set is built from {@code getList()} with the CharArraySet
 * {@code ignoreCase} flag set to {@code true}, yields only
 * {@code "test", "me"} — i.e. no commas and no {@code "foo"} term.
 *
 * <p>History: an earlier version of this test used a custom
 * {@code WhiteSpaceLowerCaseAnalyzer} constructed with the stop words
 * {@code "foo"} and {@code "bar"} plus the character arrays {@code {','}} and
 * {@code {'-'}}, and additionally checked position increments and start/end
 * offsets of each token. The current version passes only the expected terms.
 *
 * @throws Exception if the analysis assertion helper fails with a checked exception
 */
public void testCharRemoval() throws Exception {
    String input = "foo, test, me";
    String[] output = { "test", "me" };

    // NOTE(review): "foo" is expected to be dropped, so getList() presumably
    // contains it — confirm against the helper.
    CharArraySet stopWords = new CharArraySet(getList(), true);
    BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(stopWords), input, output);
}

From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java

License:Open Source License

/**
 * Asserts that analyzing {@code "foo,- Test-some me-"} with a
 * StandardAnalyzer, whose stop set is built from {@code getList()} with the
 * CharArraySet {@code ignoreCase} flag set to {@code true}, yields
 * {@code "test", "some", "me"}: the hyphenated {@code "Test-some"} is expected
 * as two lower-cased terms and the stray {@code ','} / {@code '-'} characters
 * produce no terms.
 *
 * <p>History: an earlier version of this test obtained its token stream from
 * a different analyzer instance and additionally checked position increments
 * and start/end offsets of each token. The current version passes only the
 * expected terms.
 *
 * @throws Exception if the analysis assertion helper fails with a checked exception
 */
public void testWhiteSpaceAdditions() throws Exception {
    String input = "foo,- Test-some me-";
    String[] output = { "test", "some", "me" };

    // NOTE(review): "foo" is expected to be dropped, so getList() presumably
    // contains it — confirm against the helper.
    CharArraySet stopWords = new CharArraySet(getList(), true);
    BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(stopWords), input, output);
}

From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java

License:Open Source License

/**
 * Asserts that a KeywordAnalyzer emits the input {@code "Test"} as the single
 * term {@code "Test"}, with its capitalisation unchanged.
 *
 * @throws IOException if the analysis assertion helper fails while reading the token stream
 */
public void testCaseSensitiveAnalyzer() throws IOException {
    String input = "Test";
    String[] output = { "Test" };
    BaseTokenStreamTestCase.assertAnalyzesTo(new KeywordAnalyzer(), input, output);
}