Usage examples for org.apache.lucene.analysis.BaseTokenStreamTestCase#assertAnalyzesTo
public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException
From source file:org.lexevs.dao.indexer.lucene.analyzers.SnowballAnalyzerTest.java
License:Open Source License
@Test public void testDontKeepOrigional() throws Exception { Analyzer temp = new Analyzer() { @Override//from w w w . ja v a2s. co m protected TokenStreamComponents createComponents(String fieldName) { final StandardTokenizer source = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); source.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); TokenStream filter = new StandardFilter(source); filter = new LowerCaseFilter(filter); filter = new StopFilter(filter, StandardAnalyzer.STOP_WORDS_SET); filter = new SnowballFilter(filter, "English"); return new TokenStreamComponents(source, filter); } }; String input = new String("The trees have Leaves!"); String[] output = { "tree", "have", "leav" }; BaseTokenStreamTestCase.assertAnalyzesTo(temp, input, output); }
From source file:org.lexevs.dao.indexer.lucene.analyzers.StringAnalyzerTest.java
License:Open Source License
public void testStringAnalyzer() throws Exception { String input = new String("The<:>trees<:>have<:>Leaves!"); // Was String[] output = {"The","trees", "have","Leaves!"}; // Changed to String[] output = { "the", "trees", "have", "leaves" }; BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(new CharArraySet(getList(), false)), input, output);//from w ww . ja v a2 s.co m // StringAnalyzer temp = new StringAnalyzer("<:>"); // String input = new String("The<:>trees<:>have<:>Leaves!"); // StringReader reader = new StringReader(input); // TokenStream result = temp.tokenStream("test", reader); // Token token = result.next(); // assertTrue(token.termText().equals("The")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 0); // assertTrue(token.endOffset() == 3); // // token = result.next(); // assertTrue(token.termText().equals("trees")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 6); // assertTrue(token.endOffset() == 11); // // token = result.next(); // assertTrue(token.termText().equals("have")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 14); // assertTrue(token.endOffset() == 18); // // token = result.next(); // assertTrue(token.termText().equals("Leaves!")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 21); // assertTrue(token.endOffset() == 28); // // token = result.next(); // // assertTrue(result.next() == null); }
From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java
License:Open Source License
public void testLowerCaseStopRemoval() throws Exception { String input = new String("A test String foo Foo"); String[] output = { "a", "test", "string" }; BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(new CharArraySet(getList(), true)), input, output);//from www . ja v a 2 s . c om // WhiteSpaceLowerCaseAnalyzer temp = new WhiteSpaceLowerCaseAnalyzer(new String[] { "foo", "bar" }, // new char[] { ',' }, new char[] { '-' }); // String input = new String("A test String foo Foo"); // StringReader reader = new StringReader(input); // TokenStream result = temp.tokenStream("test", reader); // Token token = result.next(); // assertTrue(token.termText().equals("a")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 0); // assertTrue(token.endOffset() == 1); // // token = result.next(); // assertTrue(token.termText().equals("test")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 2); // assertTrue(token.endOffset() == 6); // // token = result.next(); // assertTrue(token.termText().equals("string")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 7); // assertTrue(token.endOffset() == 13); // // assertTrue(result.next() == null); }
From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java
License:Open Source License
public void testCharRemoval() throws Exception { String input = new String("foo, test, me"); String[] output = { "test", "me" }; BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(new CharArraySet(getList(), true)), input, output);//from ww w. j ava 2 s .co m // WhiteSpaceLowerCaseAnalyzer temp = new WhiteSpaceLowerCaseAnalyzer(new String[] { "foo", "bar" }, // new char[] { ',' }, new char[] { '-' }); // String input = new String("foo, test, me"); // // StringReader reader = new StringReader(input); // TokenStream result = temp.tokenStream("test", reader); // // Token token = result.next(); // assertTrue(token.termText().equals("test")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 5); // assertTrue(token.endOffset() == 10); // // token = result.next(); // assertTrue(token.termText().equals("me")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 11); // assertTrue(token.endOffset() == 13); // // assertTrue(result.next() == null); }
From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java
License:Open Source License
public void testWhiteSpaceAdditions() throws Exception { String input = new String("foo,- Test-some me-"); String[] output = { "test", "some", "me" }; BaseTokenStreamTestCase.assertAnalyzesTo(new StandardAnalyzer(new CharArraySet(getList(), true)), input, output);/*from w w w .j a va 2s. c o m*/ // String input = new String("foo,- Test-some me-"); // // StringReader reader = new StringReader(input); // TokenStream result = temp.tokenStream("test", reader); // // Token token = result.next(); // assertTrue(token.termText().equals("test")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 6); // assertTrue(token.endOffset() == 10); // // token = result.next(); // assertTrue(token.termText().equals("some")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 11); // assertTrue(token.endOffset() == 15); // // token = result.next(); // assertTrue(token.termText().equals("me")); // assertTrue(token.getPositionIncrement() == 1); // assertTrue(token.startOffset() == 16); // assertTrue(token.endOffset() == 18); // // assertTrue(result.next() == null); }
From source file:org.lexevs.dao.indexer.lucene.analyzers.WhiteSpaceLowerCaseAnalyzerTest.java
License:Open Source License
/**
 * Verifies case preservation: KeywordAnalyzer emits the entire input as a
 * single, unmodified token, so "Test" stays "Test" (not lower-cased).
 *
 * @throws IOException if token-stream analysis fails
 */
public void testCaseSensitiveAnalyzer() throws IOException {
    // Redundant `new String(...)` constructor removed.
    String input = "Test";
    String[] output = { "Test" };
    BaseTokenStreamTestCase.assertAnalyzesTo(new KeywordAnalyzer(), input, output);
}