List of usage examples for org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory#create
@Override
public UAX29URLEmailTokenizer create(AttributeFactory factory)
From source file:ru.org.linux.solr.TestURLFilterFactory.java
License:Apache License
/**
 * Feeds a URL carrying a query string through {@code UAX29URLEmailTokenizerFactory} and
 * {@code URLFilterFactory}, and asserts that the resulting stream yields the complete URL
 * followed by the individual words of its host, path and query string.
 *
 * @throws Exception if building or consuming the token stream fails
 */
public void testURL1() throws Exception {
    // The separator in front of "paragraphs" must be a literal '&'. An HTML-decoding step
    // had turned "&para" into the pilcrow sign, which contradicts the "paragraphs" token
    // expected below. The URL is kept in one local so input and expectation cannot drift.
    String url =
            "http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on";
    Reader reader = new StringReader(url);

    // Both factories are initialised with the same empty argument map.
    Map<String, String> args = Collections.emptyMap();

    UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    factory.init(args);

    URLFilterFactory filterFactory = new URLFilterFactory();
    filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    filterFactory.init(args);

    Tokenizer tokenizer = factory.create(reader);
    TokenStream stream = filterFactory.create(tokenizer);

    assertTokenStreamContents(stream, new String[] {
            url,
            "johno", "jsmf", "net",
            "knowhow", "ngrams", "index", "php",
            "table", "en", "dickens", "word", "2gram",
            "paragraphs", "50",
            "length", "200",
            "no", "ads", "on"
    });
}
From source file:ru.org.linux.solr.TestURLFilterFactory.java
License:Apache License
public void testURL2() throws Exception { Reader reader = new StringReader("http://johno.jsmf.net/knowhow/ngrams/index.php"); UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory(); factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); Map<String, String> args = Collections.emptyMap(); factory.init(args);//from w w w . j a v a 2s . c om URLFilterFactory filterFactory = new URLFilterFactory(); filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT); filterFactory.init(args); Tokenizer tokenizer = factory.create(reader); TokenStream stream = filterFactory.create(tokenizer); assertTokenStreamContents(stream, new String[] { "http://johno.jsmf.net/knowhow/ngrams/index.php", "johno", "jsmf", "net", "knowhow", "ngrams", "index", "php" }); }