Example usage for the org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory constructor

List of usage examples for the org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory constructor

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory constructor.

Prototype

UAX29URLEmailTokenizerFactory

Source Link

Usage

From source file: ru.org.linux.solr.TestURLFilterFactory.java

License:Apache License

/**
 * Runs a URL carrying a query string through a tokenizer built by
 * {@code UAX29URLEmailTokenizerFactory} and a filter built by {@code URLFilterFactory},
 * then asserts the resulting token stream.
 *
 * <p>The expected output is the complete URL as the first token, followed by the
 * individual host, path and query-string words listed below.
 *
 * @throws Exception if tokenizing or asserting the stream fails
 */
public void testURL1() throws Exception {
    // Both factories are initialised with the same empty argument map.
    Map<String, String> noArgs = Collections.emptyMap();

    UAX29URLEmailTokenizerFactory tokenizerFactory = new UAX29URLEmailTokenizerFactory();
    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    tokenizerFactory.init(noArgs);

    URLFilterFactory urlFilterFactory = new URLFilterFactory();
    urlFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    urlFilterFactory.init(noArgs);

    Reader input = new StringReader(
            "http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on");
    Tokenizer source = tokenizerFactory.create(input);
    TokenStream filtered = urlFilterFactory.create(source);

    String[] expectedTokens = new String[] {
            "http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on",
            "johno", "jsmf", "net", "knowhow", "ngrams", "index", "php",
            "table", "en", "dickens", "word", "2gram",
            "paragraphs", "50", "length", "200", "no", "ads", "on" };
    assertTokenStreamContents(filtered, expectedTokens);
}

From source file: ru.org.linux.solr.TestURLFilterFactory.java

License:Apache License

/**
 * Runs a URL without a query string through a tokenizer built by
 * {@code UAX29URLEmailTokenizerFactory} and a filter built by {@code URLFilterFactory},
 * then asserts the resulting token stream.
 *
 * <p>The expected output is the complete URL as the first token, followed by the
 * individual host and path words listed below.
 *
 * @throws Exception if tokenizing or asserting the stream fails
 */
public void testURL2() throws Exception {
    // Both factories are initialised with the same empty argument map.
    Map<String, String> noArgs = Collections.emptyMap();

    UAX29URLEmailTokenizerFactory tokenizerFactory = new UAX29URLEmailTokenizerFactory();
    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    tokenizerFactory.init(noArgs);

    URLFilterFactory urlFilterFactory = new URLFilterFactory();
    urlFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    urlFilterFactory.init(noArgs);

    Reader input = new StringReader("http://johno.jsmf.net/knowhow/ngrams/index.php");
    Tokenizer source = tokenizerFactory.create(input);
    TokenStream filtered = urlFilterFactory.create(source);

    String[] expectedTokens = new String[] {
            "http://johno.jsmf.net/knowhow/ngrams/index.php",
            "johno", "jsmf", "net", "knowhow", "ngrams", "index", "php" };
    assertTokenStreamContents(filtered, expectedTokens);
}