Example usage of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.create

List of usage examples for org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.create

Introduction

On this page you can find example usages of org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.create.

Prototype

@Override
    public UAX29URLEmailTokenizer create(AttributeFactory factory) 

Source Link

Usage

From source file:ru.org.linux.solr.TestURLFilterFactory.java

License:Apache License

/**
 * Runs a URL with a query string through the UAX29 URL/e-mail tokenizer followed by the
 * URL filter, and asserts the emitted tokens: the complete URL first, then the listed
 * host, path and query-string fragments in order.
 */
public void testURL1() throws Exception {
    final String url =
            "http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on";
    // Both factories are initialised with the same empty argument map.
    final Map<String, String> noArgs = Collections.emptyMap();

    UAX29URLEmailTokenizerFactory tokenizerFactory = new UAX29URLEmailTokenizerFactory();
    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    tokenizerFactory.init(noArgs);

    URLFilterFactory urlFilterFactory = new URLFilterFactory();
    urlFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    urlFilterFactory.init(noArgs);

    // Chain: raw text -> tokenizer -> URL filter.
    Tokenizer source = tokenizerFactory.create(new StringReader(url));
    TokenStream filtered = urlFilterFactory.create(source);

    final String[] expectedTokens = {
            url,
            "johno", "jsmf", "net", "knowhow", "ngrams", "index", "php", "table", "en", "dickens", "word",
            "2gram", "paragraphs", "50", "length", "200", "no", "ads", "on" };
    assertTokenStreamContents(filtered, expectedTokens);
}

From source file:ru.org.linux.solr.TestURLFilterFactory.java

License:Apache License

/**
 * Runs a URL without a query string through the UAX29 URL/e-mail tokenizer followed by
 * the URL filter, and asserts the emitted tokens: the complete URL first, then the listed
 * host and path fragments in order.
 */
public void testURL2() throws Exception {
    final String url = "http://johno.jsmf.net/knowhow/ngrams/index.php";
    // Both factories are initialised with the same empty argument map.
    final Map<String, String> noArgs = Collections.emptyMap();

    UAX29URLEmailTokenizerFactory tokenizerFactory = new UAX29URLEmailTokenizerFactory();
    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    tokenizerFactory.init(noArgs);

    URLFilterFactory urlFilterFactory = new URLFilterFactory();
    urlFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    urlFilterFactory.init(noArgs);

    // Chain: raw text -> tokenizer -> URL filter.
    Tokenizer source = tokenizerFactory.create(new StringReader(url));
    TokenStream filtered = urlFilterFactory.create(source);

    final String[] expectedTokens = {
            url, "johno", "jsmf", "net", "knowhow", "ngrams", "index", "php" };
    assertTokenStreamContents(filtered, expectedTokens);
}