Example usage for org.apache.lucene.analysis.miscellaneous WordDelimiterFilterFactory create

List of usage examples for org.apache.lucene.analysis.miscellaneous WordDelimiterFilterFactory create

Introduction

On this page you can find example usages of the create method of org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory.

Prototype

@Override
    public TokenFilter create(TokenStream input) 

Source Link

Usage

From source file: org.apache.solr.analysis.TestWordDelimiterFilterFactory.java

License:Apache License

/**
 * Compares the tokens produced by a WordDelimiterFilterFactory configured with
 * the common arguments only against one that additionally sets
 * {@code types=wdftypes.txt}.
 *
 * <p>The assertions expect that, without the types file, "$5,400.00" and "25%"
 * are broken into number parts and U+200D separates "foo" from "bar"; with the
 * types file those three inputs come through as single tokens, while
 * "interest-rate" is still split and catenated in both configurations.
 */
@Test
public void testCustomTypes() throws Exception {
    final String sentence = "I borrowed $5,400.00 at 25% interest-rate";
    // "foo" and "bar" joined by U+200D (a code point above 0x7F).
    final String joinedWords = "foo\u200Dbar";
    final ResourceLoader resourceLoader = new SolrResourceLoader("solr/collection1");

    /* default behavior: no "types" argument */
    WordDelimiterFilterFactory defaultFactory = new WordDelimiterFilterFactory(commonDelimiterArgs());
    defaultFactory.inform(resourceLoader);

    String[] defaultSentenceTokens = { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest",
            "rate", "interestrate" };
    TokenStream stream = defaultFactory.create(whitespaceTokenizer(sentence));
    BaseTokenStreamTestCase.assertTokenStreamContents(stream, defaultSentenceTokens);

    String[] defaultJoinedTokens = { "foo", "bar", "foobar" };
    stream = defaultFactory.create(whitespaceTokenizer(joinedWords));
    BaseTokenStreamTestCase.assertTokenStreamContents(stream, defaultJoinedTokens);

    /* custom behavior: same arguments plus a custom type mapping file */
    Map<String, String> customArgs = commonDelimiterArgs();
    customArgs.put("types", "wdftypes.txt");
    WordDelimiterFilterFactory customFactory = new WordDelimiterFilterFactory(customArgs);
    customFactory.inform(resourceLoader);

    String[] customSentenceTokens = { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate",
            "interestrate" };
    stream = customFactory.create(whitespaceTokenizer(sentence));
    BaseTokenStreamTestCase.assertTokenStreamContents(stream, customSentenceTokens);

    /* custom behavior with a char > 0x7F, because we had to make a larger byte[] */
    String[] customJoinedTokens = { joinedWords };
    stream = customFactory.create(whitespaceTokenizer(joinedWords));
    BaseTokenStreamTestCase.assertTokenStreamContents(stream, customJoinedTokens);
}

/**
 * Builds a new, mutable map holding the six arguments both factories in
 * {@link #testCustomTypes()} are configured with.
 *
 * <p>NOTE(review): a separate map is created for every factory, as the test has
 * always done; presumably the factory constructor may modify the map it is
 * given, so confirm before sharing one instance between factories.
 */
private static Map<String, String> commonDelimiterArgs() {
    Map<String, String> settings = new HashMap<String, String>();
    settings.put("generateWordParts", "1");
    settings.put("generateNumberParts", "1");
    settings.put("catenateWords", "1");
    settings.put("catenateNumbers", "1");
    settings.put("catenateAll", "0");
    settings.put("splitOnCaseChange", "1");
    return settings;
}

/** Wraps {@code text} in a MockTokenizer using the WHITESPACE pattern with lowercasing disabled. */
private static MockTokenizer whitespaceTokenizer(String text) {
    return new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
}