Example usage for org.apache.lucene.analysis.icu.segmentation ICUTokenizer setReader

List of usage examples for org.apache.lucene.analysis.icu.segmentation ICUTokenizer setReader

Introduction

On this page you can find an example usage for org.apache.lucene.analysis.icu.segmentation ICUTokenizer setReader.

Prototype

public final void setReader(Reader input) 

Source Link

Document

Expert: Set a new reader on the Tokenizer.

Usage

From source file: org.elasticsearch.index.analysis.IcuTokenizerFactoryTests.java

License: Apache License

/**
 * Verifies the default "icu_tokenizer" splits a hyphenated term into separate tokens.
 *
 * <p>NOTE(review): the "?" in the input literal looks like a mojibake'd non-ASCII
 * character from the original test source — confirm against upstream before relying
 * on it.
 */
public void testSimpleIcuTokenizer() throws IOException {
    AnalysisService analysisService = createAnalysisService();

    // Look up the stock ICU tokenizer registered under "icu_tokenizer".
    ICUTokenizer icuTokenizer = (ICUTokenizer) analysisService.tokenizer("icu_tokenizer").create();

    icuTokenizer.setReader(new StringReader("?, one-two"));
    assertTokenStreamContents(icuTokenizer, new String[] { "?", "one", "two" });
}

From source file: org.elasticsearch.index.analysis.IcuTokenizerFactoryTests.java

License: Apache License

/**
 * Exercises a tokenizer configured with a single custom ICU rule file
 * ("user_rule_tokenizer"): interior hyphens are kept inside tokens while
 * dangling hyphens ("Brang-", "-ish") are stripped, per the expected tokens below.
 */
public void testIcuCustomizeRuleFile() throws IOException {
    AnalysisService analysisService = createAnalysisService();

    // Tokenizer backed by one user-supplied rule file.
    ICUTokenizer customRuleTokenizer = (ICUTokenizer) analysisService.tokenizer("user_rule_tokenizer").create();
    customRuleTokenizer.setReader(new StringReader(
            "One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish."));

    String[] expectedTokens = { "One-two", "punch", "Brang", "not", "brung-it", "This",
            "one", "not", "that", "one", "is", "the", "right", "one", "ish" };
    assertTokenStreamContents(customRuleTokenizer, expectedTokens);
}

From source file: org.elasticsearch.index.analysis.IcuTokenizerFactoryTests.java

License: Apache License

/**
 * Exercises a tokenizer built from two custom ICU rule files
 * ("multi_rule_tokenizer") on mixed-script input.
 *
 * <p>NOTE(review): the runs of "?" in the literals below are almost certainly
 * mojibake of non-Latin (likely CJK) text from the original test source — they
 * are reproduced verbatim here; verify against upstream before editing them.
 */
public void testMultipleIcuCustomizeRuleFiles() throws IOException {
    AnalysisService analysisService = createAnalysisService();

    // Tokenizer combining two user-supplied rule files.
    ICUTokenizer multiRuleTokenizer = (ICUTokenizer) analysisService.tokenizer("multi_rule_tokenizer").create();
    multiRuleTokenizer.setReader(new StringReader(
            "Some English.  ? ??.  ?     More English."));

    String[] expectedTokens = { "Some", "English", "? ??.  ",
            "?     ", "More",
            "English" };
    assertTokenStreamContents(multiRuleTokenizer, expectedTokens);
}