Example usage for org.apache.lucene.analysis.core WhitespaceTokenizer setReader

Introduction

On this page you can find example usages of org.apache.lucene.analysis.core.WhitespaceTokenizer#setReader.

Prototype

public final void setReader(Reader input) 

Document

Expert: Set a new reader on the Tokenizer.
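
The Javadoc is terse, so here is a minimal, self-contained sketch of the usual setReader lifecycle (not taken from the sources below, and assuming Lucene 5+, where WhitespaceTokenizer has a no-argument constructor): set the input with setReader, call reset(), consume tokens, call end(), then close(). After close(), the tokenizer can be reused by calling setReader again with a new Reader.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class SetReaderExample {
    public static void main(String[] args) throws IOException {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);

        for (String input : new String[] { "one two three", "four five" }) {
            tokenizer.setReader(new StringReader(input)); // must be called before reset()
            tokenizer.reset();
            while (tokenizer.incrementToken()) {
                System.out.println(term.toString());
            }
            tokenizer.end();
            tokenizer.close(); // releases the reader; setReader may now be called again
        }
    }
}

Calling these methods out of order (for example, setReader before the previous stream's close()) makes recent Lucene versions throw an IllegalStateException about a TokenStream contract violation.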

Usage

From source file: org.elasticsearch.analysis.common.WhitespaceTokenizerFactoryTests.java

License: Apache License

public void testSimpleWhiteSpaceTokenizer() throws IOException {
    final Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
    IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"),
            indexSettings);
    WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null,
            "whitespace_maxlen", Settings.EMPTY).create();

    try (Reader reader = new StringReader("one, two, three")) {
        tokenizer.setReader(reader);
        assertTokenStreamContents(tokenizer, new String[] { "one,", "two,", "three" });
    }
}
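
Note that WhitespaceTokenizer splits only on whitespace, so punctuation stays attached: the assertion above expects "one," and "two," rather than bare "one" and "two".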

From source file: org.elasticsearch.analysis.common.WhitespaceTokenizerFactoryTests.java

License: Apache License

public void testMaxTokenLength() throws IOException {
    final Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
    IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"),
            indexSettings);
    final Settings settings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 2).build();
    WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null,
            "whitespace_maxlen", settings).create();
    try (Reader reader = new StringReader("one, two, three")) {
        tokenizer.setReader(reader);
        assertTokenStreamContents(tokenizer, new String[] { "on", "e,", "tw", "o,", "th", "re", "e" });
    }

    final Settings defaultSettings = Settings.EMPTY;
    tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen",
            defaultSettings).create();
    String veryLongToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256);
    try (Reader reader = new StringReader(veryLongToken)) {
        tokenizer.setReader(reader);
        assertTokenStreamContents(tokenizer,
                new String[] { veryLongToken.substring(0, 255), veryLongToken.substring(255) });
    }

    final Settings tooLongSettings = Settings.builder()
            .put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 1024 * 1024 + 1).build();
    IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
            () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings)
                    .create());
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", e.getMessage());

    final Settings negativeSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, -1)
            .build();
    e = expectThrows(IllegalArgumentException.class,
            () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings)
                    .create());
    assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
}
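
The factory above enforces the limit through Elasticsearch settings. For comparison, here is a minimal sketch of setting the same limit directly in Lucene, assuming Lucene 7.1+, where WhitespaceTokenizer gained a constructor taking an AttributeFactory and a max token length (the value 2 mirrors the test above):

import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;

public class MaxTokenLengthExample {
    public static void main(String[] args) throws Exception {
        // Tokens longer than 2 chars are split into 2-char chunks,
        // yielding "on", "e,", "tw", "o,", "th", "re", "e" as asserted above.
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(
                AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, 2);
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.setReader(new StringReader("one, two, three"));
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(term.toString());
        }
        tokenizer.end();
        tokenizer.close();
    }
}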

From source file: org.opensextant.solrtexttagger.ConcatenateFilterTest.java

License: Open Source License

public void testTypical() throws IOException {
    String NYC = "new york city";
    WhitespaceTokenizer stream = new WhitespaceTokenizer();
    stream.setReader(new StringReader(NYC));
    ConcatenateFilter filter = new ConcatenateFilter(stream);
    try {
        assertTokenStreamContents(filter, new String[] { NYC }, new int[] { 0 }, new int[] { NYC.length() },
                new String[] { "shingle" }, new int[] { 1 }, null, NYC.length(), true);
    } catch (AssertionError e) {
        // assertTokenStreamContents tries to test whether tokenStream.end() was implemented correctly.
        // Its manner of checking this is imperfect and incompatible with
        // ConcatenateFilter. Specifically, it modifies a special attribute *after* incrementToken(),
        // which is weird. To the best of my ability, end() appears to be implemented correctly.
        if (!e.getMessage().equals("super.end()/clearAttributes() was not called correctly in end()"))
            throw e;
    }
}