Example usage for org.apache.lucene.analysis CannedTokenStream CannedTokenStream

List of usage examples for the org.apache.lucene.analysis.CannedTokenStream constructor CannedTokenStream(int, int, Token...)

Introduction

This page collects usage examples for the CannedTokenStream constructor of the class org.apache.lucene.analysis.CannedTokenStream.

Prototype

public CannedTokenStream(int finalPosInc, int finalOffset, Token... tokens) 

Source Link

Document

If you want trailing holes, pass a non-zero finalPosInc.

Usage

From source file:org.elasticsearch.analysis.common.FlattenGraphTokenFilterFactoryTests.java

License:Apache License

/**
 * Runs a canned token graph through the filter created by
 * {@link FlattenGraphTokenFilterFactory} and verifies the flattened output stream.
 */
public void testBasic() throws IOException {
    final Index testIndex = new Index("test", "_na_");
    final String filterName = "ngr";
    final Settings indexLevelSettings = newAnalysisSettingsBuilder().build();
    final IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(testIndex, indexLevelSettings);
    final Settings filterSettings = newAnalysisSettingsBuilder().build();

    // Input graph: "wow that's funny" and "what the fudge" are separate side paths,
    // both running in parallel with "wtf".
    final Token[] graph = {
            token("wtf", 1, 5, 0, 3),
            token("what", 0, 1, 0, 3),
            token("wow", 0, 3, 0, 3),
            token("the", 1, 1, 0, 3),
            token("fudge", 1, 3, 0, 3),
            token("that's", 1, 1, 0, 3),
            token("funny", 1, 1, 0, 3),
            token("happened", 1, 1, 4, 12) };
    final TokenStream in = new CannedTokenStream(0, 12, graph);

    final FlattenGraphTokenFilterFactory factory =
            new FlattenGraphTokenFilterFactory(indexProperties, null, filterName, filterSettings);
    final TokenStream flattened = factory.create(in);

    // Expected output: flattened to wtf/what/wow that's/the fudge/funny happened.
    final String[] expectedTerms = { "wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened" };
    final int[] expectedStartOffsets = { 0, 0, 0, 0, 0, 0, 0, 4 };
    final int[] expectedEndOffsets = { 3, 3, 3, 3, 3, 3, 3, 12 };
    final int[] expectedPosIncrements = { 1, 0, 0, 1, 0, 1, 0, 1 };
    final int[] expectedPosLengths = { 3, 1, 1, 1, 1, 1, 1, 1 };

    assertTokenStreamContents(flattened, expectedTerms, expectedStartOffsets, expectedEndOffsets,
            expectedPosIncrements, expectedPosLengths, 12);
}

From source file:org.elasticsearch.index.mapper.core.LegacyTokenCountFieldMapperTests.java

License:Apache License

/**
 * Verifies that {@code LegacyTokenCountFieldMapper.countPositions} sums position increments
 * rather than counting tokens: increments of 0, 1 and 2 plus a final increment of 4 must
 * yield 7, regardless of token order.
 */
public void testCountPositions() throws IOException {
    // We're looking to make sure that we:
    Token t1 = new Token(); // Don't count tokens without an increment
    t1.setPositionIncrement(0);
    Token t2 = new Token();
    t2.setPositionIncrement(1); // Count normal tokens with one increment
    Token t3 = new Token();
    // Must target t3: setting this on t2 would overwrite the increment of 1 above
    // and leave t3 unconfigured.
    t3.setPositionIncrement(2); // Count funny tokens with more than one increment
    int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
    Token[] tokens = new Token[] { t1, t2, t3 };
    // Arrays.asList is a view over the array, so shuffling it reorders `tokens` itself.
    Collections.shuffle(Arrays.asList(tokens), random());
    final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
    // TODO: we have no CannedAnalyzer?
    Analyzer analyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new MockTokenizer(), tokenStream);
        }
    };
    // 0 + 1 + 2 from the tokens, plus the final increment of 4.
    assertThat(LegacyTokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}

From source file:org.elasticsearch.index.mapper.core.TokenCountFieldMapperTests.java

License:Apache License

/**
 * Verifies that {@code TokenCountFieldMapper.countPositions} sums position increments
 * rather than counting tokens: increments of 0, 1 and 2 plus a final increment of 4 must
 * yield 7, regardless of token order.
 */
@Test
public void testCountPositions() throws IOException {
    // We're looking to make sure that we:
    Token t1 = new Token(); // Don't count tokens without an increment
    t1.setPositionIncrement(0);
    Token t2 = new Token();
    t2.setPositionIncrement(1); // Count normal tokens with one increment
    Token t3 = new Token();
    // Must target t3: setting this on t2 would overwrite the increment of 1 above
    // and leave t3 unconfigured.
    t3.setPositionIncrement(2); // Count funny tokens with more than one increment
    int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
    Token[] tokens = new Token[] { t1, t2, t3 };
    // Arrays.asList is a view over the array, so shuffling it reorders `tokens` itself.
    Collections.shuffle(Arrays.asList(tokens), getRandom());
    TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
    // 0 + 1 + 2 from the tokens, plus the final increment of 4.
    assertThat(TokenCountFieldMapper.countPositions(tokenStream), equalTo(7));
}

From source file:org.elasticsearch.index.mapper.TokenCountFieldMapperTests.java

License:Apache License

/**
 * Verifies that {@code TokenCountFieldMapper.countPositions} sums position increments
 * rather than counting tokens: increments of 0, 1 and 2 plus a final increment of 4 must
 * yield 7, regardless of token order.
 */
public void testCountPositions() throws IOException {
    // We're looking to make sure that we:
    Token t1 = new Token(); // Don't count tokens without an increment
    t1.setPositionIncrement(0);
    Token t2 = new Token();
    t2.setPositionIncrement(1); // Count normal tokens with one increment
    Token t3 = new Token();
    // Must target t3: setting this on t2 would overwrite the increment of 1 above
    // and leave t3 unconfigured.
    t3.setPositionIncrement(2); // Count funny tokens with more than one increment
    int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
    Token[] tokens = new Token[] { t1, t2, t3 };
    // Arrays.asList is a view over the array, so shuffling it reorders `tokens` itself.
    Collections.shuffle(Arrays.asList(tokens), random());
    final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
    // TODO: we have no CannedAnalyzer?
    Analyzer analyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new MockTokenizer(), tokenStream);
        }
    };
    // 0 + 1 + 2 from the tokens, plus the final increment of 4.
    assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}