List of usage examples for the org.apache.lucene.analysis.CannedTokenStream constructor CannedTokenStream
public CannedTokenStream(int finalPosInc, int finalOffset, Token... tokens)
From source file:org.elasticsearch.analysis.common.FlattenGraphTokenFilterFactoryTests.java
License:Apache License
public void testBasic() throws IOException { Index index = new Index("test", "_na_"); String name = "ngr"; Settings indexSettings = newAnalysisSettingsBuilder().build(); IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings); Settings settings = newAnalysisSettingsBuilder().build(); // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input: TokenStream in = new CannedTokenStream(0, 12, new Token[] { token("wtf", 1, 5, 0, 3), token("what", 0, 1, 0, 3), token("wow", 0, 3, 0, 3), token("the", 1, 1, 0, 3), token("fudge", 1, 3, 0, 3), token("that's", 1, 1, 0, 3), token("funny", 1, 1, 0, 3), token("happened", 1, 1, 4, 12) }); TokenStream tokens = new FlattenGraphTokenFilterFactory(indexProperties, null, name, settings).create(in); // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: assertTokenStreamContents(tokens,//ww w. j av a2s . c om new String[] { "wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 4 }, new int[] { 3, 3, 3, 3, 3, 3, 3, 12 }, new int[] { 1, 0, 0, 1, 0, 1, 0, 1 }, new int[] { 3, 1, 1, 1, 1, 1, 1, 1 }, 12); }
From source file:org.elasticsearch.index.mapper.core.LegacyTokenCountFieldMapperTests.java
License:Apache License
public void testCountPositions() throws IOException { // We're looking to make sure that we: Token t1 = new Token(); // Don't count tokens without an increment t1.setPositionIncrement(0);/*w ww . jav a 2 s. co m*/ Token t2 = new Token(); t2.setPositionIncrement(1); // Count normal tokens with one increment Token t3 = new Token(); t2.setPositionIncrement(2); // Count funny tokens with more than one increment int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them Token[] tokens = new Token[] { t1, t2, t3 }; Collections.shuffle(Arrays.asList(tokens), random()); final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens); // TODO: we have no CannedAnalyzer? Analyzer analyzer = new Analyzer() { @Override public TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(new MockTokenizer(), tokenStream); } }; assertThat(LegacyTokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7)); }
From source file:org.elasticsearch.index.mapper.core.TokenCountFieldMapperTests.java
License:Apache License
@Test public void testCountPositions() throws IOException { // We're looking to make sure that we: Token t1 = new Token(); // Don't count tokens without an increment t1.setPositionIncrement(0);/*from w w w. ja v a 2 s .co m*/ Token t2 = new Token(); t2.setPositionIncrement(1); // Count normal tokens with one increment Token t3 = new Token(); t2.setPositionIncrement(2); // Count funny tokens with more than one increment int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them Token[] tokens = new Token[] { t1, t2, t3 }; Collections.shuffle(Arrays.asList(tokens), getRandom()); TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens); assertThat(TokenCountFieldMapper.countPositions(tokenStream), equalTo(7)); }
From source file:org.elasticsearch.index.mapper.TokenCountFieldMapperTests.java
License:Apache License
public void testCountPositions() throws IOException { // We're looking to make sure that we: Token t1 = new Token(); // Don't count tokens without an increment t1.setPositionIncrement(0);//w w w . j a v a2 s. c o m Token t2 = new Token(); t2.setPositionIncrement(1); // Count normal tokens with one increment Token t3 = new Token(); t2.setPositionIncrement(2); // Count funny tokens with more than one increment int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them Token[] tokens = new Token[] { t1, t2, t3 }; Collections.shuffle(Arrays.asList(tokens), random()); final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens); // TODO: we have no CannedAnalyzer? Analyzer analyzer = new Analyzer() { @Override public TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(new MockTokenizer(), tokenStream); } }; assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7)); }