List of usage examples for org.apache.lucene.analysis.TokenStream#incrementToken
public abstract boolean incrementToken() throws IOException;
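Every example below follows the same consumption contract: call reset() once before the first incrementToken(), loop while incrementToken() returns true (reading token data through attributes added to the stream), then call end() and close(). A minimal sketch of that loop, extracted as a helper (the class and method names here are hypothetical, not taken from any example below):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class TokenStreamUsage {

    // Hypothetical helper: drains a TokenStream and returns its terms as strings.
    public static List<String> collectTerms(TokenStream stream) throws IOException {
        List<String> terms = new ArrayList<>();
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();                    // required before the first incrementToken()
        while (stream.incrementToken()) {  // returns false once the stream is exhausted
            terms.add(termAttr.toString());
        }
        stream.end();                      // finalizes end-of-stream attribute state
        stream.close();                    // releases underlying resources
        return terms;
    }
}

Forgetting reset() is the classic mistake; most TokenStream implementations throw or misbehave if incrementToken() is called on an unreset stream.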
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTest.java
License:Apache License
@Test(expected = IllegalArgumentException.class)
public void testInValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 9; i++) { // 9 -> expands to 512
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()),
            MockTokenizer.WHITESPACE, true);
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"),
            new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester
                            .toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
                    return finiteStrings;
                }
            });
    suggestTokenStream.reset();
    suggestTokenStream.incrementToken();
    suggestTokenStream.close();
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTest.java
License:Apache License
@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    TokenStream tokenStream = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(
            new CompletionTokenStream(tokenStream, payload, new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
                }
            }));
    TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class);
    BytesRef ref = termAtt.getBytesRef();
    assertNotNull(ref);
    suggestTokenStream.reset();
    while (suggestTokenStream.incrementToken()) {
        termAtt.fillBytesRef();
        assertThat(ref.utf8ToString(), equalTo("mykeyword"));
    }
    suggestTokenStream.end();
    suggestTokenStream.close();
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTests.java
License:Apache License
@Test
public void testValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 8; i++) { // 8 tokens, 2 expansions each -> 2^8 = 256 finite strings
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader(valueBuilder.toString()));
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"),
            new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester.toFiniteStrings(stream);
                    return finiteStrings;
                }
            });
    suggestTokenStream.reset();
    ByteTermAttribute attr = suggestTokenStream.addAttribute(ByteTermAttribute.class);
    PositionIncrementAttribute posAttr = suggestTokenStream.addAttribute(PositionIncrementAttribute.class);
    int maxPos = 0;
    int count = 0;
    while (suggestTokenStream.incrementToken()) {
        count++;
        assertNotNull(attr.getBytesRef());
        assertTrue(attr.getBytesRef().length > 0);
        maxPos += posAttr.getPositionIncrement();
    }
    suggestTokenStream.close();
    assertEquals(count, 256);
    assertEquals(count, maxPos);
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTests.java
License:Apache License
@Test(expected = IllegalArgumentException.class)
public void testInValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 9; i++) { // 9 -> expands to 512
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader(valueBuilder.toString()));
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"),
            new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester.toFiniteStrings(stream);
                    return finiteStrings;
                }
            });
    suggestTokenStream.reset();
    suggestTokenStream.incrementToken();
    suggestTokenStream.close();
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTests.java
License:Apache License
@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader("mykeyword"));
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(
            new CompletionTokenStream(tokenizer, payload, new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    return suggester.toFiniteStrings(stream);
                }
            }));
    TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class);
    assertNotNull(termAtt.getBytesRef());
    suggestTokenStream.reset();
    while (suggestTokenStream.incrementToken()) {
        assertThat(termAtt.getBytesRef().utf8ToString(), equalTo("mykeyword"));
    }
    suggestTokenStream.end();
    suggestTokenStream.close();
}
From source file:org.elasticsearch.search.suggest.SuggestUtils.java
License:Apache License
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
    stream.reset();
    consumer.reset(stream);
    int numTokens = 0;
    // Feed every token to the consumer, counting as we go.
    while (stream.incrementToken()) {
        consumer.nextToken();
        numTokens++;
    }
    consumer.end();
    stream.close();
    return numTokens;
}
From source file:org.elasticsearch.test.unit.index.analysis.AnalysisTestsHelper.java
License:Apache License
public static void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
    stream.reset();
    CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
    Assert.assertNotNull(termAttr);
    int i = 0;
    while (stream.incrementToken()) {
        Assert.assertTrue(i < expected.length, "got extra term: " + termAttr.toString());
        Assert.assertEquals(termAttr.toString(), expected[i], "expected different term at index " + i);
        i++;
    }
    Assert.assertEquals(i, expected.length, "not all tokens produced");
}
From source file:org.elasticsearch.test.unit.index.analysis.AnalysisTestsHelper.java
License:Apache License
public static void assertSimpleTSOutput(TokenStream stream, String[] expected, int[] posInc)
        throws IOException {
    stream.reset();
    CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAttr = stream.getAttribute(PositionIncrementAttribute.class);
    Assert.assertNotNull(termAttr);
    int i = 0;
    while (stream.incrementToken()) {
        Assert.assertTrue(i < expected.length, "got extra term: " + termAttr.toString());
        Assert.assertEquals(termAttr.toString(), expected[i], "expected different term at index " + i);
        Assert.assertEquals(posIncAttr.getPositionIncrement(), posInc[i]);
        i++;
    }
    Assert.assertEquals(i, expected.length, "not all tokens produced");
}
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private Query phraseQuery(String field, Element node, Analyzer analyzer) throws XPathException {
    NodeList termList = node.getElementsByTagName("term");
    if (termList.getLength() == 0) {
        PhraseQuery query = new PhraseQuery();
        String qstr = getText(node);
        try {
            TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                query.add(new Term(field, termAttr.toString()));
            }
            stream.end();
            stream.close();
        } catch (IOException e) {
            throw new XPathException("Error while parsing phrase query: " + qstr);
        }
        int slop = getSlop(node);
        if (slop > -1)
            query.setSlop(slop);
        return query;
    }
    MultiPhraseQuery query = new MultiPhraseQuery();
    for (int i = 0; i < termList.getLength(); i++) {
        Element elem = (Element) termList.item(i);
        String text = getText(elem);
        if (text.indexOf('?') > -1 || text.indexOf('*') > 0) {
            Term[] expanded = expandTerms(field, text);
            if (expanded.length > 0)
                query.add(expanded);
        } else {
            String termStr = getTerm(field, text, analyzer);
            if (termStr != null)
                query.add(new Term(field, termStr)); // use the analyzed term, not the raw text
        }
    }
    int slop = getSlop(node);
    if (slop > -1)
        query.setSlop(slop);
    return query;
}
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException {
    int slop = getSlop(node);
    if (slop < 0)
        slop = 0;
    boolean inOrder = true;
    if (node.hasAttribute("ordered"))
        inOrder = node.getAttribute("ordered").equals("yes");
    if (!hasElementContent(node)) {
        String qstr = getText(node);
        List<SpanTermQuery> list = new ArrayList<>(8);
        try {
            TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                list.add(new SpanTermQuery(new Term(field, termAttr.toString())));
            }
            stream.end();
            stream.close();
        } catch (IOException e) {
            throw new XPathException("Error while parsing phrase query: " + qstr);
        }
        return new SpanNearQuery(list.toArray(new SpanTermQuery[list.size()]), slop, inOrder);
    }
    SpanQuery[] children = parseSpanChildren(field, node, analyzer);
    return new SpanNearQuery(children, slop, inOrder);
}