List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTest.java
License:Apache License
@Test(expected = IllegalArgumentException.class) public void testInValidNumberOfExpansions() throws IOException { Builder builder = new SynonymMap.Builder(true); for (int i = 0; i < 256; i++) { builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true); }//from w w w .j av a2 s . co m StringBuilder valueBuilder = new StringBuilder(); for (int i = 0; i < 9; i++) { // 9 -> expands to 512 valueBuilder.append(i + 1); valueBuilder.append(" "); } MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()), MockTokenizer.WHITESPACE, true); SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true); TokenStream suggestTokenStream = new CompletionTokenStream(filter, new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() { @Override public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { Set<IntsRef> finiteStrings = suggester .toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream); return finiteStrings; } }); suggestTokenStream.reset(); suggestTokenStream.incrementToken(); suggestTokenStream.close(); }
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTest.java
License:Apache License
@Test public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception { TokenStream tokenStream = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true); BytesRef payload = new BytesRef("Surface keyword|friggin payload|10"); TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter( new CompletionTokenStream(tokenStream, payload, new CompletionTokenStream.ToFiniteStrings() { @Override//from w ww . j a va 2s . c o m public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream); } })); TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class); BytesRef ref = termAtt.getBytesRef(); assertNotNull(ref); suggestTokenStream.reset(); while (suggestTokenStream.incrementToken()) { termAtt.fillBytesRef(); assertThat(ref.utf8ToString(), equalTo("mykeyword")); } suggestTokenStream.end(); suggestTokenStream.close(); }
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTests.java
License:Apache License
/**
 * Pushes eight whitespace-separated numbers, each of which has a registered synonym, through a
 * {@link CompletionTokenStream} and asserts that exactly 256 non-empty terms are emitted and
 * that the position increments add up to the number of terms.
 */
@Test
public void testValidNumberOfExpansions() throws IOException {
    // Register the synonym "n" -> "1000 + n" for every n in 1..256.
    Builder synonyms = new SynonymMap.Builder(true);
    for (int n = 1; n <= 256; n++) {
        synonyms.add(new CharsRef("" + n), new CharsRef("" + (1000 + n)), true);
    }

    // Input text "1 2 ... 8 ".
    StringBuilder input = new StringBuilder();
    for (int n = 1; n <= 8; n++) {
        input.append(n);
        input.append(" ");
    }

    MockTokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    source.setReader(new StringReader(input.toString()));
    SynonymFilter withSynonyms = new SynonymFilter(source, synonyms.build(), true);
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    CompletionTokenStream.ToFiniteStrings converter = new CompletionTokenStream.ToFiniteStrings() {
        @Override
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
            return suggester.toFiniteStrings(stream);
        }
    };

    TokenStream suggestTokenStream = new CompletionTokenStream(withSynonyms, payload, converter);
    suggestTokenStream.reset();
    ByteTermAttribute termBytes = suggestTokenStream.addAttribute(ByteTermAttribute.class);
    PositionIncrementAttribute posIncrement = suggestTokenStream
            .addAttribute(PositionIncrementAttribute.class);

    int maxPos = 0;
    int count = 0;
    while (suggestTokenStream.incrementToken()) {
        count++;
        assertNotNull(termBytes.getBytesRef());
        assertTrue(termBytes.getBytesRef().length > 0);
        maxPos += posIncrement.getPositionIncrement();
    }
    suggestTokenStream.close();

    assertEquals(count, 256);
    assertEquals(count, maxPos);
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTests.java
License:Apache License
@Test(expected = IllegalArgumentException.class) public void testInValidNumberOfExpansions() throws IOException { Builder builder = new SynonymMap.Builder(true); for (int i = 0; i < 256; i++) { builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true); }/* w ww . j a v a2 s . co m*/ StringBuilder valueBuilder = new StringBuilder(); for (int i = 0; i < 9; i++) { // 9 -> expands to 512 valueBuilder.append(i + 1); valueBuilder.append(" "); } MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); tokenizer.setReader(new StringReader(valueBuilder.toString())); SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true); TokenStream suggestTokenStream = new CompletionTokenStream(filter, new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() { @Override public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { Set<IntsRef> finiteStrings = suggester.toFiniteStrings(stream); return finiteStrings; } }); suggestTokenStream.reset(); suggestTokenStream.incrementToken(); suggestTokenStream.close(); }
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTests.java
License:Apache License
/**
 * Wraps a single-keyword tokenizer in a {@link CompletionTokenStream} plus a
 * {@code ByteTermAttrToCharTermAttrFilter} and asserts that every emitted term reads back as
 * the original keyword.
 */
@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    Tokenizer keywordSource = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    keywordSource.setReader(new StringReader("mykeyword"));
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    CompletionTokenStream.ToFiniteStrings converter = new CompletionTokenStream.ToFiniteStrings() {
        @Override
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
            Set<IntsRef> paths = suggester.toFiniteStrings(stream);
            return paths;
        }
    };
    CompletionTokenStream completionStream = new CompletionTokenStream(keywordSource, payload, converter);
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(completionStream);

    TermToBytesRefAttribute bytesAttr = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class);
    assertNotNull(bytesAttr.getBytesRef());

    suggestTokenStream.reset();
    while (suggestTokenStream.incrementToken()) {
        String emitted = bytesAttr.getBytesRef().utf8ToString();
        assertThat(emitted, equalTo("mykeyword"));
    }
    suggestTokenStream.end();
    suggestTokenStream.close();
}
From source file:org.elasticsearch.search.suggest.SuggestUtils.java
License:Apache License
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException { stream.reset();// ww w.j av a2 s . c om consumer.reset(stream); int numTokens = 0; while (stream.incrementToken()) { consumer.nextToken(); numTokens++; } consumer.end(); stream.close(); return numTokens; }
From source file:org.eurekastreams.commons.search.analysis.AnalyzerTestBase.java
License:Apache License
/** * Assert the input Analyzer parses the input String as expected. * * @param analyzer/* w w w.j av a2s . c o m*/ * the analyzer to test * @param input * the string to tokenize * @param expectedImages * the expected results of the analyzing * @throws Exception * on error */ protected void assertAnalyzesTo(final Analyzer analyzer, final String input, final String[] expectedImages) throws Exception { TokenStream ts = analyzer.tokenStream("dummy", new StringReader(input)); final Token reusableToken = new Token(); StringBuffer sb = new StringBuffer(); Token nextToken; while ((nextToken = ts.next(reusableToken)) != null) { sb.append("\"" + nextToken.term() + "\","); } ts.close(); ts = analyzer.tokenStream("dummy", new StringReader(input)); System.out.println(sb.toString()); for (int i = 0; i < expectedImages.length; i++) { nextToken = ts.next(reusableToken); assertNotNull(reusableToken); assertEquals(expectedImages[i], nextToken.term()); } assertNull(ts.next(reusableToken)); ts.close(); }
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private Query phraseQuery(String field, Element node, Analyzer analyzer) throws XPathException { NodeList termList = node.getElementsByTagName("term"); if (termList.getLength() == 0) { PhraseQuery query = new PhraseQuery(); String qstr = getText(node); try {//from w w w . ja v a2 s. co m TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr)); CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class); stream.reset(); while (stream.incrementToken()) { query.add(new Term(field, termAttr.toString())); } stream.end(); stream.close(); } catch (IOException e) { throw new XPathException("Error while parsing phrase query: " + qstr); } int slop = getSlop(node); if (slop > -1) query.setSlop(slop); return query; } MultiPhraseQuery query = new MultiPhraseQuery(); for (int i = 0; i < termList.getLength(); i++) { Element elem = (Element) termList.item(i); String text = getText(elem); if (text.indexOf('?') > -1 || text.indexOf('*') > 0) { Term[] expanded = expandTerms(field, text); if (expanded.length > 0) query.add(expanded); } else { String termStr = getTerm(field, text, analyzer); if (termStr != null) query.add(new Term(field, text)); } } int slop = getSlop(node); if (slop > -1) query.setSlop(slop); return query; }
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException { int slop = getSlop(node); if (slop < 0) slop = 0;//from ww w.j av a2 s . c o m boolean inOrder = true; if (node.hasAttribute("ordered")) inOrder = node.getAttribute("ordered").equals("yes"); if (!hasElementContent(node)) { String qstr = getText(node); List<SpanTermQuery> list = new ArrayList<>(8); try { TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr)); CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class); stream.reset(); while (stream.incrementToken()) { list.add(new SpanTermQuery(new Term(field, termAttr.toString()))); } stream.end(); stream.close(); } catch (IOException e) { throw new XPathException("Error while parsing phrase query: " + qstr); } return new SpanNearQuery(list.toArray(new SpanTermQuery[list.size()]), slop, inOrder); } SpanQuery[] children = parseSpanChildren(field, node, analyzer); return new SpanNearQuery(children, slop, inOrder); }
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
/**
 * Runs {@code text} through {@code analyzer} and returns the first token it produces.
 *
 * @param field    the index field the analyzer is asked to tokenize for
 * @param text     the raw text to analyze
 * @param analyzer the analyzer to apply
 * @return the first token as a string, or {@code null} if the analyzer produces no token
 * @throws XPathException if the analyzer fails with an I/O error
 */
private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
    // try-with-resources closes the stream on every exit path, including failures in
    // reset() or incrementToken().
    try (TokenStream stream = analyzer.tokenStream(field, new StringReader(text))) {
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        String term = null;
        if (stream.incrementToken()) {
            term = termAttr.toString();
        }
        stream.end();
        return term;
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    }
}