Example usage for org.apache.lucene.analysis TokenStream close

List of usage examples for org.apache.lucene.analysis TokenStream close

Introduction

On this page you can find example usages of org.apache.lucene.analysis.TokenStream.close().

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Releases resources associated with this stream.
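
As with any java.io.Closeable, close() must be called once the consumer is done with the stream. The general consumer workflow in Lucene is: call reset() once, iterate with incrementToken(), call end(), then close(). Below is a minimal sketch of that workflow (not taken from the examples that follow; it assumes a recent Lucene where StandardAnalyzer has a no-arg constructor and Analyzer.tokenStream accepts a String value; the field name and input text are arbitrary):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamCloseExample {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // TokenStream implements Closeable, so try-with-resources
        // guarantees close() even if incrementToken() throws.
        try (TokenStream stream = analyzer.tokenStream("body", "hello token stream")) {
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                 // required before the first incrementToken()
            while (stream.incrementToken()) {
                System.out.println(termAttr.toString());
            }
            stream.end();                   // records end-of-stream state (final offset etc.)
        }
        analyzer.close();
    }
}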

Usage

From source file: org.elasticsearch.search.suggest.CompletionTokenStreamTest.java

License: Apache License

@Test(expected = IllegalArgumentException.class)
public void testInValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 9; i++) { // 9 -> expands to 512
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()),
            MockTokenizer.WHITESPACE, true);
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester
                            .toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
                    return finiteStrings;
                }
            });

    suggestTokenStream.reset();
    suggestTokenStream.incrementToken();
    suggestTokenStream.close();

}

From source file: org.elasticsearch.search.suggest.CompletionTokenStreamTest.java

License: Apache License

@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    TokenStream tokenStream = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(
            new CompletionTokenStream(tokenStream, payload, new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
                }
            }));
    TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class);
    BytesRef ref = termAtt.getBytesRef();
    assertNotNull(ref);
    suggestTokenStream.reset();

    while (suggestTokenStream.incrementToken()) {
        termAtt.fillBytesRef();
        assertThat(ref.utf8ToString(), equalTo("mykeyword"));
    }
    suggestTokenStream.end();
    suggestTokenStream.close();
}

From source file: org.elasticsearch.search.suggest.CompletionTokenStreamTests.java

License: Apache License

@Test
public void testValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 8; i++) {
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader(valueBuilder.toString()));
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester.toFiniteStrings(stream);
                    return finiteStrings;
                }
            });

    suggestTokenStream.reset();
    ByteTermAttribute attr = suggestTokenStream.addAttribute(ByteTermAttribute.class);
    PositionIncrementAttribute posAttr = suggestTokenStream.addAttribute(PositionIncrementAttribute.class);
    int maxPos = 0;
    int count = 0;
    while (suggestTokenStream.incrementToken()) {
        count++;
        assertNotNull(attr.getBytesRef());
        assertTrue(attr.getBytesRef().length > 0);
        maxPos += posAttr.getPositionIncrement();
    }
    suggestTokenStream.close();
    assertEquals(count, 256);
    assertEquals(count, maxPos);

}

From source file: org.elasticsearch.search.suggest.CompletionTokenStreamTests.java

License: Apache License

@Test(expected = IllegalArgumentException.class)
public void testInValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 9; i++) { // 9 -> expands to 512
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader(valueBuilder.toString()));
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester.toFiniteStrings(stream);
                    return finiteStrings;
                }
            });

    suggestTokenStream.reset();
    suggestTokenStream.incrementToken();
    suggestTokenStream.close();

}

From source file: org.elasticsearch.search.suggest.CompletionTokenStreamTests.java

License: Apache License

@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader("mykeyword"));
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(
            new CompletionTokenStream(tokenizer, payload, new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    return suggester.toFiniteStrings(stream);
                }
            }));
    TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class);
    assertNotNull(termAtt.getBytesRef());
    suggestTokenStream.reset();

    while (suggestTokenStream.incrementToken()) {
        assertThat(termAtt.getBytesRef().utf8ToString(), equalTo("mykeyword"));
    }
    suggestTokenStream.end();
    suggestTokenStream.close();
}

From source file: org.elasticsearch.search.suggest.SuggestUtils.java

License: Apache License

public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
    stream.reset();
    consumer.reset(stream);
    int numTokens = 0;
    while (stream.incrementToken()) {
        consumer.nextToken();
        numTokens++;
    }
    consumer.end();
    stream.close();
    return numTokens;
}
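
One caveat with the helper above: if incrementToken() throws, stream.close() is never reached. A sketch of an exception-safe variant (not from the Elasticsearch source; it relies on TokenStream implementing Closeable and keeps the same TokenConsumer calls):

public static int analyzeSafely(TokenStream stream, TokenConsumer consumer) throws IOException {
    int numTokens = 0;
    // try-with-resources closes the stream even if tokenization throws
    try (TokenStream ts = stream) {
        ts.reset();
        consumer.reset(ts);
        while (ts.incrementToken()) {
            consumer.nextToken();
            numTokens++;
        }
        consumer.end();
    }
    return numTokens;
}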

From source file: org.eurekastreams.commons.search.analysis.AnalyzerTestBase.java

License: Apache License

/**
 * Assert the input Analyzer parses the input String as expected.
 *
 * @param analyzer
 *            the analyzer to test
 * @param input
 *            the string to tokenize
 * @param expectedImages
 *            the expected results of the analyzing
 * @throws Exception
 *             on error
 */
protected void assertAnalyzesTo(final Analyzer analyzer, final String input, final String[] expectedImages)
        throws Exception {
    TokenStream ts = analyzer.tokenStream("dummy", new StringReader(input));
    final Token reusableToken = new Token();

    StringBuffer sb = new StringBuffer();
    Token nextToken;
    while ((nextToken = ts.next(reusableToken)) != null) {
        sb.append("\"" + nextToken.term() + "\",");
    }
    ts.close();
    ts = analyzer.tokenStream("dummy", new StringReader(input));

    System.out.println(sb.toString());

    for (int i = 0; i < expectedImages.length; i++) {
        nextToken = ts.next(reusableToken);
        assertNotNull(nextToken);
        assertEquals(expectedImages[i], nextToken.term());
    }
    assertNull(ts.next(reusableToken));
    ts.close();
}
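
The helper above targets the pre-3.0 token API (ts.next(Token), Token.term()). For comparison, here is a sketch of the same assertions against the attribute-based API used by the other examples on this page (the method name assertAnalyzesToModern is only for illustration):

protected void assertAnalyzesToModern(final Analyzer analyzer, final String input,
        final String[] expectedImages) throws Exception {
    TokenStream ts = analyzer.tokenStream("dummy", new StringReader(input));
    CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    for (String expected : expectedImages) {
        assertTrue(ts.incrementToken());     // one token per expected image
        assertEquals(expected, termAttr.toString());
    }
    assertFalse(ts.incrementToken());        // and nothing after them
    ts.end();
    ts.close();
}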

From source file: org.exist.indexing.lucene.XMLToQuery.java

License: Open Source License

private Query phraseQuery(String field, Element node, Analyzer analyzer) throws XPathException {
    NodeList termList = node.getElementsByTagName("term");
    if (termList.getLength() == 0) {
        PhraseQuery query = new PhraseQuery();
        String qstr = getText(node);
        try {
            TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                query.add(new Term(field, termAttr.toString()));
            }
            stream.end();
            stream.close();
        } catch (IOException e) {
            throw new XPathException("Error while parsing phrase query: " + qstr);
        }
        int slop = getSlop(node);
        if (slop > -1)
            query.setSlop(slop);
        return query;
    }
    MultiPhraseQuery query = new MultiPhraseQuery();
    for (int i = 0; i < termList.getLength(); i++) {
        Element elem = (Element) termList.item(i);
        String text = getText(elem);
        if (text.indexOf('?') > -1 || text.indexOf('*') > 0) {
            Term[] expanded = expandTerms(field, text);
            if (expanded.length > 0)
                query.add(expanded);
        } else {
            String termStr = getTerm(field, text, analyzer);
            if (termStr != null)
                query.add(new Term(field, termStr));
        }
    }
    int slop = getSlop(node);
    if (slop > -1)
        query.setSlop(slop);
    return query;
}

From source file: org.exist.indexing.lucene.XMLToQuery.java

License: Open Source License

private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException {
    int slop = getSlop(node);
    if (slop < 0)
        slop = 0;
    boolean inOrder = true;
    if (node.hasAttribute("ordered"))
        inOrder = node.getAttribute("ordered").equals("yes");

    if (!hasElementContent(node)) {
        String qstr = getText(node);
        List<SpanTermQuery> list = new ArrayList<>(8);
        try {
            TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                list.add(new SpanTermQuery(new Term(field, termAttr.toString())));
            }
            stream.end();
            stream.close();
        } catch (IOException e) {
            throw new XPathException("Error while parsing phrase query: " + qstr);
        }
        return new SpanNearQuery(list.toArray(new SpanTermQuery[list.size()]), slop, inOrder);
    }
    SpanQuery[] children = parseSpanChildren(field, node, analyzer);
    return new SpanNearQuery(children, slop, inOrder);
}

From source file: org.exist.indexing.lucene.XMLToQuery.java

License: Open Source License

private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
    String term = null;
    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        if (stream.incrementToken()) {
            term = termAttr.toString();
        }
        stream.end();
        stream.close();
        return term;
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    }
}
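
Note that all three XMLToQuery snippets call end() and close() only on the success path: if incrementToken() throws, the IOException is translated into an XPathException and the stream is never closed. A sketch of getTerm reworked with try-with-resources (same result on success, but close() is guaranteed):

private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
    try (TokenStream stream = analyzer.tokenStream(field, new StringReader(text))) {
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        String term = null;
        stream.reset();
        if (stream.incrementToken()) {
            term = termAttr.toString();
        }
        stream.end();   // close() is handled by try-with-resources, even on error paths
        return term;
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    }
}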