Example usage for org.apache.lucene.analysis TokenStream reset

List of usage examples for org.apache.lucene.analysis TokenStream reset

Introduction

On this page you can find example usages of the reset() method of org.apache.lucene.analysis.TokenStream.

Prototype

public void reset() throws IOException 

Source Link

Document

This method is called by a consumer before it begins consumption using incrementToken().

Usage

From source file:com.sindicetech.siren.analysis.TestConciseJsonAnalyzer.java

License:Open Source License

/**
 * Registers a {@link LongNumericAnalyzer} for the XSD long datatype, tokenizes the JSON
 * document <code>{ "a" : 12 }</code> and checks that the first emitted token starts with
 * the prefix {@code "a:"}.
 */
@Test
public void testNumeric() throws Exception {
    _a.registerDatatype(XSDDatatype.XSD_LONG.toCharArray(), new LongNumericAnalyzer(64));

    final StringReader json = new StringReader("{ \"a\" : 12 }");
    final TokenStream stream = _a.tokenStream("", json);
    final CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);

    // reset() must be called before the first incrementToken()
    stream.reset();

    final boolean hasFirstToken = stream.incrementToken();
    assertTrue(hasFirstToken);

    final String image = term.toString();
    assertTrue(image.startsWith("a:"));

    stream.end();
    stream.close();
}

From source file:com.sindicetech.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java

License:Open Source License

/**
 * Tokenizes the text of the given field node and wraps the resulting stream in a
 * {@link CachingTokenFilter}.
 *
 * <p>The underlying stream has already been reset when this method returns.
 *
 * @param analyzer  analyzer used to tokenize the node's text
 * @param fieldNode node supplying both the field name and the text to analyze
 * @return a caching filter over the freshly reset token stream
 * @throws RuntimeException wrapping any {@link IOException} raised while creating or
 *         resetting the stream
 */
private CachingTokenFilter getBuffer(Analyzer analyzer, FieldQueryNode fieldNode) {
    final String queryText = fieldNode.getTextAsString();
    final String fieldName = fieldNode.getFieldAsString();
    final StringReader reader = new StringReader(queryText);

    final TokenStream stream;
    try {
        stream = analyzer.tokenStream(fieldName, reader);
        stream.reset();
    } catch (final IOException cause) {
        throw new RuntimeException(cause);
    }

    return new CachingTokenFilter(stream);
}

From source file:com.sindicetech.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java

License:Open Source License

/**
 * Analyzes the text of eligible field nodes and, when the analyzer produces more than one
 * token, replaces the node with a {@link TokenizedPhraseQueryNode}.
 *
 * <p>A node is eligible when it is a {@link TextableQueryNode} that is not a wildcard,
 * fuzzy, regexp or protected node and whose parent is not a {@link RangeQueryNode}.
 *
 * @param node the node being post-processed
 * @return a {@link NoTokenFoundQueryNode} when the stream has no {@link CharTermAttribute}
 *         or yields no tokens; a {@link TokenizedPhraseQueryNode} holding one
 *         {@link FieldQueryNode} per token when more than one token is produced;
 *         otherwise the original {@code node}
 * @throws QueryNodeException declared by the overridden method
 */
@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node instanceof ProtectedQueryNode) && !(node.getParent() instanceof RangeQueryNode)) {

        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();

        // Tokenize the node's text; the stream must be reset before it is consumed.
        final TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        // Wrap the stream in a caching filter so it can be rewound and read a second time.
        final CachingTokenFilter buffer = new CachingTokenFilter(source);

        // First pass: count the tokens produced by the analyzer.
        int numTokens = 0;
        try {
            while (buffer.incrementToken()) {
                numTokens++;
            }
        } catch (final IOException e) {
            // ignore: counting simply stops at the token that failed
        }

        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore
        }

        // Without a term attribute there is no token text to read.
        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        }
        // phrase query
        else if (numTokens != 1) {
            String datatype = (String) DatatypeProcessor.getDatatype(this.getQueryConfigHandler(), node);
            final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
            // assign datatype
            pq.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);

            // Second pass: replay the buffered tokens, creating one field node per token.
            for (int i = 0; i < numTokens; i++) {
                String term = null;

                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();

                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                    // NOTE(review): if this branch is ever taken, term stays null and is
                    // passed to the FieldQueryNode constructor below - confirm that is acceptable
                }

                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                // set position increment
                newFieldNode.setPositionIncrement(i);
                // assign datatype
                newFieldNode.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
                pq.add(newFieldNode);
            }
            return pq;
        }
        // Exactly one token: fall through and return the node unchanged.
    }
    return node;
}

From source file:com.sindicetech.siren.solr.analysis.BaseSirenStreamTestCase.java

License:Open Source License

/**
 * Consumes the given stream and asserts that it yields exactly the expected term images,
 * in order, and nothing more. The stream is reset before consumption and ended and closed
 * once all assertions have passed.
 *
 * @param stream         the token stream to consume; must expose a {@link CharTermAttribute}
 * @param expectedImages the expected term text of each token, in order
 * @throws Exception if the stream fails while being consumed
 */
public void assertTokenStreamContents(final TokenStream stream, final String[] expectedImages)
        throws Exception {
    assertTrue("has TermAttribute", stream.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);

    stream.reset();

    int index = 0;
    while (index < expectedImages.length) {
        stream.clearAttributes();
        final boolean hasToken = stream.incrementToken();
        assertTrue("token " + index + " does not exists", hasToken);

        final String actualImage = term.toString();
        assertEquals(expectedImages[index], actualImage);
        index++;
    }

    // the stream must be exhausted once every expected image has been matched
    final boolean hasExtraToken = stream.incrementToken();
    assertFalse("end of stream", hasExtraToken);

    stream.end();
    stream.close();
}

From source file:com.stratio.cassandra.index.query.Condition.java

License:Apache License

/**
 * Runs {@code value} through the analyzer of the given column mapper and returns the single
 * term it produces.
 *
 * @param field        the field name passed to the analyzer
 * @param value        the text to analyze
 * @param columnMapper the mapper supplying the analyzer
 * @return the analyzed term as a UTF-8 decoded string, or {@code null} if the analyzer
 *         produces no token
 * @throws IllegalArgumentException if the analyzer produces more than one token
 * @throws RuntimeException wrapping any {@link IOException} raised during analysis
 */
protected String analyze(String field, String value, ColumnMapper<?> columnMapper) {
    TokenStream stream = null;
    try {
        stream = columnMapper.analyzer().tokenStream(field, value);
        stream.reset();

        final TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
        final BytesRef termBytes = termAttribute.getBytesRef();

        final boolean hasTerm = stream.incrementToken();
        if (!hasTerm) {
            return null;
        }
        // populate termBytes with the bytes of the token that was just read
        termAttribute.fillBytesRef();

        final boolean hasSecondTerm = stream.incrementToken();
        if (hasSecondTerm) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }

        stream.end();
        return BytesRef.deepCopyOf(termBytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        // always release the stream, whichever path was taken above
        IOUtils.closeWhileHandlingException(stream);
    }
}

From source file:com.stratio.cassandra.lucene.schema.analysis.SnowballAnalyzerBuilderTest.java

License:Apache License

/**
 * Tokenizes {@code value} with the given analyzer and collects the text of every token.
 *
 * @param value    the text to analyze
 * @param analyzer the analyzer under test
 * @return the term text of each produced token, in stream order
 * @throws RuntimeException wrapping any exception raised while analyzing
 */
private List<String> analyze(String value, Analyzer analyzer) {
    List<String> result = new ArrayList<>();
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream(null, value);
        stream.reset();
        // Look the attribute up once instead of on every token.
        CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
        while (stream.incrementToken()) {
            result.add(term.toString());
        }
        // Complete the consumer workflow: end() is expected after the last token.
        stream.end();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
    return result;
}

From source file:com.sxc.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints every term of the given stream to standard output as {@code [term] }.
 *
 * <p>The stream is reset, fully consumed, ended and then closed. It is closed even when
 * consumption fails, so an error does not leave it open.
 *
 * @param stream the token stream to consume; closed by this method
 * @throws IOException if the stream fails while being reset, consumed, ended or closed
 */
public static void displayTokens(TokenStream stream) throws IOException {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        while (stream.incrementToken()) {
            System.out.print("[" + term + "] ");
        }
        // signal end-of-stream before closing
        stream.end();
    } finally {
        stream.close();
    }
}

From source file:com.sxc.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Analyzes {@code text} for the field {@code "contents"} and prints the tokens grouped by
 * position: each new position starts a line of the form {@code N: } followed by the terms
 * at that position as {@code [term] }.
 *
 * <p>The stream is closed even when consumption fails, so an error does not leave it open.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text     the text to analyze
 * @throws IOException if the stream fails while being reset, consumed, ended or closed
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            // a positive increment means the token starts a new position
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term + "] ");
        }
        // signal end-of-stream before closing
        stream.end();
    } finally {
        stream.close();
    }
    System.out.println();
}

From source file:com.sxc.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Analyzes {@code text} for the field {@code "contents"} and prints every token with its
 * details, grouped by position. Each new position starts a line of the form {@code N: }
 * and each token is printed as {@code [term:startOffset->endOffset:type] }.
 *
 * <p>The stream is closed even when consumption fails, so an error does not leave it open.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text     the text to analyze
 * @throws IOException if the stream fails while being reset, consumed, ended or closed
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents",
            new StringReader(text));
    try {
        stream.reset();

        // obtain the attributes read for each token
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr =
                stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        int position = 0;
        while (stream.incrementToken()) {

            int increment = posIncr.getPositionIncrement();
            // a positive increment means the token starts a new position
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("["
                    + term + ":"
                    + offset.startOffset() + "->"
                    + offset.endOffset() + ":"
                    + type.type() + "] ");
        }
        // signal end-of-stream before closing
        stream.end();
    } finally {
        stream.close();
    }
    System.out.println();
}

From source file:com.sxc.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that analyzing {@code input} for the field {@code "field"} yields exactly the
 * terms in {@code output}, in order, and nothing more.
 *
 * <p>The stream is closed even when an assertion fails, so a failing check does not leave
 * the stream obtained from {@code analyzer} unclosed.
 *
 * @param analyzer the analyzer under test
 * @param input    the text to analyze
 * @param output   the expected term text of each token, in order
 * @throws Exception if the stream fails while being reset, consumed, ended or closed
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    try {
        stream.reset();
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.toString());
        }
        // no tokens may remain once every expected term has been matched
        Assert.assertFalse(stream.incrementToken());
        stream.end();
    } finally {
        stream.close();
    }
}