Example usage for org.apache.lucene.analysis TokenStream incrementToken

List of usage examples for org.apache.lucene.analysis TokenStream incrementToken

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream incrementToken.

Prototype

public abstract boolean incrementToken() throws IOException;

Source Link

Document

Consumers (i.e., IndexWriter ) use this method to advance the stream to the next token.

Usage

From source file:com.sindicetech.siren.analysis.filter.TestASCIIFoldingExpansionFilter.java

License:Open Source License

void assertTermEquals(final String termExpected, final int posIncExpected, final TokenStream stream,
        final CharTermAttribute termAtt, final PositionIncrementAttribute posAtt) throws Exception {
    Assert.assertTrue(stream.incrementToken());
    Assert.assertEquals(termExpected, termAtt.toString());
    Assert.assertEquals(posIncExpected, posAtt.getPositionIncrement());
}

From source file:com.sindicetech.siren.analysis.filter.TestURINormalisationFilter.java

License:Open Source License

public void assertNormalisesTo(final Tokenizer t, final String input, final String[] expectedImages,
        final String[] expectedTypes) throws Exception {

    assertTrue("has CharTermAttribute", t.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);

    TypeAttribute typeAtt = null;//w  ww.  jav  a 2 s.com
    if (expectedTypes != null) {
        assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class));
        typeAtt = t.getAttribute(TypeAttribute.class);
    }

    t.setReader(new StringReader(input));
    t.reset();

    final TokenStream filter = new URINormalisationFilter(t);

    for (int i = 0; i < expectedImages.length; i++) {

        assertTrue("token " + i + " exists", filter.incrementToken());

        assertEquals(expectedImages[i], termAtt.toString());

        if (expectedTypes != null) {
            assertEquals(expectedTypes[i], typeAtt.type());
        }

    }

    assertFalse("end of stream", filter.incrementToken());
    filter.end();
    filter.close();
}

From source file:com.sindicetech.siren.analysis.NodeAnalyzerTestCase.java

License:Open Source License

public void assertAnalyzesTo(final Analyzer a, final String input, final String[] expectedImages,
        final String[] expectedTypes, final int[] expectedPosIncrs, final IntsRef[] expectedNode,
        final int[] expectedPos) throws Exception {
    final TokenStream t = a.tokenStream("", new StringReader(input));
    t.reset();//from   w  ww. java  2 s .com

    assertTrue("has TermAttribute", t.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);

    TypeAttribute typeAtt = null;
    if (expectedTypes != null) {
        assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class));
        typeAtt = t.getAttribute(TypeAttribute.class);
    }

    PositionIncrementAttribute posIncrAtt = null;
    if (expectedPosIncrs != null) {
        assertTrue("has PositionIncrementAttribute", t.hasAttribute(PositionIncrementAttribute.class));
        posIncrAtt = t.getAttribute(PositionIncrementAttribute.class);
    }

    NodeAttribute nodeAtt = null;
    if (expectedNode != null) {
        assertTrue("has NodeAttribute", t.hasAttribute(NodeAttribute.class));
        nodeAtt = t.getAttribute(NodeAttribute.class);
    }

    PositionAttribute posAtt = null;
    if (expectedPos != null) {
        assertTrue("has PositionAttribute", t.hasAttribute(PositionAttribute.class));
        posAtt = t.getAttribute(PositionAttribute.class);
    }

    for (int i = 0; i < expectedImages.length; i++) {

        assertTrue("token " + i + " exists", t.incrementToken());

        assertEquals("i=" + i, expectedImages[i], termAtt.toString());

        if (expectedTypes != null) {
            assertEquals(expectedTypes[i], typeAtt.type());
        }

        if (expectedPosIncrs != null) {
            assertEquals(expectedPosIncrs[i], posIncrAtt.getPositionIncrement());
        }

        if (expectedNode != null) {
            assertEquals(expectedNode[i], nodeAtt.node());
        }

        if (expectedPos != null) {
            assertEquals(expectedPos[i], posAtt.position());
        }
    }

    assertFalse("end of stream, received token " + termAtt.toString(), t.incrementToken());
    t.end();
    t.close();
}

From source file:com.sindicetech.siren.analysis.TestConciseJsonAnalyzer.java

License:Open Source License

@Test
public void testNumeric() throws Exception {
    _a.registerDatatype(XSDDatatype.XSD_LONG.toCharArray(), new LongNumericAnalyzer(64));
    final TokenStream t = _a.tokenStream("", new StringReader("{ \"a\" : 12 }"));
    final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
    t.reset();/* ww w .ja v  a2  s  .c  om*/
    assertTrue(t.incrementToken());
    assertTrue(termAtt.toString().startsWith("a:"));
    t.end();
    t.close();
}

From source file:com.sindicetech.siren.analysis.TestMockSirenAnalyzer.java

License:Open Source License

@Test
public void testMockSirenAnalyzer() throws IOException {
    final MockSirenDocument doc = doc(token("aaa", node(1)), token("aaa", node(1, 0)), token("aaa", node(1)));
    final MockSirenAnalyzer analyzer = new MockSirenAnalyzer();
    final TokenStream ts = analyzer.tokenStream("", new MockSirenReader(doc));

    assertTrue(ts.incrementToken());
    assertEquals("aaa", ts.getAttribute(CharTermAttribute.class).toString());
    assertEquals(node(1), ts.getAttribute(NodeAttribute.class).node());
    assertEquals(0, ts.getAttribute(PositionAttribute.class).position());

    assertTrue(ts.incrementToken());//  ww w  .ja  v  a 2s.co m
    assertEquals("aaa", ts.getAttribute(CharTermAttribute.class).toString());
    assertEquals(node(1), ts.getAttribute(NodeAttribute.class).node());
    assertEquals(1, ts.getAttribute(PositionAttribute.class).position());

    assertTrue(ts.incrementToken());
    assertEquals("aaa", ts.getAttribute(CharTermAttribute.class).toString());
    assertEquals(node(1, 0), ts.getAttribute(NodeAttribute.class).node());
    assertEquals(0, ts.getAttribute(PositionAttribute.class).position());

}

From source file:com.sindicetech.siren.solr.analysis.BaseSirenStreamTestCase.java

License:Open Source License

public void assertTokenStreamContents(final TokenStream stream, final String[] expectedImages)
        throws Exception {
    assertTrue("has TermAttribute", stream.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);

    stream.reset();//from   ww  w . j  ava 2 s .  com
    for (int i = 0; i < expectedImages.length; i++) {
        stream.clearAttributes();
        assertTrue("token " + i + " does not exists", stream.incrementToken());

        assertEquals(expectedImages[i], termAtt.toString());
    }

    assertFalse("end of stream", stream.incrementToken());
    stream.end();
    stream.close();
}

From source file:com.stratio.cassandra.index.query.Condition.java

License:Apache License

protected String analyze(String field, String value, ColumnMapper<?> columnMapper) {
    TokenStream source = null;
    try {/*  w  w  w .ja va2  s  . c o  m*/
        Analyzer analyzer = columnMapper.analyzer();
        source = analyzer.tokenStream(field, value);
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken()) {
            return null;
        }
        termAtt.fillBytesRef();
        if (source.incrementToken()) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }
        source.end();
        return BytesRef.deepCopyOf(bytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}

From source file:com.stratio.cassandra.lucene.schema.analysis.SnowballAnalyzerBuilderTest.java

License:Apache License

private List<String> analyze(String value, Analyzer analyzer) {
    List<String> result = new ArrayList<>();
    TokenStream stream = null;
    try {//from  w  w w .  jav  a2s  .c  om
        stream = analyzer.tokenStream(null, value);
        stream.reset();
        while (stream.incrementToken()) {
            String analyzedValue = stream.getAttribute(CharTermAttribute.class).toString();
            result.add(analyzedValue);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
    return result;
}

From source file:com.sxc.lucene.analysis.AnalyzerUtils.java

License:Apache License

public static void displayTokens(TokenStream stream) throws IOException {
    stream.reset();//from  ww w .j av  a  2s . c o  m
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term + "] "); // B
    }
    stream.close();
}

From source file:com.sxc.lucene.analysis.AnalyzerUtils.java

License:Apache License

public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    stream.reset();//  ww  w  . ja v a2  s.  c om
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

    int position = 0;
    while (stream.incrementToken()) {
        int increment = posIncr.getPositionIncrement();
        if (increment > 0) {
            position = position + increment;
            System.out.println();
            System.out.print(position + ": ");
        }

        System.out.print("[" + term + "] ");
    }
    stream.close();
    System.out.println();
}