Example usage for org.apache.lucene.analysis TokenStream incrementToken

List of usage examples for org.apache.lucene.analysis TokenStream incrementToken

Introduction

On this page you can find example usages of org.apache.lucene.analysis.TokenStream.incrementToken().

Prototype

public abstract boolean incrementToken() throws IOException;

Source Link

Document

Consumers (i.e., IndexWriter) use this method to advance the stream to the next token.

Usage

From source file:analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the position increment of every token the analyzer produces for the given text,
 * one {@code posIncr=N} line per token on standard output.
 *
 * @param analyzer analyzer used to tokenize {@code text}
 * @param text     text to analyze
 * @throws IOException if the token stream fails while being read or closed
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        // The TokenStream consumer workflow requires reset() before the first incrementToken().
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println("posIncr=" + posIncr.getPositionIncrement());
        }
        // Signal end-of-stream so the stream can perform its end-of-stream operations.
        stream.end();
    } finally {
        // Release the stream even if tokenization fails part-way through.
        stream.close();
    }
}

From source file:analysis.FtpFilePathAnalyzer.java

License:Apache License

/**
 * Demo entry point: runs {@code FtpFilePathAnalyzer} over the fixed sample string
 * {@code "c++c++"} and prints, for each token, its offsets, position increment, type and term
 * text to standard output.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    Analyzer ana = new FtpFilePathAnalyzer();
    String test2 = "c++c++";
    StringReader reader = new StringReader(test2);
    TokenStream ts = ana.tokenStream("path", reader);
    try {
        try {
            // Attribute instances are registered once per stream and updated in place on
            // every incrementToken(), so look them up once instead of once per token.
            TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
            OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) ts
                    .addAttribute(PositionIncrementAttribute.class);
            TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);

            // The TokenStream consumer workflow requires reset() before the first incrementToken().
            ts.reset();
            while (ts.incrementToken()) {
                System.out.print("(" + offsetAtt.startOffset() + "," + offsetAtt.endOffset() + ") ["
                        + posIncrAtt.getPositionIncrement() + "," + typeAtt.type() + "] " + "[" + termAtt.term()
                        + "]");
            }
            ts.end();
        } finally {
            // Always release the stream, even when tokenization fails.
            ts.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:analysis.SynonymAnalyzerTest.java

License:Apache License

/**
 * Checks that analyzing "jumps" with {@code synonymAnalyzer} yields exactly the terms
 * "jumps", "hops" and "leaps", where the first token has a position increment of 1 and the
 * following tokens have a position increment of 0.
 *
 * @throws Exception if the token stream fails
 */
public void testJumps() throws Exception {
    TokenStream stream = synonymAnalyzer.tokenStream("contents", new StringReader("jumps"));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        String[] expected = new String[] { "jumps", "hops", "leaps" };

        // The TokenStream consumer workflow requires reset() before the first incrementToken().
        stream.reset();
        int i = 0;
        while (stream.incrementToken()) {
            assertEquals(expected[i], term.term());

            // Only the first token advances the position; the rest share it.
            int expectedPos = (i == 0) ? 1 : 0;
            assertEquals(expectedPos, posIncr.getPositionIncrement());
            i++;
        }
        stream.end();
        assertEquals(3, i);
    } finally {
        // Close in finally so a failed assertion does not leak the stream.
        stream.close();
    }
}

From source file:analyzers.DebugAnalyzer.java

License:Apache License

/**
* This method outputs token-by-token analysis of documents.
*
* @param    reader        the reader for the documents
* @param    analyzer      the analyzer /*w w  w. j  a  v  a 2s .c  om*/
* @throws   IOException   cannot load stream
*/
public static void showAnalysisFromStream(Reader reader, Analyzer analyzer) throws IOException {
    TokenStream stream = analyzer.tokenStream("text", reader);
    CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);

    try {
        stream.reset();
        while (stream.incrementToken()) {
            // get starting and ending offsets
            int start = oa.startOffset();
            int end = oa.endOffset();

            // text of the token
            String token = cta.toString();

            // part of speech tag for the token
            String tag = typeAtt.type();

            System.out.printf("start: %4d\tend: %4d\tlength: %4d\ttag: %s\ttoken: %s\n", start, end,
                    token.length(), tag, token);
        }
    } finally {
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the term text of every remaining token in {@code stream} to standard output,
 * each wrapped in square brackets and followed by a space.
 *
 * <p>The stream is only advanced here; resetting, ending and closing it are left to the
 * caller that created it.
 *
 * @param stream the token stream to consume
 * @throws IOException if advancing the stream fails
 */
public static void displayTokens(TokenStream stream) throws IOException {
    // AttributeSource is the container that holds attributes, not an attribute itself:
    // it cannot be requested via addAttribute() and has no term() accessor. The term text
    // lives in TermAttribute, which the sibling methods use as well.
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term.term() + "] ");
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the tokens the analyzer produces for {@code text}, grouped by position.
 * Each time the position advances a new output line is started with {@code "<position>: "};
 * tokens with a position increment of 0 are printed on the same line as the previous token.
 *
 * @param analyzer analyzer used to tokenize {@code text}
 * @param text     text to analyze
 * @throws IOException if the token stream fails while being read or closed
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // The TokenStream consumer workflow requires reset() before the first incrementToken().
        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                // Line break on the same stream as the token output; a logger has no
                // zero-argument info() and would not interleave with System.out anyway.
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term.term() + "] ");
        }
        stream.end();
        System.out.println();
    } finally {
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the tokens the analyzer produces for {@code text}, grouped by position, with full
 * details. Each time the position advances a new output line is started with
 * {@code "<position>: "}; every token is printed as
 * {@code [term:startOffset->endOffset:type]}.
 *
 * @param analyzer analyzer used to tokenize {@code text}
 * @param text     text to analyze
 * @throws IOException if the token stream fails while being read or closed
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        // Attributes of interest, registered once and updated in place per token.
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        // The TokenStream consumer workflow requires reset() before the first incrementToken().
        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {

            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                // Line break on the same stream as the token output; a logger has no
                // zero-argument info() and would not interleave with System.out anyway.
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("["
                    + term.term() + ":"
                    + offset.startOffset() + "->"
                    + offset.endOffset() + ":"
                    + type.type() + "] ");
        }
        stream.end();
        System.out.println();
    } finally {
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that analyzing {@code input} with {@code analyzer} produces exactly the terms in
 * {@code output}, in order, and no further tokens.
 *
 * @param analyzer analyzer under test
 * @param input    text to analyze
 * @param output   expected term texts, in token order
 * @throws Exception if the token stream fails
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {

    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    try {
        TermAttribute termAttr = stream.addAttribute(TermAttribute.class);

        // The TokenStream consumer workflow requires reset() before the first incrementToken().
        stream.reset();
        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.term());
        }

        // The stream must be exhausted once all expected terms have been seen.
        Assert.assertFalse(stream.incrementToken());
        stream.end();
    } finally {
        // Close in finally so a failed assertion does not leak the stream.
        stream.close();
    }

}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Logs the position increment of every token the analyzer produces for the given text,
 * one {@code posIncr=N} message per token.
 *
 * @param analyzer analyzer used to tokenize {@code text}
 * @param text     text to analyze
 * @throws IOException if the token stream fails while being read or closed
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        // The TokenStream consumer workflow requires reset() before the first incrementToken().
        stream.reset();
        while (stream.incrementToken()) {
            LOGGER.info("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Release the stream even if tokenization fails part-way through.
        stream.close();
    }
}

From source file:aos.lucene.analysis.Fragments.java

License:Apache License

/**
 * Illustrative fragment: obtains a token stream for the "contents" field and logs the
 * position increment of each token.
 *
 * <p>NOTE: {@code analyzer} and {@code text} are null placeholders here, so as written this
 * method fails with a {@code NullPointerException} before any token is read.
 *
 * @throws Exception if creating or reading the token stream fails
 */
public void frag3() throws Exception {
    Analyzer analyzer = null;
    String text = null;

    StringReader input = new StringReader(text);
    TokenStream tokens = analyzer.tokenStream("contents", input);
    PositionIncrementAttribute increments = (PositionIncrementAttribute) tokens
            .addAttribute(PositionIncrementAttribute.class);

    for (boolean more = tokens.incrementToken(); more; more = tokens.incrementToken()) {
        LOGGER.info("posIncr=" + increments.getPositionIncrement());
    }

}