Example usage for org.apache.lucene.analysis TokenStream addAttribute

Introduction

On this page you can find example usages of the addAttribute method of org.apache.lucene.analysis.TokenStream.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass)

Source Link

Document

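The caller must pass in a Class<? extends Attribute> value; the method returns the attribute instance of that class that is registered with the stream.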

Usage

From source file:analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the position increment of every token that {@code analyzer} produces for {@code text}.
 *
 * <p>The text is analyzed under the field name {@code "contents"} and one
 * {@code posIncr=<n>} line is written to standard output per token.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being read
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        // Follow the documented consumer workflow: reset(), incrementToken()*, end(), close().
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Release the stream even when tokenization fails part-way through.
        stream.close();
    }
}

From source file:analysis.SynonymAnalyzerTest.java

License:Apache License

/**
 * Checks that {@code synonymAnalyzer} turns the input "jumps" into exactly three tokens
 * ("jumps", "hops", "leaps"), where only the first token advances the position and the
 * remaining two are reported with a position increment of 0.
 */
public void testJumps() throws Exception {
    final String[] expectedTerms = { "jumps", "hops", "leaps" };

    TokenStream tokens = synonymAnalyzer.tokenStream("contents", new StringReader("jumps"));
    TermAttribute termAtt = tokens.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncrAtt = tokens.addAttribute(PositionIncrementAttribute.class);

    int seen = 0;
    for (; tokens.incrementToken(); seen++) {
        assertEquals(expectedTerms[seen], termAtt.term());

        // The first token moves one position forward; every later token stays on that position.
        int expectedIncrement = (seen == 0) ? 1 : 0;
        assertEquals(expectedIncrement, posIncrAtt.getPositionIncrement());
    }
    assertEquals(3, seen);
}

From source file:analyzers.DebugAnalyzer.java

License:Apache License

/**
* This method outputs token-by-token analysis of documents.
*
* @param    reader        the reader for the documents
* @param    analyzer      the analyzer /*from  w  ww . ja  v a2  s . c o  m*/
* @throws   IOException   cannot load stream
*/
public static void showAnalysisFromStream(Reader reader, Analyzer analyzer) throws IOException {
    TokenStream stream = analyzer.tokenStream("text", reader);
    CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);

    try {
        stream.reset();
        while (stream.incrementToken()) {
            // get starting and ending offsets
            int start = oa.startOffset();
            int end = oa.endOffset();

            // text of the token
            String token = cta.toString();

            // part of speech tag for the token
            String tag = typeAtt.type();

            System.out.printf("start: %4d\tend: %4d\tlength: %4d\ttag: %s\ttoken: %s\n", start, end,
                    token.length(), tag, token);
        }
    } finally {
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the term text of every remaining token in {@code stream} to standard output, each
 * wrapped in square brackets and followed by a space, all on one line.
 *
 * <p>The stream is supplied by the caller and is neither reset nor closed here.
 *
 * @param stream the token stream to consume
 * @throws IOException if reading the next token fails
 */
public static void displayTokens(TokenStream stream) throws IOException {
    // AttributeSource is the attribute container, not an Attribute, and has no term();
    // the term text comes from TermAttribute, as in the sibling display methods.
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term.term() + "] ");
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the tokens that {@code analyzer} produces for {@code text}, grouped by position.
 *
 * <p>Each time a token has a position increment greater than zero, a new output line is
 * started with the running position followed by {@code ": "}. Every token is then printed
 * as {@code [term]} followed by a space, so tokens sharing a position appear on one line.
 * The text is analyzed under the field name {@code "contents"}.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being read
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                // The line break must go to the same stream as the tokens. The original
                // no-argument LOGGER.info() call is assumed to be a botched println().
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term.term() + "] ");
        }
        stream.end();
        System.out.println();
    } finally {
        // Release the stream even when tokenization fails part-way through.
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints every token that {@code analyzer} produces for {@code text} together with its
 * offsets and type, grouped by position.
 *
 * <p>Each time a token has a position increment greater than zero, a new output line is
 * started with the running position followed by {@code ": "}. Every token is then printed
 * as {@code [term:startOffset->endOffset:type]} followed by a space. The text is analyzed
 * under the field name {@code "contents"}.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being read
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        // Attributes are live views that are updated on every incrementToken() call.
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {

            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                // The line break must go to the same stream as the tokens. The original
                // no-argument LOGGER.info() call is assumed to be a botched println().
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term.term() + ":" + offset.startOffset() + "->" + offset.endOffset() + ":"
                    + type.type() + "] ");
        }
        stream.end();
        System.out.println();
    } finally {
        // Release the stream even when tokenization fails part-way through.
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that analyzing {@code input} with {@code analyzer} yields exactly the terms in
 * {@code output}, in order, and no further tokens.
 *
 * <p>The input is analyzed under the field name {@code "field"}.
 *
 * @param analyzer the analyzer under test
 * @param input the text to analyze
 * @param output the expected term texts, in the order they must be produced
 * @throws Exception if the token stream fails while being read
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {

    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    try {
        TermAttribute termAttr = stream.addAttribute(TermAttribute.class);

        stream.reset();
        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.term());
        }

        Assert.assertFalse(stream.incrementToken());
        stream.end();
    } finally {
        // A failed assertion throws, so close in finally to avoid leaking the stream.
        stream.close();
    }

}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Logs the position increment of every token that {@code analyzer} produces for {@code text}.
 *
 * <p>The text is analyzed under the field name {@code "contents"} and one
 * {@code posIncr=<n>} message is passed to {@code LOGGER.info} per token.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being read
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        // Follow the documented consumer workflow: reset(), incrementToken()*, end(), close().
        stream.reset();
        while (stream.incrementToken()) {
            LOGGER.info("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Release the stream even when tokenization fails part-way through.
        stream.close();
    }
}

From source file:aos.lucene.analysis.Fragments.java

License:Apache License

/**
 * Code fragment showing how to read position increments from a token stream: the
 * attribute is obtained once and queried after each {@code incrementToken()} call, and
 * each value is logged as {@code posIncr=<n>}.
 *
 * <p>NOTE: both the analyzer and the text are {@code null} here, so invoking this method
 * throws a {@link NullPointerException}; it looks like a compile-only illustration.
 */
public void frag3() throws Exception {
    String text = null;
    Analyzer analyzer = null;

    TokenStream tokens = analyzer.tokenStream("contents", new StringReader(text));
    // addAttribute is generic, so the returned value needs no cast.
    PositionIncrementAttribute increments = tokens.addAttribute(PositionIncrementAttribute.class);

    boolean hasToken = tokens.incrementToken();
    while (hasToken) {
        LOGGER.info("posIncr=" + increments.getPositionIncrement());
        hasToken = tokens.incrementToken();
    }

}

From source file:aos.lucene.analysis.i18n.ChineseDemo.java

License:Apache License

/**
 * Tokenizes {@code string} with {@code analyzer} and shows the resulting tokens in an AWT
 * window.
 *
 * <p>Each token is rendered as {@code [term]} followed by a space. The window title is the
 * analyzer's simple class name, {@code " : "} and the analyzed string. The text is analyzed
 * under the field name {@code "contents"}.
 *
 * @param string the text to analyze
 * @param analyzer the analyzer used to tokenize the text
 * @throws IOException if the token stream fails while being read
 */
private static void analyze(String string, Analyzer analyzer) throws IOException {
    // The builder is local to this method, so the unsynchronized StringBuilder suffices.
    StringBuilder buffer = new StringBuilder();

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            buffer.append("[");
            buffer.append(term.term());
            buffer.append("] ");
        }
        stream.end();
    } finally {
        // Release the stream before building the UI, even when tokenization fails.
        stream.close();
    }

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    // Keep the frame at least 250 wide; otherwise add 50 to the measured text width.
    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
}