Example usage for org.apache.lucene.analysis TokenStream addAttribute

List of usage examples for org.apache.lucene.analysis TokenStream addAttribute

Introduction

On this page you can find example usages of org.apache.lucene.analysis.TokenStream.addAttribute.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass) 

Source Link

Document

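The caller must pass in a Class<? extends Attribute> value. This method first checks whether an instance of that class is already present in this AttributeSource and returns it; otherwise a new instance is created, added to this AttributeSource, and returned.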

Usage

From source file:analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the position increment of every token the analyzer produces for the
 * given text, one {@code posIncr=<n>} line per token on standard output.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text     the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // Follow the documented consumer workflow: reset, consume, end, close.
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Release the stream's resources even when tokenization throws.
        stream.close();
    }
}

From source file:analysis.SynonymAnalyzerTest.java

License:Apache License

/**
 * Analyzes the text "jumps" with the synonym analyzer and checks that the
 * stream yields exactly "jumps", "hops" and "leaps", where only the first
 * token reports a position increment of 1 and the others report 0.
 */
public void testJumps() throws Exception {
    final String[] expectedTerms = { "jumps", "hops", "leaps" };

    TokenStream tokens = synonymAnalyzer.tokenStream("contents", new StringReader("jumps"));
    TermAttribute termAtt = tokens.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncrAtt = tokens.addAttribute(PositionIncrementAttribute.class);

    int seen = 0;
    for (; tokens.incrementToken(); seen++) {
        assertEquals(expectedTerms[seen], termAtt.term());

        // The first token advances the position; every later one must not.
        int expectedIncrement = (seen == 0) ? 1 : 0;
        assertEquals(expectedIncrement, posIncrAtt.getPositionIncrement());
    }
    assertEquals(3, seen);
}

From source file:analyzers.DebugAnalyzer.java

License:Apache License

/**
* This method outputs token-by-token analysis of documents.
*
* @param    reader        the reader for the documents
* @param    analyzer      the analyzer /*from  w  ww . ja  v a2  s . c o  m*/
* @throws   IOException   cannot load stream
*/
public static void showAnalysisFromStream(Reader reader, Analyzer analyzer) throws IOException {
    TokenStream stream = analyzer.tokenStream("text", reader);
    CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);

    try {
        stream.reset();
        while (stream.incrementToken()) {
            // get starting and ending offsets
            int start = oa.startOffset();
            int end = oa.endOffset();

            // text of the token
            String token = cta.toString();

            // part of speech tag for the token
            String tag = typeAtt.type();

            System.out.printf("start: %4d\tend: %4d\tlength: %4d\ttag: %s\ttoken: %s\n", start, end,
                    token.length(), tag, token);
        }
    } finally {
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the text of every remaining token in the stream to standard output,
 * each wrapped in square brackets and followed by a space.
 *
 * <p>The stream is neither reset nor closed here.
 * NOTE(review): presumably the caller owns the stream's lifecycle — confirm.
 *
 * @param stream the token stream to consume
 * @throws IOException if the stream fails while advancing to the next token
 */
public static void displayTokens(TokenStream stream) throws IOException {
    // TermAttribute exposes the token text. AttributeSource is the attribute
    // container itself, not an attribute, and has no term() accessor.
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term.term() + "] ");
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the tokens produced for {@code text}, grouped by position.
 *
 * <p>Each time a token reports a position increment greater than zero, a new
 * output line is started with the running position followed by {@code ": "}.
 * Every token is then printed as {@code [term]} followed by a space, so tokens
 * with an increment of zero share the line of the preceding position.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text     the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                // Start a new output line for each new position. All output
                // goes to System.out so the line breaks interleave correctly
                // with the print() calls.
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term.term() + "] ");
        }
        stream.end();
        // Terminate the last line.
        System.out.println();
    } finally {
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints the tokens produced for {@code text} with their full details,
 * grouped by position.
 *
 * <p>Each time a token reports a position increment greater than zero, a new
 * output line is started with the running position followed by {@code ": "}.
 * Every token is printed as {@code [term:startOffset->endOffset:type]}
 * followed by a space.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text     the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        // Attributes are views that are updated in place by incrementToken().
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                // Start a new output line for each new position. All output
                // goes to System.out so the line breaks interleave correctly
                // with the print() calls.
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("["
                    + term.term() + ":"
                    + offset.startOffset() + "->"
                    + offset.endOffset() + ":"
                    + type.type() + "] ");
        }
        stream.end();
        // Terminate the last line.
        System.out.println();
    } finally {
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that analyzing {@code input} yields exactly the terms in
 * {@code output}, in order, and no further tokens.
 *
 * @param analyzer the analyzer under test
 * @param input    the text to analyze
 * @param output   the expected token texts, in stream order
 * @throws Exception if the token stream fails; assertion failures propagate
 *                   as thrown by {@code Assert}
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    try {
        TermAttribute termAttr = stream.addAttribute(TermAttribute.class);

        stream.reset();
        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.term());
        }

        // The stream must be exhausted once all expected terms were seen.
        Assert.assertFalse(stream.incrementToken());
        stream.end();
    } finally {
        // Close even when an assertion fails, so a failing test does not leak the stream.
        stream.close();
    }
}

From source file:aos.lucene.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Logs the position increment of every token the analyzer produces for the
 * given text, one {@code posIncr=<n>} message per token.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text     the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // Follow the documented consumer workflow: reset, consume, end, close.
        stream.reset();
        while (stream.incrementToken()) {
            LOGGER.info("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Release the stream's resources even when tokenization throws.
        stream.close();
    }
}

From source file:aos.lucene.analysis.Fragments.java

License:Apache License

/**
 * Illustrative fragment: shows the call sequence for reading position
 * increments from a token stream.
 *
 * <p>The analyzer and the text are null placeholders, so invoking this method
 * as written fails with a {@link NullPointerException}.
 */
public void frag3() throws Exception {
    final String sampleText = null;
    final Analyzer sampleAnalyzer = null;

    TokenStream tokens = sampleAnalyzer.tokenStream("contents", new StringReader(sampleText));
    PositionIncrementAttribute increments =
            (PositionIncrementAttribute) tokens.addAttribute(PositionIncrementAttribute.class);

    // Log one message per token until the stream is exhausted.
    for (boolean more = tokens.incrementToken(); more; more = tokens.incrementToken()) {
        LOGGER.info("posIncr=" + increments.getPositionIncrement());
    }
}

From source file:aos.lucene.analysis.i18n.ChineseDemo.java

License:Apache License

/**
 * Tokenizes {@code string} with the given analyzer and shows the resulting
 * tokens, each rendered as {@code [term]} followed by a space, in a label
 * inside a new AWT frame.
 *
 * <p>The frame title is the analyzer's simple class name, then {@code " : "},
 * then the input string.
 *
 * @param string   the text to analyze and display
 * @param analyzer the analyzer used to tokenize {@code string}
 * @throws IOException if the token stream fails while being consumed or closed
 */
private static void analyze(String string, Analyzer analyzer) throws IOException {
    // Purely local buffer, so the unsynchronized StringBuilder is sufficient.
    StringBuilder buffer = new StringBuilder();

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            buffer.append("[");
            buffer.append(term.term());
            buffer.append("] ");
        }
        stream.end();
    } finally {
        // Release the stream before building the UI, even if tokenization fails.
        stream.close();
    }

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    // Frames narrower than 250 are set to 250; wider ones get 50 extra.
    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
}