List of usage examples for org.apache.lucene.analysis TokenStream addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); stream.reset();//ww w . ja v a 2s . c o m int position = 0; while (stream.incrementToken()) { int increment = posIncr.getPositionIncrement(); if (increment > 0) { position = position + increment; System.out.println(); System.out.print(position + ": "); } System.out.print("[" + term + "] "); } System.out.println(); stream.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B stream.reset();/*from w ww . ja v a2 s . co m*/ int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D } System.out.print("[" + // #E term + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } System.out.println(); stream.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception { TokenStream stream = analyzer.tokenStream("field", new StringReader(input)); CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class); for (String expected : output) { assertTrue(stream.incrementToken()); assertEquals(expected, termAttr.toString()); }// w ww . ja v a 2 s .c o m assertFalse(stream.incrementToken()); stream.close(); }
From source file:test.AnalzyerDemo.java
License:Apache License
public static void main(String[] args) { Analyzer analyzer = new BaseAnalyzer(); // Analyzer analyzer = new org.apache.lucene.analysis.cjk.CJKAnalyzer(); // ?LuceneTokenStream TokenStream ts = null; try {/*from w w w . j av a 2 s. co m*/ ts = analyzer.tokenStream("myfield", new StringReader( "????????????????2?3noneok???BaseAnalyer can analysis english text too")); // ??? OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); // ?? CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); // ?? TypeAttribute type = ts.addAttribute(TypeAttribute.class); // ?TokenStream?StringReader ts.reset(); // ?? while (ts.incrementToken()) { System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); } // TokenStreamStringReader ts.end(); // Perform end-of-stream operations, e.g. set the final offset. } catch (IOException e) { e.printStackTrace(); analyzer.close(); } finally { // TokenStream? if (ts != null) { try { ts.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:text_analyzer.AnalyzerUtils.java
License:Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception { TokenStream stream = analyzer.tokenStream("field", new StringReader(input)); TermAttribute termAttr = stream.addAttribute(TermAttribute.class); for (String expected : output) { //Assert.assertTrue(stream.incrementToken()); //Assert.assertEquals(expected, termAttr.term()); }//w w w. j a v a 2 s.c o m //Assert.assertFalse(stream.incrementToken()); stream.close(); }
From source file:tweetembeding.AnalyzerClass.java
public String analizeString(String FIELD, String txt) throws IOException { this.analyzer = setAnalyzer(); TokenStream stream = analyzer.tokenStream(FIELD, new StringReader(txt)); CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); stream.reset();// w ww.j a va2s .c o m StringBuffer tokenizedContentBuff = new StringBuffer(); while (stream.incrementToken()) { String term = termAtt.toString(); if (!term.equals("nbsp")) tokenizedContentBuff.append(term).append(" "); } stream.end(); stream.close(); return tokenizedContentBuff.toString(); }
From source file:ucas.IKAnalzyerDemo.java
License:Apache License
public static String Spilt2Words(String content) { String resString = ""; //IK?smart??// ww w .ja va 2 s .co m Analyzer analyzer = new IKAnalyzer(true); //?LuceneTokenStream TokenStream ts = null; try { //myfield?? ts = analyzer.tokenStream("myfield", new StringReader(content)); //?? CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); //?TokenStream?StringReader ts.reset(); //?? while (ts.incrementToken()) { resString += term.toString() + "|"; } //TokenStreamStringReader ts.end(); // Perform end-of-stream operations, e.g. set the final offset. } catch (IOException e) { e.printStackTrace(); } finally { //TokenStream? if (ts != null) { try { ts.close(); } catch (IOException e) { e.printStackTrace(); } } } return resString; }
From source file:uib.scratch.AnalyzerUtils.java
/**
 * Half-finished experiment: tokenizes {@code text}, prints a Token built from each
 * term, and ALWAYS returns null — the bracket-parsing logic it was meant to perform
 * survives only inside the block comment below, and the early {@code return t} is
 * commented out.
 *
 * NOTE(review): the stream is never end()ed or close()d, and {@code currenttoken},
 * {@code character}, {@code i}, {@code tokenstart}, {@code tokenend} are all dead
 * state left over from the commented-out design. Also, the Token end offset is
 * {@code length() - 1}, not {@code length()} — possibly an off-by-one; confirm
 * against the intended offset convention before reviving this code.
 *
 * @return always null in its current state
 */
public static Token insertB(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    StringBuilder currenttoken = new StringBuilder(64);
    // currenttoken.append('[');
    char[] character = new char[1]; // dead: only read inside the commented-out block below
    int i = posIncr.getPositionIncrement(); // dead: computed before reset(), never updated
    // state flags for the abandoned bracket scanner below
    boolean tokenstart = false;
    boolean tokenend = false;
    stream.reset();
    while (stream.incrementToken()) {
        /* Abandoned character-level bracket scanner, kept for reference:
        end of stream reached ...
        if (i == 0)
            return null;
        if (character[0] == '[') {
            // token starts here ...
            tokenstart = true;
        } else if (character[0] == ']') {
            // token ends here ...
            tokenend = true;
        } else if (tokenstart && !tokenend) {
            // between end and start ...
            currenttoken.append(character[0]);
        }
        // we found our token and return it ...
        if (tokenstart && tokenend) {
            // currenttoken.append(']');
            // prepend a token because lucene does not allow leading wildcards.
            //currenttoken.insert(0, '_');*/
        //String tokenString = currenttoken.toString().toLowerCase().replace(' ', '_').trim();
        String tokenString = term.toString();
        Token t = new Token(tokenString, 0, tokenString.length() - 1);
        System.out.println(t);
        //return t;
    }
    return null;
}
From source file:uib.scratch.AnalyzerUtils.java
public static void insertBracket(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); TermAttribute term = stream.addAttribute(TermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); StringBuilder currentToken = new StringBuilder(64); int position = 0; while (stream.incrementToken()) { //final String token = new StringTokenizer(); int increment = posIncr.getPositionIncrement(); if (increment > 0) { position += increment;/*from ww w .j av a 2s. co m*/ offset.endOffset(); currentToken.append(term); currentToken.insert(0, "_"); String tokenString = currentToken.toString().toLowerCase().replace(' ', '_').trim(); Token t = new Token(tokenString, 0, tokenString.length() - 1); t.setTermBuffer(tokenString); System.out.println("test " + " " + t); } } }
From source file:uib.scratch.AnalyzerUtils.java
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); TermAttribute term = stream.addAttribute(TermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B int position = 0; stream.reset();//from w w w. j a va2s . co m while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D } System.out.print("[" + // #E term.term() + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } System.out.println(); }