Example usage for org.apache.lucene.analysis TokenStream addAttribute

List of usage examples for org.apache.lucene.analysis TokenStream addAttribute

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream addAttribute.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass) 

Source Link

Document

The caller must pass in a Class&lt;? extends Attribute&gt; value identifying the attribute to add.

Usage

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Tokenizes {@code text} with the given analyzer and prints each token grouped
 * by its position, one position per line (e.g. {@code 2: [quick] [fast]} for
 * tokens sharing a position).
 *
 * @param analyzer the analyzer used to produce the token stream
 * @param text     the raw text to analyze
 * @throws IOException if the underlying token stream fails
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    stream.reset();
    try {
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            // An increment of 0 means this token occupies the same position as
            // the previous one, so only advance/print the header when > 0.
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term + "] ");
        }
        System.out.println();
        stream.end(); // finish the reset()/incrementToken()/end()/close() workflow
    } finally {
        stream.close(); // release resources even if incrementToken() throws
    }
}

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Tokenizes {@code text} and prints full per-token details: position, term
 * text, start/end character offsets, and token type, grouped one position per
 * line.
 *
 * @param analyzer the analyzer used to produce the token stream
 * @param text     the raw text to analyze
 * @throws IOException if the underlying token stream fails
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", // #A
            new StringReader(text));

    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); // #B
    PositionIncrementAttribute posIncr = // #B
            stream.addAttribute(PositionIncrementAttribute.class); // #B
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B
    TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B

    stream.reset();
    try {
        int position = 0;
        while (stream.incrementToken()) { // #C

            int increment = posIncr.getPositionIncrement(); // #D
            // Zero increment = token stacked at the same position; only print a
            // new position header when the position actually advances.
            if (increment > 0) { // #D
                position = position + increment; // #D
                System.out.println(); // #D
                System.out.print(position + ": "); // #D
            }

            System.out.print("[" + // #E
                    term + ":" + // #E
                    offset.startOffset() + "->" + // #E
                    offset.endOffset() + ":" + // #E
                    type.type() + "] "); // #E
        }
        System.out.println();
        stream.end(); // finish the reset()/incrementToken()/end()/close() workflow
    } finally {
        stream.close(); // release resources even if incrementToken() throws
    }
}

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that analyzing {@code input} yields exactly the token terms in
 * {@code output}, in order, with no trailing tokens.
 *
 * @param analyzer the analyzer under test
 * @param input    the text to tokenize
 * @param output   the expected token terms, in order
 * @throws Exception if tokenization fails or an assertion trips
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));

    CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
    try {
        for (String expected : output) {
            assertTrue(stream.incrementToken());
            assertEquals(expected, termAttr.toString());
        }
        // The stream must be exhausted once all expected terms are consumed.
        assertFalse(stream.incrementToken());
        stream.end(); // finish the consume workflow before closing
    } finally {
        stream.close(); // close even when an assertion fails mid-loop
    }
}

From source file:test.AnalzyerDemo.java

License:Apache License

/**
 * Demo entry point: tokenizes a mixed CJK/English sample string with
 * {@link BaseAnalyzer} and prints each token's offsets, term text, and type.
 */
public static void main(String[] args) {
    Analyzer analyzer = new BaseAnalyzer();
    // Analyzer analyzer = new org.apache.lucene.analysis.cjk.CJKAnalyzer();
    // Obtain a Lucene TokenStream for the sample text.
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("myfield", new StringReader(
                "????????????????2?3noneok???BaseAnalyer can analysis english text too"));
        // Offset attribute: start/end character positions of each token.
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Term attribute: the token's text.
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Type attribute: the token's lexical type.
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream before consuming it.
        ts.reset();
        // Iterate over all tokens and print their details.
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the TokenStream, then the Analyzer. The original only closed
        // the analyzer in the catch branch, leaking it on the success path.
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        analyzer.close();
    }
}

From source file:text_analyzer.AnalyzerUtils.java

License:Apache License

/**
 * Intended to assert that analyzing {@code input} yields exactly the terms in
 * {@code output} — but every assertion is commented out, so this method
 * currently only opens and closes the stream without consuming any tokens.
 * NOTE(review): the loop body is empty and {@code termAttr} is unused; confirm
 * whether the assertions were disabled deliberately before relying on this as
 * a test helper. Uses the legacy pre-4.0 {@code TermAttribute} API.
 *
 * @param analyzer the analyzer under test
 * @param input    the text to tokenize
 * @param output   the expected token terms (currently ignored)
 * @throws Exception declared for the (disabled) assertion calls
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));

    TermAttribute termAttr = stream.addAttribute(TermAttribute.class);
    for (String expected : output) {
        //Assert.assertTrue(stream.incrementToken());
        //Assert.assertEquals(expected, termAttr.term());
    }
    //Assert.assertFalse(stream.incrementToken());
    stream.close();
}

From source file:tweetembeding.AnalyzerClass.java

/**
 * Analyzes {@code txt} with this object's analyzer and returns the resulting
 * terms joined by single spaces (with a trailing space when any term is
 * emitted). Terms equal to {@code "nbsp"} are dropped.
 *
 * @param FIELD the field name passed to the analyzer
 * @param txt   the text to tokenize
 * @return space-separated token terms
 * @throws IOException if the underlying token stream fails
 */
public String analizeString(String FIELD, String txt) throws IOException {
    this.analyzer = setAnalyzer();
    TokenStream stream = analyzer.tokenStream(FIELD, new StringReader(txt));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();

    // StringBuilder, not StringBuffer: local use needs no synchronization.
    StringBuilder tokenizedContentBuff = new StringBuilder();
    try {
        while (stream.incrementToken()) {
            String term = termAtt.toString();
            // Skip literal "nbsp" artifacts left over from HTML entities.
            if (!term.equals("nbsp"))
                tokenizedContentBuff.append(term).append(" ");
        }
        stream.end();
    } finally {
        stream.close(); // close even if incrementToken() throws
    }

    return tokenizedContentBuff.toString();
}

From source file:ucas.IKAnalzyerDemo.java

License:Apache License

/**
 * Segments {@code content} with the IK analyzer (smart mode) and returns the
 * terms joined by {@code "|"} (with a trailing {@code "|"} when any term is
 * emitted). Returns the empty string on I/O failure.
 *
 * @param content the text to segment
 * @return pipe-delimited token terms
 */
public static String Spilt2Words(String content) {
    // StringBuilder avoids the O(n^2) cost of += concatenation in the loop.
    StringBuilder resString = new StringBuilder();
    // IK analyzer in smart (coarse-grained) segmentation mode.
    Analyzer analyzer = new IKAnalyzer(true);

    // Obtain a Lucene TokenStream for the content.
    TokenStream ts = null;
    try {
        // "myfield" is an arbitrary field name for analysis-only use.
        ts = analyzer.tokenStream("myfield", new StringReader(content));
        // Term attribute: the token's text.
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);

        // Reset the TokenStream before consuming it.
        ts.reset();
        // Collect every token, delimited by '|'.
        while (ts.incrementToken()) {
            resString.append(term.toString()).append("|");
        }
        ts.end(); // Perform end-of-stream operations, e.g. set the final offset.

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the TokenStream; it is null if tokenStream() itself threw.
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return resString.toString();
}

From source file:uib.scratch.AnalyzerUtils.java

/**
 * Experimental/abandoned helper: tokenizes {@code text} and, for each token,
 * builds a {@link Token} from the term text and prints it — then always
 * returns {@code null}. Most of the original bracket-scanning logic is
 * commented out mid-loop.
 * NOTE(review): {@code new Token(tokenString, 0, tokenString.length() - 1)}
 * sets the end offset one short of the term length — presumably an off-by-one;
 * confirm against the Token(String, int, int) contract before reusing.
 * NOTE(review): the stream is never end()ed or close()d, and {@code posIncr},
 * {@code offset}, {@code currenttoken}, {@code character}, {@code i},
 * {@code tokenstart}, {@code tokenend} are all unused by the live code path.
 * Uses the legacy pre-4.0 {@code TermAttribute} API.
 *
 * @param analyzer the analyzer used to produce the token stream
 * @param text     the raw text to analyze
 * @return always {@code null} (the {@code return t} is commented out)
 * @throws IOException if the underlying token stream fails
 */
public static Token insertB(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);

    StringBuilder currenttoken = new StringBuilder(64);
    // currenttoken.append('[');
    char[] character = new char[1];
    int i = posIncr.getPositionIncrement();
    // reset our states :)

    boolean tokenstart = false;
    boolean tokenend = false;
    stream.reset();
    while (stream.incrementToken()) {

        /* end of stream reached ...    
        if (i == 0) return null;
                
        if (character[0] == '[') { // token starts here ...
        tokenstart = true;
        } else if (character[0] == ']') { // token ends here ...
        tokenend = true;
        } else if (tokenstart && !tokenend) { // between end and start ...
        currenttoken.append(character[0]);
        }
        // we found our token and return it ...
        if (tokenstart && tokenend) {
        // currenttoken.append(']');
        // prepend a token because lucene does not allow leading wildcards. 
        //currenttoken.insert(0, '_');*/
        //String tokenString = currenttoken.toString().toLowerCase().replace(' ', '_').trim();
        String tokenString = term.toString();
        Token t = new Token(tokenString, 0, tokenString.length() - 1);
        System.out.println(t);
        //return t;

    }
    return null;
}

From source file:uib.scratch.AnalyzerUtils.java

/**
 * Tokenizes {@code text} and, for each position-advancing token, prints a
 * {@link Token} built from the term text prefixed with {@code "_"} (lowercased,
 * spaces replaced by {@code "_"}).
 *
 * @param analyzer the analyzer used to produce the token stream
 * @param text     the raw text to analyze
 * @throws IOException if the underlying token stream fails
 */
public static void insertBracket(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    StringBuilder currentToken = new StringBuilder(64);

    int position = 0;
    // reset() before iteration, matching the other helpers in this class.
    stream.reset();
    try {
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                offset.endOffset();
                // Clear the builder per token: the original accumulated every
                // previous token into each printed string.
                currentToken.setLength(0);
                currentToken.append(term);
                currentToken.insert(0, "_");
                String tokenString = currentToken.toString().toLowerCase().replace(' ', '_').trim();
                Token t = new Token(tokenString, 0, tokenString.length() - 1);
                t.setTermBuffer(tokenString);

                System.out.println("test " + " " + t);
            }
        }
        stream.end();
    } finally {
        stream.close(); // the original never closed the stream
    }
}

From source file:uib.scratch.AnalyzerUtils.java

/**
 * Tokenizes {@code text} and prints full per-token details: position, term
 * text, start/end character offsets, and token type, grouped one position per
 * line. Uses the legacy pre-4.0 {@code TermAttribute} API.
 *
 * @param analyzer the analyzer used to produce the token stream
 * @param text     the raw text to analyze
 * @throws IOException if the underlying token stream fails
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", // #A
            new StringReader(text));

    TermAttribute term = stream.addAttribute(TermAttribute.class); // #B
    PositionIncrementAttribute posIncr = // #B
            stream.addAttribute(PositionIncrementAttribute.class); // #B
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B
    TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B

    int position = 0;
    stream.reset();
    try {
        while (stream.incrementToken()) { // #C

            int increment = posIncr.getPositionIncrement(); // #D
            // Zero increment = token stacked at the same position; only print a
            // new position header when the position actually advances.
            if (increment > 0) { // #D
                position = position + increment; // #D
                System.out.println(); // #D
                System.out.print(position + ": "); // #D
            }

            System.out.print("[" + // #E
                    term.term() + ":" + // #E
                    offset.startOffset() + "->" + // #E
                    offset.endOffset() + ":" + // #E
                    type.type() + "] "); // #E
        }
        System.out.println();
        stream.end(); // the original skipped end() and never closed the stream
    } finally {
        stream.close();
    }
}