Example usage for org.apache.lucene.analysis TokenStream addAttribute

List of usage examples for org.apache.lucene.analysis TokenStream addAttribute

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream addAttribute.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass) 

Source Link

Document

The caller must pass in a Class&lt;? extends Attribute&gt; value identifying the attribute to add; the same attribute instance is returned if it was already present on the stream.

Usage

From source file:org.wltea.analyzer.sample.IKAnalzyerDemo.java

License:Apache License

/**
 * Demo entry point: analyzes a mixed Chinese/English sample sentence with the
 * IK analyzer and prints each token's offsets, text and type.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
    // IK analyzer in smart (coarse-grained) segmentation mode.
    Analyzer analyzer = new IKAnalyzerP(true);

    // Lucene TokenStream produced by the analyzer.
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("myfield", new StringReader(
                "?????IKAnalyer can analysis english text too"));
        // Offset attribute: start/end character positions of each token.
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Term attribute: the token text itself.
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Type attribute: the lexical type assigned by the tokenizer.
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);

        // A TokenStream must be reset before the first incrementToken() call.
        ts.reset();
        // Print every token with its character offsets and type.
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        ts.end(); // Perform end-of-stream operations, e.g. set the final offset.

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the TokenStream's resources.
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}

From source file:org.wltea.analyzer.sample.LuceneTokenizerDemo.java

License:Apache License

/**
 * Demonstrates tokenization with Lucene's {@code StandardTokenizer}: prints
 * each token's text, character offsets and type.
 */
public void testST() {
    Tokenizer tokenizer = new StandardTokenizer();
    try {
        tokenizer.setReader(new StringReader(
                "?????IKAnalyer can analysis english text too"));
    } catch (IOException e) {
        // Preserve the cause so the failure is diagnosable (was dropped before).
        throw new RuntimeException(e);
    }
    TokenStreamComponents tsc = new TokenStreamComponents(tokenizer);
    TokenStream ts = tsc.getTokenStream();
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute type = ts.addAttribute(TypeAttribute.class);
    try {
        // reset() is mandatory before the first incrementToken() call.
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString() + "->" + offset.startOffset() + "-" + offset.endOffset() + "->"
                    + type.type());
        }
        ts.end();
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        // Release the stream; a close failure is secondary, so do not mask
        // any primary exception already propagating.
        try {
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:org.wltea.analyzer.sample.LuceneTokenizerDemo.java

License:Apache License

/**
 * ?ClassTokenizer/*from  ww  w .  j a v a2  s .c o  m*/
 */
public void testCT() {
    Tokenizer tokenizer = new ClassicTokenizer();
    try {
        tokenizer.setReader(new StringReader(
                "?????IKAnalyer can analysis english text too"));
    } catch (IOException e) {
        throw new RuntimeException();
    }
    TokenStreamComponents tsc = new TokenStreamComponents(tokenizer);
    TokenStream ts = tsc.getTokenStream();
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute type = ts.addAttribute(TypeAttribute.class);
    try {
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString() + "->" + offset.startOffset() + "-" + offset.endOffset() + "->"
                    + type.type());
        }
        ts.end();
    } catch (IOException e) {
        throw new RuntimeException();
    }
}

From source file:org.wltea.analyzer.sample.LuceneTokenizerDemo.java

License:Apache License

/**
 * Demonstrates tokenization with Lucene's {@code NGramTokenizer} using its
 * default gram sizes (min: 1, max: 2): prints each token's text, character
 * offsets and type.
 */
public void testNT() {
    Tokenizer tokenizer = new NGramTokenizer();
    try {
        tokenizer.setReader(new StringReader(
                "?????IKAnalyer can analysis english text too"));
    } catch (IOException e) {
        // Preserve the cause so the failure is diagnosable (was dropped before).
        throw new RuntimeException(e);
    }
    TokenStreamComponents tsc = new TokenStreamComponents(tokenizer);
    TokenStream ts = tsc.getTokenStream();
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute type = ts.addAttribute(TypeAttribute.class);
    try {
        // reset() is mandatory before the first incrementToken() call.
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString() + "->" + offset.startOffset() + "-" + offset.endOffset() + "->"
                    + type.type());
        }
        ts.end();
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        // Release the stream; a close failure is secondary, so do not mask
        // any primary exception already propagating.
        try {
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:org.wltea.analyzer.sample.ThulacAnalzyerDemo.java

License:Apache License

/**
 * Demo entry point: runs the Thulac analyzer over a sample sentence, printing
 * every token with its offsets and type, then reports the elapsed time.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
    // Thulac analyzer in smart segmentation mode.
    Analyzer analyzer = new ThulacAnalyzer(true);
    // Lucene TokenStream produced by the analyzer.
    TokenStream stream = null;
    try {
        long startMillis = System.currentTimeMillis();
        stream = analyzer.tokenStream("myfield", new StringReader(
                "?????IKAnalyer can analysis english text too"));
        // Character offsets of each token.
        OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
        // Token text.
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        // Lexical type assigned to each token.
        TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);

        // A TokenStream must be reset before consumption begins.
        stream.reset();
        // Walk the stream and report each token.
        while (stream.incrementToken()) {
            System.out.println(offsetAttr.startOffset() + " - " + offsetAttr.endOffset() + " : " + termAttr.toString()
                    + " | " + typeAttr.type());
        }
        // Finish consumption (e.g. records the final offset).
        stream.end();
        System.out.println("wast:" + (System.currentTimeMillis() - startMillis));
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the TokenStream's resources.
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}

From source file:org.wltea.analyzer.test.IKAnalzyerDemo.java

License:Apache License

/**
 * Demo entry point: analyzes a sample sentence with the pinyin-enabled IK
 * analyzer and prints each token's offsets, text and type.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
    // IK (pinyin) analyzer in smart segmentation mode.
    Analyzer analyzer = new IKAnalyzer4PinYin(true);

    // Lucene TokenStream produced by the analyzer.
    TokenStream tokens = null;
    try {
        tokens = analyzer.tokenStream("myfield", new StringReader(
                "?????IKAnalyer can analysis english text too"));
        // Start/end character positions of each token.
        OffsetAttribute offsetAttribute = tokens.addAttribute(OffsetAttribute.class);
        // The token text itself.
        CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class);
        // The lexical type assigned by the tokenizer.
        TypeAttribute typeAttribute = tokens.addAttribute(TypeAttribute.class);

        // Mandatory reset before the first incrementToken() call.
        tokens.reset();
        // Emit one line per token.
        while (tokens.incrementToken()) {
            System.out.println(offsetAttribute.startOffset() + " - " + offsetAttribute.endOffset() + " : " + termAttribute.toString()
                    + " | " + typeAttribute.type());
        }
        // Finish consumption (e.g. sets the final offset).
        tokens.end();

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the TokenStream's resources.
        if (tokens != null) {
            try {
                tokens.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}

From source file:org.xbib.elasticsearch.test.AnalyzerUtils.java

License:Apache License

/**
 * Analyzes {@code text} with the given analyzer and prints every token with
 * full details: position, term text, character offsets, type, and payload
 * (when present).
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text     the text to analyze
 * @throws IOException if the underlying token stream fails
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));

    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    TypeAttribute type = stream.addAttribute(TypeAttribute.class);
    PayloadAttribute payload = stream.addAttribute(PayloadAttribute.class);

    int position = 0;
    try {
        // BUG FIX: a TokenStream must be reset before the first
        // incrementToken() call; modern Lucene throws otherwise.
        stream.reset();
        while (stream.incrementToken()) {

            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ":");
            }

            BytesRef pl = payload.getPayload();

            if (pl != null) {
                // BUG FIX: decode only the valid [offset, offset+length) slice of
                // the BytesRef as UTF-8, instead of the whole backing array in
                // the platform charset.
                System.out.print("[" + term.toString() + ":" + offset.startOffset() + "->" + offset.endOffset()
                        + ":" + type.type() + ":" + pl.utf8ToString() + "] ");

            } else {
                System.out.print("[" + term.toString() + ":" + offset.startOffset() + "->" + offset.endOffset()
                        + ":" + type.type() + "] ");

            }
        }
        // Finish consumption (records the final offset).
        stream.end();
    } finally {
        // BUG FIX: the stream was never closed; release it even on failure.
        stream.close();
    }
    System.out.println();
}

From source file:org.zenoss.zep.index.impl.lucene.LuceneQueryBuilder.java

License:Open Source License

/**
 * Tokenizes the given query using the same behavior as when the field is analyzed.
 *
 * @param fieldName The field name in the index.
 * @param analyzer  The analyzer to use to tokenize the query.
 * @param query     The query to tokenize.
 * @return The tokens from the query./*  w  w  w  .  j  av a2 s  .  c o  m*/
 * @throws ZepException If an exception occur.
 */
private static List<String> getTokens(String fieldName, Analyzer analyzer, String query) throws ZepException {
    final List<String> tokens = new ArrayList<String>();
    try {
        TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(query));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        try {
            ts.reset();
            while (ts.incrementToken()) {
                tokens.add(term.toString());
            }
            ts.end();
        } catch (IOException e) {
            throw new ZepException(e.getLocalizedMessage(), e);
        } finally {
            ts.close();
        }
    } catch (IOException e) {
        throw new ZepException(e.getLocalizedMessage(), e);
    }
    return tokens;
}

From source file:pl.litwiniuk.rowicki.modsynonyms.SlowSynonymFilterFactory.java

License:Apache License

/**
 * Splits {@code source} into tokens using a tokenizer created by the supplied
 * factory.
 *
 * @param source     the raw text to split
 * @param tokFactory factory that builds the tokenizer used for splitting
 * @return the non-empty token strings, in stream order
 * @throws IOException if tokenization fails
 */
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException {
    StringReader reader = new StringReader(source);
    TokenStream ts = loadTokenizer(tokFactory, reader);
    List<String> tokList = new ArrayList<String>();
    try {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        // NOTE(review): no reset() here — presumably loadTokenizer() returns a
        // stream ready for consumption; confirm against its implementation.
        while (ts.incrementToken()) {
            // Skip zero-length tokens some tokenizers may emit.
            if (termAtt.length() > 0)
                tokList.add(termAtt.toString());
        }
        // BUG FIX: finish consumption before releasing the stream.
        ts.end();
    } finally {
        // BUG FIX: the TokenStream itself was never closed, only the reader.
        ts.close();
        reader.close();
    }
    return tokList;
}

From source file:practica2_1.Practica2_1.java

/**
 * Tokenizes {@code string} with the given analyzer and returns the token
 * texts in stream order.
 *
 * @param analyzer the analyzer used to tokenize the input
 * @param string   the text to tokenize
 * @return the token strings produced by the analyzer
 * @throws RuntimeException wrapping any (unexpected) IOException
 */
public static List<String> tokenizeString(Analyzer analyzer, String string) {
    List<String> result = new ArrayList<String>();

    try {
        TokenStream stream = analyzer.tokenStream(null, new StringReader(string));
        CharTermAttribute cAtt = stream.addAttribute(CharTermAttribute.class);
        // The stream must be reset before the first incrementToken() call.
        stream.reset();

        while (stream.incrementToken()) {
            result.add(cAtt.toString());
        }
        // BUG FIX: end() must be called BEFORE close(); the original called
        // close() first and then end() on an already-closed stream.
        stream.end();
        stream.close();
    } catch (IOException e) {
        // not thrown b/c we're using a string reader...
        throw new RuntimeException(e);
    }
    return result;
}