Example usage for org.apache.lucene.analysis TokenStream addAttribute

List of usage examples for org.apache.lucene.analysis TokenStream addAttribute

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream addAttribute.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass) 

Source Link

Document

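The caller must pass in a Class<? extends Attribute> value. This method first checks whether an instance of that class is already present in this AttributeSource and returns it; otherwise a new instance is created, added to this AttributeSource, and returned.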

Usage

From source file:com.leavesfly.lia.analysis.i18n.ChineseDemo.java

License:Apache License

/**
 * Tokenizes {@code string} with {@code analyzer} and displays the resulting terms,
 * each wrapped in square brackets, in an AWT {@link Frame}.
 *
 * @param string   the text to analyze; also shown in the frame title
 * @param analyzer the analyzer used to produce the token stream
 * @throws IOException if the token stream fails while being consumed or closed
 */
private static void analyze(String string, Analyzer analyzer) throws IOException {
    StringBuilder buffer = new StringBuilder();

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);

        // Follow the full consumer workflow: reset -> incrementToken* -> end -> close.
        stream.reset();
        while (stream.incrementToken()) {
            buffer.append('[').append(term.term()).append("] ");
        }
        stream.end();
    } finally {
        // Always release the stream, even if tokenization fails part-way.
        stream.close();
    }

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    // Enforce a minimum frame width of 250; otherwise pad the text width by 50.
    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
}

From source file:com.liferay.events.global.mobile.Utils.java

License:Open Source License

/**
 * Tokenizes {@code words} with a {@code StandardTokenizer}, drops every token found in the
 * stop-word list, and returns the remaining terms joined by single spaces (each term,
 * including the last, is followed by a space).
 *
 * <p>The stop-word list is loaded on first use from the classpath resource
 * {@code stopwords/words.txt} and cached in {@code EventContactServiceImpl.stopWords}.
 *
 * @param words the text to filter
 * @return the surviving terms, each followed by a space
 * @throws IOException if the stop-word resource is missing or unreadable, or if
 *                     tokenization fails
 */
public static String removeStopWords(String words) throws IOException {
    if (Validator.isNull(EventContactServiceImpl.stopWords)) {
        java.io.InputStream in = EventContactService.class.getClassLoader()
                .getResourceAsStream("stopwords/words.txt");
        if (in == null) {
            // Fail with a descriptive error instead of an NPE inside InputStreamReader.
            throw new IOException("Stop word resource not found on classpath: stopwords/words.txt");
        }

        // Build the set locally and publish it only once complete, so a failed or
        // concurrent load never leaves a partially filled set in the shared field.
        TreeSet<String> loaded = new TreeSet<String>();
        // NOTE(review): the reader uses the platform default charset, as before —
        // confirm the encoding of words.txt and pin it explicitly if it is known.
        BufferedReader r = new BufferedReader(new InputStreamReader(in));
        try {
            String nextLine;
            while ((nextLine = r.readLine()) != null) {
                String word = nextLine.trim();
                if (Validator.isNotNull(word)) {
                    loaded.add(word);
                }
            }
        } finally {
            r.close();
        }
        EventContactServiceImpl.stopWords = loaded;
    }

    // The third argument (true) makes the stop set ignore case.
    final CharArraySet stopSet = new CharArraySet(Version.LUCENE_35, EventContactServiceImpl.stopWords, true);

    TokenStream tokenStream = new StopFilter(Version.LUCENE_35,
            new StandardTokenizer(Version.LUCENE_35, new StringReader(words)), stopSet);

    StringBuilder sb = new StringBuilder();
    try {
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            sb.append(charTermAttribute.toString()).append(" ");
        }
        tokenStream.end();
    } finally {
        // Release the tokenizer and its reader even if tokenization fails.
        tokenStream.close();
    }

    return sb.toString();
}

From source file:com.lorelib.analyzer.sample.IKAnalzyerDemo.java

License:Apache License

/**
 * Demo entry point: tokenizes a fixed sample sentence with {@code IKAnalyzer} and prints,
 * for every token, its start and end offsets, its term text and its type to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // NOTE(review): the boolean flag presumably selects IK's "smart" segmentation mode — confirm.
    Analyzer ikAnalyzer = new IKAnalyzer(true);

    // Declared outside the try so the finally block can close it.
    TokenStream tokens = null;
    try {
        StringReader sample = new StringReader("?????IKAnalyer can analysis english text too");
        tokens = ikAnalyzer.tokenStream("myfield", sample);

        // Register the attributes read for each token: offsets, term text, and type.
        OffsetAttribute offsetAttr = tokens.addAttribute(OffsetAttribute.class);
        CharTermAttribute termAttr = tokens.addAttribute(CharTermAttribute.class);
        TypeAttribute typeAttr = tokens.addAttribute(TypeAttribute.class);

        // The stream must be reset before the first incrementToken() call.
        tokens.reset();
        while (tokens.incrementToken()) {
            StringBuilder line = new StringBuilder();
            line.append(offsetAttr.startOffset()).append(" - ").append(offsetAttr.endOffset());
            line.append(" : ").append(termAttr.toString());
            line.append(" | ").append(typeAttr.type());
            System.out.println(line.toString());
        }
        // Perform end-of-stream operations, e.g. set the final offset.
        tokens.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the stream's resources if it was ever created.
        if (tokens != null) {
            try {
                tokens.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.lou.simhasher.seg.WordsSegment.java

License:Open Source License

/**
 * Segments {@code str} into words using {@code IKAnalyzer}.
 *
 * <p>An {@link IOException} raised during tokenization is logged rather than propagated;
 * in that case the words collected so far are returned.
 *
 * @param str the text to segment
 * @return the terms produced by the analyzer, in stream order (never {@code null})
 */
public static List<String> getCutWords(String str) {
    List<String> list = new ArrayList<String>();
    Analyzer analyzer = new IKAnalyzer();
    TokenStream ts = null;
    try {
        Reader r = new StringReader(str);
        ts = analyzer.tokenStream("searchValue", r);
        // Fetch the attribute once; the same instance is updated in place for every token.
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);

        // reset() must be called before the first incrementToken().
        ts.reset();
        while (ts.incrementToken()) {
            list.add(term.toString());
        }
        ts.end();
    } catch (IOException e) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error("?IO" + e.getMessage(), e);
    } finally {
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                logger.error("?IO" + e.getMessage(), e);
            }
        }
        // The analyzer is created per call, so release it here as well.
        analyzer.close();
    }
    return list;
}

From source file:com.mathworks.xzheng.advsearching.SpanQueryTest.java

License:Apache License

/**
 * Prints every span matched by {@code query}: for each match, the tokens of the document's
 * {@code "f"} field are printed with the matched range wrapped in {@code <...>}, followed by
 * the document's score in parentheses. Prints {@code "No spans"} when nothing matches.
 *
 * @param query the span query to run and visualize
 * @throws IOException if reading the index or consuming a token stream fails
 */
private void dumpSpans(SpanQuery query) throws IOException {
    Spans spans = query.getSpans(reader.getContext());
    System.out.println(query + ":");
    int numSpans = 0;

    TopDocs hits = searcher.search(query, 10);
    // Indexed by doc id, so size it from the reader instead of a hard-coded 2.
    // Documents outside the top 10 hits keep the default score of 0.
    // NOTE(review): assumes searcher searches the same index as reader — confirm.
    float[] scores = new float[reader.maxDoc()];
    for (ScoreDoc sd : hits.scoreDocs) {
        scores[sd.doc] = sd.score;
    }

    while (spans.next()) {
        numSpans++;

        int id = spans.doc();
        Document doc = reader.document(id);

        // Re-analyze the stored text so the span's token positions can be mapped to terms.
        TokenStream stream = analyzer.tokenStream("contents",
                new StringReader(doc.get("f")));

        StringBuilder buffer = new StringBuilder();
        buffer.append("   ");
        try {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);

            stream.reset();
            int i = 0;
            while (stream.incrementToken()) {
                if (i == spans.start()) {
                    buffer.append("<"); // opening marker before the first token of the span
                }
                buffer.append(term.toString());
                if (i + 1 == spans.end()) {
                    buffer.append(">"); // closing marker after the last token (end is exclusive)
                }
                buffer.append(" ");
                i++;
            }
            stream.end();
        } finally {
            // Close each per-document stream, even if tokenization fails.
            stream.close();
        }
        buffer.append("(").append(scores[id]).append(") ");
        System.out.println(buffer);
    }

    if (numSpans == 0) {
        System.out.println("   No spans");
    }
    System.out.println();
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Consumes {@code stream} and prints each term to standard output as {@code "[term] "}.
 *
 * <p>The stream is fully consumed and closed by this method, so the caller must not
 * reuse it afterwards.
 *
 * @param stream the token stream to print
 * @throws IOException if the stream fails while being consumed or closed
 */
public static void displayTokens(TokenStream stream) throws IOException {
    try {
        CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            System.out.print("[" + charTermAttribute.toString() + "] ");
        }
        stream.end();
    } finally {
        // Complete the consumer workflow so the stream's resources are released.
        stream.close();
    }
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Analyzes {@code text} and prints its terms grouped by token position: each new position
 * starts a fresh line prefixed with {@code "<position>: "}, and terms sharing a position
 * (position increment of 0) are printed on the same line.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text     the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // reset() must be called before the first incrementToken().
        stream.reset();

        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                // A positive increment means this token starts a new position.
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term.toString() + "] ");
        }
        stream.end();
    } finally {
        stream.close();
    }
    System.out.println();
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Analyzes {@code text} and prints every token with its full details, grouped by position.
 * Each new position starts a line prefixed with {@code "<position>: "}; each token is
 * printed as {@code "[term:startOffset->endOffset:type] "}.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param text     the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents",
            new StringReader(text));
    try {
        // Attributes are obtained once; their values are updated in place per token.
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr =
                stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        // reset() must be called before the first incrementToken().
        stream.reset();

        int position = 0;
        while (stream.incrementToken()) {

            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                // A positive increment means this token starts a new position.
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("["
                    + term.toString() + ":"
                    + offset.startOffset() + "->"
                    + offset.endOffset() + ":"
                    + type.type() + "] ");
        }
        stream.end();
    } finally {
        stream.close();
    }
    System.out.println();
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that analyzing {@code input} with {@code analyzer} yields exactly the terms in
 * {@code output}, in order, with no extra tokens afterwards.
 *
 * @param analyzer the analyzer under test
 * @param input    the text to analyze
 * @param output   the expected terms, in stream order
 * @throws Exception if the token stream fails; assertion failures propagate as thrown by
 *                   {@code Assert}
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    try {
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);

        // reset() must be called before the first incrementToken().
        stream.reset();
        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.toString());
        }
        // The stream must be exhausted once all expected terms have been seen.
        Assert.assertFalse(stream.incrementToken());
        stream.end();
    } finally {
        // Close even when an assertion fails, so the stream is never leaked.
        stream.close();
    }
}

From source file:com.mathworks.xzheng.analysis.i18n.ChineseDemo.java

License:Apache License

/**
 * Tokenizes {@code string} with {@code analyzer} and displays the resulting terms,
 * each wrapped in square brackets, in an AWT {@link Frame}.
 *
 * @param string   the text to analyze; also shown in the frame title
 * @param analyzer the analyzer used to produce the token stream
 * @throws IOException if the token stream fails while being consumed or closed
 */
private static void analyze(String string, Analyzer analyzer) throws IOException {
    StringBuilder buffer = new StringBuilder();

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    try {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);

        // reset() must be called before the first incrementToken().
        stream.reset();
        while (stream.incrementToken()) {
            buffer.append('[').append(term.toString()).append("] ");
        }
        stream.end();
    } finally {
        // Always release the stream, even if tokenization fails part-way.
        stream.close();
    }

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    // Enforce a minimum frame width of 250; otherwise pad the text width by 50.
    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
}