Example usage for org.apache.lucene.analysis TokenStream addAttribute

Introduction

This page collects example usages of org.apache.lucene.analysis.TokenStream.addAttribute.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass)

Source Link

Document

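The caller must pass in a Class<? extends Attribute> value. If an instance of that attribute class is already registered on this stream it is returned; otherwise a new instance is created, added to the stream, and returned.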

Usage

From source file:com.leavesfly.lia.analysis.i18n.ChineseDemo.java

License:Apache License

/**
 * Tokenizes {@code string} with {@code analyzer} and shows the resulting
 * tokens, each wrapped in square brackets, in an AWT frame.
 *
 * @param string   text to analyze; also shown in the frame title
 * @param analyzer analyzer that produces the token stream
 * @throws IOException if consuming or closing the token stream fails
 */
private static void analyze(String string, Analyzer analyzer) throws IOException {
    // Purely local buffer: no synchronization needed, so StringBuilder suffices.
    StringBuilder buffer = new StringBuilder();

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);

        // Follow the documented consumer workflow: reset, iterate, end, close.
        stream.reset();
        while (stream.incrementToken()) {
            buffer.append("[");
            buffer.append(term.term());
            buffer.append("] ");
        }
        stream.end();
    } finally {
        // Release the underlying reader even if tokenization fails.
        stream.close();
    }

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    // Never narrower than 250px; otherwise text width plus 50px of padding.
    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
}

From source file:com.liferay.events.global.mobile.Utils.java

License:Open Source License

/**
 * Tokenizes {@code words} and returns the tokens that are not stop words,
 * each followed by a single space.
 *
 * <p>The stop-word set is loaded from the classpath resource
 * {@code stopwords/words.txt} (one word per line, blank lines ignored) the
 * first time it is needed and cached in {@code EventContactServiceImpl.stopWords}.
 *
 * @param words text to filter
 * @return the remaining tokens, space-separated, with a trailing space after
 *         the last token (empty string when no token survives)
 * @throws IOException if the stop-word resource is missing or unreadable, or
 *                     if tokenization fails
 */
public static String removeStopWords(String words) throws IOException {
    if (Validator.isNull(EventContactServiceImpl.stopWords)) {
        // Load into a local set first so a failed read never leaves a
        // partially filled set cached for later calls.
        TreeSet<String> loaded = new TreeSet<String>();
        java.io.InputStream in = EventContactService.class.getClassLoader()
                .getResourceAsStream("stopwords/words.txt");
        if (in == null) {
            throw new IOException("Stop-word resource not found on classpath: stopwords/words.txt");
        }
        // NOTE(review): decodes with the platform default charset, as the original did.
        BufferedReader r = new BufferedReader(new InputStreamReader(in));
        try {
            String nextLine;
            while ((nextLine = r.readLine()) != null) {
                String word = nextLine.trim();
                if (Validator.isNotNull(word)) {
                    loaded.add(word);
                }
            }
        } finally {
            r.close();
        }
        EventContactServiceImpl.stopWords = loaded;
    }
    // remove punctuation and stuff

    final CharArraySet stopSet = new CharArraySet(Version.LUCENE_35, EventContactServiceImpl.stopWords, true);

    TokenStream tokenStream = new StopFilter(Version.LUCENE_35,
            new StandardTokenizer(Version.LUCENE_35, new StringReader(words)), stopSet);

    StringBuilder sb = new StringBuilder();
    try {
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            sb.append(charTermAttribute.toString()).append(" ");
        }
        tokenStream.end();
    } finally {
        // Always release the tokenizer and its reader.
        tokenStream.close();
    }

    return sb.toString();
}

From source file:com.lorelib.analyzer.sample.IKAnalzyerDemo.java

License:Apache License

/**
 * Demo entry point: runs a fixed sample sentence through an {@code IKAnalyzer}
 * and prints each token's offsets, text and type to standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Build the IK analyzer. NOTE(review): the original comment mentions
    // "smart", so `true` presumably selects smart segmentation - confirm.
    Analyzer ikAnalyzer = new IKAnalyzer(true);

    // Declared outside the try so the finally block can close it.
    TokenStream tokens = null;
    try {
        StringReader sample = new StringReader(
                "?????IKAnalyer can analysis english text too");
        tokens = ikAnalyzer.tokenStream("myfield", sample);

        // Register the attributes to read for each token: offsets, term text, type.
        OffsetAttribute span = tokens.addAttribute(OffsetAttribute.class);
        CharTermAttribute text = tokens.addAttribute(CharTermAttribute.class);
        TypeAttribute kind = tokens.addAttribute(TypeAttribute.class);

        // Reset the stream before consuming it.
        tokens.reset();

        // Print one line per token.
        boolean hasToken = tokens.incrementToken();
        while (hasToken) {
            StringBuilder line = new StringBuilder();
            line.append(span.startOffset()).append(" - ").append(span.endOffset());
            line.append(" : ").append(text.toString());
            line.append(" | ").append(kind.type());
            System.out.println(line.toString());
            hasToken = tokens.incrementToken();
        }

        // Perform end-of-stream operations, e.g. set the final offset.
        tokens.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the stream's resources if it was ever created.
        if (tokens != null) {
            try {
                tokens.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.lou.simhasher.seg.WordsSegment.java

License:Open Source License

/**
 * Splits a string into words using an {@code IKAnalyzer}.
 *
 * <p>An {@link IOException} raised while tokenizing is logged rather than
 * propagated; the words collected up to that point are returned.
 *
 * @param str the text to segment
 * @return the segmented words in token order; possibly empty, never {@code null}
 */
public static List<String> getCutWords(String str) {
    List<String> list = new ArrayList<String>();

    Analyzer analyzer = new IKAnalyzer();
    Reader r = new StringReader(str);
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("searchValue", r);
        // Register the attribute once instead of looking it up per token.
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);

        // Follow the documented consumer workflow: reset, iterate, end, close.
        ts.reset();
        while (ts.incrementToken()) {
            list.add(ta.toString());
        }
        ts.end();
    } catch (IOException e) {
        // Keep the original best-effort behavior, but preserve the cause.
        logger.error("?IO" + e.getMessage(), e);
    } finally {
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                logger.error("Failed to close token stream: " + e.getMessage(), e);
            }
        }
    }
    return list;
}

From source file:com.mathworks.xzheng.advsearching.SpanQueryTest.java

License:Apache License

/**
 * Prints every span matched by {@code query} to standard output.
 *
 * <p>For each match, the tokens of the matching document's stored field
 * {@code "f"} are printed on one line with the matched range wrapped in
 * {@code <...>}, followed by the document's score in parentheses. Scores come
 * from the top 10 search hits; documents outside those hits print {@code 0.0}.
 *
 * @param query the span query to run and dump
 * @throws IOException if searching, reading a document or tokenizing fails
 */
private void dumpSpans(SpanQuery query) throws IOException {
    Spans spans = query.getSpans(reader.getContext());
    System.out.println(query + ":");
    int numSpans = 0;

    // Key scores by doc id. A fixed-size array indexed by doc id overflows
    // as soon as the index holds more documents than the array has slots.
    TopDocs hits = searcher.search(query, 10);
    java.util.Map<Integer, Float> scores = new java.util.HashMap<Integer, Float>();
    for (ScoreDoc sd : hits.scoreDocs) {
        scores.put(Integer.valueOf(sd.doc), Float.valueOf(sd.score));
    }

    while (spans.next()) {
        numSpans++;

        int id = spans.doc();
        Document doc = reader.document(id);

        StringBuilder buffer = new StringBuilder();
        buffer.append("   ");

        // Re-analyze the stored text so the span can be marked token by token.
        TokenStream stream = analyzer.tokenStream("contents",
                new StringReader(doc.get("f")));
        try {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);

            stream.reset();
            int i = 0;
            while (stream.incrementToken()) {
                if (i == spans.start()) {
                    buffer.append("<");
                }
                buffer.append(term.toString());
                if (i + 1 == spans.end()) {
                    buffer.append(">");
                }
                buffer.append(" ");
                i++;
            }
            stream.end();
        } finally {
            // Close before the next iteration asks the analyzer for another stream.
            stream.close();
        }

        Float hitScore = scores.get(Integer.valueOf(id));
        float score = (hitScore == null) ? 0.0f : hitScore.floatValue();
        buffer.append("(").append(score).append(") ");
        System.out.println(buffer);
    }

    if (numSpans == 0) {
        System.out.println("   No spans");
    }
    System.out.println();
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Consumes {@code stream} and prints each token's term text, wrapped in
 * square brackets and followed by a space, to standard output.
 *
 * <p>The stream is reset before reading and is ended and closed afterwards,
 * so the caller must not consume it again without reinitializing it.
 *
 * @param stream the token stream to display
 * @throws IOException if reading from or closing the stream fails
 */
public static void displayTokens(TokenStream stream) throws IOException {
    try {
        // Kept so the stream's registered attribute set is the same as before;
        // the offsets themselves were read but never used, so those reads are gone.
        stream.addAttribute(OffsetAttribute.class);
        CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            System.out.print("[" + charTermAttribute.toString() + "] ");
        }
        stream.end();
    } finally {
        stream.close();
    }
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Analyzes {@code text} and prints its tokens grouped by position.
 *
 * <p>Each time a token has a positive position increment, a new line is
 * started with the running position followed by {@code ": "}; tokens with an
 * increment of zero are printed on the same line as the previous token.
 *
 * @param analyzer analyzer that produces the token stream
 * @param text     text to analyze
 * @throws IOException if consuming or closing the token stream fails
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // Follow the documented consumer workflow: reset, iterate, end, close.
        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term.toString() + "] ");
        }
        stream.end();
    } finally {
        stream.close();
    }
    System.out.println();
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Analyzes {@code text} and prints every token with its term text, start and
 * end offsets and type, grouped by position.
 *
 * <p>Each token is printed as {@code [term:start->end:type]}. A new line
 * prefixed with the running position is started whenever a token has a
 * positive position increment.
 *
 * @param analyzer analyzer that produces the token stream
 * @param text     text to analyze
 * @throws IOException if consuming or closing the token stream fails
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents",
            new StringReader(text));
    try {
        // Register every attribute that is read per token.
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr =
                stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        // Follow the documented consumer workflow: reset, iterate, end, close.
        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {

            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("["
                    + term.toString() + ":"
                    + offset.startOffset() + "->"
                    + offset.endOffset() + ":"
                    + type.type() + "] ");
        }
        stream.end();
    } finally {
        stream.close();
    }
    System.out.println();
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that analyzing {@code input} yields exactly the terms in
 * {@code output}, in order, with no extra tokens afterwards.
 *
 * @param analyzer analyzer under test
 * @param input    text to analyze
 * @param output   expected term texts, in token order
 * @throws Exception if tokenization fails
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    try {
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);

        stream.reset();
        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.toString());
        }
        Assert.assertFalse(stream.incrementToken());
        stream.end();
    } finally {
        // Close even when an assertion fails, so the stream is not leaked.
        stream.close();
    }
}

From source file:com.mathworks.xzheng.analysis.i18n.ChineseDemo.java

License:Apache License

/**
 * Tokenizes {@code string} with {@code analyzer} and shows the resulting
 * tokens, each wrapped in square brackets, in an AWT frame.
 *
 * @param string   text to analyze; also shown in the frame title
 * @param analyzer analyzer that produces the token stream
 * @throws IOException if consuming or closing the token stream fails
 */
private static void analyze(String string, Analyzer analyzer) throws IOException {
    // Purely local buffer: no synchronization needed, so StringBuilder suffices.
    StringBuilder buffer = new StringBuilder();

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    try {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);

        // Follow the documented consumer workflow: reset, iterate, end, close.
        stream.reset();
        while (stream.incrementToken()) {
            buffer.append("[");
            buffer.append(term.toString());
            buffer.append("] ");
        }
        stream.end();
    } finally {
        // Release the underlying reader even if tokenization fails.
        stream.close();
    }

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    // Never narrower than 250px; otherwise text width plus 50px of padding.
    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
}