Example usage for org.apache.lucene.analysis TokenStream addAttribute

List of usage examples for org.apache.lucene.analysis TokenStream addAttribute

Introduction

On this page you can find example usages of org.apache.lucene.analysis TokenStream addAttribute.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass) 

Document

The caller must pass in a Class&lt;? extends Attribute&gt; value. If an attribute of that type is already registered with the stream, the existing instance is returned; otherwise a new instance is created, added to the stream, and returned.
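For orientation, here is a minimal, self-contained sketch of the usual consume loop around addAttribute. The StandardAnalyzer, the field name "body", and the sample text are illustrative assumptions for this sketch, not part of the method above.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class AddAttributeSketch {
    public static void main(String[] args) throws IOException {
        // StandardAnalyzer and the field name "body" are placeholder choices.
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
                TokenStream stream = analyzer.tokenStream("body", "The quick brown fox")) {
            // addAttribute returns the CharTermAttribute instance already registered
            // on this stream, or creates, registers, and returns a new one.
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset(); // mandatory before the first incrementToken()
            while (stream.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            stream.end(); // finish consuming; try-with-resources then closes the stream
        }
    }
}

Note that addAttribute is normally called before reset(), and the returned attribute object is reused across tokens: its contents are only valid until the next incrementToken() call.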

Usage

From source file:jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java

License:Apache License

private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getPrefixQuery(field, termStr);
    }
    // get Analyzer from superclass and tokenize the term
    TokenStream source;
    try {
        source = getAnalyzer().tokenStream(field, termStr);
        source.reset();
    } catch (IOException e) {
        return super.getPrefixQuery(field, termStr);
    }
    List<String> tlist = new ArrayList<>();
    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);

    while (true) {
        try {
            if (!source.incrementToken())
                break;
        } catch (IOException e) {
            break;
        }
        tlist.add(termAtt.toString());
    }

    try {
        source.close();
    } catch (IOException e) {
        // ignore
    }

    if (tlist.size() == 1) {
        return super.getPrefixQuery(field, tlist.get(0));
    } else {
        // build a boolean query with prefix on each one...
        List<BooleanClause> clauses = new ArrayList<>();
        for (String token : tlist) {
            clauses.add(new BooleanClause(super.getPrefixQuery(field, token), BooleanClause.Occur.SHOULD));
        }
        return getBooleanQuery(clauses, true);

        //return super.getPrefixQuery(field, termStr);

        /* this means that the analyzer used either added or consumed
        * (common for a stemmer) tokens, and we can't build a PrefixQuery */
        //            throw new ParseException("Cannot build PrefixQuery with analyzer "
        //                    + getAnalyzer().getClass()
        //                    + (tlist.size() > 1 ? " - token(s) added" : " - token consumed"));
    }

}

From source file:jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java

License:Apache License

private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getWildcardQuery(field, termStr);
    }
    boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
    StringBuilder aggStr = new StringBuilder();
    StringBuilder tmp = new StringBuilder();
    for (int i = 0; i < termStr.length(); i++) {
        char c = termStr.charAt(i);
        if (c == '?' || c == '*') {
            if (isWithinToken) {
                try {
                    TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
                    source.reset();
                    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
                    if (source.incrementToken()) {
                        String term = termAtt.toString();
                        if (term.length() == 0) {
                            // no tokens, just use what we have now
                            aggStr.append(tmp);
                        } else {
                            aggStr.append(term);
                        }
                    } else {
                        // no tokens, just use what we have now
                        aggStr.append(tmp);
                    }
                    source.close();
                } catch (IOException e) {
                    aggStr.append(tmp);
                }
                tmp.setLength(0);
            }
            isWithinToken = false;
            aggStr.append(c);
        } else {
            tmp.append(c);
            isWithinToken = true;
        }
    }
    if (isWithinToken) {
        try {
            TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
            source.reset();
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            if (source.incrementToken()) {
                String term = termAtt.toString();
                if (term.length() == 0) {
                    // no tokens, just use what we have now
                    aggStr.append(tmp);
                } else {
                    aggStr.append(term);
                }
            } else {
                // no tokens, just use what we have now
                aggStr.append(tmp);
            }
            source.close();
        } catch (IOException e) {
            aggStr.append(tmp);
        }
    }

    return super.getWildcardQuery(field, aggStr.toString());
}

From source file:jp.sf.fess.solr.plugin.analysis.ja.TestJapaneseNumberFilter.java

License:Apache License

public void analyze(final Analyzer analyzer, final Reader reader, final Writer writer) throws IOException {
    final TokenStream stream = analyzer.tokenStream("dummy", reader);

    stream.reset();

    final CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);

    while (stream.incrementToken()) {
        writer.write(termAttr.toString());
        writer.write("\n");
    }

    stream.end();
    stream.close(); // also release the TokenStream, not just the reader and writer
    reader.close();
    writer.close();
}

From source file:kafka.examples.Producer.java

License:Apache License

public void run() {
    while (true) {
        String access_token = "2.009F1d9BmHHChD7abcd6de0a0jui5Y";
        int count = 20;
        Timeline tm = new Timeline(access_token);
        Analyzer analyzer4 = new IKAnalyzer(false); // false = fine-grained segmentation rather than smart mode

        try {
            StatusWapper status = tm.getPublicTimeline(count, 0);
            //-------------------------------------------
            try {
                TokenStream tokenstream = analyzer4.tokenStream("", new StringReader(status.toString()));
                CharTermAttribute termAttribute = tokenstream.addAttribute(CharTermAttribute.class); // term text of each token

                tokenstream.reset(); // mandatory before the first incrementToken()

                while (tokenstream.incrementToken()) { // step through the tokens
                    String prTxt = new String(termAttribute.buffer(), 0, termAttribute.length());
                    //producer.send(new KeyedMessage<Integer, String>(topic, prTxt + " "));
                    System.out.print(prTxt + "  ");
                }
                //System.out.println();
                tokenstream.end();
                tokenstream.close(); // release the TokenStream
            } catch (IOException e) {
                e.printStackTrace();
            }
            //-------------------------------------------
            producer.send(new KeyedMessage<Integer, String>(topic, status.toString()));
            Log.logInfo(status.toString());

        } catch (WeiboException e) {
            e.printStackTrace();
        }
    }
}

From source file:lia.analysis.CopyOfAnalyzerDemo.java

License:Apache License

private static void analyze(String text) throws IOException {
    System.out.println("Analyzing \"" + text + "\"");
    for (Analyzer analyzer : analyzers) {
        String name = analyzer.getClass().getSimpleName();
        System.out.println(name);
        TokenStream stream = analyzer.tokenStream("dummy", text);
        stream.reset();

        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        // these attributes are registered up front; only the term text is printed below
        OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);
        PositionIncrementAttribute positionAttr = stream.addAttribute(PositionIncrementAttribute.class);

        while (stream.incrementToken()) {

            System.out.print("[" + termAttr + "] ");
        }
        stream.end();
        stream.close(); // each analyzer's stream must be closed after use
        System.out.println("");
    }
}

From source file:lia.analysis.i18n.ChineseDemo.java

License:Apache License

private static void analyze(String string, Analyzer analyzer) throws IOException {
    StringBuffer buffer = new StringBuffer();

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    TermAttribute term = stream.addAttribute(TermAttribute.class); // pre-4.0 API; CharTermAttribute is its modern replacement
    stream.reset(); // required before the first incrementToken()

    while (stream.incrementToken()) { //C
        buffer.append("[");
        buffer.append(term.term());
        buffer.append("] ");
    }
    stream.close();

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: a Swing JLabel is used because the AWT Label
    // may not render the Chinese characters properly
    JLabel label = new JLabel(output); //D
    label.setSize(width, 75);
    //label.setAlignment(JLabel.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
}

From source file:lia.chapter4.AnalyzerUtils.java

License:Apache License

public static void displayTokens(TokenStream stream) throws IOException {

    TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
    stream.reset(); // assumes a fresh stream; the caller remains responsible for end()/close()
    while (stream.incrementToken()) {
        System.out.print("[" + term.getBytesRef().utf8ToString() + "] "); //B
    }
}

From source file:lia.chapter4.AnalyzerUtils.java

License:Apache License

public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    stream.reset(); // required before the first incrementToken()

    int position = 0;
    while (stream.incrementToken()) {
        int increment = posIncr.getPositionIncrement();
        if (increment > 0) {
            position = position + increment;
            System.out.println();
            System.out.print(position + ": ");
        }

        System.out.print("[" + term.getBytesRef().utf8ToString() + "] ");
    }
    System.out.println();
    stream.end();
    stream.close();
}

From source file:lia.chapter4.AnalyzerUtils.java

License:Apache License

public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    TokenStream tokenStream = analyzer.tokenStream("contents", // #A
            new StringReader(text));
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class); // #B
    TypeAttribute type = tokenStream.addAttribute(TypeAttribute.class); // #B
    tokenStream.reset(); // required before the first incrementToken()

    while (tokenStream.incrementToken()) { // #C

        int startOffset = offsetAttribute.startOffset();
        System.out.println(startOffset);
        int endOffset = offsetAttribute.endOffset();
        System.out.println(endOffset);
        String term = charTermAttribute.toString();
        System.out.println(term);
        System.out.println(type.toString());
    }
    tokenStream.end();
    tokenStream.close();
}

From source file:lia.chapter4.AnalyzerUtils.java

License:Apache License

public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));

    TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
    stream.reset(); // required before the first incrementToken()
    for (String expected : output) {
        Assert.assertTrue(stream.incrementToken());
        Assert.assertEquals(expected, termAttr.getBytesRef().utf8ToString());
    }
    Assert.assertFalse(stream.incrementToken());
    stream.end();
    stream.close();
}