List of usage examples for org.apache.lucene.analysis.TokenStream#addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
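The examples below share one consume pattern: register an attribute with addAttribute, reset() the stream, loop on incrementToken(), then end() and close(). For orientation, here is a minimal sketch of that pattern, assuming an already-constructed Analyzer and the hypothetical field name "body" (Lucene 4.x or later, where Analyzer.tokenStream accepts a String directly):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class AddAttributeSketch {
    public static void printTokens(Analyzer analyzer, String text) throws IOException {
        // try-with-resources closes the stream even if incrementToken() throws
        try (TokenStream stream = analyzer.tokenStream("body", text)) {
            // addAttribute returns the stream's CharTermAttribute, creating and
            // registering it if absent; repeated calls return the same instance
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                   // mandatory before the first incrementToken()
            while (stream.incrementToken()) { // advance to the next token
                System.out.println(term.toString());
            }
            stream.end();                     // record end-of-stream state (final offset)
        }
    }
}

The same contract appears in every example that follows, with different attribute classes (offsets, types, position increments, raw term bytes) swapped in.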
From source file: jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java
License: Apache License
private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getPrefixQuery(field, termStr);
    }
    // get Analyzer from superclass and tokenize the term
    TokenStream source;
    try {
        source = getAnalyzer().tokenStream(field, termStr);
        source.reset();
    } catch (IOException e) {
        return super.getPrefixQuery(field, termStr);
    }
    List<String> tlist = new ArrayList<>();
    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
    while (true) {
        try {
            if (!source.incrementToken()) break;
        } catch (IOException e) {
            break;
        }
        tlist.add(termAtt.toString());
    }
    try {
        source.close();
    } catch (IOException e) {
        // ignore
    }
    if (tlist.size() == 1) {
        return super.getPrefixQuery(field, tlist.get(0));
    } else {
        // build a boolean query with prefix on each one...
        List<BooleanClause> clauses = new ArrayList<>();
        for (String token : tlist) {
            clauses.add(new BooleanClause(super.getPrefixQuery(field, token), BooleanClause.Occur.SHOULD));
        }
        return getBooleanQuery(clauses, true);
        //return super.getPrefixQuery(field, termStr);
        /* this means that the analyzer used either added or consumed
         * (common for a stemmer) tokens, and we can't build a PrefixQuery */
        // throw new ParseException("Cannot build PrefixQuery with analyzer "
        //         + getAnalyzer().getClass()
        //         + (tlist.size() > 1 ? " - token(s) added" : " - token consumed"));
    }
}
From source file: jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java
License: Apache License
private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getWildcardQuery(field, termStr);
    }
    boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
    StringBuilder aggStr = new StringBuilder();
    StringBuilder tmp = new StringBuilder();
    for (int i = 0; i < termStr.length(); i++) {
        char c = termStr.charAt(i);
        if (c == '?' || c == '*') {
            if (isWithinToken) {
                try {
                    TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
                    source.reset();
                    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
                    if (source.incrementToken()) {
                        String term = termAtt.toString();
                        if (term.length() == 0) {
                            // no tokens, just use what we have now
                            aggStr.append(tmp);
                        } else {
                            aggStr.append(term);
                        }
                    } else {
                        // no tokens, just use what we have now
                        aggStr.append(tmp);
                    }
                    source.close();
                } catch (IOException e) {
                    aggStr.append(tmp);
                }
                tmp.setLength(0);
            }
            isWithinToken = false;
            aggStr.append(c);
        } else {
            tmp.append(c);
            isWithinToken = true;
        }
    }
    if (isWithinToken) {
        try {
            TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
            source.reset();
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            if (source.incrementToken()) {
                String term = termAtt.toString();
                if (term.length() == 0) {
                    // no tokens, just use what we have now
                    aggStr.append(tmp);
                } else {
                    aggStr.append(term);
                }
            } else {
                // no tokens, just use what we have now
                aggStr.append(tmp);
            }
            source.close();
        } catch (IOException e) {
            aggStr.append(tmp);
        }
    }
    return super.getWildcardQuery(field, aggStr.toString());
}
From source file: jp.sf.fess.solr.plugin.analysis.ja.TestJapaneseNumberFilter.java
License: Apache License
public void analyze(final Analyzer analyzer, final Reader reader, final Writer writer) throws IOException {
    final TokenStream stream = analyzer.tokenStream("dummy", reader);
    final CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        writer.write(termAttr.toString());
        writer.write("\n");
    }
    stream.end();   // complete the TokenStream contract before closing
    stream.close();
    reader.close();
    writer.close();
}
From source file: kafka.examples.Producer.java
License: Apache License
public void run() {
    while (true) {
        String access_token = "2.009F1d9BmHHChD7abcd6de0a0jui5Y";
        int count = 20;
        Timeline tm = new Timeline(access_token);
        Analyzer analyzer4 = new IKAnalyzer(false); // IK Chinese analyzer; false = fine-grained (non-smart) segmentation
        try {
            StatusWapper status = tm.getPublicTimeline(count, 0);
            try {
                TokenStream tokenstream = analyzer4.tokenStream("", new StringReader(status.toString()));
                CharTermAttribute termAttribute = tokenstream.addAttribute(CharTermAttribute.class); // term text of each token
                tokenstream.reset(); // reset before consuming the stream
                while (tokenstream.incrementToken()) { // step through every token
                    String prTxt = new String(termAttribute.buffer(), 0, termAttribute.length());
                    //producer.send(new KeyedMessage<Integer, String>(topic, ptTxt + " "));
                    System.out.print(prTxt + " ");
                }
                //System.out.println();
                tokenstream.close(); // release the TokenStream
            } catch (IOException e) {
                e.printStackTrace();
            }
            producer.send(new KeyedMessage<Integer, String>(topic, status.toString()));
            Log.logInfo(status.toString());
        } catch (WeiboException e) {
            e.printStackTrace();
        }
    }
}
From source file: lia.analysis.CopyOfAnalyzerDemo.java
License: Apache License
private static void analyze(String text) throws IOException {
    System.out.println("Analyzing \"" + text + "\"");
    for (Analyzer analyzer : analyzers) {
        String name = analyzer.getClass().getSimpleName();
        System.out.println(name);
        TokenStream stream = analyzer.tokenStream("dummy", text);
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class); // registered but unused in this demo
        TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class); // registered but unused in this demo
        PositionIncrementAttribute positionAttr = stream.addAttribute(PositionIncrementAttribute.class); // registered but unused in this demo
        stream.reset();
        while (stream.incrementToken()) {
            System.out.print("[" + termAttr + "] ");
        }
        System.out.println("");
        stream.close(); // close each analyzer's stream before moving on
    }
}
From source file: lia.analysis.i18n.ChineseDemo.java
License: Apache License
private static void analyze(String string, Analyzer analyzer) throws IOException {
    StringBuffer buffer = new StringBuffer();
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    TermAttribute term = stream.addAttribute(TermAttribute.class); // legacy pre-4.0 attribute; CharTermAttribute replaces it in later Lucene
    stream.reset(); // reset before the first incrementToken()
    while (stream.incrementToken()) { //C
        buffer.append("[");
        buffer.append(term.term());
        buffer.append("] ");
    }
    stream.close();

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    JLabel label = new JLabel(output); //D
    label.setSize(width, 75);
    //label.setAlignment(JLabel.CENTER);
    label.setFont(font);
    f.add(label);
    f.setVisible(true);
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void displayTokens(TokenStream stream) throws IOException {
    TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
    stream.reset(); // assumes the caller hands in an unconsumed stream; reset is required before incrementToken()
    while (stream.incrementToken()) {
        System.out.print("[" + term.getBytesRef().utf8ToString() + "] "); //B
    }
    stream.end();
    stream.close();
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    stream.reset(); // required before the first incrementToken()
    int position = 0;
    while (stream.incrementToken()) {
        int increment = posIncr.getPositionIncrement();
        if (increment > 0) {
            position = position + increment;
            System.out.println();
            System.out.print(position + ": ");
        }
        System.out.print("[" + term.getBytesRef().utf8ToString() + "] ");
    }
    System.out.println();
    stream.close();
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
    TokenStream tokenStream = analyzer.tokenStream("contents", // #A
            new StringReader(text));
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class); // #B
    TypeAttribute type = tokenStream.addAttribute(TypeAttribute.class); // #B
    tokenStream.reset(); // required before the first incrementToken()
    while (tokenStream.incrementToken()) { // #C
        int startOffset = offsetAttribute.startOffset();
        System.out.println(startOffset);
        int endOffset = offsetAttribute.endOffset();
        System.out.println(endOffset);
        String term = charTermAttribute.toString();
        System.out.println(term);
        System.out.println(type.type()); // print the token type (e.g. <ALPHANUM>)
    }
    tokenStream.close();
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
    stream.reset(); // required before the first incrementToken()
    for (String expected : output) {
        Assert.assertTrue(stream.incrementToken());
        Assert.assertEquals(expected, termAttr.getBytesRef().utf8ToString());
    }
    Assert.assertFalse(stream.incrementToken());
    stream.close();
}