Example usage for org.apache.lucene.analysis TokenStream reset

List of usage examples for org.apache.lucene.analysis TokenStream reset

Introduction

On this page you can find example usage of org.apache.lucene.analysis.TokenStream.reset().

Prototype

public void reset() throws IOException 

Source Link

Document

This method is called by a consumer before it begins consuming tokens with incrementToken().

Usage

From source file:wt10g.WTDocument.java

/**
 * Runs {@code text} through {@code analyzer} and joins the emitted terms into
 * a single string.
 *
 * <p>Every term is lower-cased and followed by one space, so a non-empty
 * result always ends with a trailing space.
 *
 * @param text raw text to tokenize
 * @return the lower-cased terms, each followed by a single space; the empty
 *         string when the analyzer emits no tokens
 * @throws Exception if the token stream cannot be created, reset or consumed
 */
String preProcess(String text) throws Exception {
    // Purely local accumulator, so the unsynchronized builder is sufficient.
    StringBuilder tokenizedContent = new StringBuilder();

    TokenStream stream = analyzer.tokenStream("dummy", new StringReader(text));
    try {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();

        while (stream.incrementToken()) {
            // Locale.ROOT keeps the casing independent of the JVM's default
            // locale (e.g. Turkish maps 'I' to a dotless 'i').
            String term = termAtt.toString().toLowerCase(java.util.Locale.ROOT);
            tokenizedContent.append(term).append(' ');
        }

        stream.end();
    } finally {
        // Release the stream even when tokenization fails part-way through.
        stream.close();
    }
    return tokenizedContent.toString();
}

From source file:yasoco.TermScore.java

/**
 * Builds a disjunctive query (every clause is {@code SHOULD}) from the fields
 * of the document stored under {@code docId}.
 *
 * <p>Fields named {@code JavaSCTree.FIELD_DOCNAME} or
 * {@code JavaSCTree.FIELD_SC} are skipped. For every other field:
 * <ul>
 *   <li>when the {@code toptermquery} property is {@code true} (its default),
 *       one clause is added per term returned by {@code selTerms};</li>
 *   <li>otherwise the field is analyzed and one clause is added per token,
 *       doubling {@code maxlimit} and the global {@code BooleanQuery} clause
 *       limit each time the running token count reaches {@code maxlimit}.</li>
 * </ul>
 *
 * @param docId id of the document to read from {@code reader}
 * @return the assembled {@code BooleanQuery}; it has no clauses when no field
 *         contributes a term
 * @throws Exception if the document cannot be loaded or a field cannot be
 *         tokenized
 */
Query constructQuery(int docId) throws Exception {
    boolean formSelectiveQueries = Boolean.parseBoolean(prop.getProperty("toptermquery", "true"));
    // NOTE: a MoreLikeThis shortcut (mlt.like(docId)) was tried for the
    // selective case and disabled because it did not work.

    Document queryDoc = reader.document(docId);
    BooleanQuery query = new BooleanQuery();
    int termCount = 0;

    for (IndexableField field : queryDoc.getFields()) {
        String fieldName = field.name();
        if (fieldName.equals(JavaSCTree.FIELD_DOCNAME) || fieldName.equals(JavaSCTree.FIELD_SC)) {
            continue; // ignore non-searchable fields
        }

        if (formSelectiveQueries) {
            List<TermScore> topList = selTerms(docId, fieldName, query);
            for (TermScore ts : topList) {
                query.add(new TermQuery(new Term(fieldName, ts.term)), BooleanClause.Occur.SHOULD);
            }
            continue;
        }

        // Tokenize the field being iterated. Document.getField(name) returns
        // only the first field with that name, so looking the field up again
        // would re-read the first value of a multi-valued field every time.
        TokenStream fs = field.tokenStream(analyzer);
        try {
            CharTermAttribute termAtt = fs.addAttribute(CharTermAttribute.class);
            fs.reset();

            // Add one SHOULD clause per token until the stream is exhausted.
            while (fs.incrementToken()) {
                Term thisTerm = new Term(fieldName, termAtt.toString());
                termCount++;
                if (termCount == maxlimit) {
                    // Grow the global clause limit before the query can hit it.
                    maxlimit = maxlimit << 1;
                    BooleanQuery.setMaxClauseCount(maxlimit);
                }
                query.add(new TermQuery(thisTerm), BooleanClause.Occur.SHOULD);
            }
            fs.end();
        } finally {
            // Release the stream even when tokenization fails part-way through.
            fs.close();
        }
    }
    return query;
}