List of usage examples for org.apache.lucene.analysis TokenStream addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
From source file: webdocs.WebDocAnalyzer.java
String preprocessText(String html, boolean title) throws IOException { int freqCutoffThreshold = title ? 1 : this.freqCutoffThreshold; HashMap<String, Integer> tfMap = new HashMap<>(); StringBuffer buff = new StringBuffer(); CharArraySet stopList = StopFilter.makeStopSet(Version.LUCENE_4_9, indexer.buildStopwordList("stopfile")); Analyzer webdocAnalyzer = new WebDocAnalyzer(indexer.getProperties(), stopList); TokenStream stream = webdocAnalyzer.tokenStream("field", new StringReader(html)); CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); stream.reset();/*from w w w .ja v a 2 s. c om*/ while (stream.incrementToken()) { String token = termAtt.toString(); Integer tf = tfMap.get(token); if (tf == null) { tf = new Integer(0); } tf++; tfMap.put(token, tf); } stream.end(); stream.close(); for (Map.Entry<String, Integer> e : tfMap.entrySet()) { String word = e.getKey(); int tf = e.getValue(); if (tf >= freqCutoffThreshold) { for (int i = 0; i < tf; i++) { // print this word tf times... word order doesn't matter! buff.append(word).append(" "); } } } return buff.toString(); }
From source file: wt10g.WTDocument.java
/**
 * Runs the configured analyzer over the given text and returns the
 * lower-cased tokens joined by single spaces (with a trailing space when
 * at least one token is produced).
 *
 * @param text raw document text to tokenize
 * @return space-separated, lower-cased analyzed terms
 * @throws Exception if tokenization fails
 */
String preProcess(String text) throws Exception {
    // StringBuilder: single-threaded use, no need for StringBuffer's locking.
    StringBuilder tokenizedContentBuff = new StringBuilder();
    // try-with-resources closes the stream even if incrementToken() throws
    // (the original leaked it on that path).
    try (TokenStream stream = analyzer.tokenStream("dummy", new StringReader(text))) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            // NOTE(review): toLowerCase() is default-locale dependent; the
            // original behaves the same way, so this is preserved — consider
            // toLowerCase(Locale.ROOT) if locale-independence is wanted.
            tokenizedContentBuff.append(termAtt.toString().toLowerCase()).append(" ");
        }
        stream.end();
    }
    return tokenizedContentBuff.toString();
}
From source file: yasoco.TermScore.java
Query constructQuery(int docId) throws Exception { Query q = null;/*from www . ja v a 2s .co m*/ boolean formSelectiveQueries = Boolean.parseBoolean(prop.getProperty("toptermquery", "true")); /* MoreLikeThis not woking for some reason! if (formSelectiveQueries) { q = mlt.like(docId); return q; } */ Document queryDoc = reader.document(docId); q = new BooleanQuery(); int termCount = 0; TokenStream fs = null; List<IndexableField> fields = queryDoc.getFields(); for (IndexableField field : fields) { String fieldName = field.name(); if (fieldName.equals(JavaSCTree.FIELD_DOCNAME) || fieldName.equals(JavaSCTree.FIELD_SC)) continue; // ignore non-searchable fields if (formSelectiveQueries) { List<TermScore> topList = selTerms(docId, field.name(), q); for (TermScore ts : topList) { Term thisTerm = new Term(field.name(), ts.term); ((BooleanQuery) q).add(new TermQuery(thisTerm), BooleanClause.Occur.SHOULD); } } else { fs = queryDoc.getField(fieldName).tokenStream(analyzer); CharTermAttribute termAtt = fs.addAttribute(CharTermAttribute.class); fs.reset(); // print all tokens until stream is exhausted while (fs.incrementToken()) { Term thisTerm = new Term(field.name(), termAtt.toString()); termCount++; if (termCount == maxlimit) { maxlimit = maxlimit << 1; BooleanQuery.setMaxClauseCount(maxlimit); } ((BooleanQuery) q).add(new TermQuery(thisTerm), BooleanClause.Occur.SHOULD); } fs.end(); fs.close(); } } return q; }