// TFIDFSmoother.java : Natural-Language-Processing » Dragon-Toolkit » dragon » ir » search » smooth » Java Open Source
//
// Java Open Source » Natural Language Processing » Dragon Toolkit
// Dragon Toolkit » dragon » ir » search » smooth » TFIDFSmoother.java
package dragon.ir.search.smooth;

import dragon.ir.index.*;
import dragon.ir.query.SimpleTermPredicate;
/**
 * <p>TF-IDF Smoother for vector space model</p>
 * <p>Scores a term as <code>queryWeight * tf * idf</code>, where
 * <code>idf = ln((1 + docNum) / (0.5 + docFrequency))</code>. When the
 * instance is built with BM25 parameters, that product is additionally
 * divided by <code>k1*(1-b) + k1*b*docTermCount/avgDocLength + tf</code>.</p>
 * <p>Copyright: Copyright (c) 2005</p>
 * <p>Company: IST, Drexel University</p>
 * @author Davis Zhou
 * @version 1.0
 */

public class TFIDFSmoother extends AbstractSmoother{
    /** Number of documents reported by the collection at construction time. */
    private int docNum;
    /** IDF of the query term most recently passed to {@link #setQueryTerm}. */
    private double curTermIDF;
    /** <code>k1*b*docTermCount/avgDocLength</code> for the document most recently passed to {@link #setDoc}. */
    private double curDocLengthRatio;
    /** Collection term count divided by {@link #docNum}; only computed by the BM25 constructor. */
    private double avgDocLength;
    /** BM25 <code>k1</code> parameter as last supplied. */
    private double bm25k1;
    /** BM25 <code>b</code> parameter as last supplied. */
    private double bm25b;
    /** Cached <code>k1 * (1 - b)</code>. */
    private double k1OneMinusB;
    /** Cached <code>k1 * b</code>. */
    private double k1B;
    /** Whether the BM25 denominator is applied when scoring. */
    private boolean useBM25;

    /**
     * Creates a plain TF-IDF smoother (no BM25 normalisation).
     *
     * @param collection the collection whose document count is used for the IDF
     */
    public TFIDFSmoother(IRCollection collection) {
        this.useBM25 = false;
        this.docNum = collection.getDocNum();
        // Flags inherited from AbstractSmoother.
        this.querytermFirstOptimal = true;
        this.docFirstOptimal = true;
        this.useLog = false;
    }

    /**
     * Creates a smoother that applies the BM25 denominator to the TF-IDF score.
     *
     * @param collection the collection supplying the document count and total term count
     * @param bm25k1 the BM25 <code>k1</code> parameter
     * @param bm25b the BM25 <code>b</code> parameter
     */
    public TFIDFSmoother(IRCollection collection, double bm25k1, double bm25b) {
        // The shared setup (document count and inherited flags) is identical to the plain variant.
        this(collection);
        this.useBM25 = true;
        this.avgDocLength = collection.getTermCount() * 1.0 / docNum;
        this.bm25k1 = bm25k1;
        this.bm25b = bm25b;
        refreshBm25Terms();
    }

    /**
     * Replaces the BM25 parameters. This does not change whether the BM25
     * denominator is applied; that is fixed by the constructor used.
     *
     * @param params array whose first two entries are <code>k1</code> and <code>b</code>
     * @return <code>true</code> if the parameters were accepted, <code>false</code> if
     *         <code>params</code> is <code>null</code> or has fewer than two entries
     */
    public boolean setParameters(double[] params){
        if (params == null || params.length < 2) {
            return false;
        }
        bm25k1 = params[0];
        bm25b = params[1];
        refreshBm25Terms();
        return true;
    }

    /**
     * Records the weight of the given query term and computes its IDF.
     *
     * @param queryTerm the query term about to be scored
     */
    public void setQueryTerm(SimpleTermPredicate queryTerm){
        this.queryWeight = queryTerm.getWeight();
        double numerator = 1 + docNum;
        double denominator = 0.5 + queryTerm.getDocFrequency();
        curTermIDF = Math.log(numerator / denominator);
    }

    /**
     * Computes the document-length component of the BM25 denominator for the
     * given document. Does nothing in plain TF-IDF mode.
     *
     * @param doc the document about to be scored
     */
    public void setDoc(IRDoc doc){
        if (!useBM25) {
            return;
        }
        curDocLengthRatio = k1B * doc.getTermCount() / avgDocLength;
    }

    /**
     * Scores the current query term against the current document.
     *
     * @param termFrequency frequency of the current query term in the current document
     * @return <code>queryWeight * termFrequency * idf</code>, divided by the BM25
     *         denominator when BM25 mode is active
     */
    protected double computeSmoothedProb(int termFrequency){
        double tfIdf = queryWeight * termFrequency * curTermIDF;
        if (!useBM25) {
            return tfIdf;
        }
        return tfIdf / (k1OneMinusB + curDocLengthRatio + termFrequency);
    }

    /** Recomputes the cached products derived from the current <code>k1</code> and <code>b</code>. */
    private void refreshBm25Terms() {
        k1OneMinusB = bm25k1 * (1 - bm25b);
        k1B = bm25k1 * bm25b;
    }
}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.