Example usage for org.apache.lucene.search Explanation.IDFExplanation Explanation.IDFExplanation

List of usage examples for org.apache.lucene.search Explanation.IDFExplanation Explanation.IDFExplanation

Introduction

On this page you can find example usages of org.apache.lucene.search.Explanation.IDFExplanation.

Prototype

Explanation.IDFExplanation

Source Link

Usage

From source file:org.dbpedia.spotlight.lucene.similarity.InvCandFreqSimilarity.java

License:Apache License

/**
 * Builds the IDF explanation for {@code term} using an "inverse sense frequency" (ISF)
 * instead of the classic inverse document frequency.
 *
 * <p>The returned explanation is lazy: nothing is read from the index until
 * {@code getIdf()} is called. Evaluating it updates {@code surfaceFormTerm},
 * {@code surrogateDocIdSet} and {@code maxSf}, which are declared outside this method
 * (presumably fields of the enclosing similarity), so a SURFACE_FORM term is expected to be
 * evaluated before the context terms that are intersected with its document set.
 *
 * @param term     the query term being scored
 * @param searcher the searcher the query runs against; it is cast to {@code IndexSearcher}
 *                 when the frequency is computed, so other {@code Searcher} implementations
 *                 will fail with a {@code ClassCastException} at that point
 * @return an explanation whose {@code getIdf()} yields {@code isf(sf, maxSf)}
 * @throws IOException declared by the overridden method; this implementation defers all
 *                     index access to {@code getIdf()}
 */
@Override
public Explanation.IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException {
    return new Explanation.IDFExplanation() {

        // Sense frequency from the most recent sf() call; remains 0 until getIdf() has run.
        long sf = 0;

        boolean isSurfaceFormField = term.field()
                .equals(LuceneManager.DBpediaResourceField.SURFACE_FORM.toString());

        /**
         * Counts the documents matching {@code term}.
         * For a SURFACE_FORM term this is the number of documents containing the surface form;
         * for any other (context) term it is the number of documents containing both the term
         * and the previously stored surface form.
         */
        private long sf() {
            try {
                IndexReader reader = ((IndexSearcher) searcher).getIndexReader();
                TermsFilter filter = new TermsFilter();
                filter.addTerm(term);
                OpenBitSet matchingDocs = (OpenBitSet) filter.getDocIdSet(reader);

                if (isSurfaceFormField) {
                    // Record the surface form and its candidate documents (the "surrogates",
                    // i.e. URIs that can be represented by this surface form).
                    surfaceFormTerm = term;
                    surrogateDocIdSet = matchingDocs;
                    maxSf = surrogateDocIdSet.cardinality(); // size of the surrogate set
                } else {
                    // Keep only the surrogates that also contain this context term.
                    // NOTE(review): surrogateDocIdSet is only assigned in the branch above; if a
                    // context term is evaluated first this will throw a NullPointerException.
                    matchingDocs.and(surrogateDocIdSet);
                }

                sf = matchingDocs.cardinality();
            } catch (IOException e) {
                // Best effort: getIdf() is declared without a throws clause, so the failure is
                // reported here and the previously known sf is returned instead.
                System.err.println("Could not compute sense frequency for term " + term
                        + "; falling back to sf=" + sf);
                e.printStackTrace();
            }

            return sf;
        }

        /**
         * Inverse sense frequency: {@code log(maxSenseFreq / senseFreq) + 1},
         * or 0 when {@code senseFreq} is 0.
         */
        public float isf(long senseFreq, long maxSenseFreq) {
            if (senseFreq == 0) {
                return 0;
            }
            // Single-precision division, exactly as the former boxed Float arithmetic did.
            float ratio = (float) maxSenseFreq / (float) senseFreq;
            return (float) (Math.log(ratio) + 1.0);
        }

        /** Describes the last computed values; sf is 0 until {@link #getIdf()} has been called. */
        @Override
        public String explain() {
            return "isf(docFreq=" + sf + ", maxDocs=" + maxSf + ")";
        }

        /** Recomputes the sense frequency and returns the ISF weight for the term. */
        @Override
        public float getIdf() {
            sf = sf();
            return isf(sf, maxSf);
        }
    };
}

From source file:org.dbpedia.spotlight.lucene.similarity.NewSimilarity.java

License:Apache License

/**
 * Builds the IDF explanation for {@code term} as the product of an "inverse sense
 * frequency" (ISF) and a spot-belief factor for the current surface form.
 *
 * <p>The returned explanation is lazy: the index is only consulted when {@code getIdf()}
 * is called. Evaluating it updates {@code surfaceFormTerm}, {@code maxSf} and
 * {@code promiscuity}, which are declared outside this method (presumably fields of the
 * enclosing similarity), so a SURFACE_FORM term is expected to be evaluated before the
 * context terms that are counted together with it.
 *
 * @param term     the query term being scored
 * @param searcher the searcher the query runs against; it is cast to {@code IndexSearcher}
 *                 when the frequency is computed, so other {@code Searcher} implementations
 *                 will fail with a {@code ClassCastException} at that point
 * @return an explanation whose {@code getIdf()} yields
 *         {@code isf(sf, maxSf) * spotBelief(maxSf, promiscuity)}
 * @throws IOException declared by the overridden method; this implementation defers all
 *                     index access to {@code getIdf()}
 */
@Override
public Explanation.IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException {
    return new Explanation.IDFExplanation() {

        // Sense frequency from the most recent sf() call; remains 0 until getIdf() has run.
        long sf = 0;

        boolean isSurfaceFormField = term.field()
                .equals(LuceneManager.DBpediaResourceField.SURFACE_FORM.toString());

        /**
         * Looks up the sense frequency for {@code term} through {@code termCache}.
         * For a SURFACE_FORM term it also refreshes the stored surface form, {@code maxSf}
         * and {@code promiscuity}.
         */
        private long sf() {
            try {
                IndexReader reader = ((IndexSearcher) searcher).getIndexReader();

                if (isSurfaceFormField) {
                    surfaceFormTerm = term; // remember the surface form for later context terms
                    // Number of documents that contain the surface form (size of surrogate set).
                    maxSf = termCache.cardinality(reader, surfaceFormTerm);
                    // sf == maxSf makes isf evaluate to 1, so tf*isf collapses to tf.
                    sf = maxSf;
                    promiscuity = termCache.getPromiscuity(reader, new SurfaceForm(term.text()));
                } else {
                    // Number of documents containing both the surface form and this term.
                    sf = termCache.cardinality(reader, surfaceFormTerm, term);
                }
            } catch (IOException e) {
                // Best effort: getIdf() is declared without a throws clause, so the failure is
                // reported here and the previously known sf is returned instead.
                System.err.println("Could not compute sense frequency for term " + term
                        + "; falling back to sf=" + sf);
                e.printStackTrace();
            }

            return sf;
        }

        /**
         * How convinced we are that this surface form should be tagged: the rounded ratio
         * {@code maxSenseFreq / promiscuity}.
         */
        public float spotBelief(long maxSenseFreq, long promiscuity) {
            // NOTE(review): there is no guard for promiscuity == 0; the float division then
            // yields Infinity or NaN, which is passed to round() unchanged. Confirm that is intended.
            return round((float) maxSenseFreq / (float) promiscuity);
        }

        /**
         * Inverse sense frequency: {@code log(maxSenseFreq / senseFreq) + 1},
         * or 0 when {@code senseFreq} is 0.
         */
        public float isf(long senseFreq, long maxSenseFreq) {
            if (senseFreq == 0) {
                return 0;
            }
            // Single-precision division, exactly as the former boxed Float arithmetic did.
            float ratio = (float) maxSenseFreq / (float) senseFreq;
            return (float) (Math.log(ratio) + 1.0);
        }

        /** Describes the last computed values; sf is 0 until {@link #getIdf()} has been called. */
        @Override
        public String explain() {
            return "isf(docFreq=" + sf + ", maxDocs=" + maxSf + ")";
        }

        /** Recomputes the sense frequency and returns ISF weighted by the spot belief. */
        @Override
        public float getIdf() {
            sf = sf();
            float isf = isf(sf, maxSf);
            return isf * spotBelief(maxSf, promiscuity);
        }
    };
}