List of usage examples for org.apache.lucene.search.Explanation.IDFExplanation
Explanation.IDFExplanation
From source file:org.dbpedia.spotlight.lucene.similarity.InvCandFreqSimilarity.java
License:Apache License
@Override public Explanation.IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException { final int df = searcher.docFreq(term); final int max = searcher.maxDoc(); final float idf = idf(df, max); return new Explanation.IDFExplanation() { long sf = 0; boolean isSurfaceFormField = term.field() .equals(LuceneManager.DBpediaResourceField.SURFACE_FORM.toString()); private long sf() { try { IndexReader reader = ((IndexSearcher) searcher).getIndexReader(); TermsFilter filter = new TermsFilter(); filter.addTerm(term);// w w w. java 2 s . co m OpenBitSet it = (OpenBitSet) filter.getDocIdSet(reader); if (isSurfaceFormField) { // Here we set the surface form specific information. surfaceFormTerm = term; // Store the surface form surrogateDocIdSet = (OpenBitSet) it; // Store what documents are possible surrogates (URIs that can be represented by this surface form( maxSf = surrogateDocIdSet.cardinality(); // This is the number of documents that contain the surface form (size of surrogate set) } else { it.and(surrogateDocIdSet); // Find what surrogates contain this term in the context } // long maxSurfaceFormFreq = surrogateDocIdSet.cardinality(); // long termFreq = it.cardinality(); // LOG.trace(term); // LOG.trace("surrogateDocIdSet.cardinality() ="+c1); // LOG.trace("it.cardinality() ="+c2); // If this is a SURFACE_FORM term: number of documents in which the surface form occurred // Else, this is a CONTEXT term: number of docs the term and the surface form occurred sf = ((OpenBitSet) it).cardinality(); } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } return sf; } // /** Implemented as <code>log(numDocs/(docFreq+1)) + 1</code>. */ // public float idf(int docFreq, int numDocs) { // return (float)(Math.log(numDocs/(double)(docFreq+1)) + 1.0); // } public float isf(long senseFreq, long maxSenseFreq) { return senseFreq == 0 ? 
0 : (float) (Math.log(new Float(maxSenseFreq) / new Float(senseFreq)) + 1.0); } @Override public String explain() { return "isf(docFreq=" + sf + ", maxDocs=" + maxSf + ")"; } @Override public float getIdf() { sf = sf(); float isf = isf(sf, maxSf); //return 2; return isf; //return idf * isf; } }; }
From source file:org.dbpedia.spotlight.lucene.similarity.NewSimilarity.java
License:Apache License
@Override public Explanation.IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException { //final int df = searcher.docFreq(term); //final int max = searcher.maxDoc(); //final float idf = idf(df, max); return new Explanation.IDFExplanation() { long sf = 0; boolean isSurfaceFormField = term.field() .equals(LuceneManager.DBpediaResourceField.SURFACE_FORM.toString()); private long sf() { try { IndexReader reader = ((IndexSearcher) searcher).getIndexReader(); if (isSurfaceFormField) { // Here we set the surface form specific information. surfaceFormTerm = term; // Store the surface form maxSf = termCache.cardinality(reader, surfaceFormTerm); // This is the number of documents that contain the surface form (size of surrogate set) sf = maxSf; // setting sf = maxSf generates isf=1, leading to tf*isf = tf promiscuity = termCache.getPromiscuity(reader, new SurfaceForm(term.text())); } else { sf = termCache.cardinality(reader, surfaceFormTerm, term); // This is the number of docs containing sf + term }//from ww w . ja v a2 s. c o m } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } return sf; } // /** Implemented as <code>log(numDocs/(docFreq+1)) + 1</code>. */ // public float idf(int docFreq, int numDocs) { // return (float)(Math.log(numDocs/(double)(docFreq+1)) + 1.0); // } // How convinced are we that this surface form should be tagged public float spotBelief(long maxSenseFreq, long promiscuity) { //return promiscuity==0 ? 0 : (float) (Math.log(new Float(maxSenseFreq) / new Float(promiscuity)) + 1.0); return round(new Float(maxSenseFreq) / new Float(promiscuity)); } public float isf(long senseFreq, long maxSenseFreq) { return senseFreq == 0 ? 
0 : (float) (Math.log(new Float(maxSenseFreq) / new Float(senseFreq)) + 1.0); } @Override public String explain() { return "isf(docFreq=" + sf + ", maxDocs=" + maxSf + ")"; } @Override public float getIdf() { sf = sf(); float isf = isf(sf, maxSf); return isf * spotBelief(maxSf, promiscuity); } }; }