Usage examples for org.apache.lucene.index.LeafReader.getNormValues
/**
 * Returns the per-document norm values for the given field.
 *
 * <p>NOTE(review): callers shown in this listing guard against a {@code null}
 * return (e.g. for a field that is filtered out or has no norms) - confirm
 * against the Lucene version in use.
 *
 * @param field name of the field whose norms are requested
 * @return the norm values for {@code field}; presumably {@code null} when none exist
 * @throws IOException declared by the signature; presumably on a low-level read error
 */
public abstract NumericDocValues getNormValues(String field) throws IOException;
From source file:main.BM25VASimilarity.java
License:Apache License
@Override public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException { BM25Stats bm25stats = (BM25Stats) stats; LeafReader reader = context.reader(); //int docCount = reader.getDocCount(bm25stats.field); //BVA calculated for each document float[] BVA = new float[reader.maxDoc()]; float sumOfAverageTermFrequencies = 0.0f; //length of each doc float[] Ld = new float[reader.maxDoc()]; //the number of unique terms in the doc. float[] Td = new float[reader.maxDoc()]; NumericDocValues norms = reader.getNormValues(bm25stats.field); // int nulldocs = 0; for (int i = 0; i < reader.maxDoc(); i++) { Terms terms = reader.getTermVector(i, bm25stats.field); //norm should be the decoded length of doc d, Ld. float norm = norms == null ? k1 : bm25stats.cache[(byte) norms.get(i) & 0xFF]; Ld[i] = norm;//from w w w . jav a2s . co m //using terms.size() returns Td, the number of unique terms in the doc. Td[i] = terms.size(); // if (terms == null) { // nulldocs++; // continue; // } float averageTermFrequency = Ld[i] / Td[i]; sumOfAverageTermFrequencies += averageTermFrequency; } //calculate mean average term frequency of all documents float mavgtf = sumOfAverageTermFrequencies / reader.maxDoc(); //calculate B_VA for each document for (int i = 0; i < reader.maxDoc(); i++) { BVA[i] = 1 / (mavgtf * mavgtf) * Ld[i] / Td[i] + (1 - 1 / mavgtf) * Ld[i] / bm25stats.avgdl; } // System.out.println("Null docs: "+nulldocs); // System.out.println("Max docs: "+reader.maxDoc()); // System.out.println("Doc count: "+reader.getDocCount(bm25stats.field)); // System.out.println("max docs minus null docs: "+(reader.maxDoc() - nulldocs)); return new BM25DocScorer(bm25stats, BVA); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two text fields/*from www .j av a2 s . c o m*/ */ public void testNorms() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new TextField("fieldA", "test", Field.Store.NO)); doc.add(new TextField("fieldB", "test", Field.Store.NO)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field LeafReader segmentReader = ir.leaves().get(0).reader(); assertNotNull(segmentReader.getNormValues("fieldA")); assertNull(segmentReader.getNormValues("fieldB")); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }