Example usage for org.apache.lucene.index LeafReader getNormValues

Introduction

On this page you can find example usages of org.apache.lucene.index.LeafReader.getNormValues.

Prototype

public abstract NumericDocValues getNormValues(String field) throws IOException;

Document

Returns NumericDocValues representing norms for this field, or null if no NumericDocValues were indexed.
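
Since the method may return null, callers should guard before reading values. Below is a minimal sketch, assuming a Lucene 7+ LeafReader where NumericDocValues is consumed as an iterator (advanceExact/longValue); the reader, field, and docId names are placeholders. Older 5.x/6.x code, like the first usage example below, instead calls norms.get(docId) directly.

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;

public class GetNormValuesSketch {
    /** Returns the encoded norm for docId, or -1 if the field or document has no norm. */
    static long normOrMissing(LeafReader reader, String field, int docId) throws IOException {
        NumericDocValues norms = reader.getNormValues(field);
        if (norms == null) {
            return -1; // no norms were indexed for this field
        }
        // iterator-style access: position on the document before reading its value
        return norms.advanceExact(docId) ? norms.longValue() : -1;
    }
}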

Usage

From source file: main.BM25VASimilarity.java

License: Apache License

This example builds a SimScorer for a BM25 variant: it reads each document's decoded length (Ld) from the field's norms and its unique term count (Td) from term vectors, then derives the per-document factor BVA used by the scorer.

@Override
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
    BM25Stats bm25stats = (BM25Stats) stats;
    LeafReader reader = context.reader();

    //BVA calculated for each document
    float[] BVA = new float[reader.maxDoc()];
    float sumOfAverageTermFrequencies = 0.0f;

    //length of each doc
    float[] Ld = new float[reader.maxDoc()];
    //the number of unique terms in the doc.
    float[] Td = new float[reader.maxDoc()];

    NumericDocValues norms = reader.getNormValues(bm25stats.field);

    for (int i = 0; i < reader.maxDoc(); i++) {
        Terms terms = reader.getTermVector(i, bm25stats.field);
        if (terms == null) {
            // no term vector was indexed for this document/field; skip it
            continue;
        }
        // norm is the decoded length of doc d, Ld
        float norm = norms == null ? k1 : bm25stats.cache[(byte) norms.get(i) & 0xFF];
        Ld[i] = norm;
        // terms.size() returns Td, the number of unique terms in the doc
        Td[i] = terms.size();

        float averageTermFrequency = Ld[i] / Td[i];
        sumOfAverageTermFrequencies += averageTermFrequency;
    }
    //calculate mean average term frequency of all documents
    float mavgtf = sumOfAverageTermFrequencies / reader.maxDoc();

    //calculate B_VA for each document
    for (int i = 0; i < reader.maxDoc(); i++) {
        BVA[i] = 1 / (mavgtf * mavgtf) * Ld[i] / Td[i] + (1 - 1 / mavgtf) * Ld[i] / bm25stats.avgdl;
    }

    return new BM25DocScorer(bm25stats, BVA);
}
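
The expression bm25stats.cache[(byte) norms.get(i) & 0xFF] relies on classic (Lucene 5.x/6.x) norm encoding, where each norm is stored as a single byte, so a 256-entry table can precompute the decoded length for every possible value. Below is a minimal sketch of such a table, assuming the SmallFloat byte315 encoding that Lucene's own BM25Similarity used in those versions; how bm25stats.cache is actually filled is not shown in this snippet.

import org.apache.lucene.util.SmallFloat;

public class NormDecodeTable {
    // index = encoded norm byte (0..255), value = decoded document length
    static final float[] NORM_TABLE = new float[256];
    static {
        for (int i = 1; i < 256; i++) {
            float f = SmallFloat.byte315ToFloat((byte) i);
            NORM_TABLE[i] = 1.0f / (f * f); // undo the 1/sqrt(length) normalization
        }
        NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // avoid infinity for the zero byte
    }

    static float decode(long encodedNorm) {
        return NORM_TABLE[(byte) encodedNorm & 0xFF];
    }
}

Precomputing the table once keeps the per-document loop above free of repeated float decoding.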

From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License: Open Source License

This test verifies that a FieldSubsetReader exposes norms only for the fields its automaton allows.

/**
 * test filtering two text fields
 */
public void testNorms() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 fields
    Document doc = new Document();
    doc.add(new TextField("fieldA", "test", Field.Store.NO));
    doc.add(new TextField("fieldB", "test", Field.Store.NO));
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    assertNotNull(segmentReader.getNormValues("fieldA"));
    assertNull(segmentReader.getNormValues("fieldB"));

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}
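
For contrast, an unwrapped reader exposes norms for both fields. Below is a minimal sketch under the same test scaffolding (same writer and assertions as above), assuming it runs before the writer is closed:

    DirectoryReader unfiltered = DirectoryReader.open(iw);
    LeafReader leaf = unfiltered.leaves().get(0).reader();
    assertNotNull(leaf.getNormValues("fieldA"));
    assertNotNull(leaf.getNormValues("fieldB")); // visible without the field filter
    unfiltered.close();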