Example usage for org.apache.lucene.index LeafReader getNormValues

Introduction

On this page you can find example usages of org.apache.lucene.index.LeafReader.getNormValues.

Prototype

public abstract NumericDocValues getNormValues(String field) throws IOException;

Document

Returns NumericDocValues representing norms for this field, or null if no NumericDocValues were indexed.
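
Since the method may return null, callers should guard before reading values. Below is a minimal sketch, assuming a Lucene 7+ LeafReader where NumericDocValues is consumed as an iterator (advanceExact/longValue); the reader, field, and docId names are placeholders. Older 5.x/6.x code, like the first usage example below, instead calls norms.get(docId) directly.

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;

public class GetNormValuesSketch {
    /** Returns the encoded norm for docId, or -1 if the field or document has no norm. */
    static long normOrMissing(LeafReader reader, String field, int docId) throws IOException {
        NumericDocValues norms = reader.getNormValues(field);
        if (norms == null) {
            return -1; // no norms were indexed for this field
        }
        // iterator-style access: position on the document before reading its value
        return norms.advanceExact(docId) ? norms.longValue() : -1;
    }
}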

Usage

From source file: main.BM25VASimilarity.java

License: Apache License

This example builds a SimScorer for a BM25 variant: it reads each document's decoded length (Ld) from the field's norms and its unique term count (Td) from term vectors, then derives the per-document factor BVA used by the scorer.

@Override
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
    BM25Stats bm25stats = (BM25Stats) stats;
    LeafReader reader = context.reader();

    //BVA calculated for each document
    float[] BVA = new float[reader.maxDoc()];
    float sumOfAverageTermFrequencies = 0.0f;

    //length of each doc
    float[] Ld = new float[reader.maxDoc()];
    //the number of unique terms in the doc.
    float[] Td = new float[reader.maxDoc()];

    NumericDocValues norms = reader.getNormValues(bm25stats.field);

    for (int i = 0; i < reader.maxDoc(); i++) {
        Terms terms = reader.getTermVector(i, bm25stats.field);
        if (terms == null) {
            // no term vector was indexed for this document/field; skip it
            continue;
        }
        // norm is the decoded length of doc d, Ld
        float norm = norms == null ? k1 : bm25stats.cache[(byte) norms.get(i) & 0xFF];
        Ld[i] = norm;
        // terms.size() returns Td, the number of unique terms in the doc
        Td[i] = terms.size();

        float averageTermFrequency = Ld[i] / Td[i];
        sumOfAverageTermFrequencies += averageTermFrequency;
    }
    //calculate mean average term frequency of all documents
    float mavgtf = sumOfAverageTermFrequencies / reader.maxDoc();

    //calculate B_VA for each document
    for (int i = 0; i < reader.maxDoc(); i++) {
        BVA[i] = 1 / (mavgtf * mavgtf) * Ld[i] / Td[i] + (1 - 1 / mavgtf) * Ld[i] / bm25stats.avgdl;
    }

    return new BM25DocScorer(bm25stats, BVA);
}
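
The expression bm25stats.cache[(byte) norms.get(i) & 0xFF] relies on classic (Lucene 5.x/6.x) norm encoding, where each norm is stored as a single byte, so a 256-entry table can precompute the decoded length for every possible value. Below is a minimal sketch of such a table, assuming the SmallFloat byte315 encoding that Lucene's own BM25Similarity used in those versions; how bm25stats.cache is actually filled is not shown in this snippet.

import org.apache.lucene.util.SmallFloat;

public class NormDecodeTable {
    // index = encoded norm byte (0..255), value = decoded document length
    static final float[] NORM_TABLE = new float[256];
    static {
        for (int i = 1; i < 256; i++) {
            float f = SmallFloat.byte315ToFloat((byte) i);
            NORM_TABLE[i] = 1.0f / (f * f); // undo the 1/sqrt(length) normalization
        }
        NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // avoid infinity for the zero byte
    }

    static float decode(long encodedNorm) {
        return NORM_TABLE[(byte) encodedNorm & 0xFF];
    }
}

Precomputing the table once keeps the per-document loop above free of repeated float decoding.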

From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License: Open Source License

This test verifies that a FieldSubsetReader exposes norms only for the fields its automaton allows.

/**
 * test filtering two text fields
 */
public void testNorms() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 fields
    Document doc = new Document();
    doc.add(new TextField("fieldA", "test", Field.Store.NO));
    doc.add(new TextField("fieldB", "test", Field.Store.NO));
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    assertNotNull(segmentReader.getNormValues("fieldA"));
    assertNull(segmentReader.getNormValues("fieldB"));

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}
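
For contrast, an unwrapped reader exposes norms for both fields. Below is a minimal sketch under the same test scaffolding (same writer and assertions as above), assuming it runs before the writer is closed:

    DirectoryReader unfiltered = DirectoryReader.open(iw);
    LeafReader leaf = unfiltered.leaves().get(0).reader();
    assertNotNull(leaf.getNormValues("fieldA"));
    assertNotNull(leaf.getNormValues("fieldB")); // visible without the field filter
    unfiltered.close();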