ro.ranking.technique.bm25.BM25BooleanScorer.java Source code

Java tutorial

Introduction

Here is the source code for ro.ranking.technique.bm25.BM25BooleanScorer.java

Source

package ro.ranking.technique.bm25;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;

import ro.ranking.technique.bm25.BM25BooleanQuery.BooleanTermQuery;

/**
 * BM25BooleanScorer calculates the total relevance value of a document based
 * on a boolean expression.
 *
 *
 */
public class BM25BooleanScorer extends Scorer {

    /** Scorer for the SHOULD clauses; a MatchAllBooleanScorer when there are none. */
    private final AbstractBooleanScorer shouldBooleanScorer;
    /** Scorer for the MUST clauses; a MatchAllBooleanScorer when there are none. */
    private final AbstractBooleanScorer mustBooleanScorer;
    /** Scorer for the NOT clauses; a MatchAllBooleanScorer when there are none. */
    private final AbstractBooleanScorer notBooleanScorer;
    /**
     * Upper bound (exclusive) of the doc ids visited by {@link #nextDoc()}.
     * NOTE(review): this is taken from IndexReader.numDocs(); if the index can
     * contain deleted documents, confirm whether maxDoc() is what is intended,
     * since the sub-scorers receive the same value.
     */
    private final int ndocs;

    /** False once the corresponding sub-scorer has returned NO_MORE_DOCS. */
    private boolean hasMoreShould = false;
    private boolean hasMoreMust = false;
    private boolean hasMoreNot = false;
    /** Current document; -1 before the first call to nextDoc(), NO_MORE_DOCS when exhausted. */
    private int doc = -1;
    /** Whether the sub-scorers have been positioned on their first document. */
    private boolean initialized = false;

    /**
     * Build a BM25BooleanScorer whose atoms are BM25TermScorers.
     * The scorer gives the score for a boolean formula combining the sub-scorers.
     * A clause group that is {@code null} or empty is replaced by a
     * MatchAllBooleanScorer.
     *
     * @param reader index reader used to create the term scorers
     * @param should term queries appearing as SHOULD, may be null or empty
     * @param must term queries appearing as MUST, may be null or empty
     * @param not term queries appearing as NOT, may be null or empty
     * @param similarity similarity handed to this scorer and to every sub-scorer
     * @throws IOException if a sub-scorer cannot be created
     */
    public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
            BooleanTermQuery[] not, Similarity similarity) throws IOException {
        super(similarity);
        this.ndocs = reader.numDocs();

        this.shouldBooleanScorer = shouldGroup(termScorers(reader, should, similarity), similarity, this.ndocs);
        this.mustBooleanScorer = mustGroup(termScorers(reader, must, similarity), similarity, this.ndocs);
        this.notBooleanScorer = notGroup(termScorers(reader, not, similarity), similarity, this.ndocs);
    }

    /**
     * Build a BM25BooleanScorer whose atoms are BM25FTermScorers.
     * The scorer gives the score for a boolean formula combining the sub-scorers.
     * Each sub-scorer combines the fields' scores using the given boosts and bParams.
     * A clause group that is {@code null} or empty is replaced by a
     * MatchAllBooleanScorer.
     *
     * @param reader index reader used to create the term scorers
     * @param should term queries appearing as SHOULD, may be null or empty
     * @param must term queries appearing as MUST, may be null or empty
     * @param not term queries appearing as NOT, may be null or empty
     * @param similarity similarity handed to this scorer and to every sub-scorer
     * @param fields fields passed to every BM25FTermScorer
     * @param boosts boosts passed to every BM25FTermScorer (presumably one per field - verify)
     * @param bParams b parameters passed to every BM25FTermScorer (presumably one per field - verify)
     * @throws IOException if a sub-scorer cannot be created
     */
    public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
            BooleanTermQuery[] not, Similarity similarity, String[] fields, float[] boosts, float[] bParams)
            throws IOException {
        super(similarity);
        this.ndocs = reader.numDocs();

        this.shouldBooleanScorer = shouldGroup(
                fieldedTermScorers(reader, should, fields, boosts, bParams, similarity), similarity, this.ndocs);
        this.mustBooleanScorer = mustGroup(
                fieldedTermScorers(reader, must, fields, boosts, bParams, similarity), similarity, this.ndocs);
        this.notBooleanScorer = notGroup(
                fieldedTermScorers(reader, not, fields, boosts, bParams, similarity), similarity, this.ndocs);
    }

    /** Creates one BM25TermScorer per clause, or returns null when there are no clauses. */
    private static Scorer[] termScorers(IndexReader reader, BooleanTermQuery[] clauses, Similarity similarity)
            throws IOException {
        if (clauses == null || clauses.length == 0) {
            return null;
        }
        Scorer[] scorers = new Scorer[clauses.length];
        for (int i = 0; i < scorers.length; i++) {
            scorers[i] = new BM25TermScorer(reader, clauses[i].termQuery, similarity);
        }
        return scorers;
    }

    /** Creates one BM25FTermScorer per clause, or returns null when there are no clauses. */
    private static Scorer[] fieldedTermScorers(IndexReader reader, BooleanTermQuery[] clauses, String[] fields,
            float[] boosts, float[] bParams, Similarity similarity) throws IOException {
        if (clauses == null || clauses.length == 0) {
            return null;
        }
        Scorer[] scorers = new Scorer[clauses.length];
        for (int i = 0; i < scorers.length; i++) {
            scorers[i] = new BM25FTermScorer(reader, clauses[i].termQuery, fields, boosts, bParams, similarity);
        }
        return scorers;
    }

    /** Wraps the SHOULD term scorers, falling back to match-all when {@code scorers} is null. */
    private static AbstractBooleanScorer shouldGroup(Scorer[] scorers, Similarity similarity, int ndocs)
            throws IOException {
        if (scorers == null) {
            return new MatchAllBooleanScorer(similarity, ndocs);
        }
        return new ShouldBooleanScorer(similarity, scorers);
    }

    /** Wraps the MUST term scorers, falling back to match-all when {@code scorers} is null. */
    private static AbstractBooleanScorer mustGroup(Scorer[] scorers, Similarity similarity, int ndocs)
            throws IOException {
        if (scorers == null) {
            return new MatchAllBooleanScorer(similarity, ndocs);
        }
        return new MustBooleanScorer(similarity, scorers);
    }

    /** Wraps the NOT term scorers, falling back to match-all when {@code scorers} is null. */
    private static AbstractBooleanScorer notGroup(Scorer[] scorers, Similarity similarity, int ndocs)
            throws IOException {
        if (scorers == null) {
            return new MatchAllBooleanScorer(similarity, ndocs);
        }
        return new NotBooleanScorer(similarity, scorers, ndocs);
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.lucene.search.Scorer#docID()
     */
    @Override
    public int docID() {
        return this.doc;
    }

    /** Positions each of the three sub-scorers on its first document. */
    private void init() throws IOException {
        this.hasMoreShould = (this.shouldBooleanScorer.nextDoc() != NO_MORE_DOCS);
        this.hasMoreMust = (this.mustBooleanScorer.nextDoc() != NO_MORE_DOCS);
        this.hasMoreNot = (this.notBooleanScorer.nextDoc() != NO_MORE_DOCS);
    }

    /** Moves every sub-scorer that still sits on the current document to its next one. */
    private void doNext() throws IOException {
        if (this.hasMoreShould && this.shouldBooleanScorer.docID() == this.doc) {
            this.hasMoreShould = (this.shouldBooleanScorer.nextDoc() != NO_MORE_DOCS);
        }
        if (this.hasMoreMust && this.mustBooleanScorer.docID() == this.doc) {
            this.hasMoreMust = (this.mustBooleanScorer.nextDoc() != NO_MORE_DOCS);
        }
        if (this.hasMoreNot && this.notBooleanScorer.docID() == this.doc) {
            this.hasMoreNot = (this.notBooleanScorer.nextDoc() != NO_MORE_DOCS);
        }
    }

    /** Marks this scorer as exhausted and returns NO_MORE_DOCS. */
    private int exhaust() {
        this.doc = NO_MORE_DOCS;
        return NO_MORE_DOCS;
    }

    /**
     * Advances to the next document on which both the MUST and the NOT
     * sub-scorer are positioned. The SHOULD sub-scorer is kept in step so
     * that {@link #score()} can add its contribution, but it never decides
     * whether a document is a hit.
     *
     * @return the next matching doc id, or NO_MORE_DOCS when there is none
     * @throws IOException if a sub-scorer fails while advancing
     * @see org.apache.lucene.search.Scorer#nextDoc()
     */
    @Override
    public int nextDoc() throws IOException {
        if (!this.initialized) {
            this.initialized = true;
            this.init();
        } else {
            this.doNext();
        }

        // Candidates are visited one id at a time, so a sub-scorer is never more
        // than one document behind and a single nextDoc() is enough to catch up.
        while (this.doc < this.ndocs - 1) {
            this.doc++;

            if (!this.hasMoreMust) {
                return this.exhaust();
            }
            if (this.mustBooleanScorer.docID() < this.doc) {
                this.hasMoreMust = (this.mustBooleanScorer.nextDoc() != NO_MORE_DOCS);
            }

            if (!this.hasMoreNot) {
                return this.exhaust();
            }
            if (this.notBooleanScorer.docID() < this.doc) {
                this.hasMoreNot = (this.notBooleanScorer.nextDoc() != NO_MORE_DOCS);
            }

            if (this.hasMoreShould && this.shouldBooleanScorer.docID() < this.doc) {
                this.hasMoreShould = (this.shouldBooleanScorer.nextDoc() != NO_MORE_DOCS);
            }

            if (!this.hasMoreMust || !this.hasMoreNot) {
                return this.exhaust();
            }

            // Both filters have to sit on the candidate itself. Comparing the two
            // sub-scorers only with each other would also accept every candidate
            // that lies before a later document they happen to share.
            if (this.mustBooleanScorer.docID() == this.doc && this.notBooleanScorer.docID() == this.doc) {
                return this.doc;
            }
        }

        return this.exhaust();
    }

    /**
     * Scores the current document as the sum of the MUST score and the SHOULD
     * score, each counted only when the respective sub-scorer is positioned on
     * the current document. The NOT sub-scorer never contributes.
     *
     * @return the combined score, 0 when neither sub-scorer is on the current document
     * @throws IOException if a sub-scorer fails while scoring
     * @see org.apache.lucene.search.Scorer#score()
     */
    @Override
    public float score() throws IOException {
        float result = 0f;
        if (this.hasMoreMust && this.mustBooleanScorer.docID() == this.doc) {
            result += this.mustBooleanScorer.score();
        }
        if (this.hasMoreShould && this.shouldBooleanScorer.docID() == this.doc) {
            result += this.shouldBooleanScorer.score();
        }
        return result;
    }

    /**
     * Steps with {@link #nextDoc()} until a matching document at or beyond
     * {@code target} is reached. At least one step is always taken.
     *
     * @param target the smallest acceptable doc id
     * @return the doc id reached, or NO_MORE_DOCS when there is none
     * @throws IOException if a sub-scorer fails while advancing
     */
    @Override
    public int advance(int target) throws IOException {
        if (target == NO_MORE_DOCS) {
            // Record the exhausted state so docID() agrees with the returned value.
            return this.exhaust();
        }

        int current;
        do {
            current = this.nextDoc();
        } while (current != NO_MORE_DOCS && current < target);

        return current;
    }

}