org.sindice.siren.search.TestSirenExactPhraseScorer.java Source code

Java tutorial

Introduction

Here is the source code for org.sindice.siren.search.TestSirenExactPhraseScorer.java

Source

/**
 * Copyright 2009, Renaud Delbru
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/**
 * @project siren
 * @author Renaud Delbru [ 10 Dec 2009 ]
 * @link http://renaud.delbru.fr/
 * @copyright Copyright (C) 2009 by Renaud Delbru, All rights reserved.
 */
package org.sindice.siren.search;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Weight;
import org.junit.Test;
import org.sindice.siren.search.SirenScorer.InvalidCallException;

public class TestSirenExactPhraseScorer extends AbstractTestSirenScorer {

    /**
     * Test exact phrase scorer: should not match two words in separate cells
     *
     * @throws Exception
     */
    @Test
    public void testExactNextFail1() throws Exception {
        final String field = "content";
        _helper.addDocument("\"word1 word2 word3\" \"word4 word5\" . ");

        final SirenExactPhraseScorer scorer = this.getExactScorer(field, new int[] { 0, 1 },
                new String[] { "word1", "word4" });
        assertTrue(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
    }

    /**
     * Test exact phrase scorer: should not match phrase with a gap of 1 between
     * the two phrase query term
     *
     * @throws Exception
     */
    @Test
    public void testExactNextFail2() throws Exception {
        final String field = "content";
        _helper.addDocument("\"word1 word2 word3\" \"word4 word5\" . ");
        final SirenExactPhraseScorer scorer = this.getExactScorer(field, new int[] { 0, 2 },
                new String[] { "word4", "word5" });
        assertTrue(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
    }

    @Test
    public void testNextWithURI() throws Exception {
        this.assertTo(new AssertNextEntityFunctor(),
                new String[] { "<http://renaud.delbru.fr/> <http://renaud.delbru.fr/> . " },
                new String[] { "renaud", "delbru" }, 1, new int[] { 1 }, new int[] { 2 }, new int[] { 0 },
                new int[] { 0 }, new int[] { 0 }, new int[] { 0 });
        this.assertTo(new AssertNextEntityFunctor(),
                new String[] { "<http://renaud.delbru.fr/> <http://renaud.delbru.fr/> . ",
                        "<http://renaud.delbru.fr/> <http://test/name> \"Renaud Delbru\" . " },
                new String[] { "renaud", "delbru" }, 2, new int[] { 1, 1 }, new int[] { 2, 2 }, new int[] { 0, 1 },
                new int[] { 0, 0 }, new int[] { 0, 0 }, new int[] { 0, 0 });
    }

    @Test
    public void testNextPositionWithURI() throws Exception {
        this.assertTo(new AssertNextPositionEntityFunctor(),
                new String[] { "<http://renaud.delbru.fr/> <http://renaud.delbru.fr/> . " },
                new String[] { "renaud", "delbru" }, 1, new int[] { 1 }, new int[] { 2 }, new int[] { 0, 0 },
                new int[] { 0, 0 }, new int[] { 0, 1 }, new int[] { 0, 2 });
        this.assertTo(new AssertNextPositionEntityFunctor(),
                new String[] { "<http://renaud.delbru.fr/> <http://renaud.delbru.fr/> . ",
                        "<http://renaud.delbru.fr/> <http://test/name> \"Renaud Delbru\" . " },
                new String[] { "renaud" }, 2, new int[] { 1, 1 }, new int[] { 2, 2 }, new int[] { 0, 0, 1, 1 },
                new int[] { 0, 0, 0, 0 }, new int[] { 0, 1, 0, 2 }, new int[] { 0, 2, 0, 4 });
    }

    @Test
    public void testNextPositionWithMultipleOccurrencesInLiteral() throws Exception {
        this.assertTo(new AssertNextPositionEntityFunctor(),
                new String[] { "<http://renaud.delbru.fr/> \"renaud delbru delbru renaud renaud delbru\" . " },
                new String[] { "renaud", "delbru" }, 1, new int[] { 1 }, new int[] { 3 }, new int[] { 0, 0, 0 },
                new int[] { 0, 0, 0 }, new int[] { 0, 1, 1 }, new int[] { 0, 2, 6 });
    }

    @Test
    public void testSkipToEntity() throws Exception {
        for (int i = 0; i < 32; i++)
            _helper.addDocument("<http://renaud.delbru.fr/> . ");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "renaud", "delbru" });
        assertFalse(scorer.advance(16) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(16, scorer.docID());
        assertEquals(16, scorer.entity());
        assertEquals(0, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(0, scorer.pos());
    }

    @Test
    public void testSkipToEntityNext() throws Exception {
        for (int i = 0; i < 32; i++)
            _helper.addDocument("<http://renaud.delbru.fr/> . ");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "renaud", "delbru" });
        assertFalse(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        assertFalse(scorer.advance(16) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(16, scorer.docID());
        assertEquals(16, scorer.entity());
        assertEquals(0, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(0, scorer.pos());
        assertFalse(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(17, scorer.entity());
    }

    /**
     * Check if {@link SirenPhraseScorer#advance(int, int, int)} works correctly
     * when advancing to the same entity.
     */
    @Test
    public void testNextSkipToEntity1() throws Exception {
        _helper.addDocument("\"aaa bbb aaa\" . \"aaa bbb ccc\" .");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "bbb", "ccc" });
        assertFalse(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, scorer.docID());
        assertEquals(0, scorer.entity());
        assertEquals(1, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(4, scorer.pos());
        assertFalse(scorer.advance(0, 1, 0) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, scorer.docID());
        assertEquals(0, scorer.entity());
        assertEquals(1, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(4, scorer.pos());
        assertTrue(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
    }

    /**
     * Check if {@link SirenPhraseScorer#advance(int, int, int)} works correctly
     * when advancing to the same entity.
     */
    @Test
    public void testNextSkipToEntity2() throws Exception {
        _helper.addDocument("\"aaa bbb aaa\" . \"ccc bbb ccc\" . \"aaa bbb ccc\" .");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "bbb", "ccc" });
        assertFalse(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, scorer.docID());
        assertEquals(0, scorer.entity());
        assertEquals(1, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(4, scorer.pos());
        assertFalse(scorer.advance(0, 0) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, scorer.docID());
        assertEquals(0, scorer.entity());
        assertEquals(1, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertFalse(scorer.advance(0, 1, 2) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, scorer.docID());
        assertEquals(0, scorer.entity());
        assertEquals(2, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());

        assertTrue(scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
    }

    @Test
    public void testSkipToEntityNextPosition() throws Exception {
        for (int i = 0; i < 32; i++)
            _helper.addDocument("<http://renaud.delbru.fr/> . \"renaud delbru\" .");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "renaud", "delbru" });
        assertFalse(scorer.advance(16) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(16, scorer.docID());
        assertEquals(16, scorer.entity());
        assertEquals(0, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(0, scorer.pos());

        assertFalse(scorer.nextPosition() == SirenIdIterator.NO_MORE_POS);
        assertEquals(1, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(2, scorer.pos());

        assertTrue(scorer.nextPosition() == SirenIdIterator.NO_MORE_POS);
    }

    @Test
    public void testSkipToEntityTuple() throws Exception {
        for (int i = 0; i < 32; i++)
            _helper.addDocument("<http://renaud.delbru.fr/> . \"renaud delbru\" . \"renaud delbru\" . ");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "renaud", "delbru" });
        assertFalse(scorer.advance(16, 2) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(16, scorer.docID());
        assertEquals(16, scorer.entity());
        assertEquals(2, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(4, scorer.pos());
    }

    @Test
    public void testSkipToEntityTupleCell() throws Exception {
        for (int i = 0; i < 32; i++)
            _helper.addDocument("<http://renaud.delbru.fr/> . \"renaud delbru\" \"renaud delbru\" . ");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "renaud", "delbru" });
        assertFalse(scorer.advance(16, 1, 1) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(16, scorer.docID());
        assertEquals(16, scorer.entity());
        assertEquals(1, scorer.tuple());
        assertEquals(1, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(4, scorer.pos());
    }

    @Test
    public void testSkipToNonExistingEntityTupleCell() throws Exception {
        for (int i = 0; i < 32; i++)
            _helper.addDocument("<http://renaud.delbru.fr/> . \"renaud delbru\" \"renaud delbru\" . ");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "renaud", "delbru" });
        assertFalse(scorer.advance(16, 3, 2) == DocIdSetIterator.NO_MORE_DOCS); // does not exist, should skip to entity 17
        assertEquals(17, scorer.docID());
        assertEquals(17, scorer.entity());
        assertEquals(0, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(0, scorer.pos());
    }

    @Test
    public void testSkipToEntityTupleCellNextPosition() throws Exception {
        for (int i = 0; i < 32; i++)
            _helper.addDocument("<http://renaud.delbru.fr/> . \"renaud delbru\" \"renaud delbru\" . ");
        final SirenScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD,
                new String[] { "renaud", "delbru" });
        assertFalse(scorer.advance(16, 1, 0) == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(16, scorer.docID());
        assertEquals(16, scorer.entity());
        assertEquals(1, scorer.tuple());
        assertEquals(0, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(2, scorer.pos());

        // Should not return match in first tuple (tuple 0)
        assertFalse(scorer.nextPosition() == SirenIdIterator.NO_MORE_POS);
        assertEquals(1, scorer.tuple());
        assertEquals(1, scorer.cell());
        assertEquals(-1, scorer.dataset());
        assertEquals(4, scorer.pos());

        assertTrue(scorer.nextPosition() == SirenIdIterator.NO_MORE_POS);
    }

    @Test(expected = InvalidCallException.class)
    public void testInvalidScoreCall() throws IOException {
        _helper.addDocument("\"Renaud Delbru\" . ");

        final Term t1 = new Term(QueryTestingHelper.DEFAULT_FIELD, "renaud");
        final Term t2 = new Term(QueryTestingHelper.DEFAULT_FIELD, "delbru");
        final SirenPhraseQuery query = new SirenPhraseQuery();
        query.add(t1);
        query.add(t2);
        final Weight w = query.weight(_helper.getSearcher());

        final IndexReader reader = _helper.getIndexReader();
        final TermPositions[] tps = new TermPositions[2];
        tps[0] = reader.termPositions(t1);
        tps[1] = reader.termPositions(t2);

        final SirenPhraseScorer scorer = new SirenExactPhraseScorer(w, tps, new int[] { 0, 1 },
                _helper.getSearcher().getSimilarity(), reader.norms(QueryTestingHelper.DEFAULT_FIELD));
        assertNotNull("ts is null and it shouldn't be", scorer);

        // Invalid call
        scorer.score();
    }

    @Test
    public void testScore() throws IOException {
        _helper.addDocument("\"Renaud Delbru\" . <http://renaud.delbru.fr> . ");

        final Term t1 = new Term(QueryTestingHelper.DEFAULT_FIELD, "renaud");
        final Term t2 = new Term(QueryTestingHelper.DEFAULT_FIELD, "delbru");
        final SirenPhraseQuery query = new SirenPhraseQuery();
        query.add(t1);
        query.add(t2);

        final IndexReader reader = _helper.getIndexReader();
        final TermPositions[] tps = new TermPositions[2];
        tps[0] = reader.termPositions(t1);
        tps[1] = reader.termPositions(t2);

        final SirenPhraseScorer scorer = new SirenExactPhraseScorer(new ConstantWeight(), tps, new int[] { 0, 1 },
                _helper.getSearcher().getSimilarity(), reader.norms(QueryTestingHelper.DEFAULT_FIELD));
        assertNotNull("ts is null and it shouldn't be", scorer);

        assertFalse("no doc returned", scorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, scorer.entity());
        assertEquals(0.70, scorer.score(), 0.01);
    }

    // /////////////////////////////////
    //
    // END OF TESTS
    // START HELPER METHODS AND CLASSES
    //
    // /////////////////////////////////

    @Override
    protected void assertTo(final AssertFunctor functor, final String[] input, final String[] phraseTerms,
            final int expectedNumDocs, final int[] expectedNumTuples, final int[] expectedNumCells,
            final int[] expectedEntityID, final int[] expectedTupleID, final int[] expectedCellID,
            final int[] expectedPos) throws Exception {
        _helper.reset();
        _helper.addDocuments(input);
        final IndexReader reader = _helper.getIndexReader();
        assertEquals(expectedNumDocs, reader.numDocs());

        final SirenExactPhraseScorer scorer = this.getExactScorer(QueryTestingHelper.DEFAULT_FIELD, phraseTerms);
        functor.run(scorer, expectedNumDocs, expectedNumTuples, expectedNumCells, expectedEntityID, expectedTupleID,
                expectedCellID, expectedPos);
        reader.close();
    }

}