Example usage for org.apache.lucene.search.similarities TFIDFSimilarity computeNorm

List of usage examples for org.apache.lucene.search.similarities TFIDFSimilarity computeNorm

Introduction

On this page you can find example usage of org.apache.lucene.search.similarities.TFIDFSimilarity#computeNorm.

Prototype

@Override
    public final long computeNorm(FieldInvertState state) 

Source Link

Usage

From source file: org.apache.solr.search.function.TestFunctionQuery.java

License:Apache License

/**
 * Exercises a broad set of function queries against a small index that is
 * built up inside the test itself.
 *
 * <p>The test proceeds in order, and later assertions depend on the documents
 * and commits made earlier:
 * <ol>
 *   <li>index six documents (ids 1-6) across three commits;</li>
 *   <li>check the relevancy functions (numdocs, maxdoc, docfreq, termfreq,
 *       idf, tf, norm) against literal values or values computed from a
 *       locally constructed similarity;</li>
 *   <li>check ord/rord and the ms() date arithmetic;</li>
 *   <li>index a second batch ("batman"/"superman" documents) in two further
 *       commits and check query() weighting, parameter dereferencing,
 *       multi-value parsing, error handling and sort-by-function.</li>
 * </ol>
 *
 * @throws Exception if any helper used by the test throws
 */
@Test
public void testGeneral() throws Exception {
    clearIndex();

    // Six documents in three commits. Only ids 2, 3 and 6 carry an a_t value,
    // and all three of those values contain the term "cow".
    assertU(adoc("id", "1", "a_tdt", "2009-08-31T12:10:10.123Z", "b_tdt", "2009-08-31T12:10:10.124Z"));
    assertU(adoc("id", "2", "a_t", "how now brown cow"));
    assertU(commit()); // create more than one segment
    assertU(adoc("id", "3", "a_t", "brown cow"));
    assertU(adoc("id", "4"));
    assertU(commit()); // create more than one segment
    assertU(adoc("id", "5"));
    assertU(adoc("id", "6", "a_t", "cow cow cow cow cow"));
    assertU(commit());

    // test relevancy functions
    // Expected values follow from the documents above: 6 documents were added,
    // 3 of them contain "cow" in a_t, and doc 6 repeats "cow" five times.
    assertQ(req("fl", "*,score", "q", "{!func}numdocs()", "fq", "id:6"), "//float[@name='score']='6.0'");
    assertQ(req("fl", "*,score", "q", "{!func}maxdoc()", "fq", "id:6"), "//float[@name='score']='6.0'");
    assertQ(req("fl", "*,score", "q", "{!func}docfreq(a_t,cow)", "fq", "id:6"), "//float[@name='score']='3.0'");
    // same docfreq call with quoted arguments
    assertQ(req("fl", "*,score", "q", "{!func}docfreq('a_t','cow')", "fq", "id:6"),
            "//float[@name='score']='3.0'");
    // same docfreq call with both arguments supplied through request parameters
    assertQ(req("fl", "*,score", "q", "{!func}docfreq($field,$value)", "fq", "id:6", "field", "a_t", "value",
            "cow"), "//float[@name='score']='3.0'");
    assertQ(req("fl", "*,score", "q", "{!func}termfreq(a_t,cow)", "fq", "id:6"),
            "//float[@name='score']='5.0'");

    // Reference similarity used to compute the expected idf/tf/norm values below,
    // rather than hard-coding them.
    // NOTE(review): assumes the index under test scores with the same similarity - confirm.
    TFIDFSimilarity similarity = new DefaultSimilarity();

    // make sure it doesn't get a NPE if no terms are present in a field.
    // nofield_t is never indexed by this test, so freq/docfreq are expected to be 0
    // and idf/tf are expected to match the similarity evaluated at 0.
    assertQ(req("fl", "*,score", "q", "{!func}termfreq(nofield_t,cow)", "fq", "id:6"),
            "//float[@name='score']='0.0'");
    assertQ(req("fl", "*,score", "q", "{!func}docfreq(nofield_t,cow)", "fq", "id:6"),
            "//float[@name='score']='0.0'");
    assertQ(req("fl", "*,score", "q", "{!func}idf(nofield_t,cow)", "fq", "id:6"),
            "//float[@name='score']='" + similarity.idf(0, 6) + "'");
    assertQ(req("fl", "*,score", "q", "{!func}tf(nofield_t,cow)", "fq", "id:6"),
            "//float[@name='score']='" + similarity.tf(0) + "'");

    // Same functions on a populated field: docfreq 3 out of 6 docs, termfreq 5 in doc 6.
    assertQ(req("fl", "*,score", "q", "{!func}idf(a_t,cow)", "fq", "id:6"),
            "//float[@name='score']='" + similarity.idf(3, 6) + "'");
    assertQ(req("fl", "*,score", "q", "{!func}tf(a_t,cow)", "fq", "id:6"),
            "//float[@name='score']='" + similarity.tf(5) + "'");
    // Compute the expected norm for doc 2 by hand: boost 1.0 and length 4
    // (doc 2's a_t value "how now brown cow" has four words), run through
    // computeNorm and then decoded back to a float from its byte form.
    FieldInvertState state = new FieldInvertState("a_t");
    state.setBoost(1.0f);
    state.setLength(4);
    long norm = similarity.computeNorm(state);
    float nrm = similarity.decodeNormValue((byte) norm);
    assertQ(req("fl", "*,score", "q", "{!func}norm(a_t)", "fq", "id:2"),
            "//float[@name='score']='" + nrm + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte

    // test that ord and rord are working on a global index basis, not just
    // at the segment level (since Lucene 2.9 has switched to per-segment searching)
    // ids 1..6 live in three different segments; the expected value 5.0 for the
    // largest id (ord) and the smallest id (rord) is consistent with zero-based
    // ordinals computed over all six ids.
    assertQ(req("fl", "*,score", "q", "{!func}ord(id)", "fq", "id:6"), "//float[@name='score']='5.0'");
    assertQ(req("fl", "*,score", "q", "{!func}top(ord(id))", "fq", "id:6"), "//float[@name='score']='5.0'");
    assertQ(req("fl", "*,score", "q", "{!func}rord(id)", "fq", "id:1"), "//float[@name='score']='5.0'");
    assertQ(req("fl", "*,score", "q", "{!func}top(rord(id))", "fq", "id:1"), "//float[@name='score']='5.0'");

    // test that we can subtract dates to millisecond precision
    // doc 1 has a_tdt ending in .123 and b_tdt ending in .124, i.e. 1 ms apart;
    // ms(x,y) is expected to be x minus y. Both field values and date literals
    // are used as arguments.
    assertQ(req("fl", "*,score", "q", "{!func}ms(a_tdt,b_tdt)", "fq", "id:1"), "//float[@name='score']='-1.0'");
    assertQ(req("fl", "*,score", "q", "{!func}ms(b_tdt,a_tdt)", "fq", "id:1"), "//float[@name='score']='1.0'");
    assertQ(req("fl", "*,score", "q", "{!func}ms(2009-08-31T12:10:10.125Z,2009-08-31T12:10:10.124Z)", "fq",
            "id:1"), "//float[@name='score']='1.0'");
    assertQ(req("fl", "*,score", "q", "{!func}ms(2009-08-31T12:10:10.124Z,a_tdt)", "fq", "id:1"),
            "//float[@name='score']='1.0'");
    assertQ(req("fl", "*,score", "q", "{!func}ms(2009-08-31T12:10:10.125Z,b_tdt)", "fq", "id:1"),
            "//float[@name='score']='1.0'");

    // With /SECOND applied to both literals the expected difference is 0.
    assertQ(req("fl", "*,score", "q",
            "{!func}ms(2009-08-31T12:10:10.125Z/SECOND,2009-08-31T12:10:10.124Z/SECOND)", "fq", "id:1"),
            "//float[@name='score']='0.0'");

    // test that we can specify "NOW"
    assertQ(req("fl", "*,score", "q", "{!func}ms(NOW)", "NOW", "1000"), "//float[@name='score']='1000.0'");

    // Second batch: 12 "batman" docs (ids 100-111) in one commit, then a separate
    // commit holding doc 120 ("batman superman") and doc 121 ("superman").
    for (int i = 100; i < 112; i++) {
        assertU(adoc("id", "" + i, "text", "batman"));
    }
    assertU(commit());
    assertU(adoc("id", "120", "text", "batman superman")); // in a smaller segment
    assertU(adoc("id", "121", "text", "superman"));
    assertU(commit());

    // superman has a higher df (thus lower idf) in one segment, but reversed in the complete index
    // (last commit: batman in 1 doc, superman in 2; whole index: batman in 13, superman in 2)
    String q = "{!func}query($qq)";
    String fq = "id:120";
    assertQ(req("fl", "*,score", "q", q, "qq", "text:batman", "fq", fq), "//float[@name='score']<'1.0'");
    assertQ(req("fl", "*,score", "q", q, "qq", "text:superman", "fq", fq), "//float[@name='score']>'1.0'");

    // test weighting through a function range query
    assertQ(req("fl", "*,score", "fq", fq, "q", "{!frange l=1 u=10}query($qq)", "qq", "text:superman"),
            "//*[@numFound='1']");

    // test weighting through a complex function
    // The wrapping sub/div/sum/product calls use identity operands (0, 1), so the
    // same score thresholds as the plain query($qq) case above are expected.
    q = "{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)";
    assertQ(req("fl", "*,score", "q", q, "qq", "text:batman", "fq", fq), "//float[@name='score']<'1.0'");
    assertQ(req("fl", "*,score", "q", q, "qq", "text:superman", "fq", fq), "//float[@name='score']>'1.0'");

    // test full param dereferencing
    // v1 is itself an expression over v3 and v4: (2 + 5) + 1 = 8
    assertQ(req("fl", "*,score", "q", "{!func}add($v1,$v2)", "v1", "add($v3,$v4)", "v2", "1", "v3", "2", "v4",
            "5", "fq", "id:1"), "//float[@name='score']='8.0'");

    // test ability to parse multiple values
    // $pt expands to the two values "3,1"; expected distance from (1,1) is 2.0
    assertQ(req("fl", "*,score", "q", "{!func}dist(2,vector(1,1),$pt)", "pt", "3,1", "fq", "id:1"),
            "//float[@name='score']='2.0'");

    // test that extra stuff after a function causes an error
    try {
        assertQ(req("fl", "*,score", "q", "{!func}10 wow dude ignore_exception"));
        // reaching this line means the malformed query was accepted
        // NOTE(review): relies on fail() throwing something that is not an
        // Exception (e.g. an AssertionError), otherwise the catch below would
        // swallow it - confirm.
        fail();
    } catch (Exception e) {
        // OK
    }

    // test that sorting by function weights correctly.  superman should sort higher than batman due to idf of the whole index

    // Restricted to docs 120 and 121, sorted descending by the query score;
    // doc 120 is expected first and doc 121 second.
    assertQ(req("q", "*:*", "fq", "id:120 OR id:121", "sort", "{!func v=$sortfunc} desc", "sortfunc",
            "query($qq)", "qq", "text:(batman OR superman)"), "*//doc[1]/float[.='120.0']",
            "*//doc[2]/float[.='121.0']");

    FieldCache.DEFAULT.purgeAllCaches(); // avoid FC insanity
}