Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.update; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.DocList; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.ResultContext; import org.junit.BeforeClass; import org.junit.Test; /** * * */ public class DocumentBuilderTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { initCore("solrconfig.xml", "schema.xml"); } @Test public void testBuildDocument() throws Exception { SolrCore core = h.getCore(); // undefined field try { SolrInputDocument doc = new SolrInputDocument(); doc.setField("unknown field", 12345, 1.0f); DocumentBuilder.toDocument(doc, core.getLatestSchema()); fail("should throw an error"); } catch (SolrException ex) { assertEquals("should be bad request", 400, ex.code()); } } @Test public void testNullField() { SolrCore core = h.getCore(); // make sure a null value is not indexed SolrInputDocument doc = new SolrInputDocument(); doc.addField("name", null, 1.0f); Document out = DocumentBuilder.toDocument(doc, core.getLatestSchema()); assertNull(out.get("name")); } @Test public void testExceptions() { SolrCore core = h.getCore(); // make sure a null value is not indexed SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", "123", 1.0f); doc.addField("unknown", "something", 1.0f); try { DocumentBuilder.toDocument(doc, core.getLatestSchema()); fail("added an unknown field"); } catch (Exception ex) { assertTrue("should have document ID", ex.getMessage().indexOf("doc=123") > 0); } doc.remove("unknown"); doc.addField("weight", "not a number", 1.0f); try { DocumentBuilder.toDocument(doc, core.getLatestSchema()); fail("invalid 'float' field value"); } catch (Exception ex) { assertTrue("should have document ID", ex.getMessage().indexOf("doc=123") > 0); assertTrue("cause is number format", ex.getCause() instanceof NumberFormatException); } // now make sure it is OK doc.setField("weight", "1.34", 1.0f); DocumentBuilder.toDocument(doc, core.getLatestSchema()); } @Test public void testMultiField() throws Exception { SolrCore core = h.getCore(); // make sure a null value is not indexed SolrInputDocument doc = new SolrInputDocument(); doc.addField("home", "2.2,3.3", 1.0f); Document out = DocumentBuilder.toDocument(doc, core.getLatestSchema()); assertNotNull(out.get("home"));//contains the stored value and term vector, if there is one assertNotNull(out.getField("home_0" + FieldType.POLY_FIELD_SEPARATOR + "double")); assertNotNull(out.getField("home_1" + FieldType.POLY_FIELD_SEPARATOR + "double")); } @Test public void testCopyFieldWithDocumentBoost() { SolrCore core = h.getCore(); IndexSchema schema = core.getLatestSchema(); assertFalse(schema.getField("title").omitNorms()); assertTrue(schema.getField("title_stringNoNorms").omitNorms()); SolrInputDocument doc = new SolrInputDocument(); doc.setDocumentBoost(3f); doc.addField("title", "mytitle"); Document out = DocumentBuilder.toDocument(doc, schema); assertNotNull(out.get("title_stringNoNorms")); assertTrue( "title_stringNoNorms has the omitNorms attribute set to true, if the boost is different than 1.0, it will fail", 1.0f == out.getField("title_stringNoNorms").boost()); assertTrue("It is OK that title has a boost of 3", 3.0f == out.getField("title").boost()); } @Test public void testCopyFieldWithFieldBoost() { SolrCore core = h.getCore(); IndexSchema schema = core.getLatestSchema(); assertFalse(schema.getField("title").omitNorms()); assertTrue(schema.getField("title_stringNoNorms").omitNorms()); SolrInputDocument doc = new SolrInputDocument(); doc.addField("title", "mytitle", 3.0f); Document out = DocumentBuilder.toDocument(doc, schema); assertNotNull(out.get("title_stringNoNorms")); assertTrue( "title_stringNoNorms has the omitNorms attribute set to true, if the boost is different than 1.0, it will fail", 1.0f == out.getField("title_stringNoNorms").boost()); assertTrue("It is OK that title has a boost of 3", 3.0f == out.getField("title").boost()); } @Test public void testWithPolyFieldsAndFieldBoost() { SolrCore core = h.getCore(); IndexSchema schema = core.getLatestSchema(); assertFalse(schema.getField("store").omitNorms()); assertTrue(schema.getField("store_0_coordinate").omitNorms()); assertTrue(schema.getField("store_1_coordinate").omitNorms()); assertFalse(schema.getField("amount").omitNorms()); assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").omitNorms()); assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").omitNorms()); SolrInputDocument doc = new SolrInputDocument(); doc.addField("store", "40.7143,-74.006", 3.0f); doc.addField("amount", "10.5", 3.0f); Document out = DocumentBuilder.toDocument(doc, schema); assertNotNull(out.get("store")); assertNotNull(out.get("amount")); assertNotNull(out.getField("store_0_coordinate")); //NOTE: As the subtypes have omitNorm=true, they must have boost=1F, otherwise this is going to fail when adding the doc to Lucene. assertTrue(1f == out.getField("store_0_coordinate").boost()); assertTrue(1f == out.getField("store_1_coordinate").boost()); assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").boost()); assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").boost()); } @Test public void testWithPolyFieldsAndDocumentBoost() { SolrCore core = h.getCore(); IndexSchema schema = core.getLatestSchema(); assertFalse(schema.getField("store").omitNorms()); assertTrue(schema.getField("store_0_coordinate").omitNorms()); assertTrue(schema.getField("store_1_coordinate").omitNorms()); assertFalse(schema.getField("amount").omitNorms()); assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").omitNorms()); assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").omitNorms()); SolrInputDocument doc = new SolrInputDocument(); doc.setDocumentBoost(3.0f); doc.addField("store", "40.7143,-74.006"); doc.addField("amount", "10.5"); Document out = DocumentBuilder.toDocument(doc, schema); assertNotNull(out.get("store")); assertNotNull(out.getField("store_0_coordinate")); //NOTE: As the subtypes have omitNorm=true, they must have boost=1F, otherwise this is going to fail when adding the doc to Lucene. assertTrue(1f == out.getField("store_0_coordinate").boost()); assertTrue(1f == out.getField("store_1_coordinate").boost()); assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").boost()); assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").boost()); } /** * Its ok to boost a field if it has norms */ public void testBoost() throws Exception { XmlDoc xml = new XmlDoc(); xml.xml = "<doc>" + "<field name=\"id\">0</field>" + "<field name=\"title\" boost=\"3.0\">mytitle</field>" + "</doc>"; assertNull(h.validateUpdate(add(xml, new String[0]))); } public void testMultiValuedFieldAndDocBoosts() throws Exception { SolrCore core = h.getCore(); IndexSchema schema = core.getLatestSchema(); SolrInputDocument doc = new SolrInputDocument(); doc.setDocumentBoost(3.0f); SolrInputField field = new SolrInputField("foo_t"); field.addValue("summer time", 1.0f); field.addValue("in the city", 5.0f); // using boost field.addValue("living is easy", 1.0f); doc.put(field.getName(), field); Document out = DocumentBuilder.toDocument(doc, schema); IndexableField[] outF = out.getFields(field.getName()); assertEquals("wrong number of field values", 3, outF.length); // since Lucene no longer has native documnt boosts, we should find // the doc boost multiplied into the boost o nthe first field value // all other field values should be 1.0f // (lucene will multiply all of the field boosts later) assertEquals(15.0f, outF[0].boost(), 0.0f); assertEquals(1.0f, outF[1].boost(), 0.0f); assertEquals(1.0f, outF[2].boost(), 0.0f); } public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception { SolrCore core = h.getCore(); IndexSchema schema = core.getLatestSchema(); SolrInputDocument doc = new SolrInputDocument(); final float DOC_BOOST = 3.0F; doc.setDocumentBoost(DOC_BOOST); doc.addField("id", "42"); SolrInputField inTitle = new SolrInputField("title"); inTitle.addValue("titleA", 2.0F); inTitle.addValue("titleB", 7.0F); final float TITLE_BOOST = 2.0F * 7.0F; assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F); doc.put(inTitle.getName(), inTitle); SolrInputField inFoo = new SolrInputField("foo_t"); inFoo.addValue("summer time", 1.0F); inFoo.addValue("in the city", 5.0F); inFoo.addValue("living is easy", 11.0F); final float FOO_BOOST = 1.0F * 5.0F * 11.0F; assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F); doc.put(inFoo.getName(), inFoo); Document out = DocumentBuilder.toDocument(doc, schema); IndexableField[] outTitle = out.getFields(inTitle.getName()); assertEquals("wrong number of title values", 2, outTitle.length); IndexableField[] outNoNorms = out.getFields("title_stringNoNorms"); assertEquals("wrong number of nonorms values", 2, outNoNorms.length); IndexableField[] outFoo = out.getFields(inFoo.getName()); assertEquals("wrong number of foo values", 3, outFoo.length); IndexableField[] outText = out.getFields("text"); assertEquals("wrong number of text values", 5, outText.length); // since Lucene no longer has native document boosts, we should find // the doc boost multiplied into the boost on the first field value // of each field. All other field values should be 1.0f // (lucene will multiply all of the field value boosts later) assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F); assertEquals(1.0F, outTitle[1].boost(), 0.0F); // assertEquals(FOO_BOOST * DOC_BOOST, outFoo[0].boost(), 0.0F); assertEquals(1.0F, outFoo[1].boost(), 0.0F); assertEquals(1.0F, outFoo[2].boost(), 0.0F); // assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F); assertEquals(1.0F, outText[1].boost(), 0.0F); assertEquals(FOO_BOOST, outText[2].boost(), 0.0F); assertEquals(1.0F, outText[3].boost(), 0.0F); assertEquals(1.0F, outText[4].boost(), 0.0F); // copyField dest with no norms should not have recieved any boost assertEquals(1.0F, outNoNorms[0].boost(), 0.0F); assertEquals(1.0F, outNoNorms[1].boost(), 0.0F); // now index that SolrInputDocument to check the computed norms assertU(adoc(doc)); assertU(commit()); SolrQueryRequest req = req("q", "id:42"); try { // very hack-ish SolrQueryResponse rsp = new SolrQueryResponse(); core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp); DocList dl = ((ResultContext) rsp.getValues().get("response")).docs; assertTrue("can't find the doc we just added", 1 == dl.size()); int docid = dl.iterator().nextDoc(); SolrIndexSearcher searcher = req.getSearcher(); AtomicReader reader = SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader()); assertTrue( "similarity doesn't extend DefaultSimilarity, " + "config or defaults have changed since test was written", searcher.getSimilarity() instanceof DefaultSimilarity); DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity(); NumericDocValues titleNorms = reader.getNormValues("title"); NumericDocValues fooNorms = reader.getNormValues("foo_t"); NumericDocValues textNorms = reader.getNormValues("text"); assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST), titleNorms.get(docid)); assertEquals(expectedNorm(sim, 8 - 3, FOO_BOOST * DOC_BOOST), fooNorms.get(docid)); assertEquals(expectedNorm(sim, 2 + 8 - 3, TITLE_BOOST * FOO_BOOST * DOC_BOOST), textNorms.get(docid)); } finally { req.close(); } } /** * Given a length, and boost returns the expected encoded norm */ private static byte expectedNorm(final DefaultSimilarity sim, final int length, final float boost) { return (byte) sim.encodeNormValue(boost / ((float) Math.sqrt(length))); } public void testBoostOmitNorms() throws Exception { XmlDoc xml = new XmlDoc(); // explicitly boosting a field if that omits norms is not ok xml.xml = "<doc>" + "<field name=\"id\">ignore_exception</field>" + "<field name=\"title_stringNoNorms\" boost=\"3.0\">mytitle</field>" + "</doc>"; try { assertNull(h.validateUpdate(add(xml, new String[0]))); fail("didn't get expected exception for boosting omit norms field"); } catch (SolrException expected) { // expected exception } // boosting a field that is copied to another field that omits norms is ok xml.xml = "<doc>" + "<field name=\"id\">42</field>" + "<field name=\"title\" boost=\"3.0\">mytitle</field>" + "</doc>"; assertNull(h.validateUpdate(add(xml, new String[0]))); } /** * Its ok to supply a document boost even if a field omits norms */ public void testDocumentBoostOmitNorms() throws Exception { XmlDoc xml = new XmlDoc(); xml.xml = "<doc boost=\"3.0\">" + "<field name=\"id\">2</field>" + "<field name=\"title_stringNoNorms\">mytitle</field>" + "</doc>"; assertNull(h.validateUpdate(add(xml, new String[0]))); } }