org.apache.solr.uninverting.TestUninvertingReader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.solr.uninverting.TestUninvertingReader.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyIntField;
import org.apache.lucene.legacy.LegacyLongField;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.UninvertingReader.Type;

public class TestUninvertingReader extends LuceneTestCase {

    public void testSortedSetInteger() throws IOException {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

        Document doc = new Document();
        doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
        iw.addDocument(doc);

        doc = new Document();
        doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
        doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
        iw.addDocument(doc);

        iw.forceMerge(1);
        iw.close();

        DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
        LeafReader ar = ir.leaves().get(0).reader();
        SortedSetDocValues v = ar.getSortedSetDocValues("foo");
        assertEquals(2, v.getValueCount());

        assertEquals(0, v.nextDoc());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        assertEquals(1, v.nextDoc());
        assertEquals(0, v.nextOrd());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        BytesRef value = v.lookupOrd(0);
        assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));

        value = v.lookupOrd(1);
        assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
        TestUtil.checkReader(ir);
        ir.close();
        dir.close();
    }

    public void testSortedSetFloat() throws IOException {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

        Document doc = new Document();
        doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
        iw.addDocument(doc);

        doc = new Document();
        doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
        doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
        iw.addDocument(doc);

        iw.forceMerge(1);
        iw.close();

        DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
        LeafReader ar = ir.leaves().get(0).reader();

        SortedSetDocValues v = ar.getSortedSetDocValues("foo");
        assertEquals(2, v.getValueCount());

        assertEquals(0, v.nextDoc());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        assertEquals(1, v.nextDoc());
        assertEquals(0, v.nextOrd());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        BytesRef value = v.lookupOrd(0);
        assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value));

        value = v.lookupOrd(1);
        assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value));
        TestUtil.checkReader(ir);
        ir.close();
        dir.close();
    }

    public void testSortedSetLong() throws IOException {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

        Document doc = new Document();
        doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
        iw.addDocument(doc);

        doc = new Document();
        doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
        doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
        iw.addDocument(doc);

        iw.forceMerge(1);
        iw.close();

        DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                Collections.singletonMap("foo", Type.SORTED_SET_LONG));
        LeafReader ar = ir.leaves().get(0).reader();
        SortedSetDocValues v = ar.getSortedSetDocValues("foo");
        assertEquals(2, v.getValueCount());

        assertEquals(0, v.nextDoc());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        assertEquals(1, v.nextDoc());
        assertEquals(0, v.nextOrd());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        BytesRef value = v.lookupOrd(0);
        assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));

        value = v.lookupOrd(1);
        assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
        TestUtil.checkReader(ir);
        ir.close();
        dir.close();
    }

    public void testSortedSetDouble() throws IOException {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

        Document doc = new Document();
        doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
        iw.addDocument(doc);

        doc = new Document();
        doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
        doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
        iw.addDocument(doc);

        iw.forceMerge(1);
        iw.close();

        DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
                Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
        LeafReader ar = ir.leaves().get(0).reader();
        SortedSetDocValues v = ar.getSortedSetDocValues("foo");
        assertEquals(2, v.getValueCount());

        assertEquals(0, v.nextDoc());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        assertEquals(1, v.nextDoc());
        assertEquals(0, v.nextOrd());
        assertEquals(1, v.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

        BytesRef value = v.lookupOrd(0);
        assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));

        value = v.lookupOrd(1);
        assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
        TestUtil.checkReader(ir);
        ir.close();
        dir.close();
    }

    /** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
    public void testSortedSetIntegerManyValues() throws IOException {
        final Directory dir = newDirectory();
        final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

        final LegacyFieldType NO_TRIE_TYPE = new LegacyFieldType(LegacyIntField.TYPE_NOT_STORED);
        NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);

        final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
        UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
        UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
        UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
        UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
        final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
        MULTI_VALUES.add("trie_multi");
        MULTI_VALUES.add("notrie_multi");

        final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
        final int MIN = TestUtil.nextInt(random(), 10, 100);
        final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
        final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;

        { // (at least) one doc should have every value, so that at least one segment has every value
            final Document doc = new Document();
            for (int i = MIN; i <= MAX; i++) {
                doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
                doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
            }
            iw.addDocument(doc);
        }

        // now add some more random docs (note: starting at i=1 because of previously added doc)
        for (int i = 1; i < NUM_DOCS; i++) {
            final Document doc = new Document();
            if (0 != TestUtil.nextInt(random(), 0, 9)) {
                int val = TestUtil.nextInt(random(), MIN, MAX);
                doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
                doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
            }
            if (0 != TestUtil.nextInt(random(), 0, 9)) {
                int numMulti = atLeast(1);
                while (0 < numMulti--) {
                    int val = TestUtil.nextInt(random(), MIN, MAX);
                    doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
                    doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
                }
            }
            iw.addDocument(doc);
        }

        iw.close();

        final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
        TestUtil.checkReader(ir);

        final int NUM_LEAVES = ir.leaves().size();

        // check the leaves: no more then total set size
        for (LeafReaderContext rc : ir.leaves()) {
            final LeafReader ar = rc.reader();
            for (String f : UNINVERT_MAP.keySet()) {
                final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
                final long valSetSize = v.getValueCount();
                assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got "
                        + valSetSize + " from: " + ar.toString(), valSetSize <= EXPECTED_VALSET_SIZE);

                if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
                    // tighter check on multi fields in single segment index since we know one doc has all of them
                    assertEquals(
                            f + ": Single segment LeafReader's value set should have had exactly expected size",
                            EXPECTED_VALSET_SIZE, valSetSize);
                }
            }
        }

        // check the composite of all leaves: exact expectation of set size
        final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
        TestUtil.checkReader(composite);

        for (String f : MULTI_VALUES) {
            final SortedSetDocValues v = composite.getSortedSetDocValues(f);
            final long valSetSize = v.getValueCount();
            assertEquals(f + ": Composite reader value set should have had exactly expected size",
                    EXPECTED_VALSET_SIZE, valSetSize);
        }

        ir.close();
        dir.close();
    }

    public void testSortedSetEmptyIndex() throws IOException {
        final Directory dir = newDirectory();
        final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
        iw.close();

        final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
        for (Type t : EnumSet.allOf(Type.class)) {
            UNINVERT_MAP.put(t.name(), t);
        }

        final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
        TestUtil.checkReader(ir);

        final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
        TestUtil.checkReader(composite);

        for (String f : UNINVERT_MAP.keySet()) {
            // check the leaves
            // (normally there are none for an empty index, so this is really just future
            // proofing in case that changes for some reason)
            for (LeafReaderContext rc : ir.leaves()) {
                final LeafReader ar = rc.reader();
                assertNull(f + ": Expected no doc values from empty index (leaf)", ar.getSortedSetDocValues(f));
            }

            // check the composite
            assertNull(f + ": Expected no doc values from empty index (composite)",
                    composite.getSortedSetDocValues(f));

        }

        ir.close();
        dir.close();
    }

    public void testFieldInfos() throws IOException {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

        Document doc = new Document();
        BytesRef idBytes = new BytesRef("id");
        doc.add(new StringField("id", idBytes, Store.YES));
        doc.add(new LegacyIntField("int", 5, Store.YES));
        doc.add(new NumericDocValuesField("dv", 5));
        doc.add(new IntPoint("dint", 5));
        doc.add(new StoredField("stored", 5)); // not indexed
        iw.addDocument(doc);

        iw.forceMerge(1);
        iw.close();

        Map<String, Type> uninvertingMap = new HashMap<>();
        uninvertingMap.put("int", Type.LEGACY_INTEGER);
        uninvertingMap.put("dv", Type.LEGACY_INTEGER);
        uninvertingMap.put("dint", Type.INTEGER_POINT);

        DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), uninvertingMap);
        LeafReader leafReader = ir.leaves().get(0).reader();

        FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int");
        assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
        assertEquals(0, intFInfo.getPointDimensionCount());
        assertEquals(0, intFInfo.getPointNumBytes());

        FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint");
        assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
        assertEquals(1, dintFInfo.getPointDimensionCount());
        assertEquals(4, dintFInfo.getPointNumBytes());

        FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv");
        assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());

        FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored");
        assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());

        TestUtil.checkReader(ir);
        ir.close();
        dir.close();
    }

}