Example usage for org.apache.lucene.index LeafReader getSortedSetDocValues

List of usage examples for org.apache.lucene.index LeafReader getSortedSetDocValues

Introduction

On this page you can find example usage of org.apache.lucene.index LeafReader getSortedSetDocValues.

Prototype

public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;

Source Link

Document

Returns SortedSetDocValues for this field, or null if no SortedSetDocValues were indexed for this field.

Usage

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Verifies that uninverting a legacy long field holding raw double bits as
 * {@code Type.SORTED_SET_DOUBLE} yields deduplicated, sorted ordinals.
 * Doc 0 has one value (5d); doc 1 has two values (5d and -3d); the global
 * value set therefore has exactly two entries, ordered -3d then 5d.
 */
public void testSortedSetDouble() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
    iw.addDocument(doc);

    // Force a single segment so there is exactly one leaf to inspect.
    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
            Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    // Doc 0: only the ord for 5d (ord 1, since -3d sorts first).
    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // Doc 1: both ords, in increasing order.
    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // Ordinals map back to the prefix-coded long (raw double bits).
    BytesRef value = v.lookupOrd(0);
    assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));

    value = v.lookupOrd(1);
    assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps.
 * Indexes one doc containing every value in [MIN, MAX] plus many random docs, then checks that
 * per-segment value-set sizes never exceed the expected total, and that the composite reader's
 * value set is exactly the expected size.
 */
public void testSortedSetIntegerManyValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    // Disable trie indexing for the "notrie_*" variants so only the exact value terms exist.
    final LegacyFieldType NO_TRIE_TYPE = new LegacyFieldType(LegacyIntField.TYPE_NOT_STORED);
    NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);

    final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
    UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
    final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
    MULTI_VALUES.add("trie_multi");
    MULTI_VALUES.add("notrie_multi");

    final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
    final int MIN = TestUtil.nextInt(random(), 10, 100);
    final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
    final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;

    { // (at least) one doc should have every value, so that at least one segment has every value
        final Document doc = new Document();
        for (int i = MIN; i <= MAX; i++) {
            doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
        }
        iw.addDocument(doc);
    }

    // now add some more random docs (note: starting at i=1 because of previously added doc)
    for (int i = 1; i < NUM_DOCS; i++) {
        final Document doc = new Document();
        // ~90% of docs get a single-valued entry.
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int val = TestUtil.nextInt(random(), MIN, MAX);
            doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
        }
        // ~90% of docs get one or more multi-valued entries.
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int numMulti = atLeast(1);
            while (0 < numMulti--) {
                int val = TestUtil.nextInt(random(), MIN, MAX);
                doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
                doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
            }
        }
        iw.addDocument(doc);
    }

    iw.close();

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final int NUM_LEAVES = ir.leaves().size();

    // check the leaves: no more than total set size
    for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader ar = rc.reader();
        for (String f : UNINVERT_MAP.keySet()) {
            final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
            final long valSetSize = v.getValueCount();
            assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got "
                    + valSetSize + " from: " + ar.toString(), valSetSize <= EXPECTED_VALSET_SIZE);

            if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
                // tighter check on multi fields in single segment index since we know one doc has all of them
                assertEquals(
                        f + ": Single segment LeafReader's value set should have had exactly expected size",
                        EXPECTED_VALSET_SIZE, valSetSize);
            }
        }
    }

    // check the composite of all leaves: exact expectation of set size
    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : MULTI_VALUES) {
        final SortedSetDocValues v = composite.getSortedSetDocValues(f);
        final long valSetSize = v.getValueCount();
        assertEquals(f + ": Composite reader value set should have had exactly expected size",
                EXPECTED_VALSET_SIZE, valSetSize);
    }

    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Verifies that every uninversion {@code Type} returns {@code null} doc values
 * against a completely empty index, both per-leaf and through the composite wrapper.
 */
public void testSortedSetEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    // Close immediately: we want a valid but completely empty index.
    iw.close();

    // Request uninversion for one field per Type, named after the Type itself.
    final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
    for (Type t : EnumSet.allOf(Type.class)) {
        UNINVERT_MAP.put(t.name(), t);
    }

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : UNINVERT_MAP.keySet()) {
        // check the leaves
        // (normally there are none for an empty index, so this is really just future
        // proofing in case that changes for some reason)
        for (LeafReaderContext rc : ir.leaves()) {
            final LeafReader ar = rc.reader();
            assertNull(f + ": Expected no doc values from empty index (leaf)", ar.getSortedSetDocValues(f));
        }

        // check the composite
        assertNull(f + ": Expected no doc values from empty index (composite)",
                composite.getSortedSetDocValues(f));

    }

    ir.close();
    dir.close();
}

From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns a composite {@link SortedSetDocValues} view over all leaves of the wrapped reader.
 * <p>
 * The expensive part of merging per-leaf doc values is building the {@link OrdinalMap};
 * maps are cached per field (keyed on the reader's cache key) so repeated calls only pay
 * for re-wrapping the per-leaf values. Returns {@code null} if any leaf declares the field
 * with a doc-values type other than {@code SORTED_SET}.
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
                // Only cache when the map was built against this reader's own cache key,
                // otherwise it could outlive (or mismatch) the underlying segments.
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }

    // Cache hit: rebuild the per-leaf values and re-wrap them with the cached ordinal map.
    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        // A leaf with a conflicting doc-values type makes the whole field unusable here.
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            // Leaf has no values for this field; substitute an empty instance so
            // docBase offsets in `starts` stay aligned with the leaves.
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Tests filtering of two SORTED_SET doc-values fields: wrapping with a field subset
 * that only admits "fieldA" must expose fieldA's values and hide fieldB entirely.
 */
public void testSortedSetDocValues() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 fields
    Document doc = new Document();
    doc.add(new SortedSetDocValuesField("fieldA", new BytesRef("testA")));
    doc.add(new SortedSetDocValuesField("fieldB", new BytesRef("testB")));
    iw.addDocument(doc);

    // open reader wrapped so that only "fieldA" is visible
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    SortedSetDocValues dv = segmentReader.getSortedSetDocValues("fieldA");
    assertNotNull(dv);
    assertTrue(dv.advanceExact(0));
    assertEquals(0, dv.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
    assertEquals(new BytesRef("testA"), dv.lookupOrd(0));
    // the filtered-out field must be invisible
    assertNull(segmentReader.getSortedSetDocValues("fieldB"));

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}

From source file:suonos.lucene.fields.IndexedFieldCountsBuilder.java

License:Apache License

/**
 * Aggregates per-term document counts for the given field across every index
 * segment and stores the sorted result in {@link #fieldCounts}.
 * <p>
 * Three field shapes are handled:
 * <ul>
 *   <li>multi-valued doc-values fields ({@code SortedSetDocValues}) — counts are
 *       accumulated per ordinal, then mapped back to terms;</li>
 *   <li>single-valued doc-values fields ({@code SortedDocValues}) — each matching
 *       doc contributes 1 to its term;</li>
 *   <li>plain indexed fields — {@code docFreq} from the terms dictionary is used
 *       directly.</li>
 * </ul>
 *
 * @param fieldName name of the indexed field, e.g. "album_genres"
 * @param filter    optional case-insensitive term prefix filter; may be null
 * @return this builder, for chaining
 * @throws IOException if the underlying index reader fails
 */
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException {

    final IndexedField fld = models.indexedField(fieldName);
    final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap();
    final TIntIntHashMap ordCounts = new TIntIntHashMap();

    if (filter != null) {
        // Filtering is case-insensitive; normalize once up front.
        filter = filter.toLowerCase();
    }

    // Iterate every segment; counts are accumulated per segment and merged
    // into valuesMap keyed by term text.
    int sz = ir.leaves().size();

    for (int i = 0; i != sz; i++) {
        // Get the segment reader.
        LeafReader lr = ir.leaves().get(i).reader();

        // Bitset of documents in this segment that have the field.
        Bits docs = lr.getDocsWithField(fld.getName());

        // Ordinal counts are per-segment (ords are segment-local), so reset.
        ordCounts.clear();

        if (fld.isDocValues()) {
            if (fld.isMultiValue()) {
                // Multi-valued doc values: per-document values are deduplicated
                // and sorted into a dictionary of unique terms; each document
                // exposes the ordinals of its terms, in increasing order.
                SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName());

                if (set != null) {
                    // Pass 1: bump a counter for every (doc, ord) pair.
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            set.setDocument(docId);

                            long ord;
                            while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                                ordCounts.adjustOrPutValue((int) ord, 1, 1);
                            }
                        }
                    }

                    // Pass 2: walk the term dictionary once and translate each
                    // ordinal's count into a per-term count.
                    TermsEnum te = set.termsEnum();
                    BytesRef term;

                    while ((term = te.next()) != null) {
                        int ord = (int) te.ord();
                        add(fld, valuesMap, filter, term, ordCounts.get(ord));
                    }
                }

            } else {
                // Single-valued doc values: one term per matching document.
                SortedDocValues set = lr.getSortedDocValues(fld.getName());

                if (set != null) {
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            // Get the term - Classical, Rock, etc.
                            BytesRef term = set.get(docId);

                            add(fld, valuesMap, filter, term, 1);
                        }
                    }
                }
            }
        } else {
            // Normal indexed field: the terms dictionary already knows each
            // term's document frequency within this segment.
            Terms terms = lr.terms(fld.getName());
            TermsEnum te = terms.iterator();

            BytesRef term;
            while ((term = te.next()) != null) {
                add(fld, valuesMap, filter, term, te.docFreq());
            }
        }
    }

    // Snapshot the accumulated counters and sort by term for stable output.
    IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]);
    Arrays.sort(list);

    // Publish under the field name.
    this.fieldCounts.put(fld.getName(), list);

    return this;
}