Example usage for org.apache.lucene.index LeafReader getSortedSetDocValues

List of usage examples for org.apache.lucene.index LeafReader getSortedSetDocValues

Introduction

On this page you can find example usage of org.apache.lucene.index LeafReader getSortedSetDocValues.

Prototype

public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;

Source Link

Document

Returns SortedSetDocValues for this field, or null if no SortedSetDocValues were indexed for this field.

Usage

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Verifies that uninverting a legacy long field holding raw double bits as
 * {@code Type.SORTED_SET_DOUBLE} yields deduplicated, sorted ordinals.
 * Doc 0 has one value (5d); doc 1 has two values (5d and -3d); the global
 * value set therefore has exactly two entries, ordered -3d then 5d.
 */
public void testSortedSetDouble() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
    iw.addDocument(doc);

    // Force a single segment so there is exactly one leaf to inspect.
    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
            Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    // Doc 0: only the ord for 5d (ord 1, since -3d sorts first).
    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // Doc 1: both ords, in increasing order.
    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // Ordinals map back to the prefix-coded long (raw double bits).
    BytesRef value = v.lookupOrd(0);
    assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));

    value = v.lookupOrd(1);
    assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps.
 * Indexes one doc containing every value in [MIN, MAX] plus many random docs, then checks that
 * per-segment value-set sizes never exceed the expected total, and that the composite reader's
 * value set is exactly the expected size.
 */
public void testSortedSetIntegerManyValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    // Disable trie indexing for the "notrie_*" variants so only the exact value terms exist.
    final LegacyFieldType NO_TRIE_TYPE = new LegacyFieldType(LegacyIntField.TYPE_NOT_STORED);
    NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);

    final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
    UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
    final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
    MULTI_VALUES.add("trie_multi");
    MULTI_VALUES.add("notrie_multi");

    final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
    final int MIN = TestUtil.nextInt(random(), 10, 100);
    final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
    final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;

    { // (at least) one doc should have every value, so that at least one segment has every value
        final Document doc = new Document();
        for (int i = MIN; i <= MAX; i++) {
            doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
        }
        iw.addDocument(doc);
    }

    // now add some more random docs (note: starting at i=1 because of previously added doc)
    for (int i = 1; i < NUM_DOCS; i++) {
        final Document doc = new Document();
        // ~90% of docs get a single-valued entry.
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int val = TestUtil.nextInt(random(), MIN, MAX);
            doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
        }
        // ~90% of docs get one or more multi-valued entries.
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int numMulti = atLeast(1);
            while (0 < numMulti--) {
                int val = TestUtil.nextInt(random(), MIN, MAX);
                doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
                doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
            }
        }
        iw.addDocument(doc);
    }

    iw.close();

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final int NUM_LEAVES = ir.leaves().size();

    // check the leaves: no more than total set size
    for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader ar = rc.reader();
        for (String f : UNINVERT_MAP.keySet()) {
            final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
            final long valSetSize = v.getValueCount();
            assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got "
                    + valSetSize + " from: " + ar.toString(), valSetSize <= EXPECTED_VALSET_SIZE);

            if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
                // tighter check on multi fields in single segment index since we know one doc has all of them
                assertEquals(
                        f + ": Single segment LeafReader's value set should have had exactly expected size",
                        EXPECTED_VALSET_SIZE, valSetSize);
            }
        }
    }

    // check the composite of all leaves: exact expectation of set size
    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : MULTI_VALUES) {
        final SortedSetDocValues v = composite.getSortedSetDocValues(f);
        final long valSetSize = v.getValueCount();
        assertEquals(f + ": Composite reader value set should have had exactly expected size",
                EXPECTED_VALSET_SIZE, valSetSize);
    }

    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Verifies that every uninversion {@code Type} returns {@code null} doc values
 * against a completely empty index, both per-leaf and through the composite wrapper.
 */
public void testSortedSetEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    // Close immediately: we want a valid but completely empty index.
    iw.close();

    // Request uninversion for one field per Type, named after the Type itself.
    final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
    for (Type t : EnumSet.allOf(Type.class)) {
        UNINVERT_MAP.put(t.name(), t);
    }

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : UNINVERT_MAP.keySet()) {
        // check the leaves
        // (normally there are none for an empty index, so this is really just future
        // proofing in case that changes for some reason)
        for (LeafReaderContext rc : ir.leaves()) {
            final LeafReader ar = rc.reader();
            assertNull(f + ": Expected no doc values from empty index (leaf)", ar.getSortedSetDocValues(f));
        }

        // check the composite
        assertNull(f + ": Expected no doc values from empty index (composite)",
                composite.getSortedSetDocValues(f));

    }

    ir.close();
    dir.close();
}

From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns a composite {@link SortedSetDocValues} view over all leaves of the wrapped reader.
 * <p>
 * The expensive part of merging per-leaf doc values is building the {@link OrdinalMap};
 * maps are cached per field (keyed on the reader's cache key) so repeated calls only pay
 * for re-wrapping the per-leaf values. Returns {@code null} if any leaf declares the field
 * with a doc-values type other than {@code SORTED_SET}.
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
                // Only cache when the map was built against this reader's own cache key,
                // otherwise it could outlive (or mismatch) the underlying segments.
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }

    // Cache hit: rebuild the per-leaf values and re-wrap them with the cached ordinal map.
    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        // A leaf with a conflicting doc-values type makes the whole field unusable here.
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            // Leaf has no values for this field; substitute an empty instance so
            // docBase offsets in `starts` stay aligned with the leaves.
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Tests filtering of two SORTED_SET doc-values fields: wrapping with a field subset
 * that only admits "fieldA" must expose fieldA's values and hide fieldB entirely.
 */
public void testSortedSetDocValues() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 fields
    Document doc = new Document();
    doc.add(new SortedSetDocValuesField("fieldA", new BytesRef("testA")));
    doc.add(new SortedSetDocValuesField("fieldB", new BytesRef("testB")));
    iw.addDocument(doc);

    // open reader wrapped so that only "fieldA" is visible
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    SortedSetDocValues dv = segmentReader.getSortedSetDocValues("fieldA");
    assertNotNull(dv);
    assertTrue(dv.advanceExact(0));
    assertEquals(0, dv.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
    assertEquals(new BytesRef("testA"), dv.lookupOrd(0));
    // the filtered-out field must be invisible
    assertNull(segmentReader.getSortedSetDocValues("fieldB"));

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}

From source file:suonos.lucene.fields.IndexedFieldCountsBuilder.java

License:Apache License

/**
 * Aggregates per-term document counts for the given field across every index
 * segment and stores the sorted result in {@link #fieldCounts}.
 * <p>
 * Three field shapes are handled:
 * <ul>
 *   <li>multi-valued doc-values fields ({@code SortedSetDocValues}) — counts are
 *       accumulated per ordinal, then mapped back to terms;</li>
 *   <li>single-valued doc-values fields ({@code SortedDocValues}) — each matching
 *       doc contributes 1 to its term;</li>
 *   <li>plain indexed fields — {@code docFreq} from the terms dictionary is used
 *       directly.</li>
 * </ul>
 *
 * @param fieldName name of the indexed field, e.g. "album_genres"
 * @param filter    optional case-insensitive term prefix filter; may be null
 * @return this builder, for chaining
 * @throws IOException if the underlying index reader fails
 */
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException {

    final IndexedField fld = models.indexedField(fieldName);
    final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap();
    final TIntIntHashMap ordCounts = new TIntIntHashMap();

    if (filter != null) {
        // Filtering is case-insensitive; normalize once up front.
        filter = filter.toLowerCase();
    }

    // Iterate every segment; counts are accumulated per segment and merged
    // into valuesMap keyed by term text.
    int sz = ir.leaves().size();

    for (int i = 0; i != sz; i++) {
        // Get the segment reader.
        LeafReader lr = ir.leaves().get(i).reader();

        // Bitset of documents in this segment that have the field.
        Bits docs = lr.getDocsWithField(fld.getName());

        // Ordinal counts are per-segment (ords are segment-local), so reset.
        ordCounts.clear();

        if (fld.isDocValues()) {
            if (fld.isMultiValue()) {
                // Multi-valued doc values: per-document values are deduplicated
                // and sorted into a dictionary of unique terms; each document
                // exposes the ordinals of its terms, in increasing order.
                SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName());

                if (set != null) {
                    // Pass 1: bump a counter for every (doc, ord) pair.
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            set.setDocument(docId);

                            long ord;
                            while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                                ordCounts.adjustOrPutValue((int) ord, 1, 1);
                            }
                        }
                    }

                    // Pass 2: walk the term dictionary once and translate each
                    // ordinal's count into a per-term count.
                    TermsEnum te = set.termsEnum();
                    BytesRef term;

                    while ((term = te.next()) != null) {
                        int ord = (int) te.ord();
                        add(fld, valuesMap, filter, term, ordCounts.get(ord));
                    }
                }

            } else {
                // Single-valued doc values: one term per matching document.
                SortedDocValues set = lr.getSortedDocValues(fld.getName());

                if (set != null) {
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            // Get the term - Classical, Rock, etc.
                            BytesRef term = set.get(docId);

                            add(fld, valuesMap, filter, term, 1);
                        }
                    }
                }
            }
        } else {
            // Normal indexed field: the terms dictionary already knows each
            // term's document frequency within this segment.
            Terms terms = lr.terms(fld.getName());
            TermsEnum te = terms.iterator();

            BytesRef term;
            while ((term = te.next()) != null) {
                add(fld, valuesMap, filter, term, te.docFreq());
            }
        }
    }

    // Snapshot the accumulated counters and sort by term for stable output.
    IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]);
    Arrays.sort(list);

    // Publish under the field name.
    this.fieldCounts.put(fld.getName(), list);

    return this;
}