Example usage for org.apache.lucene.index LeafReader getSortedSetDocValues

Introduction

This page collects example usages of the method org.apache.lucene.index.LeafReader#getSortedSetDocValues.

Prototype

public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;

Source Link

Document

Returns SortedSetDocValues for this field, or null if no SortedSetDocValues were indexed for this field.

Usage

From source file:com.qwazr.search.field.ValueConverter.java

License:Apache License

/**
 * Builds the doc-values converter matching the doc-values type of a field.
 *
 * @param fieldDef  field definition handed to the numeric converter factories
 * @param dvReader  leaf reader the doc values are read from
 * @param fieldInfo Lucene field info of the field; may be {@code null}
 * @return a converter, or {@code null} when the field info or its doc-values type is
 *         {@code null}, the type is {@code NONE} or {@code SORTED_SET}, or the reader
 *         has no doc values of that type for the field
 * @throws IOException if the reader fails, or the doc-values type is not handled here
 */
final static ValueConverter newConverter(FieldDefinition fieldDef, LeafReader dvReader, FieldInfo fieldInfo)
        throws IOException {
    if (fieldInfo == null) {
        return null;
    }
    final DocValuesType dvType = fieldInfo.getDocValuesType();
    if (dvType == null) {
        return null;
    }
    final String fieldName = fieldInfo.name;
    switch (dvType) {
    case NONE:
        return null;
    case BINARY: {
        final BinaryDocValues binary = dvReader.getBinaryDocValues(fieldName);
        return binary == null ? null : new BinaryDVConverter(binary);
    }
    case SORTED: {
        final SortedDocValues sorted = dvReader.getSortedDocValues(fieldName);
        return sorted == null ? null : new SortedDVConverter(sorted);
    }
    case NUMERIC: {
        final NumericDocValues numeric = dvReader.getNumericDocValues(fieldName);
        return numeric == null ? null : newNumericConverter(fieldDef, numeric);
    }
    case SORTED_NUMERIC: {
        final SortedNumericDocValues sortedNumeric = dvReader.getSortedNumericDocValues(fieldName);
        return sortedNumeric == null ? null : newSortedNumericConverter(fieldDef, sortedNumeric);
    }
    case SORTED_SET:
        // The values are still looked up (the call may throw), but no converter is
        // produced for this type: the result is null whether or not values exist.
        dvReader.getSortedSetDocValues(fieldName);
        return null;
    default:
        throw new IOException("Unsupported doc value type: " + dvType + " for field: " + fieldInfo.name);
    }
}

From source file:com.qwazr.search.index.IndexUtils.java

License:Apache License

/**
 * Creates a facet reader state for the index, provided the facet field carries
 * sorted-set doc values.
 *
 * @param indexReader the index reader to inspect
 * @return a new reader state bound to {@code FieldDefinition.FACET_FIELD}, or {@code null}
 *         when the wrapped reader is {@code null} or has no sorted-set doc values for
 *         that field
 * @throws IOException if wrapping the reader or reading the doc values fails
 */
final static SortedSetDocValuesReaderState getNewFacetsState(IndexReader indexReader) throws IOException {
    final LeafReader wrapped = SlowCompositeReaderWrapper.wrap(indexReader);
    if (wrapped == null) {
        return null;
    }
    final boolean hasFacetValues = wrapped.getSortedSetDocValues(FieldDefinition.FACET_FIELD) != null;
    return hasFacetValues
            ? new DefaultSortedSetDocValuesReaderState(indexReader, FieldDefinition.FACET_FIELD)
            : null;
}

From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns sorted-set doc values for {@code field} across all leaves of the wrapped reader.
 *
 * <p>If no ordinal map is cached for the field, the values are obtained from
 * {@code MultiDocValues.getSortedSetValues} and returned as-is; when that result is a
 * {@code MultiSortedSetDocValues}, its ordinal map may be cached for later calls.
 * If a cached map exists, a new {@code MultiSortedSetDocValues} is assembled from the
 * per-leaf values using that map.
 *
 * @param field name of the field
 * @return the doc values, or {@code null} if (on the cached path) some leaf has the field
 *         with a doc-values type other than {@code SORTED_SET}; the uncached path returns
 *         whatever {@code MultiDocValues.getSortedSetValues} returns
 * @throws IOException if reading from the underlying readers fails
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    // Cache lookup and cache population both happen while holding the cachedOrdMaps monitor.
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                // Only cache maps owned by this reader's core cache key, and never while merging.
                if (map.owner == getCoreCacheKey() && merging == false) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }

    // Cached path: rebuild the multi-valued view from each leaf using the cached ordinal map.
    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    // One start (docBase) per leaf plus a final sentinel holding maxDoc().
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        // A leaf that knows the field under a different doc-values type makes the whole lookup fail.
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            // Leaves without values for the field contribute an empty instance.
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}

From source file:org.apache.solr.schema.TestSortableTextField.java

License:Apache License

/**
 * Indexes one document and inspects the slow atomic reader directly, checking for each
 * field its field info, doc-values type, doc values and indexed terms.
 */
public void testWhiteboxIndexReader() throws Exception {
    assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx",
            "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!"));
    assertU(commit());

    final RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getNewestSearcher(false);
    try {
        final LeafReader leaf = searcherRef.get().getSlowAtomicReader();

        // common cases: SORTED doc values plus indexed terms
        final String[] commonFields = { "keyword_stxt", "keyword_dv_stxt", "whitespace_stxt",
                "whitespace_f_stxt", "whitespace_l_stxt" };
        for (String name : commonFields) {
            assertNotNull("FieldInfos: " + name, leaf.getFieldInfos().fieldInfo(name));
            assertEquals("DocValuesType: " + name, DocValuesType.SORTED,
                    leaf.getFieldInfos().fieldInfo(name).getDocValuesType());
            assertNotNull("DocValues: " + name, leaf.getSortedDocValues(name));
            assertNotNull("Terms: " + name, leaf.terms(name));
        }

        // special case: no doc values, but terms are present
        assertNotNull(leaf.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
        assertEquals(DocValuesType.NONE,
                leaf.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
        assertNull(leaf.getSortedDocValues("whitespace_nodv_stxt"));
        assertNotNull(leaf.terms("whitespace_nodv_stxt"));

        // special case: SORTED doc values, but no terms
        assertNotNull(leaf.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
        assertEquals(DocValuesType.SORTED,
                leaf.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
        assertNotNull(leaf.getSortedDocValues("whitespace_nois_stxt"));
        assertNull(leaf.terms("whitespace_nois_stxt"));

        // special case: SORTED_SET doc values and terms
        assertNotNull(leaf.getFieldInfos().fieldInfo("whitespace_m_stxt"));
        assertEquals(DocValuesType.SORTED_SET,
                leaf.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
        assertNotNull(leaf.getSortedSetDocValues("whitespace_m_stxt"));
        assertNotNull(leaf.terms("whitespace_m_stxt"));

    } finally {
        if (searcherRef != null) {
            searcherRef.decref();
        }
    }
}

From source file:org.apache.solr.search.SolrDocumentFetcher.java

License:Apache License

/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
 *
 * @param doc
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 * @throws IOException
 *           If reading doc values from the leaf reader fails
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid,
        Set<String> fields) throws IOException {
    // Resolve the global docid to the leaf that holds it and to the leaf-local id.
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            continue; // Searcher doesn't have info about this field, hence ignore it.
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch (dvType) {
        case NUMERIC:
            final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
            if (ndv == null) {
                continue;
            }
            Long val;
            if (ndv.advanceExact(localId)) {
                val = ndv.longValue();
            } else {
                continue;
            }
            // Defaults to the raw long; narrowed/decoded below according to the schema type.
            Object newVal = val;
            if (schemaField.getType().isPointField()) {
                // TODO: Maybe merge PointField with TrieFields here
                NumberType type = schemaField.getType().getNumberType();
                switch (type) {
                case INTEGER:
                    newVal = val.intValue();
                    break;
                case LONG:
                    newVal = val.longValue();
                    break;
                case FLOAT:
                    newVal = Float.intBitsToFloat(val.intValue());
                    break;
                case DOUBLE:
                    newVal = Double.longBitsToDouble(val);
                    break;
                case DATE:
                    newVal = new Date(val);
                    break;
                default:
                    throw new AssertionError("Unexpected PointType: " + type);
                }
            } else {
                if (schemaField.getType() instanceof TrieIntField) {
                    newVal = val.intValue();
                } else if (schemaField.getType() instanceof TrieFloatField) {
                    newVal = Float.intBitsToFloat(val.intValue());
                } else if (schemaField.getType() instanceof TrieDoubleField) {
                    newVal = Double.longBitsToDouble(val);
                } else if (schemaField.getType() instanceof TrieDateField) {
                    newVal = new Date(val);
                } else if (schemaField.getType() instanceof EnumField) {
                    newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                }
            }
            doc.addField(fieldName, newVal);
            break;
        case BINARY:
            BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
            if (bdv == null) {
                continue;
            }
            BytesRef value;
            if (bdv.advanceExact(localId)) {
                // Copy the bytes so the document does not keep a reference to the iterator's value.
                value = BytesRef.deepCopyOf(bdv.binaryValue());
            } else {
                continue;
            }
            doc.addField(fieldName, value);
            break;
        case SORTED:
            SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
            if (sdv == null) {
                continue;
            }
            if (sdv.advanceExact(localId)) {
                final BytesRef bRef = sdv.binaryValue();
                // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                if (schemaField.getType() instanceof BoolField) {
                    doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                } else {
                    doc.addField(fieldName, bRef.utf8ToString());
                }
            }
            break;
        case SORTED_NUMERIC:
            final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
            NumberType type = schemaField.getType().getNumberType();
            if (numericDv != null && numericDv.advanceExact(localId)) {
                final int valueCount = numericDv.docValueCount();
                final List<Object> outValues = new ArrayList<Object>(valueCount);
                for (int i = 0; i < valueCount; i++) {
                    long number = numericDv.nextValue();
                    switch (type) {
                    case INTEGER:
                        outValues.add((int) number);
                        break;
                    case LONG:
                        outValues.add(number);
                        break;
                    case FLOAT:
                        outValues.add(NumericUtils.sortableIntToFloat((int) number));
                        break;
                    case DOUBLE:
                        outValues.add(NumericUtils.sortableLongToDouble(number));
                        break;
                    case DATE:
                        outValues.add(new Date(number));
                        break;
                    default:
                        throw new AssertionError("Unexpected PointType: " + type);
                    }
                }
                assert outValues.size() > 0;
                doc.addField(fieldName, outValues);
            }
            // Explicit break: without it this case fell through into SORTED_SET and performed
            // a second, wrongly-typed doc-values lookup on the same field.
            break;
        case SORTED_SET:
            final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
            if (values != null && values.getValueCount() > 0 && values.advanceExact(localId)) {
                final List<Object> outValues = new LinkedList<>();
                for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                        .nextOrd()) {
                    final BytesRef term = values.lookupOrd(ord);
                    outValues.add(schemaField.getType().toObject(schemaField, term));
                }
                assert outValues.size() > 0;
                doc.addField(fieldName, outValues);
            }
            break;
        case NONE:
            break;
        }
    }
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

/**
 * Returns a sorted-set view of the field's values, using existing doc values when
 * present and otherwise falling back to uninverting the indexed terms.
 *
 * @param reader leaf reader to read from
 * @param field  name of the field
 * @param prefix {@code null}, {@code INT32_TERM_PREFIX} or {@code INT64_TERM_PREFIX}
 *               (checked by assertion); it becomes part of the cache key
 * @return the field's sorted-set doc values; an empty instance when the field is unknown,
 *         is not indexed, or has no terms
 * @throws IllegalStateException if the field has doc values of a type other than
 *                               SORTED_SET or SORTED
 * @throws IOException           if reading from the reader fails
 */
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
    // not a general purpose filtering mechanism...
    assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;

    // 1. Native multi-valued doc values win.
    final SortedSetDocValues multiValued = reader.getSortedSetDocValues(field);
    if (multiValued != null) {
        return multiValued;
    }

    // 2. Single-valued doc values are exposed through a sorted-set wrapper.
    final SortedDocValues singleValued = reader.getSortedDocValues(field);
    if (singleValued != null) {
        return DocValues.singleton(singleValued);
    }

    // 3. No usable doc values: decide whether uninverting is possible at all.
    final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
    if (fieldInfo == null) {
        return DocValues.emptySortedSet();
    }
    if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
    }
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySortedSet();
    }

    // ok we need to uninvert. check if we can optimize a bit.
    final Terms indexedTerms = reader.terms(field);
    if (indexedTerms == null) {
        return DocValues.emptySortedSet();
    }

    // if #postings = #docswithfield we know that the field is "single valued enough".
    // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
    // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
    final long postingsCount = indexedTerms.getSumDocFreq();
    if (postingsCount != -1 && postingsCount == indexedTerms.getDocCount()) {
        return DocValues.singleton(getTermsIndex(reader, field));
    }

    final DocTermOrds uninverted = (DocTermOrds) caches.get(DocTermOrds.class).get(reader,
            new CacheKey(field, prefix));
    return uninverted.iterator(reader);
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

/**
 * Indexes random documents whose values are written both to an indexed string field
 * ("indexed") and to a sorted-set doc-values field ("dv"), then checks that uninverting
 * the indexed field yields the same values as the doc-values field, per segment and
 * again after a force-merge to one segment.
 *
 * @param minLength minimum length of the generated strings
 * @param maxLength upper bound for the per-document maximum string length
 */
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
        doc.add(idField);
        final int length = TestUtil.nextInt(random(), minLength, maxLength);
        int numValues = random().nextInt(17);
        // create a random list of strings
        List<String> values = new ArrayList<>();
        for (int v = 0; v < numValues; v++) {
            values.add(TestUtil.randomSimpleString(random(), minLength, length));
        }

        // add in any order to the indexed field
        ArrayList<String> unordered = new ArrayList<>(values);
        Collections.shuffle(unordered, random());
        // Iterate the shuffled copy; iterating 'values' left the shuffle without effect.
        for (String v : unordered) {
            doc.add(newStringField("indexed", v, Field.Store.NO));
        }

        // add in any order to the dv field
        ArrayList<String> unordered2 = new ArrayList<>(values);
        Collections.shuffle(unordered2, random());
        for (String v : unordered2) {
            doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }

        writer.addDocument(doc);
        // occasionally commit so the index ends up with several segments
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // compare per-segment
    DirectoryReader ir = writer.getReader();
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
        SortedSetDocValues actual = r.getSortedSetDocValues("dv");
        assertEquals(r.maxDoc(), expected, actual);
    }
    ir.close();

    writer.forceMerge(1);

    // now compare again after the merge
    ir = writer.getReader();
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
    SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
    assertEquals(ir.maxDoc(), expected, actual);
    ir.close();

    writer.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Uninverts a multi-valued legacy int field as SORTED_SET_INTEGER and verifies the
 * ordinals per document and the term behind each ordinal (ord 0 is -3, ord 1 is 5).
 */
public void testSortedSetInteger() throws IOException {
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));

    // doc 0: a single value
    final Document singleValued = new Document();
    singleValued.add(new LegacyIntField("foo", 5, Field.Store.NO));
    indexWriter.addDocument(singleValued);

    // doc 1: two values
    final Document multiValued = new Document();
    multiValued.add(new LegacyIntField("foo", 5, Field.Store.NO));
    multiValued.add(new LegacyIntField("foo", -3, Field.Store.NO));
    indexWriter.addDocument(multiValued);

    indexWriter.forceMerge(1);
    indexWriter.close();

    final DirectoryReader uninverted = UninvertingReader.wrap(DirectoryReader.open(directory),
            Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
    final LeafReader leaf = uninverted.leaves().get(0).reader();
    final SortedSetDocValues ords = leaf.getSortedSetDocValues("foo");
    assertEquals(2, ords.getValueCount());

    // doc 0 holds only the larger value
    assertEquals(0, ords.nextDoc());
    assertEquals(1, ords.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

    // doc 1 holds both values
    assertEquals(1, ords.nextDoc());
    assertEquals(0, ords.nextOrd());
    assertEquals(1, ords.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

    final BytesRef lowest = ords.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(lowest));

    final BytesRef highest = ords.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToInt(highest));
    TestUtil.checkReader(uninverted);
    uninverted.close();
    directory.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Uninverts a multi-valued legacy int field holding raw float bits as SORTED_SET_FLOAT
 * and verifies the ordinals per document and the term behind each ordinal
 * (ord 0 is the bits of -3f, ord 1 is the bits of 5f).
 */
public void testSortedSetFloat() throws IOException {
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));

    // doc 0: a single value
    final Document singleValued = new Document();
    singleValued.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
    indexWriter.addDocument(singleValued);

    // doc 1: two values
    final Document multiValued = new Document();
    multiValued.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
    multiValued.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
    indexWriter.addDocument(multiValued);

    indexWriter.forceMerge(1);
    indexWriter.close();

    final DirectoryReader uninverted = UninvertingReader.wrap(DirectoryReader.open(directory),
            Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
    final LeafReader leaf = uninverted.leaves().get(0).reader();

    final SortedSetDocValues ords = leaf.getSortedSetDocValues("foo");
    assertEquals(2, ords.getValueCount());

    // doc 0 holds only the larger value
    assertEquals(0, ords.nextDoc());
    assertEquals(1, ords.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

    // doc 1 holds both values
    assertEquals(1, ords.nextDoc());
    assertEquals(0, ords.nextOrd());
    assertEquals(1, ords.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

    final BytesRef lowest = ords.lookupOrd(0);
    assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(lowest));

    final BytesRef highest = ords.lookupOrd(1);
    assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(highest));
    TestUtil.checkReader(uninverted);
    uninverted.close();
    directory.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Uninverts a multi-valued legacy long field as SORTED_SET_LONG and verifies the
 * ordinals per document and the term behind each ordinal (ord 0 is -3, ord 1 is 5).
 */
public void testSortedSetLong() throws IOException {
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));

    // doc 0: a single value
    final Document singleValued = new Document();
    singleValued.add(new LegacyLongField("foo", 5, Field.Store.NO));
    indexWriter.addDocument(singleValued);

    // doc 1: two values
    final Document multiValued = new Document();
    multiValued.add(new LegacyLongField("foo", 5, Field.Store.NO));
    multiValued.add(new LegacyLongField("foo", -3, Field.Store.NO));
    indexWriter.addDocument(multiValued);

    indexWriter.forceMerge(1);
    indexWriter.close();

    final DirectoryReader uninverted = UninvertingReader.wrap(DirectoryReader.open(directory),
            Collections.singletonMap("foo", Type.SORTED_SET_LONG));
    final LeafReader leaf = uninverted.leaves().get(0).reader();
    final SortedSetDocValues ords = leaf.getSortedSetDocValues("foo");
    assertEquals(2, ords.getValueCount());

    // doc 0 holds only the larger value
    assertEquals(0, ords.nextDoc());
    assertEquals(1, ords.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

    // doc 1 holds both values
    assertEquals(1, ords.nextDoc());
    assertEquals(0, ords.nextOrd());
    assertEquals(1, ords.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

    final BytesRef lowest = ords.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(lowest));

    final BytesRef highest = ords.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToLong(highest));
    TestUtil.checkReader(uninverted);
    uninverted.close();
    directory.close();
}