Example usage for org.apache.lucene.index LeafReader getSortedSetDocValues

List of usage examples for org.apache.lucene.index LeafReader getSortedSetDocValues

Introduction

On this page you can find example usages of org.apache.lucene.index LeafReader getSortedSetDocValues.

Prototype

public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;

Source Link

Document

Returns SortedSetDocValues for this field, or null if no SortedSetDocValues were indexed for this field.

Usage

From source file:com.qwazr.search.field.ValueConverter.java

License:Apache License

/**
 * Builds the {@code ValueConverter} matching the doc-values type declared by {@code fieldInfo}.
 *
 * @param fieldDef  field definition forwarded to the numeric converter factories
 * @param dvReader  reader the doc values are fetched from
 * @param fieldInfo Lucene field metadata; may be {@code null}
 * @return a converter, or {@code null} when the field info or its doc-values type is {@code null},
 *         when the type is {@code NONE} or {@code SORTED_SET}, or when the reader has no doc values
 *         for the field
 * @throws IOException if the reader fails, or if the doc-values type is not handled here
 */
final static ValueConverter newConverter(FieldDefinition fieldDef, LeafReader dvReader, FieldInfo fieldInfo)
        throws IOException {
    if (fieldInfo == null) {
        return null;
    }
    final DocValuesType type = fieldInfo.getDocValuesType();
    if (type == null) {
        return null;
    }
    final String fieldName = fieldInfo.name;
    switch (type) {
    case NONE:
        break;
    case BINARY: {
        final BinaryDocValues binary = dvReader.getBinaryDocValues(fieldName);
        return binary == null ? null : new BinaryDVConverter(binary);
    }
    case SORTED: {
        final SortedDocValues sorted = dvReader.getSortedDocValues(fieldName);
        return sorted == null ? null : new SortedDVConverter(sorted);
    }
    case NUMERIC: {
        final NumericDocValues numeric = dvReader.getNumericDocValues(fieldName);
        return numeric == null ? null : newNumericConverter(fieldDef, numeric);
    }
    case SORTED_NUMERIC: {
        final SortedNumericDocValues sortedNumeric = dvReader.getSortedNumericDocValues(fieldName);
        return sortedNumeric == null ? null : newSortedNumericConverter(fieldDef, sortedNumeric);
    }
    case SORTED_SET:
        // The values are still requested from the reader (the call may throw), but no converter
        // is produced for this type: the result is null whether or not values exist.
        dvReader.getSortedSetDocValues(fieldName);
        return null;
    default:
        throw new IOException("Unsupported doc value type: " + type + " for field: " + fieldInfo.name);
    }
    return null;
}

From source file:com.qwazr.search.index.IndexUtils.java

License:Apache License

/**
 * Creates a facet reader state for {@code FieldDefinition.FACET_FIELD}.
 *
 * @param indexReader the reader to build the state on
 * @return a new {@code DefaultSortedSetDocValuesReaderState}, or {@code null} when the wrapped
 *         reader is {@code null} or exposes no sorted-set doc values for the facet field
 * @throws IOException if reading the doc values or building the state fails
 */
final static SortedSetDocValuesReaderState getNewFacetsState(IndexReader indexReader) throws IOException {
    final LeafReader compositeView = SlowCompositeReaderWrapper.wrap(indexReader);
    if (compositeView == null) {
        return null;
    }
    final boolean hasFacetValues = compositeView.getSortedSetDocValues(FieldDefinition.FACET_FIELD) != null;
    return hasFacetValues
            ? new DefaultSortedSetDocValuesReaderState(indexReader, FieldDefinition.FACET_FIELD)
            : null;
}

From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns sorted-set doc values for {@code field} across all leaves of the wrapped reader.
 *
 * <p>On the first request for a field the values come straight from
 * {@code MultiDocValues.getSortedSetValues}; if that result carries an ordinal map, the map may be
 * stored in {@code cachedOrdMaps}. Later requests that find a cached map rebuild the multi-valued
 * view from the per-leaf doc values and that map.
 *
 * @param field name of the field to read
 * @return the doc values; on the cached-map path, {@code null} if any leaf declares the field
 *         with a doc-values type other than {@code SORTED_SET}
 * @throws IOException if reading doc values fails
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    // The cache lookup and the possible insertion happen under the same lock on cachedOrdMaps.
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                // Only cache a map owned by this reader's core cache key, and never while merging.
                if (map.owner == getCoreCacheKey() && merging == false) {
                    cachedOrdMaps.put(field, map);
                }
            }
            // Cache miss: hand back the freshly computed values as-is.
            return dv;
        }
    }

    // Cache hit: rebuild the multi-valued view from the per-leaf values and the cached map.
    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        // A leaf that knows the field under a different doc-values type makes the whole lookup null.
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            // Leaves without values for the field contribute an empty instance.
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    // The extra trailing entry of starts holds maxDoc().
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}

From source file:org.apache.solr.schema.TestSortableTextField.java

License:Apache License

/**
 * Indexes a single document and inspects the resulting index directly (field infos, doc values
 * and terms) through the searcher's slow atomic reader.
 *
 * NOTE(review): several asserted fields (e.g. keyword_dv_stxt, whitespace_l_stxt,
 * whitespace_nodv_stxt, whitespace_nois_stxt) are not added explicitly here; presumably the test
 * schema populates them (e.g. via copyField) -- confirm against the schema.
 */
public void testWhiteboxIndexReader() throws Exception {
    assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx",
            "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!"));
    assertU(commit());

    final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
    try {
        final LeafReader r = searcher.get().getSlowAtomicReader();

        // common cases...
        // each of these fields must have field info, SORTED doc values and indexed terms
        for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", "whitespace_stxt",
                "whitespace_f_stxt", "whitespace_l_stxt")) {
            assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field));
            assertEquals("DocValuesType: " + field, DocValuesType.SORTED,
                    r.getFieldInfos().fieldInfo(field).getDocValuesType());
            assertNotNull("DocValues: " + field, r.getSortedDocValues(field));
            assertNotNull("Terms: " + field, r.terms(field));

        }

        // special cases...
        // whitespace_nodv_stxt: no doc values, but terms are present
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
        assertEquals(DocValuesType.NONE,
                r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
        assertNull(r.getSortedDocValues("whitespace_nodv_stxt"));
        assertNotNull(r.terms("whitespace_nodv_stxt"));
        // whitespace_nois_stxt: SORTED doc values, but no terms
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
        assertEquals(DocValuesType.SORTED,
                r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
        assertNotNull(r.getSortedDocValues("whitespace_nois_stxt"));
        assertNull(r.terms("whitespace_nois_stxt"));
        // whitespace_m_stxt: SORTED_SET doc values plus terms (the document adds two values for it)
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt"));
        assertEquals(DocValuesType.SORTED_SET,
                r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
        assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt"));
        assertNotNull(r.terms("whitespace_m_stxt"));

    } finally {
        // release the reference taken by getNewestSearcher
        if (null != searcher) {
            searcher.decref();
        }
    }
}

From source file:org.apache.solr.search.SolrDocumentFetcher.java

License:Apache License

/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument.
 *
 * <p>A field is skipped (with a warning) when the schema does not know it, when it has no
 * docValues, or when the document already contains it. A field is skipped silently when the
 * searcher has no field info for it or when the segment holds no value for this document.
 *
 * @param doc
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 * @throws IOException
 *           if reading doc values from the leaf reader fails
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid,
        Set<String> fields) throws IOException {
    // Resolve the segment that holds the document and the document's id local to that segment.
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            continue; // Searcher doesn't have info about this field, hence ignore it.
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch (dvType) {
        case NUMERIC:
            final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
            if (ndv == null) {
                continue;
            }
            Long val;
            if (ndv.advanceExact(localId)) {
                val = ndv.longValue();
            } else {
                continue;
            }
            // Default to the raw long; narrowed/decoded below according to the schema type.
            Object newVal = val;
            if (schemaField.getType().isPointField()) {
                // TODO: Maybe merge PointField with TrieFields here
                NumberType type = schemaField.getType().getNumberType();
                switch (type) {
                case INTEGER:
                    newVal = val.intValue();
                    break;
                case LONG:
                    newVal = val.longValue();
                    break;
                case FLOAT:
                    newVal = Float.intBitsToFloat(val.intValue());
                    break;
                case DOUBLE:
                    newVal = Double.longBitsToDouble(val);
                    break;
                case DATE:
                    newVal = new Date(val);
                    break;
                default:
                    throw new AssertionError("Unexpected PointType: " + type);
                }
            } else {
                if (schemaField.getType() instanceof TrieIntField) {
                    newVal = val.intValue();
                } else if (schemaField.getType() instanceof TrieFloatField) {
                    newVal = Float.intBitsToFloat(val.intValue());
                } else if (schemaField.getType() instanceof TrieDoubleField) {
                    newVal = Double.longBitsToDouble(val);
                } else if (schemaField.getType() instanceof TrieDateField) {
                    newVal = new Date(val);
                } else if (schemaField.getType() instanceof EnumField) {
                    newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                }
            }
            doc.addField(fieldName, newVal);
            break;
        case BINARY:
            BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
            if (bdv == null) {
                continue;
            }
            BytesRef value;
            if (bdv.advanceExact(localId)) {
                // Copy the bytes before storing them in the document.
                value = BytesRef.deepCopyOf(bdv.binaryValue());
            } else {
                continue;
            }
            doc.addField(fieldName, value);
            break;
        case SORTED:
            SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
            if (sdv == null) {
                continue;
            }
            if (sdv.advanceExact(localId)) {
                final BytesRef bRef = sdv.binaryValue();
                // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                if (schemaField.getType() instanceof BoolField) {
                    doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                } else {
                    doc.addField(fieldName, bRef.utf8ToString());
                }
            }
            break;
        case SORTED_NUMERIC:
            final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
            NumberType type = schemaField.getType().getNumberType();
            if (numericDv != null) {
                if (numericDv.advance(localId) == localId) {
                    final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
                    for (int i = 0; i < numericDv.docValueCount(); i++) {
                        long number = numericDv.nextValue();
                        switch (type) {
                        case INTEGER:
                            outValues.add((int) number);
                            break;
                        case LONG:
                            outValues.add(number);
                            break;
                        case FLOAT:
                            outValues.add(NumericUtils.sortableIntToFloat((int) number));
                            break;
                        case DOUBLE:
                            outValues.add(NumericUtils.sortableLongToDouble(number));
                            break;
                        case DATE:
                            outValues.add(new Date(number));
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + type);
                        }
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
            // Without this break the case fell through into SORTED_SET and queried sorted-set
            // doc values for a SORTED_NUMERIC field.
            break;
        case SORTED_SET:
            final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
            if (values != null && values.getValueCount() > 0) {
                if (values.advance(localId) == localId) {
                    final List<Object> outValues = new LinkedList<>();
                    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                            .nextOrd()) {
                        // Own local rather than reusing the variable declared in the BINARY case.
                        final BytesRef term = values.lookupOrd(ord);
                        outValues.add(schemaField.getType().toObject(schemaField, term));
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
            break;
        case NONE:
            break;
        }
    }
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

/**
 * Returns a sorted-set view of {@code field} for the given reader, preferring real doc values
 * and falling back to uninverting the indexed terms.
 *
 * @param reader leaf reader to read from
 * @param field  field name
 * @param prefix term prefix used in the cache key; must be {@code null}, {@code INT32_TERM_PREFIX}
 *               or {@code INT64_TERM_PREFIX} (checked by assertion)
 * @return the field's doc values, never {@code null} on the paths visible here
 * @throws IOException           if reading from the reader fails
 * @throws IllegalStateException if the field carries doc values of a type other than the ones
 *                               looked up here
 */
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
    // not a general purpose filtering mechanism...
    assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;

    // 1. Real multi-valued doc values win.
    final SortedSetDocValues multiValued = reader.getSortedSetDocValues(field);
    if (multiValued != null) {
        return multiValued;
    }

    // 2. Single-valued sorted doc values are exposed through a singleton wrapper.
    final SortedDocValues singleValued = reader.getSortedDocValues(field);
    if (singleValued != null) {
        return DocValues.singleton(singleValued);
    }

    // 3. No usable doc values: decide from the field metadata whether uninverting is possible.
    final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
    if (fieldInfo == null) {
        return DocValues.emptySortedSet();
    }
    if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
    }
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySortedSet();
    }

    // 4. We need to uninvert; first check whether a cheaper single-valued path applies.
    final Terms indexedTerms = reader.terms(field);
    if (indexedTerms == null) {
        return DocValues.emptySortedSet();
    }
    // If #postings == #docs-with-field the field is "single valued enough". The same term may
    // appear twice in one document, but SORTED_SET discards frequency anyway. This is still fine
    // with filtering (limited to numerics); it just means precisionStep = Inf.
    final long postingCount = indexedTerms.getSumDocFreq();
    if (postingCount != -1 && postingCount == indexedTerms.getDocCount()) {
        return DocValues.singleton(getTermsIndex(reader, field));
    }

    // 5. General case: fetch the DocTermOrds entry from the cache and iterate it.
    final DocTermOrds termOrds = (DocTermOrds) caches.get(DocTermOrds.class).get(reader,
            new CacheKey(field, prefix));
    return termOrds.iterator(reader);
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

/**
 * Indexes random multi-valued string data both as an indexed field ("indexed") and as a
 * sorted-set doc-values field ("dv"), then checks that uninverting the indexed field through
 * {@code FieldCache.DEFAULT.getDocTermOrds} matches the real doc values, first per segment and
 * again after a force-merge to one segment.
 *
 * @param minLength minimum length of each random value
 * @param maxLength upper bound used when picking the maximum value length per document
 */
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
        doc.add(idField);
        final int length = TestUtil.nextInt(random(), minLength, maxLength);
        int numValues = random().nextInt(17);
        // create a random list of strings
        List<String> values = new ArrayList<>();
        for (int v = 0; v < numValues; v++) {
            values.add(TestUtil.randomSimpleString(random(), minLength, length));
        }

        // add in any order to the indexed field
        // (previously the shuffled copy was built but the loop iterated the unshuffled list)
        ArrayList<String> unordered = new ArrayList<>(values);
        Collections.shuffle(unordered, random());
        for (String v : unordered) {
            doc.add(newStringField("indexed", v, Field.Store.NO));
        }

        // add in any order to the dv field
        ArrayList<String> unordered2 = new ArrayList<>(values);
        Collections.shuffle(unordered2, random());
        for (String v : unordered2) {
            doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }

        writer.addDocument(doc);
        // occasionally commit so the index ends up with several segments
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // compare per-segment
    DirectoryReader ir = writer.getReader();
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
        SortedSetDocValues actual = r.getSortedSetDocValues("dv");
        assertEquals(r.maxDoc(), expected, actual);
    }
    ir.close();

    writer.forceMerge(1);

    // now compare again after the merge
    ir = writer.getReader();
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
    SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
    assertEquals(ir.maxDoc(), expected, actual);
    ir.close();

    writer.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Uninverts a multi-valued legacy int field as {@code Type.SORTED_SET_INTEGER} and checks the
 * resulting sorted-set doc values: two distinct values, with -3 at ord 0 and 5 at ord 1.
 */
public void testSortedSetInteger() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    // doc 0: single value 5
    Document doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);

    // doc 1: values 5 and -3
    doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
            Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    // doc 0 holds only ord 1 (the value 5, per the lookups below)
    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // doc 1 holds ords 0 and 1, returned in ascending order
    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // ord 0 decodes to -3, ord 1 decodes to 5
    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));

    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Uninverts a multi-valued field holding raw float bits (indexed as legacy ints) as
 * {@code Type.SORTED_SET_FLOAT} and checks the resulting sorted-set doc values: two distinct
 * values, with the bits of -3f at ord 0 and the bits of 5f at ord 1.
 */
public void testSortedSetFloat() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    // doc 0: single value 5f, stored as its raw int bits
    Document doc = new Document();
    doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
    iw.addDocument(doc);

    // doc 1: values 5f and -3f, stored as raw int bits
    doc = new Document();
    doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
    doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
            Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
    LeafReader ar = ir.leaves().get(0).reader();

    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    // doc 0 holds only ord 1 (the bits of 5f, per the lookups below)
    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // doc 1 holds ords 0 and 1, returned in ascending order
    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // ord 0 decodes to the raw bits of -3f, ord 1 to the raw bits of 5f
    BytesRef value = v.lookupOrd(0);
    assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value));

    value = v.lookupOrd(1);
    assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestUninvertingReader.java

License:Apache License

/**
 * Uninverts a multi-valued legacy long field as {@code Type.SORTED_SET_LONG} and checks the
 * resulting sorted-set doc values: two distinct values, with -3 at ord 0 and 5 at ord 1.
 */
public void testSortedSetLong() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    // doc 0: single value 5
    Document doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);

    // doc 1: values 5 and -3
    doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
            Collections.singletonMap("foo", Type.SORTED_SET_LONG));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    // doc 0 holds only ord 1 (the value 5, per the lookups below)
    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // doc 1 holds ords 0 and 1, returned in ascending order
    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    // ord 0 decodes to -3, ord 1 decodes to 5
    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));

    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
}