Example usage for org.apache.lucene.index LeafReader getSortedDocValues

Introduction

On this page you can find example usages of org.apache.lucene.index.LeafReader.getSortedDocValues.

Prototype

public abstract SortedDocValues getSortedDocValues(String field) throws IOException;

Source Link

Document

Returns SortedDocValues for this field, or null if no SortedDocValues were indexed for this field.

Usage

From source file:com.qwazr.search.field.ValueConverter.java

License:Apache License

/**
 * Creates the converter that matches the doc-values type recorded in {@code fieldInfo}.
 *
 * @param fieldDef  field definition handed to the numeric converter factories
 * @param dvReader  reader the doc values are fetched from
 * @param fieldInfo field metadata; may be {@code null}
 * @return a converter, or {@code null} when {@code fieldInfo} or its doc-values type is
 *         {@code null}, when the reader has no doc values for the field, or when the type is
 *         {@code NONE} or {@code SORTED_SET}
 * @throws IOException if the reader fails, or if the doc-values type is not handled here
 */
final static ValueConverter newConverter(FieldDefinition fieldDef, LeafReader dvReader, FieldInfo fieldInfo)
        throws IOException {
    final DocValuesType dvType = fieldInfo == null ? null : fieldInfo.getDocValuesType();
    if (dvType == null) {
        return null;
    }
    final String fieldName = fieldInfo.name;
    switch (dvType) {
    case BINARY: {
        final BinaryDocValues binary = dvReader.getBinaryDocValues(fieldName);
        return binary == null ? null : new BinaryDVConverter(binary);
    }
    case SORTED: {
        final SortedDocValues sorted = dvReader.getSortedDocValues(fieldName);
        return sorted == null ? null : new SortedDVConverter(sorted);
    }
    case NUMERIC: {
        final NumericDocValues numeric = dvReader.getNumericDocValues(fieldName);
        return numeric == null ? null : newNumericConverter(fieldDef, numeric);
    }
    case SORTED_NUMERIC: {
        final SortedNumericDocValues sortedNumeric = dvReader.getSortedNumericDocValues(fieldName);
        return sortedNumeric == null ? null : newSortedNumericConverter(fieldDef, sortedNumeric);
    }
    case SORTED_SET:
        // The values are looked up, but the result is null whether or not they exist.
        // NOTE(review): looks like a SORTED_SET converter was never implemented - confirm.
        dvReader.getSortedSetDocValues(fieldName);
        return null;
    case NONE:
        return null;
    default:
        throw new IOException("Unsupported doc value type: " + dvType + " for field: " + fieldName);
    }
}

From source file:org.apache.solr.handler.ExportWriter.java

License:Apache License

/**
 * Builds the {@code SortDoc} used to order documents by the given sort fields.
 *
 * @param searcher   supplies the schema and, for string/boolean fields, the sorted doc values
 * @param sortFields the sort criteria; between one and four entries are accepted
 * @return a sort doc sized to the number of sort fields
 * @throws IOException if a field has no doc values, has an unsupported type, or if the number
 *                     of sort fields is not between one and four
 */
private SortDoc getSortDoc(SolrIndexSearcher searcher, SortField[] sortFields) throws IOException {
    final SortValue[] sortValues = new SortValue[sortFields.length];
    final IndexSchema schema = searcher.getSchema();

    for (int idx = 0; idx < sortFields.length; idx++) {
        final SortField sortField = sortFields[idx];
        final String field = sortField.getField();
        final boolean descending = sortField.getReverse();
        final SchemaField schemaField = schema.getField(field);
        final FieldType fieldType = schemaField.getType();

        if (!schemaField.hasDocValues()) {
            throw new IOException(field + " must have DocValues to use this feature.");
        }

        if (fieldType instanceof TrieIntField) {
            sortValues[idx] = new IntValue(field, descending ? new IntDesc() : new IntAsc());
        } else if (fieldType instanceof TrieFloatField) {
            sortValues[idx] = new FloatValue(field, descending ? new FloatDesc() : new FloatAsc());
        } else if (fieldType instanceof TrieDoubleField) {
            sortValues[idx] = new DoubleValue(field, descending ? new DoubleDesc() : new DoubleAsc());
        } else if (fieldType instanceof TrieLongField) {
            sortValues[idx] = new LongValue(field, descending ? new LongDesc() : new LongAsc());
        } else if (fieldType instanceof StrField) {
            // Strings are compared through their ordinals, hence the int comparators.
            final SortedDocValues ordinals = searcher.getSlowAtomicReader().getSortedDocValues(field);
            sortValues[idx] = new StringValue(ordinals, field, descending ? new IntDesc() : new IntAsc());
        } else if (fieldType instanceof TrieDateField) {
            sortValues[idx] = new LongValue(field, descending ? new LongDesc() : new LongAsc());
        } else if (fieldType instanceof BoolField) {
            // Boolean fields store bytes just like strings, and "F" sorts before "T"
            // (so false sorts "less" than true); the string handling is reused as-is.
            final SortedDocValues ordinals = searcher.getSlowAtomicReader().getSortedDocValues(field);
            sortValues[idx] = new StringValue(ordinals, field, descending ? new IntDesc() : new IntAsc());
        } else {
            throw new IOException(
                    "Sort fields must be one of the following types: int,float,long,double,string,date,boolean");
        }
    }

    switch (sortValues.length) {
    case 1:
        return new SingleValueSortDoc(sortValues[0]);
    case 2:
        return new DoubleValueSortDoc(sortValues[0], sortValues[1]);
    case 3:
        return new TripleValueSortDoc(sortValues[0], sortValues[1], sortValues[2]);
    case 4:
        return new QuadValueSortDoc(sortValues[0], sortValues[1], sortValues[2], sortValues[3]);
    default:
        // Also reached when no sort field was supplied at all.
        throw new IOException("A max of 4 sorts can be specified");
    }
}

From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns sorted doc values for {@code field} across all leaves of the wrapped reader.
 * <p>
 * When no ordinal map is cached for the field, the result of
 * {@code MultiDocValues.getSortedValues} is returned directly; if that result is a
 * {@code MultiSortedDocValues}, its mapping is cached provided it is owned by this reader's
 * core cache key and this wrapper is not used for merging. When a cached map exists, a
 * {@code MultiSortedDocValues} is assembled from the per-leaf values using that map.
 *
 * @param field the field to read
 * @return the doc values, or {@code null} if some leaf declares a non-SORTED doc-values type
 *         for the field while a cached map exists
 * @throws IOException if a leaf reader fails
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    final OrdinalMap ordinalMap;
    synchronized (cachedOrdMaps) {
        final OrdinalMap cached = cachedOrdMaps.get(field);
        if (cached == null) {
            // uncached, or not a multi dv
            final SortedDocValues merged = MultiDocValues.getSortedValues(in, field);
            if (merged instanceof MultiSortedDocValues) {
                final OrdinalMap fresh = ((MultiSortedDocValues) merged).mapping;
                if (fresh.owner == getCoreCacheKey() && !merging) {
                    cachedOrdMaps.put(field, fresh);
                }
            }
            return merged;
        }
        ordinalMap = cached;
    }

    // A cached ordinal map exists: rebuild the multi view from the individual leaves.
    final int leafCount = in.leaves().size();
    final SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
    final int[] docStarts = new int[leafCount + 1];
    long costSum = 0;
    for (int leaf = 0; leaf < leafCount; leaf++) {
        final LeafReaderContext leafContext = in.leaves().get(leaf);
        final LeafReader leafReader = leafContext.reader();
        final FieldInfo info = leafReader.getFieldInfos().fieldInfo(field);
        if (info != null && info.getDocValuesType() != DocValuesType.SORTED) {
            return null;
        }
        final SortedDocValues leafValues = leafReader.getSortedDocValues(field);
        // Leaves without values for the field contribute an empty instance.
        perLeaf[leaf] = leafValues != null ? leafValues : DocValues.emptySorted();
        costSum += perLeaf[leaf].cost();
        docStarts[leaf] = leafContext.docBase;
    }
    docStarts[leafCount] = maxDoc();
    return new MultiSortedDocValues(perLeaf, docStarts, ordinalMap, costSum);
}

From source file:org.apache.solr.index.UninvertDocValuesMergePolicyTest.java

License:Apache License

/**
 * Indexes documents without doc values, enables doc values on {@code TEST_FIELD}, indexes
 * and updates more documents, optimizes down to one segment, and then checks that every
 * document exposes its value through sorted doc values.
 */
public void testIndexAndAddDocValues() throws Exception {
    final Random rnd = random();

    // First batch: 100 documents, committed at random points to spread them over segments.
    for (int docId = 0; docId < 100; docId++) {
        final String value = String.valueOf(docId);
        assertU(adoc(ID_FIELD, value, TEST_FIELD, value));
        if (rnd.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());

    // Everything is indexed and no doc values exist yet.
    withNewRawReader(h, rawReader -> {
        assertEquals(100, rawReader.numDocs());

        final FieldInfos merged = MultiFields.getMergedFieldInfos(rawReader);

        // The global field type should not have docValues yet
        assertEquals(DocValuesType.NONE, merged.fieldInfo(TEST_FIELD).getDocValuesType());
    });

    addDocValuesTo(h, TEST_FIELD);

    // Second batch with doc values turned on: ids 90-99 are updates, 100-109 are new.
    for (int docId = 90; docId < 110; docId++) {
        final String value = String.valueOf(docId);
        assertU(adoc(ID_FIELD, value, TEST_FIELD, value));
        if (rnd.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());

    withNewRawReader(h, rawReader -> {
        assertEquals(110, rawReader.numDocs());

        final FieldInfos merged = MultiFields.getMergedFieldInfos(rawReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, merged.fieldInfo(TEST_FIELD).getDocValuesType());
    });

    final int optimizeSegments = 1;
    assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));

    // After the optimize every document must carry the right doc value.
    withNewRawReader(h, rawReader -> {
        // Assert merged into one segment
        assertEquals(110, rawReader.numDocs());
        assertEquals(optimizeSegments, rawReader.leaves().size());

        final FieldInfos merged = MultiFields.getMergedFieldInfos(rawReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, merged.fieldInfo(TEST_FIELD).getDocValuesType());

        // Every segment must report SORTED for the test field with the stored value,
        // and the id field must not have picked up doc values by mistake.
        for (LeafReaderContext leaf : rawReader.leaves()) {
            final LeafReader segment = leaf.reader();
            final SortedDocValues segmentValues = segment.getSortedDocValues(TEST_FIELD);
            for (int docId = 0; docId < segment.numDocs(); docId++) {
                final Document stored = segment.document(docId);
                final String storedValue = stored.getField(TEST_FIELD).stringValue();
                final String storedId = stored.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED,
                        segment.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE,
                        segment.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
                assertEquals(storedValue, storedId);

                // Step the iterator in lockstep with the document loop.
                segmentValues.nextDoc();
                assertEquals(storedValue, segmentValues.binaryValue().utf8ToString());
            }
        }
    });
}

From source file:org.apache.solr.index.UninvertDocValuesMergePolicyTest.java

License:Apache License

/**
 * Verifies that merging works when {@code TEST_FIELD} is not indexed: the document added
 * before doc values were enabled ends up without a doc value, while the one added
 * afterwards has it.
 */
public void testNonIndexedFieldDoesNonFail() throws Exception {
    // Remove Indexed from fieldType
    removeIndexFrom(h, TEST_FIELD);

    // Document 1 is written before doc values exist on the field.
    assertU(adoc(ID_FIELD, "1", TEST_FIELD, "1"));
    assertU(commit());

    addDocValuesTo(h, TEST_FIELD);

    // Document 2 is written with doc values enabled.
    assertU(adoc(ID_FIELD, "2", TEST_FIELD, "2"));
    assertU(commit());

    assertU(optimize("maxSegments", "1"));

    withNewRawReader(h, rawReader -> {
        // Assert merged into one segment
        assertEquals(2, rawReader.numDocs());
        assertEquals(1, rawReader.leaves().size());

        final FieldInfos merged = MultiFields.getMergedFieldInfos(rawReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, merged.fieldInfo(TEST_FIELD).getDocValuesType());

        for (LeafReaderContext leaf : rawReader.leaves()) {
            final LeafReader segment = leaf.reader();
            final SortedDocValues segmentValues = segment.getSortedDocValues(TEST_FIELD);
            for (int docId = 0; docId < segment.numDocs(); docId++) {
                final Document stored = segment.document(docId);
                final String storedValue = stored.getField(TEST_FIELD).stringValue();
                final String storedId = stored.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED,
                        segment.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE,
                        segment.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());

                if (!storedId.equals("2")) {
                    // Written before doc values were enabled: nothing to advance to.
                    assertFalse(segmentValues.advanceExact(docId));
                } else {
                    assertTrue(segmentValues.advanceExact(docId));
                    assertEquals(storedValue, segmentValues.binaryValue().utf8ToString());
                }
            }
        }
    });
}

From source file:org.apache.solr.schema.TestSortableTextField.java

License:Apache License

/**
 * Inspects the index reader directly to confirm which of the {@code *_stxt} fields expose
 * doc values and/or indexed terms after a single document has been added.
 */
public void testWhiteboxIndexReader() throws Exception {
    assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx",
            "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!"));
    assertU(commit());

    final RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getNewestSearcher(false);
    try {
        final LeafReader leaf = searcherRef.get().getSlowAtomicReader();

        // common cases: SORTED doc values plus indexed terms
        final String[] commonFields = { "keyword_stxt", "keyword_dv_stxt", "whitespace_stxt",
                "whitespace_f_stxt", "whitespace_l_stxt" };
        for (String name : commonFields) {
            assertNotNull("FieldInfos: " + name, leaf.getFieldInfos().fieldInfo(name));
            assertEquals("DocValuesType: " + name, DocValuesType.SORTED,
                    leaf.getFieldInfos().fieldInfo(name).getDocValuesType());
            assertNotNull("DocValues: " + name, leaf.getSortedDocValues(name));
            assertNotNull("Terms: " + name, leaf.terms(name));
        }

        // special case: terms but no doc values
        assertNotNull(leaf.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
        assertEquals(DocValuesType.NONE,
                leaf.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
        assertNull(leaf.getSortedDocValues("whitespace_nodv_stxt"));
        assertNotNull(leaf.terms("whitespace_nodv_stxt"));

        // special case: doc values but no terms
        assertNotNull(leaf.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
        assertEquals(DocValuesType.SORTED,
                leaf.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
        assertNotNull(leaf.getSortedDocValues("whitespace_nois_stxt"));
        assertNull(leaf.terms("whitespace_nois_stxt"));

        // special case: SORTED_SET doc values (the field was given two values above)
        assertNotNull(leaf.getFieldInfos().fieldInfo("whitespace_m_stxt"));
        assertEquals(DocValuesType.SORTED_SET,
                leaf.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
        assertNotNull(leaf.getSortedSetDocValues("whitespace_m_stxt"));
        assertNotNull(leaf.terms("whitespace_m_stxt"));

    } finally {
        // Release the reference taken by getNewestSearcher.
        if (searcherRef != null) {
            searcherRef.decref();
        }
    }
}

From source file:org.apache.solr.search.SolrDocumentFetcher.java

License:Apache License

/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
 *
 * @param doc/*from   www  . ja  v  a 2s.  c o m*/
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid,
        Set<String> fields) throws IOException {
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            continue; // Searcher doesn't have info about this field, hence ignore it.
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch (dvType) {
        case NUMERIC:
            final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
            if (ndv == null) {
                continue;
            }
            Long val;
            if (ndv.advanceExact(localId)) {
                val = ndv.longValue();
            } else {
                continue;
            }
            Object newVal = val;
            if (schemaField.getType().isPointField()) {
                // TODO: Maybe merge PointField with TrieFields here
                NumberType type = schemaField.getType().getNumberType();
                switch (type) {
                case INTEGER:
                    newVal = val.intValue();
                    break;
                case LONG:
                    newVal = val.longValue();
                    break;
                case FLOAT:
                    newVal = Float.intBitsToFloat(val.intValue());
                    break;
                case DOUBLE:
                    newVal = Double.longBitsToDouble(val);
                    break;
                case DATE:
                    newVal = new Date(val);
                    break;
                default:
                    throw new AssertionError("Unexpected PointType: " + type);
                }
            } else {
                if (schemaField.getType() instanceof TrieIntField) {
                    newVal = val.intValue();
                } else if (schemaField.getType() instanceof TrieFloatField) {
                    newVal = Float.intBitsToFloat(val.intValue());
                } else if (schemaField.getType() instanceof TrieDoubleField) {
                    newVal = Double.longBitsToDouble(val);
                } else if (schemaField.getType() instanceof TrieDateField) {
                    newVal = new Date(val);
                } else if (schemaField.getType() instanceof EnumField) {
                    newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                }
            }
            doc.addField(fieldName, newVal);
            break;
        case BINARY:
            BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
            if (bdv == null) {
                continue;
            }
            BytesRef value;
            if (bdv.advanceExact(localId)) {
                value = BytesRef.deepCopyOf(bdv.binaryValue());
            } else {
                continue;
            }
            doc.addField(fieldName, value);
            break;
        case SORTED:
            SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
            if (sdv == null) {
                continue;
            }
            if (sdv.advanceExact(localId)) {
                final BytesRef bRef = sdv.binaryValue();
                // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                if (schemaField.getType() instanceof BoolField) {
                    doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                } else {
                    doc.addField(fieldName, bRef.utf8ToString());
                }
            }
            break;
        case SORTED_NUMERIC:
            final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
            NumberType type = schemaField.getType().getNumberType();
            if (numericDv != null) {
                if (numericDv.advance(localId) == localId) {
                    final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
                    for (int i = 0; i < numericDv.docValueCount(); i++) {
                        long number = numericDv.nextValue();
                        switch (type) {
                        case INTEGER:
                            outValues.add((int) number);
                            break;
                        case LONG:
                            outValues.add(number);
                            break;
                        case FLOAT:
                            outValues.add(NumericUtils.sortableIntToFloat((int) number));
                            break;
                        case DOUBLE:
                            outValues.add(NumericUtils.sortableLongToDouble(number));
                            break;
                        case DATE:
                            outValues.add(new Date(number));
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + type);
                        }
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
        case SORTED_SET:
            final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
            if (values != null && values.getValueCount() > 0) {
                if (values.advance(localId) == localId) {
                    final List<Object> outValues = new LinkedList<>();
                    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                            .nextOrd()) {
                        value = values.lookupOrd(ord);
                        outValues.add(schemaField.getType().toObject(schemaField, value));
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
        case NONE:
            break;
        }
    }
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

/**
 * Returns sorted doc values for {@code field}, falling back to the cache-backed
 * implementation when the reader has none of its own.
 *
 * @param reader                  the leaf reader to read from
 * @param field                   the field name
 * @param acceptableOverheadRatio forwarded as part of the cache key
 * @return the reader's own sorted doc values if present; an empty instance if the field is
 *         unknown or not indexed; otherwise an iterator over the cached values
 * @throws IOException           if the reader fails
 * @throws IllegalStateException if the field has doc values of another type
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio)
        throws IOException {
    final SortedDocValues indexed = reader.getSortedDocValues(field);
    if (indexed != null) {
        // Not cached here by FieldCacheImpl (cached instead
        // per-thread by SegmentReader):
        return indexed;
    }

    final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
    if (fieldInfo == null) {
        return DocValues.emptySorted();
    }
    if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
        // we don't try to build a sorted instance from numeric/binary doc
        // values because dedup can be very costly
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
    }
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySorted();
    }

    return ((SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader,
            new CacheKey(field, acceptableOverheadRatio))).iterator();
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

/**
 * Returns binary doc values for {@code field}: the reader's binary doc values, else its
 * sorted doc values, else the cache-backed implementation.
 *
 * @param reader                  the leaf reader to read from
 * @param field                   the field name
 * @param acceptableOverheadRatio forwarded as part of the cache key
 * @return the reader's own doc values if present; an empty instance if the field is unknown
 *         or not indexed; otherwise an iterator over the cached values
 * @throws IOException           if the reader fails
 * @throws IllegalStateException if the field has doc values of another type
 */
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio)
        throws IOException {
    BinaryDocValues indexed = reader.getBinaryDocValues(field);
    if (indexed == null) {
        // Sorted doc values are accepted as a substitute for binary ones.
        indexed = reader.getSortedDocValues(field);
    }
    if (indexed != null) {
        // Not cached here by FieldCacheImpl (cached instead
        // per-thread by SegmentReader):
        return indexed;
    }

    final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
    if (fieldInfo == null) {
        return DocValues.emptyBinary();
    }
    if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
    }
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptyBinary();
    }

    return ((BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader,
            new CacheKey(field, acceptableOverheadRatio))).iterator();
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

/**
 * Returns sorted-set doc values for {@code field}, preferring values the reader already
 * has and otherwise falling back to the cache-backed implementations.
 *
 * @param reader the leaf reader to read from
 * @param field  the field name
 * @param prefix {@code null}, {@code INT32_TERM_PREFIX} or {@code INT64_TERM_PREFIX};
 *               forwarded as part of the cache key
 * @return the reader's sorted-set doc values, a singleton view over its sorted doc values,
 *         an empty instance if the field is unknown, not indexed or has no terms, or an
 *         iterator over the cached values
 * @throws IOException           if the reader fails
 * @throws IllegalStateException if the field has doc values of another type
 */
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
    // not a general purpose filtering mechanism...
    assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;

    final SortedSetDocValues multiValued = reader.getSortedSetDocValues(field);
    if (multiValued != null) {
        return multiValued;
    }

    final SortedDocValues singleValued = reader.getSortedDocValues(field);
    if (singleValued != null) {
        return DocValues.singleton(singleValued);
    }

    final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
    if (fieldInfo == null) {
        return DocValues.emptySortedSet();
    }
    if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
    }
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySortedSet();
    }

    // ok we need to uninvert. check if we can optimize a bit.
    final Terms terms = reader.terms(field);
    if (terms == null) {
        return DocValues.emptySortedSet();
    }

    // if #postings = #docswithfield we know that the field is "single valued enough".
    // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
    // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
    final long postingCount = terms.getSumDocFreq();
    if (postingCount != -1 && postingCount == terms.getDocCount()) {
        return DocValues.singleton(getTermsIndex(reader, field));
    }

    return ((DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix)))
            .iterator(reader);
}