Example usage for org.apache.lucene.index LeafReader getSortedDocValues

List of usage examples for org.apache.lucene.index LeafReader getSortedDocValues

Introduction

In this page you can find the example usage for org.apache.lucene.index LeafReader getSortedDocValues.

Prototype

public abstract SortedDocValues getSortedDocValues(String field) throws IOException;

Source Link

Document

Returns SortedDocValues for this field, or null if no SortedDocValues were indexed for this field.

Usage

From source file:com.qwazr.search.field.ValueConverter.java

License:Apache License

final static ValueConverter newConverter(FieldDefinition fieldDef, LeafReader dvReader, FieldInfo fieldInfo)
        throws IOException {
    if (fieldInfo == null)
        return null;
    DocValuesType type = fieldInfo.getDocValuesType();
    if (type == null)
        return null;
    switch (type) {
    case BINARY://from w  w  w.  j  ava2  s  .  c  om
        BinaryDocValues binaryDocValue = dvReader.getBinaryDocValues(fieldInfo.name);
        if (binaryDocValue == null)
            return null;
        return new BinaryDVConverter(binaryDocValue);
    case SORTED:
        SortedDocValues sortedDocValues = dvReader.getSortedDocValues(fieldInfo.name);
        if (sortedDocValues == null)
            return null;
        return new SortedDVConverter(sortedDocValues);
    case NONE:
        break;
    case NUMERIC:
        NumericDocValues numericDocValues = dvReader.getNumericDocValues(fieldInfo.name);
        if (numericDocValues == null)
            return null;
        return newNumericConverter(fieldDef, numericDocValues);
    case SORTED_NUMERIC:
        SortedNumericDocValues sortedNumericDocValues = dvReader.getSortedNumericDocValues(fieldInfo.name);
        if (sortedNumericDocValues == null)
            return null;
        return newSortedNumericConverter(fieldDef, sortedNumericDocValues);
    case SORTED_SET:
        SortedSetDocValues sortedSetDocValues = dvReader.getSortedSetDocValues(fieldInfo.name);
        if (sortedSetDocValues == null)
            return null;
        return null;
    default:
        throw new IOException("Unsupported doc value type: " + type + " for field: " + fieldInfo.name);
    }
    return null;
}

From source file:org.apache.solr.handler.ExportWriter.java

License:Apache License

private SortDoc getSortDoc(SolrIndexSearcher searcher, SortField[] sortFields) throws IOException {
    SortValue[] sortValues = new SortValue[sortFields.length];
    IndexSchema schema = searcher.getSchema();
    for (int i = 0; i < sortFields.length; ++i) {
        SortField sf = sortFields[i];//from  w w  w . j  a  v  a  2s.  com
        String field = sf.getField();
        boolean reverse = sf.getReverse();
        SchemaField schemaField = schema.getField(field);
        FieldType ft = schemaField.getType();

        if (!schemaField.hasDocValues()) {
            throw new IOException(field + " must have DocValues to use this feature.");
        }

        if (ft instanceof TrieIntField) {
            if (reverse) {
                sortValues[i] = new IntValue(field, new IntDesc());
            } else {
                sortValues[i] = new IntValue(field, new IntAsc());
            }
        } else if (ft instanceof TrieFloatField) {
            if (reverse) {
                sortValues[i] = new FloatValue(field, new FloatDesc());
            } else {
                sortValues[i] = new FloatValue(field, new FloatAsc());
            }
        } else if (ft instanceof TrieDoubleField) {
            if (reverse) {
                sortValues[i] = new DoubleValue(field, new DoubleDesc());
            } else {
                sortValues[i] = new DoubleValue(field, new DoubleAsc());
            }
        } else if (ft instanceof TrieLongField) {
            if (reverse) {
                sortValues[i] = new LongValue(field, new LongDesc());
            } else {
                sortValues[i] = new LongValue(field, new LongAsc());
            }
        } else if (ft instanceof StrField) {
            LeafReader reader = searcher.getSlowAtomicReader();
            SortedDocValues vals = reader.getSortedDocValues(field);
            if (reverse) {
                sortValues[i] = new StringValue(vals, field, new IntDesc());
            } else {
                sortValues[i] = new StringValue(vals, field, new IntAsc());
            }
        } else if (ft instanceof TrieDateField) {
            if (reverse) {
                sortValues[i] = new LongValue(field, new LongDesc());
            } else {
                sortValues[i] = new LongValue(field, new LongAsc());
            }
        } else if (ft instanceof BoolField) {
            // This is a bit of a hack, but since the boolean field stores ByteRefs, just like Strings
            // _and_ since "F" happens to sort before "T" (thus false sorts "less" than true)
            // we can just use the existing StringValue here.
            LeafReader reader = searcher.getSlowAtomicReader();
            SortedDocValues vals = reader.getSortedDocValues(field);
            if (reverse) {
                sortValues[i] = new StringValue(vals, field, new IntDesc());
            } else {
                sortValues[i] = new StringValue(vals, field, new IntAsc());
            }
        } else {
            throw new IOException(
                    "Sort fields must be one of the following types: int,float,long,double,string,date,boolean");
        }
    }

    if (sortValues.length == 1) {
        return new SingleValueSortDoc(sortValues[0]);
    } else if (sortValues.length == 2) {
        return new DoubleValueSortDoc(sortValues[0], sortValues[1]);
    } else if (sortValues.length == 3) {
        return new TripleValueSortDoc(sortValues[0], sortValues[1], sortValues[2]);
    } else if (sortValues.length == 4) {
        return new QuadValueSortDoc(sortValues[0], sortValues[1], sortValues[2], sortValues[3]);
    } else {
        throw new IOException("A max of 4 sorts can be specified");
    }
}

From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java

License:Apache License

@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();/*from w  w w  .jav  a 2 s  . c o m*/
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
            if (dv instanceof MultiSortedDocValues) {
                map = ((MultiSortedDocValues) dv).mapping;
                if (map.owner == getCoreCacheKey() && merging == false) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    int size = in.leaves().size();
    final SortedDocValues[] values = new SortedDocValues[size];
    final int[] starts = new int[size + 1];
    long totalCost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
            return null;
        }
        SortedDocValues v = reader.getSortedDocValues(field);
        if (v == null) {
            v = DocValues.emptySorted();
        }
        totalCost += v.cost();
        values[i] = v;
        starts[i] = context.docBase;
    }
    starts[size] = maxDoc();
    return new MultiSortedDocValues(values, starts, map, totalCost);
}

From source file:org.apache.solr.index.UninvertDocValuesMergePolicyTest.java

License:Apache License

public void testIndexAndAddDocValues() throws Exception {
    Random rand = random();//from  www .j av a2  s. c o m

    for (int i = 0; i < 100; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));

        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }

    assertU(commit());

    // Assert everything has been indexed and there are no docvalues
    withNewRawReader(h, topReader -> {
        assertEquals(100, topReader.numDocs());

        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);

        // The global field type should not have docValues yet
        assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });

    addDocValuesTo(h, TEST_FIELD);

    // Add some more documents with doc values turned on including updating some
    for (int i = 90; i < 110; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));

        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }

    assertU(commit());

    withNewRawReader(h, topReader -> {
        assertEquals(110, topReader.numDocs());

        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });

    int optimizeSegments = 1;
    assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));

    // Assert all docs have the right docvalues
    withNewRawReader(h, topReader -> {
        // Assert merged into one segment 
        assertEquals(110, topReader.numDocs());
        assertEquals(optimizeSegments, topReader.leaves().size());

        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());

        // Check that all segments have the right docvalues type with the correct value
        // Also check that other fields (e.g. the id field) didn't mistakenly get docvalues added
        for (LeafReaderContext ctx : topReader.leaves()) {
            LeafReader r = ctx.reader();
            SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
            for (int i = 0; i < r.numDocs(); ++i) {
                Document doc = r.document(i);
                String v = doc.getField(TEST_FIELD).stringValue();
                String id = doc.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
                assertEquals(v, id);

                docvalues.nextDoc();
                assertEquals(v, docvalues.binaryValue().utf8ToString());
            }
        }
    });
}

From source file:org.apache.solr.index.UninvertDocValuesMergePolicyTest.java

License:Apache License

public void testNonIndexedFieldDoesNonFail() throws Exception {
    // Remove Indexed from fieldType
    removeIndexFrom(h, TEST_FIELD);//from   w  ww  . j a  va2 s.co m

    assertU(adoc(ID_FIELD, String.valueOf(1), TEST_FIELD, String.valueOf(1)));
    assertU(commit());

    addDocValuesTo(h, TEST_FIELD);

    assertU(adoc(ID_FIELD, String.valueOf(2), TEST_FIELD, String.valueOf(2)));
    assertU(commit());

    assertU(optimize("maxSegments", "1"));

    withNewRawReader(h, topReader -> {
        // Assert merged into one segment 
        assertEquals(2, topReader.numDocs());
        assertEquals(1, topReader.leaves().size());

        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());

        for (LeafReaderContext ctx : topReader.leaves()) {
            LeafReader r = ctx.reader();
            SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
            for (int i = 0; i < r.numDocs(); ++i) {
                Document doc = r.document(i);
                String v = doc.getField(TEST_FIELD).stringValue();
                String id = doc.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());

                if (id.equals("2")) {
                    assertTrue(docvalues.advanceExact(i));
                    assertEquals(v, docvalues.binaryValue().utf8ToString());
                } else {
                    assertFalse(docvalues.advanceExact(i));
                }

            }
        }
    });
}

From source file:org.apache.solr.schema.TestSortableTextField.java

License:Apache License

public void testWhiteboxIndexReader() throws Exception {
    assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx",
            "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!"));
    assertU(commit());//  w w  w .  ja v a 2 s . c  om

    final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
    try {
        final LeafReader r = searcher.get().getSlowAtomicReader();

        // common cases...
        for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", "whitespace_stxt",
                "whitespace_f_stxt", "whitespace_l_stxt")) {
            assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field));
            assertEquals("DocValuesType: " + field, DocValuesType.SORTED,
                    r.getFieldInfos().fieldInfo(field).getDocValuesType());
            assertNotNull("DocValues: " + field, r.getSortedDocValues(field));
            assertNotNull("Terms: " + field, r.terms(field));

        }

        // special cases...
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
        assertEquals(DocValuesType.NONE,
                r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
        assertNull(r.getSortedDocValues("whitespace_nodv_stxt"));
        assertNotNull(r.terms("whitespace_nodv_stxt"));
        // 
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
        assertEquals(DocValuesType.SORTED,
                r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
        assertNotNull(r.getSortedDocValues("whitespace_nois_stxt"));
        assertNull(r.terms("whitespace_nois_stxt"));
        //
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt"));
        assertEquals(DocValuesType.SORTED_SET,
                r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
        assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt"));
        assertNotNull(r.terms("whitespace_m_stxt"));

    } finally {
        if (null != searcher) {
            searcher.decref();
        }
    }
}

From source file:org.apache.solr.search.SolrDocumentFetcher.java

License:Apache License

/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
 *
 * @param doc/*from   www  . ja  v  a 2s.  c o m*/
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid,
        Set<String> fields) throws IOException {
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            continue; // Searcher doesn't have info about this field, hence ignore it.
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch (dvType) {
        case NUMERIC:
            final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
            if (ndv == null) {
                continue;
            }
            Long val;
            if (ndv.advanceExact(localId)) {
                val = ndv.longValue();
            } else {
                continue;
            }
            Object newVal = val;
            if (schemaField.getType().isPointField()) {
                // TODO: Maybe merge PointField with TrieFields here
                NumberType type = schemaField.getType().getNumberType();
                switch (type) {
                case INTEGER:
                    newVal = val.intValue();
                    break;
                case LONG:
                    newVal = val.longValue();
                    break;
                case FLOAT:
                    newVal = Float.intBitsToFloat(val.intValue());
                    break;
                case DOUBLE:
                    newVal = Double.longBitsToDouble(val);
                    break;
                case DATE:
                    newVal = new Date(val);
                    break;
                default:
                    throw new AssertionError("Unexpected PointType: " + type);
                }
            } else {
                if (schemaField.getType() instanceof TrieIntField) {
                    newVal = val.intValue();
                } else if (schemaField.getType() instanceof TrieFloatField) {
                    newVal = Float.intBitsToFloat(val.intValue());
                } else if (schemaField.getType() instanceof TrieDoubleField) {
                    newVal = Double.longBitsToDouble(val);
                } else if (schemaField.getType() instanceof TrieDateField) {
                    newVal = new Date(val);
                } else if (schemaField.getType() instanceof EnumField) {
                    newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                }
            }
            doc.addField(fieldName, newVal);
            break;
        case BINARY:
            BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
            if (bdv == null) {
                continue;
            }
            BytesRef value;
            if (bdv.advanceExact(localId)) {
                value = BytesRef.deepCopyOf(bdv.binaryValue());
            } else {
                continue;
            }
            doc.addField(fieldName, value);
            break;
        case SORTED:
            SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
            if (sdv == null) {
                continue;
            }
            if (sdv.advanceExact(localId)) {
                final BytesRef bRef = sdv.binaryValue();
                // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                if (schemaField.getType() instanceof BoolField) {
                    doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                } else {
                    doc.addField(fieldName, bRef.utf8ToString());
                }
            }
            break;
        case SORTED_NUMERIC:
            final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
            NumberType type = schemaField.getType().getNumberType();
            if (numericDv != null) {
                if (numericDv.advance(localId) == localId) {
                    final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
                    for (int i = 0; i < numericDv.docValueCount(); i++) {
                        long number = numericDv.nextValue();
                        switch (type) {
                        case INTEGER:
                            outValues.add((int) number);
                            break;
                        case LONG:
                            outValues.add(number);
                            break;
                        case FLOAT:
                            outValues.add(NumericUtils.sortableIntToFloat((int) number));
                            break;
                        case DOUBLE:
                            outValues.add(NumericUtils.sortableLongToDouble(number));
                            break;
                        case DATE:
                            outValues.add(new Date(number));
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + type);
                        }
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
        case SORTED_SET:
            final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
            if (values != null && values.getValueCount() > 0) {
                if (values.advance(localId) == localId) {
                    final List<Object> outValues = new LinkedList<>();
                    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                            .nextOrd()) {
                        value = values.lookupOrd(ord);
                        outValues.add(schemaField.getType().toObject(schemaField, value));
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
        case NONE:
            break;
        }
    }
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio)
        throws IOException {
    SortedDocValues valuesIn = reader.getSortedDocValues(field);
    if (valuesIn != null) {
        // Not cached here by FieldCacheImpl (cached instead
        // per-thread by SegmentReader):
        return valuesIn;
    } else {/*  w  ww .  j a  v a2  s . c o  m*/
        final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
        if (info == null) {
            return DocValues.emptySorted();
        } else if (info.getDocValuesType() != DocValuesType.NONE) {
            // we don't try to build a sorted instance from numeric/binary doc
            // values because dedup can be very costly
            throw new IllegalStateException(
                    "Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
        } else if (info.getIndexOptions() == IndexOptions.NONE) {
            return DocValues.emptySorted();
        }
        SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader,
                new CacheKey(field, acceptableOverheadRatio));
        return impl.iterator();
    }
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio)
        throws IOException {
    BinaryDocValues valuesIn = reader.getBinaryDocValues(field);
    if (valuesIn == null) {
        valuesIn = reader.getSortedDocValues(field);
    }/*from  w  w w .  j a  v  a 2s . c  om*/

    if (valuesIn != null) {
        // Not cached here by FieldCacheImpl (cached instead
        // per-thread by SegmentReader):
        return valuesIn;
    }

    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
        return DocValues.emptyBinary();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    } else if (info.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptyBinary();
    }

    BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader,
            new CacheKey(field, acceptableOverheadRatio));
    return impl.iterator();
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException {
    // not a general purpose filtering mechanism...
    assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;

    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
    if (dv != null) {
        return dv;
    }/*from ww  w . j a v  a 2  s  .  com*/

    SortedDocValues sdv = reader.getSortedDocValues(field);
    if (sdv != null) {
        return DocValues.singleton(sdv);
    }

    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
        return DocValues.emptySortedSet();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    } else if (info.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptySortedSet();
    }

    // ok we need to uninvert. check if we can optimize a bit.

    Terms terms = reader.terms(field);
    if (terms == null) {
        return DocValues.emptySortedSet();
    } else {
        // if #postings = #docswithfield we know that the field is "single valued enough".
        // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
        // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
        long numPostings = terms.getSumDocFreq();
        if (numPostings != -1 && numPostings == terms.getDocCount()) {
            return DocValues.singleton(getTermsIndex(reader, field));
        }
    }

    DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix));
    return dto.iterator(reader);
}