org.apache.solr.search.SolrDocumentFetcher.java Source code

Introduction

Here is the source code for org.apache.solr.search.SolrDocumentFetcher.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.document.LazyDocument;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.EnumField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieDateField;
import org.apache.solr.schema.TrieDoubleField;
import org.apache.solr.schema.TrieFloatField;
import org.apache.solr.schema.TrieIntField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A helper class of {@link org.apache.solr.search.SolrIndexSearcher} for stored Document related matters
 * including DocValue substitutions.
 * <p>
 * It owns the optional document cache, knows which fields are "large" stored fields that should be loaded
 * lazily, and can copy docValues of non-stored fields into a Solr document.
 */
public class SolrDocumentFetcher {

    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    private final SolrIndexSearcher searcher;

    private final boolean enableLazyFieldLoading;

    /** Cache of loaded documents keyed by docid; {@code null} when caching is disabled or not configured. */
    private final SolrCache<Integer, Document> documentCache;

    /** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */
    private final Set<String> allNonStoredDVs;

    /** Contains the names/patterns of all docValues=true,stored=false,useDocValuesAsStored=true fields in the schema. */
    private final Set<String> nonStoredDVsUsedAsStored;

    /** Contains the names/patterns of all docValues=true,stored=false fields, excluding those that are copyField targets in the schema. */
    private final Set<String> nonStoredDVsWithoutCopyTargets;

    /**
     * A {@link LargeLazyField} keeps its bytes in memory only when the value is shorter than this many bytes.
     * Read once from the {@code solr.largeField.cacheThreshold} system property (default 512 KiB).
     */
    private static int largeValueLengthCacheThreshold = Integer.getInteger("solr.largeField.cacheThreshold",
            512 * 1024); // internal setting

    /** Names of the fields that are both stored and flagged as "large" in the schema. */
    private final Set<String> largeFields;

    private Collection<String> storedHighlightFieldNames; // lazy populated; use getter

    /**
     * Creates a fetcher bound to the given searcher.
     * <p>
     * The document cache is instantiated only when {@code cachingEnabled} is true and the config declares one.
     * The field-name sets are computed once, from the fields the searcher's index actually contains.
     *
     * @param searcher
     *          the searcher whose index reader, schema and field infos are consulted
     * @param solrConfig
     *          source of the lazy-field-loading flag and of the document cache configuration
     * @param cachingEnabled
     *          whether a document cache may be created
     */
    SolrDocumentFetcher(SolrIndexSearcher searcher, SolrConfig solrConfig, boolean cachingEnabled) {
        this.searcher = searcher;
        this.enableLazyFieldLoading = solrConfig.enableLazyFieldLoading;
        if (cachingEnabled) {
            documentCache = solrConfig.documentCacheConfig == null ? null
                    : solrConfig.documentCacheConfig.newInstance();
        } else {
            documentCache = null;
        }

        final Set<String> nonStoredDVsUsedAsStored = new HashSet<>();
        final Set<String> allNonStoredDVs = new HashSet<>();
        final Set<String> nonStoredDVsWithoutCopyTargets = new HashSet<>();
        final Set<String> storedLargeFields = new HashSet<>();

        for (FieldInfo fieldInfo : searcher.getFieldInfos()) { // can find materialized dynamic fields, unlike using the Solr IndexSchema.
            final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldInfo.name);
            if (schemaField == null) {
                continue; // present in the index but unknown to the schema
            }
            if (!schemaField.stored() && schemaField.hasDocValues()) {
                if (schemaField.useDocValuesAsStored()) {
                    nonStoredDVsUsedAsStored.add(fieldInfo.name);
                }
                allNonStoredDVs.add(fieldInfo.name);
                if (!searcher.getSchema().isCopyFieldTarget(schemaField)) {
                    nonStoredDVsWithoutCopyTargets.add(fieldInfo.name);
                }
            }
            if (schemaField.stored() && schemaField.isLarge()) {
                storedLargeFields.add(schemaField.getName());
            }
        }

        this.nonStoredDVsUsedAsStored = Collections.unmodifiableSet(nonStoredDVsUsedAsStored);
        this.allNonStoredDVs = Collections.unmodifiableSet(allNonStoredDVs);
        this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(nonStoredDVsWithoutCopyTargets);
        this.largeFields = Collections.unmodifiableSet(storedLargeFields);
    }

    /** Returns the lazy-field-loading flag taken from the {@link SolrConfig} at construction time. */
    public boolean isLazyFieldLoadingEnabled() {
        return enableLazyFieldLoading;
    }

    /** Returns the document cache, or {@code null} if none was created. */
    public SolrCache<Integer, Document> getDocumentCache() {
        return documentCache;
    }

    /**
     * Returns a collection of the names of all stored fields which can be highlighted the index reader knows about.
     * <p>
     * A field qualifies when it is stored and its type is a {@code TextField} or {@code StrField}. The collection is
     * computed on first call and the same instance is returned afterwards.
     */
    public Collection<String> getStoredHighlightFieldNames() {
        synchronized (this) {
            if (storedHighlightFieldNames == null) {
                storedHighlightFieldNames = new LinkedList<>();
                for (FieldInfo fieldInfo : searcher.getFieldInfos()) {
                    final String fieldName = fieldInfo.name;
                    try {
                        SchemaField field = searcher.getSchema().getField(fieldName);
                        if (field.stored() && ((field.getType() instanceof org.apache.solr.schema.TextField)
                                || (field.getType() instanceof org.apache.solr.schema.StrField))) {
                            storedHighlightFieldNames.add(fieldName);
                        }
                    } catch (RuntimeException e) { // getField() throws a SolrException, but it arrives as a RuntimeException
                        log.warn("Field [{}] found in index, but not defined in schema.", fieldName);
                    }
                }
            }
            return storedHighlightFieldNames;
        }
    }

    /** @see SolrIndexSearcher#doc(int) */
    public Document doc(int docId) throws IOException {
        return doc(docId, (Set<String>) null);
    }

    /**
     * Retrieve the {@link Document} instance corresponding to the document id.
     * <p>
     * <b>NOTE</b>: the document will have all fields accessible, but if a field filter is provided, only the provided
     * fields will be loaded (the remainder will be available lazily).
     * <p>
     * When a document cache exists, a cache hit is returned as-is regardless of {@code fields}, and a freshly loaded
     * document is put into the cache before being returned.
     *
     * @see SolrIndexSearcher#doc(int, Set)
     */
    public Document doc(int i, Set<String> fields) throws IOException {
        Document d;
        if (documentCache != null) {
            d = documentCache.get(i);
            if (d != null) {
                return d;
            }
        }

        final DirectoryReader reader = searcher.getIndexReader();
        if (documentCache != null && !enableLazyFieldLoading) {
            // we do not filter the fields in this case because that would return an incomplete document which would
            // be eventually cached. The alternative would be to read the stored fields twice; once with the fields
            // and then without for caching leading to a performance hit
            // see SOLR-8858 for related discussion
            fields = null;
        }
        final SolrDocumentStoredFieldVisitor visitor = new SolrDocumentStoredFieldVisitor(fields, reader, i);
        reader.document(i, visitor);
        d = visitor.getDocument();

        if (documentCache != null) {
            documentCache.put(i, d);
        }

        return d;
    }

    /** {@link StoredFieldVisitor} which loads the specified fields eagerly (or all if null).
     * If {@link #enableLazyFieldLoading} then the rest get special lazy field entries.  Designated "large"
     * fields will always get a special field entry. */
    private class SolrDocumentStoredFieldVisitor extends DocumentStoredFieldVisitor {
        private final Document doc;
        private final LazyDocument lazyFieldProducer; // arguably a better name than LazyDocument; at least how we use it here
        private final int docId;
        private final boolean addLargeFieldsLazily;

        SolrDocumentStoredFieldVisitor(Set<String> toLoad, IndexReader reader, int docId) {
            super(toLoad);
            this.docId = docId;
            this.doc = getDocument();
            // lazy placeholders are only produced when a field filter is given and lazy loading is enabled
            this.lazyFieldProducer = toLoad != null && enableLazyFieldLoading ? new LazyDocument(reader, docId)
                    : null;
            // large fields are only deferred when the document is going to be cached
            this.addLargeFieldsLazily = (documentCache != null && !largeFields.isEmpty());
            //TODO can we return Status.STOP after a val is loaded and we know there are no other fields of interest?
            //    When: toLoad is one single-valued field, no lazyFieldProducer
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            Status status = super.needsField(fieldInfo);
            assert status != Status.STOP : "Status.STOP not supported or expected";
            if (addLargeFieldsLazily && largeFields.contains(fieldInfo.name)) { // load "large" fields using this lazy mechanism
                if (lazyFieldProducer != null || status == Status.YES) {
                    doc.add(new LargeLazyField(fieldInfo.name, docId));
                }
                return Status.NO; // never let the reader materialize the large value here
            }
            if (status == Status.NO && lazyFieldProducer != null) { // lazy
                doc.add(lazyFieldProducer.getField(fieldInfo));
            }
            return status;
        }
    }

    /** @see SolrIndexSearcher#doc(int, StoredFieldVisitor) */
    public void doc(int docId, StoredFieldVisitor visitor) throws IOException {
        if (documentCache != null) {
            Document cached = documentCache.get(docId);
            if (cached != null) {
                visitFromCached(cached, visitor);
                return;
            }
        }
        searcher.getIndexReader().document(docId, visitor);
    }

    /** Executes a stored field visitor against a hit from the document cache */
    private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
        for (IndexableField f : document) {
            final FieldInfo info = searcher.getFieldInfos().fieldInfo(f.name());
            final StoredFieldVisitor.Status needsField = visitor.needsField(info);
            if (needsField == StoredFieldVisitor.Status.STOP) {
                return;
            }
            if (needsField == StoredFieldVisitor.Status.NO) {
                continue;
            }
            BytesRef binaryValue = f.binaryValue();
            if (binaryValue != null) {
                visitor.binaryField(info, toByteArrayUnwrapIfPossible(binaryValue));
                continue;
            }
            Number numericValue = f.numericValue();
            if (numericValue != null) {
                if (numericValue instanceof Double) {
                    visitor.doubleField(info, numericValue.doubleValue());
                } else if (numericValue instanceof Integer) {
                    visitor.intField(info, numericValue.intValue());
                } else if (numericValue instanceof Float) {
                    visitor.floatField(info, numericValue.floatValue());
                } else if (numericValue instanceof Long) {
                    visitor.longField(info, numericValue.longValue());
                } else {
                    throw new AssertionError("Unexpected numeric type: " + numericValue.getClass());
                }
                continue;
            }
            // must be String
            if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
                visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes()));
            } else {
                visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
            }
        }
    }

    /**
     * Returns the bytes referenced by {@code bytesRef}: the backing array itself when the reference spans all of it,
     * otherwise a copy of just the referenced range.
     */
    private byte[] toByteArrayUnwrapIfPossible(BytesRef bytesRef) {
        if (bytesRef.offset == 0 && bytesRef.bytes.length == bytesRef.length) {
            return bytesRef.bytes;
        } else {
            return Arrays.copyOfRange(bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length);
        }
    }

    /** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
    class LargeLazyField implements IndexableField {

        final String name;
        final int docId;
        // synchronize on 'this' to access:
        BytesRef cachedBytes; // we only conditionally populate this if it's big enough

        private LargeLazyField(String name, int docId) {
            this.name = name;
            this.docId = docId;
        }

        @Override
        public String toString() {
            return fieldType().toString() + "<" + name() + ">"; // mimic Field.java
        }

        @Override
        public String name() {
            return name;
        }

        @Override
        public IndexableFieldType fieldType() {
            return searcher.getSchema().getField(name());
        }

        @Override
        public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
            return analyzer.tokenStream(name(), stringValue()); // or we could throw unsupported exception?
        }

        /** (for tests) */
        synchronized boolean hasBeenLoaded() {
            return cachedBytes != null;
        }

        /** Decodes the field's bytes as UTF-8; an {@link IOException} from the read is rethrown unchecked. */
        @Override
        public synchronized String stringValue() {
            try {
                return readBytes().utf8ToString();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * Returns the field's raw bytes, reading them from the index unless a cached copy exists.
         * The result is retained in {@link #cachedBytes} only when it is shorter than the cache threshold.
         */
        synchronized BytesRef readBytes() throws IOException {
            if (cachedBytes != null) {
                return cachedBytes;
            } else {
                BytesRef bytesRef = new BytesRef();
                searcher.getIndexReader().document(docId, new StoredFieldVisitor() {
                    boolean done = false;

                    @Override
                    public Status needsField(FieldInfo fieldInfo) throws IOException {
                        if (done) {
                            return Status.STOP; // the one value we wanted has been captured
                        }
                        return fieldInfo.name.equals(name()) ? Status.YES : Status.NO;
                    }

                    @Override
                    public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
                        bytesRef.bytes = value;
                        bytesRef.length = value.length;
                        done = true;
                    }

                    @Override
                    public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
                        throw new UnsupportedOperationException("'large' binary fields are not (yet) supported");
                    }
                });
                if (bytesRef.length < largeValueLengthCacheThreshold) {
                    return cachedBytes = bytesRef;
                } else {
                    return bytesRef;
                }
            }
        }

        @Override
        public BytesRef binaryValue() {
            return null;
        }

        @Override
        public Reader readerValue() {
            return null;
        }

        @Override
        public Number numericValue() {
            return null;
        }
    }

    /**
     * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
     * <p>
     * A requested field is skipped (with a warning) when it is unknown to the schema, has no docValues, or is
     * already present in {@code doc}. It is skipped silently when the index has no info for it or the document has
     * no value for it.
     *
     * @param doc
     *          A SolrDocument or SolrInputDocument instance where docValues will be added
     * @param docid
     *          The lucene docid of the document to be populated
     * @param fields
     *          The list of docValues fields to be decorated
     */
    public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid,
            Set<String> fields) throws IOException {
        final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
        final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
        final int localId = docid - leafContexts.get(subIndex).docBase; // docid relative to its segment
        final LeafReader leafReader = leafContexts.get(subIndex).reader();
        for (String fieldName : fields) {
            final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
            if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
                log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
                continue;
            }
            FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
            if (fi == null) {
                continue; // Searcher doesn't have info about this field, hence ignore it.
            }
            final DocValuesType dvType = fi.getDocValuesType();
            // NOTE: 'continue' inside the switch moves on to the next field; every case ends with an explicit 'break'.
            switch (dvType) {
            case NUMERIC:
                final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
                if (ndv == null) {
                    continue;
                }
                Long val;
                if (ndv.advanceExact(localId)) {
                    val = ndv.longValue();
                } else {
                    continue;
                }
                Object newVal = val;
                if (schemaField.getType().isPointField()) {
                    // TODO: Maybe merge PointField with TrieFields here
                    NumberType type = schemaField.getType().getNumberType();
                    switch (type) {
                    case INTEGER:
                        newVal = val.intValue();
                        break;
                    case LONG:
                        newVal = val.longValue();
                        break;
                    case FLOAT:
                        newVal = Float.intBitsToFloat(val.intValue());
                        break;
                    case DOUBLE:
                        newVal = Double.longBitsToDouble(val);
                        break;
                    case DATE:
                        newVal = new Date(val);
                        break;
                    default:
                        throw new AssertionError("Unexpected PointType: " + type);
                    }
                } else {
                    // any other type (not matched below) keeps the raw Long
                    if (schemaField.getType() instanceof TrieIntField) {
                        newVal = val.intValue();
                    } else if (schemaField.getType() instanceof TrieFloatField) {
                        newVal = Float.intBitsToFloat(val.intValue());
                    } else if (schemaField.getType() instanceof TrieDoubleField) {
                        newVal = Double.longBitsToDouble(val);
                    } else if (schemaField.getType() instanceof TrieDateField) {
                        newVal = new Date(val);
                    } else if (schemaField.getType() instanceof EnumField) {
                        newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                    }
                }
                doc.addField(fieldName, newVal);
                break;
            case BINARY:
                BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
                if (bdv == null) {
                    continue;
                }
                BytesRef value;
                if (bdv.advanceExact(localId)) {
                    value = BytesRef.deepCopyOf(bdv.binaryValue());
                } else {
                    continue;
                }
                doc.addField(fieldName, value);
                break;
            case SORTED:
                SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
                if (sdv == null) {
                    continue;
                }
                if (sdv.advanceExact(localId)) {
                    final BytesRef bRef = sdv.binaryValue();
                    // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                    if (schemaField.getType() instanceof BoolField) {
                        doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                    } else {
                        doc.addField(fieldName, bRef.utf8ToString());
                    }
                }
                break;
            case SORTED_NUMERIC:
                final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
                NumberType type = schemaField.getType().getNumberType();
                if (numericDv != null) {
                    if (numericDv.advance(localId) == localId) {
                        final int valueCount = numericDv.docValueCount();
                        final List<Object> outValues = new ArrayList<Object>(valueCount);
                        for (int i = 0; i < valueCount; i++) {
                            long number = numericDv.nextValue();
                            switch (type) {
                            case INTEGER:
                                outValues.add((int) number);
                                break;
                            case LONG:
                                outValues.add(number);
                                break;
                            case FLOAT:
                                outValues.add(NumericUtils.sortableIntToFloat((int) number));
                                break;
                            case DOUBLE:
                                outValues.add(NumericUtils.sortableLongToDouble(number));
                                break;
                            case DATE:
                                outValues.add(new Date(number));
                                break;
                            default:
                                throw new AssertionError("Unexpected PointType: " + type);
                            }
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                break; // previously missing: execution fell through into the SORTED_SET case
            case SORTED_SET:
                final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
                if (values != null && values.getValueCount() > 0) {
                    if (values.advance(localId) == localId) {
                        final List<Object> outValues = new LinkedList<>();
                        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                                .nextOrd()) {
                            final BytesRef term = values.lookupOrd(ord);
                            outValues.add(schemaField.getType().toObject(schemaField, term));
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                break; // previously missing: execution fell through into the NONE case
            case NONE:
                break;
            }
        }
    }

    /**
     * Returns an unmodifiable set of non-stored docValues field names.
     *
     * @param onlyUseDocValuesAsStored
     *          If false, returns all non-stored docValues. If true, returns only those non-stored docValues which have
     *          the {@link SchemaField#useDocValuesAsStored()} flag true.
     */
    public Set<String> getNonStoredDVs(boolean onlyUseDocValuesAsStored) {
        return onlyUseDocValuesAsStored ? nonStoredDVsUsedAsStored : allNonStoredDVs;
    }

    /**
     * Returns an unmodifiable set of names of non-stored docValues fields, except those that are targets of a copy field.
     */
    public Set<String> getNonStoredDVsWithoutCopyTargets() {
        return nonStoredDVsWithoutCopyTargets;
    }

}