org.apache.solr.request.DocValuesStats.java Source code

Introduction

Here is the source code for org.apache.solr.request.DocValuesStats.java
Source

package org.apache.solr.request;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrException;
import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.StatsValuesFactory;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * Computes term stats for docvalues field (single or multivalued).
 * <p>
 * Instead of working on a top-level reader view (binary-search per docid),
 * it collects per-segment, but maps ordinals to global ordinal space using
 * MultiDocValues' OrdinalMap.
 */
public class DocValuesStats {
    private DocValuesStats() {
    }

    public static StatsValues getCounts(SolrIndexSearcher searcher, String fieldName, DocSet docs,
            boolean calcDistinct, String[] facet) throws IOException {
        SchemaField schemaField = searcher.getSchema().getField(fieldName);
        FieldType ft = schemaField.getType();
        StatsValues res = StatsValuesFactory.createStatsValues(schemaField, calcDistinct);

        //Initialize facetstats, if facets have been passed in
        final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
        int upto = 0;

        for (String facetField : facet) {
            SchemaField fsf = searcher.getSchema().getField(facetField);
            if (fsf.multiValued()) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "Stats can only facet on single-valued fields, not: " + facetField);
            }

            SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
            facetStats[upto++] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField,
                    calcDistinct);
        }
        // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
        final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

        SortedSetDocValues si; // for term lookups only
        OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
        if (multiValued) {
            si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);

            if (si instanceof MultiSortedSetDocValues) {
                ordinalMap = ((MultiSortedSetDocValues) si).mapping;
            }
        } else {
            SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
            si = single == null ? null : DocValues.singleton(single);
            if (single instanceof MultiSortedDocValues) {
                ordinalMap = ((MultiSortedDocValues) single).mapping;
            }
        }
        if (si == null) {
            si = DocValues.emptySortedSet();
        }
        if (si.getValueCount() >= Integer.MAX_VALUE) {
            throw new UnsupportedOperationException(
                    "Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
        }

        int missingDocCountTotal = 0;
        final int nTerms = (int) si.getValueCount();
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        Filter filter = docs.getTopFilter();
        List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();

        for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
            AtomicReaderContext leaf = leaves.get(subIndex);
            DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
            DocIdSetIterator disi = null;

            if (dis != null) {
                disi = dis.iterator();
            }
            if (disi != null) {
                int docBase = leaf.docBase;

                if (multiValued) {
                    SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySortedSet();
                    }
                    final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                    if (singleton != null) {
                        // some codecs may optimize SORTED_SET storage for single-valued fields
                        missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex,
                                ordinalMap);
                    } else {
                        missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex,
                                ordinalMap);
                    }
                } else {
                    SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySorted();
                    }
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex,
                            ordinalMap);
                }
            }
        }
        // add results in index order
        for (int ord = 0; ord < counts.length; ord++) {
            int count = counts[ord];

            if (count > 0) {
                final BytesRef value = si.lookupOrd(ord);
                res.accumulate(value, count);
                for (FieldFacetStats f : facetStats) {
                    f.accumulateTermNum(ord, value);
                }
            }
        }
        res.addMissing(missingDocCountTotal);

        if (facetStats.length > 0) {
            for (FieldFacetStats f : facetStats) {
                Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
                f.accumulateMissing();
                res.addFacet(f.name, facetStatsValues);
            }
        }

        return res;
    }

    /** accumulates per-segment single-valued stats */
    static int accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si,
            DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
        final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
        int missingDocCount = 0;
        int doc;
        while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            int term = si.getOrd(doc);
            if (term >= 0) {
                if (map != null) {
                    term = (int) ordMap.get(term);
                }
                counts[term]++;
                for (FieldFacetStats f : facetStats) {
                    f.facetTermNum(docBase + doc, term);
                }
            } else {
                for (FieldFacetStats f : facetStats) {
                    f.facetMissingNum(docBase + doc);
                }

                missingDocCount++;
            }
        }
        return missingDocCount;
    }

    /** accumulates per-segment multi-valued stats */

    static int accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si,
            DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
        final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
        int missingDocCount = 0;
        int doc;
        while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            si.setDocument(doc);
            long ord;
            boolean emptyTerm = true;
            while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                emptyTerm = false;
                int term = (int) ord;
                if (map != null) {
                    term = (int) ordMap.get(term);
                }
                counts[term]++;
                for (FieldFacetStats f : facetStats) {
                    f.facetTermNum(docBase + doc, term);
                }
            }
            if (emptyTerm) {
                for (FieldFacetStats f : facetStats) {
                    f.facetMissingNum(docBase + doc);
                }

                missingDocCount++;
            }
        }

        return missingDocCount;
    }
}