Example usage of the org.apache.lucene.index.SingletonSortedSetDocValues constructor

List of usage examples for org.apache.lucene.index SingletonSortedSetDocValues SingletonSortedSetDocValues

Introduction

On this page you can find example usage of the org.apache.lucene.index.SingletonSortedSetDocValues constructor.

Prototype

public SingletonSortedSetDocValues(SortedDocValues in) 

Source Link

Document

Creates a multi-valued view over the provided SortedDocValues

Usage

From source file: DocValuesFacets.java (class org.apache.solr.request.DocValuesFacets)

License:Apache License

/**
 * Computes facet counts for {@code fieldName} over the documents in {@code docs} using the
 * field's doc values.
 *
 * <p>Term lookups go through a top-level {@link SortedSetDocValues} view (a single-valued field is
 * wrapped in a {@link SingletonSortedSetDocValues}). Counting itself is done segment by segment via
 * {@code accumSingle} / {@code accumMulti}, which receive the {@link OrdinalMap} taken from the
 * top-level view when that view is a {@code Multi*} implementation.
 *
 * <p>NOTE(review): {@code limit == 0} is treated as "unbounded" by the count-sorted branch but as
 * "return nothing" by the index-ordered branch; presumably callers short-circuit
 * {@code limit == 0} before calling this method - confirm.
 *
 * @param searcher  searcher providing the schema, the top-level reader and the segment leaves
 * @param docs      the documents to count over
 * @param fieldName name of the field to facet on
 * @param offset    number of leading result entries to skip
 * @param limit     maximum number of entries to return; a negative value means no limit
 * @param mincount  minimum count a term must have to be included in the result
 * @param missing   passed through to {@code finalize} together with the missing count
 *                  (presumably controls whether that count is reported - confirm)
 * @param sort      {@code FacetParams.FACET_SORT_COUNT} or
 *                  {@code FacetParams.FACET_SORT_COUNT_LEGACY} orders entries by descending count;
 *                  any other value yields index (term) order
 * @param prefix    when non-empty, only terms starting with this prefix are considered;
 *                  {@code null} or empty means all terms
 * @return the value returned by {@code finalize} for the collected entries
 * @throws IOException                   propagated from the underlying index access
 * @throws UnsupportedOperationException if the field has {@code Integer.MAX_VALUE} or more
 *                                       unique terms
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<Integer>();

    final SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (schemaField.multiValued()) {
        si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
        si = single == null ? null : new SingletonSortedSetDocValues(single);
        if (single instanceof MultiSortedDocValues) {
            ordinalMap = ((MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        // no doc values available for this field: nothing to count
        return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        // ordinals are narrowed to int below, so the value count must fit in an int
        throw new UnsupportedOperationException(
                "Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    final BytesRef br = new BytesRef();

    // an empty prefix is treated exactly like no prefix at all
    final BytesRef prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRef(prefix);
    }

    // [startTermIndex, endTermIndex) is the ordinal range to collect counts for
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = (int) si.lookupTerm(prefixRef);
        if (startTermIndex < 0) {
            // a negative lookup result is decoded as -(insertion point) - 1
            startTermIndex = -startTermIndex - 1;
        }
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef);
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        // starting at -1 reserves counts[0]; it is read as the missing count further down
        startTermIndex = -1;
        endTermIndex = (int) si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRef charsRef = new CharsRef(10);
    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        Filter filter = docs.getTopFilter();
        List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
        for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
            AtomicReaderContext leaf = leaves.get(subIndex);
            DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
            DocIdSetIterator disi = null;
            if (dis != null) {
                disi = dis.iterator();
            }
            if (disi != null) {
                if (schemaField.multiValued()) {
                    SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                    if (sub == null) {
                        sub = SortedSetDocValues.EMPTY;
                    }
                    if (sub instanceof SingletonSortedSetDocValues) {
                        // some codecs may optimize SORTED_SET storage for single-valued fields
                        final SortedDocValues values = ((SingletonSortedSetDocValues) sub).getSortedDocValues();
                        accumSingle(counts, startTermIndex, values, disi, subIndex, ordinalMap);
                    } else {
                        accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                    }
                } else {
                    SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                    if (sub == null) {
                        sub = SortedDocValues.EMPTY;
                    }
                    accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                }
            }
        }

        if (startTermIndex == -1) {
            missingCount = counts[0];
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            // Sum in long: offset + limit can exceed Integer.MAX_VALUE when paging very deep, and a
            // wrapped (negative) int would otherwise be used as the queue size.
            int maxsize = limit > 0
                    ? (int) Math.min((long) offset + limit, Integer.MAX_VALUE - 1)
                    : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered.  This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).

                    // smaller term numbers sort higher, so subtract the term number instead
                    // (count in the high 32 bits, Integer.MAX_VALUE - index in the low 32 bits)
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced) {
                        min = (int) (queue.top() >>> 32);
                    }
                }
            }

            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;

            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);

            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                // unpack the (count, index) pair built above
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                si.lookupOrd(startTermIndex + tnum, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }

        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i += off;
                off = 0;
            }

            for (; i < nTerms; i++) {
                int c = counts[i];
                // skip terms below mincount, then consume the remaining offset
                if (c < mincount || --off >= 0) {
                    continue;
                }
                if (--lim < 0) {
                    break;
                }
                si.lookupOrd(startTermIndex + i, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }

    return finalize(res, searcher, schemaField, docs, missingCount, missing);
}