org.apache.solr.search.facet.UniqueSlotAcc.java Source code

Introduction

Here is the source code for org.apache.solr.search.facet.UniqueSlotAcc.java. UniqueSlotAcc is the abstract slot accumulator behind the JSON Facet API's unique() aggregation (and, via HLLAgg.HLLFactory, its HyperLogLog-based variant). For each facet bucket (slot) it records the ordinals of the distinct field values in a FixedBitSet and reports that set's cardinality as the unique count; on a shard it instead returns either the explicit values or a serialized HLL sketch so the coordinating node can merge results across shards.
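
The standalone sketch below shows that per-slot pattern in isolation, using the same FixedBitSet calls the class relies on. The class name UniqueSketch, the record() helper, and the hard-coded ordinals are illustrative only and are not part of Solr.

import org.apache.lucene.util.FixedBitSet;

// Standalone illustration of the per-slot accumulation idea in UniqueSlotAcc.
// A "slot" is a facet bucket; ordinals stand in for distinct field values.
public class UniqueSketch {

    // Hypothetical collect step: mark the ordinal as seen in the given bucket.
    static void record(FixedBitSet[] arr, int slot, int ord, int nTerms) {
        if (arr[slot] == null) {
            arr[slot] = new FixedBitSet(nTerms); // allocated lazily, hence the null checks
        }
        arr[slot].set(ord);
    }

    public static void main(String[] args) {
        int numSlots = 2; // two facet buckets
        int nTerms = 8;   // number of distinct values (ordinals) in the segment
        FixedBitSet[] arr = new FixedBitSet[numSlots];

        record(arr, 0, 3, nTerms);
        record(arr, 0, 3, nTerms); // duplicates are absorbed by the bit set
        record(arr, 0, 5, nTerms);
        record(arr, 1, 7, nTerms);

        // The unique count per bucket is simply the set's cardinality.
        for (int slot = 0; slot < numSlots; slot++) {
            FixedBitSet bs = arr[slot];
            System.out.println("slot " + slot + " unique=" + (bs == null ? 0 : bs.cardinality()));
        }
    }
}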

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.util.Hash;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.hll.HLL;

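/**
 * Abstract accumulator for "unique" style facet aggregations: each slot (facet
 * bucket) owns a FixedBitSet of the ordinals of the distinct field values seen
 * in that bucket. Subclasses drive collection and implement lookupOrd() to map
 * an ordinal back to its BytesRef value; an optional HLLAgg.HLLFactory switches
 * the shard response from explicit values to a HyperLogLog sketch.
 */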
abstract class UniqueSlotAcc extends SlotAcc {
    HLLAgg.HLLFactory factory;
    SchemaField field;
    FixedBitSet[] arr;
    int currentDocBase;
    int[] counts; // per-slot cardinalities, populated once when needed (e.g. when sorting by count)
    int nTerms;

    public UniqueSlotAcc(FacetContext fcontext, String field, int numSlots, HLLAgg.HLLFactory factory)
            throws IOException {
        super(fcontext);
        this.factory = factory;
        arr = new FixedBitSet[numSlots];
        this.field = fcontext.searcher.getSchema().getField(field);
    }

    @Override
    public void reset() {
        counts = null;
        for (FixedBitSet bits : arr) {
            if (bits == null)
                continue;
            bits.clear(0, bits.length());
        }
    }

    @Override
    public void setNextReader(LeafReaderContext readerContext) throws IOException {
        currentDocBase = readerContext.docBase;
    }

    @Override
    public Object getValue(int slot) throws IOException {
        if (fcontext.isShard()) {
            return getShardValue(slot);
        }
        if (counts != null) { // will only be pre-populated if this was used for sorting.
            return counts[slot];
        }

        FixedBitSet bs = arr[slot];
        return bs == null ? 0 : bs.cardinality();
    }

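    // Shard response when an HLLFactory is present: hash each distinct value's
    // bytes with murmurhash3, feed the 64-bit hashes into an HLL sketch, and
    // return the serialized sketch so the coordinating node can union the
    // per-shard sketches into an approximate global cardinality.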
    private Object getShardHLL(int slot) throws IOException {
        FixedBitSet ords = arr[slot];
        if (ords == null)
            return HLLAgg.NO_VALUES;

        HLL hll = factory.getHLL();
        long maxOrd = ords.length();
        Hash.LongPair hashResult = new Hash.LongPair();
        for (int ord = -1; ++ord < maxOrd;) {
            ord = ords.nextSetBit(ord);
            if (ord == DocIdSetIterator.NO_MORE_DOCS)
                break;
            BytesRef val = lookupOrd(ord);
            // way to avoid recomputing hash across slots?  Prob not worth space
            Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult);
            // idea: if the set is small enough, just send the hashes?  We can add at the top
            // level or even just do a hash table at the top level.
            hll.addRaw(hashResult.val1);
        }

        SimpleOrderedMap map = new SimpleOrderedMap();
        map.add("hll", hll.toBytes());
        return map;
    }

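    // Shard response without an HLLFactory: report the exact per-shard unique
    // count and nTerms, plus up to maxExplicit (100) of the actual values so
    // the coordinating node can combine shard results.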
    private Object getShardValue(int slot) throws IOException {
        if (factory != null)
            return getShardHLL(slot);
        FixedBitSet ords = arr[slot];
        int unique;
        if (counts != null) {
            unique = counts[slot];
        } else {
            unique = ords == null ? 0 : ords.cardinality();
        }

        SimpleOrderedMap map = new SimpleOrderedMap();
        map.add("unique", unique);
        map.add("nTerms", nTerms);

        int maxExplicit = 100;
        // TODO: make configurable
        // TODO: share values across buckets
        if (unique > 0) {

            List lst = new ArrayList(Math.min(unique, maxExplicit));

            if (ords != null && ords.length() > 0) {
                long maxOrd = ords.length();
                for (int ord = 0; lst.size() < maxExplicit;) {
                    ord = ords.nextSetBit(ord);
                    if (ord == DocIdSetIterator.NO_MORE_DOCS)
                        break;
                    BytesRef val = lookupOrd(ord);
                    Object o = field.getType().toObject(field, val);
                    lst.add(o);
                    if (++ord >= maxOrd)
                        break;
                }
            }

            map.add("vals", lst);
        }

        return map;
    }

    protected abstract BytesRef lookupOrd(int ord) throws IOException;

    // we only calculate all the counts when sorting by count
    public void calcCounts() {
        counts = new int[arr.length];
        for (int i = 0; i < arr.length; i++) {
            FixedBitSet bs = arr[i];
            counts[i] = bs == null ? 0 : bs.cardinality();
        }
    }

    @Override
    public int compare(int slotA, int slotB) {
        if (counts == null) { // TODO: a more efficient way to do this?  prepareSort?
            calcCounts();
        }
        return counts[slotA] - counts[slotB];
    }

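    // Grow the per-slot array when the number of facet buckets changes;
    // newly added slots start out null and get their bit sets lazily.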
    @Override
    public void resize(Resizer resizer) {
        arr = resizer.resize(arr, null);
    }
}
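
For distributed requests, the serialized sketch that getShardHLL() puts under the "hll" key is deserialized and unioned on the coordinating node (HLLAgg's merge logic handles this in Solr). The short sketch below only illustrates that merge step; it is not Solr's actual merger, and it assumes the org.apache.solr.util.hll.HLL class exposes fromBytes, union, and cardinality as used elsewhere in the facet module.

import java.util.List;

import org.apache.solr.util.hll.HLL;

// Illustrative merge of per-shard HLL payloads into one global estimate
// (a sketch, not Solr's HLLAgg merger).
public class HllMergeSketch {
    public static long mergedCardinality(List<byte[]> shardSketches) {
        HLL merged = null;
        for (byte[] bytes : shardSketches) {
            HLL shard = HLL.fromBytes(bytes); // assumed deserialization API
            if (merged == null) {
                merged = shard;
            } else {
                merged.union(shard); // fold each shard's sketch into the running union
            }
        }
        return merged == null ? 0 : merged.cardinality(); // approximate global unique count
    }
}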