Example usage for org.apache.lucene.util PriorityQueue PriorityQueue

List of usage examples for org.apache.lucene.util PriorityQueue PriorityQueue

Introduction

On this page you can find an example of usage for org.apache.lucene.util PriorityQueue PriorityQueue.

Prototype

public PriorityQueue(int maxSize) 

Source Link

Document

Create an empty priority queue of the configured size.

Usage

From source file:io.ssc.relationdiscovery.KMeans.java

License:Open Source License

/**
 * Prints the {@code howMany} patterns whose rows are nearest to the given
 * centroid, one per line (farthest of the retained set is printed first,
 * since pop() returns the heap top).
 *
 * @param centroidIndex index into {@code centroids} of the reference centroid
 * @param howMany       number of closest patterns to print
 * @param patterns      row-index to pattern-label lookup
 */
public void printClosestPoints(int centroidIndex, int howMany, OpenIntObjectHashMap<String> patterns) {

    // Bounded heap of the 'howMany' nearest patterns. With Lucene's
    // PriorityQueue, top() is the "least" element and insertWithOverflow
    // evicts it when full, so lessThan must rank LARGER distances as "least"
    // for the queue to retain the smallest distances. The original
    // comparator (a.distance < b.distance) evicted the closest points.
    PriorityQueue<PatternWithDistance> queue = new PriorityQueue<PatternWithDistance>(howMany) {
        @Override
        protected boolean lessThan(PatternWithDistance a, PatternWithDistance b) {
            return a.distance > b.distance;
        }
    };

    Vector centroid = centroids[centroidIndex];

    for (MatrixSlice rowSlice : A) {
        Vector row = rowSlice.vector();
        double distance = distanceMeasure.distance(centroid, row);
        queue.insertWithOverflow(new PatternWithDistance(distance, patterns.get(rowSlice.index())));
    }

    while (queue.size() > 0) {
        System.out.println("\t" + queue.pop());
    }

}

From source file:org.apache.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java

License:Apache License

/**
 * Finds the smallest among the {@code maxPrefsPerUserConsidered} largest
 * absolute preference values in the user's vector.
 *
 * @param userVector the user's preference vector
 * @return the smallest of the retained top absolute values, or 0 if the
 *         vector has no non-zero entries (the original unconditionally
 *         unboxed {@code top()} and would NPE on an empty queue)
 */
private float findSmallestLargeValue(Vector userVector) {

    // Min-heap over the largest absolute values seen so far: lessThan puts
    // the smallest retained value on top, which insertWithOverflow evicts
    // first, so the queue keeps the maxPrefsPerUserConsidered largest values.
    PriorityQueue<Float> topPrefValues = new PriorityQueue<Float>(maxPrefsPerUserConsidered) {
        @Override
        protected boolean lessThan(Float f1, Float f2) {
            return f1 < f2;
        }
    };

    for (Element e : userVector.nonZeroes()) {
        float absValue = Math.abs((float) e.get());
        topPrefValues.insertWithOverflow(absValue);
    }
    // Guard against an all-zero vector: top() returns null on an empty
    // queue and auto-unboxing to float would throw a NullPointerException.
    Float smallestLarge = topPrefValues.top();
    return smallestLarge == null ? 0.0f : smallestLarge;
}

From source file:org.apache.mahout.math.neighborhood.Searcher.java

License:Apache License

/**
 * Builds a size-bounded priority queue for tracking the best nearest-neighbor
 * candidates. The comparator ranks the heaviest candidate as "least", so it
 * sits on top of the heap and is the first to be evicted on overflow; the
 * queue therefore retains the {@code limit} smallest-weight candidates.
 *
 * @param limit maximum size of the heap.
 * @return the priority queue.
 */
public static PriorityQueue<WeightedThing<Vector>> getCandidateQueue(int limit) {
    PriorityQueue<WeightedThing<Vector>> candidates = new PriorityQueue<WeightedThing<Vector>>(limit) {
        @Override
        protected boolean lessThan(WeightedThing<Vector> first, WeightedThing<Vector> second) {
            return first.getWeight() > second.getWeight();
        }
    };
    return candidates;
}

From source file:org.apache.solr.cloud.SizeLimitedDistributedMap.java

License:Apache License

/**
 * Stores {@code data} under {@code trackingId}; when the map has reached
 * {@code maxSize}, first deletes the oldest ~10% of entries, using each
 * znode's mzxid (last-modification transaction id) as its age.
 */
@Override
public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
    if (this.size() >= maxSize) {
        // Bring down the size
        List<String> children = zookeeper.getChildren(dir, null, true);

        int cleanupSize = maxSize / 10;

        // Heap that retains the cleanupSize numerically-smallest (oldest)
        // mzxids: lessThan ranks larger ids as "least" so they sit on top and
        // are evicted first. After the fill loop, top() is the newest of the
        // retained old ids, i.e. the eviction cutoff.
        // (Was a raw PriorityQueue, which forced a cast on top() below.)
        final PriorityQueue<Long> priorityQueue = new PriorityQueue<Long>(cleanupSize) {
            @Override
            protected boolean lessThan(Long a, Long b) {
                return a > b;
            }
        };

        for (String child : children) {
            Stat stat = zookeeper.exists(dir + "/" + child, null, true);
            priorityQueue.insertWithOverflow(stat.getMzxid());
        }

        long topElementMzxId = priorityQueue.top();

        // Second pass: delete every child at or below the cutoff.
        // NOTE(review): exists() is called twice per child and could return
        // null if a node vanishes between passes — TODO confirm upstream.
        for (String child : children) {
            Stat stat = zookeeper.exists(dir + "/" + child, null, true);
            if (stat.getMzxid() <= topElementMzxId) {
                zookeeper.delete(dir + "/" + child, -1, true);
            }
        }
    }

    super.put(trackingId, data);
}

From source file:org.apache.solr.request.NumericFacets.java

License:Apache License

/**
 * Computes facet counts for a numeric (trie) field over {@code docs} using the
 * field cache, honoring offset/limit/mincount/missing/sort. Values are
 * accumulated as raw long bits; float/double values are mapped through the
 * NumericUtils "sortable" encodings so bit ordering matches numeric ordering.
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    // zeros == true means zero-count terms must also be returned, which
    // requires merging with the terms dictionary further below.
    final boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumericType numericType = ft.getNumericType();
    if (numericType == null) {
        throw new IllegalStateException();
    }
    final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();

    // 1. accumulate
    final HashTable hashTable = new HashTable();
    final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
    AtomicReaderContext ctx = null;
    FieldCache.Longs longs = null;
    Bits docsWithField = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        // advance to the segment containing this doc (doc ids arrive in order)
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            switch (numericType) {
            case LONG:
                longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
                break;
            case INT:
                final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return ints.get(docID);
                    }
                };
                break;
            case FLOAT:
                final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.floatToSortableInt(floats.get(docID));
                    }
                };
                break;
            case DOUBLE:
                final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
                longs = new FieldCache.Longs() {
                    @Override
                    public long get(int docID) {
                        return NumericUtils.doubleToSortableLong(doubles.get(docID));
                    }
                };
                break;
            default:
                throw new AssertionError();
            }
            docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
        }
        long v = longs.get(doc - ctx.docBase);
        // 0 is ambiguous (default for absent values AND a real value), so
        // consult docsWithField before deciding missing vs. counted.
        if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
            hashTable.add(doc, v, 1);
        } else {
            ++missingCount;
        }
    }

    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // count sort: lowest count (ties broken by larger bits) on top, so it
        // is evicted first and the queue retains the highest counts
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                    return true;
                } else {
                    return false;
                }
            }
        };
    } else {
        // index sort: largest bits on top, so the smallest values are retained
        pq = new PriorityQueue<Entry>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            // reuse the evicted entry (if any) to avoid one allocation per value
            e = pq.insertWithOverflow(e);
        }
    }

    // 4. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<Integer>();

    // This stuff is complicated because if facet.mincount=0, the counts needs
    // to be merged with terms from the terms dict
    if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort)
            || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<Entry>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }

        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }

        if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
            if (!sf.indexed()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field "
                        + sf.getName() + " which is not indexed");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<String>();
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getAtomicReader().terms(fieldName);
            if (terms != null) {
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator(null);
                BytesRef term;
                switch (termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
                }
                final CharsRef spare = new CharsRef();
                // skip over 'offset' unseen terms before emitting zero counts
                for (int skipped = hashTable.size; skipped < offset && term != null
                        && StringHelper.startsWith(term, prefix);) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                for (; term != null && StringHelper.startsWith(term, prefix)
                        && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // sort=index, mincount=0 and we have less than limit items
        // => Merge the PQ and the terms dictionary on the fly
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "="
                    + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        final Map<String, Integer> counts = new HashMap<String, Integer>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator(null);
            BytesRef term;
            switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
                term = termsEnum.term();
                break;
            case END:
                term = null;
                break;
            default:
                throw new AssertionError();
            }
            final CharsRef spare = new CharsRef();
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix)
                    && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }

    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}

From source file:org.apache.solr.request.PerSegmentSingleValuedFaceting.java

License:Apache License

/**
 * Computes facet counts by counting each index segment independently (tasks
 * run on the supplied executor, at most nThreads concurrently) and then
 * merging the per-segment term streams with a priority queue.
 */
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {

    CompletionService<SegFacet> completionService = new ExecutorCompletionService<SegFacet>(executor);

    // reuse the translation logic to go from top level set to per-segment set
    baseSet = docs.getTopFilter();

    final List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<Callable<SegFacet>>();

    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;

    for (final AtomicReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);

        Callable<SegFacet> task = new Callable<SegFacet>() {
            @Override
            public SegFacet call() throws Exception {
                segFacet.countTerms();
                return segFacet;
            }
        };

        // TODO: if limiting threads, submit by largest segment first?

        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }

    // now merge the per-segment results
    // Heap ordered by each segment's current term bytes: top() is the segment
    // positioned on the smallest term, giving a streaming merge sort.
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };

    boolean hasMissingCount = false;
    int missingCount = 0;
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;

        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            // each completed task frees a slot for one of the deferred tasks
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            // re-interrupt so callers still observe the interruption
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }

        if (seg.startTermIndex < seg.endTermIndex) {
            // startTermIndex == -1: counts[0] holds this segment's missing count
            if (seg.startTermIndex == -1) {
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            if (seg.pos < seg.endTermIndex) {
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }

    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }

    BytesRef val = new BytesRef();

    while (queue.size() > 0) {
        SegFacet seg = queue.top();

        // we will normally end up advancing the term enum for this segment
        // while still using "val", so we need to make a copy since the BytesRef
        // may be shared across calls.
        val.copyBytes(seg.tempBR);

        int count = 0;

        // sum this term's count across every segment currently positioned on it
        do {
            count += seg.counts[seg.pos - seg.startTermIndex];

            // TODO: OPTIMIZATION...
            // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
            seg.pos++;
            if (seg.pos >= seg.endTermIndex) {
                queue.pop();
                seg = queue.top();
            } else {
                seg.tempBR = seg.tenum.next();
                seg = queue.updateTop();
            }
        } while (seg != null && val.compareTo(seg.tempBR) == 0);

        boolean stop = collector.collect(val, count);
        if (stop)
            break;
    }

    NamedList<Integer> res = collector.getFacetCounts();

    // convert labels to readable form
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }

    if (missing) {
        if (!hasMissingCount) {
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }

    return res;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessor.java

License:Apache License

/** Processes the collected data to find the top slots, and composes it in the response NamedList. */
SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int slotCardinality,
        IntFunction<Comparable> bucketValFromSlotNumFunc, Function<Comparable, String> fieldQueryValFunc)
        throws IOException {
    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        // shard responses include a sample of bucket values (capped at 100)
        bucketVals = new ArrayList<>(100);
    }

    // shards return everything from 0; only the coordinator applies the offset
    final int off = fcontext.isShard() ? 0 : (int) freq.offset;

    long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
    if (freq.limit >= 0) {
        effectiveLimit = freq.limit;
        if (fcontext.isShard()) {
            // add over-request if this is a shard request
            if (freq.overrequest == -1) {
                effectiveLimit = (long) (effectiveLimit * 1.1 + 4); // default: add 10% plus 4 (to overrequest for very small limits)
            } else {
                effectiveLimit += freq.overrequest;
            }
        }
    }

    final int sortMul = freq.sortDirection.getMultiplier();

    int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(off + effectiveLimit, Integer.MAX_VALUE - 1)
            : Integer.MAX_VALUE - 1);
    maxTopVals = Math.min(maxTopVals, slotCardinality);
    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    // orderPredicate.test(a, b) == true means a ranks worse than b; ties on
    // the sort accumulator fall back to index order (or slot number).
    final BiPredicate<Slot, Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        };
    } else {
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? b.slot < a.slot : cmp < 0;
        };
    }
    // bounded heap of top candidates; the worst retained slot sits on top
    final PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxTopVals) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            return orderPredicate.test(a, b);
        }
    };

    // note: We avoid object allocation by having a Slot and re-using the 'bottom'.
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {
        // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
        if (effectiveMincount > 0 && countAcc.getCount(slotNum) < effectiveMincount) {
            continue;
        }

        numBuckets++;
        if (bucketVals != null && bucketVals.size() < 100) {
            Object val = bucketValFromSlotNumFunc.apply(slotNum);
            bucketVals.add(val);
        }

        if (bottom != null) {
            // queue is full: replace the worst slot only if this one beats it
            scratchSlot.slot = slotNum; // scratchSlot is only used to hold this slotNum for the following line
            if (orderPredicate.test(bottom, scratchSlot)) {
                bottom.slot = slotNum;
                bottom = queue.updateTop();
            }
        } else if (effectiveLimit > 0) {
            // queue not full
            Slot s = new Slot();
            s.slot = slotNum;
            queue.add(s);
            if (queue.size() >= maxTopVals) {
                bottom = queue.top();
            }
        }
    }

    assert queue.size() <= numBuckets;

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", allBucketsAcc.getSpecialCount());
        allBucketsAcc.setValues(allBuckets, -1); // -1 slotNum is unused for SpecialSlotAcc
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= effectiveLimit;
    int[] sortedSlots = new int[collectCount];
    // pop worst-first, filling the array from the back so it ends up best-first
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = bucketValFromSlotNumFunc.apply(slotNum);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, fieldQueryValFunc.apply(val)) : null;

        fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByHashNumeric.java

License:Apache License

/**
 * Computes facet buckets for a numeric field by hashing each doc's value into
 * a long->count table, then selecting the top slots with a bounded priority
 * queue and filling the response buckets.
 */
private SimpleOrderedMap<Object> calcFacets() throws IOException {

    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);

    // TODO: it would be really nice to know the number of unique values!!!!

    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            // slot numbers change on rehash, so dependent accumulators must
            // be remapped too
            doRehash(this);
            oldToNewMapping = null; // allow for gc
        }
    };

    int numSlots = currHashSize;

    int numMissing = 0;

    if (freq.allBuckets) {
        allBucketsSlot = numSlots++;
    }

    // index-order comparisons are derived from the stored value bits
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };

    // counts live in the hash table itself, so this accumulator is read-only
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };

    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);

    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }

    NumericDocValues values = null;
    Bits docsWithField = null;

    // TODO: factor this code out so it can be shared...
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        // advance to the segment containing this doc (doc ids arrive in order)
        if (doc >= adjustedMax) {
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);

            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }

        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // 0 is ambiguous (default for absent values AND a real value), so
        // consult docsWithField before counting it
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash

            // countAcc.incrementCount(slot, 1);
            // our countAcc is virtual, so this is not needed

            collectFirstPhase(segDoc, slot);
        }
    }

    //
    // collection done, time to find the top slots
    //

    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }

    int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;

    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);

    final int sortMul = freq.sortDirection.getMultiplier();

    // bounded heap of top slots; the worst retained slot sits on top and is
    // the one replaced by insertWithOverflow
    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };

    // TODO: create a countAcc that wrapps the table so we can reuse more code?

    // 'bottom' recycles the Slot object evicted by the queue to avoid
    // allocating one per candidate
    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1

        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }

        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;

        bottom = queue.insertWithOverflow(bottom);
    }

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.

        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    int[] sortedSlots = new int[collectCount];
    // pop worst-first, filling the array from the back so it ends up best-first
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;

        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorNumeric.java

License:Apache License

/**
 * Computes facet buckets for a numeric field.
 *
 * Each matching document's numeric doc value is hashed into a long->slot
 * table ({@code LongCounts}); after collection, slots that meet the
 * effective mincount are ranked with a bounded priority queue and the top
 * buckets (honoring offset/limit, with shard over-request) are emitted.
 *
 * @return the facet response: optional "numBuckets", "allBuckets",
 *         "missing" entries plus the ordered "buckets" list
 * @throws IOException on index access failure
 */
public SimpleOrderedMap<Object> calcFacets() throws IOException {

    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);

    // TODO: it would be really nice to know the number of unique values!!!!

    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            // slots move on rehash, so any accumulators keyed by slot must be remapped
            doRehash(this);
            oldToNewMapping = null; // allow for gc
        }
    };

    int numSlots = currHashSize;

    if (freq.allBuckets) {
        // reserve one extra slot to accumulate the "allBuckets" aggregate
        allBucketsSlot = numSlots++;
    }

    // virtual accumulator: index order for numerics is the sortable-bits order
    // of the hashed values, so no per-doc collection is needed
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };

    // virtual count accumulator backed directly by the hash table's counts;
    // mutating operations are unsupported because the table owns the counts
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };

    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);

    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }

    NumericDocValues values = null;
    Bits docsWithField = null;

    // TODO: factor this code out so it can be shared...
    // walk the base doc set in order, advancing through segments as needed
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (doc >= adjustedMax) {
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);

            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }

        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // val==0 is ambiguous (missing values also read as 0), so consult docsWithField
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash

            // countAcc.incrementCount(slot, 1);
            // our countAcc is virtual, so this is not needed

            collectFirstPhase(segDoc, slot);
        }
    }

    //
    // collection done, time to find the top slots
    //

    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }

    // shards return everything from offset 0 so the coordinator can merge/page
    int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;

    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);

    final int sortMul = freq.sortDirection.getMultiplier();

    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            // ties broken by index order so results are deterministic
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };

    // TODO: create a countAcc that wraps the table so we can reuse more code?

    // reuse one Slot instance; insertWithOverflow hands back an evicted (or the
    // rejected) Slot which becomes the scratch object for the next iteration
    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1

        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }

        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;

        bottom = queue.insertWithOverflow(bottom);
    }

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            // shard responses include a sample of values so the coordinator can
            // estimate the merged unique-bucket count
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets); // avoid deprecated Long(long) ctor

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.

        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    // pop yields worst-first, so fill the slot array from the back
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    // sub-facets or deferred aggregations need the bucket's domain as a filter query
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;

        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}

From source file:org.codelibs.bench.core.action.BenchmarkExecutor.java

License:Apache License

/**
 * Selects the {@code topN} slowest requests by average execution time.
 *
 * {@code buckets} is laid out as {@code multiplier} consecutive rounds of
 * {@code requests.size()} timings: round j's timing for request i lives at
 * {@code buckets[i + numRequests * j]}.
 *
 * @param buckets    flat timing array, length >= numRequests * multiplier
 * @param requests   the requests that produced the timings
 * @param multiplier number of timing rounds per request (must be > 0)
 * @param topN       maximum number of slow requests to report
 * @return slow requests in descending average-time order
 */
private CompetitionIteration.SlowRequest[] getTopN(long[] buckets, List<SearchRequest> requests, int multiplier,
        int topN) {

    final int numRequests = requests.size();
    // collect the top N: min-heap ordered by average time, so the root is the
    // fastest of the current top-N and therefore the eviction candidate
    final PriorityQueue<IndexAndTime> topNQueue = new PriorityQueue<IndexAndTime>(topN) {
        @Override
        protected boolean lessThan(IndexAndTime a, IndexAndTime b) {
            return a.avgTime < b.avgTime;
        }
    };
    assert multiplier > 0;
    for (int i = 0; i < numRequests; i++) {
        long sum = 0;
        long max = Long.MIN_VALUE;
        for (int j = 0; j < multiplier; j++) {
            final int base = (numRequests * j);
            sum += buckets[i + base];
            max = Math.max(buckets[i + base], max);
        }
        final long avg = sum / multiplier; // integer division truncates the remainder
        if (topNQueue.size() < topN) {
            topNQueue.add(new IndexAndTime(i, max, avg));
        } else if (topNQueue.top().avgTime < avg) {
            // fix: compare avg vs avg — the queue orders by avgTime, but the
            // original compared the root's avgTime against this request's MAX
            // time, mixing metrics and admitting entries with tiny averages
            topNQueue.top().update(i, max, avg);
            topNQueue.updateTop();

        }
    }

    // pop yields fastest-first, so fill the result array from the back to end
    // up with descending average-time order
    final CompetitionIteration.SlowRequest[] slowRequests = new CompetitionIteration.SlowRequest[topNQueue
            .size()];
    int i = topNQueue.size() - 1;

    while (topNQueue.size() > 0) {
        IndexAndTime pop = topNQueue.pop();
        CompetitionIteration.SlowRequest slow = new CompetitionIteration.SlowRequest(pop.avgTime, pop.maxTime,
                requests.get(pop.index));
        slowRequests[i--] = slow;
    }

    return slowRequests;
}