Usage examples for org.apache.lucene.util.BitUtil#nextHighestPowerOfTwo(long)
public static long nextHighestPowerOfTwo(long v)
From source file:org.apache.solr.search.facet.FacetFieldProcessorByHashDV.java
License:Apache License
private SimpleOrderedMap<Object> calcFacets() throws IOException { if (sf.getType().getNumericType() != null) { calc = FacetRangeProcessor.getNumericCalc(sf); } else {/*from www .j a v a 2s. c o m*/ calc = new TermOrdCalc(); // kind of a hack } // TODO: Use the number of indexed terms, if present, as an estimate! // Even for NumericDocValues, we could check for a terms index for an estimate. // Our estimation should aim high to avoid expensive rehashes. int possibleValues = fcontext.base.size(); // size smaller tables so that no resize will be necessary int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1)); currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE); table = new LongCounts(currHashSize) { @Override protected void rehash() { super.rehash(); doRehash(this); oldToNewMapping = null; // allow for gc } }; // note: these methods/phases align with FacetFieldProcessorByArray's createCollectAcc(); collectDocs(); return super.findTopSlots(table.numSlots(), table.cardinality(), slotNum -> calc.bitsToValue(table.vals[slotNum]), // getBucketValFromSlotNum val -> calc.formatValue(val)); // getFieldQueryVal }
From source file:org.apache.solr.search.facet.FacetFieldProcessorByHashNumeric.java
License:Apache License
/**
 * Computes facet buckets for a numeric field: hashes each document's numeric
 * doc value into a {@code LongCounts} table while running the first collection
 * phase, then selects the top slots with a bounded priority queue and builds
 * the response (numBuckets / allBuckets / missing / buckets sections).
 *
 * @return the facet response for this field
 * @throws IOException on index access errors
 */
private SimpleOrderedMap<Object> calcFacets() throws IOException {
    final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);

    // TODO: it would be really nice to know the number of unique values!!!!
    int possibleValues = fcontext.base.size();
    // size smaller tables so that no resize will be necessary
    int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
    currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);

    final LongCounts table = new LongCounts(currHashSize) {
        @Override
        protected void rehash() {
            super.rehash();
            doRehash(this); // remap per-slot accumulator state to the new slot layout
            oldToNewMapping = null; // allow for gc
        }
    };

    int numSlots = currHashSize;
    int numMissing = 0; // NOTE(review): appears unused in this method
    if (freq.allBuckets) {
        allBucketsSlot = numSlots++; // reserve one extra slot for the "allBuckets" pseudo-bucket
    }

    // Sort-by-index is derived from the hashed values themselves; nothing to collect.
    indexOrderAcc = new SlotAcc(fcontext) {
        @Override
        public void collect(int doc, int slot) throws IOException {
        }

        @Override
        public int compare(int slotA, int slotB) {
            long s1 = calc.bitsToSortableBits(table.vals[slotA]);
            long s2 = calc.bitsToSortableBits(table.vals[slotB]);
            return Long.compare(s1, s2);
        }

        @Override
        public Object getValue(int slotNum) throws IOException {
            return null;
        }

        @Override
        public void reset() {
        }

        @Override
        public void resize(Resizer resizer) {
        }
    };

    // Counts live in the hash table; this accumulator is a read-only view of it.
    countAcc = new CountSlotAcc(fcontext) {
        @Override
        public void incrementCount(int slot, int count) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getCount(int slot) {
            return table.counts[slot];
        }

        @Override
        public Object getValue(int slotNum) {
            return getCount(slotNum);
        }

        @Override
        public void reset() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void collect(int doc, int slot) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public int compare(int slotA, int slotB) {
            return Integer.compare(table.counts[slotA], table.counts[slotB]);
        }

        @Override
        public void resize(Resizer resizer) {
            throw new UnsupportedOperationException();
        }
    };

    // we set the countAcc & indexAcc first so generic ones won't be created for us.
    createCollectAcc(fcontext.base.size(), numSlots);

    if (freq.allBuckets) {
        allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
    }

    NumericDocValues values = null;
    Bits docsWithField = null;

    // TODO: factor this code out so it can be shared...
    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) {
        final int doc = docsIt.nextDoc();
        if (doc >= adjustedMax) {
            // advance to the segment containing this doc (docs arrive in global order)
            do {
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReaderFirstPhase(ctx);
            values = DocValues.getNumeric(ctx.reader(), sf.getName());
            docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
        }

        int segDoc = doc - segBase;
        long val = values.get(segDoc);
        // a raw 0 may mean "missing"; only count it if the field is actually present
        if (val != 0 || docsWithField.get(segDoc)) {
            int slot = table.add(val); // this can trigger a rehash
            // countAcc.incrementCount(slot, 1); // our countAcc is virtual, so this is not needed
            collectFirstPhase(segDoc, slot);
        }
    }

    //
    // collection done, time to find the top slots
    //

    int numBuckets = 0;
    List<Object> bucketVals = null;
    if (freq.numBuckets && fcontext.isShard()) {
        bucketVals = new ArrayList<>(100);
    }

    int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // add a modest amount of over-request if this is a shard request
    int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit)
            : Integer.MAX_VALUE;

    int maxsize = (int) (freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
    maxsize = Math.min(maxsize, table.cardinality);

    final int sortMul = freq.sortDirection.getMultiplier();

    PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            // TODO: sort-by-index-order
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            // ties broken by index order so results are deterministic
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        }
    };

    // TODO: create a countAcc that wraps the table so we can reuse more code?

    Slot bottom = null;
    for (int i = 0; i < table.counts.length; i++) {
        int count = table.counts[i];
        if (count < effectiveMincount) {
            // either not a valid slot, or count not high enough
            continue;
        }
        numBuckets++; // can be different from the table cardinality if mincount > 1

        long val = table.vals[i];
        if (bucketVals != null && bucketVals.size() < 100) {
            bucketVals.add(calc.bitsToValue(val));
        }

        if (bottom == null) {
            bottom = new Slot();
        }
        bottom.slot = i;
        bottom = queue.insertWithOverflow(bottom); // reuse the evicted Slot object next iteration
    }

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            // shards also report sample values so the merger can estimate the global cardinality
            SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
            map.add("numBuckets", numBuckets);
            map.add("vals", bucketVals);
            res.add("numBuckets", map);
        }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null)
        fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", table.numAdds);
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }

    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
        res.add("missing", missingBucket);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= lim;
    int[] sortedSlots = new int[collectCount];
    // pop fills from the back so the final array is in descending rank order
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }

    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);

    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;

    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = calc.bitsToValue(table.vals[slotNum]);
        bucket.add("val", val);

        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;

        fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);

        bucketList.add(bucket);
    }

    return res;
}
From source file:org.apache.solr.search.facet.FacetFieldProcessorNumeric.java
License:Apache License
public SimpleOrderedMap<Object> calcFacets() throws IOException { final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf); // TODO: it would be really nice to know the number of unique values!!!! int possibleValues = fcontext.base.size(); // size smaller tables so that no resize will be necessary int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1)); currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE); final LongCounts table = new LongCounts(currHashSize) { @Override//w w w. j ava 2s .co m protected void rehash() { super.rehash(); doRehash(this); oldToNewMapping = null; // allow for gc } }; int numSlots = currHashSize; int numMissing = 0; if (freq.allBuckets) { allBucketsSlot = numSlots++; } indexOrderAcc = new SlotAcc(fcontext) { @Override public void collect(int doc, int slot) throws IOException { } @Override public int compare(int slotA, int slotB) { long s1 = calc.bitsToSortableBits(table.vals[slotA]); long s2 = calc.bitsToSortableBits(table.vals[slotB]); return Long.compare(s1, s2); } @Override public Object getValue(int slotNum) throws IOException { return null; } @Override public void reset() { } @Override public void resize(Resizer resizer) { } }; countAcc = new CountSlotAcc(fcontext) { @Override public void incrementCount(int slot, int count) { throw new UnsupportedOperationException(); } @Override public int getCount(int slot) { return table.counts[slot]; } @Override public Object getValue(int slotNum) { return getCount(slotNum); } @Override public void reset() { throw new UnsupportedOperationException(); } @Override public void collect(int doc, int slot) throws IOException { throw new UnsupportedOperationException(); } @Override public int compare(int slotA, int slotB) { return Integer.compare(table.counts[slotA], table.counts[slotB]); } @Override public void resize(Resizer resizer) { throw new UnsupportedOperationException(); } }; // we set the countAcc & indexAcc 
first so generic ones won't be created for us. createCollectAcc(fcontext.base.size(), numSlots); if (freq.allBuckets) { allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0); } NumericDocValues values = null; Bits docsWithField = null; // TODO: factor this code out so it can be shared... final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves(); final Iterator<LeafReaderContext> ctxIt = leaves.iterator(); LeafReaderContext ctx = null; int segBase = 0; int segMax; int adjustedMax = 0; for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext();) { final int doc = docsIt.nextDoc(); if (doc >= adjustedMax) { do { ctx = ctxIt.next(); segBase = ctx.docBase; segMax = ctx.reader().maxDoc(); adjustedMax = segBase + segMax; } while (doc >= adjustedMax); assert doc >= ctx.docBase; setNextReaderFirstPhase(ctx); values = DocValues.getNumeric(ctx.reader(), sf.getName()); docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName()); } int segDoc = doc - segBase; long val = values.get(segDoc); if (val != 0 || docsWithField.get(segDoc)) { int slot = table.add(val); // this can trigger a rehash rehash // countAcc.incrementCount(slot, 1); // our countAcc is virtual, so this is not needed collectFirstPhase(segDoc, slot); } } // // collection done, time to find the top slots // int numBuckets = 0; List<Object> bucketVals = null; if (freq.numBuckets && fcontext.isShard()) { bucketVals = new ArrayList(100); } int off = fcontext.isShard() ? 0 : (int) freq.offset; // add a modest amount of over-request if this is a shard request int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int) (freq.limit * 1.1 + 4) : (int) freq.limit) : Integer.MAX_VALUE; int maxsize = (int) (freq.limit >= 0 ? 
freq.offset + lim : Integer.MAX_VALUE - 1); maxsize = Math.min(maxsize, table.cardinality); final int sortMul = freq.sortDirection.getMultiplier(); PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) { @Override protected boolean lessThan(Slot a, Slot b) { // TODO: sort-by-index-order int cmp = sortAcc.compare(a.slot, b.slot) * sortMul; return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0; } }; // TODO: create a countAcc that wrapps the table so we can reuse more code? Slot bottom = null; for (int i = 0; i < table.counts.length; i++) { int count = table.counts[i]; if (count < effectiveMincount) { // either not a valid slot, or count not high enough continue; } numBuckets++; // can be different from the table cardinality if mincount > 1 long val = table.vals[i]; if (bucketVals != null && bucketVals.size() < 100) { bucketVals.add(calc.bitsToValue(val)); } if (bottom == null) { bottom = new Slot(); } bottom.slot = i; bottom = queue.insertWithOverflow(bottom); } SimpleOrderedMap res = new SimpleOrderedMap(); if (freq.numBuckets) { if (!fcontext.isShard()) { res.add("numBuckets", numBuckets); } else { SimpleOrderedMap map = new SimpleOrderedMap(2); map.add("numBuckets", numBuckets); map.add("vals", bucketVals); res.add("numBuckets", map); } } FacetDebugInfo fdebug = fcontext.getDebugInfo(); if (fdebug != null) fdebug.putInfoItem("numBuckets", new Long(numBuckets)); if (freq.allBuckets) { SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>(); // countAcc.setValues(allBuckets, allBucketsSlot); allBuckets.add("count", table.numAdds); allBucketsAcc.setValues(allBuckets, -1); // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?) res.add("allBuckets", allBuckets); } if (freq.missing) { // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway. 
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>(); fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null); res.add("missing", missingBucket); } // if we are deep paging, we don't have to order the highest "offset" counts. int collectCount = Math.max(0, queue.size() - off); assert collectCount <= lim; int[] sortedSlots = new int[collectCount]; for (int i = collectCount - 1; i >= 0; i--) { sortedSlots[i] = queue.pop().slot; } ArrayList bucketList = new ArrayList(collectCount); res.add("buckets", bucketList); boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0; for (int slotNum : sortedSlots) { SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>(); Comparable val = calc.bitsToValue(table.vals[slotNum]); bucket.add("val", val); Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null; fillBucket(bucket, table.counts[slotNum], slotNum, null, filter); bucketList.add(bucket); } return res; }
From source file:org.apache.solr.search.HashDocSet.java
License:Apache License
/**
 * Creates a HashDocSet from a list of *unique* doc ids.
 *
 * <p>The open-addressed table capacity is the next power of two at or above
 * {@code len}, doubled once more if that would still exceed the requested
 * load factor.</p>
 *
 * @param docs              source array of unique doc ids
 * @param offset            index of the first id to take from {@code docs}
 * @param len               number of ids to take
 * @param inverseLoadFactor reciprocal of the desired table load factor
 */
public HashDocSet(int[] docs, int offset, int len, float inverseLoadFactor) {
    int capacity = Math.max(BitUtil.nextHighestPowerOfTwo(len), 1);
    if (capacity < len * inverseLoadFactor) {
        capacity <<= 1;
    }

    mask = capacity - 1;
    table = new int[capacity];
    // (for now) filled by hand — measured better than Arrays.fill(table, EMPTY);
    // see https://issues.apache.org/jira/browse/SOLR-390
    for (int slot = 0; slot < capacity; slot++) {
        table[slot] = EMPTY;
    }

    for (int i = offset, end = offset + len; i < end; i++) {
        put(docs[i]);
    }
    size = len;
}
From source file:org.apache.solr.update.VersionInfo.java
License:Apache License
/**
 * Creates version-tracking state for an update log.
 *
 * <p>The requested bucket count is rounded up to the next power of two, and
 * every bucket is eagerly allocated.</p>
 *
 * @param ulog     the update log whose core schema supplies the version and id fields
 * @param nBuckets requested number of version buckets (rounded up to a power of two)
 */
public VersionInfo(UpdateLog ulog, int nBuckets) {
    this.ulog = ulog;
    IndexSchema latestSchema = ulog.uhandler.core.getLatestSchema();
    versionField = getAndCheckVersionField(latestSchema);
    idField = latestSchema.getUniqueKeyField();

    VersionBucket[] newBuckets = new VersionBucket[BitUtil.nextHighestPowerOfTwo(nBuckets)];
    for (int slot = newBuckets.length - 1; slot >= 0; slot--) {
        newBuckets[slot] = new VersionBucket();
    }
    buckets = newBuckets;
}