Usage examples for org.apache.lucene.util.packed.PackedInts#bitsRequired
public static int bitsRequired(long maxValue)
From source file:com.lucure.core.codec.CompressingStoredFieldsIndexWriter.java
License:Apache License
/**
 * Flushes the buffered per-chunk metadata (doc bases and file start pointers) for the
 * current block. Both sequences are stored as packed zig-zag-encoded deltas from a
 * per-block average so that a few outlier chunks do not inflate the bit width used for
 * the whole block of up to 1024 chunks (see LUCENE-4512).
 */
private void writeBlock() throws IOException {
    assert blockChunks > 0;
    fieldsIndexOut.writeVInt(blockChunks);

    // --- doc bases ---
    final int avgChunkDocs = blockChunks == 1
            ? 0
            : Math.round((float) (blockDocs - docBaseDeltas[blockChunks - 1]) / (blockChunks - 1));
    fieldsIndexOut.writeVInt(totalDocs - blockDocs); // doc base of this block
    fieldsIndexOut.writeVInt(avgChunkDocs);

    // First pass: OR together the zig-zag deltas to find the required bit width.
    int docBase = 0;
    long maxDelta = 0;
    for (int chunk = 0; chunk < blockChunks; ++chunk) {
        final int delta = docBase - avgChunkDocs * chunk;
        maxDelta |= zigZagEncode(delta);
        docBase += docBaseDeltas[chunk];
    }

    final int bitsPerDocBase = PackedInts.bitsRequired(maxDelta);
    fieldsIndexOut.writeVInt(bitsPerDocBase);
    PackedInts.Writer writer = PackedInts.getWriterNoHeader(fieldsIndexOut,
            PackedInts.Format.PACKED, blockChunks, bitsPerDocBase, 1);

    // Second pass: emit each chunk's delta from the running average.
    docBase = 0;
    for (int chunk = 0; chunk < blockChunks; ++chunk) {
        final long delta = docBase - avgChunkDocs * chunk;
        assert PackedInts.bitsRequired(zigZagEncode(delta)) <= writer.bitsPerValue();
        writer.add(zigZagEncode(delta));
        docBase += docBaseDeltas[chunk];
    }
    writer.finish();

    // --- start pointers ---
    fieldsIndexOut.writeVLong(firstStartPointer);
    final long avgChunkSize = blockChunks == 1
            ? 0
            : (maxStartPointer - firstStartPointer) / (blockChunks - 1);
    fieldsIndexOut.writeVLong(avgChunkSize);

    // Same two-pass scheme for start pointers: width first, then the packed deltas.
    long startPointer = 0;
    maxDelta = 0;
    for (int chunk = 0; chunk < blockChunks; ++chunk) {
        startPointer += startPointerDeltas[chunk];
        final long delta = startPointer - avgChunkSize * chunk;
        maxDelta |= zigZagEncode(delta);
    }

    final int bitsPerStartPointer = PackedInts.bitsRequired(maxDelta);
    fieldsIndexOut.writeVInt(bitsPerStartPointer);
    writer = PackedInts.getWriterNoHeader(fieldsIndexOut,
            PackedInts.Format.PACKED, blockChunks, bitsPerStartPointer, 1);

    startPointer = 0;
    for (int chunk = 0; chunk < blockChunks; ++chunk) {
        startPointer += startPointerDeltas[chunk];
        final long delta = startPointer - avgChunkSize * chunk;
        assert PackedInts.bitsRequired(zigZagEncode(delta)) <= writer.bitsPerValue();
        writer.add(zigZagEncode(delta));
    }
    writer.finish();
}
From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java
License:Apache License
private static void saveInts(int[] values, int length, DataOutput out) throws IOException { assert length > 0; if (length == 1) { out.writeVInt(values[0]);//from ww w . ja v a 2s . c om } else { boolean allEqual = true; for (int i = 1; i < length; ++i) { if (values[i] != values[0]) { allEqual = false; break; } } if (allEqual) { out.writeVInt(0); out.writeVInt(values[0]); } else { long max = 0; for (int i = 0; i < length; ++i) { max |= values[i]; } final int bitsRequired = PackedInts.bitsRequired(max); out.writeVInt(bitsRequired); final PackedInts.Writer w = PackedInts.getWriterNoHeader(out, PackedInts.Format.PACKED, length, bitsRequired, 1); for (int i = 0; i < length; ++i) { w.add(values[i]); } w.finish(); } } }
From source file:com.lucure.core.codec.ForUtil.java
License:Apache License
/** * Compute the number of bits required to serialize any of the longs in * <code>data</code>./*from www . j a v a2 s .c o m*/ */ private static int bitsRequired(final int[] data) { long or = 0; for (int i = 0; i < BLOCK_SIZE; ++i) { assert data[i] >= 0; or |= data[i]; } return PackedInts.bitsRequired(or); }
From source file:org.apache.solr.search.DocSetBuilder.java
License:Apache License
public DocSet build(FixedBitSet filter) { if (bitSet != null) { if (filter != null) { bitSet.and(filter);/*from ww w .j a v a2 s .co m*/ } return new BitDocSet(bitSet); // TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize? } else { LSBRadixSorter sorter = new LSBRadixSorter(); sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, pos); final int l = dedup(buffer, pos, filter); assert l <= pos; return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set } }
From source file:org.codelibs.elasticsearch.search.aggregations.metrics.cardinality.HyperLogLogPlusPlus.java
License:Apache License
/** * Compute the required precision so that <code>count</code> distinct entries * would be counted with linear counting. *///w w w .j a va 2 s. c om public static int precisionFromThreshold(long count) { final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR); int precision = PackedInts.bitsRequired(hashTableEntries * Integer.BYTES); precision = Math.max(precision, MIN_PRECISION); precision = Math.min(precision, MAX_PRECISION); return precision; }
From source file:org.codelibs.elasticsearch.search.aggregations.metrics.cardinality.HyperLogLogPlusPlus.java
License:Apache License
/**
 * Encode the hash on 32 bits. The encoded hash cannot be equal to <code>0</code>.
 */
static int encodeHash(long hash, int p) {
    final long index = hash >>> (64 - P2);
    final long encoded;
    if ((index & mask(P2 - p)) != 0) {
        // Common case: only the index bits are needed; low bit 0 flags this form.
        encoded = index << 1;
    } else {
        // The bits between p and P2 are all zero, so the run length is stored
        // explicitly in 7 bits, flagged by a low bit of 1.
        final int runLen = 1 + Math.min(Long.numberOfLeadingZeros(hash << P2), 64 - P2);
        encoded = (index << 7) | (runLen << 1) | 1;
    }
    assert PackedInts.bitsRequired(encoded) <= 32;
    assert encoded != 0;
    return (int) encoded;
}
From source file:org.elasticsearch.index.fielddata.ordinals.MultiOrdinals.java
License:Apache License
/** * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. *///from www. j av a2 s .c o m public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) { int bitsPerOrd = PackedInts.bitsRequired(numOrds); bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue; // Compute the worst-case number of bits per value for offsets in the worst case, eg. if no docs have a value at the // beginning of the block and all docs have one at the end of the block final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc; final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc); int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue; final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset; final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd; return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes; }
From source file:org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder.java
License:Apache License
/**
 * Creates a builder for up to {@code maxDoc} documents. When {@code numTerms} is
 * negative the term count is unknown and a conservative default of 8 bits per value
 * is used as the starting width for the backing store.
 */
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
    this.maxDoc = maxDoc;
    final int startBitsPerValue = numTerms >= 0 ? PackedInts.bitsRequired(numTerms) : 8;
    ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
    spare = new LongsRef();
}
From source file:org.elasticsearch.index.fielddata.ordinals.SinglePackedOrdinals.java
License:Apache License
public SinglePackedOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) { assert builder.getNumMultiValuesDocs() == 0; this.numOrds = builder.getNumOrds(); this.maxOrd = builder.getNumOrds() + 1; // We don't reuse the builder as-is because it might have been built with a higher overhead ratio final PackedInts.Mutable reader = PackedInts.getMutable(builder.maxDoc(), PackedInts.bitsRequired(getNumOrds()), acceptableOverheadRatio); PackedInts.copy(builder.getFirstOrdinals(), 0, reader, 0, builder.maxDoc(), 8 * 1024); this.reader = reader; }
From source file:org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java
License:Apache License
/**
 * Loads the numeric field data for one segment. Terms are read in encoded
 * (lexicographic == numeric) order into a monotonic buffer; the result is stored
 * either as ordinals over unique values or as a single packed array of deltas from
 * the minimum value, whichever the size estimate says is smaller. Memory usage is
 * tracked through the circuit-breaker estimator in all paths, including failure.
 */
@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
        // No terms for this field in this segment: return an empty holder and
        // settle the estimator accounting immediately.
        data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.getMemorySizeInBytes());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
    // longs is going to be monotonically increasing
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    // -1: term count unknown up front (see OrdinalsBuilder's default sizing).
    OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try {
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        assert !getNumericType().isFloatingPoint();
        final boolean indexedAsLong = getNumericType().requiredBits() > 32;
        // Decode each term's numeric value; monotonicity is guaranteed by the
        // prefix-coded term order (asserted below).
        while ((term = iter.next()) != null) {
            final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term)
                    : NumericUtils.prefixCodedToInt(term);
            assert values.size() == 0 || value > values.get(values.size() - 1);
            values.add(value);
        }
        Ordinals build = builder.build(fieldDataType.getSettings());
        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            Docs ordinals = build.ordinals();
            final FixedBitSet set = builder.buildDocsWithValuesSet();
            long minValue, maxValue;
            minValue = maxValue = 0;
            if (values.size() > 0) {
                minValue = values.get(0);
                maxValue = values.get(values.size() - 1);
            }
            // Encode document without a value with a special value: pick one that is
            // guaranteed not to collide with any real value.
            long missingValue = 0;
            if (set != null) {
                if ((maxValue - minValue + 1) == values.size()) {
                    // values are dense: widen the range by one on whichever side won't overflow
                    if (minValue > Long.MIN_VALUE) {
                        missingValue = --minValue;
                    } else {
                        assert maxValue != Long.MAX_VALUE;
                        missingValue = ++maxValue;
                    }
                } else {
                    // Sparse values: use the first gap in the sorted sequence.
                    for (long i = 1; i < values.size(); ++i) {
                        if (values.get(i) > values.get(i - 1) + 1) {
                            missingValue = values.get(i - 1) + 1;
                            break;
                        }
                    }
                }
                missingValue -= minValue; // delta
            }
            // delta < 0 means the range overflowed a signed long, so all 64 bits are needed.
            final long delta = maxValue - minValue;
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
            final float acceptableOverheadRatio = fieldDataType.getSettings()
                    .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(),
                    bitsRequired, acceptableOverheadRatio);
            // there's sweet spot where due to low unique value count, using ordinals will consume less memory
            final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT,
                    reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
            final long uniqueValuesSize = values.ramBytesUsed();
            final long ordinalsSize = build.getMemorySizeInBytes();
            if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
                data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
            } else {
                // Expand the ordinals into one packed delta-from-min value per document.
                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired,
                        acceptableOverheadRatio);
                if (missingValue != 0) {
                    // Pre-fill with the missing marker so docs without a value need no write.
                    sValues.fill(0, sValues.size(), missingValue);
                }
                for (int i = 0; i < reader.maxDoc(); i++) {
                    final long ord = ordinals.getOrd(i);
                    if (ord != Ordinals.MISSING_ORDINAL) {
                        sValues.set(i, values.get(ord - 1) - minValue);
                    }
                }
                if (set == null) {
                    data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(),
                            ordinals.getNumOrds());
                } else {
                    data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(),
                            missingValue, ordinals.getNumOrds());
                }
            }
        } else {
            data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        }
        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
        }
        builder.close();
    }
}