Usage examples for org.apache.lucene.util.packed.PackedInts#fastestFormatAndBits
public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio)
acceptableOverheadRatio — an acceptable overhead ratio per value. From source file: com.lucure.core.codec.ForUtil.java
License: Apache License
/** * Create a new {@link ForUtil} instance and save state into <code>out</code>. *//*from ww w. j a v a 2 s .c om*/ ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException { out.writeVInt(PackedInts.VERSION_CURRENT); encodedSizes = new int[33]; encoders = new PackedInts.Encoder[33]; decoders = new PackedInts.Decoder[33]; iterations = new int[33]; for (int bpv = 1; bpv <= 32; ++bpv) { final FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv, acceptableOverheadRatio); assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue); assert formatAndBits.bitsPerValue <= 32; encodedSizes[bpv] = encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); encoders[bpv] = PackedInts.getEncoder(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); decoders[bpv] = PackedInts.getDecoder(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); iterations[bpv] = computeIterations(decoders[bpv]); out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1)); } }
From source file: org.elasticsearch.index.fielddata.ordinals.MultiOrdinals.java
License: Apache License
/** * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. *///from ww w.j ava 2 s .c o m public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) { int bitsPerOrd = PackedInts.bitsRequired(numOrds); bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue; // Compute the worst-case number of bits per value for offsets in the worst case, eg. if no docs have a value at the // beginning of the block and all docs have one at the end of the block final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc; final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc); int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue; final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset; final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd; return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes; }
From source file: org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java
License: Apache License
/**
 * Loads the numeric field data for one segment, choosing between an
 * ordinals-backed representation and a direct packed-ints array depending on
 * which is estimated to use less memory. Memory usage is tracked through
 * {@code estimator} / the circuit breaker, and estimations are unwound in the
 * finally block if loading fails.
 */
@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
        // No terms for this field in this segment: return an empty impl and
        // record its (small) footprint with the breaker.
        data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.getMemorySizeInBytes());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer
    // order, so we know the sequence of longs is going to be monotonically increasing.
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try {
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        assert !getNumericType().isFloatingPoint();
        final boolean indexedAsLong = getNumericType().requiredBits() > 32;
        // Collect the distinct values in increasing order (see monotonicity note above).
        while ((term = iter.next()) != null) {
            final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term)
                    : NumericUtils.prefixCodedToInt(term);
            assert values.size() == 0 || value > values.get(values.size() - 1);
            values.add(value);
        }
        Ordinals build = builder.build(fieldDataType.getSettings());
        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            // Single-valued field: consider dropping ordinals and storing deltas directly.
            Docs ordinals = build.ordinals();
            final FixedBitSet set = builder.buildDocsWithValuesSet();
            long minValue, maxValue;
            minValue = maxValue = 0;
            if (values.size() > 0) {
                minValue = values.get(0);
                maxValue = values.get(values.size() - 1);
            }
            // Encode documents without a value with a special value that no real
            // document uses (found below, expressed as a delta from minValue).
            long missingValue = 0;
            if (set != null) {
                if ((maxValue - minValue + 1) == values.size()) {
                    // values are dense: the only unused values lie just outside [min, max]
                    if (minValue > Long.MIN_VALUE) {
                        missingValue = --minValue;
                    } else {
                        assert maxValue != Long.MAX_VALUE;
                        missingValue = ++maxValue;
                    }
                } else {
                    // values are sparse: pick the first gap inside the range
                    for (long i = 1; i < values.size(); ++i) {
                        if (values.get(i) > values.get(i - 1) + 1) {
                            missingValue = values.get(i - 1) + 1;
                            break;
                        }
                    }
                }
                missingValue -= minValue; // delta
            }
            // delta < 0 means (maxValue - minValue) overflowed a signed long, so 64 bits are needed.
            final long delta = maxValue - minValue;
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
            final float acceptableOverheadRatio = fieldDataType.getSettings()
                    .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(),
                    bitsRequired, acceptableOverheadRatio);
            // There's a sweet spot where, due to a low unique value count, using
            // ordinals will consume less memory than the direct packed array.
            final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT,
                    reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
            final long uniqueValuesSize = values.ramBytesUsed();
            final long ordinalsSize = build.getMemorySizeInBytes();
            if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
                data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
            } else {
                // Materialize one packed slot per document, storing value - minValue.
                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired,
                        acceptableOverheadRatio);
                if (missingValue != 0) {
                    sValues.fill(0, sValues.size(), missingValue);
                }
                for (int i = 0; i < reader.maxDoc(); i++) {
                    final long ord = ordinals.getOrd(i);
                    if (ord != Ordinals.MISSING_ORDINAL) {
                        sValues.set(i, values.get(ord - 1) - minValue);
                    }
                }
                if (set == null) {
                    // Every document has a value: no missing-value sentinel needed.
                    data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(),
                            ordinals.getNumOrds());
                } else {
                    data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(),
                            missingValue, ordinals.getNumOrds());
                }
            }
        } else {
            data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        }
        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
        }
        builder.close();
    }
}