Example usage for org.apache.lucene.util.packed PackedInts fastestFormatAndBits

List of usage examples for org.apache.lucene.util.packed PackedInts fastestFormatAndBits

Introduction

This page lists example usages of org.apache.lucene.util.packed PackedInts fastestFormatAndBits.

Prototype

public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue,
        float acceptableOverheadRatio) 

Source Link

Document

Tries to find the Format and number of bits per value that would allow restoring the fastest reader from disk, while keeping the memory overhead below acceptableOverheadRatio.

Usage

From source file:com.lucure.core.codec.ForUtil.java

License:Apache License

/**
 * Creates a new {@link ForUtil} instance, selecting for every bits-per-value
 * count (1..32) the fastest packed format within the given overhead budget,
 * and persists the chosen configuration to <code>out</code> so a reader can
 * reconstruct the exact same encoders/decoders later.
 */
ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
    // Index 0 is unused; slots 1..32 correspond to bits-per-value counts.
    encodedSizes = new int[33];
    encoders = new PackedInts.Encoder[33];
    decoders = new PackedInts.Decoder[33];
    iterations = new int[33];

    // Header: the packed-ints version used to encode the configuration.
    out.writeVInt(PackedInts.VERSION_CURRENT);

    for (int bitsPerValue = 1; bitsPerValue <= 32; ++bitsPerValue) {
        // Trade some space (up to acceptableOverheadRatio) for decoding speed.
        final FormatAndBits fab = PackedInts.fastestFormatAndBits(BLOCK_SIZE, bitsPerValue,
                acceptableOverheadRatio);
        assert fab.format.isSupported(fab.bitsPerValue);
        assert fab.bitsPerValue <= 32;

        encodedSizes[bitsPerValue] = encodedSize(fab.format, PackedInts.VERSION_CURRENT, fab.bitsPerValue);
        encoders[bitsPerValue] = PackedInts.getEncoder(fab.format, PackedInts.VERSION_CURRENT,
                fab.bitsPerValue);
        decoders[bitsPerValue] = PackedInts.getDecoder(fab.format, PackedInts.VERSION_CURRENT,
                fab.bitsPerValue);
        iterations[bitsPerValue] = computeIterations(decoders[bitsPerValue]);

        // Persist format id (upper bits) and bitsPerValue-1 (lower 5 bits) in one vint.
        out.writeVInt(fab.format.getId() << 5 | (fab.bitsPerValue - 1));
    }
}

From source file:org.elasticsearch.index.fielddata.ordinals.MultiOrdinals.java

License:Apache License

/**
 * Returns true if this multi-valued implementation is expected to be smaller
 * than {@link SinglePackedOrdinals} by at least 20%.
 *
 * @param maxDoc                  total number of documents in the segment
 * @param numDocsWithValue        number of documents that have at least one value
 * @param numOrds                 total number of ordinals
 * @param acceptableOverheadRatio packing overhead budget passed to PackedInts
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue,
        long numOrds, float acceptableOverheadRatio) {
    // Effective per-ordinal cost once the fastest acceptable packed format is chosen.
    final int ordBits = PackedInts.fastestFormatAndBits(numDocsWithValue, PackedInts.bitsRequired(numOrds),
            acceptableOverheadRatio).bitsPerValue;

    // Worst-case bits per offset delta, e.g. if no docs have a value at the
    // beginning of the block and all docs have one at the end of the block.
    final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    final int offsetBits = PackedInts.fastestFormatAndBits(maxDoc,
            PackedInts.bitsRequired(maxDelta) + 1, // +1 because of the sign
            acceptableOverheadRatio).bitsPerValue;

    // Compare the projected footprints: multi layout stores ords plus per-doc
    // offsets, single layout stores one ord per doc.
    final long multiSizeInBits = (long) numDocsWithValue * ordBits + (long) maxDoc * offsetBits;
    final long singleSizeInBits = (long) maxDoc * ordBits;
    return multiSizeInBits < 0.8f * singleSizeInBits;
}

From source file:org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java

License:Apache License

@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    // Loads every numeric value of this field from the segment's terms into an
    // in-memory packed structure, then picks whichever layout (ordinals-based vs
    // direct single-value-per-doc) is estimated to use less memory.
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
        // No terms for this field: return an empty holder, but still report its
        // (small) footprint to the circuit breaker.
        data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.getMemorySizeInBytes());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
    // longs is going to be monotonically increasing
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();

    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try {
        // Iterate all terms, decoding each prefix-coded term back to its numeric
        // value and collecting per-document ordinals as a side effect.
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        assert !getNumericType().isFloatingPoint();
        final boolean indexedAsLong = getNumericType().requiredBits() > 32;
        while ((term = iter.next()) != null) {
            final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term)
                    : NumericUtils.prefixCodedToInt(term);
            // Terms arrive in increasing numeric order (see note above), so the
            // monotonic buffer invariant holds.
            assert values.size() == 0 || value > values.get(values.size() - 1);
            values.add(value);
        }
        Ordinals build = builder.build(fieldDataType.getSettings());

        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            // Single-valued field: consider flattening into a direct packed array
            // (one value per doc) instead of keeping the ordinals indirection.
            Docs ordinals = build.ordinals();
            // Set of docs that actually have a value; null means every doc has one.
            final FixedBitSet set = builder.buildDocsWithValuesSet();

            long minValue, maxValue;
            minValue = maxValue = 0;
            if (values.size() > 0) {
                minValue = values.get(0);
                maxValue = values.get(values.size() - 1);
            }

            // Encode document without a value with a special value
            long missingValue = 0;
            if (set != null) {
                if ((maxValue - minValue + 1) == values.size()) {
                    // values are dense: no gap inside the range, so extend the
                    // range by one on whichever side does not overflow.
                    if (minValue > Long.MIN_VALUE) {
                        missingValue = --minValue;
                    } else {
                        assert maxValue != Long.MAX_VALUE;
                        missingValue = ++maxValue;
                    }
                } else {
                    // Sparse values: reuse the first gap in the sequence as the
                    // sentinel for "missing".
                    for (long i = 1; i < values.size(); ++i) {
                        if (values.get(i) > values.get(i - 1) + 1) {
                            missingValue = values.get(i - 1) + 1;
                            break;
                        }
                    }
                }
                missingValue -= minValue; // delta
            }

            // Values are stored as deltas from minValue, so the bit width only
            // depends on the range; a negative delta means the range overflowed
            // a signed long and we need the full 64 bits.
            final long delta = maxValue - minValue;
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
            final float acceptableOverheadRatio = fieldDataType.getSettings()
                    .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(),
                    bitsRequired, acceptableOverheadRatio);

            // there's sweet spot where due to low unique value count, using ordinals will consume less memory
            final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT,
                    reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
            final long uniqueValuesSize = values.ramBytesUsed();
            final long ordinalsSize = build.getMemorySizeInBytes();

            if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
                // Ordinals layout wins: keep the unique values + ords indirection.
                data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
            } else {
                // Direct layout wins: materialize one packed delta per document.
                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired,
                        acceptableOverheadRatio);
                if (missingValue != 0) {
                    // Pre-fill with the sentinel so docs that never get set below
                    // read back as "missing".
                    sValues.fill(0, sValues.size(), missingValue);
                }
                for (int i = 0; i < reader.maxDoc(); i++) {
                    final long ord = ordinals.getOrd(i);
                    if (ord != Ordinals.MISSING_ORDINAL) {
                        // Ordinals are 1-based here, hence ord - 1 into values.
                        sValues.set(i, values.get(ord - 1) - minValue);
                    }
                }
                if (set == null) {
                    // Every doc has a value: no sentinel bookkeeping needed.
                    data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(),
                            ordinals.getNumOrds());
                } else {
                    data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(),
                            missingValue, ordinals.getNumOrds());
                }
            }
        } else {
            // Multi-valued (or ords removal disabled): keep the ordinals layout.
            data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        }

        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
        }
        builder.close();
    }

}