Example usage for org.apache.lucene.util.packed PackedInts bitsRequired

List of usage examples for org.apache.lucene.util.packed PackedInts bitsRequired

Introduction

On this page you can find example usage for org.apache.lucene.util.packed PackedInts bitsRequired.

Prototype

public static int bitsRequired(long maxValue) 

Source Link

Document

Returns how many bits are required to hold values up to and including maxValue NOTE: This method returns at least 1.

Usage

From source file:com.lucure.core.codec.CompressingStoredFieldsIndexWriter.java

License:Apache License

/**
 * Flushes the buffered block of chunk metadata (doc bases and start
 * pointers) to {@code fieldsIndexOut} in packed, delta-from-average form.
 */
private void writeBlock() throws IOException {
    assert blockChunks > 0;
    fieldsIndexOut.writeVInt(blockChunks);

    // The trick here is that we only store the difference from the average start
    // pointer or doc base, this helps save bits per value.
    // And in order to prevent a few chunks that would be far from the average to
    // raise the number of bits per value for all of them, we only encode blocks
    // of 1024 chunks at once
    // See LUCENE-4512

    // doc bases
    final int avgChunkDocs;
    if (blockChunks == 1) {
        avgChunkDocs = 0;
    } else {
        // Average excludes the last delta (blockDocs minus the final chunk's
        // doc count, divided over the remaining chunks).
        avgChunkDocs = Math.round((float) (blockDocs - docBaseDeltas[blockChunks - 1]) / (blockChunks - 1));
    }
    fieldsIndexOut.writeVInt(totalDocs - blockDocs); // docBase
    fieldsIndexOut.writeVInt(avgChunkDocs);
    int docBase = 0;
    long maxDelta = 0;
    // First pass: OR together the zig-zag-encoded deltas to find the
    // bit width needed for the widest one.
    for (int i = 0; i < blockChunks; ++i) {
        final int delta = docBase - avgChunkDocs * i;
        maxDelta |= zigZagEncode(delta);
        docBase += docBaseDeltas[i];
    }

    final int bitsPerDocBase = PackedInts.bitsRequired(maxDelta);
    fieldsIndexOut.writeVInt(bitsPerDocBase);
    PackedInts.Writer writer = PackedInts.getWriterNoHeader(fieldsIndexOut, PackedInts.Format.PACKED,
            blockChunks, bitsPerDocBase, 1);
    docBase = 0;
    // Second pass: emit each zig-zag-encoded delta at the computed width.
    for (int i = 0; i < blockChunks; ++i) {
        final long delta = docBase - avgChunkDocs * i;
        assert PackedInts.bitsRequired(zigZagEncode(delta)) <= writer.bitsPerValue();
        writer.add(zigZagEncode(delta));
        docBase += docBaseDeltas[i];
    }
    writer.finish();

    // start pointers: same two-pass scheme, but deltas are taken against an
    // average chunk size instead of an average doc count.
    fieldsIndexOut.writeVLong(firstStartPointer);
    final long avgChunkSize;
    if (blockChunks == 1) {
        avgChunkSize = 0;
    } else {
        avgChunkSize = (maxStartPointer - firstStartPointer) / (blockChunks - 1);
    }
    fieldsIndexOut.writeVLong(avgChunkSize);
    long startPointer = 0;
    maxDelta = 0;
    for (int i = 0; i < blockChunks; ++i) {
        startPointer += startPointerDeltas[i];
        final long delta = startPointer - avgChunkSize * i;
        maxDelta |= zigZagEncode(delta);
    }

    final int bitsPerStartPointer = PackedInts.bitsRequired(maxDelta);
    fieldsIndexOut.writeVInt(bitsPerStartPointer);
    writer = PackedInts.getWriterNoHeader(fieldsIndexOut, PackedInts.Format.PACKED, blockChunks,
            bitsPerStartPointer, 1);
    startPointer = 0;
    for (int i = 0; i < blockChunks; ++i) {
        startPointer += startPointerDeltas[i];
        final long delta = startPointer - avgChunkSize * i;
        assert PackedInts.bitsRequired(zigZagEncode(delta)) <= writer.bitsPerValue();
        writer.add(zigZagEncode(delta));
    }
    writer.finish();
}

From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java

License:Apache License

/**
 * Writes {@code length} non-negative ints from {@code values} to {@code out},
 * choosing among three encodings signalled by a leading header: a bare VInt
 * when there is a single value, a {@code 0} marker plus the shared value when
 * all entries are equal, or packed ints at the minimal bit width otherwise.
 */
private static void saveInts(int[] values, int length, DataOutput out) throws IOException {
    assert length > 0;
    if (length == 1) {
        // Single entry: no header needed, just the value.
        out.writeVInt(values[0]);
        return;
    }

    // Detect the uniform case: every entry equals the first one.
    boolean uniform = true;
    for (int i = 1; i < length; ++i) {
        if (values[i] != values[0]) {
            uniform = false;
            break;
        }
    }

    if (uniform) {
        // Header 0 signals "all equal", followed by the repeated value.
        out.writeVInt(0);
        out.writeVInt(values[0]);
    } else {
        // OR all values together; the result's bit count bounds every entry.
        long orAll = 0;
        for (int i = 0; i < length; ++i) {
            orAll |= values[i];
        }
        final int bitsPerValue = PackedInts.bitsRequired(orAll);
        // The non-zero header doubles as the packed bit width.
        out.writeVInt(bitsPerValue);
        final PackedInts.Writer packed = PackedInts.getWriterNoHeader(out, PackedInts.Format.PACKED, length,
                bitsPerValue, 1);
        for (int i = 0; i < length; ++i) {
            packed.add(values[i]);
        }
        packed.finish();
    }
}

From source file:com.lucure.core.codec.ForUtil.java

License:Apache License

/**
 * Compute the number of bits required to serialize any of the longs in
 * <code>data</code>./*from www .  j a v a2 s  .c o m*/
 */
/**
 * Computes the number of bits required to serialize any of the first
 * {@code BLOCK_SIZE} entries of <code>data</code> (all assumed non-negative).
 */
private static int bitsRequired(final int[] data) {
    long accumulated = 0;
    int i = 0;
    while (i < BLOCK_SIZE) {
        assert data[i] >= 0;
        accumulated |= data[i];
        ++i;
    }
    return PackedInts.bitsRequired(accumulated);
}

From source file:org.apache.solr.search.DocSetBuilder.java

License:Apache License

/**
 * Builds the final DocSet. When a bit set was materialized it is
 * intersected with {@code filter} (if any) and wrapped as a BitDocSet;
 * otherwise the collected doc-id buffer is radix-sorted, deduplicated,
 * and returned as a SortedIntDocSet.
 */
public DocSet build(FixedBitSet filter) {
    if (bitSet == null) {
        // Sort raw doc ids; maxDoc - 1 bounds the key width for the radix sort.
        new LSBRadixSorter().sort(PackedInts.bitsRequired(maxDoc - 1), buffer, pos);
        final int unique = dedup(buffer, pos, filter);
        assert unique <= pos;
        return new SortedIntDocSet(buffer, unique); // TODO: have option to not shrink in the future if it will be a temporary set
    }
    if (filter != null) {
        bitSet.and(filter);
    }
    // TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
    return new BitDocSet(bitSet);
}

From source file:org.codelibs.elasticsearch.search.aggregations.metrics.cardinality.HyperLogLogPlusPlus.java

License:Apache License

/**
 * Compute the required precision so that <code>count</code> distinct entries
 * would be counted with linear counting.
 *///w  w w  .j  a  va 2  s.  c om
/**
 * Computes the precision needed so that {@code count} distinct entries
 * would be counted with linear counting, clamped to the supported
 * [MIN_PRECISION, MAX_PRECISION] range.
 */
public static int precisionFromThreshold(long count) {
    // Size the hash table for the target load factor, then convert its
    // byte footprint into a bit-width-derived precision.
    final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
    final int raw = PackedInts.bitsRequired(hashTableEntries * Integer.BYTES);
    return Math.min(MAX_PRECISION, Math.max(MIN_PRECISION, raw));
}

From source file:org.codelibs.elasticsearch.search.aggregations.metrics.cardinality.HyperLogLogPlusPlus.java

License:Apache License

/**
 * Encode the hash on 32 bits. The encoded hash cannot be equal to <code>0</code>.
 *//*  w  w w  . j a va  2 s  . c  o  m*/
/**
 * Encodes the hash on 32 bits. The encoded hash cannot be equal to
 * <code>0</code>: the low bit acts as a flag marking whether the run
 * length of leading zeros was folded into the encoding.
 */
static int encodeHash(long hash, int p) {
    final long index = hash >>> (64 - P2);
    long encoded;
    if ((index & mask(P2 - p)) != 0) {
        // Index alone is enough; shift left so the flag bit stays 0.
        encoded = index << 1;
    } else {
        // Low index bits are all zero: store the run length of leading
        // zeros past the index, with the flag bit set.
        final int runLen = 1 + Math.min(Long.numberOfLeadingZeros(hash << P2), 64 - P2);
        encoded = (index << 7) | (runLen << 1) | 1;
    }
    assert PackedInts.bitsRequired(encoded) <= 32;
    assert encoded != 0;
    return (int) encoded;
}

From source file:org.elasticsearch.index.fielddata.ordinals.MultiOrdinals.java

License:Apache License

/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
 *///from www. j av a2 s  .c  o  m
/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals}
 * by at least 20%, based on estimated per-value bit widths.
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue,
        long numOrds, float acceptableOverheadRatio) {
    final int ordBits = PackedInts.fastestFormatAndBits(numDocsWithValue,
            PackedInts.bitsRequired(numOrds), acceptableOverheadRatio).bitsPerValue;
    // Worst case for offsets: no docs have a value at the beginning of the
    // block and all docs have one at the end of the block.
    final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    // +1 bit to carry the sign of the delta.
    final int offsetBits = PackedInts.fastestFormatAndBits(maxDoc,
            PackedInts.bitsRequired(maxDelta) + 1, acceptableOverheadRatio).bitsPerValue;

    final long multiSize = (long) numDocsWithValue * ordBits + (long) maxDoc * offsetBits;
    final long singleSize = (long) maxDoc * ordBits;
    return multiSize < 0.8f * singleSize;
}

From source file:org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder.java

License:Apache License

/**
 * Creates a builder for up to {@code maxDoc} documents. When the term count
 * is known ({@code numTerms >= 0}) the initial per-value bit width is sized
 * to fit it; otherwise a default of 8 bits per value is used.
 */
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
    this.maxDoc = maxDoc;
    final int initialBitsPerValue = numTerms >= 0 ? PackedInts.bitsRequired(numTerms) : 8;
    ordinals = new OrdinalsStore(maxDoc, initialBitsPerValue, acceptableOverheadRatio);
    spare = new LongsRef();
}

From source file:org.elasticsearch.index.fielddata.ordinals.SinglePackedOrdinals.java

License:Apache License

/**
 * Copies the single-valued ordinals out of {@code builder} into a fresh
 * packed structure at the requested overhead ratio.
 */
public SinglePackedOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
    assert builder.getNumMultiValuesDocs() == 0;
    this.numOrds = builder.getNumOrds();
    this.maxOrd = builder.getNumOrds() + 1;
    // We don't reuse the builder as-is because it might have been built with a higher overhead ratio
    final PackedInts.Mutable packed = PackedInts.getMutable(builder.maxDoc(),
            PackedInts.bitsRequired(getNumOrds()), acceptableOverheadRatio);
    // Bulk-copy in 8K chunks from the builder's first-ordinal reader.
    PackedInts.copy(builder.getFirstOrdinals(), 0, packed, 0, builder.maxDoc(), 8 * 1024);
    this.reader = packed;
}

From source file:org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java

License:Apache License

/**
 * Loads the numeric field data for one segment. Terms are decoded into a
 * monotonically increasing buffer of longs; when the field turns out to be
 * single-valued (and settings allow dropping ordinals), the cheaper of the
 * ordinals-based and direct packed-array representations is chosen by
 * estimated memory footprint.
 */
@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
        // No terms for this field: return an empty structure, but still
        // account for its size with the circuit breaker.
        data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.getMemorySizeInBytes());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
    // longs is going to be monotonically increasing
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();

    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try {
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        assert !getNumericType().isFloatingPoint();
        final boolean indexedAsLong = getNumericType().requiredBits() > 32;
        // Decode each prefix-coded term back into its numeric value.
        while ((term = iter.next()) != null) {
            final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term)
                    : NumericUtils.prefixCodedToInt(term);
            assert values.size() == 0 || value > values.get(values.size() - 1);
            values.add(value);
        }
        Ordinals build = builder.build(fieldDataType.getSettings());

        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            Docs ordinals = build.ordinals();
            final FixedBitSet set = builder.buildDocsWithValuesSet();

            long minValue, maxValue;
            minValue = maxValue = 0;
            if (values.size() > 0) {
                minValue = values.get(0);
                maxValue = values.get(values.size() - 1);
            }

            // Encode document without a value with a special value
            long missingValue = 0;
            if (set != null) {
                if ((maxValue - minValue + 1) == values.size()) {
                    // values are dense: extend the range by one on whichever
                    // side does not overflow and use that as the sentinel
                    if (minValue > Long.MIN_VALUE) {
                        missingValue = --minValue;
                    } else {
                        assert maxValue != Long.MAX_VALUE;
                        missingValue = ++maxValue;
                    }
                } else {
                    // values are sparse: reuse the first gap as the sentinel
                    for (long i = 1; i < values.size(); ++i) {
                        if (values.get(i) > values.get(i - 1) + 1) {
                            missingValue = values.get(i - 1) + 1;
                            break;
                        }
                    }
                }
                missingValue -= minValue; // delta
            }

            final long delta = maxValue - minValue;
            // delta < 0 means the long range overflowed; fall back to 64 bits.
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
            final float acceptableOverheadRatio = fieldDataType.getSettings()
                    .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(),
                    bitsRequired, acceptableOverheadRatio);

            // there's sweet spot where due to low unique value count, using ordinals will consume less memory
            final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT,
                    reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
            final long uniqueValuesSize = values.ramBytesUsed();
            final long ordinalsSize = build.getMemorySizeInBytes();

            if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
                data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
            } else {
                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired,
                        acceptableOverheadRatio);
                if (missingValue != 0) {
                    // Pre-fill with the sentinel; docs that have values
                    // overwrite it in the loop below.
                    sValues.fill(0, sValues.size(), missingValue);
                }
                for (int i = 0; i < reader.maxDoc(); i++) {
                    final long ord = ordinals.getOrd(i);
                    if (ord != Ordinals.MISSING_ORDINAL) {
                        sValues.set(i, values.get(ord - 1) - minValue);
                    }
                }
                if (set == null) {
                    data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(),
                            ordinals.getNumOrds());
                } else {
                    data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(),
                            missingValue, ordinals.getNumOrds());
                }
            }
        } else {
            data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        }

        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
        }
        builder.close();
    }

}