List of usage examples for org.apache.lucene.util.packed PackedInts getMutable
public static Mutable getMutable(int valueCount, int bitsPerValue, PackedInts.Format format)
From source file:org.elasticsearch.index.fielddata.ordinals.SinglePackedOrdinals.java
License:Apache License
public SinglePackedOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) { assert builder.getNumMultiValuesDocs() == 0; this.numOrds = builder.getNumOrds(); this.maxOrd = builder.getNumOrds() + 1; // We don't reuse the builder as-is because it might have been built with a higher overhead ratio final PackedInts.Mutable reader = PackedInts.getMutable(builder.maxDoc(), PackedInts.bitsRequired(getNumOrds()), acceptableOverheadRatio); PackedInts.copy(builder.getFirstOrdinals(), 0, reader, 0, builder.maxDoc(), 8 * 1024); this.reader = reader; }
From source file:org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java
License:Apache License
@Override public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception { AtomicReader reader = context.reader(); Terms terms = reader.terms(getFieldNames().indexName()); PackedArrayAtomicFieldData data = null; PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType()); if (terms == null) { data = PackedArrayAtomicFieldData.empty(reader.maxDoc()); estimator.adjustForNoTerms(data.getMemorySizeInBytes()); return data; }//w w w . j ava 2 s .c o m // TODO: how can we guess the number of terms? numerics end up creating more terms per value... // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of // longs is going to be monotonically increasing final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer(); final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat( "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio); TermsEnum termsEnum = estimator.beforeLoad(terms); boolean success = false; try { BytesRefIterator iter = builder.buildFromTerms(termsEnum); BytesRef term; assert !getNumericType().isFloatingPoint(); final boolean indexedAsLong = getNumericType().requiredBits() > 32; while ((term = iter.next()) != null) { final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term) : NumericUtils.prefixCodedToInt(term); assert values.size() == 0 || value > values.get(values.size() - 1); values.add(value); } Ordinals build = builder.build(fieldDataType.getSettings()); if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) { Docs ordinals = build.ordinals(); final FixedBitSet set = builder.buildDocsWithValuesSet(); long minValue, maxValue; minValue = maxValue = 0; if (values.size() > 0) { minValue = values.get(0); maxValue = values.get(values.size() - 1); } // Encode document without a value with a special value long missingValue = 0; if (set != null) { if ((maxValue - minValue + 1) == values.size()) { // values are dense if (minValue > Long.MIN_VALUE) { missingValue = --minValue; } else { assert maxValue != Long.MAX_VALUE; missingValue = ++maxValue; } } else { for (long i = 1; i < values.size(); ++i) { if (values.get(i) > values.get(i - 1) + 1) { missingValue = values.get(i - 1) + 1; break; } } } missingValue -= minValue; // delta } final long delta = maxValue - minValue; final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta); final float acceptableOverheadRatio = fieldDataType.getSettings() .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); // there's sweet spot where due to low unique value count, using ordinals will consume less memory final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L; final long uniqueValuesSize = values.ramBytesUsed(); final long ordinalsSize = build.getMemorySizeInBytes(); if (uniqueValuesSize + ordinalsSize < singleValuesSize) { data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build); } else { final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); if (missingValue != 0) { sValues.fill(0, sValues.size(), missingValue); } for (int i = 0; i < reader.maxDoc(); i++) { final long ord = ordinals.getOrd(i); if (ord != Ordinals.MISSING_ORDINAL) { sValues.set(i, values.get(ord - 1) - minValue); } } if (set == null) { data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(), ordinals.getNumOrds()); } else { data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(), missingValue, ordinals.getNumOrds()); } } } else { data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build); } success = true; return data; } finally { if (!success) { // If something went wrong, unwind any current estimations we've made estimator.afterLoad(termsEnum, 0); } else { // Adjust as usual, based on the actual size of the field data estimator.afterLoad(termsEnum, data.getMemorySizeInBytes()); } builder.close(); } }