Example usage for org.apache.lucene.util.packed PackedInts DEFAULT

List of usage examples for org.apache.lucene.util.packed PackedInts DEFAULT

Introduction

In this page you can find the example usage for org.apache.lucene.util.packed PackedInts DEFAULT.

Prototype

float DEFAULT

To view the source code for org.apache.lucene.util.packed PackedInts DEFAULT, click the Source Link below.

Click Source Link

Document

At most 25% memory overhead.

Usage

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.BestBucketsDeferringCollector.java

License:Apache License

@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
    // Seal the buffers of the previous segment before starting a new one.
    finishLeaf();

    context = ctx;
    // Doc ids are recorded as deltas from the previously collected doc so
    // that they compress well in the packed builder.
    docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT);

    return new LeafBucketCollector() {
        int previousDoc = 0;

        @Override
        public void collect(int doc, long bucket) throws IOException {
            docDeltas.add(doc - previousDoc);
            buckets.add(bucket);
            previousDoc = doc;
            // Track the highest bucket ordinal seen so far.
            maxBucket = Math.max(maxBucket, bucket);
        }
    };
}

From source file:org.elasticsearch.index.codec.postingsformat.MemoryPostingsFormatProvider.java

License:Apache License

@Inject
public MemoryPostingsFormatProvider(@Assisted String name, @Assisted Settings postingsFormatSettings) {
    super(name);/*from ww  w  . java2s  .co m*/
    this.packFst = postingsFormatSettings.getAsBoolean("pack_fst", false);
    this.acceptableOverheadRatio = postingsFormatSettings.getAsFloat("acceptable_overhead_ratio",
            PackedInts.DEFAULT);
    // TODO this should really be an ENUM?
    this.postingsFormat = new MemoryPostingsFormat(packFst, acceptableOverheadRatio);
}

From source file:org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder.java

License:Apache License

/**
 * Build global ordinals for the provided {@link IndexReader}.
 */
public static IndexOrdinalsFieldData build(final IndexReader indexReader, IndexOrdinalsFieldData indexFieldData,
        Settings settings, CircuitBreakerService breakerService, ESLogger logger) throws IOException {
    assert indexReader.leaves().size() > 1;
    long startTimeNS = System.nanoTime();

    // Load per-segment field data and gather each segment's ordinal values.
    final int leafCount = indexReader.leaves().size();
    final AtomicOrdinalsFieldData[] atomicFD = new AtomicOrdinalsFieldData[leafCount];
    final RandomAccessOrds[] subs = new RandomAccessOrds[leafCount];
    for (int leaf = 0; leaf < leafCount; ++leaf) {
        atomicFD[leaf] = indexFieldData.load(indexReader.leaves().get(leaf));
        subs[leaf] = atomicFD[leaf].getOrdinalsValues();
    }
    // Merge the segment-level ordinals into a single global mapping.
    final OrdinalMap ordinalMap = OrdinalMap.build(null, subs, PackedInts.DEFAULT);
    final long memorySizeInBytes = ordinalMap.ramBytesUsed();
    // Record the map's footprint with the field-data breaker without tripping it.
    breakerService.getBreaker(CircuitBreaker.FIELDDATA).addWithoutBreaking(memorySizeInBytes);

    if (logger.isDebugEnabled()) {
        logger.debug("Global-ordinals[{}][{}] took {} ms", indexFieldData.getFieldNames().fullName(),
                ordinalMap.getValueCount(), TimeValue.nsecToMSec(System.nanoTime() - startTimeNS));
    }
    return new InternalGlobalOrdinalsIndexFieldData(indexFieldData.index(), settings,
            indexFieldData.getFieldNames(), indexFieldData.getFieldDataType(), atomicFD, ordinalMap,
            memorySizeInBytes);
}

From source file:org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder.java

License:Apache License

public static IndexOrdinalsFieldData buildEmpty(Index index, Settings settings, final IndexReader indexReader,
        IndexOrdinalsFieldData indexFieldData) throws IOException {
    assert indexReader.leaves().size() > 1;

    // Fill every segment slot with a zero-footprint stub whose ordinal
    // values come from the shared empty sorted-set doc values instance.
    final int leafCount = indexReader.leaves().size();
    final AtomicOrdinalsFieldData[] atomicFD = new AtomicOrdinalsFieldData[leafCount];
    final RandomAccessOrds[] subs = new RandomAccessOrds[leafCount];
    for (int leaf = 0; leaf < leafCount; ++leaf) {
        atomicFD[leaf] = new AbstractAtomicOrdinalsFieldData() {
            @Override
            public RandomAccessOrds getOrdinalsValues() {
                return DocValues.emptySortedSet();
            }

            @Override
            public long ramBytesUsed() {
                return 0;
            }

            @Override
            public Collection<Accountable> getChildResources() {
                return Collections.emptyList();
            }

            @Override
            public void close() {
            }
        };
        subs[leaf] = atomicFD[leaf].getOrdinalsValues();
    }
    // An ordinal map over empty segments yields an empty global mapping.
    final OrdinalMap ordinalMap = OrdinalMap.build(null, subs, PackedInts.DEFAULT);
    return new InternalGlobalOrdinalsIndexFieldData(index, settings, indexFieldData.getFieldNames(),
            indexFieldData.getFieldDataType(), atomicFD, ordinalMap, 0);
}

From source file:org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java

License:Apache License

@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    // Estimator charges the field-data circuit breaker up front and is
    // reconciled against the actual loaded size in the finally block.
    PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
        // Field has no terms in this segment: return empty field data.
        data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.getMemorySizeInBytes());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
    // longs is going to be monotonically increasing
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();

    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try {
        // Collect the distinct numeric values in increasing order while the
        // builder records the per-document ordinals.
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        assert !getNumericType().isFloatingPoint();
        final boolean indexedAsLong = getNumericType().requiredBits() > 32;
        while ((term = iter.next()) != null) {
            final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term)
                    : NumericUtils.prefixCodedToInt(term);
            assert values.size() == 0 || value > values.get(values.size() - 1);
            values.add(value);
        }
        Ordinals build = builder.build(fieldDataType.getSettings());

        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            // Single-valued field: consider dropping ordinals and storing the
            // values directly as deltas from minValue in a packed array.
            Docs ordinals = build.ordinals();
            final FixedBitSet set = builder.buildDocsWithValuesSet();

            long minValue, maxValue;
            minValue = maxValue = 0;
            if (values.size() > 0) {
                minValue = values.get(0);
                maxValue = values.get(values.size() - 1);
            }

            // Encode document without a value with a special value
            long missingValue = 0;
            if (set != null) {
                if ((maxValue - minValue + 1) == values.size()) {
                    // values are dense
                    if (minValue > Long.MIN_VALUE) {
                        missingValue = --minValue;
                    } else {
                        assert maxValue != Long.MAX_VALUE;
                        missingValue = ++maxValue;
                    }
                } else {
                    // Values are sparse: reuse the first gap in the value
                    // sequence as the missing-value sentinel.
                    for (long i = 1; i < values.size(); ++i) {
                        if (values.get(i) > values.get(i - 1) + 1) {
                            missingValue = values.get(i - 1) + 1;
                            break;
                        }
                    }
                }
                missingValue -= minValue; // delta
            }

            final long delta = maxValue - minValue;
            // delta < 0 means (maxValue - minValue) overflowed a long, so a
            // full 64 bits per value are needed.
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
            final float acceptableOverheadRatio = fieldDataType.getSettings()
                    .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(),
                    bitsRequired, acceptableOverheadRatio);

            // there's sweet spot where due to low unique value count, using ordinals will consume less memory
            final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT,
                    reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
            final long uniqueValuesSize = values.ramBytesUsed();
            final long ordinalsSize = build.getMemorySizeInBytes();

            if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
                data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
            } else {
                // Direct storage wins: one packed slot per document.
                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired,
                        acceptableOverheadRatio);
                if (missingValue != 0) {
                    sValues.fill(0, sValues.size(), missingValue);
                }
                for (int i = 0; i < reader.maxDoc(); i++) {
                    final long ord = ordinals.getOrd(i);
                    if (ord != Ordinals.MISSING_ORDINAL) {
                        sValues.set(i, values.get(ord - 1) - minValue);
                    }
                }
                if (set == null) {
                    data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(),
                            ordinals.getNumOrds());
                } else {
                    data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(),
                            missingValue, ordinals.getNumOrds());
                }
            }
        } else {
            data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        }

        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
        }
        builder.close();
    }

}

From source file:org.elasticsearch.index.merge.policy.ElasticsearchMergePolicy.java

License:Apache License

/** Return an "upgraded" view of the reader. */
static AtomicReader filter(AtomicReader reader) throws IOException {
    final FieldInfos fieldInfos = reader.getFieldInfos();
    final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME);
    if (versionInfo != null && versionInfo.hasDocValues()) {
        // the reader is a recent one, it has versions and they are stored
        // in a numeric doc values field
        return reader;
    }
    // The segment is an old one, load all versions in memory and hide
    // them behind a numeric doc values field
    final Terms terms = reader.terms(UidFieldMapper.NAME);
    if (terms == null || !terms.hasPayloads()) {
        // The segment doesn't have an _uid field or doesn't have payloads
        // don't try to do anything clever. If any other segment has versions
        // all versions of this segment will be initialized to 0
        return reader;
    }
    final TermsEnum uids = terms.iterator(null);
    // Per-document version numbers; the writer grows its bit width as needed.
    final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.DEFAULT);
    DocsAndPositionsEnum dpe = null;
    for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) {
        dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
        assert dpe != null : "field has payloads";
        for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) {
            dpe.nextPosition();
            final BytesRef payload = dpe.getPayload();
            // An 8-byte payload is interpreted as the encoded version number.
            if (payload != null && payload.length == 8) {
                final long version = Numbers.bytesToLong(payload);
                versions.set(doc, version);
                break;
            }
        }
    }
    // Build new field infos, doc values, and return a filter reader
    final FieldInfo newVersionInfo;
    if (versionInfo == null) {
        // Find a free field number
        int fieldNumber = 0;
        for (FieldInfo fi : fieldInfos) {
            fieldNumber = Math.max(fieldNumber, fi.number + 1);
        }
        newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, false, fieldNumber, false, true, false,
                IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC, -1,
                Collections.<String, String>emptyMap());
    } else {
        // Preserve the existing field info but ensure numeric doc values.
        newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, versionInfo.isIndexed(), versionInfo.number,
                versionInfo.hasVectors(), versionInfo.omitsNorms(), versionInfo.hasPayloads(),
                versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(),
                versionInfo.getDocValuesGen(), versionInfo.attributes());
    }
    // Rebuild the FieldInfos with the version field replaced by the new one.
    final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>();
    for (FieldInfo info : fieldInfos) {
        if (info != versionInfo) {
            fieldInfoList.add(info);
        }
    }
    fieldInfoList.add(newVersionInfo);
    final FieldInfos newFieldInfos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()]));
    // Expose the in-memory versions as numeric doc values.
    final NumericDocValues versionValues = new NumericDocValues() {
        @Override
        public long get(int index) {
            return versions.get(index);
        }
    };
    return new FilterAtomicReader(reader) {
        @Override
        public FieldInfos getFieldInfos() {
            return newFieldInfos;
        }

        @Override
        public NumericDocValues getNumericDocValues(String field) throws IOException {
            if (VersionFieldMapper.NAME.equals(field)) {
                return versionValues;
            }
            return super.getNumericDocValues(field);
        }

        @Override
        public Bits getDocsWithField(String field) throws IOException {
            // Every doc gets a version (defaulting to 0), so report all set.
            return new Bits.MatchAllBits(in.maxDoc());
        }
    };
}

From source file:org.elasticsearch.index.merge.policy.IndexUpgraderMergePolicy.java

License:Apache License

/** Return an "upgraded" view of the reader. */
static AtomicReader filter(AtomicReader reader) throws IOException {
    final FieldInfos fieldInfos = reader.getFieldInfos();
    final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME);
    if (versionInfo != null && versionInfo.hasDocValues()) {
        // the reader is a recent one, it has versions and they are stored
        // in a numeric doc values field
        return reader;
    }
    // The segment is an old one, load all versions in memory and hide
    // them behind a numeric doc values field
    final Terms terms = reader.terms(UidFieldMapper.NAME);
    if (terms == null || !terms.hasPayloads()) {
        // The segment doesn't have an _uid field or doesn't have payloads
        // don't try to do anything clever. If any other segment has versions
        // all versions of this segment will be initialized to 0
        return reader;
    }
    final TermsEnum uids = terms.iterator(null);
    // Per-document version numbers; the writer grows its bit width as needed.
    final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.DEFAULT);
    DocsAndPositionsEnum dpe = null;
    for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) {
        dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
        assert dpe != null : "field has payloads";
        for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) {
            dpe.nextPosition();
            final BytesRef payload = dpe.getPayload();
            // An 8-byte payload is interpreted as the encoded version number.
            if (payload != null && payload.length == 8) {
                final long version = Numbers.bytesToLong(payload);
                versions.set(doc, version);
                break;
            }
        }
    }
    // Build new field infos, doc values, and return a filter reader
    final FieldInfo newVersionInfo;
    if (versionInfo == null) {
        // Find a free field number
        int fieldNumber = 0;
        for (FieldInfo fi : fieldInfos) {
            fieldNumber = Math.max(fieldNumber, fi.number + 1);
        }
        newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, false, fieldNumber, false, true, false,
                IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC,
                Collections.<String, String>emptyMap());
    } else {
        // Preserve the existing field info but ensure numeric doc values.
        newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, versionInfo.isIndexed(), versionInfo.number,
                versionInfo.hasVectors(), versionInfo.omitsNorms(), versionInfo.hasPayloads(),
                versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(),
                versionInfo.attributes());
    }
    // Rebuild the FieldInfos with the version field replaced by the new one.
    final ArrayList<FieldInfo> fieldInfoList = new ArrayList<FieldInfo>();
    for (FieldInfo info : fieldInfos) {
        if (info != versionInfo) {
            fieldInfoList.add(info);
        }
    }
    fieldInfoList.add(newVersionInfo);
    final FieldInfos newFieldInfos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()]));
    // Expose the in-memory versions as numeric doc values.
    final NumericDocValues versionValues = new NumericDocValues() {
        @Override
        public long get(int index) {
            return versions.get(index);
        }
    };
    return new FilterAtomicReader(reader) {
        @Override
        public FieldInfos getFieldInfos() {
            return newFieldInfos;
        }

        @Override
        public NumericDocValues getNumericDocValues(String field) throws IOException {
            if (VersionFieldMapper.NAME.equals(field)) {
                return versionValues;
            }
            return super.getNumericDocValues(field);
        }

        @Override
        public Bits getDocsWithField(String field) throws IOException {
            // Every doc gets a version (defaulting to 0), so report all set.
            return new Bits.MatchAllBits(in.maxDoc());
        }
    };
}

From source file:org.elasticsearch.search.aggregations.bucket.MergingBucketsDeferringCollector.java

License:Apache License

@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
    // Seal the buffers of the previous segment before starting a new one.
    finishLeaf();

    context = ctx;
    // Doc ids are recorded as deltas from the previously collected doc so
    // that they compress well in the packed builder.
    docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT);

    return new LeafBucketCollector() {
        int previousDoc = 0;

        @Override
        public void collect(int doc, long bucket) {
            docDeltas.add(doc - previousDoc);
            buckets.add(bucket);
            previousDoc = doc;
            // Track the highest bucket ordinal seen so far.
            maxBucket = Math.max(maxBucket, bucket);
        }
    };
}

From source file:org.elasticsearch.search.aggregations.bucket.MergingBucketsDeferringCollector.java

License:Apache License

/**
 * Rewrites every recorded bucket ordinal through {@code mergeMap}, both for
 * entries of already-finished segments and for the buckets collected in the
 * current (unfinished) segment.
 *
 * @param mergeMap maps an old bucket ordinal (used as an index) to its new ordinal
 */
public void mergeBuckets(long[] mergeMap) {

    // Rewrite the bucket ordinals of every sealed segment entry; doc deltas
    // are unaffected by a bucket merge and are carried over as-is.
    List<Entry> newEntries = new ArrayList<>(entries.size());
    for (Entry sourceEntry : entries) {
        newEntries.add(new Entry(sourceEntry.context, sourceEntry.docDeltas,
                remapBuckets(sourceEntry.buckets, mergeMap).build()));
    }
    entries = newEntries;

    // if there are buckets that have been collected in the current segment
    // we need to update the bucket ordinals there too
    if (buckets.size() > 0) {
        buckets = remapBuckets(buckets.build(), mergeMap);
    }
}

/**
 * Returns a builder containing {@code mergeMap[v]} for each value {@code v}
 * of {@code source}, in order. Extracted to avoid duplicating the remap loop.
 */
private static PackedLongValues.Builder remapBuckets(PackedLongValues source, long[] mergeMap) {
    PackedLongValues.Builder remapped = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    for (PackedLongValues.Iterator itr = source.iterator(); itr.hasNext();) {
        // toIntExact guards against a bucket ordinal that overflows an int index.
        remapped.add(mergeMap[Math.toIntExact(itr.next())]);
    }
    return remapped;
}