org.elasticsearch.index.fielddata.ordinals.InternalGlobalOrdinalsBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.elasticsearch.index.fielddata.ordinals.InternalGlobalOrdinalsBuilder.java

Source

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.fielddata.ordinals;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

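/**
 * {@link GlobalOrdinalsBuilder} implementation that merges the sorted terms of every segment of an index reader,
 * assigns each distinct term a global ordinal, and records for each segment how its own ordinals map onto the
 * global ones.
 */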
public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implements GlobalOrdinalsBuilder {

    /**
     * Fallback value for the ordinal mapping threshold when neither the index-level setting nor the field data type
     * setting provides one. When the number of global ordinals is at most the threshold, the segment ordinal to
     * global ordinal mappings are stored in plain packed ints instead of a monotonic appending buffer.
     */
    public final static int ORDINAL_MAPPING_THRESHOLD_DEFAULT = 2048;
    /** Key of the threshold as read from the settings of the field data type. */
    public final static String ORDINAL_MAPPING_THRESHOLD_KEY = "global_ordinals_compress_threshold";
    /** Key of the threshold as read from the settings passed to {@code build}; supplies the per-field default. */
    public final static String ORDINAL_MAPPING_THRESHOLD_INDEX_SETTING_KEY = "index."
            + ORDINAL_MAPPING_THRESHOLD_KEY;

    /**
     * Creates the builder; both arguments are handed straight to the {@link AbstractIndexComponent} constructor.
     *
     * @param index         the index this component belongs to
     * @param indexSettings the settings of that index
     */
    public InternalGlobalOrdinalsBuilder(Index index, @IndexSettings Settings indexSettings) {
        super(index, indexSettings);
    }

    /**
     * Builds global ordinals for {@code indexReader} by merging the terms of all of its segments.
     * <p>
     * Every distinct term gets the next global ordinal, starting at 0. For each global ordinal the method records
     * the reader index of the first segment that contains the term and the difference between the global ordinal and
     * that segment's local ordinal. For every segment that contains the term it records the difference between the
     * global and the segment ordinal.
     *
     * @param indexReader    the top-level reader; asserted to have more than one leaf
     * @param indexFieldData the field data used to load the per-segment ordinals
     * @param settings       read for {@code acceptable_overhead_ratio} and the index-level mapping threshold
     * @param breakerService the breaker that is told (without breaking) how many bytes the lookups occupy
     * @return field data exposing the global ordinals of all segments
     * @throws IOException if loading the per-segment field data or iterating its terms fails
     */
    @Override
    public IndexFieldData.WithOrdinals build(final IndexReader indexReader,
            IndexFieldData.WithOrdinals indexFieldData, Settings settings, CircuitBreakerService breakerService)
            throws IOException {
        final List<AtomicReaderContext> leaves = indexReader.leaves();
        final int segmentCount = leaves.size();
        assert segmentCount > 1;
        final long startTime = System.currentTimeMillis();

        // Only the segment ord -> global ord mapping honours the configurable overhead ratio. The two buffers
        // indexed by global ord are only consulted to get back to the original value, so they are always COMPACT.
        final float overheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FAST);
        final AppendingPackedLongBuffer globalOrdToFirstSegment = new AppendingPackedLongBuffer(PackedInts.COMPACT);
        final MonotonicAppendingLongBuffer globalOrdToFirstSegmentDelta = new MonotonicAppendingLongBuffer(
                PackedInts.COMPACT);

        // The field data type setting wins; the index-level setting only supplies its default.
        final FieldDataType fieldDataType = indexFieldData.getFieldDataType();
        final int indexLevelThreshold = settings.getAsInt(ORDINAL_MAPPING_THRESHOLD_INDEX_SETTING_KEY,
                ORDINAL_MAPPING_THRESHOLD_DEFAULT);
        final int threshold = fieldDataType.getSettings().getAsInt(ORDINAL_MAPPING_THRESHOLD_KEY,
                indexLevelThreshold);
        final OrdinalMappingSourceBuilder mappingBuilder = new OrdinalMappingSourceBuilder(segmentCount,
                overheadRatio, threshold);

        // The TermIterator constructor fills this array with the field data it loads for each leaf.
        final AtomicFieldData.WithOrdinals[] withOrdinals = new AtomicFieldData.WithOrdinals[segmentCount];
        final TermIterator mergedTerms = new TermIterator(indexFieldData, leaves, withOrdinals);
        long nextGlobalOrd = 0;
        while (mergedTerms.next() != null) {
            globalOrdToFirstSegment.add(mergedTerms.firstReaderIndex());
            globalOrdToFirstSegmentDelta.add(nextGlobalOrd - mergedTerms.firstLocalOrdinal());
            for (TermIterator.LeafSource leaf : mergedTerms.competitiveLeafs()) {
                mappingBuilder.onOrdinal(leaf.context.ord, leaf.tenum.ord(), nextGlobalOrd);
            }
            nextGlobalOrd++;
        }

        // RAM held by the two lookups that are indexed by global ordinal
        globalOrdToFirstSegment.freeze();
        globalOrdToFirstSegmentDelta.freeze();
        final long globalLookupBytes = globalOrdToFirstSegment.ramBytesUsed()
                + globalOrdToFirstSegmentDelta.ramBytesUsed();

        final long maxOrd = nextGlobalOrd;
        final OrdinalMappingSource[] segmentOrdToGlobalOrdLookups = mappingBuilder.build(maxOrd);
        // plus the RAM held by the per-segment segment ord -> global ord lookups
        final long memorySizeInBytes = globalLookupBytes + mappingBuilder.getMemorySizeInBytes();
        breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);

        if (logger.isDebugEnabled()) {
            // the simple name is taken from the array class, so it ends with []
            logger.debug("Global-ordinals[{}][{}][{}] took {} ms",
                    segmentOrdToGlobalOrdLookups.getClass().getSimpleName(),
                    indexFieldData.getFieldNames().fullName(), maxOrd, (System.currentTimeMillis() - startTime));
        }
        return new InternalGlobalOrdinalsIndexFieldData(indexFieldData.index(), settings,
                indexFieldData.getFieldNames(), fieldDataType, withOrdinals, globalOrdToFirstSegment,
                globalOrdToFirstSegmentDelta, segmentOrdToGlobalOrdLookups, memorySizeInBytes);
    }

    /**
     * Per-segment source of global ordinals. The builder in this file leaves the array slot of a segment
     * {@code null} when that segment contains every term, because its segment ordinals can then be used as global
     * ordinals directly.
     */
    public interface OrdinalMappingSource {

        /**
         * Wraps the ordinals of one segment so that they are reported as global ordinals.
         *
         * @param segmentOrdinals the ordinals of the segment this source was built for
         * @return ordinals that yield global ordinals for the same documents
         */
        Ordinals.Docs globalOrdinals(Ordinals.Docs segmentOrdinals);

    }

    /**
     * {@link Ordinals.Docs} decorator that reads ordinals from a segment and reports them as global ordinals.
     * Document positioning and the multi-valued flag are delegated to the wrapped segment ordinals; subclasses only
     * supply the translation of a single ordinal through {@link #getGlobalOrd(long)}.
     */
    public static abstract class GlobalOrdinalMapping implements Ordinals.Docs {

        protected final Ordinals.Docs segmentOrdinals;
        protected final long maxOrd;
        private final long memorySizeInBytes;

        /** The global ordinal produced by the most recent {@link #getOrd(int)} or {@link #nextOrd()} call. */
        protected long currentGlobalOrd;

        private GlobalOrdinalMapping(Ordinals.Docs segmentOrdinals, long memorySizeInBytes, long maxOrd) {
            this.maxOrd = maxOrd;
            this.memorySizeInBytes = memorySizeInBytes;
            this.segmentOrdinals = segmentOrdinals;
        }

        /**
         * Translates a segment ordinal into the matching global ordinal.
         *
         * @param segmentOrd an ordinal of the wrapped segment
         * @return the global ordinal of the same term
         */
        public abstract long getGlobalOrd(long segmentOrd);

        /** Returns the {@code maxOrd} value this mapping was created with. */
        @Override
        public final long getMaxOrd() {
            return maxOrd;
        }

        /** Delegates to the wrapped segment ordinals. */
        @Override
        public final boolean isMultiValued() {
            return segmentOrdinals.isMultiValued();
        }

        /** Positions the wrapped segment ordinals on {@code docId} and returns whatever they return. */
        @Override
        public final int setDocument(int docId) {
            return segmentOrdinals.setDocument(docId);
        }

        /** Returns the global ordinal remembered from the last {@link #getOrd(int)} or {@link #nextOrd()} call. */
        @Override
        public final long currentOrd() {
            return currentGlobalOrd;
        }

        /**
         * Looks up the segment ordinal of {@code docId} and translates it. A missing segment ordinal is passed
         * through untranslated.
         */
        @Override
        public final long getOrd(int docId) {
            final long localOrd = segmentOrdinals.getOrd(docId);
            currentGlobalOrd = (localOrd == Ordinals.MISSING_ORDINAL)
                    ? Ordinals.MISSING_ORDINAL
                    : getGlobalOrd(localOrd);
            return currentGlobalOrd;
        }

        /** Advances the wrapped segment ordinals and translates the ordinal they return, without a missing check. */
        @Override
        public final long nextOrd() {
            currentGlobalOrd = getGlobalOrd(segmentOrdinals.nextOrd());
            return currentGlobalOrd;
        }

    }

    /**
     * Collects, for every segment, the difference between each segment ordinal and its global ordinal, and turns the
     * collected deltas into one {@link OrdinalMappingSource} per segment.
     */
    private final static class OrdinalMappingSourceBuilder {

        final MonotonicAppendingLongBuffer[] segmentOrdToGlobalOrdDeltas;
        final float acceptableOverheadRatio;
        final int numSegments;
        final int threshold;

        long memorySizeInBytesCounter;

        /**
         * @param numSegments             number of segments, i.e. number of delta buffers to maintain
         * @param acceptableOverheadRatio overhead ratio used for the delta buffers and the packed ints
         * @param threshold               maximum number of global ordinals for which packed ints are used
         */
        private OrdinalMappingSourceBuilder(int numSegments, float acceptableOverheadRatio, int threshold) {
            segmentOrdToGlobalOrdDeltas = new MonotonicAppendingLongBuffer[numSegments];
            for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) {
                segmentOrdToGlobalOrdDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
            }
            this.numSegments = numSegments;
            this.acceptableOverheadRatio = acceptableOverheadRatio;
            this.threshold = threshold;
        }

        /**
         * Appends the delta {@code globalOrdinal - segmentOrdinal} to the buffer of segment {@code readerIndex}.
         * The delta is appended rather than stored at position {@code segmentOrdinal}, while the lookups built from
         * the buffer index it by segment ordinal, so the calls for one segment are expected to arrive in segment
         * ordinal order starting at 0.
         */
        public void onOrdinal(int readerIndex, long segmentOrdinal, long globalOrdinal) {
            long delta = globalOrdinal - segmentOrdinal;
            segmentOrdToGlobalOrdDeltas[readerIndex].add(delta);
        }

        /**
         * Creates the per-segment mapping sources and accumulates their memory usage.
         *
         * @param maxOrd the total number of global ordinals
         * @return one entry per segment; an entry is {@code null} when the segment contains every term, because its
         *         segment ordinals are then identical to the global ordinals and no lookup is needed
         */
        public OrdinalMappingSource[] build(long maxOrd) {
            // If there are fewer ordinals than the predefined threshold, it is better to put the segment ordinal to
            // global ordinal mapping in packed ints: the number of values is small and will most likely fit in the
            // CPU caches, so MonotonicAppendingLongBuffer's compression would just be unnecessary.
            if (maxOrd <= threshold) {
                return buildPackedIntSources(maxOrd);
            }
            return buildCompressedSources(maxOrd);
        }

        /** Copies the collected deltas of each segment that needs a lookup into a packed ints array. */
        private OrdinalMappingSource[] buildPackedIntSources(long maxOrd) {
            final int bitsPerDelta = PackedInts.bitsRequired(maxOrd);
            PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments];
            for (int readerIndex = 0; readerIndex < numSegments; readerIndex++) {
                MonotonicAppendingLongBuffer deltas = segmentOrdToGlobalOrdDeltas[readerIndex];
                if (deltas.size() == maxOrd) {
                    // The segment contains all the values, so segment ordinals can be used as global ordinals.
                    // This saves an extra lookup per hit. It is checked before any copying so that no packed
                    // array is allocated and filled only to be thrown away.
                    sources[readerIndex] = null;
                    continue;
                }
                // Rebuilding from MonotonicAppendingLongBuffer to PackedInts.Mutable is fast. The cast is safe
                // because size() < maxOrd <= threshold, and threshold is an int.
                final int numOrds = (int) deltas.size();
                PackedInts.Mutable packedDeltas = PackedInts.getMutable(numOrds, bitsPerDelta,
                        acceptableOverheadRatio);
                for (int ordIndex = 0; ordIndex < numOrds; ordIndex++) {
                    packedDeltas.set(ordIndex, deltas.get(ordIndex));
                }
                long ramUsed = packedDeltas.ramBytesUsed();
                sources[readerIndex] = new PackedIntOrdinalMappingSource(packedDeltas, ramUsed, maxOrd);
                memorySizeInBytesCounter += ramUsed;
            }
            return sources;
        }

        /** Freezes the collected delta buffer of each segment that needs a lookup and uses it directly. */
        private OrdinalMappingSource[] buildCompressedSources(long maxOrd) {
            // Typed array, like the packed variant, so that the array's class name identifies the implementation.
            CompressedOrdinalMappingSource[] sources = new CompressedOrdinalMappingSource[numSegments];
            for (int readerIndex = 0; readerIndex < numSegments; readerIndex++) {
                MonotonicAppendingLongBuffer deltas = segmentOrdToGlobalOrdDeltas[readerIndex];
                if (deltas.size() == maxOrd) {
                    // The segment contains all the values: segment ordinals are global ordinals already.
                    sources[readerIndex] = null;
                    continue;
                }
                deltas.freeze();
                long ramUsed = deltas.ramBytesUsed();
                sources[readerIndex] = new CompressedOrdinalMappingSource(deltas, ramUsed, maxOrd);
                memorySizeInBytesCounter += ramUsed;
            }
            return sources;
        }

        /** Returns the bytes used by the sources created so far by {@link #build(long)}. */
        public long getMemorySizeInBytes() {
            return memorySizeInBytesCounter;
        }
    }

    /**
     * {@link OrdinalMappingSource} whose per-segment deltas are held in a {@link MonotonicAppendingLongBuffer}.
     */
    private final static class CompressedOrdinalMappingSource implements OrdinalMappingSource {

        private final long maxOrd;
        private final long memorySizeInBytes;
        private final MonotonicAppendingLongBuffer globalOrdinalMapping;

        /**
         * @param deltas      for each segment ordinal, the amount to add to obtain the global ordinal
         * @param ramBytes    memory size handed on to the ordinals created by this source
         * @param maxGlobal   value reported as the maximum ordinal by the ordinals created by this source
         */
        private CompressedOrdinalMappingSource(MonotonicAppendingLongBuffer deltas, long ramBytes, long maxGlobal) {
            this.maxOrd = maxGlobal;
            this.memorySizeInBytes = ramBytes;
            this.globalOrdinalMapping = deltas;
        }

        /** Wraps {@code segmentOrdinals} in a view that translates through the delta buffer. */
        @Override
        public Ordinals.Docs globalOrdinals(Ordinals.Docs segmentOrdinals) {
            return new GlobalOrdinalsDocs(segmentOrdinals, globalOrdinalMapping, memorySizeInBytes, maxOrd);
        }

        /** Global ordinals view that looks the delta up in a {@link MonotonicAppendingLongBuffer}. */
        private final static class GlobalOrdinalsDocs extends GlobalOrdinalMapping {

            private final MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup;

            private GlobalOrdinalsDocs(Ordinals.Docs segmentOrdinals, MonotonicAppendingLongBuffer deltas,
                    long ramBytes, long maxGlobal) {
                super(segmentOrdinals, ramBytes, maxGlobal);
                this.segmentOrdToGlobalOrdLookup = deltas;
            }

            /** The buffer stores deltas, so the global ordinal is the segment ordinal plus the stored value. */
            @Override
            public long getGlobalOrd(long segmentOrd) {
                final long delta = segmentOrdToGlobalOrdLookup.get(segmentOrd);
                return segmentOrd + delta;
            }
        }

    }

    /**
     * {@link OrdinalMappingSource} whose per-segment deltas are held in a {@link PackedInts.Reader}.
     */
    private static final class PackedIntOrdinalMappingSource implements OrdinalMappingSource {

        private final long maxOrd;
        private final long memorySizeInBytes;
        private final PackedInts.Reader segmentOrdToGlobalOrdLookup;

        /**
         * @param deltas      for each segment ordinal, the amount to add to obtain the global ordinal
         * @param ramBytes    memory size handed on to the ordinals created by this source
         * @param maxGlobal   value reported as the maximum ordinal by the ordinals created by this source
         */
        private PackedIntOrdinalMappingSource(PackedInts.Reader deltas, long ramBytes, long maxGlobal) {
            this.maxOrd = maxGlobal;
            this.memorySizeInBytes = ramBytes;
            this.segmentOrdToGlobalOrdLookup = deltas;
        }

        /** Wraps {@code segmentOrdinals} in a view that translates through the packed deltas. */
        @Override
        public Ordinals.Docs globalOrdinals(Ordinals.Docs segmentOrdinals) {
            return new GlobalOrdinalsDocs(segmentOrdinals, memorySizeInBytes, maxOrd, segmentOrdToGlobalOrdLookup);
        }

        /** Global ordinals view that looks the delta up in a {@link PackedInts.Reader}. */
        private final static class GlobalOrdinalsDocs extends GlobalOrdinalMapping {

            private final PackedInts.Reader segmentOrdToGlobalOrdLookup;

            private GlobalOrdinalsDocs(Ordinals.Docs segmentOrdinals, long ramBytes, long maxGlobal,
                    PackedInts.Reader deltas) {
                super(segmentOrdinals, ramBytes, maxGlobal);
                this.segmentOrdToGlobalOrdLookup = deltas;
            }

            /** The reader is indexed by int, hence the narrowing cast of the segment ordinal. */
            @Override
            public long getGlobalOrd(long segmentOrd) {
                final long delta = segmentOrdToGlobalOrdLookup.get((int) segmentOrd);
                return segmentOrd + delta;
            }
        }

    }

    /**
     * Merges the terms enums of all segments and returns every distinct term once, in the order of
     * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}. After each {@link #next()} call the segments positioned
     * on the returned term are available through {@link #competitiveLeafs()}.
     */
    private final static class TermIterator implements BytesRefIterator {

        /** Segments not yet consumed, ordered by their current term. */
        private final LeafSourceQueue sources;
        /** Segments whose current term is the one last returned by {@link #next()}. */
        private final List<LeafSource> competitiveLeafs = new ArrayList<>();

        /**
         * Loads the field data of every leaf, stores it in {@code withOrdinals} at the leaf's list position, and
         * queues each leaf whose terms enum yields at least one term.
         */
        private TermIterator(IndexFieldData.WithOrdinals indexFieldData, List<AtomicReaderContext> leaves,
                AtomicFieldData.WithOrdinals[] withOrdinals) throws IOException {
            final int leafCount = leaves.size();
            this.sources = new LeafSourceQueue(leafCount);
            for (int slot = 0; slot < leafCount; slot++) {
                final AtomicReaderContext leaf = leaves.get(slot);
                withOrdinals[slot] = indexFieldData.load(leaf);
                final LeafSource candidate = new LeafSource(withOrdinals[slot], leaf);
                if (candidate.current == null) {
                    // this leaf has no terms at all, so it never takes part in the merge
                    continue;
                }
                sources.add(candidate);
            }
        }

        /**
         * Advances to the next distinct term.
         *
         * @return the next term, or {@code null} once every segment is exhausted
         */
        public BytesRef next() throws IOException {
            // Move the leaves that produced the previous term forward and re-queue those that still have terms.
            for (int i = 0, count = competitiveLeafs.size(); i < count; i++) {
                final LeafSource previous = competitiveLeafs.get(i);
                if (previous.next() != null) {
                    sources.add(previous);
                }
            }
            competitiveLeafs.clear();
            if (sources.size() == 0) {
                return null;
            }

            // Take the smallest leaf, then every further leaf that is positioned on the same term.
            final LeafSource first = sources.pop();
            competitiveLeafs.add(first);
            while (sources.size() > 0 && first.current.equals(sources.top().current)) {
                competitiveLeafs.add(sources.pop());
            }
            return first.current;
        }

        @Override
        public Comparator<BytesRef> getComparator() {
            return BytesRef.getUTF8SortedAsUnicodeComparator();
        }

        /** The leaves positioned on the term last returned by {@link #next()}, in the order they were popped. */
        List<LeafSource> competitiveLeafs() throws IOException {
            return competitiveLeafs;
        }

        /** Reader index ({@code context.ord}) of the first leaf holding the current term. */
        int firstReaderIndex() {
            final LeafSource first = competitiveLeafs.get(0);
            return first.context.ord;
        }

        /** Ordinal of the current term within the first leaf holding it. */
        long firstLocalOrdinal() throws IOException {
            final LeafSource first = competitiveLeafs.get(0);
            return first.tenum.ord();
        }

        /** One segment taking part in the merge: its terms enum, its reader context and its current term. */
        private static class LeafSource {

            final TermsEnum tenum;
            final AtomicReaderContext context;

            /** The term the enum is positioned on; {@code null} once the enum is exhausted. */
            BytesRef current;

            private LeafSource(AtomicFieldData.WithOrdinals afd, AtomicReaderContext context) throws IOException {
                this.context = context;
                this.tenum = afd.getTermsEnum();
                this.current = this.tenum.next();
            }

            /** Advances the enum and remembers the term it lands on. */
            BytesRef next() throws IOException {
                current = tenum.next();
                return current;
            }

        }

        /** Orders leaves by current term and, for equal terms, by ascending reader index. */
        private final static class LeafSourceQueue extends PriorityQueue<LeafSource> {

            private final Comparator<BytesRef> termComp = BytesRef.getUTF8SortedAsUnicodeComparator();

            LeafSourceQueue(int size) {
                super(size);
            }

            @Override
            protected boolean lessThan(LeafSource termsA, LeafSource termsB) {
                final int byTerm = termComp.compare(termsA.current, termsB.current);
                return byTerm < 0 || (byTerm == 0 && termsA.context.ord < termsB.context.ord);
            }
        }

    }
}