Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.druid.segment; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.collect.Lists; import com.google.common.primitives.Ints; import it.unimi.dsi.fastutil.ints.IntArrays; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.objects.Object2IntMap; import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; import it.unimi.dsi.fastutil.objects.Object2IntSortedMap; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.data.ArrayBasedIndexedInts; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.IndexedIterable; import org.apache.druid.segment.filter.BooleanValueMatcher; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRow; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; import javax.annotation.Nullable; import java.util.Arrays; import java.util.BitSet; import java.util.Iterator; import java.util.List; import java.util.concurrent.locks.ReentrantReadWriteLock; public class StringDimensionIndexer implements DimensionIndexer<Integer, int[], String> { @Nullable private static String emptyToNullIfNeeded(@Nullable Object o) { return o != null ? NullHandling.emptyToNullIfNeeded(o.toString()) : null; } private static final int ABSENT_VALUE_ID = -1; private static class DimensionDictionary { @Nullable private String minValue = null; @Nullable private String maxValue = null; private int idForNull = ABSENT_VALUE_ID; private final Object2IntMap<String> valueToId = new Object2IntOpenHashMap<>(); private final List<String> idToValue = Lists.newArrayList(); private final ReentrantReadWriteLock lock; public DimensionDictionary() { this.lock = new ReentrantReadWriteLock(); valueToId.defaultReturnValue(-1); } public int getId(String value) { lock.readLock().lock(); try { if (value == null) { return idForNull; } return valueToId.getInt(value); } finally { lock.readLock().unlock(); } } public String getValue(int id) { lock.readLock().lock(); try { if (id == idForNull) { return null; } return idToValue.get(id); } finally { lock.readLock().unlock(); } } public int size() { lock.readLock().lock(); try { // using idToValue rather than valueToId because the valueToId doesn't account null value, if it is present. return idToValue.size(); } finally { lock.readLock().unlock(); } } public int add(@Nullable String originalValue) { lock.writeLock().lock(); try { if (originalValue == null) { if (idForNull == ABSENT_VALUE_ID) { idForNull = idToValue.size(); idToValue.add(null); } return idForNull; } int prev = valueToId.getInt(originalValue); if (prev >= 0) { return prev; } final int index = idToValue.size(); valueToId.put(originalValue, index); idToValue.add(originalValue); minValue = minValue == null || minValue.compareTo(originalValue) > 0 ? originalValue : minValue; maxValue = maxValue == null || maxValue.compareTo(originalValue) < 0 ? originalValue : maxValue; return index; } finally { lock.writeLock().unlock(); } } public String getMinValue() { lock.readLock().lock(); try { return minValue; } finally { lock.readLock().unlock(); } } public String getMaxValue() { lock.readLock().lock(); try { return maxValue; } finally { lock.readLock().unlock(); } } public SortedDimensionDictionary sort() { lock.readLock().lock(); try { return new SortedDimensionDictionary(idToValue, idToValue.size()); } finally { lock.readLock().unlock(); } } } private static class SortedDimensionDictionary { private final List<String> sortedVals; private final int[] idToIndex; private final int[] indexToId; public SortedDimensionDictionary(List<String> idToValue, int length) { Object2IntSortedMap<String> sortedMap = new Object2IntRBTreeMap<>(Comparators.naturalNullsFirst()); for (int id = 0; id < length; id++) { String value = idToValue.get(id); sortedMap.put(value, id); } this.sortedVals = Lists.newArrayList(sortedMap.keySet()); this.idToIndex = new int[length]; this.indexToId = new int[length]; int index = 0; for (IntIterator iterator = sortedMap.values().iterator(); iterator.hasNext();) { int id = iterator.nextInt(); idToIndex[id] = index; indexToId[index] = id; index++; } } public int getUnsortedIdFromSortedId(int index) { return indexToId[index]; } public int getSortedIdFromUnsortedId(int id) { return idToIndex[id]; } public String getValueFromSortedId(int index) { return sortedVals.get(index); } } private final DimensionDictionary dimLookup; private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; private SortedDimensionDictionary sortedLookup; private boolean hasMultipleValues = false; public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes) { this.dimLookup = new DimensionDictionary(); this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; this.hasBitmapIndexes = hasBitmapIndexes; } @Override public int[] processRowValsToUnsortedEncodedKeyComponent(Object dimValues, boolean reportParseExceptions) { final int[] encodedDimensionValues; final int oldDictSize = dimLookup.size(); if (dimValues == null) { final int nullId = dimLookup.getId(null); encodedDimensionValues = nullId == ABSENT_VALUE_ID ? new int[] { dimLookup.add(null) } : new int[] { nullId }; } else if (dimValues instanceof List) { List<Object> dimValuesList = (List) dimValues; if (dimValuesList.isEmpty()) { dimLookup.add(null); encodedDimensionValues = IntArrays.EMPTY_ARRAY; } else if (dimValuesList.size() == 1) { encodedDimensionValues = new int[] { dimLookup.add(emptyToNullIfNeeded(dimValuesList.get(0))) }; } else { hasMultipleValues = true; final String[] dimensionValues = new String[dimValuesList.size()]; for (int i = 0; i < dimValuesList.size(); i++) { dimensionValues[i] = emptyToNullIfNeeded(dimValuesList.get(i)); } if (multiValueHandling.needSorting()) { // Sort multival row by their unencoded values first. Arrays.sort(dimensionValues, Comparators.naturalNullsFirst()); } final int[] retVal = new int[dimensionValues.length]; int prevId = -1; int pos = 0; for (String dimensionValue : dimensionValues) { if (multiValueHandling != MultiValueHandling.SORTED_SET) { retVal[pos++] = dimLookup.add(dimensionValue); continue; } int index = dimLookup.add(dimensionValue); if (index != prevId) { prevId = retVal[pos++] = index; } } encodedDimensionValues = pos == retVal.length ? retVal : Arrays.copyOf(retVal, pos); } } else { encodedDimensionValues = new int[] { dimLookup.add(emptyToNullIfNeeded(dimValues)) }; } // If dictionary size has changed, the sorted lookup is no longer valid. if (oldDictSize != dimLookup.size()) { sortedLookup = null; } return encodedDimensionValues; } @Override public long estimateEncodedKeyComponentSize(int[] key) { // string length is being accounted for each time they are referenced, based on dimension handler interface, // even though they are stored just once. It may overestimate the size by a bit, but we wanted to leave // more buffer to be safe long estimatedSize = key.length * Integer.BYTES; estimatedSize += Arrays.stream(key).filter(element -> dimLookup.getValue(element) != null) .mapToLong(element -> dimLookup.getValue(element).length() * Character.BYTES).sum(); return estimatedSize; } public Integer getSortedEncodedValueFromUnsorted(Integer unsortedIntermediateValue) { return sortedLookup().getSortedIdFromUnsortedId(unsortedIntermediateValue); } @Override public Integer getUnsortedEncodedValueFromSorted(Integer sortedIntermediateValue) { return sortedLookup().getUnsortedIdFromSortedId(sortedIntermediateValue); } @Override public CloseableIndexed<String> getSortedIndexedValues() { return new CloseableIndexed<String>() { @Override public int size() { return getCardinality(); } @Override public String get(int index) { return getActualValue(index, true); } @Override public int indexOf(String value) { int id = getEncodedValue(value, false); return id < 0 ? ABSENT_VALUE_ID : getSortedEncodedValueFromUnsorted(id); } @Override public Iterator<String> iterator() { return IndexedIterable.create(this).iterator(); } @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { // nothing to inspect } @Override public void close() { // nothing to close } }; } @Override public String getMinValue() { return dimLookup.getMinValue(); } @Override public String getMaxValue() { return dimLookup.getMaxValue(); } @Override public int getCardinality() { return dimLookup.size(); } @Override public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs) { int lhsLen = lhs.length; int rhsLen = rhs.length; int retVal = Ints.compare(lhsLen, rhsLen); int valsIndex = 0; while (retVal == 0 && valsIndex < lhsLen) { int lhsVal = lhs[valsIndex]; int rhsVal = rhs[valsIndex]; if (lhsVal != rhsVal) { final String lhsValActual = getActualValue(lhsVal, false); final String rhsValActual = getActualValue(rhsVal, false); if (lhsValActual != null && rhsValActual != null) { retVal = lhsValActual.compareTo(rhsValActual); } else if (lhsValActual == null ^ rhsValActual == null) { retVal = lhsValActual == null ? -1 : 1; } } ++valsIndex; } return retVal; } @Override public boolean checkUnsortedEncodedKeyComponentsEqual(int[] lhs, int[] rhs) { return Arrays.equals(lhs, rhs); } @Override public int getUnsortedEncodedKeyComponentHashCode(int[] key) { return Arrays.hashCode(key); } @Override public DimensionSelector makeDimensionSelector(final DimensionSpec spec, final IncrementalIndexRowHolder currEntry, final IncrementalIndex.DimensionDesc desc) { final ExtractionFn extractionFn = spec.getExtractionFn(); final int dimIndex = desc.getIndex(); final int maxId = getCardinality(); class IndexerDimensionSelector implements DimensionSelector, IdLookup { private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts(); private int[] nullIdIntArray; @Override public IndexedInts getRow() { final Object[] dims = currEntry.get().getDims(); int[] indices; if (dimIndex < dims.length) { indices = (int[]) dims[dimIndex]; } else { indices = null; } int[] row = null; int rowSize = 0; // usually due to currEntry's rowIndex is smaller than the row's rowIndex in which this dim first appears if (indices == null || indices.length == 0) { if (hasMultipleValues) { row = IntArrays.EMPTY_ARRAY; rowSize = 0; } else { final int nullId = getEncodedValue(null, false); if (nullId > -1) { if (nullIdIntArray == null) { nullIdIntArray = new int[] { nullId }; } row = nullIdIntArray; rowSize = 1; } else { // doesn't contain nullId, then empty array is used // Choose to use ArrayBasedIndexedInts later, instead of special "empty" IndexedInts, for monomorphism row = IntArrays.EMPTY_ARRAY; rowSize = 0; } } } if (row == null && indices != null && indices.length > 0) { row = indices; rowSize = indices.length; } indexedInts.setValues(row, rowSize); return indexedInts; } @Override public ValueMatcher makeValueMatcher(final String value) { if (extractionFn == null) { final int valueId = lookupId(value); if (valueId >= 0 || value == null) { return new ValueMatcher() { @Override public boolean matches() { Object[] dims = currEntry.get().getDims(); if (dimIndex >= dims.length) { return value == null; } int[] dimsInt = (int[]) dims[dimIndex]; if (dimsInt == null || dimsInt.length == 0) { return value == null; } for (int id : dimsInt) { if (id == valueId) { return true; } } return false; } @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { // nothing to inspect } }; } else { return BooleanValueMatcher.of(false); } } else { // Employ caching BitSet optimization return makeValueMatcher(Predicates.equalTo(value)); } } @Override public ValueMatcher makeValueMatcher(final Predicate<String> predicate) { final BitSet checkedIds = new BitSet(maxId); final BitSet matchingIds = new BitSet(maxId); final boolean matchNull = predicate.apply(null); // Lazy matcher; only check an id if matches() is called. return new ValueMatcher() { @Override public boolean matches() { Object[] dims = currEntry.get().getDims(); if (dimIndex >= dims.length) { return matchNull; } int[] dimsInt = (int[]) dims[dimIndex]; if (dimsInt == null || dimsInt.length == 0) { return matchNull; } for (int id : dimsInt) { if (checkedIds.get(id)) { if (matchingIds.get(id)) { return true; } } else { final boolean matches = predicate.apply(lookupName(id)); checkedIds.set(id); if (matches) { matchingIds.set(id); return true; } } } return false; } @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { // nothing to inspect } }; } @Override public int getValueCardinality() { return maxId; } @Override public String lookupName(int id) { if (id >= maxId) { throw new ISE("id[%d] >= maxId[%d]", id, maxId); } final String strValue = getActualValue(id, false); return extractionFn == null ? strValue : extractionFn.apply(strValue); } @Override public boolean nameLookupPossibleInAdvance() { return true; } @Nullable @Override public IdLookup idLookup() { return extractionFn == null ? this : null; } @Override public int lookupId(String name) { if (extractionFn != null) { throw new UnsupportedOperationException( "cannot perform lookup when applying an extraction function"); } return getEncodedValue(name, false); } @SuppressWarnings("deprecation") @Nullable @Override public Object getObject() { IncrementalIndexRow key = currEntry.get(); if (key == null) { return null; } Object[] dims = key.getDims(); if (dimIndex >= dims.length) { return null; } return convertUnsortedEncodedKeyComponentToActualList((int[]) dims[dimIndex]); } @SuppressWarnings("deprecation") @Override public Class classOfObject() { return Object.class; } @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { // nothing to inspect } } return new IndexerDimensionSelector(); } @Override public ColumnValueSelector<?> makeColumnValueSelector(IncrementalIndexRowHolder currEntry, IncrementalIndex.DimensionDesc desc) { return makeDimensionSelector(DefaultDimensionSpec.of(desc.getName()), currEntry, desc); } @Nullable @Override public Object convertUnsortedEncodedKeyComponentToActualList(int[] key) { if (key == null || key.length == 0) { return null; } if (key.length == 1) { return getActualValue(key[0], false); } else { String[] rowArray = new String[key.length]; for (int i = 0; i < key.length; i++) { String val = getActualValue(key[i], false); rowArray[i] = NullHandling.nullToEmptyIfNeeded(val); } return Arrays.asList(rowArray); } } @Override public ColumnValueSelector convertUnsortedValuesToSorted(ColumnValueSelector selectorWithUnsortedValues) { DimensionSelector dimSelectorWithUnsortedValues = (DimensionSelector) selectorWithUnsortedValues; class SortedDimensionSelector implements DimensionSelector, IndexedInts { @Override public int size() { return dimSelectorWithUnsortedValues.getRow().size(); } @Override public int get(int index) { return sortedLookup().getSortedIdFromUnsortedId(dimSelectorWithUnsortedValues.getRow().get(index)); } @Override public IndexedInts getRow() { return this; } @Override public ValueMatcher makeValueMatcher(@Nullable String value) { throw new UnsupportedOperationException(); } @Override public ValueMatcher makeValueMatcher(Predicate<String> predicate) { throw new UnsupportedOperationException(); } @Override public int getValueCardinality() { return dimSelectorWithUnsortedValues.getValueCardinality(); } @Nullable @Override public String lookupName(int id) { throw new UnsupportedOperationException(); } @Override public boolean nameLookupPossibleInAdvance() { throw new UnsupportedOperationException(); } @Nullable @Override public IdLookup idLookup() { throw new UnsupportedOperationException(); } @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { inspector.visit("dimSelectorWithUnsortedValues", dimSelectorWithUnsortedValues); } @Nullable @Override public Object getObject() { return dimSelectorWithUnsortedValues.getObject(); } @Override public Class classOfObject() { return dimSelectorWithUnsortedValues.classOfObject(); } } return new SortedDimensionSelector(); } @Override public void fillBitmapsFromUnsortedEncodedKeyComponent(int[] key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory) { if (!hasBitmapIndexes) { throw new UnsupportedOperationException("This column does not include bitmap indexes"); } for (int dimValIdx : key) { if (bitmapIndexes[dimValIdx] == null) { bitmapIndexes[dimValIdx] = factory.makeEmptyMutableBitmap(); } bitmapIndexes[dimValIdx].add(rowNum); } } private SortedDimensionDictionary sortedLookup() { return sortedLookup == null ? sortedLookup = dimLookup.sort() : sortedLookup; } private String getActualValue(int intermediateValue, boolean idSorted) { if (idSorted) { return sortedLookup().getValueFromSortedId(intermediateValue); } else { return dimLookup.getValue(intermediateValue); } } private int getEncodedValue(String fullValue, boolean idSorted) { int unsortedId = dimLookup.getId(fullValue); if (idSorted) { return sortedLookup().getSortedIdFromUnsortedId(unsortedId); } else { return unsortedId; } } }