Java tutorial: Apache Druid's realtime Sink (org.apache.druid.segment.realtime.plumber.Sink)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.segment.realtime.plumber;

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.ReferenceCountingSegment;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexAddResult;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IndexSizeExceededException;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.realtime.FireHydrant;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.Overshadowable;
import org.apache.druid.timeline.partition.ShardSpec;
import org.joda.time.Interval;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;

public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
{
  private static final IncrementalIndexAddResult ALREADY_SWAPPED =
      new IncrementalIndexAddResult(-1, -1, null, "write after index swapped");

  private final Object hydrantLock = new Object();
  private final Interval interval;
  private final DataSchema schema;
  private final ShardSpec shardSpec;
  private final String version;
  private final int maxRowsInMemory;
  private final long maxBytesInMemory;
  private final boolean reportParseExceptions;
  private final CopyOnWriteArrayList<FireHydrant> hydrants = new CopyOnWriteArrayList<>();
  private final LinkedHashSet<String> dimOrder = new LinkedHashSet<>();
  private final AtomicInteger numRowsExcludingCurrIndex = new AtomicInteger();
  private volatile FireHydrant currHydrant;
  private volatile boolean writable = true;
  private final String dedupColumn;
  private final Set<Long> dedupSet = new HashSet<>();

  public Sink(
      Interval interval,
      DataSchema schema,
      ShardSpec shardSpec,
      String version,
      int maxRowsInMemory,
      long maxBytesInMemory,
      boolean reportParseExceptions,
      String dedupColumn
  )
  {
    this.schema = schema;
    this.shardSpec = shardSpec;
    this.interval = interval;
    this.version = version;
    this.maxRowsInMemory = maxRowsInMemory;
    this.maxBytesInMemory = maxBytesInMemory;
    this.reportParseExceptions = reportParseExceptions;
    this.dedupColumn = dedupColumn;

    makeNewCurrIndex(interval.getStartMillis(), schema);
  }

  public Sink(
      Interval interval,
      DataSchema schema,
      ShardSpec shardSpec,
      String version,
      int maxRowsInMemory,
      long maxBytesInMemory,
      boolean reportParseExceptions,
      String dedupColumn,
      List<FireHydrant> hydrants
  )
  {
    this.schema = schema;
    this.shardSpec = shardSpec;
    this.interval = interval;
    this.version = version;
    this.maxRowsInMemory = maxRowsInMemory;
    this.maxBytesInMemory = maxBytesInMemory;
    this.reportParseExceptions = reportParseExceptions;
    this.dedupColumn = dedupColumn;

    int maxCount = -1;
    for (int i = 0; i < hydrants.size(); ++i) {
      final FireHydrant hydrant = hydrants.get(i);
      if (hydrant.getCount() <= maxCount) {
        throw new ISE("hydrant[%s] not the right count[%s]", hydrant, i);
      }
      maxCount = hydrant.getCount();
      ReferenceCountingSegment segment = hydrant.getIncrementedSegment();
      try {
        numRowsExcludingCurrIndex.addAndGet(segment.asQueryableIndex().getNumRows());
      }
      finally {
        segment.decrement();
      }
    }
    this.hydrants.addAll(hydrants);

    makeNewCurrIndex(interval.getStartMillis(), schema);
  }

  public void clearDedupCache()
  {
    dedupSet.clear();
  }

  public Interval getInterval()
  {
    return interval;
  }

  public FireHydrant getCurrHydrant()
  {
    return currHydrant;
  }

  public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException
  {
    if (currHydrant == null) {
      throw new IAE("No currHydrant but given row[%s]", row);
    }

    synchronized (hydrantLock) {
      if (!writable) {
        return Plumber.NOT_WRITABLE;
      }

      IncrementalIndex index = currHydrant.getIndex();
      if (index == null) {
        return ALREADY_SWAPPED; // the hydrant was swapped without being replaced
      }
      if (checkInDedupSet(row)) {
        return Plumber.DUPLICATE;
      }
      return index.add(row, skipMaxRowsInMemoryCheck);
    }
  }

  public boolean canAppendRow()
  {
    synchronized (hydrantLock) {
      return writable && currHydrant != null && currHydrant.getIndex().canAppendRow();
    }
  }

  public boolean isEmpty()
  {
    synchronized (hydrantLock) {
      return hydrants.size() == 1 && currHydrant.getIndex().isEmpty();
    }
  }

  public boolean isWritable()
  {
    return writable;
  }

  /**
   * If currHydrant is A, creates a new index B, sets currHydrant to B and returns A.
   *
   * @return the hydrant that was current before the swap
   */
  public FireHydrant swap()
  {
    return makeNewCurrIndex(interval.getStartMillis(), schema);
  }

  public boolean swappable()
  {
    synchronized (hydrantLock) {
      return writable && currHydrant.getIndex() != null && currHydrant.getIndex().size() != 0;
    }
  }

  public boolean finished()
  {
    return !writable;
  }

  /**
   * Marks sink as 'finished', preventing further writes.
   * @return 'true' if sink was successfully finished, 'false' if sink was already finished
   */
  public boolean finishWriting()
  {
    synchronized (hydrantLock) {
      if (!writable) {
        return false;
      }
      writable = false;
      clearDedupCache();
    }
    return true;
  }

  public DataSegment getSegment()
  {
    return new DataSegment(
        schema.getDataSource(),
        interval,
        version,
        ImmutableMap.of(),
        Collections.emptyList(),
        Lists.transform(Arrays.asList(schema.getAggregators()), AggregatorFactory::getName),
        shardSpec,
        null,
        0
    );
  }

  public int getNumRows()
  {
    synchronized (hydrantLock) {
      return numRowsExcludingCurrIndex.get() + getNumRowsInMemory();
    }
  }

  public int getNumRowsInMemory()
  {
    synchronized (hydrantLock) {
      IncrementalIndex index = currHydrant.getIndex();
      if (index == null) {
        return 0;
      }
      return currHydrant.getIndex().size();
    }
  }

  public long getBytesInMemory()
  {
    synchronized (hydrantLock) {
      IncrementalIndex index = currHydrant.getIndex();
      if (index == null) {
        return 0;
      }
      return currHydrant.getIndex().getBytesInMemory().get();
    }
  }

  /**
   * Returns true if the row's dedup column value has already been seen by this sink,
   * i.e. the row is a duplicate and should be dropped.
   */
  private boolean checkInDedupSet(InputRow row)
  {
    if (dedupColumn != null) {
      Object value = row.getRaw(dedupColumn);
      if (value != null) {
        if (value instanceof List) {
          throw new IAE("Dedup on multi-value field not supported");
        }
        Long pk;
        if (value instanceof Long || value instanceof Integer) {
          pk = ((Number) value).longValue();
        } else {
          // use long type hashcode to reduce heap cost.
          // maybe hash collision, but it's more important to avoid OOM
          pk = pkHash(String.valueOf(value));
        }
        if (dedupSet.contains(pk)) {
          return true;
        }
        dedupSet.add(pk);
      }
    }
    return false;
  }

  private long pkHash(String s)
  {
    long seed = 131; // 31 131 1313 13131 131313 etc.. BKDRHash
    long hash = 0;
    for (int i = 0; i < s.length(); i++) {
      hash = (hash * seed) + s.charAt(i);
    }
    return hash;
  }

  private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema)
  {
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(minTimestamp)
        .withTimestampSpec(schema.getParser())
        .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
        .withDimensionsSpec(schema.getParser())
        .withMetrics(schema.getAggregators())
        .withRollup(schema.getGranularitySpec().isRollup())
        .build();

    final IncrementalIndex newIndex = new IncrementalIndex.Builder()
        .setIndexSchema(indexSchema)
        .setReportParseExceptions(reportParseExceptions)
        .setMaxRowCount(maxRowsInMemory)
        .setMaxBytesInMemory(maxBytesInMemory)
        .buildOnheap();

    final FireHydrant old;
    synchronized (hydrantLock) {
      if (writable) {
        old = currHydrant;
        int newCount = 0;
        int numHydrants = hydrants.size();
        if (numHydrants > 0) {
          FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
          newCount = lastHydrant.getCount() + 1;
          if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
            Map<String, ColumnCapabilitiesImpl> oldCapabilities;
            if (lastHydrant.hasSwapped()) {
              oldCapabilities = new HashMap<>();
              ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment();
              try {
                QueryableIndex oldIndex = segment.asQueryableIndex();
                for (String dim : oldIndex.getAvailableDimensions()) {
                  dimOrder.add(dim);
                  oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities());
                }
              }
              finally {
                segment.decrement();
              }
            } else {
              IncrementalIndex oldIndex = lastHydrant.getIndex();
              dimOrder.addAll(oldIndex.getDimensionOrder());
              oldCapabilities = oldIndex.getColumnCapabilities();
            }
            newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
          }
        }
        currHydrant = new FireHydrant(newIndex, newCount, getSegment().getId());
        if (old != null) {
          numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
        }
        hydrants.add(currHydrant);
      } else {
        // Oops, someone called finishWriting while we were making this new index.
        newIndex.close();
        throw new ISE("finishWriting() called during swap");
      }
    }

    return old;
  }

  @Override
  public Iterator<FireHydrant> iterator()
  {
    return Iterators.filter(
        hydrants.iterator(),
        new Predicate<FireHydrant>()
        {
          @Override
          public boolean apply(FireHydrant input)
          {
            final IncrementalIndex index = input.getIndex();
            return index == null || index.size() != 0;
          }
        }
    );
  }

  @Override
  public String toString()
  {
    return "Sink{" +
           "interval=" + interval +
           ", schema=" + schema +
           '}';
  }

  @Override
  public boolean overshadows(Sink other)
  {
    // Sink is currently used in timeline only for querying stream data.
    // In this case, sinks never overshadow each other.
    return false;
  }

  @Override
  public int getStartRootPartitionId()
  {
    return shardSpec.getStartRootPartitionId();
  }

  @Override
  public int getEndRootPartitionId()
  {
    return shardSpec.getEndRootPartitionId();
  }

  @Override
  public String getVersion()
  {
    return version;
  }

  @Override
  public short getMinorVersion()
  {
    return shardSpec.getMinorVersion();
  }

  @Override
  public short getAtomicUpdateGroupSize()
  {
    return shardSpec.getAtomicUpdateGroupSize();
  }
}
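
To see how the pieces fit together, here is a minimal lifecycle sketch. It is not part of the Druid codebase: the ingestAndSeal helper is hypothetical, and the Sink and InputRow arguments are assumed to be constructed elsewhere with Druid's own APIs. The sketch only calls methods defined in the Sink class above: add() buffers a row in the current in-memory index, swappable()/swap() rotate in a fresh index and hand back the old hydrant for persisting, and finishWriting() seals the sink so later add() calls return Plumber.NOT_WRITABLE.

    import org.apache.druid.data.input.InputRow;
    import org.apache.druid.segment.incremental.IncrementalIndexAddResult;
    import org.apache.druid.segment.incremental.IndexSizeExceededException;
    import org.apache.druid.segment.realtime.FireHydrant;
    import org.apache.druid.segment.realtime.plumber.Sink;

    public class SinkLifecycleExample
    {
      // Hypothetical helper: appends one row, rotates the in-memory index if it holds
      // data, and finally marks the sink read-only.
      static void ingestAndSeal(Sink sink, InputRow row) throws IndexSizeExceededException
      {
        // Append the row; the result reports how the add was handled
        // (e.g. a duplicate, a sealed sink, or a successful insert).
        IncrementalIndexAddResult result = sink.add(row, false);

        // If the sink is still writable and the current index is non-empty,
        // swap in a fresh index; the returned hydrant is the one to persist.
        if (sink.swappable()) {
          FireHydrant toPersist = sink.swap();
          // ... hand toPersist to whatever persists hydrants to disk ...
        }

        // Seal the sink; subsequent add() calls return Plumber.NOT_WRITABLE.
        sink.finishWriting();
      }
    }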