io.druid.segment.realtime.appenderator.AppenderatorPlumber.java Source code

Introduction

Here is the source code for io.druid.segment.realtime.appenderator.AppenderatorPlumber.java, a realtime Plumber implementation that routes incoming rows into an Appenderator and periodically persists, pushes, and publishes the resulting segments.

Source

/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.segment.realtime.appenderator;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Stopwatch;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.metamx.emitter.EmittingLogger;
import io.druid.common.guava.ThreadRenamingCallable;
import io.druid.concurrent.Execs;
import io.druid.data.input.Committer;
import io.druid.data.input.InputRow;
import io.druid.java.util.common.ISE;
import io.druid.java.util.common.StringUtils;
import io.druid.java.util.common.concurrent.ScheduledExecutors;
import io.druid.java.util.common.granularity.Granularity;
import io.druid.java.util.common.guava.Sequence;
import io.druid.query.Query;
import io.druid.query.QueryPlus;
import io.druid.query.QueryRunner;
import io.druid.segment.incremental.IndexSizeExceededException;
import io.druid.segment.indexing.DataSchema;
import io.druid.segment.indexing.RealtimeTuningConfig;
import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.SegmentPublisher;
import io.druid.segment.realtime.plumber.Committers;
import io.druid.segment.realtime.plumber.Plumber;
import io.druid.segment.realtime.plumber.RejectionPolicy;
import io.druid.segment.realtime.plumber.SegmentHandoffNotifier;
import io.druid.segment.realtime.plumber.VersioningPolicy;
import io.druid.server.coordination.DataSegmentAnnouncer;
import io.druid.timeline.DataSegment;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.joda.time.Period;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

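/**
 * A {@link Plumber} backed by an {@link Appenderator}. Incoming rows are bucketed into segments by the
 * schema's segment granularity, new segments are announced as they are created, and a scheduled task
 * periodically pushes and publishes segments whose window period has elapsed.
 */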
public class AppenderatorPlumber implements Plumber {
    private static final EmittingLogger log = new EmittingLogger(AppenderatorPlumber.class);
    private static final int WARN_DELAY = 1000;

    private final DataSchema schema;
    private final RealtimeTuningConfig config;
    private final RejectionPolicy rejectionPolicy;
    private final FireDepartmentMetrics metrics;
    private final DataSegmentAnnouncer segmentAnnouncer;
    private final SegmentPublisher segmentPublisher;
    private final SegmentHandoffNotifier handoffNotifier;
    private final Object handoffCondition = new Object();
    private final Map<Long, SegmentIdentifier> segments = Maps.newConcurrentMap();
    private final Appenderator appenderator;

    private volatile boolean shuttingDown = false;
    private volatile boolean stopped = false;
    private volatile boolean cleanShutdown = true;
    private volatile ScheduledExecutorService scheduledExecutor = null;

    private volatile Supplier<Committer> lastCommitterSupplier = null;

    public AppenderatorPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics,
            DataSegmentAnnouncer segmentAnnouncer, SegmentPublisher segmentPublisher,
            SegmentHandoffNotifier handoffNotifier, Appenderator appenderator) {
        this.schema = schema;
        this.config = config;
        this.rejectionPolicy = config.getRejectionPolicyFactory().create(config.getWindowPeriod());
        this.metrics = metrics;
        this.segmentAnnouncer = segmentAnnouncer;
        this.segmentPublisher = segmentPublisher;
        this.handoffNotifier = handoffNotifier;
        this.appenderator = appenderator;

        log.info("Creating plumber using rejectionPolicy[%s]", getRejectionPolicy());
    }

    public Map<Long, SegmentIdentifier> getSegmentsView() {
        return ImmutableMap.copyOf(segments);
    }

    public DataSchema getSchema() {
        return schema;
    }

    public RealtimeTuningConfig getConfig() {
        return config;
    }

    public RejectionPolicy getRejectionPolicy() {
        return rejectionPolicy;
    }

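    // Starts the handoff notifier and the appenderator, sets up the scheduled executor and the periodic
    // persist/push task, and immediately pushes any segments restored from a previous run.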
    @Override
    public Object startJob() {
        handoffNotifier.start();
        Object retVal = appenderator.startJob();
        initializeExecutors();
        startPersistThread();
        // Push pending sinks bootstrapped from previous run
        mergeAndPush();
        return retVal;
    }

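    // Adds the row to the segment covering its timestamp bucket. Returns -1 if the rejection policy drops
    // the row or the target segment is no longer writable because handoff has already started.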
    @Override
    public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
        final SegmentIdentifier identifier = getSegmentIdentifier(row.getTimestampFromEpoch());
        if (identifier == null) {
            return -1;
        }

        final int numRows;

        try {
            numRows = appenderator.add(identifier, row, committerSupplier);
            lastCommitterSupplier = committerSupplier;
            return numRows;
        } catch (SegmentNotWritableException e) {
            // Segment already started handoff
            return -1;
        }
    }

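    // Delegates query execution directly to the appenderator, which serves the data it currently holds.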
    @Override
    public <T> QueryRunner<T> getQueryRunner(final Query<T> query) {
        return new QueryRunner<T>() {
            @Override
            public Sequence<T> run(final QueryPlus<T> queryPlus, final Map<String, Object> responseContext) {
                return queryPlus.run(appenderator, responseContext);
            }
        };
    }

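    // Requests a persist of all in-memory data and records the time spent blocked in persistAll()
    // as persist back pressure, warning when it exceeds WARN_DELAY.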
    @Override
    public void persist(final Committer committer) {
        final Stopwatch runExecStopwatch = Stopwatch.createStarted();
        appenderator.persistAll(committer);

        final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
        metrics.incrementPersistBackPressureMillis(startDelay);
        if (startDelay > WARN_DELAY) {
            log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
        }
        runExecStopwatch.stop();
    }

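    // Pushes any remaining data, waits until every tracked segment has been handed off, and then closes
    // the handoff notifier, the scheduled executor, and the appenderator. Throws if shutdown was not clean.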
    @Override
    public void finishJob() {
        log.info("Shutting down...");

        shuttingDown = true;

        List<SegmentIdentifier> pending = appenderator.getSegments();
        if (pending.isEmpty()) {
            log.info("No segments to hand off.");
        } else {
            log.info("Pushing segments: %s", Joiner.on(", ").join(pending));
        }

        try {
            if (lastCommitterSupplier != null) {
                // Push all remaining data
                mergeAndPush();
            }

            synchronized (handoffCondition) {
                while (!segments.isEmpty()) {
                    log.info("Waiting to hand off: %s", Joiner.on(", ").join(pending));
                    handoffCondition.wait();
                    pending = appenderator.getSegments();
                }
            }
        } catch (Exception e) {
            throw Throwables.propagate(e);
        } finally {
            stopped = true;
            handoffNotifier.close();
            shutdownExecutors();
            appenderator.close();
        }

        if (!cleanShutdown) {
            throw new ISE("Exception occurred during persist and merge.");
        }
    }

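    // Maps a timestamp to the segment identifier for its granularity bucket, creating and announcing a new
    // segment if none exists yet. Returns null when the rejection policy rejects the timestamp.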
    private SegmentIdentifier getSegmentIdentifier(long timestamp) {
        if (!rejectionPolicy.accept(timestamp)) {
            return null;
        }

        final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
        final VersioningPolicy versioningPolicy = config.getVersioningPolicy();

        final long truncatedTime = segmentGranularity.bucketStart(new DateTime(timestamp)).getMillis();

        SegmentIdentifier retVal = segments.get(truncatedTime);

        if (retVal == null) {
            final Interval interval = new Interval(new DateTime(truncatedTime),
                    segmentGranularity.increment(new DateTime(truncatedTime)));

            retVal = new SegmentIdentifier(schema.getDataSource(), interval, versioningPolicy.getVersion(interval),
                    config.getShardSpec());
            addSegment(retVal);

        }

        return retVal;
    }

    protected void initializeExecutors() {
        if (scheduledExecutor == null) {
            scheduledExecutor = Execs.scheduledSingleThreaded("plumber_scheduled_%d");
        }
    }

    protected void shutdownExecutors() {
        if (scheduledExecutor != null) {
            scheduledExecutor.shutdown();
        }
    }

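    // Tracks the new segment and announces it through the DataSegmentAnnouncer; announcement failures are
    // emitted as alerts rather than propagated.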
    private void addSegment(final SegmentIdentifier identifier) {
        segments.put(identifier.getInterval().getStartMillis(), identifier);
        try {
            segmentAnnouncer.announceSegment(new DataSegment(identifier.getDataSource(), identifier.getInterval(),
                    identifier.getVersion(), ImmutableMap.<String, Object>of(), ImmutableList.<String>of(),
                    ImmutableList.<String>of(), identifier.getShardSpec(), null, 0));
        } catch (IOException e) {
            log.makeAlert(e, "Failed to announce new segment[%s]", identifier.getDataSource())
                    .addData("interval", identifier.getInterval()).emit();
        }
    }

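    // Stops tracking the segment and asks the appenderator to drop its data; a failed drop is only logged.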
    public void dropSegment(final SegmentIdentifier identifier) {
        log.info("Dropping segment: %s", identifier);
        segments.remove(identifier.getInterval().getStartMillis());

        Futures.addCallback(appenderator.drop(identifier), new FutureCallback<Object>() {
            @Override
            public void onSuccess(Object result) {
                log.info("Dropped segment: %s", identifier);
            }

            @Override
            public void onFailure(Throwable e) {
                // TODO: Retry?
                log.warn(e, "Failed to drop segment: %s", identifier);
            }
        });
    }

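    // Schedules mergeAndPush() to run once per segment granularity period, with the first run delayed until
    // the current granularity bucket has closed and the window period has elapsed.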
    private void startPersistThread() {
        final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
        final Period windowPeriod = config.getWindowPeriod();

        final DateTime truncatedNow = segmentGranularity.bucketStart(new DateTime());
        final long windowMillis = windowPeriod.toStandardDuration().getMillis();

        log.info("Expect to run at [%s]", new DateTime().plus(new Duration(System.currentTimeMillis(),
                segmentGranularity.increment(truncatedNow).getMillis() + windowMillis)));

        ScheduledExecutors.scheduleAtFixedRate(scheduledExecutor,
                new Duration(System.currentTimeMillis(),
                        segmentGranularity.increment(truncatedNow).getMillis() + windowMillis),
                new Duration(truncatedNow, segmentGranularity.increment(truncatedNow)),
                new ThreadRenamingCallable<ScheduledExecutors.Signal>(StringUtils.format("%s-overseer-%d",
                        schema.getDataSource(), config.getShardSpec().getPartitionNum())) {
                    @Override
                    public ScheduledExecutors.Signal doCall() {
                        if (stopped) {
                            log.info("Stopping merge-n-push overseer thread");
                            return ScheduledExecutors.Signal.STOP;
                        }

                        mergeAndPush();

                        if (stopped) {
                            log.info("Stopping merge-n-push overseer thread");
                            return ScheduledExecutors.Signal.STOP;
                        } else {
                            return ScheduledExecutors.Signal.REPEAT;
                        }
                    }
                });
    }

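    // Pushes and publishes every segment whose interval starts before the current maximum allowed time
    // minus the window period; while shutting down, all remaining segments are pushed.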
    private void mergeAndPush() {
        final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
        final Period windowPeriod = config.getWindowPeriod();

        final long windowMillis = windowPeriod.toStandardDuration().getMillis();
        log.info("Starting merge and push.");
        DateTime minTimestampAsDate = segmentGranularity.bucketStart(
                new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis));
        long minTimestamp = minTimestampAsDate.getMillis();

        final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
        final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();

        if (shuttingDown) {
            log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
            segmentsToPush.addAll(appenderatorSegments);
        } else {
            log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].",
                    appenderatorSegments.size(), minTimestampAsDate);

            for (SegmentIdentifier segment : appenderatorSegments) {
                final Long intervalStart = segment.getInterval().getStartMillis();
                if (intervalStart < minTimestamp) {
                    log.info("Adding entry [%s] for merge and push.", segment);
                    segmentsToPush.add(segment);
                } else {
                    log.info(
                            "Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.",
                            segment, new DateTime(intervalStart), minTimestampAsDate);
                }
            }
        }

        log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());

        final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>() {
            @Override
            public Void apply(Throwable throwable) {
                final List<String> segmentIdentifierStrings = Lists.transform(segmentsToPush,
                        new Function<SegmentIdentifier, String>() {
                            @Override
                            public String apply(SegmentIdentifier input) {
                                return input.getIdentifierAsString();
                            }
                        });

                log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource())
                        .addData("segments", segmentIdentifierStrings).emit();

                if (shuttingDown) {
                    // We're trying to shut down, and these segments failed to push. Let's just get rid of them.
                    // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                    cleanShutdown = false;
                    for (SegmentIdentifier identifier : segmentsToPush) {
                        dropSegment(identifier);
                    }
                }

                return null;
            }
        };

        // WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
        Futures.addCallback(appenderator.push(segmentsToPush, Committers.nil()),
                new FutureCallback<SegmentsAndMetadata>() {
                    @Override
                    public void onSuccess(SegmentsAndMetadata result) {
                        // Immediately publish after pushing
                        for (DataSegment pushedSegment : result.getSegments()) {
                            try {
                                segmentPublisher.publishSegment(pushedSegment);
                            } catch (Exception e) {
                                errorHandler.apply(e);
                            }
                        }

                        log.info("Published [%,d] sinks.", segmentsToPush.size());
                    }

                    @Override
                    public void onFailure(Throwable e) {
                        log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
                        errorHandler.apply(e);
                    }
                });
    }
}
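
Usage

For context, here is a minimal sketch of how a caller might drive this plumber's lifecycle. It is an illustration only: the example class, its method, and the row source are hypothetical placeholders rather than part of the Druid codebase, the dependencies (schema, tuning config, appenderator, and so on) are assumed to be constructed elsewhere, and the sketch relies on the same imports as the listing above.

// Hypothetical driver; every parameter is assumed to be wired up elsewhere.
public class AppenderatorPlumberExample {
    static void runPlumber(
            DataSchema schema,
            RealtimeTuningConfig config,
            FireDepartmentMetrics metrics,
            DataSegmentAnnouncer segmentAnnouncer,
            SegmentPublisher segmentPublisher,
            SegmentHandoffNotifier handoffNotifier,
            Appenderator appenderator,
            Iterable<InputRow> rows,
            Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
        final AppenderatorPlumber plumber = new AppenderatorPlumber(schema, config, metrics,
                segmentAnnouncer, segmentPublisher, handoffNotifier, appenderator);

        plumber.startJob(); // starts the handoff notifier, the appenderator, and the persist/push schedule
        try {
            for (InputRow row : rows) {
                // add() returns -1 when the row is rejected or its segment has already started handoff
                plumber.add(row, committerSupplier);
            }
            plumber.persist(committerSupplier.get()); // persist whatever is still in memory
        } finally {
            plumber.finishJob(); // push remaining data, wait for handoff, then close resources
        }
    }
}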