Source listing: io.druid.indexing.overlord.autoscaling.PendingTaskBasedWorkerResourceManagementStrategy.java

The complete source code for the class follows.

/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.indexing.overlord.autoscaling;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
import com.metamx.emitter.EmittingLogger;
import io.druid.indexing.common.task.Task;
import io.druid.indexing.overlord.ImmutableWorkerInfo;
import io.druid.indexing.overlord.WorkerTaskRunner;
import io.druid.indexing.overlord.config.WorkerTaskRunnerConfig;
import io.druid.indexing.overlord.setup.WorkerBehaviorConfig;
import io.druid.indexing.overlord.setup.WorkerSelectStrategy;
import io.druid.indexing.worker.Worker;
import io.druid.java.util.common.concurrent.ScheduledExecutorFactory;

import org.joda.time.DateTime;
import org.joda.time.Duration;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;

/**
 * Worker auto-scaling strategy that decides how many worker nodes to provision or
 * terminate by simulating assignment of the runner's pending tasks onto the currently
 * known workers (and onto hypothetical "dummy" workers when no existing worker fits).
 *
 * <p>Thread-safety: all scaling bookkeeping is guarded by {@link #lock}; the public
 * {@code doProvision}/{@code doTerminate} entry points may be called from the
 * scheduler executor.
 */
public class PendingTaskBasedWorkerResourceManagementStrategy extends AbstractWorkerResourceManagementStrategy {
    private static final EmittingLogger log = new EmittingLogger(
            PendingTaskBasedWorkerResourceManagementStrategy.class);

    private final PendingTaskBasedWorkerResourceManagementConfig config;
    private final Supplier<WorkerBehaviorConfig> workerConfigRef;
    private final ScalingStats scalingStats;

    // Guards the provisioning/termination state below.
    private final Object lock = new Object();
    // Node IDs we asked the autoscaler to create but that have not yet joined the pool.
    private final Set<String> currentlyProvisioning = Sets.newHashSet();
    // Node IDs we asked the autoscaler to destroy but that are still visible in the pool.
    private final Set<String> currentlyTerminating = Sets.newHashSet();

    private DateTime lastProvisionTime = DateTime.now();
    private DateTime lastTerminateTime = DateTime.now();

    @Inject
    public PendingTaskBasedWorkerResourceManagementStrategy(PendingTaskBasedWorkerResourceManagementConfig config,
            Supplier<WorkerBehaviorConfig> workerConfigRef,
            ResourceManagementSchedulerConfig resourceManagementSchedulerConfig, ScheduledExecutorFactory factory) {
        this(config, workerConfigRef, resourceManagementSchedulerConfig,
                factory.create(1, "PendingTaskBasedResourceManagement-manager--%d"));
    }

    public PendingTaskBasedWorkerResourceManagementStrategy(PendingTaskBasedWorkerResourceManagementConfig config,
            Supplier<WorkerBehaviorConfig> workerConfigRef,
            ResourceManagementSchedulerConfig resourceManagementSchedulerConfig, ScheduledExecutorService exec) {
        super(resourceManagementSchedulerConfig, exec);
        this.config = config;
        this.workerConfigRef = workerConfigRef;
        this.scalingStats = new ScalingStats(config.getNumEventsToTrack());
    }

    /**
     * Provisions new workers via the configured autoscaler when the pending task
     * backlog cannot be satisfied by the current pool. Skips provisioning while a
     * previous provisioning request is still outstanding, and terminates stuck
     * provisioning attempts that exceed the configured max scaling duration.
     *
     * @return true if at least one provisioning request was issued
     */
    @Override
    public boolean doProvision(WorkerTaskRunner runner) {
        // Snapshots taken outside the lock; scaling decisions below are best-effort.
        Collection<Task> pendingTasks = runner.getPendingTaskPayloads();
        Collection<ImmutableWorkerInfo> workers = runner.getWorkers();
        synchronized (lock) {
            boolean didProvision = false;
            final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
            if (workerConfig == null || workerConfig.getAutoScaler() == null) {
                log.error("No workerConfig available, cannot provision new workers.");
                return false;
            }

            final Collection<String> workerNodeIds = getWorkerNodeIDs(
                    Collections2.transform(workers, new Function<ImmutableWorkerInfo, Worker>() {
                        @Override
                        public Worker apply(ImmutableWorkerInfo input) {
                            return input.getWorker();
                        }
                    }), workerConfig);
            // Any node that has joined the pool is no longer "provisioning".
            currentlyProvisioning.removeAll(workerNodeIds);
            if (currentlyProvisioning.isEmpty()) {
                int want = getScaleUpNodeCount(runner.getConfig(), workerConfig, pendingTasks, workers);
                while (want > 0) {
                    final AutoScalingData provisioned = workerConfig.getAutoScaler().provision();
                    final List<String> newNodes = provisioned == null ? ImmutableList.<String>of()
                            : provisioned.getNodeIds();
                    if (newNodes.isEmpty()) {
                        log.warn("NewNodes is empty, returning from provision loop");
                        break;
                    } else {
                        currentlyProvisioning.addAll(newNodes);
                        lastProvisionTime = DateTime.now();
                        scalingStats.addProvisionEvent(provisioned);
                        want -= newNodes.size();
                        didProvision = true;
                    }
                }
            } else {
                Duration durSinceLastProvision = new Duration(lastProvisionTime, DateTime.now());
                log.info("%s provisioning. Current wait time: %s", currentlyProvisioning, durSinceLastProvision);
                if (durSinceLastProvision.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
                    log.makeAlert("Worker node provisioning taking too long!")
                            .addData("millisSinceLastProvision", durSinceLastProvision.getMillis())
                            .addData("provisioningCount", currentlyProvisioning.size()).emit();

                    // Give up on nodes that never joined; reclaim them at the autoscaler.
                    workerConfig.getAutoScaler().terminateWithIds(Lists.newArrayList(currentlyProvisioning));
                    currentlyProvisioning.clear();
                }
            }

            return didProvision;
        }
    }

    /** Resolves autoscaler node IDs for the given workers via IP lookup. */
    private static Collection<String> getWorkerNodeIDs(Collection<Worker> workers,
            WorkerBehaviorConfig workerConfig) {
        return workerConfig.getAutoScaler()
                .ipToIdLookup(Lists.newArrayList(Iterables.transform(workers, new Function<Worker, String>() {
                    @Override
                    public String apply(Worker input) {
                        return input.getIp();
                    }
                })));
    }

    /**
     * Computes how many workers to add, bounded below by the autoscaler's minimum
     * pool size and above by both the max scaling step and the autoscaler's maximum
     * pool size.
     *
     * @return the number of workers to provision; 0 when already at capacity
     */
    int getScaleUpNodeCount(final WorkerTaskRunnerConfig remoteTaskRunnerConfig,
            final WorkerBehaviorConfig workerConfig, final Collection<Task> pendingTasks,
            final Collection<ImmutableWorkerInfo> workers) {
        final int minWorkerCount = workerConfig.getAutoScaler().getMinNumWorkers();
        final int maxWorkerCount = workerConfig.getAutoScaler().getMaxNumWorkers();
        final Predicate<ImmutableWorkerInfo> isValidWorker = ResourceManagementUtil
                .createValidWorkerPredicate(config);
        final int currValidWorkers = Collections2.filter(workers, isValidWorker).size();

        // If there are no workers, spin up minWorkerCount; we cannot determine the exact
        // capacity needed here because the expected per-worker capacity is unknown.
        int moreWorkersNeeded = currValidWorkers == 0 ? minWorkerCount
                : getWorkersNeededToAssignTasks(remoteTaskRunnerConfig, workerConfig, pendingTasks, workers);

        // want = max(workers needed to reach minWorkerCount,
        //            min(maxScalingStep, workers needed to run current pending tasks))
        int want = Math.max(minWorkerCount - currValidWorkers,
                Math.min(config.getMaxScalingStep(), moreWorkersNeeded));

        if (want > 0 && currValidWorkers >= maxWorkerCount) {
            log.warn("Unable to provision more workers. Current workerCount[%d] maximum workerCount[%d].",
                    currValidWorkers, maxWorkerCount);
            return 0;
        }
        // Never exceed the autoscaler's configured maximum pool size.
        want = Math.min(want, maxWorkerCount - currValidWorkers);
        return want;
    }

    /**
     * Simulates assigning every pending task to the valid workers using the configured
     * {@link WorkerSelectStrategy}; each task no existing (real or simulated) worker can
     * take is assigned to a fresh dummy worker instead.
     *
     * @return the number of dummy workers required, i.e. additional workers needed
     */
    int getWorkersNeededToAssignTasks(final WorkerTaskRunnerConfig workerTaskRunnerConfig,
            final WorkerBehaviorConfig workerConfig, final Collection<Task> pendingTasks,
            final Collection<ImmutableWorkerInfo> workers) {
        final Collection<ImmutableWorkerInfo> validWorkers = Collections2.filter(workers,
                ResourceManagementUtil.createValidWorkerPredicate(config));

        Map<String, ImmutableWorkerInfo> workersMap = Maps.newHashMap();
        for (ImmutableWorkerInfo worker : validWorkers) {
            workersMap.put(worker.getWorker().getHost(), worker);
        }
        WorkerSelectStrategy workerSelectStrategy = workerConfig.getSelectStrategy();
        int need = 0;
        int capacity = getExpectedWorkerCapacity(workers);

        // Simulate assigning tasks to dummy workers using the configured workerSelectStrategy;
        // the number of additional workers needed to assign all the pending tasks is noted.
        for (Task task : pendingTasks) {
            Optional<ImmutableWorkerInfo> selectedWorker = workerSelectStrategy
                    .findWorkerForTask(workerTaskRunnerConfig, ImmutableMap.copyOf(workersMap), task);
            final ImmutableWorkerInfo workerRunningTask;
            if (selectedWorker.isPresent()) {
                workerRunningTask = selectedWorker.get();
            } else {
                // None of the existing workers can run this task; we need to provision one
                // worker for it. Create a dummy worker and simulate assigning the task to it.
                workerRunningTask = createDummyWorker("dummy" + need, capacity,
                        workerTaskRunnerConfig.getMinWorkerVersion());
                need++;
            }
            // Update the map so later tasks see this worker's reduced free capacity.
            workersMap.put(workerRunningTask.getWorker().getHost(), workerWithTask(workerRunningTask, task));
        }
        return need;
    }

    /**
     * Terminates lazy workers via the autoscaler. Skips termination while provisioning
     * is in flight, and clears stuck termination bookkeeping after the configured max
     * scaling duration.
     *
     * @return true if at least one termination request was issued
     */
    @Override
    public boolean doTerminate(WorkerTaskRunner runner) {
        Collection<ImmutableWorkerInfo> zkWorkers = runner.getWorkers();
        synchronized (lock) {
            final WorkerBehaviorConfig workerConfig = workerConfigRef.get();
            if (workerConfig == null) {
                log.warn("No workerConfig available, cannot terminate workers.");
                return false;
            }

            if (!currentlyProvisioning.isEmpty()) {
                log.debug("Already provisioning nodes, Not Terminating any nodes.");
                return false;
            }

            boolean didTerminate = false;
            final Collection<String> workerNodeIds = getWorkerNodeIDs(runner.getLazyWorkers(), workerConfig);
            // Keep only the terminating IDs that still exist as lazy workers.
            final Set<String> stillExisting = Sets.newHashSet();
            for (String s : currentlyTerminating) {
                if (workerNodeIds.contains(s)) {
                    stillExisting.add(s);
                }
            }
            currentlyTerminating.clear();
            currentlyTerminating.addAll(stillExisting);

            if (currentlyTerminating.isEmpty()) {
                final int maxWorkersToTerminate = maxWorkersToTerminate(zkWorkers, workerConfig);
                final Predicate<ImmutableWorkerInfo> isLazyWorker = ResourceManagementUtil
                        .createLazyWorkerPredicate(config);
                final List<String> laziestWorkerIps = Lists.newArrayList(
                        Collections2.transform(runner.markWorkersLazy(isLazyWorker, maxWorkersToTerminate),
                                new Function<Worker, String>() {
                                    @Override
                                    public String apply(Worker zkWorker) {
                                        return zkWorker.getIp();
                                    }
                                }));
                if (laziestWorkerIps.isEmpty()) {
                    log.debug("Found no lazy workers");
                } else {
                    log.info("Terminating %,d lazy workers: %s", laziestWorkerIps.size(),
                            Joiner.on(", ").join(laziestWorkerIps));

                    final AutoScalingData terminated = workerConfig.getAutoScaler().terminate(laziestWorkerIps);
                    if (terminated != null) {
                        currentlyTerminating.addAll(terminated.getNodeIds());
                        lastTerminateTime = DateTime.now();
                        scalingStats.addTerminateEvent(terminated);
                        didTerminate = true;
                    }
                }
            } else {
                Duration durSinceLastTerminate = new Duration(lastTerminateTime, DateTime.now());

                log.info("%s terminating. Current wait time: %s", currentlyTerminating, durSinceLastTerminate);

                if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) {
                    log.makeAlert("Worker node termination taking too long!")
                            .addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis())
                            .addData("terminatingCount", currentlyTerminating.size()).emit();

                    currentlyTerminating.clear();
                }
            }

            return didTerminate;
        }
    }

    /**
     * Max workers that can be terminated: all invalid workers, plus any valid workers
     * above the autoscaler's minimum pool size, capped by the max scaling step.
     */
    private int maxWorkersToTerminate(Collection<ImmutableWorkerInfo> zkWorkers,
            WorkerBehaviorConfig workerConfig) {
        final Predicate<ImmutableWorkerInfo> isValidWorker = ResourceManagementUtil
                .createValidWorkerPredicate(config);
        final int currValidWorkers = Collections2.filter(zkWorkers, isValidWorker).size();
        final int invalidWorkers = zkWorkers.size() - currValidWorkers;
        final int minWorkers = workerConfig.getAutoScaler().getMinNumWorkers();

        return invalidWorkers + Math.max(0, Math.min(config.getMaxScalingStep(), currValidWorkers - minWorkers));
    }

    @Override
    public ScalingStats getStats() {
        return scalingStats;
    }

    /**
     * Estimated task capacity of a single worker. Assumes all workers have the same
     * capacity and samples the first one; defaults to 1 when the pool is empty.
     */
    private static int getExpectedWorkerCapacity(final Collection<ImmutableWorkerInfo> workers) {
        int size = workers.size();
        if (size == 0) {
            // No existing workers; assume capacity per worker is 1.
            return 1;
        } else {
            // Assume all workers have the same capacity.
            return workers.iterator().next().getWorker().getCapacity();
        }
    }

    /** Returns a copy of the worker snapshot with the given task simulated as running on it. */
    private static ImmutableWorkerInfo workerWithTask(ImmutableWorkerInfo immutableWorker, Task task) {
        return new ImmutableWorkerInfo(immutableWorker.getWorker(), immutableWorker.getCurrCapacityUsed() + 1,
                Sets.union(immutableWorker.getAvailabilityGroups(),
                        Sets.newHashSet(task.getTaskResource().getAvailabilityGroup())),
                Sets.union(immutableWorker.getRunningTasks(), Sets.newHashSet(task.getId())), DateTime.now());
    }

    /** Creates a placeholder worker used only for simulated task assignment. */
    private static ImmutableWorkerInfo createDummyWorker(String host, int capacity, String version) {
        return new ImmutableWorkerInfo(new Worker(host, "-2", capacity, version), 0, Sets.<String>newHashSet(),
                Sets.<String>newHashSet(), DateTime.now());
    }
}