com.metamx.druid.indexing.coordinator.RemoteTaskRunner.java Source code

Java tutorial

Introduction

Here is the source code for com.metamx.druid.indexing.coordinator.RemoteTaskRunner.java

Source

/*
 * Druid - a distributed column store.
 * Copyright (C) 2012  Metamarkets Group Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

package com.metamx.druid.indexing.coordinator;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.api.client.util.Maps;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.InputSupplier;
import com.google.common.primitives.Ints;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import com.metamx.common.ISE;
import com.metamx.common.lifecycle.LifecycleStart;
import com.metamx.common.lifecycle.LifecycleStop;
import com.metamx.druid.curator.cache.PathChildrenCacheFactory;
import com.metamx.druid.indexing.common.TaskStatus;
import com.metamx.druid.indexing.common.task.Task;
import com.metamx.druid.indexing.common.tasklogs.TaskLogProvider;
import com.metamx.druid.indexing.coordinator.config.RemoteTaskRunnerConfig;
import com.metamx.druid.indexing.coordinator.setup.WorkerSetupData;
import com.metamx.druid.indexing.worker.TaskAnnouncement;
import com.metamx.druid.indexing.worker.Worker;
import com.metamx.emitter.EmittingLogger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.InputStreamResponseHandler;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.cache.ChildData;
import org.apache.curator.framework.recipes.cache.PathChildrenCache;
import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent;
import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.CreateMode;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.joda.time.DateTime;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

/**
 * The RemoteTaskRunner's primary responsibility is to assign tasks to worker nodes.
 * The RemoteTaskRunner uses Zookeeper to keep track of which workers are running which tasks. Tasks are assigned by
 * creating ephemeral nodes in ZK that workers must remove. Workers announce the statuses of the tasks they are running.
 * Once a task completes, it is up to the RTR to remove the task status and run any necessary cleanup.
 * The RemoteTaskRunner is event driven and updates state according to ephemeral node changes in ZK.
 * <p/>
 * The RemoteTaskRunner will assign tasks to a node until the node hits capacity. At that point, task assignment will
 * fail. The RemoteTaskRunner depends on another component to create additional worker resources.
 * For example, {@link com.metamx.druid.indexing.coordinator.scaling.ResourceManagementScheduler} can take care of these duties.
 * <p/>
 * If a worker node becomes inexplicably disconnected from ZK, the RemoteTaskRunner will fail any tasks associated with the worker.
 * <p/>
 * The RemoteTaskRunner uses ZK for job management and assignment and http for IPC messages.
 */
public class RemoteTaskRunner implements TaskRunner, TaskLogProvider {
    // Logger for this class; also used to emit alerts via makeAlert(...).
    private static final EmittingLogger log = new EmittingLogger(RemoteTaskRunner.class);
    // Handler for the HTTP response to worker shutdown requests; constructed with UTF-8.
    private static final StatusResponseHandler RESPONSE_HANDLER = new StatusResponseHandler(Charsets.UTF_8);
    // Joins ZK path segments with "/".
    private static final Joiner JOINER = Joiner.on("/");

    // Serializes tasks written to ZK and deserializes worker / task status payloads read from ZK.
    private final ObjectMapper jsonMapper;
    // Supplies ZK paths, the task payload size limit, the assignment timeout and the default worker version.
    private final RemoteTaskRunnerConfig config;
    // Curator client used for all direct ZK reads and writes.
    private final CuratorFramework cf;
    // Creates the PathChildrenCache instances (one for worker announcements, one per worker for statuses).
    private final PathChildrenCacheFactory pathChildrenCacheFactory;
    // Cache over config.getIndexerAnnouncementPath(); its events drive addWorker/removeWorker.
    private final PathChildrenCache workerPathCache;
    // Holder for the current worker setup data; read for the minimum worker version when picking a worker.
    private final AtomicReference<WorkerSetupData> workerSetupData;
    // Client for HTTP calls to workers (task shutdown, task log streaming).
    private final HttpClient httpClient;

    // all workers that exist in ZK
    private final Map<String, ZkWorker> zkWorkers = new ConcurrentHashMap<String, ZkWorker>();
    // all tasks that have been assigned to a worker
    private final RemoteTaskRunnerWorkQueue runningTasks = new RemoteTaskRunnerWorkQueue();
    // tasks that have not yet run
    private final RemoteTaskRunnerWorkQueue pendingTasks = new RemoteTaskRunnerWorkQueue();

    // Single-threaded executor on which all pending-task assignment attempts run.
    private final ExecutorService runPendingTasksExec = Executors.newSingleThreadExecutor();

    // Monitor shared by announceTask (waits on it) and the per-worker status listener (notifies on it).
    private final Object statusLock = new Object();

    // Set at the end of start() and cleared by stop(); bootstrap() refuses to run while this is false.
    private volatile boolean started = false;

    public RemoteTaskRunner(ObjectMapper jsonMapper, RemoteTaskRunnerConfig config, CuratorFramework cf,
            PathChildrenCacheFactory pathChildrenCacheFactory, AtomicReference<WorkerSetupData> workerSetupData,
            HttpClient httpClient) {
        this.jsonMapper = jsonMapper;
        this.config = config;
        this.cf = cf;
        this.pathChildrenCacheFactory = pathChildrenCacheFactory;
        this.workerPathCache = pathChildrenCacheFactory.make(cf, config.getIndexerAnnouncementPath());
        this.workerSetupData = workerSetupData;
        this.httpClient = httpClient;
    }

    /**
     * Begins tracking workers. Registers a listener on the worker announcement cache, starts that cache with
     * {@code BUILD_INITIAL_CACHE}, registers every worker already present in it, and finally marks the runner as
     * started. Calling this on an already started runner does nothing.
     */
    @LifecycleStart
    public void start() {
        if (started) {
            return;
        }

        // Reacts to workers appearing in / disappearing from the announcement path.
        final PathChildrenCacheListener workerListener = new PathChildrenCacheListener() {
            @Override
            public void childEvent(CuratorFramework client, final PathChildrenCacheEvent event)
                    throws Exception {
                switch (event.getType()) {
                case CHILD_ADDED: {
                    final Worker appeared = jsonMapper.readValue(event.getData().getData(), Worker.class);
                    addWorker(appeared, PathChildrenCache.StartMode.NORMAL);
                    break;
                }
                case CHILD_REMOVED: {
                    final Worker vanished = jsonMapper.readValue(event.getData().getData(), Worker.class);
                    removeWorker(vanished);
                    break;
                }
                default:
                    break;
                }
            }
        };

        try {
            workerPathCache.getListenable().addListener(workerListener);
            workerPathCache.start(PathChildrenCache.StartMode.BUILD_INITIAL_CACHE);

            // Workers that were already announced before the cache was started.
            for (ChildData announced : workerPathCache.getCurrentData()) {
                final Worker existing = jsonMapper.readValue(announced.getData(), Worker.class);
                addWorker(existing, PathChildrenCache.StartMode.BUILD_INITIAL_CACHE);
            }

            started = true;
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Closes every known {@link ZkWorker} and then the worker announcement cache. Does nothing beyond clearing the
     * started flag when the runner was not started. The started flag is always reset, even if closing fails; any
     * failure is rethrown via {@link Throwables#propagate(Throwable)}.
     */
    @LifecycleStop
    public void stop() {
        try {
            if (started) {
                final Collection<ZkWorker> workersToClose = zkWorkers.values();
                for (ZkWorker openWorker : workersToClose) {
                    openWorker.close();
                }
                workerPathCache.close();
            }
        } catch (Exception e) {
            throw Throwables.propagate(e);
        } finally {
            started = false;
        }
    }

    /**
     * @return the values view of the worker map, i.e. every worker currently tracked by this runner
     */
    @Override
    public Collection<ZkWorker> getWorkers() {
        final Collection<ZkWorker> trackedWorkers = zkWorkers.values();
        return trackedWorkers;
    }

    /**
     * @return the work items held in the running queue (tasks that have been assigned to a worker)
     */
    @Override
    public Collection<RemoteTaskRunnerWorkItem> getRunningTasks() {
        final Collection<RemoteTaskRunnerWorkItem> assigned = runningTasks.values();
        return assigned;
    }

    /**
     * @return the work items held in the pending queue (tasks that have not yet been assigned to a worker)
     */
    @Override
    public Collection<RemoteTaskRunnerWorkItem> getPendingTasks() {
        final Collection<RemoteTaskRunnerWorkItem> waiting = pendingTasks.values();
        return waiting;
    }

    /**
     * Scans the tracked workers for one that reports running the given task.
     *
     * @param taskId id of the task to look for
     * @return the first worker whose {@code isRunningTask(taskId)} is true, or {@code null} if there is none
     */
    public ZkWorker findWorkerRunningTask(String taskId) {
        ZkWorker owner = null;
        for (ZkWorker candidate : zkWorkers.values()) {
            if (candidate.isRunningTask(taskId)) {
                owner = candidate;
                break;
            }
        }
        return owner;
    }

    /**
     * Tells whether the worker registered under {@code worker.getHost()} reports running the given task.
     *
     * @param worker worker to check; looked up by host
     * @param task   task to check; looked up by id
     * @return {@code true} only if the host is tracked and its {@link ZkWorker} is running the task
     */
    public boolean isWorkerRunningTask(Worker worker, Task task) {
        final ZkWorker tracked = zkWorkers.get(worker.getHost());
        if (tracked == null) {
            return false;
        }
        return tracked.isRunningTask(task.getId());
    }

    /**
     * Seeds the running queue from what the tracked workers already report. Every supplied task whose id shows up
     * in some worker's running tasks is put into the running queue with a fresh result future; tasks that no
     * worker reports are left alone.
     *
     * @param tasks tasks to reconcile against the workers' running tasks
     * @throws ISE if the runner has not been started
     */
    @Override
    public void bootstrap(List<Task> tasks) {
        if (!started) {
            throw new ISE("Must start RTR first before calling bootstrap!");
        }

        try {
            // Index: task id -> worker that reports running it.
            final Map<String, Worker> ownerByTaskId = Maps.newHashMap();
            for (ZkWorker zkWorker : zkWorkers.values()) {
                for (String reportedTaskId : zkWorker.getRunningTasks().keySet()) {
                    ownerByTaskId.put(reportedTaskId, zkWorker.getWorker());
                }
            }

            for (Task task : tasks) {
                final Worker owner = ownerByTaskId.get(task.getId());
                if (owner == null) {
                    continue;
                }
                log.info("Bootstrap found [%s] running on [%s].", task.getId(), owner.getHost());
                final RemoteTaskRunnerWorkItem recovered = new RemoteTaskRunnerWorkItem(task,
                        SettableFuture.<TaskStatus>create(), owner);
                runningTasks.put(task.getId(), recovered);
            }
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Submits a task unless the runner already knows about it.
     * <ul>
     * <li>Task in the running queue and a worker reports it: the existing future is returned; if the worker's
     * announced status is already complete, completion handling is run first.</li>
     * <li>Task in the running queue but no worker reports it: an alert is emitted, the stale entry is dropped and
     * the task is treated as unknown.</li>
     * <li>Task in the pending queue: the existing future is returned.</li>
     * <li>Otherwise: a new work item is added to the pending queue.</li>
     * </ul>
     *
     * @param task task to run
     * @return future that carries the task's final status
     */
    @Override
    public ListenableFuture<TaskStatus> run(final Task task) {
        final RemoteTaskRunnerWorkItem alreadyRunning = runningTasks.get(task.getId());
        if (alreadyRunning != null) {
            final ZkWorker owner = findWorkerRunningTask(task.getId());
            if (owner != null) {
                log.info("Task[%s] already running on %s.", task.getId(), owner.getWorker().getHost());
                final TaskAnnouncement lastAnnouncement = owner.getRunningTasks().get(task.getId());
                if (lastAnnouncement.getTaskStatus().isComplete()) {
                    taskComplete(alreadyRunning, owner, task.getId(), lastAnnouncement.getTaskStatus());
                }
                return alreadyRunning.getResult();
            }

            // Running queue says "running" but no worker agrees: drop the stale entry and fall through.
            log.makeAlert(
                    "Told to run task that is in the running queue but no worker is actually running it?!")
                    .addData("taskId", task.getId()).emit();
            runningTasks.remove(task.getId());
        }

        final RemoteTaskRunnerWorkItem alreadyPending = pendingTasks.get(task.getId());
        if (alreadyPending != null) {
            log.info("Assigned a task[%s] that is already pending, not doing anything", task.getId());
            return alreadyPending.getResult();
        }

        final RemoteTaskRunnerWorkItem freshItem = new RemoteTaskRunnerWorkItem(task,
                SettableFuture.<TaskStatus>create(), null);
        addPendingTask(freshItem);
        return freshItem.getResult();
    }

    /**
     * Stops a task. A task that is still in the pending queue is simply removed from it. Otherwise the worker
     * reporting the task is looked up and sent an HTTP POST to its {@code /task/{taskId}/shutdown} endpoint; if no
     * worker reports the task, this only logs and returns.
     *
     * @param taskId - task id to shutdown
     * @throws RuntimeException (via {@link Throwables#propagate(Throwable)}) if the HTTP request fails or the
     *                          calling thread is interrupted while waiting for the response
     */
    @Override
    public void shutdown(String taskId) {
        // One atomic remove() instead of containsKey() followed by remove(): the assignment thread can move the
        // task from pending to running between those two calls, in which case the old code returned here
        // without ever contacting the worker.
        if (pendingTasks.remove(taskId) != null) {
            return;
        }

        final ZkWorker zkWorker = findWorkerRunningTask(taskId);

        if (zkWorker == null) {
            log.info("Can't shutdown! No worker running task %s", taskId);
            return;
        }

        try {
            final URL url = makeWorkerURL(zkWorker.getWorker(), String.format("/task/%s/shutdown", taskId));
            final StatusResponseHolder response = httpClient.post(url).go(RESPONSE_HANDLER).get();

            log.info("Sent shutdown message to worker: %s, status %s, response: %s", zkWorker.getWorker().getHost(),
                    response.getStatus(), response.getContent());

            if (!response.getStatus().equals(HttpResponseStatus.ACCEPTED)) {
                log.error("Shutdown failed for %s! Are you sure the task was running?", taskId);
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers further up the stack before rethrowing.
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Provides access to the log of a task that a worker is currently running.
     *
     * @param taskId id of the task whose log is wanted
     * @param offset value passed through as the {@code offset} query parameter of the worker's log endpoint
     * @return absent if no tracked worker reports running the task; otherwise a supplier that, each time it is
     *         asked for input, issues an HTTP GET against the worker's {@code /task/{taskId}/log} endpoint
     */
    @Override
    public Optional<InputSupplier<InputStream>> streamTaskLog(final String taskId, final long offset) {
        final ZkWorker zkWorker = findWorkerRunningTask(taskId);

        if (zkWorker == null) {
            // Worker is not running this task, it might be available in deep storage
            return Optional.absent();
        } else {
            // Worker is still running this task
            final URL url = makeWorkerURL(zkWorker.getWorker(),
                    String.format("/task/%s/log?offset=%d", taskId, offset));
            return Optional.<InputSupplier<InputStream>>of(new InputSupplier<InputStream>() {
                @Override
                public InputStream getInput() throws IOException {
                    try {
                        return httpClient.get(url).go(new InputStreamResponseHandler()).get();
                    } catch (InterruptedException e) {
                        // Restore the interrupt status so the interruption is not lost once we rethrow unchecked.
                        Thread.currentThread().interrupt();
                        throw Throwables.propagate(e);
                    } catch (ExecutionException e) {
                        // Unwrap if possible
                        Throwables.propagateIfPossible(e.getCause(), IOException.class);
                        throw Throwables.propagate(e);
                    }
                }
            });
        }
    }

    /**
     * Builds {@code http://<worker host>/druid/worker/v1<path>}.
     *
     * @param worker worker whose host is used
     * @param path   endpoint path; must begin with {@code '/'}
     * @return the worker URL
     * @throws IllegalArgumentException if {@code path} does not start with {@code '/'}
     */
    private URL makeWorkerURL(Worker worker, String path) {
        Preconditions.checkArgument(path.startsWith("/"), "path must start with '/': %s", path);

        final String spec = String.format("http://%s/druid/worker/v1%s", worker.getHost(), path);
        try {
            return new URL(spec);
        } catch (MalformedURLException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Puts a work item into the pending queue, keyed by its task id, and triggers an assignment attempt.
     *
     * @param taskRunnerWorkItem the work item to queue
     */
    private void addPendingTask(final RemoteTaskRunnerWorkItem taskRunnerWorkItem) {
        final Task queuedTask = taskRunnerWorkItem.getTask();
        log.info("Added pending task %s", queuedTask.getId());

        pendingTasks.put(taskRunnerWorkItem.getTask().getId(), taskRunnerWorkItem);
        runPendingTasks();
    }

    /**
     * Schedules an attempt to assign every currently pending task. The attempt is submitted to the single-threaded
     * {@code runPendingTasksExec}, so attempts execute one after another. Tasks that get assigned are moved from
     * the pending queue to the running queue by the assignment code. Any exception raised during an attempt is
     * reported as an alert rather than propagated. Callers invoke this when a task is queued, a worker appears, or
     * a task completes.
     */
    private void runPendingTasks() {
        final Callable<Void> assignAllPending = new Callable<Void>() {
            @Override
            public Void call() throws Exception {
                try {
                    // Work on a snapshot: assignment removes entries from pendingTasks while we iterate.
                    final List<RemoteTaskRunnerWorkItem> snapshot = Lists.newArrayList(pendingTasks.values());
                    for (RemoteTaskRunnerWorkItem pendingItem : snapshot) {
                        assignTask(pendingItem);
                    }
                } catch (Exception e) {
                    log.makeAlert(e, "Exception in running pending tasks").emit();
                }

                return null;
            }
        };

        runPendingTasksExec.submit(assignAllPending);
    }

    /**
     * Removes a task from the running queue and deletes the task's status node in ZK
     * ({@code <indexer status path>/<workerId>/<taskId>}). The ZK delete is best effort: a failure is logged and
     * does not propagate.
     *
     * @param workerId - the worker that was previously running the task
     * @param taskId   - the task to cleanup
     */
    private void cleanup(final String workerId, final String taskId) {
        log.info("Cleaning up [%s]", taskId);
        runningTasks.remove(taskId);
        final String statusPath = JOINER.join(config.getIndexerStatusPath(), workerId, taskId);
        try {
            cf.delete().guaranteed().forPath(statusPath);
        } catch (Exception e) {
            // Still best effort, but attach the exception: this catch sees every failure, not only a missing
            // node, and without the cause the message below would hide what actually went wrong.
            log.info(e, "Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath);
        }
    }

    /**
     * Tries to hand a pending task to a worker. Nothing is done when the task is already in the running queue or
     * some worker already reports running it (a worker can be running a task the runner has no record of, e.g.
     * after a restart and before bootstrap). Otherwise a worker is chosen and the task is announced to it; if no
     * worker can take the task it stays pending. Failures are reported as an alert and do not propagate.
     *
     * @param taskRunnerWorkItem - the task to assign
     */
    private void assignTask(RemoteTaskRunnerWorkItem taskRunnerWorkItem) {
        try {
            final String taskId = taskRunnerWorkItem.getTask().getId();

            if (runningTasks.containsKey(taskId) || findWorkerRunningTask(taskId) != null) {
                log.info("Task[%s] already running.", taskId);
            } else {
                // Nothing running this task, announce it in ZK for a worker to run it
                ZkWorker zkWorker = findWorkerForTask(taskRunnerWorkItem.getTask());
                if (zkWorker != null) {
                    announceTask(zkWorker.getWorker(), taskRunnerWorkItem);
                }
            }
        } catch (Exception e) {
            // Pass the exception along so the alert carries the cause, like the other alerts in this class.
            log.makeAlert(e, "Exception while trying to run task")
                    .addData("taskId", taskRunnerWorkItem.getTask().getId()).emit();
        }
    }

    /**
     * Creates a ZK entry under a specific path associated with a worker. The worker is responsible for
     * removing the task ZK entry and creating a task status ZK entry.
     * <p/>
     * Steps, in order: serialize the task and reject it if it exceeds {@code config.getMaxNumBytes()}; create an
     * ephemeral node at {@code <indexer task path>/<worker host>/<task id>} unless one already exists; move the
     * work item from the pending queue to the running queue; then block on {@code statusLock} until the worker is
     * seen running the task or the assignment timeout has elapsed.
     *
     * @param theWorker          The worker the task is assigned to
     * @param taskRunnerWorkItem The task to be assigned
     * @throws ISE       if the serialized task is larger than the configured maximum
     * @throws Exception on serialization or ZK failures, or if the thread is interrupted while waiting
     */
    private void announceTask(Worker theWorker, RemoteTaskRunnerWorkItem taskRunnerWorkItem) throws Exception {
        final Task task = taskRunnerWorkItem.getTask();

        log.info("Coordinator asking Worker[%s] to add task[%s]", theWorker.getHost(), task.getId());

        // Enforce the configured payload size limit before writing anything to ZK.
        byte[] rawBytes = jsonMapper.writeValueAsBytes(task);
        if (rawBytes.length > config.getMaxNumBytes()) {
            throw new ISE("Length of raw bytes for task too large[%,d > %,d]", rawBytes.length,
                    config.getMaxNumBytes());
        }

        String taskPath = JOINER.join(config.getIndexerTaskPath(), theWorker.getHost(), task.getId());

        // Only create the assignment node if it is not already there.
        if (cf.checkExists().forPath(taskPath) == null) {
            cf.create().withMode(CreateMode.EPHEMERAL).forPath(taskPath, rawBytes);
        }

        // Hand-off pending -> running. NOTE(review): if the item is missing from pending we return here, but the
        // ZK assignment node above has already been created.
        RemoteTaskRunnerWorkItem workItem = pendingTasks.remove(task.getId());
        if (workItem == null) {
            log.makeAlert("WTF?! Got a null work item from pending tasks?! How can this be?!")
                    .addData("taskId", task.getId()).emit();
            return;
        }

        RemoteTaskRunnerWorkItem newWorkItem = workItem.withWorker(theWorker);
        runningTasks.put(task.getId(), newWorkItem);
        log.info("Task %s switched from pending to running (on [%s])", task.getId(),
                newWorkItem.getWorker().getHost());

        // Syncing state with Zookeeper - don't assign new tasks until the task we just assigned is actually running
        // on a worker - this avoids overflowing a worker with tasks
        Stopwatch timeoutStopwatch = new Stopwatch();
        timeoutStopwatch.start();
        synchronized (statusLock) {
            // The status listener notifies statusLock on every added/updated status; re-check after each wake-up.
            while (!isWorkerRunningTask(theWorker, task)) {
                // Each wait is bounded by the full timeout; the stopwatch below enforces the overall limit.
                statusLock.wait(config.getTaskAssignmentTimeoutDuration().getMillis());
                if (timeoutStopwatch.elapsed(TimeUnit.MILLISECONDS) >= config.getTaskAssignmentTimeoutDuration()
                        .getMillis()) {
                    log.error("Something went wrong! %s never ran task %s after %s!", theWorker.getHost(),
                            task.getId(), config.getTaskAssignmentTimeoutDuration());

                    // NOTE(review): the result is failed here, but the entry put into runningTasks above is not
                    // removed on this path - confirm that is intended.
                    taskRunnerWorkItem.setResult(TaskStatus.failure(taskRunnerWorkItem.getTask().getId()));
                    break;
                }
            }
        }
    }

    /**
     * When a new worker appears, listeners are registered for status changes associated with tasks assigned to
     * the worker. Status changes indicate the creation or completion of a task.
     * The RemoteTaskRunner updates state according to these changes.
     * <p/>
     * After the listener is registered the worker's status cache is started with the given mode, the worker is
     * recorded under its host, and an assignment attempt for pending tasks is triggered.
     *
     * @param worker    - contains metadata for a worker that has appeared in ZK
     * @param startMode - start mode passed to the new {@link ZkWorker}
     * @return the {@link ZkWorker} created for this worker
     */
    private ZkWorker addWorker(final Worker worker, PathChildrenCache.StartMode startMode) {
        log.info("Worker[%s] reportin' for duty!", worker.getHost());

        try {
            // Cache over this worker's status path: <indexer status path>/<worker host>.
            final String workerStatusPath = JOINER.join(config.getIndexerStatusPath(), worker.getHost());
            final PathChildrenCache statusCache = pathChildrenCacheFactory.make(cf, workerStatusPath);
            final ZkWorker zkWorker = new ZkWorker(worker, statusCache, jsonMapper);

            // Add status listener to the watcher for status changes
            zkWorker.addListener(new PathChildrenCacheListener() {
                @Override
                public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception {
                    String taskId;
                    RemoteTaskRunnerWorkItem taskRunnerWorkItem;
                    // Same monitor announceTask waits on, so the notify() below can wake it.
                    synchronized (statusLock) {
                        try {
                            switch (event.getType()) {
                            case CHILD_ADDED:
                            case CHILD_UPDATED:
                                // The node name is the task id; the node data is the serialized TaskStatus.
                                taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
                                final TaskStatus taskStatus = jsonMapper.readValue(event.getData().getData(),
                                        TaskStatus.class);

                                log.info("Worker[%s] wrote %s status for task: %s", zkWorker.getWorker().getHost(),
                                        taskStatus.getStatusCode(), taskId);

                                // Synchronizing state with ZK
                                statusLock.notify();

                                taskRunnerWorkItem = runningTasks.get(taskId);
                                if (taskRunnerWorkItem == null) {
                                    log.warn(
                                            "WTF?! Worker[%s] announcing a status for a task I didn't know about: %s",
                                            zkWorker.getWorker().getHost(), taskId);
                                }

                                // taskComplete tolerates a null work item; it still cleans up the status node.
                                if (taskStatus.isComplete()) {
                                    taskComplete(taskRunnerWorkItem, zkWorker, taskId, taskStatus);
                                    // A task finished, so try to assign pending tasks again.
                                    runPendingTasks();
                                }
                                break;
                            case CHILD_REMOVED:
                                // Status node vanished: fail the task if it is still in the running queue.
                                taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
                                taskRunnerWorkItem = runningTasks.remove(taskId);
                                if (taskRunnerWorkItem != null) {
                                    log.info("Task[%s] just disappeared!", taskId);
                                    taskRunnerWorkItem
                                            .setResult(TaskStatus.failure(taskRunnerWorkItem.getTask().getId()));
                                } else {
                                    log.warn("Task[%s] just disappeared but I didn't know about it?!", taskId);
                                }
                                break;
                            }
                        } catch (Exception e) {
                            // Listener failures are reported as alerts and never propagate out of the handler.
                            log.makeAlert(e, "Failed to handle new worker status")
                                    .addData("worker", zkWorker.getWorker().getHost())
                                    .addData("znode", event.getData().getPath()).emit();
                        }
                    }
                }
            });

            zkWorker.start(startMode);
            zkWorkers.put(worker.getHost(), zkWorker);

            // A new worker has been registered, so try to assign pending tasks.
            runPendingTasks();

            return zkWorker;
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Handles a worker whose announcement node has disappeared from ZK. Collects the tasks listed under the
     * worker's task path in ZK plus every running-queue entry whose worker has the same host (case-insensitive),
     * then, for each collected task still in the running queue, removes it, deletes its ZK task node if present
     * and fails its result. Whatever happens, the worker's {@link ZkWorker} is closed and dropped from the worker
     * map. Unknown workers are ignored.
     *
     * @param worker - the removed worker
     */
    private void removeWorker(final Worker worker) {
        log.info("Kaboom! Worker[%s] removed!", worker.getHost());

        final ZkWorker departed = zkWorkers.get(worker.getHost());
        if (departed == null) {
            return;
        }

        try {
            // Tasks assigned through ZK to this worker.
            final String workerTaskPath = JOINER.join(config.getIndexerTaskPath(), worker.getHost());
            final List<String> doomedTaskIds = Lists.newArrayList(cf.getChildren().forPath(workerTaskPath));
            log.info("[%s]: Found %d tasks assigned", worker.getHost(), doomedTaskIds.size());

            // Tasks the running queue attributes to this worker.
            for (Map.Entry<String, RemoteTaskRunnerWorkItem> running : runningTasks.entrySet()) {
                if (running.getValue() == null) {
                    log.error("Huh? null work item for [%s]", running.getKey());
                    continue;
                }
                if (running.getValue().getWorker() == null) {
                    log.error("Huh? no worker for [%s]", running.getKey());
                    continue;
                }
                if (running.getValue().getWorker().getHost().equalsIgnoreCase(worker.getHost())) {
                    log.info("[%s]: Found [%s] running", worker.getHost(), running.getKey());
                    doomedTaskIds.add(running.getKey());
                }
            }

            for (String doomedId : doomedTaskIds) {
                final RemoteTaskRunnerWorkItem doomedItem = runningTasks.remove(doomedId);
                if (doomedItem == null) {
                    log.warn("RemoteTaskRunner has no knowledge of task[%s]", doomedId);
                    continue;
                }

                final String doomedPath = JOINER.join(config.getIndexerTaskPath(), worker.getHost(), doomedId);
                if (cf.checkExists().forPath(doomedPath) != null) {
                    cf.delete().guaranteed().forPath(doomedPath);
                }

                log.info("Failing task[%s]", doomedId);
                doomedItem.setResult(TaskStatus.failure(doomedItem.getTask().getId()));
            }
        } catch (Exception e) {
            throw Throwables.propagate(e);
        } finally {
            try {
                departed.close();
            } catch (Exception e) {
                log.error(e, "Exception closing worker[%s]!", worker.getHost());
            }
            zkWorkers.remove(worker.getHost());
        }
    }

    /**
     * Picks a worker for a task. Workers are ordered by current capacity used, highest first, with ties broken by
     * host name; the first one that can run the task and satisfies the minimum version wins. The minimum version
     * comes from the worker setup data, falling back to {@code config.getWorkerVersion()} when that is null.
     *
     * @param task the task needing a worker
     * @return the chosen worker, or {@code null} when no tracked worker qualifies
     */
    private ZkWorker findWorkerForTask(final Task task) {
        final Comparator<ZkWorker> busiestFirst = new Comparator<ZkWorker>() {
            @Override
            public int compare(ZkWorker left, ZkWorker right) {
                final int leftUsed = left.getCurrCapacityUsed();
                final int rightUsed = right.getCurrCapacityUsed();
                // Arguments swapped to get descending order by used capacity.
                final int byUsage = Ints.compare(rightUsed, leftUsed);
                if (byUsage != 0) {
                    return byUsage;
                }
                return left.getWorker().getHost().compareTo(right.getWorker().getHost());
            }
        };

        final TreeSet<ZkWorker> candidates = Sets.newTreeSet(busiestFirst);
        candidates.addAll(zkWorkers.values());

        final String setupMinVersion = workerSetupData.get().getMinVersion();
        final String requiredVersion;
        if (setupMinVersion == null) {
            requiredVersion = config.getWorkerVersion();
        } else {
            requiredVersion = setupMinVersion;
        }

        for (ZkWorker candidate : candidates) {
            if (candidate.canRunTask(task) && candidate.isValidVersion(requiredVersion)) {
                return candidate;
            }
        }

        log.debug("Worker nodes %s do not have capacity to run any more tasks!", zkWorkers.values());
        return null;
    }

    /**
     * Finishes a task: publishes the final status on the work item's result future (when there is a work item
     * and it has a future), stamps the worker's last-completed-task time with the current time, and cleans up
     * the task's running-queue entry and ZK status node.
     *
     * @param taskRunnerWorkItem the work item for the task; may be {@code null} if the runner did not know the task
     * @param zkWorker           the worker that reported the status
     * @param taskId             id of the completed task
     * @param taskStatus         the final status to publish
     */
    private void taskComplete(RemoteTaskRunnerWorkItem taskRunnerWorkItem, ZkWorker zkWorker, String taskId,
            TaskStatus taskStatus) {
        final ListenableFuture<TaskStatus> outcome =
                (taskRunnerWorkItem == null) ? null : taskRunnerWorkItem.getResult();
        if (outcome != null) {
            ((SettableFuture<TaskStatus>) outcome).set(taskStatus);
        }

        // Worker is done with this task
        zkWorker.setLastCompletedTaskTime(new DateTime());
        final String workerHost = zkWorker.getWorker().getHost();
        cleanup(workerHost, taskId);
    }
}