com.spotify.helios.agent.TaskRunner.java Source code

Java tutorial

Introduction

Here is the source code for com.spotify.helios.agent.TaskRunner.java

Source

/*
 * Copyright (c) 2014 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.helios.agent;

import static com.google.common.base.Preconditions.checkNotNull;
import static java.util.concurrent.TimeUnit.SECONDS;

import com.spotify.docker.client.DockerClient;
import com.spotify.docker.client.exceptions.ContainerNotFoundException;
import com.spotify.docker.client.exceptions.DockerException;
import com.spotify.docker.client.exceptions.DockerTimeoutException;
import com.spotify.docker.client.exceptions.ImageNotFoundException;
import com.spotify.docker.client.exceptions.ImagePullFailedException;
import com.spotify.docker.client.messages.ContainerConfig;
import com.spotify.docker.client.messages.ContainerCreation;
import com.spotify.docker.client.messages.ContainerExit;
import com.spotify.docker.client.messages.ContainerInfo;
import com.spotify.docker.client.messages.ContainerState;
import com.spotify.docker.client.messages.HostConfig;
import com.spotify.docker.client.messages.ImageInfo;
import com.spotify.helios.common.HeliosRuntimeException;
import com.spotify.helios.serviceregistration.NopServiceRegistrar;
import com.spotify.helios.serviceregistration.ServiceRegistrar;
import com.spotify.helios.serviceregistration.ServiceRegistrationHandle;
import com.spotify.helios.servicescommon.InterruptingExecutionThreadService;

import com.google.common.base.Optional;
import com.google.common.base.Stopwatch;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A runner service that starts a container once.
 */
class TaskRunner extends InterruptingExecutionThreadService {

    private static final Logger log = LoggerFactory.getLogger(TaskRunner.class);

    private final long delayMillis;
    private final SettableFuture<Integer> result = SettableFuture.create();
    private final TaskConfig config;
    private final DockerClient docker;
    private final String existingContainerId;
    private final Listener listener;
    private final ServiceRegistrar registrar;
    private final Optional<HealthChecker> healthChecker;
    private Optional<ServiceRegistrationHandle> serviceRegistrationHandle;
    private Optional<String> containerId;
    private final String containerName;
    private int secondsToWaitBeforeKill;

    private TaskRunner(final Builder builder) {
        super("TaskRunner(" + builder.taskConfig.name() + ")");
        this.delayMillis = builder.delayMillis;
        this.config = checkNotNull(builder.taskConfig, "config");
        this.containerName = config.containerName();
        this.docker = checkNotNull(builder.docker, "docker");
        this.listener = checkNotNull(builder.listener, "listener");
        this.existingContainerId = builder.existingContainerId;
        this.registrar = checkNotNull(builder.registrar, "registrar");
        this.secondsToWaitBeforeKill = checkNotNull(builder.secondsToWaitBeforeKill, "waitBeforeKill");
        this.healthChecker = Optional.fromNullable(builder.healthChecker);
        this.serviceRegistrationHandle = Optional.absent();
        this.containerId = Optional.absent();
    }

    public Result<Integer> result() {
        return Result.of(result);
    }

    public ListenableFuture<Integer> resultFuture() {
        return result;
    }

    /**
     * Unregister a set of service endpoints previously registered.
     *
     * @return boolean true if service registration handle was present, false otherwise
     */
    public boolean unregister() {
        if (serviceRegistrationHandle.isPresent()) {
            registrar.unregister(serviceRegistrationHandle.get());
            serviceRegistrationHandle = Optional.absent();
            return true;
        }
        return false;
    }

    /**
     * Stops this container.
     */
    public void stop() throws InterruptedException {
        // Tell docker to stop or eventually kill the container
        final String container = containerId.or(containerName);

        // Interrupt the thread blocking on waitContainer
        stopAsync().awaitTerminated();

        try {
            docker.stopContainer(container, secondsToWaitBeforeKill);
        } catch (DockerException e) {
            if ((e instanceof ContainerNotFoundException) && !containerId.isPresent()) {
                // we tried to stop the container by name but no container of the given name existed.
                // this isn't surprising or exceptional, just means the container wasn't started yet.
            } else {
                log.warn("Stopping container {} failed", container, e);
            }
        }
    }

    protected String getContainerError() {
        final ContainerInfo info;
        try {
            // If we don't know our containerId at this point there's not a lot we can do.
            info = getContainerInfo(containerId.orNull());
        } catch (DockerException | InterruptedException e) {
            log.warn("failed to propagate container error: {}", e);
            return "";
        }
        if (info == null) {
            return "";
        }
        return info.state().error();
    }

    @Override
    protected void run() {
        try {
            final int exitCode = run0();
            result.set(exitCode);
        } catch (Exception e) {
            listener.failed(e, getContainerError());
            result.setException(e);
        }
    }

    private int run0() throws InterruptedException, DockerException {
        // Delay
        Thread.sleep(delayMillis);

        // Check if the container is already running
        final ContainerInfo info = getContainerInfo(existingContainerId);
        final String containerId;

        if (info != null && info.state().running()) {
            containerId = existingContainerId;
            this.containerId = Optional.of(existingContainerId);
        } else {
            // Create and start container if necessary
            containerId = createAndStartContainer();
            this.containerId = Optional.of(containerId);

            if (healthChecker.isPresent()) {
                listener.healthChecking();

                final RetryScheduler retryScheduler = BoundedRandomExponentialBackoff.newBuilder()
                        .setMinIntervalMillis(SECONDS.toMillis(1)).setMaxIntervalMillis(SECONDS.toMillis(30))
                        .build().newScheduler();

                while (!healthChecker.get().check(containerId)) {
                    final ContainerState state = getContainerState(containerId);
                    if (state == null) {
                        final String err = "container " + containerId + " was not found during health "
                                + "checking, or has no State object";
                        log.warn(err);
                        throw new RuntimeException(err);
                    }
                    if (!state.running()) {
                        final String err = "container " + containerId + " exited during health checking. "
                                + "Exit code: " + state.exitCode() + ", Config: " + config;
                        log.warn(err);
                        throw new RuntimeException(err);
                    }

                    final long retryMillis = retryScheduler.nextMillis();
                    log.warn("container failed healthcheck, will retry in {}ms: {}: {}", retryMillis, config,
                            containerId);
                    Thread.sleep(retryMillis);
                }

                log.info("healthchecking complete of containerId={} taskConfig={}", containerId, config);
            } else {
                log.info("no healthchecks configured for containerId={} taskConfig={}", containerId, config);
            }
        }

        listener.running();

        // Register and wait for container to exit
        serviceRegistrationHandle = Optional.fromNullable(registrar.register(config.registration()));
        final ContainerExit exit;
        try {
            exit = docker.waitContainer(containerId);
        } finally {
            unregister();
            this.containerId = Optional.absent();
        }

        log.info("container exited: {}: {}: {}", config, containerId, exit.statusCode());
        listener.exited(exit.statusCode());

        return exit.statusCode();
    }

    private String createAndStartContainer() throws DockerException, InterruptedException {

        // Ensure we have the image
        boolean serializePulls = false;
        final Optional<String> dockerVersion = tryGetDockerVersion();
        if (dockerVersion.isPresent()) {
            final String version = dockerVersion.get();
            if (version.startsWith("1.6.") || version.startsWith("1.7.") || version.startsWith("1.8.")) {
                // Docker versions 1.6 through 1.8 have issues with concurrent pulls
                serializePulls = true;
            }
        }

        final String image = config.containerImage();
        if (serializePulls) {
            synchronized (docker) {
                pullImage(image);
            }
        } else {
            pullImage(image);
        }

        return startContainer(image, dockerVersion);
    }

    private String startContainer(final String image, final Optional<String> dockerVersion)
            throws InterruptedException, DockerException {

        // Get container image info
        final ImageInfo imageInfo = docker.inspectImage(image);
        if (imageInfo == null) {
            throw new HeliosRuntimeException("docker inspect image returned null on image " + image);
        }

        // Create container
        final HostConfig hostConfig = config.hostConfig(dockerVersion);
        final ContainerConfig containerConfig = config.containerConfig(imageInfo, dockerVersion).toBuilder()
                .hostConfig(hostConfig).build();
        listener.creating();
        final ContainerCreation container = docker.createContainer(containerConfig, containerName);
        log.info("created container: {}: {}, {}", config, container, containerConfig);
        listener.created(container.id());

        // Start container
        log.info("starting container: {}: {} {}", config, container.id(), hostConfig);
        listener.starting();
        docker.startContainer(container.id());
        log.info("started container: {}: {}", config, container.id());
        listener.started();

        return container.id();
    }

    private ContainerInfo getContainerInfo(final String existingContainerId)
            throws DockerException, InterruptedException {
        if (existingContainerId == null) {
            return null;
        }
        log.info("inspecting container: {}: {}", config, existingContainerId);
        try {
            return docker.inspectContainer(existingContainerId);
        } catch (ContainerNotFoundException e) {
            return null;
        }
    }

    private ContainerState getContainerState(final String existingContainerId)
            throws DockerException, InterruptedException {
        final ContainerInfo info = getContainerInfo(existingContainerId);
        if (info == null) {
            return null;
        }
        return info.state();
    }

    private Optional<String> tryGetDockerVersion() {
        try {
            return Optional.fromNullable(docker.version().version());
        } catch (Exception e) {
            log.error("couldn't fetch Docker version: {}", e);
            return Optional.absent();
        }
    }

    private void pullImage(final String image) throws DockerException, InterruptedException {
        listener.pulling();

        DockerTimeoutException wasTimeout = null;
        final Stopwatch pullTime = Stopwatch.createStarted();

        // Attempt to pull.  Failure, while less than ideal, is ok.
        try {
            docker.pull(image);
            listener.pulled();
            log.info("Pulled image {} in {}s", image, pullTime.elapsed(SECONDS));
        } catch (DockerTimeoutException e) {
            log.warn("Pulling image {} failed with timeout after {}s", image, pullTime.elapsed(SECONDS), e);
            listener.pullFailed();
            wasTimeout = e;
        } catch (DockerException e) {
            log.warn("Pulling image {} failed after {}s", image, pullTime.elapsed(SECONDS), e);
            listener.pullFailed();
        }

        try {
            // If we don't have the image by now, fail.
            docker.inspectImage(image);
        } catch (ImageNotFoundException e) {
            // If we get not found, see if we timed out above, since that's what we actually care
            // to know, as the pull should have fixed the not found-ness.
            if (wasTimeout != null) {
                throw new ImagePullFailedException("Failed pulling image " + image + " because of timeout",
                        wasTimeout);
            }
            throw e;
        }
    }

    public interface Listener {

        void failed(Throwable t, String containerError);

        void pulling();

        void pulled();

        void pullFailed();

        void creating();

        void created(String containerId);

        void starting();

        void started();

        void healthChecking();

        void running();

        void exited(int code);
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {

        private Builder() {
        }

        private long delayMillis;
        private TaskConfig taskConfig;
        private DockerClient docker;
        private String existingContainerId;
        private Listener listener;
        private HealthChecker healthChecker;
        private int secondsToWaitBeforeKill;
        public ServiceRegistrar registrar = new NopServiceRegistrar();

        public Builder delayMillis(final long delayMillis) {
            this.delayMillis = delayMillis;
            return this;
        }

        public Builder config(final TaskConfig config) {
            this.taskConfig = config;
            return this;
        }

        public Builder docker(final DockerClient docker) {
            this.docker = docker;
            return this;
        }

        public Builder existingContainerId(final String existingContainerId) {
            this.existingContainerId = existingContainerId;
            return this;
        }

        public Builder listener(final Listener listener) {
            this.listener = listener;
            return this;
        }

        public Builder healthChecker(final HealthChecker healthChecker) {
            this.healthChecker = healthChecker;
            return this;
        }

        public Builder registrar(final ServiceRegistrar registrar) {
            this.registrar = registrar;
            return this;
        }

        public Builder secondsToWaitBeforeKill(int seconds) {
            this.secondsToWaitBeforeKill = seconds;
            return this;
        }

        public TaskRunner build() {
            return new TaskRunner(this);
        }
    }

    public static class NopListener implements Listener {

        @Override
        public void failed(final Throwable t, final String containerError) {

        }

        @Override
        public void pulling() {

        }

        @Override
        public void pulled() {

        }

        @Override
        public void pullFailed() {

        }

        @Override
        public void creating() {

        }

        @Override
        public void created(final String containerId) {

        }

        @Override
        public void starting() {

        }

        @Override
        public void started() {

        }

        @Override
        public void healthChecking() {

        }

        @Override
        public void running() {

        }

        @Override
        public void exited(final int code) {

        }
    }
}