org.apache.aurora.scheduler.updater.JobUpdateControllerImpl.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.aurora.scheduler.updater.JobUpdateControllerImpl.java

Source

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.aurora.scheduler.updater;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Optional;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.inject.Inject;

import org.apache.aurora.common.application.Lifecycle;
import org.apache.aurora.common.collections.Pair;
import org.apache.aurora.common.quantity.Amount;
import org.apache.aurora.common.quantity.Time;
import org.apache.aurora.common.util.Clock;
import org.apache.aurora.gen.JobInstanceUpdateEvent;
import org.apache.aurora.gen.JobUpdateAction;
import org.apache.aurora.gen.JobUpdateEvent;
import org.apache.aurora.gen.JobUpdatePulseStatus;
import org.apache.aurora.gen.JobUpdateQuery;
import org.apache.aurora.gen.JobUpdateStatus;
import org.apache.aurora.gen.Lock;
import org.apache.aurora.gen.LockKey;
import org.apache.aurora.scheduler.BatchWorker;
import org.apache.aurora.scheduler.SchedulerModule.TaskEventBatchWorker;
import org.apache.aurora.scheduler.base.InstanceKeys;
import org.apache.aurora.scheduler.base.JobKeys;
import org.apache.aurora.scheduler.base.Query;
import org.apache.aurora.scheduler.state.LockManager;
import org.apache.aurora.scheduler.state.LockManager.LockException;
import org.apache.aurora.scheduler.state.StateManager;
import org.apache.aurora.scheduler.storage.JobUpdateStore;
import org.apache.aurora.scheduler.storage.Storage;
import org.apache.aurora.scheduler.storage.Storage.MutateWork.NoResult;
import org.apache.aurora.scheduler.storage.TaskStore;
import org.apache.aurora.scheduler.storage.entities.IInstanceKey;
import org.apache.aurora.scheduler.storage.entities.IJobInstanceUpdateEvent;
import org.apache.aurora.scheduler.storage.entities.IJobKey;
import org.apache.aurora.scheduler.storage.entities.IJobUpdate;
import org.apache.aurora.scheduler.storage.entities.IJobUpdateDetails;
import org.apache.aurora.scheduler.storage.entities.IJobUpdateEvent;
import org.apache.aurora.scheduler.storage.entities.IJobUpdateInstructions;
import org.apache.aurora.scheduler.storage.entities.IJobUpdateKey;
import org.apache.aurora.scheduler.storage.entities.IJobUpdateQuery;
import org.apache.aurora.scheduler.storage.entities.IJobUpdateSummary;
import org.apache.aurora.scheduler.storage.entities.ILock;
import org.apache.aurora.scheduler.storage.entities.ILockKey;
import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
import org.apache.aurora.scheduler.updater.StateEvaluator.Failure;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.util.Objects.requireNonNull;

import static com.google.common.base.Preconditions.checkState;

import static org.apache.aurora.gen.JobUpdateStatus.ABORTED;
import static org.apache.aurora.gen.JobUpdateStatus.ERROR;
import static org.apache.aurora.gen.JobUpdateStatus.ROLLED_BACK;
import static org.apache.aurora.gen.JobUpdateStatus.ROLLED_FORWARD;
import static org.apache.aurora.gen.JobUpdateStatus.ROLLING_BACK;
import static org.apache.aurora.gen.JobUpdateStatus.ROLLING_FORWARD;
import static org.apache.aurora.gen.JobUpdateStatus.ROLL_FORWARD_AWAITING_PULSE;
import static org.apache.aurora.scheduler.base.AsyncUtil.shutdownOnError;
import static org.apache.aurora.scheduler.base.Jobs.AWAITING_PULSE_STATES;
import static org.apache.aurora.scheduler.storage.Storage.MutableStoreProvider;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.ACTIVE_QUERY;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.AUTO_RESUME_STATES;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.GET_ACTIVE_RESUME_STATE;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.GET_BLOCKED_RESUME_STATE;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.GET_PAUSE_STATE;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.GET_UNBLOCKED_STATE;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.MonitorAction;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.MonitorAction.ROLL_BACK;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.MonitorAction.ROLL_FORWARD;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.MonitorAction.STOP_WATCHING;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.assertTransitionAllowed;
import static org.apache.aurora.scheduler.updater.JobUpdateStateMachine.getBlockedState;
import static org.apache.aurora.scheduler.updater.OneWayJobUpdater.EvaluationResult;
import static org.apache.aurora.scheduler.updater.OneWayJobUpdater.OneWayStatus;
import static org.apache.aurora.scheduler.updater.OneWayJobUpdater.OneWayStatus.SUCCEEDED;
import static org.apache.aurora.scheduler.updater.SideEffect.InstanceUpdateStatus;

/**
 * Implementation of an updater that orchestrates the process of gradually updating the
 * configuration of tasks in a job.
 * <p>
 * TODO(wfarner): Consider using AbstractIdleService here.
 */
class JobUpdateControllerImpl implements JobUpdateController {
    // Class-wide SLF4J logger.
    private static final Logger LOG = LoggerFactory.getLogger(JobUpdateControllerImpl.class);
    // Template for the message handed to shutdownOnError when asynchronous updater work fails;
    // the %s placeholder identifies the update (and, where relevant, the instance) involved.
    private static final String FATAL_ERROR_FORMAT = "Unexpected problem running asynchronous updater for: %s. Triggering shutdown";

    // Creates updaters (UpdateFactory.Update) from job update instructions.
    private final UpdateFactory updateFactory;
    // Acquires the per-job lock when an update starts and releases it when the update terminates.
    private final LockManager lockManager;
    // Update state is read and mutated inside write operations on this storage.
    private final Storage storage;
    // Runs asynchronous work: pulse-triggered unblocking and delayed instance re-evaluations.
    private final ScheduledExecutorService executor;
    // Not used directly by this class; handed to instance action handlers during evaluation.
    private final StateManager stateManager;
    // Source of timestamps for recorded events and for pulse expiry checks.
    private final Clock clock;
    // Tracks pulse state for coordinated updates; created in the constructor from the clock.
    private final PulseHandler pulseHandler;
    // Passed to shutdownOnError together with FATAL_ERROR_FORMAT for asynchronous work.
    private final Lifecycle lifecycle;
    // Executes the handling of task state changes and deletions (see instanceChanged).
    private final TaskEventBatchWorker batchWorker;
    // Not used directly by this class; handed to instance action handlers during evaluation.
    private final UpdateAgentReserver updateAgentReserver;

    // Currently-active updaters. An active updater is one that is rolling forward or back. Paused
    // and completed updates are represented only in storage, not here.
    // Keyed by job; wrapped in a synchronized map.
    private final Map<IJobKey, UpdateFactory.Update> updates = Collections.synchronizedMap(Maps.newHashMap());

    /**
     * Creates the controller. Every collaborator is mandatory: a {@code null} argument fails
     * immediately with a {@link NullPointerException}.
     */
    @Inject
    JobUpdateControllerImpl(UpdateFactory updateFactory, LockManager lockManager, Storage storage,
            ScheduledExecutorService executor, StateManager stateManager, UpdateAgentReserver updateAgentReserver,
            Clock clock, Lifecycle lifecycle, TaskEventBatchWorker batchWorker) {

        // Collaborators, validated and assigned in parameter order.
        this.updateFactory = requireNonNull(updateFactory);
        this.lockManager = requireNonNull(lockManager);
        this.storage = requireNonNull(storage);
        this.executor = requireNonNull(executor);
        this.stateManager = requireNonNull(stateManager);
        this.updateAgentReserver = requireNonNull(updateAgentReserver);
        this.clock = requireNonNull(clock);
        this.lifecycle = requireNonNull(lifecycle);
        this.batchWorker = requireNonNull(batchWorker);

        // The pulse handler shares the (already validated) clock of the controller.
        this.pulseHandler = new PulseHandler(this.clock);
    }

    /**
     * Persists a new job update and moves it into its first active state.
     * <p>
     * Inside a single storage write this validates the instructions, rejects the request when the
     * job already has an active update, acquires the job lock, saves the update together with the
     * lock token, and records the initial status event. Coordinated updates start in
     * {@code ROLL_FORWARD_AWAITING_PULSE}; all other updates start in {@code ROLLING_FORWARD}.
     *
     * @param update Update to start.
     * @param auditData Audit details attached to the initial event; its user also takes the lock.
     * @throws UpdateStateException If the job already has an active update, or the job lock cannot
     *                              be acquired.
     */
    @Override
    public void start(final IJobUpdate update, final AuditData auditData) throws UpdateStateException {

        requireNonNull(update);
        requireNonNull(auditData);

        storage.write((NoResult<UpdateStateException>) storeProvider -> {
            IJobUpdateSummary summary = update.getSummary();
            IJobUpdateInstructions instructions = update.getInstructions();
            IJobUpdateKey updateKey = summary.getKey();
            IJobKey job = updateKey.getJob();

            // Building an updater doubles as validation of the update configuration.
            updateFactory.newUpdate(instructions, true);

            boolean nothingToDo = instructions.getInitialState().isEmpty() && !instructions.isSetDesiredState();
            if (nothingToDo) {
                throw new IllegalArgumentException("Update instruction is a no-op.");
            }

            JobUpdateStore.Mutable updateStore = storeProvider.getJobUpdateStore();
            List<IJobUpdateSummary> active = updateStore.fetchJobUpdateSummaries(queryActiveByJob(job));

            // More than one active update is a corrupt state, reported separately from the
            // ordinary "update already in progress" rejection.
            if (active.size() > 1) {
                LOG.error("Multiple active updates exist for this job. {}", active);
                throw new UpdateStateException(
                        String.format("Multiple active updates exist for this job. %s", active));
            }
            if (!active.isEmpty()) {
                throw new UpdateInProgressException(
                        "An active update already exists for this job, "
                                + "please terminate it before starting another. "
                                + "Active updates are those in states " + Updates.ACTIVE_JOB_UPDATE_STATES,
                        active.get(0));
            }

            LOG.info("Starting update for job " + job);
            ILock jobLock;
            try {
                jobLock = lockManager.acquireLock(ILockKey.build(LockKey.job(job.newBuilder())), auditData.getUser());
            } catch (LockException e) {
                throw new UpdateStateException(e.getMessage(), e);
            }

            // The lock token is stored with the update; later status changes look it up.
            updateStore.saveJobUpdate(update, Optional.of(requireNonNull(jobLock.getToken())));

            boolean coordinated = isCoordinatedUpdate(instructions);
            JobUpdateStatus initialStatus = coordinated ? ROLL_FORWARD_AWAITING_PULSE : ROLLING_FORWARD;
            if (coordinated) {
                // No pulse has been received yet, hence the 0L timestamp.
                pulseHandler.initializePulseState(update, initialStatus, 0L);
            }

            recordAndChangeJobUpdateStatus(storeProvider, updateKey,
                    addAuditData(newEvent(initialStatus), auditData));
        });
    }

    /**
     * Pauses an update by moving it to the status that {@code GET_PAUSE_STATE} yields for its
     * current status.
     *
     * @param key Update identifier.
     * @param auditData Audit details attached to the recorded event.
     * @throws UpdateStateException If the update does not exist or the transition is not allowed.
     */
    @Override
    public void pause(final IJobUpdateKey key, AuditData auditData) throws UpdateStateException {
        // Fail fast on both arguments (as start/resume do) instead of hitting a
        // NullPointerException later, in the middle of the storage write.
        requireNonNull(key);
        requireNonNull(auditData);
        LOG.info("Attempting to pause update {}", key);
        unscopedChangeUpdateStatus(key, Functions.compose(createAuditedEvent(auditData), GET_PAUSE_STATE));
    }

    /**
     * Resumes a paused update.
     * <p>
     * A coordinated update whose pulse has expired is resumed into a blocked state
     * ({@code GET_BLOCKED_RESUME_STATE}); any other update is resumed into an active state
     * ({@code GET_ACTIVE_RESUME_STATE}).
     *
     * @param key Update identifier.
     * @param auditData Audit details attached to the recorded event.
     * @throws UpdateStateException If the update does not exist or the transition is not allowed.
     */
    @Override
    public void resume(final IJobUpdateKey key, final AuditData auditData) throws UpdateStateException {

        requireNonNull(key);
        requireNonNull(auditData);
        LOG.info("Attempting to resume update " + key);
        storage.write((NoResult<UpdateStateException>) storeProvider -> {
            IJobUpdateDetails details = Iterables.getOnlyElement(
                    storeProvider.getJobUpdateStore().fetchJobUpdateDetails(queryByUpdate(key)), null);
            if (details == null) {
                throw new UpdateStateException("Update does not exist: " + key);
            }

            IJobUpdate update = details.getUpdate();
            IJobUpdateSummary summary = update.getSummary();

            // Pick the resume transition depending on whether the update must wait for a pulse.
            boolean pulseExpired = isCoordinatedAndPulseExpired(summary.getKey(), update.getInstructions());
            Function<JobUpdateStatus, JobUpdateStatus> transition;
            if (pulseExpired) {
                transition = GET_BLOCKED_RESUME_STATE;
            } else {
                transition = GET_ACTIVE_RESUME_STATE;
            }

            JobUpdateStatus target = transition.apply(summary.getState().getStatus());
            changeUpdateStatus(storeProvider, summary, addAuditData(newEvent(target), auditData));
        });
    }

    /**
     * Aborts an update by requesting a transition to {@code ABORTED}, regardless of its current
     * status.
     *
     * @param key Update identifier.
     * @param auditData Audit details attached to the recorded event.
     * @throws UpdateStateException If the update does not exist or the transition is not allowed.
     */
    @Override
    public void abort(IJobUpdateKey key, AuditData auditData) throws UpdateStateException {
        // Validate and log up front, consistent with pause/resume, rather than failing with a
        // NullPointerException in the middle of the storage write.
        requireNonNull(key);
        requireNonNull(auditData);
        LOG.info("Attempting to abort update {}", key);
        unscopedChangeUpdateStatus(key,
                Functions.compose(createAuditedEvent(auditData), Functions.constant(ABORTED)));
    }

    /**
     * Rolls an update back by requesting a transition to {@code ROLLING_BACK}, regardless of its
     * current status.
     *
     * @param key Update identifier.
     * @param auditData Audit details attached to the recorded event.
     * @throws UpdateStateException If the update does not exist or the transition is not allowed.
     */
    @Override
    public void rollback(IJobUpdateKey key, AuditData auditData) throws UpdateStateException {
        // Validate and log up front, consistent with pause/resume, rather than failing with a
        // NullPointerException in the middle of the storage write.
        requireNonNull(key);
        requireNonNull(auditData);
        LOG.info("Attempting to roll back update {}", key);
        unscopedChangeUpdateStatus(key,
                Functions.compose(createAuditedEvent(auditData), Functions.constant(ROLLING_BACK)));
    }

    /**
     * Builds a function that turns a status into a new event for that status, stamped with the
     * given audit data.
     *
     * @param auditData Audit details to attach to every event the function produces.
     * @return Function mapping a status to an audited event.
     */
    private static Function<JobUpdateStatus, JobUpdateEvent> createAuditedEvent(final AuditData auditData) {
        return status -> {
            JobUpdateEvent event = newEvent(status);
            return addAuditData(event, auditData);
        };
    }

    // Orders job update events chronologically by their timestamp (milliseconds), oldest first.
    private static final Ordering<IJobUpdateEvent> CHRON_ORDERING = Ordering
            .from(Comparator.comparingLong(IJobUpdateEvent::getTimestampMs));

    /**
     * Estimates when the last pulse was received for an update whose pulse state has to be
     * rebuilt from stored events.
     *
     * @param details Stored details of the update, including its events.
     * @return {@code 0L} if the most recent event left the update awaiting a pulse, otherwise the
     *         timestamp of the most recent event.
     */
    private long inferLastPulseTimestamp(IJobUpdateDetails details) {
        // Pulse timestamps are deliberately not persisted, yet restoring them as 0L ("never
        // pulsed") after a recovery would be wrong. The stored events give a reasonable
        // substitute: unless the update was already waiting for a pulse, the time of its latest
        // event is used. This restarts the pulse countdown, which matches the most likely
        // behaviour of a healthy system.

        // max() is safe here: at least one event is always written when an update is created.
        IJobUpdateEvent latest = CHRON_ORDERING.max(details.getUpdateEvents());
        boolean wasAwaitingPulse = AWAITING_PULSE_STATES.contains(latest.getStatus());
        return wasAwaitingPulse ? 0L : latest.getTimestampMs();
    }

    /**
     * Restores in-memory state for all stored active updates.
     * <p>
     * For each update matched by {@code ACTIVE_QUERY}: coordinated updates get their pulse state
     * re-initialised from an inferred timestamp, and updates in one of the
     * {@code AUTO_RESUME_STATES} are re-entered into their current status without recording a new
     * event.
     */
    @Override
    public void systemResume() {
        storage.write((NoResult.Quiet) storeProvider -> {
            for (IJobUpdateDetails details : storeProvider.getJobUpdateStore()
                    .fetchJobUpdateDetails(ACTIVE_QUERY)) {

                IJobUpdate update = details.getUpdate();
                IJobUpdateSummary summary = update.getSummary();
                IJobUpdateKey key = summary.getKey();
                JobUpdateStatus status = summary.getState().getStatus();

                if (isCoordinatedUpdate(update.getInstructions())) {
                    LOG.info("Automatically restoring pulse state for " + key);
                    pulseHandler.initializePulseState(update, status, inferLastPulseTimestamp(details));
                }

                if (!AUTO_RESUME_STATES.contains(status)) {
                    continue;
                }

                LOG.info("Automatically resuming update " + key);
                try {
                    // recordChange=false: the status itself is unchanged, so no new event is saved.
                    changeJobUpdateStatus(storeProvider, key, newEvent(status), false);
                } catch (UpdateStateException e) {
                    throw new RuntimeException(e);
                }
            }
        });
    }

    /**
     * Records a pulse for an update and, if the update is currently awaiting a pulse,
     * asynchronously attempts to unblock it.
     *
     * @param key Update identifier.
     * @return {@code FINISHED} when no pulse state is tracked for {@code key}, otherwise {@code OK}.
     * @throws UpdateStateException Declared in the signature; the asynchronous unblock attempt
     *                              catches and logs this exception instead of propagating it.
     */
    @Override
    public JobUpdatePulseStatus pulse(final IJobUpdateKey key) throws UpdateStateException {
        final PulseState state = pulseHandler.pulseAndGet(key);
        if (state == null) {
            LOG.info("Not pulsing inactive job update: {}", key);
            return JobUpdatePulseStatus.FINISHED;
        }

        LOG.debug("Job update {} has been pulsed. Timeout of {} msec is reset.", key, state.getPulseTimeoutMs());

        if (JobUpdateStateMachine.isAwaitingPulse(state.getStatus())) {
            // Attempt to unblock a job update previously blocked on expired pulse.
            executor.execute(shutdownOnError(lifecycle, LOG, String.format(FATAL_ERROR_FORMAT, key), () -> {
                try {
                    unscopedChangeUpdateStatus(key,
                            status -> new JobUpdateEvent().setStatus(GET_UNBLOCKED_STATE.apply(status)));
                } catch (UpdateStateException e) {
                    // Pass the exception as the trailing argument so the stack trace is logged,
                    // rather than flattening it into the message text.
                    LOG.error("Error processing job update pulse for {}", key, e);
                }
            }));
        }

        return JobUpdatePulseStatus.OK;
    }

    /**
     * Notifies the controller that a task changed state; the change is forwarded for the
     * instance identified by the task's job and instance ID.
     *
     * @param updatedTask Task in its new state.
     */
    @Override
    public void instanceChangedState(final IScheduledTask updatedTask) {
        IInstanceKey instance = InstanceKeys.from(
                updatedTask.getAssignedTask().getTask().getJob(),
                updatedTask.getAssignedTask().getInstanceId());
        instanceChanged(instance, Optional.of(updatedTask));
    }

    /**
     * Notifies the controller that an instance no longer exists.
     *
     * @param instance Key of the deleted instance.
     */
    @Override
    public void instanceDeleted(IInstanceKey instance) {
        // Mainly serves to notice an instance that was stuck in PENDING and then killed, since
        // that ends in deletion. The absent task state signals "no task".
        Optional<IScheduledTask> noTask = Optional.absent();
        instanceChanged(instance, noTask);
    }

    /**
     * Queues handling of an instance change on the batch worker. If the instance's job has an
     * in-memory updater that covers the instance, the updater is re-evaluated with the change;
     * otherwise nothing happens (apart from a log line when the instance is not covered).
     *
     * @param instance Instance that changed.
     * @param state New task state, or absent if the instance was deleted.
     */
    private void instanceChanged(final IInstanceKey instance, final Optional<IScheduledTask> state) {
        batchWorker.execute(storeProvider -> {
            IJobKey job = instance.getJobKey();
            UpdateFactory.Update activeUpdate = updates.get(job);

            // No in-memory updater for this job: nothing to do.
            if (activeUpdate == null) {
                return BatchWorker.NO_RESULT;
            }

            if (!activeUpdate.getUpdater().containsInstance(instance.getInstanceId())) {
                LOG.info("Instance " + instance + " is not part of active update for "
                        + JobKeys.canonicalString(job));
                return BatchWorker.NO_RESULT;
            }

            LOG.info("Forwarding task change for " + InstanceKeys.toString(instance));
            try {
                evaluateUpdater(storeProvider, activeUpdate,
                        getOnlyMatch(storeProvider.getJobUpdateStore(), queryActiveByJob(job)),
                        ImmutableMap.of(instance.getInstanceId(), state));
            } catch (UpdateStateException e) {
                throw new RuntimeException(e);
            }
            return BatchWorker.NO_RESULT;
        });
    }

    /**
     * Fetches the single update summary matching a query.
     *
     * @param store Store to query.
     * @param query Query expected to match exactly one update.
     * @return The only matching summary; {@code Iterables.getOnlyElement} throws if the number of
     *         matches is not exactly one.
     */
    private IJobUpdateSummary getOnlyMatch(JobUpdateStore store, IJobUpdateQuery query) {
        List<IJobUpdateSummary> matches = store.fetchJobUpdateSummaries(query);
        return Iterables.getOnlyElement(matches);
    }

    /**
     * Builds a query matching the updates of a job whose status is one of
     * {@code Updates.ACTIVE_JOB_UPDATE_STATES}.
     *
     * @param job Job to query updates for.
     * @return Query scoped to the job and the active statuses.
     */
    @VisibleForTesting
    static IJobUpdateQuery queryActiveByJob(IJobKey job) {
        JobUpdateQuery activeForJob = new JobUpdateQuery()
                .setJobKey(job.newBuilder())
                .setUpdateStatuses(Updates.ACTIVE_JOB_UPDATE_STATES);
        return IJobUpdateQuery.build(activeForJob);
    }

    /**
     * Changes the state of an update in response to an outside input, such as a user action or a
     * pulse. The new status is computed from the update's current stored status inside a single
     * storage write.
     *
     * @param key Update identifier.
     * @param stateChange Computes the event (and thus the target status) from the current status.
     * @throws UpdateStateException If no update exists for the provided {@code key}, or
     *                              if the proposed state transition is not allowed.
     */
    private void unscopedChangeUpdateStatus(final IJobUpdateKey key,
            final Function<? super JobUpdateStatus, JobUpdateEvent> stateChange) throws UpdateStateException {

        storage.write((NoResult<UpdateStateException>) storeProvider -> {
            List<IJobUpdateSummary> matches = storeProvider.getJobUpdateStore()
                    .fetchJobUpdateSummaries(queryByUpdate(key));
            IJobUpdateSummary summary = Iterables.getOnlyElement(matches, null);
            if (summary == null) {
                throw new UpdateStateException("Update does not exist " + key);
            }

            JobUpdateStatus current = summary.getState().getStatus();
            changeUpdateStatus(storeProvider, summary, stateChange.apply(current));
        });
    }

    /**
     * Applies an event to an update after validating the transition. An event whose status
     * equals the update's current status is ignored.
     *
     * @param storeProvider Provider for the mutable stores used within the current write operation.
     * @param updateSummary Stored summary of the update, carrying its current status.
     * @param event Event carrying the proposed status.
     * @throws UpdateStateException If the transition is not allowed.
     */
    private void changeUpdateStatus(MutableStoreProvider storeProvider, IJobUpdateSummary updateSummary,
            JobUpdateEvent event) throws UpdateStateException {

        JobUpdateStatus current = updateSummary.getState().getStatus();
        JobUpdateStatus proposed = event.getStatus();
        if (current != proposed) {
            assertTransitionAllowed(current, proposed);
            recordAndChangeJobUpdateStatus(storeProvider, updateSummary.getKey(), event);
        }
    }

    /**
     * Changes an update's status and always persists the corresponding event.
     *
     * @param storeProvider Provider for the mutable stores used within the current write operation.
     * @param key Update identifier.
     * @param event Event carrying the new status.
     * @throws UpdateStateException Propagated from {@code changeJobUpdateStatus}.
     */
    private void recordAndChangeJobUpdateStatus(MutableStoreProvider storeProvider, IJobUpdateKey key,
            JobUpdateEvent event) throws UpdateStateException {

        final boolean persistEvent = true;
        changeJobUpdateStatus(storeProvider, key, event, persistEvent);
    }

    // Statuses after which an update is finished: on reaching one of these, changeJobUpdateStatus
    // releases the job lock (if held) and drops the update's pulse state.
    private static final Set<JobUpdateStatus> TERMINAL_STATES = ImmutableSet.of(ROLLED_FORWARD, ROLLED_BACK,
            ABORTED, JobUpdateStatus.FAILED, ERROR);

    /**
     * Applies a status change to an update and reacts to it: optionally persists the event,
     * releases the job lock and pulse state on terminal statuses, and creates or discards the
     * in-memory updater according to the monitor action for the resulting status.
     * <p>
     * If the store holds no lock token for the update, the proposed status is overridden with
     * {@code ERROR} and the event is always recorded.
     *
     * @param storeProvider Provider for the mutable stores used within the current write operation.
     * @param key Update identifier.
     * @param proposedEvent Event carrying the requested status; it is mutated (timestamp and
     *                      status are set) when it is recorded.
     * @param recordChange Whether to persist the event when the update still holds its lock.
     * @throws UpdateStateException Propagated from the nested status change or updater evaluation.
     */
    private void changeJobUpdateStatus(MutableStoreProvider storeProvider, IJobUpdateKey key,
            JobUpdateEvent proposedEvent, boolean recordChange) throws UpdateStateException {

        JobUpdateStatus status;
        boolean record;

        JobUpdateStore.Mutable updateStore = storeProvider.getJobUpdateStore();
        Optional<String> updateLock = updateStore.getLockToken(key);
        if (updateLock.isPresent()) {
            status = proposedEvent.getStatus();
            record = recordChange;
        } else {
            // Without a lock token the update cannot proceed: force ERROR and always record it.
            LOG.error("Update " + key + " does not have a lock");
            status = ERROR;
            record = true;
        }

        LOG.info("Update {} is now in state {}", key, status);
        if (record) {
            // The event is stamped with the current time and the effective status (which may be
            // the forced ERROR rather than the proposed status).
            updateStore.saveJobUpdateEvent(key,
                    IJobUpdateEvent.build(proposedEvent.setTimestampMs(clock.nowMillis()).setStatus(status)));
        }

        if (TERMINAL_STATES.contains(status)) {
            // The update is finished: release the job lock (if still held) and forget its pulses.
            if (updateLock.isPresent()) {
                lockManager.releaseLock(ILock.build(
                        new Lock().setKey(LockKey.job(key.getJob().newBuilder())).setToken(updateLock.get())));
            }

            pulseHandler.remove(key);
        } else {
            pulseHandler.updatePulseStatus(key, status);
        }

        MonitorAction action = JobUpdateStateMachine.getActionForStatus(status);
        IJobKey job = key.getJob();
        if (action == STOP_WATCHING) {
            updates.remove(job);
        } else if (action == ROLL_FORWARD || action == ROLL_BACK) {
            if (action == ROLL_BACK) {
                // A rollback replaces whatever updater is currently registered for the job.
                updates.remove(job);
            } else {
                checkState(!updates.containsKey(job), "Updater already exists for %s", job);
            }

            // NOTE(review): assumes the update is present in the store at this point; get() on an
            // absent Optional would throw - confirm callers always save the update first.
            IJobUpdate jobUpdate = updateStore.fetchJobUpdate(key).get();
            UpdateFactory.Update update;
            try {
                // The second argument selects the direction: true when rolling forward.
                update = updateFactory.newUpdate(jobUpdate.getInstructions(), action == ROLL_FORWARD);
            } catch (RuntimeException e) {
                // Failure to build the updater moves the update to ERROR via a recursive call.
                LOG.warn("Uncaught exception: " + e, e);
                changeJobUpdateStatus(storeProvider, key,
                        newEvent(ERROR).setMessage("Internal scheduler error: " + e.getMessage()), true);
                return;
            }
            updates.put(job, update);
            // Kick off the new updater immediately, with no changed instances.
            evaluateUpdater(storeProvider, update, jobUpdate.getSummary(), ImmutableMap.of());
        }
    }

    /**
     * Looks up the active task of a job instance.
     *
     * @param taskStore Store to query.
     * @param job Job the instance belongs to.
     * @param instanceId Instance ID within the job.
     * @return The active task, or absent if none matches; {@code Iterables.getOnlyElement} throws
     *         if more than one task matches.
     */
    private static Optional<IScheduledTask> getActiveInstance(TaskStore taskStore, IJobKey job, int instanceId) {
        Iterable<IScheduledTask> activeTasks = taskStore.fetchTasks(Query.instanceScoped(job, instanceId).active());
        IScheduledTask onlyTask = Iterables.getOnlyElement(activeTasks, null);
        return Optional.fromNullable(onlyTask);
    }

    // Status-change set reported for an instance that was immediately found healthy and in the
    // desired state; evaluateUpdater does not persist it when the instance has no saved events.
    private static final Set<InstanceUpdateStatus> NOOP_INSTANCE_UPDATE = ImmutableSet
            .of(InstanceUpdateStatus.WORKING, InstanceUpdateStatus.SUCCEEDED);

    /**
     * Tells whether an update is coordinated, i.e. its settings specify a positive
     * "block if no pulses after" interval.
     *
     * @param instructions Instructions of the update.
     * @return {@code true} if the configured interval is greater than zero.
     */
    private static boolean isCoordinatedUpdate(IJobUpdateInstructions instructions) {
        long blockAfterMs = instructions.getSettings().getBlockIfNoPulsesAfterMs();
        return blockAfterMs > 0;
    }

    /**
     * Tells whether an update is coordinated and currently lacks a live pulse.
     *
     * @param key Update identifier.
     * @param instructions Instructions of the update.
     * @return {@code false} for non-coordinated updates; otherwise {@code true} when no pulse
     *         state is tracked for the update or the tracked state reports itself blocked.
     */
    private boolean isCoordinatedAndPulseExpired(IJobUpdateKey key, IJobUpdateInstructions instructions) {

        if (!isCoordinatedUpdate(instructions)) {
            return false;
        }

        PulseState tracked = pulseHandler.get(key);
        boolean expired = tracked == null || tracked.isBlocked(clock);
        LOG.info("Coordinated update {} pulse expired: {}", key, expired);
        return expired;
    }

    // Message of the ERROR event recorded when evaluation finds no lock token for the update.
    @VisibleForTesting
    static final String LOST_LOCK_MESSAGE = "Updater has lost its exclusive lock, unable to proceed.";

    // Message of the event recorded when a coordinated update is blocked because its pulse expired.
    @VisibleForTesting
    static final String PULSE_TIMEOUT_MESSAGE = "Pulses from external service have timed out.";

    /**
     * Runs one evaluation pass of the in-memory updater for an update and applies the outcome.
     * <p>
     * The pass ends early in two cases: the store holds no lock token for the update (the update
     * is moved to {@code ERROR}), or it is a coordinated update whose pulse has expired (the
     * update is moved to the blocked state derived from its current status). Otherwise the
     * updater is evaluated, new per-instance events are persisted, and then either the update is
     * transitioned to its success/failure status, or each instance action's handler is asked for
     * a re-evaluation delay and a deferred re-evaluation is scheduled when one is returned.
     *
     * @param storeProvider Provider for the mutable stores used within the current write operation.
     * @param update In-memory updater for the job.
     * @param summary Stored summary of the update being evaluated.
     * @param changedInstance Instances whose state changed, keyed by instance ID; may be empty.
     * @throws UpdateStateException If a resulting status transition is rejected.
     * @throws IllegalArgumentException If the updater reports a terminal status together with
     *                                  instance actions.
     */
    private void evaluateUpdater(final MutableStoreProvider storeProvider, final UpdateFactory.Update update,
            IJobUpdateSummary summary, Map<Integer, Optional<IScheduledTask>> changedInstance)
            throws UpdateStateException {

        JobUpdateStatus updaterStatus = summary.getState().getStatus();
        final IJobUpdateKey key = summary.getKey();

        JobUpdateStore.Mutable updateStore = storeProvider.getJobUpdateStore();
        if (!updateStore.getLockToken(key).isPresent()) {
            // The update no longer holds its lock: fail it rather than evaluate.
            recordAndChangeJobUpdateStatus(storeProvider, key, newEvent(ERROR).setMessage(LOST_LOCK_MESSAGE));
            return;
        }

        IJobUpdateInstructions instructions = updateStore.fetchJobUpdateInstructions(key).get();
        if (isCoordinatedAndPulseExpired(key, instructions)) {
            // Move coordinated update into awaiting pulse state.
            JobUpdateStatus blockedStatus = getBlockedState(summary.getState().getStatus());
            changeUpdateStatus(storeProvider, summary, newEvent(blockedStatus).setMessage(PULSE_TIMEOUT_MESSAGE));
            return;
        }

        // Lets the updater look up the current active task of any instance on demand.
        InstanceStateProvider<Integer, Optional<IScheduledTask>> stateProvider = instanceId -> getActiveInstance(
                storeProvider.getTaskStore(), key.getJob(), instanceId);

        EvaluationResult<Integer> result = update.getUpdater().evaluate(changedInstance, stateProvider);

        LOG.info(key + " evaluation result: " + result);

        // First persist the per-instance status changes reported by the evaluation.
        for (Map.Entry<Integer, SideEffect> entry : result.getSideEffects().entrySet()) {
            Iterable<InstanceUpdateStatus> statusChanges;

            int instanceId = entry.getKey();
            List<IJobInstanceUpdateEvent> savedEvents = updateStore.fetchInstanceEvents(key, instanceId);

            Set<JobUpdateAction> savedActions = FluentIterable.from(savedEvents).transform(EVENT_TO_ACTION).toSet();

            // Don't bother persisting a sequence of status changes that represents an instance that
            // was immediately recognized as being healthy and in the desired state.
            if (entry.getValue().getStatusChanges().equals(NOOP_INSTANCE_UPDATE) && savedEvents.isEmpty()) {

                LOG.info("Suppressing no-op update for instance " + instanceId);
                statusChanges = ImmutableSet.of();
            } else {
                statusChanges = entry.getValue().getStatusChanges();
            }

            for (InstanceUpdateStatus statusChange : statusChanges) {
                // Fails with a NullPointerException if STATE_MAP has no entry for this
                // (instance status, update status) pair.
                JobUpdateAction action = STATE_MAP.get(Pair.of(statusChange, updaterStatus));
                requireNonNull(action);

                // A given instance update action may only be issued once during the update lifecycle.
                // Suppress duplicate events due to pause/resume operations.
                if (savedActions.contains(action)) {
                    LOG.info("Suppressing duplicate update {} for instance {}.", action, instanceId);
                } else {
                    IJobInstanceUpdateEvent event = IJobInstanceUpdateEvent.build(new JobInstanceUpdateEvent()
                            .setInstanceId(instanceId).setTimestampMs(clock.nowMillis()).setAction(action));
                    updateStore.saveJobInstanceUpdateEvent(summary.getKey(), event);
                }
            }
        }

        // Then act on the overall outcome: finish the update, or carry out instance actions.
        OneWayStatus status = result.getStatus();
        if (status == SUCCEEDED || status == OneWayStatus.FAILED) {
            if (SideEffect.hasActions(result.getSideEffects().values())) {
                throw new IllegalArgumentException("A terminal state should not specify actions: " + result);
            }

            JobUpdateEvent event = new JobUpdateEvent();
            if (status == SUCCEEDED) {
                event.setStatus(update.getSuccessStatus());
            } else {
                event.setStatus(update.getFailureStatus());
                // Generate a transition message based on one (arbitrary) instance in the group that pushed
                // the update over the failure threshold (in all likelihood this group is of size 1).
                // This is done as a rough cut to aid in diagnosing a failed update, as generating a
                // complete summary would likely be of dubious value.
                for (Map.Entry<Integer, SideEffect> entry : result.getSideEffects().entrySet()) {
                    Optional<Failure> failure = entry.getValue().getFailure();
                    if (failure.isPresent()) {
                        event.setMessage(failureMessage(entry.getKey(), failure.get()));
                        break;
                    }
                }
            }
            changeUpdateStatus(storeProvider, summary, event);
        } else {
            LOG.info("Executing side-effects for update of " + key + ": " + result.getSideEffects());
            for (Map.Entry<Integer, SideEffect> entry : result.getSideEffects().entrySet()) {
                IInstanceKey instance = InstanceKeys.from(key.getJob(), entry.getKey());

                Optional<InstanceAction> action = entry.getValue().getAction();
                if (action.isPresent()) {
                    Optional<InstanceActionHandler> handler = action.get().getHandler();
                    if (handler.isPresent()) {
                        // NOTE(review): the handler presumably performs the instance action as part
                        // of this call - confirm against InstanceActionHandler.
                        Optional<Amount<Long, Time>> reevaluateDelay = handler.get().getReevaluationDelay(instance,
                                instructions, storeProvider, stateManager, updateAgentReserver, updaterStatus, key);
                        if (reevaluateDelay.isPresent()) {
                            // Re-check this instance after the delay requested by the handler.
                            executor.schedule(getDeferredEvaluator(instance, key), reevaluateDelay.get().getValue(),
                                    reevaluateDelay.get().getUnit().getTimeUnit());
                        }
                    }
                }
            }
        }
    }

    // Extracts the action from a stored instance update event.
    @VisibleForTesting
    static final Function<IJobInstanceUpdateEvent, JobUpdateAction> EVENT_TO_ACTION = IJobInstanceUpdateEvent::getAction;

    /**
     * Formats the transition message used when an update fails because of an instance.
     *
     * @param instanceId ID of the failed instance.
     * @param failure Failure reported for the instance.
     * @return Message naming the instance followed by the failure reason.
     */
    @VisibleForTesting
    static String failureMessage(int instanceId, Failure failure) {
        final String template = "Latest failure: instance %d %s";
        return String.format(template, instanceId, failure.getReason());
    }

    /**
     * Associates an instance updater state change and the job's update status to an action.
     * <p>
     * Only {@code ROLLING_FORWARD} and {@code ROLLING_BACK} job statuses are mapped; a lookup with
     * any other job status returns {@code null}.
     */
    private static final Map<Pair<InstanceUpdateStatus, JobUpdateStatus>, JobUpdateAction> STATE_MAP = ImmutableMap
            .<Pair<InstanceUpdateStatus, JobUpdateStatus>, JobUpdateAction>builder()
            // Rolling forward: instance is being updated, was updated, or failed to update.
            .put(Pair.of(InstanceUpdateStatus.WORKING, ROLLING_FORWARD), JobUpdateAction.INSTANCE_UPDATING)
            .put(Pair.of(InstanceUpdateStatus.SUCCEEDED, ROLLING_FORWARD), JobUpdateAction.INSTANCE_UPDATED)
            .put(Pair.of(InstanceUpdateStatus.FAILED, ROLLING_FORWARD), JobUpdateAction.INSTANCE_UPDATE_FAILED)
            // Rolling back: instance is being rolled back, was rolled back, or failed to roll back.
            .put(Pair.of(InstanceUpdateStatus.WORKING, ROLLING_BACK), JobUpdateAction.INSTANCE_ROLLING_BACK)
            .put(Pair.of(InstanceUpdateStatus.SUCCEEDED, ROLLING_BACK), JobUpdateAction.INSTANCE_ROLLED_BACK)
            .put(Pair.of(InstanceUpdateStatus.FAILED, ROLLING_BACK), JobUpdateAction.INSTANCE_ROLLBACK_FAILED)
            .build();

    /**
     * Builds a query matching the update with the given key.
     *
     * @param key Update identifier.
     * @return Query scoped to that update key.
     */
    @VisibleForTesting
    static IJobUpdateQuery queryByUpdate(IJobUpdateKey key) {
        JobUpdateQuery byKey = new JobUpdateQuery().setKey(key.newBuilder());
        return IJobUpdateQuery.build(byKey);
    }

    /**
     * Creates a fresh event carrying only the given status.
     *
     * @param status Status to set on the event.
     * @return New event with the status set.
     */
    private static JobUpdateEvent newEvent(JobUpdateStatus status) {
        JobUpdateEvent event = new JobUpdateEvent();
        return event.setStatus(status);
    }

    /**
     * Copies the audit message and user onto an event.
     *
     * @param event Event to annotate; it is modified through its setters.
     * @param auditData Source of the user and the optional message. An absent message is stored
     *                  as {@code null}.
     * @return The result of the final setter call on {@code event}.
     */
    private static JobUpdateEvent addAuditData(JobUpdateEvent event, AuditData auditData) {
        JobUpdateEvent withMessage = event.setMessage(auditData.getMessage().orNull());
        return withMessage.setUser(auditData.getUser());
    }

    /**
     * Builds a task that re-runs the update evaluation for a single instance at a later time.
     *
     * <p>When run, the task opens a storage write, looks up the summary of the update identified
     * by {@code key}, and evaluates {@code instance} only if that update's status is still active.
     * The task is wrapped with {@code shutdownOnError} (presumably so that an unexpected error
     * shuts the scheduler down via {@code lifecycle} -- confirm against that helper).
     *
     * @param instance Instance to re-evaluate.
     * @param key Key of the update the evaluation belongs to.
     * @return A runnable that can be handed to the executor.
     */
    private Runnable getDeferredEvaluator(final IInstanceKey instance, final IJobUpdateKey key) {
        final String fatalMessage =
                String.format(FATAL_ERROR_FORMAT, "Key: " + key + " Instance key: " + instance);

        final NoResult.Quiet deferredEvaluation = storeProvider -> {
            IJobUpdateSummary summary = getOnlyMatch(storeProvider.getJobUpdateStore(), queryByUpdate(key));
            JobUpdateStatus currentStatus = summary.getState().getStatus();
            if (!JobUpdateStateMachine.isActive(currentStatus)) {
                // Suppress this evaluation if the updater is not currently active.
                return;
            }

            UpdateFactory.Update update = updates.get(instance.getJobKey());
            try {
                evaluateUpdater(
                        storeProvider,
                        update,
                        summary,
                        ImmutableMap.of(
                                instance.getInstanceId(),
                                getActiveInstance(
                                        storeProvider.getTaskStore(),
                                        instance.getJobKey(),
                                        instance.getInstanceId())));
            } catch (UpdateStateException e) {
                // Log with context, then escalate as unchecked so the wrapper sees the failure.
                LOG.error(String.format("Error running deferred evaluation for %s: %s", instance, e));
                throw new RuntimeException(e);
            }
        };

        return shutdownOnError(lifecycle, LOG, fatalMessage, () -> storage.write(deferredEvaluation));
    }

    /**
     * Tracks the pulse state of coordinated job updates, keyed by update key.
     *
     * <p>All accessors are {@code synchronized} on this handler instance.
     */
    private static class PulseHandler {
        private final Clock clock;

        // TODO(maxim): expose this data via a debug endpoint AURORA-1103.
        // Currently active coordinated update pulse states. A pulse state is added when a coordinated
        // update is created and removed only when an update reaches terminal state. A PAUSED update
        // pulse state is still retained in the map and accepts pulses.
        private final Map<IJobUpdateKey, PulseState> pulseStates = Maps.newHashMap();

        PulseHandler(Clock clock) {
            this.clock = requireNonNull(clock);
        }

        /**
         * Registers (or overwrites) the pulse state for an update.
         *
         * @param update Update whose key and pulse timeout setting are used.
         * @param status Status to record.
         * @param ts Timestamp, in milliseconds, to record as the last pulse.
         */
        synchronized void initializePulseState(IJobUpdate update, JobUpdateStatus status, long ts) {
            IJobUpdateKey updateKey = update.getSummary().getKey();
            PulseState initial = new PulseState(
                    status,
                    update.getInstructions().getSettings().getBlockIfNoPulsesAfterMs(),
                    ts);
            pulseStates.put(updateKey, initial);
        }

        /**
         * Records a pulse for the update at the current clock time.
         *
         * @param key Key of the update being pulsed.
         * @return The state that was stored BEFORE this pulse was recorded (same status and
         *         timeout as the refreshed entry, but the older last-pulse timestamp), or
         *         {@code null} if the update is not tracked.
         */
        synchronized PulseState pulseAndGet(IJobUpdateKey key) {
            PulseState previous = pulseStates.get(key);
            if (previous == null) {
                return null;
            }

            PulseState refreshed =
                    new PulseState(previous.getStatus(), previous.getPulseTimeoutMs(), clock.nowMillis());
            // Map.put hands back the entry it replaced, i.e. the pre-pulse state.
            return pulseStates.put(key, refreshed);
        }

        /**
         * Replaces the status of a tracked update, keeping its timeout and last pulse time.
         * Does nothing if the update is not tracked.
         *
         * @param key Key of the update to modify.
         * @param status New status to record.
         */
        synchronized void updatePulseStatus(IJobUpdateKey key, JobUpdateStatus status) {
            PulseState current = pulseStates.get(key);
            if (current == null) {
                return;
            }

            PulseState restated = new PulseState(status, current.getPulseTimeoutMs(), current.getLastPulseMs());
            pulseStates.put(key, restated);
        }

        /**
         * Stops tracking the given update.
         *
         * @param key Key of the update to drop.
         */
        synchronized void remove(IJobUpdateKey key) {
            pulseStates.remove(key);
        }

        /**
         * Looks up the current pulse state of an update.
         *
         * @param key Key of the update to look up.
         * @return The tracked state, or {@code null} if the update is not tracked.
         */
        synchronized PulseState get(IJobUpdateKey key) {
            return pulseStates.get(key);
        }
    }

    /**
     * Immutable snapshot of an update's pulse bookkeeping: its status, the pulse timeout and the
     * time of the last pulse, both in milliseconds.
     */
    private static class PulseState {
        private final JobUpdateStatus status;
        private final long pulseTimeoutMs;
        private final long lastPulseMs;

        /**
         * @param status Update status; must not be {@code null}.
         * @param pulseTimeoutMs Time without a pulse after which the state counts as blocked.
         * @param lastPulseMs Timestamp of the most recent pulse.
         */
        PulseState(JobUpdateStatus status, long pulseTimeoutMs, long lastPulseMs) {
            this.status = requireNonNull(status);
            this.pulseTimeoutMs = pulseTimeoutMs;
            this.lastPulseMs = lastPulseMs;
        }

        /** Returns the recorded update status. */
        JobUpdateStatus getStatus() {
            return this.status;
        }

        /** Returns the pulse timeout in milliseconds. */
        long getPulseTimeoutMs() {
            return this.pulseTimeoutMs;
        }

        /** Returns the timestamp of the last pulse in milliseconds. */
        long getLastPulseMs() {
            return this.lastPulseMs;
        }

        /**
         * Tells whether the pulse timeout has elapsed since the last pulse.
         *
         * @param clock Source of the current time.
         * @return {@code true} when at least {@code pulseTimeoutMs} milliseconds have passed since
         *         the last pulse.
         */
        boolean isBlocked(Clock clock) {
            long sinceLastPulseMs = clock.nowMillis() - lastPulseMs;
            return sinceLastPulseMs >= pulseTimeoutMs;
        }
    }
}