org.apache.samza.job.yarn.SamzaTaskManager.java Source code

Introduction

Here is the source code for org.apache.samza.job.yarn.SamzaTaskManager.java
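Before diving into the listing, here is a minimal, hypothetical sketch of the configuration this class reads through JobConfig and YarnConfig: the container count, the host-affinity flag, and the container retry policy. The property key names below are assumptions inferred from the getters used in the source (getContainerCount, getHostAffinityEnabled, getContainerRetryCount, getContainerRetryWindowMs) and may differ between Samza versions; the values are illustrative only.

import java.util.HashMap;
import java.util.Map;

import org.apache.samza.config.Config;
import org.apache.samza.config.MapConfig;
import org.apache.samza.config.YarnConfig;

public class TaskManagerConfigSketch {
    public static void main(String[] args) {
        // Hypothetical key names and values, for illustration only.
        Map<String, String> props = new HashMap<String, String>();
        props.put("yarn.container.count", "2");                 // state.containerCount in onInit()
        props.put("yarn.samza.host-affinity.enabled", "true");  // selects HostAwareContainerAllocator
        props.put("yarn.container.retry.count", "8");           // retryCount in onContainerCompleted()
        props.put("yarn.container.retry.window.ms", "300000");  // retryWindowMs in onContainerCompleted()

        Config config = new MapConfig(props);
        YarnConfig yarnConfig = new YarnConfig(config);
        System.out.println("Host affinity enabled: " + yarnConfig.getHostAffinityEnabled());
    }
}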

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.samza.job.yarn;

import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.samza.config.Config;
import org.apache.samza.config.JobConfig;
import org.apache.samza.config.YarnConfig;
import org.apache.samza.coordinator.stream.messages.SetContainerHostMapping;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;

/**
 * Samza's application master is mostly interested in requesting containers to
 * run Samza jobs. SamzaTaskManager is responsible for requesting new
 * containers, handling failures, and notifying the application master that the
 * job is done.
 *
 * The following main threads are involved in the execution of the Samza AM:
 *  - The main thread (defined in SamzaAppMaster) that drives the AM to send container requests to the RM
 *  - The callback handler thread that receives the responses from the RM and handles:
 *      - Populating a buffer when a container is allocated by the RM
 *        (allocatedContainers in {@link org.apache.samza.job.yarn.ContainerRequestState})
 *      - Identifying the cause of a container failure and re-requesting a container from the RM by adding a request
 *        to the internal requestQueue in {@link org.apache.samza.job.yarn.ContainerRequestState}
 *  - The allocator thread defined here, which assigns the allocated containers to pending requests
 *    (see {@link org.apache.samza.job.yarn.ContainerAllocator} or {@link org.apache.samza.job.yarn.HostAwareContainerAllocator})
 */

class SamzaTaskManager implements YarnAppMasterListener {
    private static final Logger log = LoggerFactory.getLogger(SamzaTaskManager.class);

    private final boolean hostAffinityEnabled;
    private final SamzaAppState state;

    // Derived configs
    private final JobConfig jobConfig;
    private final YarnConfig yarnConfig;

    private final AbstractContainerAllocator containerAllocator;
    private final Thread allocatorThread;

    // State
    private boolean tooManyFailedContainers = false;
    private Map<Integer, ContainerFailure> containerFailures = new HashMap<Integer, ContainerFailure>();

    public SamzaTaskManager(Config config, SamzaAppState state,
            AMRMClientAsync<AMRMClient.ContainerRequest> amClient, YarnConfiguration conf) {
        this.state = state;
        this.jobConfig = new JobConfig(config);
        this.yarnConfig = new YarnConfig(config);

        this.hostAffinityEnabled = yarnConfig.getHostAffinityEnabled();

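        // Choose the allocator implementation: with host affinity enabled, the host-aware
        // allocator tries to match each allocated container to the host recorded for that
        // Samza container in the job model; otherwise any host is acceptable.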
        if (this.hostAffinityEnabled) {
            this.containerAllocator = new HostAwareContainerAllocator(amClient,
                    new ContainerUtil(config, state, conf), yarnConfig);
        } else {
            this.containerAllocator = new ContainerAllocator(amClient, new ContainerUtil(config, state, conf),
                    yarnConfig);
        }

        this.allocatorThread = new Thread(this.containerAllocator, "Container Allocator Thread");
    }

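    /**
     * The AM should shut down when too many containers have failed, when every container
     * has completed successfully, or when the allocator thread has died unexpectedly.
     */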
    @Override
    public boolean shouldShutdown() {
        return tooManyFailedContainers || state.completedContainers.get() == state.containerCount
                || !allocatorThread.isAlive();
    }

    @Override
    public void onInit() {
        state.containerCount = jobConfig.getContainerCount();

        state.neededContainers.set(state.containerCount);

        // Request initial set of containers
        Map<Integer, String> containerToHostMapping = state.jobCoordinator.jobModel().getAllContainerLocality();

        containerAllocator.requestContainers(containerToHostMapping);

        // Start container allocator thread
        log.info("Starting the container allocator thread");
        allocatorThread.start();
    }

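    /**
     * No-op: this implementation takes no special action when the AM is asked to reboot.
     */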
    @Override
    public void onReboot() {

    }

    @Override
    public void onShutdown() {
        // Shutdown allocator thread
        containerAllocator.setIsRunning(false);
        try {
            allocatorThread.join();
        } catch (InterruptedException ie) {
            log.info("Allocator Thread join() threw an interrupted exception", ie);
            // Should we throw exception here??
        }
    }

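    /**
     * Buffers a newly allocated container with the allocator, which assigns it to a
     * pending request on the allocator thread.
     */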
    @Override
    public void onContainerAllocated(Container container) {
        containerAllocator.addContainer(container);
    }

    /**
     * This method handles the onContainerCompleted callback from the RM. Based on the ContainerExitStatus, it decides
     * whether a container that exited is marked as completed or failed.
     */
    @Override
    public void onContainerCompleted(ContainerStatus containerStatus) {
        String containerIdStr = ConverterUtils.toString(containerStatus.getContainerId());
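        // Map the YARN container ID back to the Samza container ID (task group ID);
        // -1 means this container is not currently tracked as running.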
        int containerId = -1;
        for (Map.Entry<Integer, YarnContainer> entry : state.runningContainers.entrySet()) {
            if (entry.getValue().id().equals(containerStatus.getContainerId())) {
                containerId = entry.getKey();
                break;
            }
        }
        state.runningContainers.remove(containerId);

        int exitStatus = containerStatus.getExitStatus();
        switch (exitStatus) {
        case ContainerExitStatus.SUCCESS:
            log.info("Container {} completed successfully.", containerIdStr);

            state.completedContainers.incrementAndGet();

            if (containerId != -1) {
                state.finishedContainers.add(containerId);
                containerFailures.remove(containerId);
            }

            if (state.completedContainers.get() == state.containerCount) {
                log.info("Setting job status to SUCCEEDED, since all containers have been marked as completed.");
                state.status = FinalApplicationStatus.SUCCEEDED;
            }
            break;

        case ContainerExitStatus.DISKS_FAILED:
        case ContainerExitStatus.ABORTED:
        case ContainerExitStatus.PREEMPTED:
            log.info(
                    "Got an exit code of {}. This means that container {} was "
                            + "killed by YARN, either due to being released by the application "
                            + "master or being 'lost' due to node failures etc. or due to preemption by the RM",
                    exitStatus, containerIdStr);

            state.releasedContainers.incrementAndGet();

            // If this container was assigned some partitions (a containerId), then
            // clean up and request a new container for the tasks. This should only
            // happen if the container was 'lost' due to node failure, not if the
            // AM released the container.
            if (containerId != -1) {
                log.info(
                        "Released container {} was assigned task group ID {}. Requesting a new container for the task group.",
                        containerIdStr, containerId);

                state.neededContainers.incrementAndGet();
                state.jobHealthy.set(false);

                // request a container on new host
                containerAllocator.requestContainer(containerId, ContainerAllocator.ANY_HOST);
            }
            break;

        default:
            // TODO: Handle failure more intelligently. Should track NodeFailures!
            log.info("Container failed for some reason. Let's start it again");
            log.info("Container " + containerIdStr + " failed with exit code " + exitStatus + " - "
                    + containerStatus.getDiagnostics());

            state.failedContainers.incrementAndGet();
            state.failedContainersStatus.put(containerIdStr, containerStatus);
            state.jobHealthy.set(false);

            if (containerId != -1) {
                state.neededContainers.incrementAndGet();
                // Find out previously running container location
                String lastSeenOn = state.jobCoordinator.jobModel().getContainerToHostValue(containerId,
                        SetContainerHostMapping.HOST_KEY);
                if (!hostAffinityEnabled || lastSeenOn == null) {
                    lastSeenOn = ContainerAllocator.ANY_HOST;
                }
                // A container failed for an unknown reason. Check whether we need to
                // shut down the whole app master because too many container failures
                // have happened. The rules for failing are that the failure count for
                // a task group ID must reach the configured retry count, and the last
                // failure (the one prior to this one) must have happened less than
                // retry window ms ago. If the retry count is set to 0, the app master
                // will fail on any container failure. If the retry count is set to a
                // number < 0, a container failure will never trigger an app master
                // failure.
                int retryCount = yarnConfig.getContainerRetryCount();
                int retryWindowMs = yarnConfig.getContainerRetryWindowMs();

                if (retryCount == 0) {
                    log.error(
                            "Container ID {} ({}) failed, and retry count is set to 0, so shutting down the application master, and marking the job as failed.",
                            containerId, containerIdStr);

                    tooManyFailedContainers = true;
                } else if (retryCount > 0) {
                    int currentFailCount;
                    long lastFailureTime;
                    if (containerFailures.containsKey(containerId)) {
                        ContainerFailure failure = containerFailures.get(containerId);
                        currentFailCount = failure.getCount() + 1;
                        lastFailureTime = failure.getLastFailure();
                    } else {
                        currentFailCount = 1;
                        lastFailureTime = 0L;
                    }
                    if (currentFailCount >= retryCount) {
                        long lastFailureMsDiff = System.currentTimeMillis() - lastFailureTime;

                        if (lastFailureMsDiff < retryWindowMs) {
                            log.error("Container ID " + containerId + "(" + containerIdStr + ") has failed "
                                    + currentFailCount + " times, with last failure " + lastFailureMsDiff
                                    + "ms ago. This is greater than retry count of " + retryCount
                                    + " and window of " + retryWindowMs
                                    + "ms , so shutting down the application master, and marking the job as failed.");

                            // We have too many failures, and we're within the window
                            // boundary, so shut down the app master.
                            tooManyFailedContainers = true;
                            state.status = FinalApplicationStatus.FAILED;
                        } else {
                            log.info(
                                    "Resetting fail count for container ID {} ({}) back to 1, since its last failure "
                                            + "was outside the bounds of the retry window.",
                                    containerId, containerIdStr);

                            // Reset counter back to 1, since the last failure for this
                            // container happened outside the window boundary.
                            containerFailures.put(containerId, new ContainerFailure(1, System.currentTimeMillis()));
                        }
                    } else {
                        log.info("Current fail count for container ID {} is {}.", containerId, currentFailCount);
                        containerFailures.put(containerId,
                                new ContainerFailure(currentFailCount, System.currentTimeMillis()));
                    }
                }

                if (!tooManyFailedContainers) {
                    // Request a new container
                    containerAllocator.requestContainer(containerId, lastSeenOn);
                }
            }

        }
    }
}
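
For context, here is a hypothetical sketch of how a driver could exercise the listener lifecycle shown above (onInit, shouldShutdown, onShutdown). The real control loop lives in SamzaAppMaster, and the TaskManagerDriverSketch class name is made up for illustration; it must sit in the org.apache.samza.job.yarn package because SamzaTaskManager is package-private. The caller is assumed to have already built the Config, SamzaAppState, AMRMClientAsync, and YarnConfiguration collaborators, and to have wired the RM callbacks (onContainerAllocated, onContainerCompleted) to this listener elsewhere.

package org.apache.samza.job.yarn;

import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.samza.config.Config;

class TaskManagerDriverSketch {
    static void run(Config config, SamzaAppState state,
            AMRMClientAsync<AMRMClient.ContainerRequest> amClient,
            YarnConfiguration conf) throws InterruptedException {
        SamzaTaskManager taskManager = new SamzaTaskManager(config, state, amClient, conf);

        taskManager.onInit();                    // request the initial containers, start the allocator thread
        while (!taskManager.shouldShutdown()) {  // all containers done, too many failures, or allocator died
            Thread.sleep(1000);                  // allocation/completion callbacks arrive on the
        }                                        // AMRMClientAsync handler thread in the meantime
        taskManager.onShutdown();                // stop and join the allocator thread
    }
}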