org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager;

import com.google.common.annotations.VisibleForTesting;
import com.sun.jersey.spi.container.servlet.ServletContainer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.curator.framework.AuthInfo;
import org.apache.curator.framework.CuratorFramework;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.http.HttpServer2;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.ProxyUsers;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.VersionInfo;
import org.apache.hadoop.util.curator.ZKCuratorManager;
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.ConfigurationProvider;
import org.apache.hadoop.yarn.conf.ConfigurationProviderFactory;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventDispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.nodelabels.NodeAttributesManager;
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
import org.apache.hadoop.yarn.server.resourcemanager.federation.FederationStateStoreService;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.CombinedSystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.NoOpSystemMetricPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.TimelineServiceV1Publisher;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.TimelineServiceV2Publisher;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NodeAttributesManagerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMDelegatedNodeLabelsUpdater;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreFactory;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.AbstractReservationSystem;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem;
import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager;
import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManagerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.monitor.RMAppLifetimeMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.AllocationTagsManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.MemoryPlacementConstraintManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.PlacementConstraintManagerService;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.MultiNodeSortingManager;
import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer;
import org.apache.hadoop.yarn.server.resourcemanager.security.QueueACLsManager;
import org.apache.hadoop.yarn.server.resourcemanager.timelineservice.RMTimelineCollectorManager;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebApp;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebAppUtil;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
import org.apache.hadoop.yarn.server.service.SystemServiceManager;
import org.apache.hadoop.yarn.server.webproxy.AppReportFetcher;
import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils;
import org.apache.hadoop.yarn.server.webproxy.WebAppProxy;
import org.apache.hadoop.yarn.server.webproxy.WebAppProxyServlet;
import org.apache.hadoop.yarn.webapp.WebApp;
import org.apache.hadoop.yarn.webapp.WebApps;
import org.apache.hadoop.yarn.webapp.WebApps.Builder;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.apache.zookeeper.server.auth.DigestAuthenticationProvider;
import org.eclipse.jetty.webapp.WebAppContext;

import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.charset.Charset;
import java.security.PrivilegedExceptionAction;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * The ResourceManager is the main class that is a set of components.
 * "I am the ResourceManager. All your resources belong to us..."
 *
 */
@SuppressWarnings("unchecked")
public class ResourceManager extends CompositeService implements Recoverable, ResourceManagerMXBean {

    /**
     * Priority of the ResourceManager shutdown hook.
     */
    public static final int SHUTDOWN_HOOK_PRIORITY = 30;

    /**
     * Used for generation of various ids.
     */
    public static final int EPOCH_BIT_SHIFT = 40;

    private static final Log LOG = LogFactory.getLog(ResourceManager.class);
    private static long clusterTimeStamp = System.currentTimeMillis();

    /*
     * UI2 webapp name
     */
    public static final String UI2_WEBAPP_NAME = "/ui2";

    /**
     * "Always On" services. Services that need to run always irrespective of
     * the HA state of the RM.
     */
    @VisibleForTesting
    protected RMContextImpl rmContext;
    private Dispatcher rmDispatcher;
    @VisibleForTesting
    protected AdminService adminService;

    /**
     * "Active" services. Services that need to run only on the Active RM.
     * These services are managed (initialized, started, stopped) by the
     * {@link CompositeService} RMActiveServices.
     *
     * RM is active when (1) HA is disabled, or (2) HA is enabled and the RM is
     * in Active state.
     */
    protected RMActiveServices activeServices;
    protected RMSecretManagerService rmSecretManagerService;

    protected ResourceScheduler scheduler;
    protected ReservationSystem reservationSystem;
    private ClientRMService clientRM;
    protected ApplicationMasterService masterService;
    protected NMLivelinessMonitor nmLivelinessMonitor;
    protected NodesListManager nodesListManager;
    protected RMAppManager rmAppManager;
    protected ApplicationACLsManager applicationACLsManager;
    protected QueueACLsManager queueACLsManager;
    private FederationStateStoreService federationStateStoreService;
    private WebApp webApp;
    private AppReportFetcher fetcher = null;
    protected ResourceTrackerService resourceTracker;
    private JvmMetrics jvmMetrics;
    private boolean curatorEnabled = false;
    private ZKCuratorManager zkManager;
    private final String zkRootNodePassword = Long.toString(new SecureRandom().nextLong());
    private boolean recoveryEnabled;

    @VisibleForTesting
    protected String webAppAddress;
    private ConfigurationProvider configurationProvider = null;
    /** End of Active services */

    private Configuration conf;

    private UserGroupInformation rmLoginUGI;

    public ResourceManager() {
        super("ResourceManager");
    }

    public RMContext getRMContext() {
        return this.rmContext;
    }

    public static long getClusterTimeStamp() {
        return clusterTimeStamp;
    }

    public String getRMLoginUser() {
        return rmLoginUGI.getShortUserName();
    }

    @VisibleForTesting
    protected static void setClusterTimeStamp(long timestamp) {
        clusterTimeStamp = timestamp;
    }

    @VisibleForTesting
    Dispatcher getRmDispatcher() {
        return rmDispatcher;
    }

    @VisibleForTesting
    protected ResourceProfilesManager createResourceProfileManager() {
        ResourceProfilesManager resourceProfilesManager = new ResourceProfilesManagerImpl();
        return resourceProfilesManager;
    }

    @Override
    protected void serviceInit(Configuration conf) throws Exception {
        this.conf = conf;
        this.rmContext = new RMContextImpl();
        rmContext.setResourceManager(this);

        this.configurationProvider = ConfigurationProviderFactory.getConfigurationProvider(conf);
        this.configurationProvider.init(this.conf);
        rmContext.setConfigurationProvider(configurationProvider);

        // load core-site.xml
        loadConfigurationXml(YarnConfiguration.CORE_SITE_CONFIGURATION_FILE);

        // Do refreshSuperUserGroupsConfiguration with loaded core-site.xml
        // Or use RM specific configurations to overwrite the common ones first
        // if they exist
        RMServerUtils.processRMProxyUsersConf(conf);
        ProxyUsers.refreshSuperUserGroupsConfiguration(this.conf);

        // load yarn-site.xml
        loadConfigurationXml(YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);

        validateConfigs(this.conf);

        // Set HA configuration should be done before login
        this.rmContext.setHAEnabled(HAUtil.isHAEnabled(this.conf));
        if (this.rmContext.isHAEnabled()) {
            HAUtil.verifyAndSetConfiguration(this.conf);
        }

        // Set UGI and do login
        // If security is enabled, use login user
        // If security is not enabled, use current user
        this.rmLoginUGI = UserGroupInformation.getCurrentUser();
        try {
            doSecureLogin();
        } catch (IOException ie) {
            throw new YarnRuntimeException("Failed to login", ie);
        }

        // register the handlers for all AlwaysOn services using setupDispatcher().
        rmDispatcher = setupDispatcher();
        addIfService(rmDispatcher);
        rmContext.setDispatcher(rmDispatcher);

        // The order of services below should not be changed as services will be
        // started in same order
        // As elector service needs admin service to be initialized and started,
        // first we add admin service then elector service

        adminService = createAdminService();
        addService(adminService);
        rmContext.setRMAdminService(adminService);

        // elector must be added post adminservice
        if (this.rmContext.isHAEnabled()) {
            // If the RM is configured to use an embedded leader elector,
            // initialize the leader elector.
            if (HAUtil.isAutomaticFailoverEnabled(conf) && HAUtil.isAutomaticFailoverEmbedded(conf)) {
                EmbeddedElector elector = createEmbeddedElector();
                addIfService(elector);
                rmContext.setLeaderElectorService(elector);
            }
        }

        rmContext.setYarnConfiguration(conf);

        createAndInitActiveServices(false);

        webAppAddress = WebAppUtils.getWebAppBindURL(this.conf, YarnConfiguration.RM_BIND_HOST,
                WebAppUtils.getRMWebAppURLWithoutScheme(this.conf));

        RMApplicationHistoryWriter rmApplicationHistoryWriter = createRMApplicationHistoryWriter();
        addService(rmApplicationHistoryWriter);
        rmContext.setRMApplicationHistoryWriter(rmApplicationHistoryWriter);

        // initialize the RM timeline collector first so that the system metrics
        // publisher can bind to it
        if (YarnConfiguration.timelineServiceV2Enabled(this.conf)) {
            RMTimelineCollectorManager timelineCollectorManager = createRMTimelineCollectorManager();
            addService(timelineCollectorManager);
            rmContext.setRMTimelineCollectorManager(timelineCollectorManager);
        }

        SystemMetricsPublisher systemMetricsPublisher = createSystemMetricsPublisher();
        addIfService(systemMetricsPublisher);
        rmContext.setSystemMetricsPublisher(systemMetricsPublisher);

        registerMXBean();

        super.serviceInit(this.conf);
    }

    private void loadConfigurationXml(String configurationFile) throws YarnException, IOException {
        InputStream configurationInputStream = this.configurationProvider.getConfigurationInputStream(this.conf,
                configurationFile);
        if (configurationInputStream != null) {
            this.conf.addResource(configurationInputStream, configurationFile);
        }
    }

    protected EmbeddedElector createEmbeddedElector() throws IOException {
        EmbeddedElector elector;
        curatorEnabled = conf.getBoolean(YarnConfiguration.CURATOR_LEADER_ELECTOR,
                YarnConfiguration.DEFAULT_CURATOR_LEADER_ELECTOR_ENABLED);
        if (curatorEnabled) {
            this.zkManager = createAndStartZKManager(conf);
            elector = new CuratorBasedElectorService(this);
        } else {
            elector = new ActiveStandbyElectorBasedElectorService(this);
        }
        return elector;
    }

    /**
     * Get ZooKeeper Curator manager, creating and starting if not exists.
     * @param config Configuration for the ZooKeeper curator.
     * @return ZooKeeper Curator manager.
     * @throws IOException If it cannot create the manager.
     */
    public ZKCuratorManager createAndStartZKManager(Configuration config) throws IOException {
        ZKCuratorManager manager = new ZKCuratorManager(config);

        // Get authentication
        List<AuthInfo> authInfos = new ArrayList<>();
        if (HAUtil.isHAEnabled(config) && HAUtil
                .getConfValueForRMInstance(YarnConfiguration.ZK_RM_STATE_STORE_ROOT_NODE_ACL, config) == null) {
            String zkRootNodeUsername = HAUtil.getConfValueForRMInstance(YarnConfiguration.RM_ADDRESS,
                    YarnConfiguration.DEFAULT_RM_ADDRESS, config);
            String defaultFencingAuth = zkRootNodeUsername + ":" + zkRootNodePassword;
            byte[] defaultFencingAuthData = defaultFencingAuth.getBytes(Charset.forName("UTF-8"));
            String scheme = new DigestAuthenticationProvider().getScheme();
            AuthInfo authInfo = new AuthInfo(scheme, defaultFencingAuthData);
            authInfos.add(authInfo);
        }

        manager.start(authInfos);
        return manager;
    }

    public ZKCuratorManager getZKManager() {
        return zkManager;
    }

    public CuratorFramework getCurator() {
        if (this.zkManager == null) {
            return null;
        }
        return this.zkManager.getCurator();
    }

    public String getZkRootNodePassword() {
        return this.zkRootNodePassword;
    }

    protected QueueACLsManager createQueueACLsManager(ResourceScheduler scheduler, Configuration conf) {
        return new QueueACLsManager(scheduler, conf);
    }

    @VisibleForTesting
    protected void setRMStateStore(RMStateStore rmStore) {
        rmStore.setRMDispatcher(rmDispatcher);
        rmStore.setResourceManager(this);
        rmContext.setStateStore(rmStore);
    }

    protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() {
        return new EventDispatcher(this.scheduler, "SchedulerEventDispatcher");
    }

    protected Dispatcher createDispatcher() {
        return new AsyncDispatcher("RM Event dispatcher");
    }

    protected ResourceScheduler createScheduler() {
        String schedulerClassName = conf.get(YarnConfiguration.RM_SCHEDULER,
                YarnConfiguration.DEFAULT_RM_SCHEDULER);
        LOG.info("Using Scheduler: " + schedulerClassName);
        try {
            Class<?> schedulerClazz = Class.forName(schedulerClassName);
            if (ResourceScheduler.class.isAssignableFrom(schedulerClazz)) {
                return (ResourceScheduler) ReflectionUtils.newInstance(schedulerClazz, this.conf);
            } else {
                throw new YarnRuntimeException("Class: " + schedulerClassName + " not instance of "
                        + ResourceScheduler.class.getCanonicalName());
            }
        } catch (ClassNotFoundException e) {
            throw new YarnRuntimeException("Could not instantiate Scheduler: " + schedulerClassName, e);
        }
    }

    protected ReservationSystem createReservationSystem() {
        String reservationClassName = conf.get(YarnConfiguration.RM_RESERVATION_SYSTEM_CLASS,
                AbstractReservationSystem.getDefaultReservationSystem(scheduler));
        if (reservationClassName == null) {
            return null;
        }
        LOG.info("Using ReservationSystem: " + reservationClassName);
        try {
            Class<?> reservationClazz = Class.forName(reservationClassName);
            if (ReservationSystem.class.isAssignableFrom(reservationClazz)) {
                return (ReservationSystem) ReflectionUtils.newInstance(reservationClazz, this.conf);
            } else {
                throw new YarnRuntimeException("Class: " + reservationClassName + " not instance of "
                        + ReservationSystem.class.getCanonicalName());
            }
        } catch (ClassNotFoundException e) {
            throw new YarnRuntimeException("Could not instantiate ReservationSystem: " + reservationClassName, e);
        }
    }

    protected SystemServiceManager createServiceManager() {
        String schedulerClassName = YarnConfiguration.DEFAULT_YARN_API_SYSTEM_SERVICES_CLASS;
        LOG.info("Using SystemServiceManager: " + schedulerClassName);
        try {
            Class<?> schedulerClazz = Class.forName(schedulerClassName);
            if (SystemServiceManager.class.isAssignableFrom(schedulerClazz)) {
                return (SystemServiceManager) ReflectionUtils.newInstance(schedulerClazz, this.conf);
            } else {
                throw new YarnRuntimeException("Class: " + schedulerClassName + " not instance of "
                        + SystemServiceManager.class.getCanonicalName());
            }
        } catch (ClassNotFoundException e) {
            throw new YarnRuntimeException("Could not instantiate SystemServiceManager: " + schedulerClassName, e);
        }
    }

    protected ApplicationMasterLauncher createAMLauncher() {
        return new ApplicationMasterLauncher(this.rmContext);
    }

    private NMLivelinessMonitor createNMLivelinessMonitor() {
        return new NMLivelinessMonitor(this.rmContext.getDispatcher());
    }

    protected AMLivelinessMonitor createAMLivelinessMonitor() {
        return new AMLivelinessMonitor(this.rmDispatcher);
    }

    protected RMNodeLabelsManager createNodeLabelManager() throws InstantiationException, IllegalAccessException {
        return new RMNodeLabelsManager();
    }

    protected NodeAttributesManager createNodeAttributesManager() {
        NodeAttributesManagerImpl namImpl = new NodeAttributesManagerImpl();
        namImpl.setRMContext(rmContext);
        return namImpl;
    }

    protected AllocationTagsManager createAllocationTagsManager() {
        return new AllocationTagsManager(this.rmContext);
    }

    protected PlacementConstraintManagerService createPlacementConstraintManager() {
        // Use the in memory Placement Constraint Manager.
        return new MemoryPlacementConstraintManager();
    }

    protected DelegationTokenRenewer createDelegationTokenRenewer() {
        return new DelegationTokenRenewer();
    }

    protected RMAppManager createRMAppManager() {
        return new RMAppManager(this.rmContext, this.scheduler, this.masterService, this.applicationACLsManager,
                this.conf);
    }

    protected RMApplicationHistoryWriter createRMApplicationHistoryWriter() {
        return new RMApplicationHistoryWriter();
    }

    private RMTimelineCollectorManager createRMTimelineCollectorManager() {
        return new RMTimelineCollectorManager(this);
    }

    private FederationStateStoreService createFederationStateStoreService() {
        return new FederationStateStoreService(rmContext);
    }

    protected MultiNodeSortingManager<SchedulerNode> createMultiNodeSortingManager() {
        return new MultiNodeSortingManager<SchedulerNode>();
    }

    protected SystemMetricsPublisher createSystemMetricsPublisher() {
        List<SystemMetricsPublisher> publishers = new ArrayList<SystemMetricsPublisher>();
        if (YarnConfiguration.timelineServiceV1Enabled(conf)) {
            SystemMetricsPublisher publisherV1 = new TimelineServiceV1Publisher();
            publishers.add(publisherV1);
        }
        if (YarnConfiguration.timelineServiceV2Enabled(conf)) {
            // we're dealing with the v.2.x publisher
            LOG.info("system metrics publisher with the timeline service V2 is " + "configured");
            SystemMetricsPublisher publisherV2 = new TimelineServiceV2Publisher(
                    rmContext.getRMTimelineCollectorManager());
            publishers.add(publisherV2);
        }
        if (publishers.isEmpty()) {
            LOG.info("TimelineServicePublisher is not configured");
            SystemMetricsPublisher noopPublisher = new NoOpSystemMetricPublisher();
            publishers.add(noopPublisher);
        }

        for (SystemMetricsPublisher publisher : publishers) {
            addIfService(publisher);
        }

        SystemMetricsPublisher combinedPublisher = new CombinedSystemMetricsPublisher(publishers);
        return combinedPublisher;
    }

    // sanity check for configurations
    protected static void validateConfigs(Configuration conf) {
        // validate max-attempts
        int globalMaxAppAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
        if (globalMaxAppAttempts <= 0) {
            throw new YarnRuntimeException(
                    "Invalid global max attempts configuration" + ", " + YarnConfiguration.RM_AM_MAX_ATTEMPTS + "="
                            + globalMaxAppAttempts + ", it should be a positive integer.");
        }

        // validate expireIntvl >= heartbeatIntvl
        long expireIntvl = conf.getLong(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
                YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
        long heartbeatIntvl = conf.getLong(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS,
                YarnConfiguration.DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS);
        if (expireIntvl < heartbeatIntvl) {
            throw new YarnRuntimeException("Nodemanager expiry interval should be no"
                    + " less than heartbeat interval, " + YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS + "="
                    + expireIntvl + ", " + YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS + "=" + heartbeatIntvl);
        }
    }

    /**
     * RMActiveServices handles all the Active services in the RM.
     */
    @Private
    public class RMActiveServices extends CompositeService {

        private DelegationTokenRenewer delegationTokenRenewer;
        private EventHandler<SchedulerEvent> schedulerDispatcher;
        private ApplicationMasterLauncher applicationMasterLauncher;
        private ContainerAllocationExpirer containerAllocationExpirer;
        private ResourceManager rm;
        private boolean fromActive = false;
        private StandByTransitionRunnable standByTransitionRunnable;

        RMActiveServices(ResourceManager rm) {
            super("RMActiveServices");
            this.rm = rm;
        }

        @Override
        protected void serviceInit(Configuration configuration) throws Exception {
            standByTransitionRunnable = new StandByTransitionRunnable();

            rmSecretManagerService = createRMSecretManagerService();
            addService(rmSecretManagerService);

            containerAllocationExpirer = new ContainerAllocationExpirer(rmDispatcher);
            addService(containerAllocationExpirer);
            rmContext.setContainerAllocationExpirer(containerAllocationExpirer);

            AMLivelinessMonitor amLivelinessMonitor = createAMLivelinessMonitor();
            addService(amLivelinessMonitor);
            rmContext.setAMLivelinessMonitor(amLivelinessMonitor);

            AMLivelinessMonitor amFinishingMonitor = createAMLivelinessMonitor();
            addService(amFinishingMonitor);
            rmContext.setAMFinishingMonitor(amFinishingMonitor);

            RMAppLifetimeMonitor rmAppLifetimeMonitor = createRMAppLifetimeMonitor();
            addService(rmAppLifetimeMonitor);
            rmContext.setRMAppLifetimeMonitor(rmAppLifetimeMonitor);

            RMNodeLabelsManager nlm = createNodeLabelManager();
            nlm.setRMContext(rmContext);
            addService(nlm);
            rmContext.setNodeLabelManager(nlm);

            NodeAttributesManager nam = createNodeAttributesManager();
            addService(nam);
            rmContext.setNodeAttributesManager(nam);

            AllocationTagsManager allocationTagsManager = createAllocationTagsManager();
            rmContext.setAllocationTagsManager(allocationTagsManager);

            PlacementConstraintManagerService placementConstraintManager = createPlacementConstraintManager();
            addService(placementConstraintManager);
            rmContext.setPlacementConstraintManager(placementConstraintManager);

            // add resource profiles here because it's used by AbstractYarnScheduler
            ResourceProfilesManager resourceProfilesManager = createResourceProfileManager();
            resourceProfilesManager.init(conf);
            rmContext.setResourceProfilesManager(resourceProfilesManager);

            MultiNodeSortingManager<SchedulerNode> multiNodeSortingManager = createMultiNodeSortingManager();
            multiNodeSortingManager.setRMContext(rmContext);
            addService(multiNodeSortingManager);
            rmContext.setMultiNodeSortingManager(multiNodeSortingManager);

            RMDelegatedNodeLabelsUpdater delegatedNodeLabelsUpdater = createRMDelegatedNodeLabelsUpdater();
            if (delegatedNodeLabelsUpdater != null) {
                addService(delegatedNodeLabelsUpdater);
                rmContext.setRMDelegatedNodeLabelsUpdater(delegatedNodeLabelsUpdater);
            }

            recoveryEnabled = conf.getBoolean(YarnConfiguration.RECOVERY_ENABLED,
                    YarnConfiguration.DEFAULT_RM_RECOVERY_ENABLED);

            RMStateStore rmStore = null;
            if (recoveryEnabled) {
                rmStore = RMStateStoreFactory.getStore(conf);
                boolean isWorkPreservingRecoveryEnabled = conf.getBoolean(
                        YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED,
                        YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_ENABLED);
                rmContext.setWorkPreservingRecoveryEnabled(isWorkPreservingRecoveryEnabled);
            } else {
                rmStore = new NullRMStateStore();
            }

            try {
                rmStore.setResourceManager(rm);
                rmStore.init(conf);
                rmStore.setRMDispatcher(rmDispatcher);
            } catch (Exception e) {
                // the Exception from stateStore.init() needs to be handled for
                // HA and we need to give up master status if we got fenced
                LOG.error("Failed to init state store", e);
                throw e;
            }
            rmContext.setStateStore(rmStore);

            if (UserGroupInformation.isSecurityEnabled()) {
                delegationTokenRenewer = createDelegationTokenRenewer();
                rmContext.setDelegationTokenRenewer(delegationTokenRenewer);
            }

            // Register event handler for NodesListManager
            nodesListManager = new NodesListManager(rmContext);
            rmDispatcher.register(NodesListManagerEventType.class, nodesListManager);
            addService(nodesListManager);
            rmContext.setNodesListManager(nodesListManager);

            // Initialize the scheduler
            scheduler = createScheduler();
            scheduler.setRMContext(rmContext);
            addIfService(scheduler);
            rmContext.setScheduler(scheduler);

            schedulerDispatcher = createSchedulerEventDispatcher();
            addIfService(schedulerDispatcher);
            rmDispatcher.register(SchedulerEventType.class, schedulerDispatcher);

            // Register event handler for RmAppEvents
            rmDispatcher.register(RMAppEventType.class, new ApplicationEventDispatcher(rmContext));

            // Register event handler for RmAppAttemptEvents
            rmDispatcher.register(RMAppAttemptEventType.class, new ApplicationAttemptEventDispatcher(rmContext));

            // Register event handler for RmNodes
            rmDispatcher.register(RMNodeEventType.class, new NodeEventDispatcher(rmContext));

            nmLivelinessMonitor = createNMLivelinessMonitor();
            addService(nmLivelinessMonitor);

            resourceTracker = createResourceTrackerService();
            addService(resourceTracker);
            rmContext.setResourceTrackerService(resourceTracker);

            MetricsSystem ms = DefaultMetricsSystem.initialize("ResourceManager");
            if (fromActive) {
                JvmMetrics.reattach(ms, jvmMetrics);
                UserGroupInformation.reattachMetrics();
            } else {
                jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null);
            }

            JvmPauseMonitor pauseMonitor = new JvmPauseMonitor();
            addService(pauseMonitor);
            jvmMetrics.setPauseMonitor(pauseMonitor);

            // Initialize the Reservation system
            if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
                    YarnConfiguration.DEFAULT_RM_RESERVATION_SYSTEM_ENABLE)) {
                reservationSystem = createReservationSystem();
                if (reservationSystem != null) {
                    reservationSystem.setRMContext(rmContext);
                    addIfService(reservationSystem);
                    rmContext.setReservationSystem(reservationSystem);
                    LOG.info("Initialized Reservation system");
                }
            }

            masterService = createApplicationMasterService();
            createAndRegisterOpportunisticDispatcher(masterService);
            addService(masterService);
            rmContext.setApplicationMasterService(masterService);

            applicationACLsManager = new ApplicationACLsManager(conf);

            queueACLsManager = createQueueACLsManager(scheduler, conf);

            rmAppManager = createRMAppManager();
            // Register event handler for RMAppManagerEvents
            rmDispatcher.register(RMAppManagerEventType.class, rmAppManager);

            clientRM = createClientRMService();
            addService(clientRM);
            rmContext.setClientRMService(clientRM);

            applicationMasterLauncher = createAMLauncher();
            rmDispatcher.register(AMLauncherEventType.class, applicationMasterLauncher);

            addService(applicationMasterLauncher);
            if (UserGroupInformation.isSecurityEnabled()) {
                addService(delegationTokenRenewer);
                delegationTokenRenewer.setRMContext(rmContext);
            }

            if (HAUtil.isFederationEnabled(conf)) {
                String cId = YarnConfiguration.getClusterId(conf);
                if (cId.isEmpty()) {
                    String errMsg = "Cannot initialize RM as Federation is enabled"
                            + " but cluster id is not configured.";
                    LOG.error(errMsg);
                    throw new YarnRuntimeException(errMsg);
                }
                federationStateStoreService = createFederationStateStoreService();
                addIfService(federationStateStoreService);
                LOG.info("Initialized Federation membership.");
            }

            new RMNMInfo(rmContext, scheduler);

            if (conf.getBoolean(YarnConfiguration.YARN_API_SERVICES_ENABLE, false)) {
                SystemServiceManager systemServiceManager = createServiceManager();
                addIfService(systemServiceManager);
            }

            super.serviceInit(conf);
        }

        private void createAndRegisterOpportunisticDispatcher(ApplicationMasterService service) {
            if (!isOpportunisticSchedulingEnabled(conf)) {
                return;
            }
            EventDispatcher oppContainerAllocEventDispatcher = new EventDispatcher(
                    (OpportunisticContainerAllocatorAMService) service,
                    OpportunisticContainerAllocatorAMService.class.getName());
            // Add an event dispatcher for the
            // OpportunisticContainerAllocatorAMService to handle node
            // additions, updates and removals. Since the SchedulerEvent is currently
            // a super set of theses, we register interest for it.
            addService(oppContainerAllocEventDispatcher);
            rmDispatcher.register(SchedulerEventType.class, oppContainerAllocEventDispatcher);
        }

        @Override
        protected void serviceStart() throws Exception {
            RMStateStore rmStore = rmContext.getStateStore();
            // The state store needs to start irrespective of recoveryEnabled as apps
            // need events to move to further states.
            rmStore.start();

            if (recoveryEnabled) {
                try {
                    LOG.info("Recovery started");
                    rmStore.checkVersion();
                    if (rmContext.isWorkPreservingRecoveryEnabled()) {
                        rmContext.setEpoch(rmStore.getAndIncrementEpoch());
                    }
                    RMState state = rmStore.loadState();
                    recover(state);
                    LOG.info("Recovery ended");
                } catch (Exception e) {
                    // the Exception from loadState() needs to be handled for
                    // HA and we need to give up master status if we got fenced
                    LOG.error("Failed to load/recover state", e);
                    throw e;
                }
            } else {
                if (HAUtil.isFederationEnabled(conf)) {
                    long epoch = conf.getLong(YarnConfiguration.RM_EPOCH, YarnConfiguration.DEFAULT_RM_EPOCH);
                    rmContext.setEpoch(epoch);
                    LOG.info("Epoch set for Federation: " + epoch);
                }
            }

            super.serviceStart();
        }

        @Override
        protected void serviceStop() throws Exception {

            super.serviceStop();

            DefaultMetricsSystem.shutdown();
            if (rmContext != null) {
                RMStateStore store = rmContext.getStateStore();
                try {
                    if (null != store) {
                        store.close();
                    }
                } catch (Exception e) {
                    LOG.error("Error closing store.", e);
                }
            }

        }
    }

    @Private
    private class RMFatalEventDispatcher implements EventHandler<RMFatalEvent> {
        @Override
        public void handle(RMFatalEvent event) {
            LOG.error("Received " + event);

            if (HAUtil.isHAEnabled(getConfig())) {
                // If we're in an HA config, the right answer is always to go into
                // standby.
                LOG.warn("Transitioning the resource manager to standby.");
                handleTransitionToStandByInNewThread();
            } else {
                // If we're stand-alone, we probably want to shut down, but the if and
                // how depends on the event.
                switch (event.getType()) {
                case STATE_STORE_FENCED:
                    LOG.fatal("State store fenced even though the resource manager "
                            + "is not configured for high availability. Shutting down this "
                            + "resource manager to protect the integrity of the state store.");
                    ExitUtil.terminate(1, event.getExplanation());
                    break;
                case STATE_STORE_OP_FAILED:
                    if (YarnConfiguration.shouldRMFailFast(getConfig())) {
                        LOG.fatal("Shutting down the resource manager because a state "
                                + "store operation failed, and the resource manager is "
                                + "configured to fail fast. See the yarn.fail-fast and "
                                + "yarn.resourcemanager.fail-fast properties.");
                        ExitUtil.terminate(1, event.getExplanation());
                    } else {
                        LOG.warn("Ignoring state store operation failure because the "
                                + "resource manager is not configured to fail fast. See the "
                                + "yarn.fail-fast and yarn.resourcemanager.fail-fast " + "properties.");
                    }
                    break;
                default:
                    LOG.fatal("Shutting down the resource manager.");
                    ExitUtil.terminate(1, event.getExplanation());
                }
            }
        }
    }

    /**
     * Transition to standby state in a new thread. The transition operation is
     * asynchronous to avoid deadlock caused by cyclic dependency.
     */
    private void handleTransitionToStandByInNewThread() {
        Thread standByTransitionThread = new Thread(activeServices.standByTransitionRunnable);
        standByTransitionThread.setName("StandByTransitionThread");
        standByTransitionThread.start();
    }

    /**
     * The class to transition RM to standby state. The same
     * {@link StandByTransitionRunnable} object could be used in multiple threads,
     * but runs only once. That's because RM can go back to active state after
     * transition to standby state, the same runnable in the old context can't
     * transition RM to standby state again. A new runnable is created every time
     * RM transitions to active state.
     */
    private class StandByTransitionRunnable implements Runnable {
        // The atomic variable to make sure multiple threads with the same runnable
        // run only once.
        private final AtomicBoolean hasAlreadyRun = new AtomicBoolean(false);

        @Override
        public void run() {
            // Run this only once, even if multiple threads end up triggering
            // this simultaneously.
            if (hasAlreadyRun.getAndSet(true)) {
                return;
            }

            if (rmContext.isHAEnabled()) {
                try {
                    // Transition to standby and reinit active services
                    LOG.info("Transitioning RM to Standby mode");
                    transitionToStandby(true);
                    EmbeddedElector elector = rmContext.getLeaderElectorService();
                    if (elector != null) {
                        elector.rejoinElection();
                    }
                } catch (Exception e) {
                    LOG.fatal("Failed to transition RM to Standby mode.", e);
                    ExitUtil.terminate(1, e);
                }
            }
        }
    }

    @Private
    public static final class ApplicationEventDispatcher implements EventHandler<RMAppEvent> {

        private final RMContext rmContext;

        public ApplicationEventDispatcher(RMContext rmContext) {
            this.rmContext = rmContext;
        }

        @Override
        public void handle(RMAppEvent event) {
            ApplicationId appID = event.getApplicationId();
            RMApp rmApp = this.rmContext.getRMApps().get(appID);
            if (rmApp != null) {
                try {
                    rmApp.handle(event);
                } catch (Throwable t) {
                    LOG.error("Error in handling event type " + event.getType() + " for application " + appID, t);
                }
            }
        }
    }

    @Private
    public static final class ApplicationAttemptEventDispatcher implements EventHandler<RMAppAttemptEvent> {

        private final RMContext rmContext;

        public ApplicationAttemptEventDispatcher(RMContext rmContext) {
            this.rmContext = rmContext;
        }

        @Override
        public void handle(RMAppAttemptEvent event) {
            ApplicationAttemptId appAttemptId = event.getApplicationAttemptId();
            ApplicationId appId = appAttemptId.getApplicationId();
            RMApp rmApp = this.rmContext.getRMApps().get(appId);
            if (rmApp != null) {
                RMAppAttempt rmAppAttempt = rmApp.getRMAppAttempt(appAttemptId);
                if (rmAppAttempt != null) {
                    try {
                        rmAppAttempt.handle(event);
                    } catch (Throwable t) {
                        LOG.error("Error in handling event type " + event.getType() + " for applicationAttempt "
                                + appAttemptId, t);
                    }
                } else if (rmApp.getApplicationSubmissionContext() != null
                        && rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()
                        && event.getType() == RMAppAttemptEventType.CONTAINER_FINISHED) {
                    // For work-preserving AM restart, failed attempts are still
                    // capturing CONTAINER_FINISHED events and record the finished
                    // containers which will be used by current attempt.
                    // We just keep 'yarn.resourcemanager.am.max-attempts' in
                    // RMStateStore. If the finished container's attempt is deleted, we
                    // use the first attempt in app.attempts to deal with these events.

                    RMAppAttempt previousFailedAttempt = rmApp.getAppAttempts().values().iterator().next();
                    if (previousFailedAttempt != null) {
                        try {
                            LOG.debug("Event " + event.getType() + " handled by " + previousFailedAttempt);
                            previousFailedAttempt.handle(event);
                        } catch (Throwable t) {
                            LOG.error("Error in handling event type " + event.getType() + " for applicationAttempt "
                                    + appAttemptId + " with " + previousFailedAttempt, t);
                        }
                    } else {
                        LOG.error(
                                "Event " + event.getType() + " not handled, because previousFailedAttempt is null");
                    }
                }
            }
        }
    }

    @Private
    public static final class NodeEventDispatcher implements EventHandler<RMNodeEvent> {

        private final RMContext rmContext;

        public NodeEventDispatcher(RMContext rmContext) {
            this.rmContext = rmContext;
        }

        @Override
        public void handle(RMNodeEvent event) {
            NodeId nodeId = event.getNodeId();
            RMNode node = this.rmContext.getRMNodes().get(nodeId);
            if (node != null) {
                try {
                    ((EventHandler<RMNodeEvent>) node).handle(event);
                } catch (Throwable t) {
                    LOG.error("Error in handling event type " + event.getType() + " for node " + nodeId, t);
                }
            }
        }
    }

    /**
     * Return a HttpServer.Builder that the journalnode / namenode / secondary
     * namenode can use to initialize their HTTP / HTTPS server.
     *
     * @param conf configuration object
     * @param httpAddr HTTP address
     * @param httpsAddr HTTPS address
     * @param name  Name of the server
     * @throws IOException from Builder
     * @return builder object
     */
    public static HttpServer2.Builder httpServerTemplateForRM(Configuration conf, final InetSocketAddress httpAddr,
            final InetSocketAddress httpsAddr, String name) throws IOException {
        HttpServer2.Builder builder = new HttpServer2.Builder().setName(name).setConf(conf)
                .setSecurityEnabled(false);

        if (httpAddr.getPort() == 0) {
            builder.setFindPort(true);
        }

        URI uri = URI.create("http://" + NetUtils.getHostPortString(httpAddr));
        builder.addEndpoint(uri);
        LOG.info("Starting Web-server for " + name + " at: " + uri);

        return builder;
    }

    protected void startWepApp() {
        Map<String, String> serviceConfig = null;
        Configuration conf = getConfig();

        RMWebAppUtil.setupSecurityAndFilters(conf, getClientRMService().rmDTSecretManager);

        Map<String, String> params = new HashMap<String, String>();
        if (getConfig().getBoolean(YarnConfiguration.YARN_API_SERVICES_ENABLE, false)) {
            String apiPackages = "org.apache.hadoop.yarn.service.webapp;" + "org.apache.hadoop.yarn.webapp";
            params.put("com.sun.jersey.config.property.resourceConfigClass",
                    "com.sun.jersey.api.core.PackagesResourceConfig");
            params.put("com.sun.jersey.config.property.packages", apiPackages);
        }

        Builder<ApplicationMasterService> builder = WebApps
                .$for("cluster", ApplicationMasterService.class, masterService, "ws").with(conf)
                .withServlet("API-Service", "/app/*", ServletContainer.class, params, false)
                .withHttpSpnegoPrincipalKey(YarnConfiguration.RM_WEBAPP_SPNEGO_USER_NAME_KEY)
                .withHttpSpnegoKeytabKey(YarnConfiguration.RM_WEBAPP_SPNEGO_KEYTAB_FILE_KEY)
                .withCSRFProtection(YarnConfiguration.RM_CSRF_PREFIX)
                .withXFSProtection(YarnConfiguration.RM_XFS_PREFIX).at(webAppAddress);
        String proxyHostAndPort = rmContext.getProxyHostAndPort(conf);
        if (WebAppUtils.getResolvedRMWebAppURLWithoutScheme(conf).equals(proxyHostAndPort)) {
            if (HAUtil.isHAEnabled(conf)) {
                fetcher = new AppReportFetcher(conf);
            } else {
                fetcher = new AppReportFetcher(conf, getClientRMService());
            }
            builder.withServlet(ProxyUriUtils.PROXY_SERVLET_NAME, ProxyUriUtils.PROXY_PATH_SPEC,
                    WebAppProxyServlet.class);
            builder.withAttribute(WebAppProxy.FETCHER_ATTRIBUTE, fetcher);
            String[] proxyParts = proxyHostAndPort.split(":");
            builder.withAttribute(WebAppProxy.PROXY_HOST_ATTRIBUTE, proxyParts[0]);
        }

        WebAppContext uiWebAppContext = null;
        if (getConfig().getBoolean(YarnConfiguration.YARN_WEBAPP_UI2_ENABLE,
                YarnConfiguration.DEFAULT_YARN_WEBAPP_UI2_ENABLE)) {
            String onDiskPath = getConfig().get(YarnConfiguration.YARN_WEBAPP_UI2_WARFILE_PATH);

            uiWebAppContext = new WebAppContext();
            uiWebAppContext.setContextPath(UI2_WEBAPP_NAME);

            if (null == onDiskPath) {
                String war = "hadoop-yarn-ui-" + VersionInfo.getVersion() + ".war";
                URLClassLoader cl = (URLClassLoader) ClassLoader.getSystemClassLoader();
                URL url = cl.findResource(war);

                if (null == url) {
                    onDiskPath = getWebAppsPath("ui2");
                } else {
                    onDiskPath = url.getFile();
                }
            }
            if (onDiskPath == null || onDiskPath.isEmpty()) {
                LOG.error("No war file or webapps found for ui2 !");
            } else {
                if (onDiskPath.endsWith(".war")) {
                    uiWebAppContext.setWar(onDiskPath);
                    LOG.info("Using war file at: " + onDiskPath);
                } else {
                    uiWebAppContext.setResourceBase(onDiskPath);
                    LOG.info("Using webapps at: " + onDiskPath);
                }
            }
        }

        builder.withAttribute(IsResourceManagerActiveServlet.RM_ATTRIBUTE, this);
        builder.withServlet(IsResourceManagerActiveServlet.SERVLET_NAME, IsResourceManagerActiveServlet.PATH_SPEC,
                IsResourceManagerActiveServlet.class);

        webApp = builder.start(new RMWebApp(this), uiWebAppContext);
    }

    private String getWebAppsPath(String appName) {
        URL url = getClass().getClassLoader().getResource("webapps/" + appName);
        if (url == null) {
            return "";
        }
        return url.toString();
    }

    /**
     * Helper method to create and init {@link #activeServices}. This creates an
     * instance of {@link RMActiveServices} and initializes it.
     *
     * @param fromActive Indicates if the call is from the active state transition
     *                   or the RM initialization.
     */
    protected void createAndInitActiveServices(boolean fromActive) {
        activeServices = new RMActiveServices(this);
        activeServices.fromActive = fromActive;
        activeServices.init(conf);
    }

    /**
     * Helper method to start {@link #activeServices}.
     * @throws Exception
     */
    void startActiveServices() throws Exception {
        if (activeServices != null) {
            clusterTimeStamp = System.currentTimeMillis();
            activeServices.start();
        }
    }

    /**
     * Helper method to stop {@link #activeServices}.
     * @throws Exception
     */
    void stopActiveServices() {
        if (activeServices != null) {
            activeServices.stop();
            activeServices = null;
        }
    }

    void reinitialize(boolean initialize) {
        ClusterMetrics.destroy();
        QueueMetrics.clearQueueMetrics();
        getResourceScheduler().resetSchedulerMetrics();
        if (initialize) {
            resetRMContext();
            createAndInitActiveServices(true);
        }
    }

    @VisibleForTesting
    protected boolean areActiveServicesRunning() {
        return activeServices != null && activeServices.isInState(STATE.STARTED);
    }

    synchronized void transitionToActive() throws Exception {
        if (rmContext.getHAServiceState() == HAServiceProtocol.HAServiceState.ACTIVE) {
            LOG.info("Already in active state");
            return;
        }
        LOG.info("Transitioning to active state");

        this.rmLoginUGI.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                try {
                    startActiveServices();
                    return null;
                } catch (Exception e) {
                    reinitialize(true);
                    throw e;
                }
            }
        });

        rmContext.setHAServiceState(HAServiceProtocol.HAServiceState.ACTIVE);
        LOG.info("Transitioned to active state");
    }

    synchronized void transitionToStandby(boolean initialize) throws Exception {
        if (rmContext.getHAServiceState() == HAServiceProtocol.HAServiceState.STANDBY) {
            LOG.info("Already in standby state");
            return;
        }

        LOG.info("Transitioning to standby state");
        HAServiceState state = rmContext.getHAServiceState();
        rmContext.setHAServiceState(HAServiceProtocol.HAServiceState.STANDBY);
        if (state == HAServiceProtocol.HAServiceState.ACTIVE) {
            stopActiveServices();
            reinitialize(initialize);
        }
        LOG.info("Transitioned to standby state");
    }

    @Override
    protected void serviceStart() throws Exception {
        if (this.rmContext.isHAEnabled()) {
            transitionToStandby(false);
        }

        startWepApp();
        if (getConfig().getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
            int port = webApp.port();
            WebAppUtils.setRMWebAppPort(conf, port);
        }
        super.serviceStart();

        // Non HA case, start after RM services are started.
        if (!this.rmContext.isHAEnabled()) {
            transitionToActive();
        }
    }

    protected void doSecureLogin() throws IOException {
        InetSocketAddress socAddr = getBindAddress(conf);
        SecurityUtil.login(this.conf, YarnConfiguration.RM_KEYTAB, YarnConfiguration.RM_PRINCIPAL,
                socAddr.getHostName());

        // if security is enable, set rmLoginUGI as UGI of loginUser
        if (UserGroupInformation.isSecurityEnabled()) {
            this.rmLoginUGI = UserGroupInformation.getLoginUser();
        }
    }

    @Override
    protected void serviceStop() throws Exception {
        if (webApp != null) {
            webApp.stop();
        }
        if (fetcher != null) {
            fetcher.stop();
        }
        if (configurationProvider != null) {
            configurationProvider.close();
        }
        super.serviceStop();
        if (zkManager != null) {
            zkManager.close();
        }
        transitionToStandby(false);
        rmContext.setHAServiceState(HAServiceState.STOPPING);
    }

    protected ResourceTrackerService createResourceTrackerService() {
        return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor,
                this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager());
    }

    protected ClientRMService createClientRMService() {
        return new ClientRMService(this.rmContext, scheduler, this.rmAppManager, this.applicationACLsManager,
                this.queueACLsManager, this.rmContext.getRMDelegationTokenSecretManager());
    }

    protected ApplicationMasterService createApplicationMasterService() {
        Configuration config = this.rmContext.getYarnConfiguration();
        if (isOpportunisticSchedulingEnabled(conf)) {
            if (YarnConfiguration.isDistSchedulingEnabled(config)
                    && !YarnConfiguration.isOpportunisticContainerAllocationEnabled(config)) {
                throw new YarnRuntimeException("Invalid parameters: opportunistic container allocation has to "
                        + "be enabled when distributed scheduling is enabled.");
            }
            OpportunisticContainerAllocatorAMService oppContainerAllocatingAMService = new OpportunisticContainerAllocatorAMService(
                    this.rmContext, scheduler);
            this.rmContext.setContainerQueueLimitCalculator(
                    oppContainerAllocatingAMService.getNodeManagerQueueLimitCalculator());
            return oppContainerAllocatingAMService;
        }
        return new ApplicationMasterService(this.rmContext, scheduler);
    }

    protected AdminService createAdminService() {
        return new AdminService(this);
    }

    protected RMSecretManagerService createRMSecretManagerService() {
        return new RMSecretManagerService(conf, rmContext);
    }

    private boolean isOpportunisticSchedulingEnabled(Configuration conf) {
        return YarnConfiguration.isOpportunisticContainerAllocationEnabled(conf)
                || YarnConfiguration.isDistSchedulingEnabled(conf);
    }

    /**
     * Create RMDelegatedNodeLabelsUpdater based on configuration.
     */
    protected RMDelegatedNodeLabelsUpdater createRMDelegatedNodeLabelsUpdater() {
        if (conf.getBoolean(YarnConfiguration.NODE_LABELS_ENABLED, YarnConfiguration.DEFAULT_NODE_LABELS_ENABLED)
                && YarnConfiguration.isDelegatedCentralizedNodeLabelConfiguration(conf)) {
            return new RMDelegatedNodeLabelsUpdater(rmContext);
        } else {
            return null;
        }
    }

    @Private
    public ClientRMService getClientRMService() {
        return this.clientRM;
    }

    /**
     * return the scheduler.
     * @return the scheduler for the Resource Manager.
     */
    @Private
    public ResourceScheduler getResourceScheduler() {
        return this.scheduler;
    }

    /**
     * return the resource tracking component.
     * @return the resource tracking component.
     */
    @Private
    public ResourceTrackerService getResourceTrackerService() {
        return this.resourceTracker;
    }

    @Private
    public ApplicationMasterService getApplicationMasterService() {
        return this.masterService;
    }

    @Private
    public ApplicationACLsManager getApplicationACLsManager() {
        return this.applicationACLsManager;
    }

    @Private
    public QueueACLsManager getQueueACLsManager() {
        return this.queueACLsManager;
    }

    @Private
    @VisibleForTesting
    public FederationStateStoreService getFederationStateStoreService() {
        return this.federationStateStoreService;
    }

    @Private
    WebApp getWebapp() {
        return this.webApp;
    }

    @Override
    public void recover(RMState state) throws Exception {
        // recover RMdelegationTokenSecretManager
        rmContext.getRMDelegationTokenSecretManager().recover(state);

        // recover AMRMTokenSecretManager
        rmContext.getAMRMTokenSecretManager().recover(state);

        // recover reservations
        if (reservationSystem != null) {
            reservationSystem.recover(state);
        }
        // recover applications
        rmAppManager.recover(state);

        setSchedulerRecoveryStartAndWaitTime(state, conf);
    }

    public static void main(String argv[]) {
        Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
        StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
        try {
            Configuration conf = new YarnConfiguration();
            GenericOptionsParser hParser = new GenericOptionsParser(conf, argv);
            argv = hParser.getRemainingArgs();
            // If -format-state-store, then delete RMStateStore; else startup normally
            if (argv.length >= 1) {
                if (argv[0].equals("-format-state-store")) {
                    deleteRMStateStore(conf);
                } else if (argv[0].equals("-remove-application-from-state-store") && argv.length == 2) {
                    removeApplication(conf, argv[1]);
                } else {
                    printUsage(System.err);
                }
            } else {
                ResourceManager resourceManager = new ResourceManager();
                ShutdownHookManager.get().addShutdownHook(new CompositeServiceShutdownHook(resourceManager),
                        SHUTDOWN_HOOK_PRIORITY);
                resourceManager.init(conf);
                resourceManager.start();
            }
        } catch (Throwable t) {
            LOG.fatal("Error starting ResourceManager", t);
            System.exit(-1);
        }
    }

    /**
     * Register the handlers for alwaysOn services
     */
    private Dispatcher setupDispatcher() {
        Dispatcher dispatcher = createDispatcher();
        dispatcher.register(RMFatalEventType.class, new ResourceManager.RMFatalEventDispatcher());
        return dispatcher;
    }

    private void resetRMContext() {
        RMContextImpl rmContextImpl = new RMContextImpl();
        // transfer service context to new RM service Context
        rmContextImpl.setServiceContext(rmContext.getServiceContext());

        // reset dispatcher
        Dispatcher dispatcher = setupDispatcher();
        ((Service) dispatcher).init(this.conf);
        ((Service) dispatcher).start();
        removeService((Service) rmDispatcher);
        // Need to stop previous rmDispatcher before assigning new dispatcher
        // otherwise causes "AsyncDispatcher event handler" thread leak
        ((Service) rmDispatcher).stop();
        rmDispatcher = dispatcher;
        addIfService(rmDispatcher);
        rmContextImpl.setDispatcher(dispatcher);

        rmContext = rmContextImpl;
    }

    private void setSchedulerRecoveryStartAndWaitTime(RMState state, Configuration conf) {
        if (!state.getApplicationState().isEmpty()) {
            long waitTime = conf.getLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS,
                    YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS);
            rmContext.setSchedulerRecoveryStartAndWaitTime(waitTime);
        }
    }

    /**
     * Retrieve RM bind address from configuration
     * 
     * @param conf
     * @return InetSocketAddress
     */
    public static InetSocketAddress getBindAddress(Configuration conf) {
        return conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS,
                YarnConfiguration.DEFAULT_RM_PORT);
    }

    /**
     * Deletes the RMStateStore
     *
     * @param conf
     * @throws Exception
     */
    @VisibleForTesting
    static void deleteRMStateStore(Configuration conf) throws Exception {
        RMStateStore rmStore = RMStateStoreFactory.getStore(conf);
        rmStore.setResourceManager(new ResourceManager());
        rmStore.init(conf);
        rmStore.start();
        try {
            LOG.info("Deleting ResourceManager state store...");
            rmStore.deleteStore();
            LOG.info("State store deleted");
        } finally {
            rmStore.stop();
        }
    }

    @VisibleForTesting
    static void removeApplication(Configuration conf, String applicationId) throws Exception {
        RMStateStore rmStore = RMStateStoreFactory.getStore(conf);
        rmStore.setResourceManager(new ResourceManager());
        rmStore.init(conf);
        rmStore.start();
        try {
            ApplicationId removeAppId = ApplicationId.fromString(applicationId);
            LOG.info("Deleting application " + removeAppId + " from state store");
            rmStore.removeApplication(removeAppId);
            LOG.info("Application is deleted from state store");
        } finally {
            rmStore.stop();
        }
    }

    private static void printUsage(PrintStream out) {
        out.println("Usage: yarn resourcemanager [-format-state-store]");
        out.println("                            " + "[-remove-application-from-state-store <appId>]" + "\n");
    }

    protected RMAppLifetimeMonitor createRMAppLifetimeMonitor() {
        return new RMAppLifetimeMonitor(this.rmContext);
    }

    /**
     * Register ResourceManagerMXBean.
     */
    private void registerMXBean() {
        MBeans.register("ResourceManager", "ResourceManager", this);
    }

    @Override
    public boolean isSecurityEnabled() {
        return UserGroupInformation.isSecurityEnabled();
    }
}