co.cask.cdap.master.startup.YarnCheck.java Source code


Introduction

Here is the source code for co.cask.cdap.master.startup.YarnCheck.java. This CDAP master startup check first verifies that YARN is reachable within a configurable timeout, and then confirms that the cluster has enough free memory and virtual cores to run all CDAP system services.

Source

/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.master.startup;

import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import com.google.common.base.Joiner;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.inject.Inject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

/**
 * Checks that YARN is available and has enough resources to run all system services.
 */
// class is picked up through classpath examination
@SuppressWarnings("unused")
class YarnCheck extends AbstractMasterCheck {
    private static final Logger LOG = LoggerFactory.getLogger(YarnCheck.class);
    private final Configuration hConf;

    @Inject
    private YarnCheck(CConfiguration cConf, Configuration hConf) {
        super(cConf);
        this.hConf = hConf;
    }

    @Override
    public void run() {
        int yarnConnectTimeout = cConf.getInt(Constants.Startup.YARN_CONNECT_TIMEOUT_SECONDS, 60);
        LOG.info("Checking YARN availability -- may take up to {} seconds.", yarnConnectTimeout);

        final YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(hConf);

        List<NodeReport> nodeReports;
        // if YARN is not up, yarnClient.start() will hang.
        ExecutorService executorService = Executors
                .newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("startup-checker").build());
        try {
            Future<List<NodeReport>> result = executorService.submit(new Callable<List<NodeReport>>() {
                @Override
                public List<NodeReport> call() throws Exception {
                    yarnClient.start();
                    return yarnClient.getNodeReports();
                }
            });
            nodeReports = result.get(yarnConnectTimeout, TimeUnit.SECONDS);
            LOG.info("  YARN availability successfully verified.");
        } catch (Exception e) {
            throw new RuntimeException("Unable to get status of YARN nodemanagers. "
                    + "Please check that YARN is running "
                    + "and that the correct Hadoop configuration (core-site.xml, yarn-site.xml) and libraries "
                    + "are included in the CDAP master classpath.", e);
        } finally {
            try {
                yarnClient.stop();
            } catch (Exception e) {
                LOG.warn("Error stopping yarn client.", e);
            } finally {
                executorService.shutdown();
            }
        }

        checkResources(nodeReports);
    }

    private void checkResources(List<NodeReport> nodeReports) {
        LOG.info("Checking that YARN has enough resources to run all system services.");
        int memoryCapacity = 0;
        int vcoresCapacity = 0;
        int memoryUsed = 0;
        int vcoresUsed = 0;
        int availableNodes = 0;
        for (NodeReport nodeReport : nodeReports) {
            NodeId nodeId = nodeReport.getNodeId();
            LOG.debug("Got report for node {}", nodeId);
            if (!nodeReport.getNodeState().isUnusable()) {
                Resource nodeCapability = nodeReport.getCapability();
                Resource nodeUsed = nodeReport.getUsed();

                // some versions of Hadoop return null, others do not
                if (nodeCapability != null) {
                    LOG.debug("node {} resource capability: memory = {}, vcores = {}", nodeId,
                            nodeCapability.getMemory(), nodeCapability.getVirtualCores());
                    memoryCapacity += nodeCapability.getMemory();
                    vcoresCapacity += nodeCapability.getVirtualCores();
                }

                if (nodeUsed != null) {
                    LOG.debug("node {} resources used: memory = {}, vcores = {}", nodeId, nodeUsed.getMemory(),
                            nodeUsed.getVirtualCores());
                    memoryUsed += nodeUsed.getMemory();
                    vcoresUsed += nodeUsed.getVirtualCores();
                }

                availableNodes++;
            }
        }
        LOG.debug("YARN resource capacity: {} MB of memory and {} virtual cores.", memoryCapacity, vcoresCapacity);
        LOG.debug("YARN resources used: {} MB of memory and {} virtual cores.", memoryUsed, vcoresUsed);

        // calculate memory and vcores required by CDAP
        int requiredMemoryMB = 0;
        int requiredVCores = 0;
        Set<String> invalidKeys = new HashSet<>();
        for (ServiceResourceKeys serviceResourceKeys : systemServicesResourceKeys) {
            boolean hasConfigError = false;
            int instances = 0;
            int memoryMB = 0;
            int vcores = 0;

            try {
                instances = cConf.getInt(serviceResourceKeys.getInstancesKey());
            } catch (Exception e) {
                invalidKeys.add(serviceResourceKeys.getInstancesKey());
                hasConfigError = true;
            }
            try {
                memoryMB = cConf.getInt(serviceResourceKeys.getMemoryKey());
            } catch (Exception e) {
                invalidKeys.add(serviceResourceKeys.getMemoryKey());
                hasConfigError = true;
            }
            try {
                vcores = cConf.getInt(serviceResourceKeys.getVcoresKey());
            } catch (Exception e) {
                invalidKeys.add(serviceResourceKeys.getVcoresKey());
                hasConfigError = true;
            }

            if (!hasConfigError) {
                LOG.debug("Resource settings for system service {}: {}={}, {}={}, {}={}",
                        serviceResourceKeys.getServiceName(), serviceResourceKeys.getInstancesKey(), instances,
                        serviceResourceKeys.getMemoryKey(), memoryMB, serviceResourceKeys.getVcoresKey(), vcores);
                requiredMemoryMB += memoryMB * instances;
                requiredVCores += vcores * instances;
            }
        }

        if (!invalidKeys.isEmpty()) {
            throw new RuntimeException("YARN resources check failed to invalid config settings for keys: "
                    + Joiner.on(',').join(invalidKeys));
        }

        LOG.debug("{} MB of memory and {} virtual cores are required.", requiredMemoryMB, requiredVCores);
        int availableMemoryMB = memoryCapacity - memoryUsed;
        int availableVCores = vcoresCapacity - vcoresUsed;
        boolean memoryOK = requiredMemoryMB <= availableMemoryMB;
        // if this is negative or zero, just assume it's not using vcores
        boolean vcoresOK = vcoresCapacity <= 0 || requiredVCores <= availableVCores;

        if (!memoryOK && !vcoresOK) {
            LOG.warn(
                    "Services require {} MB of memory and {} vcores, "
                            + "but the cluster only has {} MB of memory and {} vcores available.",
                    requiredMemoryMB, requiredVCores, availableMemoryMB, availableVCores);
        } else if (!memoryOK) {
            LOG.warn("Services require {} MB of memory but the cluster only has {} MB of memory available.",
                    requiredMemoryMB, availableMemoryMB);
        } else if (!vcoresOK) {
            LOG.warn("Services require {} vcores but the cluster only has {} vcores available.", requiredVCores,
                    availableVCores);
        } else {
            LOG.info("  YARN resources successfully verified.");
        }
    }
}
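
Example

The key pattern in run() above is guarding a call that can hang indefinitely (yarnClient.start()) with a single-thread executor and a bounded Future.get(). Below is a minimal, self-contained sketch of that pattern using only java.util.concurrent; the class and method names (TimeoutGuardExample, slowProbe) are hypothetical stand-ins and not part of CDAP or Hadoop.

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class TimeoutGuardExample {

    public static void main(String[] args) {
        int timeoutSeconds = 5; // analogous to Constants.Startup.YARN_CONNECT_TIMEOUT_SECONDS

        // single-thread executor so the potentially hanging call runs off the main thread
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            Future<String> result = executor.submit(new Callable<String>() {
                @Override
                public String call() throws Exception {
                    // stand-in for yarnClient.start() + yarnClient.getNodeReports(),
                    // which can block indefinitely when the service is down
                    return slowProbe();
                }
            });
            // bound the wait; a TimeoutException here means the service did not answer in time
            String report = result.get(timeoutSeconds, TimeUnit.SECONDS);
            System.out.println("Check passed: " + report);
        } catch (TimeoutException e) {
            throw new RuntimeException("Service did not respond within " + timeoutSeconds + " seconds.", e);
        } catch (Exception e) {
            throw new RuntimeException("Check failed.", e);
        } finally {
            executor.shutdown();
        }
    }

    // hypothetical probe that pretends to contact a remote service
    private static String slowProbe() throws InterruptedException {
        TimeUnit.SECONDS.sleep(1);
        return "1 node available";
    }
}

As in run() above, the executor is shut down in a finally block so that a failed or timed-out check does not leak the worker thread.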