org.apache.fluo.cluster.runner.YarnAppRunner.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.fluo.cluster.runner.YarnAppRunner.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.fluo.cluster.runner;

import java.io.File;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ExecutionException;

import org.apache.curator.framework.CuratorFramework;
import org.apache.fluo.accumulo.util.ZookeeperPath;
import org.apache.fluo.api.config.FluoConfiguration;
import org.apache.fluo.api.exceptions.FluoException;
import org.apache.fluo.cluster.runnable.OracleRunnable;
import org.apache.fluo.cluster.runnable.WorkerRunnable;
import org.apache.fluo.cluster.util.FluoYarnConfig;
import org.apache.fluo.cluster.yarn.FluoTwillApp;
import org.apache.fluo.cluster.yarn.TwillUtil;
import org.apache.fluo.core.client.FluoAdminImpl;
import org.apache.fluo.core.util.CuratorUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.twill.api.ResourceReport;
import org.apache.twill.api.TwillController;
import org.apache.twill.api.TwillPreparer;
import org.apache.twill.api.TwillRunResources;
import org.apache.twill.api.TwillRunnerService;
import org.apache.twill.internal.RunIds;
import org.apache.twill.yarn.YarnTwillRunnerService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Yarn Implementation of ClusterAppRunner
 */
public class YarnAppRunner extends ClusterAppRunner implements AutoCloseable {

    private static final Logger log = LoggerFactory.getLogger(YarnAppRunner.class);
    private Map<String, TwillRunnerService> twillRunners = new HashMap<>();
    private Map<String, CuratorFramework> curators = new HashMap<>();
    private String hadoopPrefix;

    public YarnAppRunner(String hadoopPrefix) {
        super("fluo");
        this.hadoopPrefix = hadoopPrefix;
    }

    private synchronized CuratorFramework getAppCurator(FluoConfiguration config) {
        if (!curators.containsKey(config.getApplicationName())) {
            CuratorFramework curator = CuratorUtil.newAppCurator(config);
            curator.start();
            curators.put(config.getApplicationName(), curator);
        }
        return curators.get(config.getApplicationName());
    }

    private synchronized TwillRunnerService getTwillRunner(FluoConfiguration config) {
        if (!twillRunners.containsKey(config.getApplicationName())) {
            YarnConfiguration yarnConfig = new YarnConfiguration();
            yarnConfig.addResource(new Path(hadoopPrefix + "/etc/hadoop/core-site.xml"));
            yarnConfig.addResource(new Path(hadoopPrefix + "/etc/hadoop/yarn-site.xml"));

            TwillRunnerService twillRunner = new YarnTwillRunnerService(yarnConfig,
                    config.getAppZookeepers() + ZookeeperPath.TWILL);
            twillRunner.start();

            twillRunners.put(config.getApplicationName(), twillRunner);

            // sleep to give twill time to retrieve state from zookeeper
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                throw new IllegalStateException(e);
            }
        }
        return twillRunners.get(config.getApplicationName());
    }

    private void checkIfInitialized(FluoConfiguration config) {

        try (FluoAdminImpl admin = new FluoAdminImpl(config)) {
            if (!admin.zookeeperInitialized()) {
                throw new FluoException("A Fluo '" + config.getApplicationName() + "' application has not "
                        + "been initialized yet in Zookeeper at " + config.getAppZookeepers());
            }
        }
    }

    public void list(FluoConfiguration config) {
        try (CuratorFramework curator = CuratorUtil.newFluoCurator(config)) {
            curator.start();

            try {
                if (curator.checkExists().forPath("/") == null) {
                    System.out.println("Fluo instance (" + config.getInstanceZookeepers() + ") has not been "
                            + "created yet in Zookeeper.  It will be created when the first Fluo application is "
                            + "initialized for this instance.");
                    return;
                }
                List<String> children = curator.getChildren().forPath("/");
                if (children.isEmpty()) {
                    System.out.println("Fluo instance (" + config.getInstanceZookeepers() + ") does not "
                            + "contain any Fluo applications.");
                    return;
                }
                Collections.sort(children);

                System.out.println("Fluo instance (" + config.getInstanceZookeepers() + ") contains "
                        + children.size() + " application(s)\n");
                System.out.println("Application     Status      YarnAppName          YarnAppId");
                System.out.println("-----------     ------      -----------          ---------");
                for (String path : children) {
                    FluoConfiguration appConfig = new FluoConfiguration(config);
                    appConfig.setApplicationName(path);
                    String state = "INITIALIZED";
                    String yarnId = "not started yet";
                    String yarnAppName = getYarnApplicationName(path);
                    if (twillIdExists(appConfig)) {
                        String twillId = getTwillId(appConfig);
                        yarnId = getAppId(appConfig);
                        TwillController controller = getTwillRunner(appConfig).lookup(
                                getYarnApplicationName(appConfig.getApplicationName()), RunIds.fromString(twillId));
                        if (controller == null) {
                            state = "STOPPED";
                        } else {
                            state = "RUNNING";
                        }
                    }
                    System.out.format("%-15s %-11s %-20s %s\n", path, state, yarnAppName, yarnId);
                }
            } catch (Exception e) {
                throw new FluoException(e);
            }
        }
    }

    public void start(FluoConfiguration config, String appConfDir, String appLibPath, String fluoLibPath) {

        checkIfInitialized(config);

        if (twillIdExists(config)) {
            String runId = getTwillId(config);

            TwillController controller = getTwillRunner(config)
                    .lookup(getYarnApplicationName(config.getApplicationName()), RunIds.fromString(runId));
            if ((controller != null) && isReady(controller)) {
                throw new FluoException(
                        "A YARN application " + getAppInfo(config) + " is already running for the Fluo '"
                                + config.getApplicationName() + "' application!  Please stop it using 'fluo stop "
                                + config.getApplicationName() + "' before starting a new one.");
            }
        }

        if (!config.hasRequiredOracleProps() || !config.hasRequiredWorkerProps()) {
            throw new FluoException("Failed to start Fluo '" + config.getApplicationName()
                    + "' application because fluo.properties is missing required properties.");
        }

        try {
            config.validate();
        } catch (IllegalArgumentException e) {
            throw new FluoException("Invalid fluo.properties due to " + e.getMessage());
        } catch (Exception e) {
            throw new FluoException("Invalid fluo.properties due to " + e.getMessage(), e);
        }

        TwillPreparer preparer = getTwillRunner(config).prepare(new FluoTwillApp(config, appConfDir));

        // Add jars from fluo lib/ directory that are not being loaded by Twill.
        try {
            File libDir = new File(fluoLibPath);
            File[] libFiles = libDir.listFiles();
            if (libFiles != null) {
                for (File f : libFiles) {
                    if (f.isFile()) {
                        String jarPath = "file:" + f.getCanonicalPath();
                        log.trace("Adding library jar (" + f.getName() + ") to Fluo application.");
                        preparer.withResources(new URI(jarPath));
                    }
                }
            }

            // Add jars found in application's lib dir
            File appLibDir = new File(appLibPath);
            File[] appFiles = appLibDir.listFiles();
            if (appFiles != null) {
                for (File f : appFiles) {
                    String jarPath = "file:" + f.getCanonicalPath();
                    log.debug("Adding application jar (" + f.getName() + ") to Fluo application.");
                    preparer.withResources(new URI(jarPath));
                }
            }
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }

        Objects.requireNonNull(preparer, "Failed to prepare twill application");

        log.info("Starting Fluo '{}' application in YARN...", config.getApplicationName());
        TwillController controller = preparer.start();

        try {
            // set twill run id zookeeper
            String twillId = controller.getRunId().toString();
            CuratorUtil.putData(getAppCurator(config), ZookeeperPath.YARN_TWILL_ID,
                    twillId.getBytes(StandardCharsets.UTF_8), CuratorUtil.NodeExistsPolicy.OVERWRITE);

            // set app id in zookeeper
            String appId = getResourceReport(controller, -1).getApplicationId();
            Objects.requireNonNull(appId, "Failed to retrieve YARN app ID from Twill");
            CuratorUtil.putData(getAppCurator(config), ZookeeperPath.YARN_APP_ID,
                    appId.getBytes(StandardCharsets.UTF_8), CuratorUtil.NodeExistsPolicy.OVERWRITE);

            log.info("The Fluo '{}' application is running in YARN {}", config.getApplicationName(),
                    getAppInfo(config));

            log.info("Waiting for all desired containers to start...");
            int checks = 0;
            while (!allContainersRunning(controller, config)) {
                Thread.sleep(500);
                checks++;
                if (checks == 30) {
                    log.warn("Still waiting... YARN may not have enough resources available for this "
                            + "application.  Use ctrl-c to stop waiting and check status using "
                            + "'fluo info <app>'.");
                }
            }
            log.info("All desired containers are running in YARN " + getAppInfo(config));
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
    }

    public void stop(FluoConfiguration config) throws InterruptedException, ExecutionException {
        checkIfInitialized(config);
        String twillId = verifyTwillId(config);

        TwillController controller = getTwillRunner(config)
                .lookup(getYarnApplicationName(config.getApplicationName()), RunIds.fromString(twillId));
        if ((controller != null) && isReady(controller)) {
            System.out.print("Stopping Fluo '" + config.getApplicationName() + "' application " + getAppInfo(config)
                    + "...");
            controller.terminate().get();
            System.out.println("DONE");
        } else {
            System.out.println("Fluo '" + config.getApplicationName() + "' application " + getAppInfo(config)
                    + " is already stopped.");
        }
    }

    public void kill(FluoConfiguration config) throws Exception {
        checkIfInitialized(config);

        String twillId = verifyTwillId(config);

        TwillController controller = getTwillRunner(config)
                .lookup(getYarnApplicationName(config.getApplicationName()), RunIds.fromString(twillId));
        if (controller != null) {
            System.out.print(
                    "Killing Fluo '" + config.getApplicationName() + "' application " + getAppInfo(config) + "...");
            controller.kill();
            System.out.println("DONE");
        } else {
            System.out.println("Fluo '" + config.getApplicationName() + "' application " + getAppInfo(config)
                    + " is already stopped.");
        }
    }

    /**
     * Attempts to retrieves ResourceReport until maxWaitMs time is reached. Set maxWaitMs to -1 to
     * retry forever.
     */
    private ResourceReport getResourceReport(TwillController controller, int maxWaitMs) {
        ResourceReport report = controller.getResourceReport();
        int elapsed = 0;
        while (report == null) {
            report = controller.getResourceReport();
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                throw new IllegalStateException(e);
            }
            elapsed += 500;
            if ((maxWaitMs != -1) && (elapsed > maxWaitMs)) {
                String msg = String.format(
                        "Exceeded max wait time to retrieve ResourceReport from Twill." + " Elapsed time = %s ms",
                        elapsed);
                log.error(msg);
                throw new IllegalStateException(msg);
            }
            if ((elapsed % 10000) == 0) {
                log.info("Waiting for ResourceReport from Twill. Elapsed time = {} ms", elapsed);
            }
        }
        return report;
    }

    private boolean isReady(TwillController controller) {
        try {
            if (getResourceReport(controller, 30000) != null) {
                return true;
            }
        } catch (Exception e) {
            log.error("Exception occurred while getting Twill resource report", e);
        }
        return false;
    }

    private boolean allContainersRunning(TwillController controller, FluoConfiguration config) {
        return TwillUtil.numRunning(controller, OracleRunnable.ORACLE_NAME) == FluoYarnConfig
                .getOracleInstances(config)
                && TwillUtil.numRunning(controller, WorkerRunnable.WORKER_NAME) == FluoYarnConfig
                        .getWorkerInstances(config);
    }

    private String containerStatus(TwillController controller, FluoConfiguration config) {
        return "" + TwillUtil.numRunning(controller, OracleRunnable.ORACLE_NAME) + " of "
                + FluoYarnConfig.getOracleInstances(config) + " Oracle containers and "
                + TwillUtil.numRunning(controller, WorkerRunnable.WORKER_NAME) + " of "
                + FluoYarnConfig.getWorkerInstances(config) + " Worker containers";
    }

    public void status(FluoConfiguration config, boolean extraInfo) {
        checkIfInitialized(config);
        if (!twillIdExists(config)) {
            System.out.println("Fluo '" + config.getApplicationName()
                    + "' application was initialized but has not been started.");
            return;
        }
        String twillId = getTwillId(config);
        TwillController controller = getTwillRunner(config)
                .lookup(getYarnApplicationName(config.getApplicationName()), RunIds.fromString(twillId));
        if (controller == null) {
            System.out.print("Fluo '" + config.getApplicationName() + "' application " + getAppInfo(config)
                    + " has stopped.");
        } else {
            System.out.println("A Fluo '" + config.getApplicationName() + "' application is running" + " in YARN "
                    + getFullInfo(config));

            if (!allContainersRunning(controller, config)) {
                System.out.println("\nWARNING - The Fluo application is not running all desired "
                        + "containers!  YARN may not have enough available resources.  Application is "
                        + "currently running " + containerStatus(controller, config));
            }

            if (extraInfo) {
                ResourceReport report = getResourceReport(controller, 30000);
                Collection<TwillRunResources> resources;
                resources = report.getRunnableResources(OracleRunnable.ORACLE_NAME);
                System.out.println("\nThe application has " + resources.size() + " of "
                        + FluoYarnConfig.getOracleInstances(config) + " desired Oracle containers:\n");
                TwillUtil.printResources(resources);

                resources = report.getRunnableResources(WorkerRunnable.WORKER_NAME);
                System.out.println("\nThe application has " + resources.size() + " of "
                        + FluoYarnConfig.getWorkerInstances(config) + " desired Worker containers:\n");
                TwillUtil.printResources(resources);
            }
        }
    }

    private String verifyTwillId(FluoConfiguration config) {
        if (!twillIdExists(config)) {
            throw new FluoException("A YARN application is not referenced in Zookeeper for this "
                    + " Fluo application.  Check if there is a Fluo application running in YARN using the "
                    + "command 'yarn application -list`. If so, verify that your fluo.properties is "
                    + "configured correctly.");
        }
        return getTwillId(config);
    }

    private String getAppInfo(FluoConfiguration config) {
        return "(yarn id = " + getAppId(config) + ")";
    }

    private String getFullInfo(FluoConfiguration config) {
        return "(yarn id = " + getAppId(config) + ", twill id = " + getTwillId(config) + ")";
    }

    private boolean twillIdExists(FluoConfiguration config) {
        try {
            return getAppCurator(config).checkExists().forPath(ZookeeperPath.YARN_TWILL_ID) != null;
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
    }

    private String getTwillId(FluoConfiguration config) {
        try {
            return new String(getAppCurator(config).getData().forPath(ZookeeperPath.YARN_TWILL_ID),
                    StandardCharsets.UTF_8);
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
    }

    private String getAppId(FluoConfiguration config) {
        try {
            return new String(getAppCurator(config).getData().forPath(ZookeeperPath.YARN_APP_ID),
                    StandardCharsets.UTF_8);
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
    }

    public static String getYarnApplicationName(String appName) {
        return String.format("fluo-app-%s", appName);
    }

    @Override
    public void close() throws Exception {
        for (TwillRunnerService twillRunner : twillRunners.values()) {
            twillRunner.stop();
        }
        for (CuratorFramework curator : curators.values()) {
            curator.close();
        }
    }
}