alluxio.yarn.Client.java Source code

Java tutorial

Introduction

Here is the source code for the class alluxio.yarn.Client (file Client.java).

Source

/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.yarn;

import alluxio.Configuration;
import alluxio.PropertyKey;
import alluxio.exception.ExceptionMessage;
import alluxio.Constants;
import alluxio.util.CommonUtils;
import alluxio.util.io.PathUtils;
import alluxio.yarn.YarnUtils.YarnContainerType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import javax.annotation.concurrent.NotThreadSafe;

/**
 * The client to submit the application to run Alluxio to YARN ResourceManager.
 *
 * <p>
 * Launch Alluxio on YARN:
 * </p>
 * <pre>{@code
 * $ yarn jar alluxio-assemblies-0.8.0-SNAPSHOT-jar-with-dependencies.jar alluxio.yarn.Client \
 *     -num_workers NumAlluxioWorkers \
 *     -master_address MasterAddress \
 *     -resource_path ResourcePath
 * }</pre>
 *
 * <p>
 * Get help and a full list of options:
 * </p>
 * <pre>{@code
 * $ yarn jar alluxio-assemblies-0.8.0-SNAPSHOT-jar-with-dependencies.jar alluxio.yarn.Client -help
 * }</pre>
 */
@NotThreadSafe
public final class Client {
    private static final Log LOG = LogFactory.getLog(Client.class);

    /** Yarn client to talk to resource manager. Created in {@link #submitApplication()}. */
    private YarnClient mYarnClient;
    /** Yarn configuration. */
    private final YarnConfiguration mYarnConf = new YarnConfiguration();
    /** Container context to launch application master. */
    private ContainerLaunchContext mAmContainer;
    /** ApplicationMaster specific info to register a new Application. */
    private ApplicationSubmissionContext mAppContext;
    /** Application name. */
    private String mAppName;
    /** ApplicationMaster priority. */
    private int mAmPriority;
    /** Queue for ApplicationMaster. */
    private String mAmQueue;
    /** Amount of memory to request for running the ApplicationMaster. */
    private int mAmMemoryInMB;
    /** Number of virtual cores to request for running the ApplicationMaster. */
    private int mAmVCores;
    /** HDFS path holding the resources (tarball, setup script, jar) shipped to the ApplicationMaster. */
    private String mResourcePath;
    /** Number of Alluxio workers. */
    private int mNumWorkers;
    /** Address to run Alluxio master. */
    private String mMasterAddress;
    /** Maximum number of workers to allow on a single host. */
    private int mMaxWorkersPerHost;
    /** Id of the application. */
    private ApplicationId mAppId;
    /** Command line options. */
    private final Options mOptions;

    /**
     * Constructs a new client for launching an Alluxio application master.
     *
     * Only registers the supported command line options; no argument is parsed here.
     */
    public Client() {
        mOptions = new Options();
        mOptions.addOption("appname", true, "Application Name. Default 'Alluxio'");
        mOptions.addOption("priority", true, "Application Priority. Default 0");
        mOptions.addOption("queue", true,
                "RM Queue in which this application is to be submitted. Default 'default'");
        mOptions.addOption("am_memory", true,
                "Amount of memory in MB to request to run ApplicationMaster. Default 256");
        mOptions.addOption("am_vcores", true,
                "Amount of virtual cores to request to run ApplicationMaster. Default 1");
        mOptions.addOption("resource_path", true, "(Required) HDFS path containing the Application Master");
        // NOTE(review): this option is accepted by the parser, but its value is never read anywhere
        // in this class, so it is not actually enforced as required.
        mOptions.addOption("alluxio_home", true,
                "(Required) Path of the home dir of Alluxio deployment on YARN slave machines");
        mOptions.addOption("master_address", true, "(Required) Address to run Alluxio master");
        mOptions.addOption("help", false, "Print usage");
        mOptions.addOption("num_workers", true, "Number of Alluxio workers to launch. Default 1");
    }

    /**
     * Constructs a new client for launching an Alluxio application master and
     * parses command line options.
     *
     * @param args Command line arguments
     * @throws ParseException if an error occurs when parsing the argument
     */
    public Client(String[] args) throws ParseException {
        this();
        parseArgs(args);
    }

    /**
     * Entry point: parses the command line and submits the application to YARN.
     *
     * Exits with status 0 when the arguments do not allow a run (including {@code -help}) and
     * with status 1 when any exception is raised while parsing or running.
     *
     * @param args Command line arguments
     */
    public static void main(String[] args) {
        try {
            Client client = new Client();
            System.out.println("Initializing Client");
            if (!client.parseArgs(args)) {
                System.out.println("Cannot parse commandline: " + Arrays.toString(args));
                System.exit(0);
            }
            System.out.println("Starting Client");
            client.run();
        } catch (Exception e) {
            // Log the throwable itself so the stack trace is preserved; the one-line summary on
            // stderr is kept for users who only look at the console.
            LOG.error("Error running Client", e);
            System.err.println("Error running Client " + e);
            System.exit(1);
        }
    }

    /**
     * Main run function for the client.
     *
     * @throws IOException if errors occur from ResourceManager
     * @throws YarnException if errors occur from ResourceManager
     */
    public void run() throws IOException, YarnException {
        submitApplication();
    }

    /**
     * Helper function to print out usage.
     */
    private void printUsage() {
        new HelpFormatter().printHelp("Client", mOptions);
    }

    /**
     * Parses command line options and stores them in this client's fields.
     *
     * @param args Command line arguments to parse
     * @return {@code true} if the client can be run with the given arguments; {@code false} if
     *         help was requested or a required option is missing (usage is printed in both cases)
     * @throws ParseException if an error occurs when parsing the argument
     */
    private boolean parseArgs(String[] args) throws ParseException {
        Preconditions.checkArgument(args.length > 0, "No args specified for client to initialize");
        CommandLine cliParser = new GnuParser().parse(mOptions, args);

        if (cliParser.hasOption("help")) {
            printUsage();
            return false;
        }
        if (!cliParser.hasOption("resource_path")) {
            System.out.println("Required to specify resource_path");
            printUsage();
            return false;
        }
        // Fail fast: the master address is later stored in an ImmutableMap, which rejects null
        // values, so a missing address would otherwise only fail after talking to the RM.
        if (!cliParser.hasOption("master_address")) {
            System.out.println("Required to specify master_address");
            printUsage();
            return false;
        }

        mResourcePath = cliParser.getOptionValue("resource_path");
        mMasterAddress = cliParser.getOptionValue("master_address");
        mAppName = cliParser.getOptionValue("appname", "Alluxio");
        mAmPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0"));
        mAmQueue = cliParser.getOptionValue("queue", "default");
        mAmMemoryInMB = Integer.parseInt(cliParser.getOptionValue("am_memory", "256"));
        mAmVCores = Integer.parseInt(cliParser.getOptionValue("am_vcores", "1"));
        mNumWorkers = Integer.parseInt(cliParser.getOptionValue("num_workers", "1"));
        // The per-host worker limit comes from the Alluxio configuration, not the command line.
        mMaxWorkersPerHost = Configuration.getInt(PropertyKey.INTEGRATION_YARN_WORKERS_PER_HOST_MAX);

        Preconditions.checkArgument(mAmMemoryInMB > 0,
                "Invalid memory specified for application master, " + "exiting. Specified memory=" + mAmMemoryInMB);
        Preconditions.checkArgument(mAmVCores > 0,
                "Invalid virtual cores specified for application master, exiting." + " Specified virtual cores="
                        + mAmVCores);
        return true;
    }

    /**
     * Submits an application to the ResourceManager to run ApplicationMaster.
     *
     * The stable Yarn API provides a convenience method (YarnClient#createApplication) for creating
     * applications and setting up the application submission context. This was not available in the
     * alpha API.
     *
     * The Yarn client started here is stopped before this method returns, whether it returns
     * normally or by throwing.
     *
     * @throws YarnException if errors occur from ResourceManager
     * @throws IOException if errors occur from ResourceManager
     */
    private void submitApplication() throws YarnException, IOException {
        // Initialize a YarnClient
        mYarnClient = YarnClient.createYarnClient();
        mYarnClient.init(mYarnConf);
        mYarnClient.start();

        try {
            // Create an application, get and check the information about the cluster
            YarnClientApplication app = mYarnClient.createApplication();
            // Get a response of this application, containing information of the cluster
            GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
            // Check if the cluster has enough resource to launch the ApplicationMaster
            checkClusterResource(appResponse);

            // Check that there are enough hosts in the cluster to support the desired number of workers
            checkNodesAvailable();

            // Set up the container launch context for the application master
            mAmContainer = Records.newRecord(ContainerLaunchContext.class);
            setupContainerLaunchContext();

            // Finally, set-up ApplicationSubmissionContext for the application
            mAppContext = app.getApplicationSubmissionContext();
            setupApplicationSubmissionContext();

            // Submit the application to the applications manager.
            // Ignore the response as either a valid response object is returned on success
            // or an exception thrown to denote some form of a failure
            mAppId = mAppContext.getApplicationId();
            System.out.println("Submitting application of id " + mAppId + " to ResourceManager");
            mYarnClient.submitApplication(mAppContext);
            monitorApplication();
        } finally {
            // Do not leave the started client behind once submission and monitoring are over.
            mYarnClient.stop();
        }
    }

    /**
     * Checks if the cluster has enough resource to launch application master,
     * alluxio master and alluxio workers.
     *
     * Each request is compared against the maximum resource capability reported by the
     * ResourceManager for a new application.
     *
     * @param appResponse the response obtained when creating the new application
     * @throws RuntimeException if any single request exceeds the reported maximum
     */
    private void checkClusterResource(GetNewApplicationResponse appResponse) {
        int maxMem = appResponse.getMaximumResourceCapability().getMemory();
        int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();

        checkWithinLimit("ApplicationMaster", "memory", mAmMemoryInMB, maxMem);
        checkWithinLimit("ApplicationMaster", "virtual cores", mAmVCores, maxVCores);

        int masterMemInMB = (int) (Configuration.getBytes(PropertyKey.INTEGRATION_MASTER_RESOURCE_MEM)
                / Constants.MB);
        checkWithinLimit("Alluxio Master", "memory", masterMemInMB, maxMem);

        int masterVCores = Configuration.getInt(PropertyKey.INTEGRATION_MASTER_RESOURCE_CPU);
        checkWithinLimit("Alluxio Master", "virtual cores", masterVCores, maxVCores);

        int workerMemInMB = (int) (Configuration.getBytes(PropertyKey.INTEGRATION_WORKER_RESOURCE_MEM)
                / Constants.MB);
        int ramdiskMemInMB = (int) (Configuration.getBytes(PropertyKey.WORKER_MEMORY_SIZE) / Constants.MB);
        // A worker request covers both the worker's own memory and its ramdisk.
        checkWithinLimit("Alluxio Worker", "memory", workerMemInMB + ramdiskMemInMB, maxMem);

        int workerVCore = Configuration.getInt(PropertyKey.INTEGRATION_WORKER_RESOURCE_CPU);
        checkWithinLimit("Alluxio Worker", "virtual cores", workerVCore, maxVCores);
    }

    /**
     * Throws if a requested amount of a resource exceeds the limit.
     *
     * @param component name of the component making the request, used in the error message
     * @param resourceName name of the resource, used in the error message
     * @param requested the requested amount
     * @param limit the maximum allowed amount
     * @throws RuntimeException if {@code requested} is greater than {@code limit}
     */
    private static void checkWithinLimit(String component, String resourceName, int requested,
            int limit) {
        if (requested > limit) {
            throw new RuntimeException(ExceptionMessage.YARN_NOT_ENOUGH_RESOURCES.getMessage(component,
                    resourceName, requested, limit));
        }
    }

    /**
     * Checks that there are enough nodes in the cluster to run the desired number of workers.
     *
     * @throws YarnException if errors occur when listing the cluster hosts
     * @throws IOException if errors occur when listing the cluster hosts
     * @throws IllegalArgumentException if the requested number of workers exceeds the number of
     *         hosts times the per-host worker limit
     */
    private void checkNodesAvailable() throws YarnException, IOException {
        Set<String> hosts = YarnUtils.getNodeHosts(mYarnClient);
        Preconditions.checkArgument(mNumWorkers <= hosts.size() * mMaxWorkersPerHost,
                "Not enough nodes in cluster to support specified number of workers, " + String.format(
                        "specified=%s, but there are only %d usable hosts and %d workers allowed per host: %s",
                        mNumWorkers, hosts.size(), mMaxWorkersPerHost, hosts));
    }

    /**
     * Fills in the ApplicationMaster container launch context: launch command, local resources,
     * environment and, when security is enabled, delegation tokens.
     *
     * @throws IOException if a local resource or a delegation token cannot be obtained
     * @throws YarnException if errors occur from ResourceManager
     */
    private void setupContainerLaunchContext() throws IOException, YarnException {
        // ImmutableMap rejects null values, so the master address and resource path must be set.
        Map<String, String> applicationMasterArgs = ImmutableMap.<String, String>of("-num_workers",
                Integer.toString(mNumWorkers), "-master_address", mMasterAddress, "-resource_path", mResourcePath);

        final String amCommand = YarnUtils.buildCommand(YarnContainerType.APPLICATION_MASTER,
                applicationMasterArgs);

        System.out.println("ApplicationMaster command: " + amCommand);
        mAmContainer.setCommands(Collections.singletonList(amCommand));

        // Setup local resources
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        localResources.put("alluxio.tar.gz",
                YarnUtils.createLocalResourceOfFile(mYarnConf, mResourcePath + "/alluxio.tar.gz"));
        localResources.put("alluxio-yarn-setup.sh",
                YarnUtils.createLocalResourceOfFile(mYarnConf, mResourcePath + "/alluxio-yarn-setup.sh"));
        localResources.put("alluxio.jar",
                YarnUtils.createLocalResourceOfFile(mYarnConf, mResourcePath + "/alluxio.jar"));
        mAmContainer.setLocalResources(localResources);

        // Setup CLASSPATH for ApplicationMaster
        Map<String, String> appMasterEnv = new HashMap<String, String>();
        setupAppMasterEnv(appMasterEnv);
        mAmContainer.setEnvironment(appMasterEnv);

        // Set up security tokens for launching our ApplicationMaster container.
        if (UserGroupInformation.isSecurityEnabled()) {
            setupSecurityTokens();
        }
    }

    /**
     * Collects delegation tokens for the default file system and the ResourceManager and attaches
     * them to the ApplicationMaster container.
     *
     * @throws IOException if the RM principal is not configured or a token cannot be obtained
     * @throws YarnException if the ResourceManager delegation token cannot be obtained
     */
    private void setupSecurityTokens() throws IOException, YarnException {
        Credentials credentials = new Credentials();
        String tokenRenewer = mYarnConf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }
        org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(mYarnConf);
        // getting tokens for the default file-system.
        final Token<?>[] tokens = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        // getting yarn resource manager token
        org.apache.hadoop.conf.Configuration config = mYarnClient.getConfig();
        Token<TokenIdentifier> token = ConverterUtils.convertFromYarn(
                mYarnClient.getRMDelegationToken(new org.apache.hadoop.io.Text(tokenRenewer)),
                ClientRMProxy.getRMDelegationTokenService(config));
        LOG.info("Added RM delegation token: " + token);
        credentials.addToken(token.getService(), token);

        // Serialize the credentials; only the first getLength() bytes of the buffer are valid.
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer buffer = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        mAmContainer.setTokens(buffer);
    }

    /**
     * Populates the environment of the ApplicationMaster container.
     *
     * Adds the configured YARN application classpath plus every entry of the container working
     * directory to {@code CLASSPATH}, sets {@code ALLUXIO_HOME} to the working directory and, when
     * security is enabled, sets {@code ALLUXIO_USER} to the current user's short name.
     *
     * @param appMasterEnv the environment map to fill in
     * @throws IOException if the current user cannot be determined
     */
    private void setupAppMasterEnv(Map<String, String> appMasterEnv) throws IOException {
        String classpath = ApplicationConstants.Environment.CLASSPATH.name();
        for (String path : mYarnConf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
            Apps.addToEnvironment(appMasterEnv, classpath, path.trim(), ApplicationConstants.CLASS_PATH_SEPARATOR);
        }
        Apps.addToEnvironment(appMasterEnv, classpath, PathUtils.concatPath(Environment.PWD.$(), "*"),
                ApplicationConstants.CLASS_PATH_SEPARATOR);

        appMasterEnv.put("ALLUXIO_HOME", ApplicationConstants.Environment.PWD.$());

        if (UserGroupInformation.isSecurityEnabled()) {
            appMasterEnv.put("ALLUXIO_USER", UserGroupInformation.getCurrentUser().getShortUserName());
        }
    }

    /**
     * Sets up the application submission context.
     */
    private void setupApplicationSubmissionContext() {
        // set the application name
        mAppContext.setApplicationName(mAppName);

        // Set up resource type requirements
        // For now, both memory and vcores are supported, so we set memory and vcores requirements
        Resource capability = Resource.newInstance(mAmMemoryInMB, mAmVCores);
        mAppContext.setResource(capability);

        // Set the queue to which this application is to be submitted in the RM
        mAppContext.setQueue(mAmQueue);

        // Set the AM container spec
        mAppContext.setAMContainerSpec(mAmContainer);

        // Set the priority for the application master
        mAppContext.setPriority(Priority.newInstance(mAmPriority));
    }

    /**
     * Monitor the submitted application until app is running, finished, killed or failed.
     *
     * The application report is polled every 5 seconds; the first poll happens after one full
     * interval.
     *
     * @throws YarnException if errors occur when obtaining application report from ResourceManager
     * @throws IOException if errors occur when obtaining application report from ResourceManager
     */
    private void monitorApplication() throws YarnException, IOException {
        while (true) {
            // Check app status every 5 seconds
            CommonUtils.sleepMs(5 * Constants.SECOND_MS);
            // Get application report for the appId we are interested in
            ApplicationReport report = mYarnClient.getApplicationReport(mAppId);

            YarnApplicationState state = report.getYarnApplicationState();
            FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
            switch (state) {
            case RUNNING:
                System.out.println("Application is running. Tracking url is " + report.getTrackingUrl());
                return;
            case FINISHED:
                if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                    System.out.println("Application has completed successfully");
                } else {
                    System.out.println("Application finished unsuccessfully. YarnState=" + state.toString()
                            + ", DSFinalStatus=" + dsStatus.toString());
                }
                return;
            case KILLED: // intended to fall through
            case FAILED:
                System.out.println("Application did not finish. YarnState=" + state.toString() + ", DSFinalStatus="
                        + dsStatus.toString());
                return;
            default:
                // Any other state is not terminal yet; keep polling.
                System.out.println("Application is in state " + state + ". Waiting.");
            }
        }
    }
}