org.apache.samza.job.yarn.refactor.YarnContainerRunner.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.samza.job.yarn.refactor.YarnContainerRunner.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.samza.job.yarn.refactor;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.apache.samza.SamzaException;
import org.apache.samza.clustermanager.SamzaContainerLaunchException;
import org.apache.samza.config.Config;
import org.apache.samza.config.JobConfig;
import org.apache.samza.config.TaskConfig;
import org.apache.samza.config.YarnConfig;
import org.apache.samza.job.CommandBuilder;
import org.apache.samza.job.ShellCommandBuilder;
import org.apache.samza.util.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;

/**
 * A Helper class to run container processes on Yarn. This encapsulates quite a bit of YarnContainer
 * boiler plate.
 */
public class YarnContainerRunner {
    private static final Logger log = LoggerFactory.getLogger(YarnContainerRunner.class);

    private final Config config;
    private final YarnConfiguration yarnConfiguration;

    private final NMClient nmClient;
    private final YarnConfig yarnConfig;
    private final TaskConfig taskConfig;

    /**
     * Create a new Runner from a Config.
     * @param config to instantiate the runner with
     * @param yarnConfiguration the yarn config for the cluster to connect to.
     */

    public YarnContainerRunner(Config config, YarnConfiguration yarnConfiguration) {
        this.config = config;
        this.yarnConfiguration = yarnConfiguration;

        this.nmClient = NMClient.createNMClient();
        nmClient.init(this.yarnConfiguration);

        this.yarnConfig = new YarnConfig(config);
        this.taskConfig = new TaskConfig(config);
    }

    /**
     * Runs a process as specified by the command builder on the container.
     * @param samzaContainerId id of the samza Container to run (passed as a command line parameter to the process)
     * @param container the samza container to run.
     * @param cmdBuilder the command builder that encapsulates the command, and the context
     *
     * @throws SamzaContainerLaunchException  when there's an exception in submitting the request to the RM.
     *
     */
    //TODO: we don't need samzaContainerId as a param here.
    public void runContainer(int samzaContainerId, Container container, CommandBuilder cmdBuilder)
            throws SamzaContainerLaunchException {
        String containerIdStr = ConverterUtils.toString(container.getId());
        log.info("Got available container ID ({}) for container: {}", samzaContainerId, container);

        // check if we have framework path specified. If yes - use it, if not use default ./__package/
        String jobLib = ""; // in case of separate framework, this directory will point at the job's libraries
        String cmdPath = "./__package/";

        String fwkPath = JobConfig.getFwkPath(config);
        if (fwkPath != null && (!fwkPath.isEmpty())) {
            cmdPath = fwkPath;
            jobLib = "export JOB_LIB_DIR=./__package/lib";
        }
        log.info("In runContainer in util: fwkPath= " + fwkPath + ";cmdPath=" + cmdPath + ";jobLib=" + jobLib);
        cmdBuilder.setCommandPath(cmdPath);

        String command = cmdBuilder.buildCommand();
        log.info("Container ID {} using command {}", samzaContainerId, command);

        Map<String, String> env = getEscapedEnvironmentVariablesMap(cmdBuilder);
        printContainerEnvironmentVariables(samzaContainerId, env);

        log.info("Samza FWK path: " + command + "; env=" + env);

        Path path = new Path(yarnConfig.getPackagePath());
        log.info("Starting container ID {} using package path {}", samzaContainerId, path);

        startContainer(path, container, env, getFormattedCommand(ApplicationConstants.LOG_DIR_EXPANSION_VAR, jobLib,
                command, ApplicationConstants.STDOUT, ApplicationConstants.STDERR));

        log.info("Claimed container ID {} for container {} on node {} (http://{}/node/containerlogs/{}).",
                new Object[] { samzaContainerId, containerIdStr, container.getNodeId().getHost(),
                        container.getNodeHttpAddress(), containerIdStr });

        log.info("Started container ID {}", samzaContainerId);
    }

    /**
     *    Runs a command as a process on the container. All binaries needed by the physical process are packaged in the URL
     *    specified by packagePath.
     */
    private void startContainer(Path packagePath, Container container, Map<String, String> env, final String cmd)
            throws SamzaContainerLaunchException {
        log.info("starting container {} {} {} {}", new Object[] { packagePath, container, env, cmd });

        // set the local package so that the containers and app master are provisioned with it
        LocalResource packageResource = Records.newRecord(LocalResource.class);
        URL packageUrl = ConverterUtils.getYarnUrlFromPath(packagePath);
        FileStatus fileStatus;
        try {
            fileStatus = packagePath.getFileSystem(yarnConfiguration).getFileStatus(packagePath);
        } catch (IOException ioe) {
            log.error("IO Exception when accessing the package status from the filesystem", ioe);
            throw new SamzaContainerLaunchException(
                    "IO Exception when accessing the package status from the filesystem");
        }

        packageResource.setResource(packageUrl);
        packageResource.setSize(fileStatus.getLen());
        packageResource.setTimestamp(fileStatus.getModificationTime());
        packageResource.setType(LocalResourceType.ARCHIVE);
        packageResource.setVisibility(LocalResourceVisibility.APPLICATION);

        ByteBuffer allTokens;
        // copy tokens (copied from dist shell example)
        try {
            Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
            DataOutputBuffer dob = new DataOutputBuffer();
            credentials.writeTokenStorageToStream(dob);

            // now remove the AM->RM token so that containers cannot access it
            Iterator iter = credentials.getAllTokens().iterator();
            while (iter.hasNext()) {
                TokenIdentifier token = ((Token) iter.next()).decodeIdentifier();
                if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
                    iter.remove();
                }
            }
            allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

        } catch (IOException ioe) {
            log.error("IOException when writing credentials.", ioe);
            throw new SamzaContainerLaunchException("IO Exception when writing credentials to output buffer");
        }

        ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
        context.setEnvironment(env);
        context.setTokens(allTokens.duplicate());
        context.setCommands(new ArrayList<String>() {
            {
                add(cmd);
            }
        });
        context.setLocalResources(Collections.singletonMap("__package", packageResource));

        log.debug("setting package to {}", packageResource);
        log.debug("setting context to {}", context);

        StartContainerRequest startContainerRequest = Records.newRecord(StartContainerRequest.class);
        startContainerRequest.setContainerLaunchContext(context);
        try {
            nmClient.startContainer(container, context);
        } catch (YarnException ye) {
            log.error("Received YarnException when starting container: " + container.getId(), ye);
            throw new SamzaContainerLaunchException(
                    "Received YarnException when starting container: " + container.getId(), ye);
        } catch (IOException ioe) {
            log.error("Received IOException when starting container: " + container.getId(), ioe);
            throw new SamzaContainerLaunchException(
                    "Received IOException when starting container: " + container.getId(), ioe);
        }
    }

    /**
     * @param samzaContainerId  the Samza container Id for logging purposes.
     * @param env               the Map of environment variables to their respective values.
     */
    private void printContainerEnvironmentVariables(int samzaContainerId, Map<String, String> env) {
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> entry : env.entrySet()) {
            sb.append(String.format("\n%s=%s", entry.getKey(), entry.getValue()));
        }
        log.info("Container ID {} using environment variables: {}", samzaContainerId, sb.toString());
    }

    /**
     * Gets the environment variables from the specified {@link CommandBuilder} and escapes certain characters.
     *
     * @param cmdBuilder        the command builder containing the environment variables.
     * @return                  the map containing the escaped environment variables.
     */
    private Map<String, String> getEscapedEnvironmentVariablesMap(CommandBuilder cmdBuilder) {
        Map<String, String> env = new HashMap<String, String>();
        for (Map.Entry<String, String> entry : cmdBuilder.buildEnvironment().entrySet()) {
            String escapedValue = Util.envVarEscape(entry.getValue());
            env.put(entry.getKey(), escapedValue);
        }
        return env;
    }

    private String getFormattedCommand(String logDirExpansionVar, String jobLib, String command, String stdOut,
            String stdErr) {
        if (!jobLib.isEmpty()) {
            jobLib = "&& " + jobLib; // add job's libraries exported to an env variable
        }

        return String.format("export SAMZA_LOG_DIR=%s %s && ln -sfn %s logs && exec %s 1>logs/%s 2>logs/%s",
                logDirExpansionVar, jobLib, logDirExpansionVar, command, stdOut, stdErr);
    }
}