com.continuuity.weave.yarn.YarnWeavePreparer.java Source code

Java tutorial

Introduction

Here is the source code for com.continuuity.weave.yarn.YarnWeavePreparer.java

Source

/*
 * Copyright 2012-2013 Continuuity,Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.continuuity.weave.yarn;

import com.continuuity.weave.api.LocalFile;
import com.continuuity.weave.api.RunId;
import com.continuuity.weave.api.RuntimeSpecification;
import com.continuuity.weave.api.WeaveController;
import com.continuuity.weave.api.WeavePreparer;
import com.continuuity.weave.api.WeaveSpecification;
import com.continuuity.weave.api.logging.LogHandler;
import com.continuuity.weave.filesystem.Location;
import com.continuuity.weave.filesystem.LocationFactory;
import com.continuuity.weave.internal.ApplicationBundler;
import com.continuuity.weave.internal.Arguments;
import com.continuuity.weave.internal.DefaultLocalFile;
import com.continuuity.weave.internal.DefaultRuntimeSpecification;
import com.continuuity.weave.internal.DefaultWeaveSpecification;
import com.continuuity.weave.internal.EnvKeys;
import com.continuuity.weave.internal.RunIds;
import com.continuuity.weave.internal.WeaveContainerMain;
import com.continuuity.weave.internal.appmaster.ApplicationMasterMain;
import com.continuuity.weave.internal.json.ArgumentsCodec;
import com.continuuity.weave.internal.json.LocalFileCodec;
import com.continuuity.weave.internal.json.WeaveSpecificationAdapter;
import com.continuuity.weave.internal.utils.Dependencies;
import com.continuuity.weave.launcher.WeaveLauncher;
import com.continuuity.weave.yarn.utils.YarnUtils;
import com.continuuity.weave.zookeeper.ZKClient;
import com.continuuity.weave.zookeeper.ZKClients;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.base.Throwables;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.io.ByteStreams;
import com.google.common.reflect.TypeToken;
import com.google.gson.GsonBuilder;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.YarnClient;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;

/**
 * Implementation for {@link WeavePreparer} to prepare and launch distributed application on Hadoop YARN.
 *
 * <p>The {@code addLogHandler} and {@code with*} methods only accumulate settings inside this instance.
 * Nothing is written or submitted until {@link #start()} is called.
 */
final class YarnWeavePreparer implements WeavePreparer {

    private static final Logger LOG = LoggerFactory.getLogger(YarnWeavePreparer.class);
    // Name of the Kafka archive that is looked up as a classpath resource.
    private static final String KAFKA_ARCHIVE = "kafka-0.7.2.tgz";
    // Used both for the -Xmx JVM option and for the memory requested for the application master container.
    private static final int APP_MASTER_MEMORY_MB = 1024;

    private final WeaveSpecification weaveSpec;
    private final YarnClient yarnClient;
    private final ZKClient zkClient;
    private final LocationFactory locationFactory;
    private final RunId runId;

    // Settings accumulated through the builder-style methods and consumed by start().
    private final List<LogHandler> logHandlers = Lists.newArrayList();
    private final List<String> arguments = Lists.newArrayList();
    private final Set<Class<?>> dependencies = Sets.newIdentityHashSet();
    private final List<URI> resources = Lists.newArrayList();
    private final List<String> classPaths = Lists.newArrayList();
    private final ListMultimap<String, String> runnableArgs = ArrayListMultimap.create();

    /**
     * Creates a preparer for the given application specification.
     *
     * <p>The given {@link ZKClient} is wrapped so that it is namespaced under {@code "/" + weaveSpec.getName()},
     * and a new {@link RunId} is generated for this preparer.
     *
     * @param weaveSpec specification of the application to launch
     * @param yarnClient client used to talk to YARN
     * @param zkClient ZooKeeper client to be namespaced by application name
     * @param locationFactory factory for the locations where application files are written
     */
    YarnWeavePreparer(WeaveSpecification weaveSpec, YarnClient yarnClient, ZKClient zkClient,
            LocationFactory locationFactory) {
        this.weaveSpec = weaveSpec;
        this.yarnClient = yarnClient;
        this.zkClient = ZKClients.namespace(zkClient, "/" + weaveSpec.getName());
        this.locationFactory = locationFactory;
        this.runId = RunIds.generate();
    }

    /**
     * Registers a log handler that is handed to the controller created by {@link #start()}.
     */
    @Override
    public WeavePreparer addLogHandler(LogHandler handler) {
        logHandlers.add(handler);
        return this;
    }

    /**
     * Appends application arguments; see {@link #withApplicationArguments(Iterable)}.
     */
    @Override
    public WeavePreparer withApplicationArguments(String... args) {
        return withApplicationArguments(ImmutableList.copyOf(args));
    }

    /**
     * Appends the given application arguments to those already collected.
     * They are serialized into arguments.json by {@link #start()}.
     */
    @Override
    public WeavePreparer withApplicationArguments(Iterable<String> args) {
        Iterables.addAll(arguments, args);
        return this;
    }

    /**
     * Appends arguments for a runnable; see {@link #withArguments(String, Iterable)}.
     */
    @Override
    public WeavePreparer withArguments(String runnableName, String... args) {
        return withArguments(runnableName, ImmutableList.copyOf(args));
    }

    /**
     * Appends the given arguments to those already collected for the named runnable.
     * They are serialized into arguments.json by {@link #start()}.
     */
    @Override
    public WeavePreparer withArguments(String runnableName, Iterable<String> args) {
        runnableArgs.putAll(runnableName, args);
        return this;
    }

    /**
     * Adds dependency classes; see {@link #withDependencies(Iterable)}.
     */
    @Override
    public WeavePreparer withDependencies(Class<?>... classes) {
        return withDependencies(ImmutableList.copyOf(classes));
    }

    /**
     * Adds classes that are passed to the bundler when container.jar is created.
     */
    @Override
    public WeavePreparer withDependencies(Iterable<Class<?>> classes) {
        Iterables.addAll(dependencies, classes);
        return this;
    }

    /**
     * Adds resources; see {@link #withResources(Iterable)}.
     */
    @Override
    public WeavePreparer withResources(URI... resources) {
        return withResources(ImmutableList.copyOf(resources));
    }

    /**
     * Adds resource URIs that are passed to the bundler when container.jar is created.
     */
    @Override
    public WeavePreparer withResources(Iterable<URI> resources) {
        Iterables.addAll(this.resources, resources);
        return this;
    }

    /**
     * Adds class path entries; see {@link #withClassPaths(Iterable)}.
     */
    @Override
    public WeavePreparer withClassPaths(String... classPaths) {
        return withClassPaths(ImmutableList.copyOf(classPaths));
    }

    /**
     * Adds class path entries. If any are present when {@link #start()} runs, they are joined with ':'
     * and stored as the "classpath" entry of launcher.jar.
     */
    @Override
    public WeavePreparer withClassPaths(Iterable<String> classPaths) {
        Iterables.addAll(this.classPaths, classPaths);
        return this;
    }

    /**
     * Creates all files needed by the application, registers them as YARN local resources of the
     * application master container, submits the application and returns a started controller for it.
     *
     * <p>Any exception raised along the way is rethrown through {@link Throwables#propagate(Throwable)}.
     *
     * @return a controller for the submitted application
     */
    @Override
    public WeaveController start() {
        // TODO: Unify this with {@link ProcessLauncher}
        try {
            GetNewApplicationResponse response = yarnClient.getNewApplication();
            ApplicationId applicationId = response.getApplicationId();

            ApplicationSubmissionContext appSubmissionContext = Records
                    .newRecord(ApplicationSubmissionContext.class);
            appSubmissionContext.setApplicationId(applicationId);
            appSubmissionContext.setApplicationName(weaveSpec.getName());

            Map<String, LocalResource> localResources = Maps.newHashMap();

            Multimap<String, LocalFile> transformedLocalFiles = HashMultimap.create();

            createAppMasterJar(createBundler(), localResources);
            createContainerJar(createBundler(), localResources);
            populateRunnableResources(weaveSpec, transformedLocalFiles);
            saveWeaveSpec(weaveSpec, transformedLocalFiles, localResources);
            saveLogback(localResources);
            saveLauncher(localResources);
            saveKafka(localResources);
            saveArguments(arguments, runnableArgs, localResources);
            // Must run after the calls above: it describes entries that are already in localResources.
            saveLocalFiles(localResources, ImmutableSet.of("weaveSpec.json", "logback-template.xml",
                    "container.jar", "launcher.jar", "arguments.json"));

            ContainerLaunchContext containerLaunchContext = Records.newRecord(ContainerLaunchContext.class);
            containerLaunchContext.setLocalResources(localResources);

            // java -cp launcher.jar:$HADOOP_CONF_DIR -XmxMemory
            //     com.continuuity.weave.launcher.WeaveLauncher
            //     appMaster.jar
            //     com.continuuity.weave.internal.appmaster.ApplicationMasterMain
            //     false
            containerLaunchContext.setCommands(ImmutableList.of("java", "-cp", "launcher.jar:$HADOOP_CONF_DIR",
                    "-Xmx" + APP_MASTER_MEMORY_MB + "m", WeaveLauncher.class.getName(), "appMaster.jar",
                    ApplicationMasterMain.class.getName(), Boolean.FALSE.toString(),
                    " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout",
                    " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

            containerLaunchContext.setEnvironment(ImmutableMap.<String, String>builder()
                    .put(EnvKeys.WEAVE_APP_ID, Integer.toString(applicationId.getId()))
                    .put(EnvKeys.WEAVE_APP_ID_CLUSTER_TIME, Long.toString(applicationId.getClusterTimestamp()))
                    .put(EnvKeys.WEAVE_APP_DIR, getAppLocation().toURI().toASCIIString())
                    .put(EnvKeys.WEAVE_ZK_CONNECT, zkClient.getConnectString())
                    .put(EnvKeys.WEAVE_RUN_ID, runId.getId()).build());
            Resource capability = Records.newRecord(Resource.class);
            capability.setMemory(APP_MASTER_MEMORY_MB);
            containerLaunchContext.setResource(capability);

            appSubmissionContext.setAMContainerSpec(containerLaunchContext);

            yarnClient.submitApplication(appSubmissionContext);

            return createController(applicationId, runId, logHandlers);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Creates a {@link YarnWeaveController} for the given application and starts it before returning.
     */
    private WeaveController createController(ApplicationId applicationId, RunId runId,
            Iterable<LogHandler> logHandlers) {
        YarnWeaveController controller = new YarnWeaveController(yarnClient, zkClient, applicationId, runId,
                logHandlers);
        controller.start();
        return controller;
    }

    /**
     * Creates a new {@link ApplicationBundler} constructed with an empty list of strings.
     */
    private ApplicationBundler createBundler() {
        return new ApplicationBundler(ImmutableList.<String>of());
    }

    /**
     * Bundles {@link ApplicationMasterMain} into a temporary location and registers it
     * under the name "appMaster.jar".
     *
     * @param bundler bundler used to create the jar
     * @param localResources map that receives the "appMaster.jar" entry
     * @throws IOException if creating the bundle fails
     */
    private void createAppMasterJar(ApplicationBundler bundler, Map<String, LocalResource> localResources)
            throws IOException {
        LOG.debug("Create and copy appMaster.jar");
        Location location = createTempLocation("appMaster", ".jar");
        bundler.createBundle(location, ApplicationMasterMain.class);
        LOG.debug("Done appMaster.jar");

        localResources.put("appMaster.jar", YarnUtils.createLocalResource(location));
    }

    /**
     * Bundles {@link WeaveContainerMain}, the user supplied dependency classes, the class of every
     * runnable in the specification and the user supplied resources into a temporary location, and
     * registers it under the name "container.jar".
     *
     * <p>Runnable classes are loaded through this class's own class loader; a class that cannot be
     * found is reported through {@link Throwables#propagate(Throwable)}.
     *
     * @param bundler bundler used to create the jar
     * @param localResources map that receives the "container.jar" entry
     * @throws IOException if creating the bundle fails
     */
    private void createContainerJar(ApplicationBundler bundler, Map<String, LocalResource> localResources)
            throws IOException {
        try {
            Set<Class<?>> classes = Sets.newIdentityHashSet();
            classes.add(WeaveContainerMain.class);
            classes.addAll(dependencies);

            ClassLoader classLoader = getClass().getClassLoader();
            for (RuntimeSpecification spec : weaveSpec.getRunnables().values()) {
                classes.add(classLoader.loadClass(spec.getRunnableSpecification().getClassName()));
            }

            LOG.debug("Create and copy container.jar");
            Location location = createTempLocation("container", ".jar");
            bundler.createBundle(location, classes, resources);
            LOG.debug("Done container.jar");

            localResources.put("container.jar", YarnUtils.createLocalResource(location));

        } catch (ClassNotFoundException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Based on the given {@link WeaveSpecification}, upload LocalFiles to Yarn Cluster.
     *
     * <p>Files whose URI scheme is "hdfs" are referenced in place through the location factory; all
     * other files are copied into a temporary location. For every file a new {@link DefaultLocalFile}
     * pointing at the resulting location is recorded under the runnable name.
     *
     * @param weaveSpec The {@link WeaveSpecification} for populating resource.
     * @param localFiles A Multimap to store runnable name to transformed LocalFiles.
     * @throws IOException if copying a file fails
     */
    private void populateRunnableResources(WeaveSpecification weaveSpec, Multimap<String, LocalFile> localFiles)
            throws IOException {

        LOG.debug("Populating Runnable LocalFiles");
        for (Map.Entry<String, RuntimeSpecification> entry : weaveSpec.getRunnables().entrySet()) {
            String name = entry.getKey();
            for (LocalFile localFile : entry.getValue().getLocalFiles()) {
                Location location;

                URI uri = localFile.getURI();
                if ("hdfs".equals(uri.getScheme())) {
                    // Assuming the location factory is HDFS one. If it is not, it will failed, which is the correct behavior.
                    location = locationFactory.create(uri);
                } else {
                    URL url = uri.toURL();
                    LOG.debug("Create and copy {} : {}", name, url);
                    // Temp file suffix is repeated with the file name to preserve original suffix for expansion.
                    String path = url.getFile();
                    location = copyFromURL(url,
                            createTempLocation(localFile.getName(), path.substring(path.lastIndexOf('/') + 1)));
                    LOG.debug("Done {} : {}", name, url);
                }

                localFiles.put(name, new DefaultLocalFile(localFile.getName(), location.toURI(),
                        location.lastModified(), location.length(), localFile.isArchive(), localFile.getPattern()));
            }
        }
        LOG.debug("Done Runnable LocalFiles");
    }

    /**
     * Serializes the specification as JSON into a temporary location and registers it under the name
     * "weaveSpec.json". Before serialization, the local files of every runnable are replaced by the
     * entries found in {@code localFiles} for that runnable's name.
     *
     * @param spec the specification to serialize
     * @param localFiles runnable name to the local files that should appear in the serialized form
     * @param localResources map that receives the "weaveSpec.json" entry
     * @throws IOException if writing the file fails
     */
    private void saveWeaveSpec(WeaveSpecification spec, final Multimap<String, LocalFile> localFiles,
            Map<String, LocalResource> localResources) throws IOException {
        // Rewrite LocalFiles inside weaveSpec
        Map<String, RuntimeSpecification> runtimeSpec = Maps.transformEntries(spec.getRunnables(),
                new Maps.EntryTransformer<String, RuntimeSpecification, RuntimeSpecification>() {
                    @Override
                    public RuntimeSpecification transformEntry(String key, RuntimeSpecification value) {
                        return new DefaultRuntimeSpecification(value.getName(), value.getRunnableSpecification(),
                                value.getResourceSpecification(), localFiles.get(key));
                    }
                });

        // Serialize into a local temp file.
        LOG.debug("Create and copy weaveSpec.json");
        Location location = createTempLocation("weaveSpec", ".json");
        Writer writer = new OutputStreamWriter(location.getOutputStream(), Charsets.UTF_8);
        try {
            WeaveSpecificationAdapter.create()
                    .toJson(new DefaultWeaveSpecification(spec.getName(), runtimeSpec, spec.getOrders()), writer);
        } finally {
            writer.close();
        }
        LOG.debug("Done weaveSpec.json");
        localResources.put("weaveSpec.json", YarnUtils.createLocalResource(location));
    }

    /**
     * Copies the classpath resource "logback-template.xml" into a temporary location and registers it
     * under the name "logback-template.xml".
     *
     * @param localResources map that receives the "logback-template.xml" entry
     * @throws IOException if copying the resource fails
     * @throws NullPointerException if the resource cannot be found by this class's class loader
     */
    private void saveLogback(Map<String, LocalResource> localResources) throws IOException {
        LOG.debug("Create and copy logback-template.xml");
        // getResource returns null for a missing resource; fail here with a message naming it
        // instead of with a bare NullPointerException while opening the stream.
        URL logbackUrl = Preconditions.checkNotNull(
                getClass().getClassLoader().getResource("logback-template.xml"),
                "Resource %s not found on the classpath", "logback-template.xml");
        Location location = copyFromURL(logbackUrl, createTempLocation("logback-template", ".xml"));
        LOG.debug("Done logback-template.xml");
        localResources.put("logback-template.xml", YarnUtils.createLocalResource(location));
    }

    /**
     * Creates the launcher.jar.
     *
     * <p>The jar contains the class files reported by the dependency scan of {@link WeaveLauncher}
     * (each of which must have a name starting with the launcher's class name) and, if any class
     * paths were configured, an entry named "classpath" holding them joined with ':'.
     *
     * @param localResources map that receives the "launcher.jar" entry
     * @throws IOException if writing the jar fails
     */
    private void saveLauncher(Map<String, LocalResource> localResources) throws URISyntaxException, IOException {

        LOG.debug("Create and copy launcher.jar");
        Location location = createTempLocation("launcher", ".jar");

        final String launcherName = WeaveLauncher.class.getName();

        // Create a jar file with the WeaveLauncher classes and, optionally, a "classpath" entry
        // containing the ':'-joined class paths.
        final JarOutputStream jarOut = new JarOutputStream(location.getOutputStream());
        boolean completed = false;
        try {
            ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
            if (classLoader == null) {
                classLoader = getClass().getClassLoader();
            }
            Dependencies.findClassDependencies(classLoader, new Dependencies.ClassAcceptor() {
                @Override
                public boolean accept(String className, URL classUrl, URL classPathUrl) {
                    Preconditions.checkArgument(className.startsWith(launcherName),
                            "Launcher jar should not have dependencies: %s", className);
                    try {
                        jarOut.putNextEntry(new JarEntry(className.replace('.', '/') + ".class"));
                        InputStream is = classUrl.openStream();
                        try {
                            ByteStreams.copy(is, jarOut);
                        } finally {
                            is.close();
                        }
                    } catch (IOException e) {
                        throw Throwables.propagate(e);
                    }
                    return true;
                }
            }, launcherName);

            if (!classPaths.isEmpty()) {
                jarOut.putNextEntry(new JarEntry("classpath"));
                jarOut.write(Joiner.on(':').join(classPaths).getBytes(Charsets.UTF_8));
            }
            completed = true;
        } finally {
            if (completed) {
                // Normal path: a failure to finish the jar must be reported to the caller.
                jarOut.close();
            } else {
                // Failure path: still release the stream, but do not let a secondary close error
                // replace the exception that is already propagating.
                try {
                    jarOut.close();
                } catch (IOException e) {
                    LOG.warn("Failed to close launcher.jar output stream after an earlier failure", e);
                }
            }
        }
        LOG.debug("Done launcher.jar");
        localResources.put("launcher.jar", YarnUtils.createLocalResource(location));
    }

    /**
     * Copies the Kafka archive classpath resource into a temporary location and registers it under
     * the name "kafka.tgz" with resource type {@link LocalResourceType#ARCHIVE}.
     *
     * @param localResources map that receives the "kafka.tgz" entry
     * @throws IOException if copying the resource fails
     * @throws NullPointerException if the archive cannot be found by this class's class loader
     */
    private void saveKafka(Map<String, LocalResource> localResources) throws IOException {
        LOG.debug("Copy kafka.tgz");
        // getResource returns null for a missing resource; fail here with a message naming it
        // instead of with a bare NullPointerException while opening the stream.
        URL kafkaUrl = Preconditions.checkNotNull(getClass().getClassLoader().getResource(KAFKA_ARCHIVE),
                "Resource %s not found on the classpath", KAFKA_ARCHIVE);
        Location location = copyFromURL(kafkaUrl, createTempLocation("kafka", ".tgz"));
        LOG.debug("Done kafka.tgz");
        LocalResource localResource = YarnUtils.createLocalResource(location);
        localResource.setType(LocalResourceType.ARCHIVE);
        localResources.put("kafka.tgz", localResource);
    }

    /**
     * Serializes the application and runnable arguments as JSON into a temporary location and
     * registers it under the name "arguments.json".
     *
     * @param appArgs application arguments
     * @param runnableArgs runnable name to the arguments for that runnable
     * @param localResources map that receives the "arguments.json" entry
     * @throws IOException if writing the file fails
     */
    private void saveArguments(List<String> appArgs, Multimap<String, String> runnableArgs,
            Map<String, LocalResource> localResources) throws IOException {
        LOG.debug("Create and copy arguments.json");
        Location location = createTempLocation("arguments", ".json");
        Writer writer = new OutputStreamWriter(location.getOutputStream(), Charsets.UTF_8);
        try {
            new GsonBuilder().registerTypeAdapter(Arguments.class, new ArgumentsCodec()).create()
                    .toJson(new Arguments(appArgs, runnableArgs), writer);
        } finally {
            writer.close();
        }

        LOG.debug("Done arguments.json");
        localResources.put("arguments.json", YarnUtils.createLocalResource(location));
    }

    /**
     * Describes the entries of {@code localResources} whose names are in {@code keys} as a JSON list
     * of {@link LocalFile}s, writes that list into a temporary location and registers it under the
     * name "localFiles.json".
     *
     * <p>An entry is marked as an archive when its resource type is anything other than
     * {@link LocalResourceType#FILE}.
     *
     * @param localResources resources registered so far; also receives the "localFiles.json" entry
     * @param keys names of the resources to describe
     * @throws IOException if writing the file fails
     */
    private void saveLocalFiles(Map<String, LocalResource> localResources, Set<String> keys) throws IOException {
        Map<String, LocalFile> localFiles = Maps.transformEntries(
                Maps.filterKeys(localResources, Predicates.in(keys)),
                new Maps.EntryTransformer<String, LocalResource, LocalFile>() {
                    @Override
                    public LocalFile transformEntry(String key, LocalResource value) {
                        try {
                            return new DefaultLocalFile(key,
                                    ConverterUtils.getPathFromYarnURL(value.getResource()).toUri(),
                                    value.getTimestamp(), value.getSize(),
                                    value.getType() != LocalResourceType.FILE, value.getPattern());
                        } catch (URISyntaxException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });

        LOG.debug("Create and copy localFiles.json");
        Location location = createTempLocation("localFiles", ".json");
        Writer writer = new OutputStreamWriter(location.getOutputStream(), Charsets.UTF_8);
        try {
            new GsonBuilder().registerTypeAdapter(LocalFile.class, new LocalFileCodec()).create()
                    .toJson(localFiles.values(), new TypeToken<List<LocalFile>>() {
                    }.getType(), writer);
        } finally {
            writer.close();
        }
        LOG.debug("Done localFiles.json");
        localResources.put("localFiles.json", YarnUtils.createLocalResource(location));
    }

    /**
     * Copies all bytes readable from the given URL into the target location.
     * Both streams are closed before this method returns, whether or not the copy succeeds.
     *
     * @param url source of the bytes
     * @param target location to write to
     * @return the {@code target} location
     * @throws IOException if opening, reading or writing fails
     */
    private Location copyFromURL(URL url, Location target) throws IOException {
        InputStream is = url.openStream();
        try {
            OutputStream os = new BufferedOutputStream(target.getOutputStream());
            try {
                ByteStreams.copy(is, os);
            } finally {
                os.close();
            }
        } finally {
            is.close();
        }
        return target;
    }

    /**
     * Returns a temporary file location with the given suffix, derived from {@code path} appended to
     * the application location. An {@link IOException} is rethrown through
     * {@link Throwables#propagate(Throwable)}.
     */
    private Location createTempLocation(String path, String suffix) {
        try {
            return getAppLocation().append(path).getTempFile(suffix);
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Returns the location {@code /<application name>/<run id>} created through the location factory.
     */
    private Location getAppLocation() {
        return locationFactory.create(String.format("/%s/%s", weaveSpec.getName(), runId.getId()));
    }
}