Source listing: org.apache.hive.hcatalog.templeton.LauncherDelegator.java, from the
Apache Hive HCatalog WebHCat (Templeton) module. This is the base helper class used
by the Templeton delegators that launch child jobs.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.templeton;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.shims.HadoopShimsSecure;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hive.hcatalog.templeton.tool.JobState;
import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
import org.apache.hive.hcatalog.templeton.tool.TempletonStorage;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
import org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage;

/**
 * The helper class for all the Templeton delegator classes that
 * launch child jobs.
 */
public class LauncherDelegator extends TempletonDelegator {
    private static final Logger LOG = LoggerFactory.getLogger(LauncherDelegator.class);

    /** Regex used to locate hive-shims jars on the local classpath. */
    private static final String HIVE_SHIMS_FILENAME_PATTERN = ".*hive-shims.*";

    /** The user the child job should run as; populated by subclasses before launch. */
    protected String runAs = null;

    /** The kind of child job being launched through Templeton. */
    public enum JobType {
        JAR, STREAMING, PIG, HIVE, SQOOP
    }

    /**
     * True when the submitted job requires a secure (SASL) metastore delegation
     * token; see {@link #addHiveMetaStoreTokenArg()}.
     */
    private boolean secureMetastoreAccess = false;

    public LauncherDelegator(AppConfig appConf) {
        super(appConf);
    }

    /**
     * Record ownership/callback metadata for a newly queued job in Templeton storage.
     *
     * @param id       the job id returned by the controller job submission
     * @param user     the user who owns the job
     * @param callback optional URL to notify on job completion
     * @param userArgs the original user-supplied arguments, stored for later retrieval
     * @throws IOException if the job state cannot be written
     */
    public void registerJob(String id, String user, String callback, Map<String, Object> userArgs)
            throws IOException {
        JobState state = null;
        try {
            state = new JobState(id, Main.getAppConfigInstance());
            state.setUser(user);
            state.setCallback(callback);
            state.setUserArgs(userArgs);
        } finally {
            // JobState holds a storage connection; always release it.
            if (state != null) {
                state.close();
            }
        }
    }

    /**
     * Enqueue the TempletonControllerJob directly calling doAs.
     *
     * @param userArgs the original user-supplied arguments, persisted with the job
     * @param callback optional completion-notification URL
     * @param args     fully built launcher command line (see {@link #makeLauncherArgs})
     * @return a bean carrying the id of the queued job
     * @throws QueueException if submission is interrupted or yields no job id
     */
    public EnqueueBean enqueueController(String user, Map<String, Object> userArgs, String callback,
            List<String> args) throws NotAuthorizedException, BusyException, IOException, QueueException {
        try {
            UserGroupInformation ugi = UgiFactory.getUgi(user);

            final long startTime = System.nanoTime();

            String id = queueAsUser(ugi, args);

            long elapsed = (System.nanoTime() - startTime) / 1000000L;
            LOG.debug("queued job {} in {} ms", id, elapsed);

            if (id == null) {
                throw new QueueException("Unable to get job id");
            }

            registerJob(id, user, callback, userArgs);

            return new EnqueueBean(id);
        } catch (InterruptedException e) {
            // Restore the interrupt status before translating to a checked exception,
            // so callers higher up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new QueueException("Unable to launch job " + e);
        }
    }

    /**
     * Run the controller job submission as the given user via doAs.
     *
     * @return the id of the job submitted by the controller, or null if none
     */
    private String queueAsUser(UserGroupInformation ugi, final List<String> args)
            throws IOException, InterruptedException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Launching job: {}", args);
        }
        return ugi.doAs(new PrivilegedExceptionAction<String>() {
            @Override
            public String run() throws Exception {
                String[] array = new String[args.size()];
                TempletonControllerJob ctrl = new TempletonControllerJob(secureMetastoreAccess, appConf);
                ToolRunner.run(ctrl, args.toArray(array));
                return ctrl.getSubmittedId();
            }
        });
    }

    /**
     * Build the full command line for the Templeton controller job: libjars,
     * cache files, Hadoop -D definitions, storage and completion-callback vars.
     *
     * @param statusdir          directory where job status/output is written
     * @param completedUrl       URL Hadoop notifies when the job completes
     * @param copyFiles          files to copy to statusdir when the job finishes
     * @param enablelog          whether to collect task logs after completion
     * @param enableJobReconnect per-request override for job reconnect; when null,
     *                           the cluster-wide default (or false) is used
     * @param jobType            the kind of child job being launched
     * @return the argument list to pass to ToolRunner
     */
    public List<String> makeLauncherArgs(AppConfig appConf, String statusdir, String completedUrl,
            List<String> copyFiles, boolean enablelog, Boolean enableJobReconnect, JobType jobType) {
        ArrayList<String> args = new ArrayList<String>();

        //note that in ToolRunner this is expected to be a local FS path
        //see GenericOptionsParser.getLibJars()
        args.add("-libjars");

        // Include shim and admin specified libjars
        String libJars = String.format("%s,%s", getShimLibjars(), appConf.libJars());
        args.add(libJars);

        addCacheFiles(args, appConf);

        // Hadoop vars
        addDef(args, "user.name", runAs);
        addDef(args, AppConfig.HADOOP_SPECULATIVE_NAME, "false");
        addDef(args, AppConfig.HADOOP_CHILD_JAVA_OPTS, appConf.controllerMRChildOpts());

        // Internal vars
        addDef(args, TempletonControllerJob.STATUSDIR_NAME, statusdir);
        //Use of ToolRunner "-files" option could be considered here
        addDef(args, TempletonControllerJob.COPY_NAME, TempletonUtils.encodeArray(copyFiles));
        addDef(args, TempletonControllerJob.OVERRIDE_CLASSPATH, makeOverrideClasspath(appConf));
        addDef(args, TempletonControllerJob.ENABLE_LOG, Boolean.toString(enablelog));
        addDef(args, TempletonControllerJob.JOB_TYPE, jobType.toString());
        addDef(args, TempletonControllerJob.TEMPLETON_JOB_LAUNCH_TIME_NAME,
                Long.toString(System.currentTimeMillis()));

        if (enableJobReconnect == null) {
            // If enablejobreconnect param was not passed by a user, use a cluster
            // wide default
            if (appConf.enableJobReconnectDefault() != null) {
                enableJobReconnect = Boolean.parseBoolean(appConf.enableJobReconnectDefault());
            } else {
                // default is false
                enableJobReconnect = false;
            }
        }
        addDef(args, TempletonControllerJob.ENABLE_JOB_RECONNECT, Boolean.toString(enableJobReconnect));

        // Hadoop queue information
        addDef(args, "mapred.job.queue.name", appConf.hadoopQueueName());

        // Job vars
        addStorageVars(args);
        addCompletionVars(args, completedUrl);

        return args;
    }

    /**
     * Dynamically determine the list of hive shim jars that need to be added
     * to the Templeton launcher job classpath.
     *
     * @return comma-separated local paths of the shims-common, shims-common-secure
     *         and Hadoop-version-specific shim jars
     */
    private String getShimLibjars() {
        WebHCatJTShim shim = null;
        try {
            shim = ShimLoader.getHadoopShims().getWebHCatShim(appConf, UserGroupInformation.getCurrentUser());
        } catch (IOException e) {
            throw new RuntimeException("Failed to get WebHCatShim", e);
        }

        // Besides the HiveShims jar which is Hadoop version dependent we also
        // always need to include hive shims common jars.
        // NOTE(review): findContainingJar may return null if a jar is not found,
        // which would make the Path constructor throw — TODO confirm intended.
        Path shimCommonJar = new Path(
                TempletonUtils.findContainingJar(ShimLoader.class, HIVE_SHIMS_FILENAME_PATTERN));
        Path shimCommonSecureJar = new Path(
                TempletonUtils.findContainingJar(HadoopShimsSecure.class, HIVE_SHIMS_FILENAME_PATTERN));
        Path shimJar = new Path(TempletonUtils.findContainingJar(shim.getClass(), HIVE_SHIMS_FILENAME_PATTERN));

        return String.format("%s,%s,%s", shimCommonJar.toString(), shimCommonSecureJar.toString(),
                shimJar.toString());
    }

    /** Add Templeton storage configuration (class, root, ZooKeeper) to args. */
    private void addStorageVars(List<String> args) {
        addDef(args, TempletonStorage.STORAGE_CLASS, appConf.get(TempletonStorage.STORAGE_CLASS));
        addDef(args, TempletonStorage.STORAGE_ROOT, appConf.get(TempletonStorage.STORAGE_ROOT));
        addDef(args, ZooKeeperStorage.ZK_HOSTS, appConf.get(ZooKeeperStorage.ZK_HOSTS));
        addDef(args, ZooKeeperStorage.ZK_SESSION_TIMEOUT, appConf.get(ZooKeeperStorage.ZK_SESSION_TIMEOUT));
    }

    /** Add the Hadoop end-of-job notification (callback) settings to args. */
    private void addCompletionVars(List<String> args, String completedUrl) {
        addDef(args, AppConfig.HADOOP_END_RETRY_NAME, appConf.get(AppConfig.CALLBACK_RETRY_NAME));
        addDef(args, AppConfig.HADOOP_END_INTERVAL_NAME, appConf.get(AppConfig.CALLBACK_INTERVAL_NAME));
        addDef(args, AppConfig.HADOOP_END_URL_NAME, completedUrl);
    }

    /**
     * Add files to the Distributed Cache for the controller job.
     */
    public static void addCacheFiles(List<String> args, AppConfig appConf) {
        String overrides = appConf.overrideJarsString();
        if (overrides != null) {
            args.add("-files");
            args.add(overrides);
        }
    }

    /**
     * Create the override classpath, which will be added to
     * HADOOP_CLASSPATH at runtime by the controller job.
     *
     * @return a colon-separated list of override jar basenames, or null when
     *         no override jars are configured
     */
    public static String makeOverrideClasspath(AppConfig appConf) {
        String[] overrides = appConf.overrideJars();
        if (overrides == null) {
            return null;
        }

        ArrayList<String> cp = new ArrayList<String>();
        for (String fname : overrides) {
            // Only the basename is used; the jars are localized by the Distributed Cache.
            Path p = new Path(fname);
            cp.add(p.getName());
        }
        return StringUtils.join(":", cp);
    }

    /**
     * Add a Hadoop command line definition to args if the value is
     * not null.
     */
    public static void addDef(List<String> args, String name, String val) {
        if (val != null) {
            args.add("-D");
            args.add(name + "=" + val);
        }
    }

    /**
     * This is called by subclasses when they determined that the submitted job requires
     * metastore access (e.g. Pig job that uses HCatalog).  This then determines if
     * secure access is required and causes TempletonControllerJob to set up a delegation token.
     * @see TempletonControllerJob
     */
    void addHiveMetaStoreTokenArg() {
        //in order for this to work hive-site.xml must be on the classpath
        HiveConf hiveConf = new HiveConf();
        if (!hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL)) {
            return;
        }
        secureMetastoreAccess = true;
    }
}