org.apache.oozie.service.HadoopAccessorService.java Source code

Introduction

Here is the source code for org.apache.oozie.service.HadoopAccessorService.java. The HadoopAccessorService is the Oozie service that creates JobClient and FileSystem instances configured to work on behalf of the proxied end user.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.oozie.service;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.action.hadoop.JavaActionExecutor;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.JobUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.PrivilegedExceptionAction;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.concurrent.ConcurrentHashMap;

/**
 * The HadoopAccessorService returns HadoopAccessor instances configured to work on behalf of a user-group. <p> The
 * default accessor used is the base accessor which just injects the UGI into the configuration instance used to
 * create/obtain JobClient and FileSystem instances.
 */
public class HadoopAccessorService implements Service {

    private static XLog LOG = XLog.getLog(HadoopAccessorService.class);

    public static final String CONF_PREFIX = Service.CONF_PREFIX + "HadoopAccessorService.";
    public static final String JOB_TRACKER_WHITELIST = CONF_PREFIX + "jobTracker.whitelist";
    public static final String NAME_NODE_WHITELIST = CONF_PREFIX + "nameNode.whitelist";
    public static final String HADOOP_CONFS = CONF_PREFIX + "hadoop.configurations";
    public static final String ACTION_CONFS = CONF_PREFIX + "action.configurations";
    public static final String ACTION_CONFS_LOAD_DEFAULT_RESOURCES = ACTION_CONFS + ".load.default.resources";
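
    // Illustrative oozie-site.xml values for the two "*.configurations" properties above
    // (the host:port=dir pairs are examples, not authoritative defaults):
    //   oozie.service.HadoopAccessorService.hadoop.configurations = *=hadoop-conf
    //   oozie.service.HadoopAccessorService.action.configurations = *=action-conf
    // Relative directories are resolved against the Oozie configuration directory;
    // the "*" entry is the fallback used when no exact host:port match exists.
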
    public static final String KERBEROS_AUTH_ENABLED = CONF_PREFIX + "kerberos.enabled";
    public static final String KERBEROS_KEYTAB = CONF_PREFIX + "keytab.file";
    public static final String KERBEROS_PRINCIPAL = CONF_PREFIX + "kerberos.principal";
    public static final Text MR_TOKEN_ALIAS = new Text("oozie mr token");

    protected static final String OOZIE_HADOOP_ACCESSOR_SERVICE_CREATED = "oozie.HadoopAccessorService.created";
    /** The Kerberos principal for the job tracker.*/
    protected static final String JT_PRINCIPAL = "mapreduce.jobtracker.kerberos.principal";
    /** The Kerberos principal for the resource manager.*/
    protected static final String RM_PRINCIPAL = "yarn.resourcemanager.principal";
    protected static final String HADOOP_JOB_TRACKER = "mapred.job.tracker";
    protected static final String HADOOP_JOB_TRACKER_2 = "mapreduce.jobtracker.address";
    protected static final String HADOOP_YARN_RM = "yarn.resourcemanager.address";
    private static final Map<String, Text> mrTokenRenewers = new HashMap<String, Text>();

    private static Configuration cachedConf;

    private static final String DEFAULT_ACTIONNAME = "default";

    private Set<String> jobTrackerWhitelist = new HashSet<String>();
    private Set<String> nameNodeWhitelist = new HashSet<String>();
    private Map<String, Configuration> hadoopConfigs = new HashMap<String, Configuration>();
    private Map<String, File> actionConfigDirs = new HashMap<String, File>();
    private Map<String, Map<String, XConfiguration>> actionConfigs = new HashMap<String, Map<String, XConfiguration>>();

    private UserGroupInformationService ugiService;

    /**
     * Supported filesystem schemes for namespace federation
     */
    public static final String SUPPORTED_FILESYSTEMS = CONF_PREFIX + "supported.filesystems";
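
    // Illustrative value (an example, not necessarily the shipped default):
    //   oozie.service.HadoopAccessorService.supported.filesystems = hdfs,hftp,webhdfs
    // A single "*" entry disables the scheme check and allows any filesystem scheme.
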
    private Set<String> supportedSchemes;
    private boolean allSchemesSupported;

    public void init(Services services) throws ServiceException {
        this.ugiService = services.get(UserGroupInformationService.class);
        init(services.getConf());
    }

    //for testing purposes, see XFsTestCase
    public void init(Configuration conf) throws ServiceException {
        for (String name : ConfigurationService.getStrings(conf, JOB_TRACKER_WHITELIST)) {
            String tmp = name.toLowerCase().trim();
            if (tmp.length() == 0) {
                continue;
            }
            jobTrackerWhitelist.add(tmp);
        }
        LOG.info("JOB_TRACKER_WHITELIST :" + jobTrackerWhitelist.toString() + ", Total entries :"
                + jobTrackerWhitelist.size());
        for (String name : ConfigurationService.getStrings(conf, NAME_NODE_WHITELIST)) {
            String tmp = name.toLowerCase().trim();
            if (tmp.length() == 0) {
                continue;
            }
            nameNodeWhitelist.add(tmp);
        }
        LOG.info("NAME_NODE_WHITELIST :" + nameNodeWhitelist.toString() + ", Total entries :"
                + nameNodeWhitelist.size());

        boolean kerberosAuthOn = ConfigurationService.getBoolean(conf, KERBEROS_AUTH_ENABLED);
        LOG.info("Oozie Kerberos Authentication [{0}]", (kerberosAuthOn) ? "enabled" : "disabled");
        if (kerberosAuthOn) {
            kerberosInit(conf);
        } else {
            Configuration ugiConf = new Configuration();
            ugiConf.set("hadoop.security.authentication", "simple");
            UserGroupInformation.setConfiguration(ugiConf);
        }

        if (ugiService == null) { //for testing purposes, see XFsTestCase
            this.ugiService = new UserGroupInformationService();
        }

        loadHadoopConfigs(conf);
        preLoadActionConfigs(conf);

        supportedSchemes = new HashSet<String>();
        String[] schemesFromConf = ConfigurationService.getStrings(conf, SUPPORTED_FILESYSTEMS);
        if (schemesFromConf != null) {
            for (String scheme : schemesFromConf) {
                scheme = scheme.trim();
                // If user gives "*", supportedSchemes will be empty, so that checking is not done i.e. all schemes allowed
                if (scheme.equals("*")) {
                    if (schemesFromConf.length > 1) {
                        throw new ServiceException(ErrorCode.E0100, getClass().getName(), SUPPORTED_FILESYSTEMS
                                + " should contain either only wildcard or explicit list, not both");
                    }
                    allSchemesSupported = true;
                }
                supportedSchemes.add(scheme);
            }
        }
    }

    private void kerberosInit(Configuration serviceConf) throws ServiceException {
        try {
            String keytabFile = ConfigurationService.get(serviceConf, KERBEROS_KEYTAB).trim();
            if (keytabFile.length() == 0) {
                throw new ServiceException(ErrorCode.E0026, KERBEROS_KEYTAB);
            }
            String principal = SecurityUtil.getServerPrincipal(
                    serviceConf.get(KERBEROS_PRINCIPAL, "oozie/localhost@LOCALHOST"),
                    InetAddress.getLocalHost().getCanonicalHostName());
            if (principal.length() == 0) {
                throw new ServiceException(ErrorCode.E0026, KERBEROS_PRINCIPAL);
            }
            Configuration conf = new Configuration();
            conf.set("hadoop.security.authentication", "kerberos");
            UserGroupInformation.setConfiguration(conf);
            UserGroupInformation.loginUserFromKeytab(principal, keytabFile);
            LOG.info("Got Kerberos ticket, keytab [{0}], Oozie principal principal [{1}]", keytabFile, principal);
        } catch (ServiceException ex) {
            throw ex;
        } catch (Exception ex) {
            throw new ServiceException(ErrorCode.E0100, getClass().getName(), ex.getMessage(), ex);
        }
    }

    private static final String[] HADOOP_CONF_FILES = { "core-site.xml", "hdfs-site.xml", "mapred-site.xml",
            "yarn-site.xml", "hadoop-site.xml", "ssl-client.xml" };

    private Configuration loadHadoopConf(File dir) throws IOException {
        Configuration hadoopConf = new XConfiguration();
        for (String file : HADOOP_CONF_FILES) {
            File f = new File(dir, file);
            if (f.exists()) {
                InputStream is = new FileInputStream(f);
                Configuration conf = new XConfiguration(is);
                is.close();
                XConfiguration.copy(conf, hadoopConf);
            }
        }
        return hadoopConf;
    }

    private Map<String, File> parseConfigDirs(String[] confDefs, String type) throws ServiceException, IOException {
        Map<String, File> map = new HashMap<String, File>();
        File configDir = new File(ConfigurationService.getConfigurationDirectory());
        for (String confDef : confDefs) {
            if (confDef.trim().length() > 0) {
                String[] parts = confDef.split("=");
                if (parts.length == 2) {
                    String hostPort = parts[0];
                    String confDir = parts[1];
                    File dir = new File(confDir);
                    if (!dir.isAbsolute()) {
                        dir = new File(configDir, confDir);
                    }
                    if (dir.exists()) {
                        map.put(hostPort.toLowerCase(), dir);
                    } else {
                        throw new ServiceException(ErrorCode.E0100, getClass().getName(),
                                "could not find " + type + " configuration directory: " + dir.getAbsolutePath());
                    }
                } else {
                    throw new ServiceException(ErrorCode.E0100, getClass().getName(),
                            "Incorrect " + type + " configuration definition: " + confDef);
                }
            }
        }
        return map;
    }

    private void loadHadoopConfigs(Configuration serviceConf) throws ServiceException {
        try {
            Map<String, File> map = parseConfigDirs(ConfigurationService.getStrings(serviceConf, HADOOP_CONFS),
                    "hadoop");
            for (Map.Entry<String, File> entry : map.entrySet()) {
                hadoopConfigs.put(entry.getKey(), loadHadoopConf(entry.getValue()));
            }
        } catch (ServiceException ex) {
            throw ex;
        } catch (Exception ex) {
            throw new ServiceException(ErrorCode.E0100, getClass().getName(), ex.getMessage(), ex);
        }
    }

    private void preLoadActionConfigs(Configuration serviceConf) throws ServiceException {
        try {
            actionConfigDirs = parseConfigDirs(ConfigurationService.getStrings(serviceConf, ACTION_CONFS),
                    "action");
            for (String hostport : actionConfigDirs.keySet()) {
                actionConfigs.put(hostport, new ConcurrentHashMap<String, XConfiguration>());
            }
        } catch (ServiceException ex) {
            throw ex;
        } catch (Exception ex) {
            throw new ServiceException(ErrorCode.E0100, getClass().getName(), ex.getMessage(), ex);
        }
    }

    public void destroy() {
    }

    public Class<? extends Service> getInterface() {
        return HadoopAccessorService.class;
    }

    private UserGroupInformation getUGI(String user) throws IOException {
        return ugiService.getProxyUser(user);
    }

    /**
     * Creates a JobConf using the site configuration for the specified hostname:port.
     * <p>
     * If the specified hostname:port is not defined it falls back to the '*' site
     * configuration if available. If the '*' site configuration is not available,
     * the JobConf has all Hadoop defaults.
     *
     * @param hostPort hostname:port to lookup Hadoop site configuration.
     * @return a JobConf with the corresponding site configuration for hostPort.
     */
    public JobConf createJobConf(String hostPort) {
        JobConf jobConf = new JobConf(getCachedConf());
        XConfiguration.copy(getConfiguration(hostPort), jobConf);
        jobConf.setBoolean(OOZIE_HADOOP_ACCESSOR_SERVICE_CREATED, true);
        return jobConf;
    }

    public Configuration getCachedConf() {
        if (cachedConf == null) {
            loadCachedConf();
        }
        return cachedConf;
    }

    private void loadCachedConf() {
        cachedConf = new Configuration();
        //for lazy loading
        cachedConf.size();
    }

    private XConfiguration loadActionConf(String hostPort, String action) {
        File dir = actionConfigDirs.get(hostPort);
        XConfiguration actionConf = new XConfiguration();
        if (dir != null) {
            // See if a dir with the action name exists.   If so, load all the xml files in the dir
            File actionConfDir = new File(dir, action);

            if (actionConfDir.exists() && actionConfDir.isDirectory()) {
                LOG.info("Processing configuration files under [{0}]" + " for action [{1}] and hostPort [{2}]",
                        actionConfDir.getAbsolutePath(), action, hostPort);
                File[] xmlFiles = actionConfDir.listFiles(new FilenameFilter() {
                    @Override
                    public boolean accept(File dir, String name) {
                        return name.endsWith(".xml");
                    }
                });
                Arrays.sort(xmlFiles, new Comparator<File>() {
                    @Override
                    public int compare(File o1, File o2) {
                        return o1.getName().compareTo(o2.getName());
                    }
                });
                for (File f : xmlFiles) {
                    if (f.isFile() && f.canRead()) {
                        LOG.info("Processing configuration file [{0}]", f.getName());
                        FileInputStream fis = null;
                        try {
                            fis = new FileInputStream(f);
                            XConfiguration conf = new XConfiguration(fis);
                            XConfiguration.copy(conf, actionConf);
                        } catch (IOException ex) {
                            LOG.warn("Could not read file [{0}] for action [{1}] configuration and hostPort [{2}]",
                                    f.getAbsolutePath(), action, hostPort);
                        } finally {
                            if (fis != null) {
                                try {
                                    fis.close();
                                } catch (IOException ioe) {
                                }
                            }
                        }
                    }
                }
            }
        }

        // Now check for <action.xml>   This way <action.xml> has priority over <action-dir>/*.xml

        File actionConfFile = new File(dir, action + ".xml");
        if (actionConfFile.exists()) {
            try {
                XConfiguration conf = new XConfiguration(new FileInputStream(actionConfFile));
                XConfiguration.copy(conf, actionConf);
            } catch (IOException ex) {
                LOG.warn("Could not read file [{0}] for action [{1}] configuration for hostPort [{2}]",
                        actionConfFile.getAbsolutePath(), action, hostPort);
            }
        }

        return actionConf;
    }

    /**
     * Returns a Configuration containing any defaults for an action for a particular cluster.
     * <p>
     * This configuration is used as default for the action configuration and enables cluster
     * level default values per action.
     *
     * @param hostPort hostname:port to lookup the action default configuration.
     * @param action action name.
     * @return the default configuration for the action for the specified cluster.
     */
    public XConfiguration createActionDefaultConf(String hostPort, String action) {
        hostPort = (hostPort != null) ? hostPort.toLowerCase() : null;
        Map<String, XConfiguration> hostPortActionConfigs = actionConfigs.get(hostPort);
        if (hostPortActionConfigs == null) {
            hostPortActionConfigs = actionConfigs.get("*");
            hostPort = "*";
        }
        XConfiguration actionConf = hostPortActionConfigs.get(action);
        if (actionConf == null) {
            // Doing lazy loading as we don't know all actions upfront; no need to synchronize
            // as it is a read operation and, in case of a race condition, loading and inserting
            // into the Map is idempotent and the action-config Map is a ConcurrentHashMap.

            // We first load the action configuration of type "default".
            // This allows for global configuration across all actions - for example,
            // placing all launchers in one queue and actions in another queue - or
            // configuration that applies to multiple actions, such as config
            // library paths.
            actionConf = loadActionConf(hostPort, DEFAULT_ACTIONNAME);

            // Action specific default configuration will override the default action config

            XConfiguration.copy(loadActionConf(hostPort, action), actionConf);
            hostPortActionConfigs.put(action, actionConf);
        }
        return new XConfiguration(actionConf.toProperties());
    }

    private Configuration getConfiguration(String hostPort) {
        hostPort = (hostPort != null) ? hostPort.toLowerCase() : null;
        Configuration conf = hadoopConfigs.get(hostPort);
        if (conf == null) {
            conf = hadoopConfigs.get("*");
            if (conf == null) {
                conf = new XConfiguration();
            }
        }
        return conf;
    }

    /**
     * Return a JobClient created with the provided user/group.
     *
     * @param user user name to impersonate when creating the JobClient.
     * @param conf JobConf with all necessary information to create the
     *        JobClient.
     * @return JobClient created with the provided user/group.
     * @throws HadoopAccessorException if the client could not be created.
     */
    public JobClient createJobClient(String user, final JobConf conf) throws HadoopAccessorException {
        ParamChecker.notEmpty(user, "user");
        if (!conf.getBoolean(OOZIE_HADOOP_ACCESSOR_SERVICE_CREATED, false)) {
            throw new HadoopAccessorException(ErrorCode.E0903);
        }
        String jobTracker = conf.get(JavaActionExecutor.HADOOP_JOB_TRACKER);
        validateJobTracker(jobTracker);
        try {
            UserGroupInformation ugi = getUGI(user);
            JobClient jobClient = ugi.doAs(new PrivilegedExceptionAction<JobClient>() {
                public JobClient run() throws Exception {
                    return new JobClient(conf);
                }
            });
            Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(getMRDelegationTokenRenewer(conf));
            conf.getCredentials().addToken(MR_TOKEN_ALIAS, mrdt);
            return jobClient;
        } catch (InterruptedException ex) {
            throw new HadoopAccessorException(ErrorCode.E0902, ex.getMessage(), ex);
        } catch (IOException ex) {
            throw new HadoopAccessorException(ErrorCode.E0902, ex.getMessage(), ex);
        }
    }

    /**
     * Return a FileSystem created with the provided user for the specified URI.
     *
     * @param user user name to impersonate when creating the FileSystem.
     * @param uri file system URI.
     * @param conf Configuration with all necessary information to create the FileSystem.
     * @return FileSystem created with the provided user/group.
     * @throws HadoopAccessorException if the filesystem could not be created.
     */
    public FileSystem createFileSystem(String user, final URI uri, final Configuration conf)
            throws HadoopAccessorException {
        ParamChecker.notEmpty(user, "user");
        if (!conf.getBoolean(OOZIE_HADOOP_ACCESSOR_SERVICE_CREATED, false)) {
            throw new HadoopAccessorException(ErrorCode.E0903);
        }

        checkSupportedFilesystem(uri);

        String nameNode = uri.getAuthority();
        if (nameNode == null) {
            nameNode = conf.get("fs.default.name");
            if (nameNode != null) {
                try {
                    nameNode = new URI(nameNode).getAuthority();
                } catch (URISyntaxException ex) {
                    throw new HadoopAccessorException(ErrorCode.E0902, ex.getMessage(), ex);
                }
            }
        }
        validateNameNode(nameNode);

        try {
            UserGroupInformation ugi = getUGI(user);
            return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
                public FileSystem run() throws Exception {
                    return FileSystem.get(uri, conf);
                }
            });
        } catch (InterruptedException ex) {
            throw new HadoopAccessorException(ErrorCode.E0902, ex.getMessage(), ex);
        } catch (IOException ex) {
            throw new HadoopAccessorException(ErrorCode.E0902, ex.getMessage(), ex);
        }
    }

    /**
     * Validate the JobTracker URI against the JobTracker whitelist.
     *
     * @param jobTrackerUri JobTracker URI to validate.
     * @throws HadoopAccessorException if the URI is not in the whitelist.
     */
    protected void validateJobTracker(String jobTrackerUri) throws HadoopAccessorException {
        validate(jobTrackerUri, jobTrackerWhitelist, ErrorCode.E0900);
    }

    /**
     * Validate the NameNode URI against the NameNode whitelist.
     *
     * @param nameNodeUri NameNode URI to validate.
     * @throws HadoopAccessorException if the URI is not in the whitelist.
     */
    protected void validateNameNode(String nameNodeUri) throws HadoopAccessorException {
        validate(nameNodeUri, nameNodeWhitelist, ErrorCode.E0901);
    }

    private void validate(String uri, Set<String> whitelist, ErrorCode error) throws HadoopAccessorException {
        if (uri != null) {
            uri = uri.toLowerCase().trim();
            if (whitelist.size() > 0 && !whitelist.contains(uri)) {
                throw new HadoopAccessorException(error, uri, whitelist);
            }
        }
    }

    public Text getMRDelegationTokenRenewer(JobConf jobConf) throws IOException {
        if (UserGroupInformation.isSecurityEnabled()) { // secure cluster
            return getMRTokenRenewerInternal(jobConf);
        } else {
            return MR_TOKEN_ALIAS; //Doesn't matter what we pass as renewer
        }
    }

    // Package private for unit test purposes
    Text getMRTokenRenewerInternal(JobConf jobConf) throws IOException {
        // Getting renewer correctly for JT principal also though JT in hadoop 1.x does not have
        // support for renewing/cancelling tokens
        String servicePrincipal = jobConf.get(RM_PRINCIPAL, jobConf.get(JT_PRINCIPAL));
        Text renewer;
        if (servicePrincipal != null) { // secure cluster
            renewer = mrTokenRenewers.get(servicePrincipal);
            if (renewer == null) {
                // Mimic org.apache.hadoop.mapred.Master.getMasterPrincipal()
                String target = jobConf.get(HADOOP_YARN_RM, jobConf.get(HADOOP_JOB_TRACKER_2));
                if (target == null) {
                    target = jobConf.get(HADOOP_JOB_TRACKER);
                }
                try {
                    String addr = NetUtils.createSocketAddr(target).getHostName();
                    renewer = new Text(SecurityUtil.getServerPrincipal(servicePrincipal, addr));
                    LOG.info("Delegation Token Renewer details: Principal=" + servicePrincipal + ",Target=" + target
                            + ",Renewer=" + renewer);
                } catch (IllegalArgumentException iae) {
                    renewer = new Text(servicePrincipal.split("[/@]")[0]);
                    LOG.info("Delegation Token Renewer for " + servicePrincipal + " is " + renewer);
                }
                mrTokenRenewers.put(servicePrincipal, renewer);
            }
        } else {
            renewer = MR_TOKEN_ALIAS; //Doesn't matter what we pass as renewer
        }
        return renewer;
    }

    public void addFileToClassPath(String user, final Path file, final Configuration conf) throws IOException {
        ParamChecker.notEmpty(user, "user");
        try {
            UserGroupInformation ugi = getUGI(user);
            ugi.doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    JobUtils.addFileToClassPath(file, conf, null);
                    return null;
                }
            });

        } catch (InterruptedException ex) {
            throw new IOException(ex);
        }

    }

    /**
     * Checks whether the filesystem scheme of the given URI is among the configured list of
     * supported schemes. This keeps the system usable with filesystems other than HDFS.
     *
     * @param uri URI whose scheme is checked.
     * @throws HadoopAccessorException if the scheme is not among the supported filesystems.
     */
    public void checkSupportedFilesystem(URI uri) throws HadoopAccessorException {
        if (allSchemesSupported)
            return;
        String uriScheme = uri.getScheme();
        if (uriScheme != null) { // skip the check if no scheme is given
            if (!supportedSchemes.isEmpty()) {
                LOG.debug("Checking if filesystem " + uriScheme + " is supported");
                if (!supportedSchemes.contains(uriScheme)) {
                    throw new HadoopAccessorException(ErrorCode.E0904, uriScheme, uri.toString());
                }
            }
        }
    }

    public Set<String> getSupportedSchemes() {
        return supportedSchemes;
    }

}
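
Below is a minimal usage sketch, separate from the Apache listing above. It assumes it runs inside an Oozie server where the Services container has already been initialized; the host:port, URI, and user values are placeholders, not real defaults.

import java.net.URI;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.JobConf;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.Services;
import org.apache.oozie.util.XConfiguration;

public class HadoopAccessorServiceUsageSketch {

    public static void main(String[] args) throws Exception {
        // Look up the service from the running Services container
        // (assumes Services has been initialized by the Oozie server).
        HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);

        // JobConf pre-populated with the site configuration registered for this
        // host:port (falls back to the "*" configuration when no exact match
        // exists) and stamped with the accessor-created marker.
        JobConf jobConf = has.createJobConf("resourcemanager-host:8032");

        // Per-cluster default configuration for a given action type, merged from
        // default/*.xml, default.xml, <action>/*.xml and <action>.xml under the
        // configured action configuration directory.
        XConfiguration pigDefaults = has.createActionDefaultConf("resourcemanager-host:8032", "pig");

        // FileSystem obtained as the proxied user; the URI scheme is checked
        // against supported.filesystems and the authority against the NameNode
        // whitelist before the filesystem is created.
        URI uri = new URI("hdfs://namenode-host:8020/user/exampleuser");
        FileSystem fs = has.createFileSystem("exampleuser", uri, jobConf);

        System.out.println("fs=" + fs.getUri() + ", pig default props=" + pigDefaults.size());
    }
}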