org.springframework.data.hadoop.mapreduce.ExecutionUtils.java Source code

Introduction

Here is the source code for org.springframework.data.hadoop.mapreduce.ExecutionUtils.java, a package-private helper class from the Spring for Apache Hadoop project. It supports running user code (typically the main class of a Hadoop job jar) inside the current JVM: it traps System.exit() calls, builds parent-last classloaders for job jars, and cleans up the classloader and file-system leaks that such in-process execution tends to cause.

Source

/*
 * Copyright 2011-2013 the original author or authors.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.data.hadoop.mapreduce;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.net.URL;
import java.security.Permission;
import java.security.Policy;
import java.sql.DriverManager;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
import java.util.jar.Manifest;

import javax.imageio.ImageIO;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.Counters;
import org.springframework.core.io.Resource;
import org.springframework.util.ClassUtils;
import org.springframework.util.ReflectionUtils;
import org.springframework.util.StringUtils;

/**
 * Code execution utilities.
 * 
 * @author Costin Leau
 * @author Jarred Li
 */
// NOTE: jars with a nested /classes/ directory could be supported as well, but that
// functionality is disabled since it does not appear to be used in Hadoop.
abstract class ExecutionUtils {

    private static final Log log = LogFactory.getLog(ExecutionUtils.class);

    /**
     * Error thrown when the executed code attempts to call System.exit().
     */
    static class ExitTrapped extends Error {

        private int exitCode;

        ExitTrapped(String permissionName) {
            // extract the status code from the permission name ("exitVM.<status>"
            // on Sun/Oracle JDKs); the offset fallback handles non-Sun JDKs
            int hasDot = permissionName.indexOf(".");
            this.exitCode = Integer
                    .parseInt(hasDot > 0 ? permissionName.substring(hasDot + 1) : permissionName.substring(7));
        }

        public int getExitCode() {
            return exitCode;
        }
    }

    private static Field CLASS_CACHE;
    private static Method UTILS_CONSTRUCTOR_CACHE;

    static {
        // Configuration#CACHE_CLASSES is only present in Hadoop 2.0/CDH4
        CLASS_CACHE = ReflectionUtils.findField(Configuration.class, "CACHE_CLASSES");
        if (CLASS_CACHE != null) {
            ReflectionUtils.makeAccessible(CLASS_CACHE);
        }

        UTILS_CONSTRUCTOR_CACHE = ReflectionUtils.findMethod(org.apache.hadoop.util.ReflectionUtils.class,
                "clearCache");
        if (UTILS_CONSTRUCTOR_CACHE != null) {
            ReflectionUtils.makeAccessible(UTILS_CONSTRUCTOR_CACHE);
        }
    }

    private static final Set<String> JVM_THREAD_NAMES = new HashSet<String>();

    static {
        JVM_THREAD_NAMES.add("system");
        JVM_THREAD_NAMES.add("RMI Runtime");
    }

    private static SecurityManager oldSM = null;

    static void disableSystemExitCall() {
        final SecurityManager securityManager = new SecurityManager() {

            @Override
            public void checkPermission(Permission permission) {
                String name = permission.getName();
                // veto only JVM shutdown ("exitVM" / "exitVM.<status>");
                // every other permission is implicitly granted (no call to super)
                if (name.startsWith("exitVM")) {
                    throw new ExitTrapped(name);
                }
            }
        };

        oldSM = System.getSecurityManager();
        System.setSecurityManager(securityManager);
    }

    static void enableSystemExitCall() {
        System.setSecurityManager(oldSM);
    }
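
    // Note: these two calls are meant to be paired (typically via try/finally)
    // around the user-code invocation - see the usage sketch at the end of the file.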

    static ClassLoader createParentLastClassLoader(Resource jar, ClassLoader parentClassLoader, Configuration cfg) {
        ClassLoader cl = null;

        // sanity check
        if (parentClassLoader == null) {
            parentClassLoader = ClassUtils.getDefaultClassLoader();
            cl = parentClassLoader;
        }

        // check if a custom CL is needed
        if (jar != null) {
            // check if unjarring is required (it's a legacy JAR)
            try {
                if (isLegacyJar(jar)) {
                    URL[] extractedURLs = expandedJarClassPath(jar, cfg);
                    cl = new ParentLastURLClassLoader(extractedURLs, parentClassLoader);
                } else {
                    cl = new ParentLastURLClassLoader(new URL[] { jar.getURL() }, parentClassLoader);
                }

            } catch (IOException e) {
                throw new IllegalStateException("Cannot open jar file", e);
            }
        }

        return cl;
    }
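
    // Illustrative sketch (hypothetical, not part of this class): with a parent-last
    // loader, classes bundled inside the job jar shadow same-named classes visible
    // to the parent, e.g.
    //
    //   ClassLoader cl = createParentLastClassLoader(jarResource, parent, cfg);
    //   Class<?> c = Class.forName("com.example.SomeLib", true, cl);
    //   // c is the jar's copy of com.example.SomeLib, not the parent's
    //
    // where "jarResource" and "com.example.SomeLib" are illustrative assumptions.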

    private static boolean isLegacyJar(Resource jar) throws IOException {
        JarInputStream jis = new JarInputStream(jar.getInputStream());
        JarEntry entry = null;
        try {
            while ((entry = jis.getNextJarEntry()) != null) {
                String name = entry.getName();
                // nested "classes/" detection is disabled - see the note on the class
                if (name.startsWith("lib/") /*|| name.startsWith("classes/")*/) {
                    return true;
                }
            }
        } finally {
            IOUtils.closeStream(jis);
        }
        return false;
    }

    private static URL[] expandedJarClassPath(Resource jar, Configuration cfg) throws IOException {
        // detect base dir
        File baseDir = detectBaseDir(cfg);

        // expand the jar
        unjar(jar, baseDir);

        // build classpath
        List<URL> cp = new ArrayList<URL>();
        cp.add(baseDir.toURI().toURL());

        //cp.add(new File(baseDir + "/classes/").toURI().toURL());
        File[] libs = new File(baseDir, "lib").listFiles();
        if (libs != null) {
            for (File lib : libs) {
                cp.add(lib.toURI().toURL());
            }
        }

        return cp.toArray(new URL[cp.size()]);
    }

    private static File detectBaseDir(Configuration cfg) throws IOException {
        File tmpDir = null;

        if (cfg != null) {
            String tmp = cfg.get("hadoop.tmp.dir");
            if (tmp != null) {
                tmpDir = new File(tmp);
                tmpDir.mkdirs();
                if (!tmpDir.isDirectory()) {
                    tmpDir = null;
                }
            }
        }

        // use createTempFile for a unique name, then turn the file into a directory
        final File workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
        workDir.delete();
        workDir.mkdirs();

        return workDir;
    }

    private static void unjar(Resource jar, File baseDir) throws IOException {
        JarInputStream jis = new JarInputStream(jar.getInputStream());
        JarEntry entry = null;
        try {
            while ((entry = jis.getNextJarEntry()) != null) {
                if (!entry.isDirectory()) {
                    File file = new File(baseDir, entry.getName());
                    // guard against entry names escaping the target dir ("zip slip")
                    if (!file.getCanonicalPath().startsWith(baseDir.getCanonicalPath() + File.separator)) {
                        throw new IOException("Jar entry outside of target dir: " + entry.getName());
                    }
                    if (!file.getParentFile().mkdirs()) {
                        if (!file.getParentFile().isDirectory()) {
                            throw new IOException("Mkdirs failed to create " + file.getParentFile().toString());
                        }
                    }
                    OutputStream out = new FileOutputStream(file);
                    try {
                        byte[] buffer = new byte[8192];
                        int i;
                        while ((i = jis.read(buffer)) != -1) {
                            out.write(buffer, 0, i);
                        }
                    } finally {
                        IOUtils.closeStream(out);
                    }
                }
            }
        } finally {
            IOUtils.closeStream(jis);
        }
    }
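
    // Note: this mirrors the jar expansion Hadoop's own RunJar performs when
    // executing a job jar from the command line.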

    static String mainClass(Resource jar) throws IOException {
        JarInputStream jis = new JarInputStream(jar.getInputStream());
        try {
            Manifest mf = jis.getManifest();
            if (mf != null) {
                String main = mf.getMainAttributes().getValue("Main-Class");
                if (StringUtils.hasText(main)) {
                    return main.replace("/", ".");
                }
            }
            return null;
        } finally {
            IOUtils.closeStream(jis);
        }
    }

    /**
     * Utility method invoked before running custom code; prevents the custom classloader
     * (set as the thread context classloader) from leaking, typically through JDK classes
     * that capture the TCCL during their static initialization.
     */
    static void preventJreTcclLeaks() {
        if (log.isDebugEnabled()) {
            log.debug("Preventing JRE TCCL leaks");
        }

        // get the root CL to be used instead
        ClassLoader sysLoader = ClassLoader.getSystemClassLoader();

        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        try {
            // set the sysCL as the TCCL
            Thread.currentThread().setContextClassLoader(sysLoader);

            //
            // Handle security
            //

            // Policy holds the TCCL as static
            ClassUtils.resolveClassName("javax.security.auth.Policy", sysLoader);
            // since the class init may be lazy, call the method directly
            Policy.getPolicy();
            // Configuration holds the TCCL as static
            // call method with minimal side-effects (since just doing class loading doesn't seem to trigger the static init)
            try {
                javax.security.auth.login.Configuration.getInstance(null, null, (String) null);
            } catch (Exception ex) {
                // ignore
            }
            // seems to cause side-effects/exceptions
            // javax.security.auth.login.Configuration.getConfiguration();
            java.security.Security.getProviders();

            // load the JDBC drivers (used by Hive and co)
            DriverManager.getDrivers();
            // Initialize
            // sun.awt.AppContext.getAppContext()
            ImageIO.getCacheDirectory();

        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
    }
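
    // Illustrative sketch (hypothetical) of the leak the method above prevents:
    // a JDK singleton whose static init runs while a job classloader is the TCCL
    // may capture that loader and pin it for the lifetime of the JVM, e.g.
    //
    //   Thread.currentThread().setContextClassLoader(jobClassLoader);
    //   DriverManager.getDrivers(); // may trigger static init that stores the TCCL
    //   // jobClassLoader is now reachable from a JDK class and cannot be GC'd
    //
    // hence the eager initialization above, done under the system classloader.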

    /**
     * Utility method forcing static initialization of Hadoop classes known to capture
     * the thread context classloader, so that they initialize under the system classloader.
     */
    static void preventHadoopLeaks(ClassLoader hadoopCL) {

        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        try {
            // set the sysCL as the TCCL
            Thread.currentThread().setContextClassLoader(ClassLoader.getSystemClassLoader());

            // fix org.apache.hadoop.mapred.Counters#MAX_COUNTER_LIMIT
            // calling constructor since class loading is lazy
            new Counters();
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
    }

    /**
     * Leak-prevention method that scans the threads running inside the JVM for references
     * to a classloader that should be reclaimed, and swaps in a replacement.
     * 
     * @param leakedClassLoader classloader to be replaced (so it can be garbage collected)
     * @param replacementClassLoader classloader to install in its place
     */
    static void patchLeakedClassLoader(ClassLoader leakedClassLoader, ClassLoader replacementClassLoader) {
        if (log.isDebugEnabled()) {
            log.debug("Patching TCCL leaks");
        }

        replaceTccl(leakedClassLoader, replacementClassLoader);

        fixHadoopReflectionUtilsLeak(leakedClassLoader);
        fixHadoopReflectionUtilsLeak();
        cleanHadoopLocalDirAllocator();
    }

    /**
     * Clean the static LocalDirAllocator#contexts cache.
     */
    private static void cleanHadoopLocalDirAllocator() {
        Field field = ReflectionUtils.findField(LocalDirAllocator.class, "contexts");
        if (field == null) {
            return;
        }
        ReflectionUtils.makeAccessible(field);
        Map<?, ?> contexts = (Map<?, ?>) ReflectionUtils.getField(field, null);
        if (contexts != null) {
            contexts.clear();
        }
    }

    private static void fixHadoopReflectionUtilsLeak(ClassLoader leakedClassLoader) {
        // evict the leaked classloader from Configuration#CACHE_CLASSES in Hadoop 2.0,
        // which otherwise prevents the CL from being recycled
        // this is a best-effort really as the leak can occur again - see HADOOP-8632

        // only available on Hadoop-2.0/CDH4
        if (CLASS_CACHE == null) {
            return;
        }

        Map<?, ?> cache = (Map<?, ?>) ReflectionUtils.getField(CLASS_CACHE, null);
        cache.remove(leakedClassLoader);
    }

    private static void fixHadoopReflectionUtilsLeak() {
        // org.apache.hadoop.util.ReflectionUtils.clearCache();
        if (UTILS_CONSTRUCTOR_CACHE != null) {
            ReflectionUtils.invokeMethod(UTILS_CONSTRUCTOR_CACHE, null);
        }
    }

    private static void replaceTccl(ClassLoader leakedClassLoader, ClassLoader replacementClassLoader) {
        for (Thread thread : threads()) {
            if (thread != null) {
                ClassLoader cl = thread.getContextClassLoader();
                // do identity check to prevent expensive (and potentially dangerous) equals()
                if (leakedClassLoader == cl) {
                    log.warn("Trying to patch leaked cl [" + leakedClassLoader + "] in thread [" + thread + "]");
                    ThreadGroup tg = thread.getThreadGroup();
                    // it's a JVM thread so use the System ClassLoader always
                    boolean debug = log.isDebugEnabled();
                    if (tg != null && JVM_THREAD_NAMES.contains(tg.getName())) {
                        thread.setContextClassLoader(ClassLoader.getSystemClassLoader());
                        if (debug) {
                            log.debug("Replaced leaked cl in thread [" + thread + "] with system classloader");
                        }
                    } else {
                        thread.setContextClassLoader(replacementClassLoader);
                        if (debug) {
                            log.debug(
                                    "Replaced leaked cl in thread [" + thread + "] with " + replacementClassLoader);
                        }
                    }
                }
            }
        }
    }

    /**
     * Closes the default and local {@link FileSystem} instances for the given configuration;
     * most job jars do not close the file systems they use, so this is done on their behalf.
     * 
     * @param cfg configuration used to look up the file systems
     */
    static void shutdownFileSystem(Configuration cfg) {
        FileSystem fs;
        try {
            fs = FileSystem.get(cfg);
            if (fs != null) {
                fs.close();
            }
        } catch (Exception ex) {
            // ignore - best-effort cleanup
        }
        try {
            fs = FileSystem.getLocal(cfg);
            if (fs != null) {
                fs.close();
            }
        } catch (Exception ex) {
            // ignore - best-effort cleanup
        }
    }
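
    // Note: FileSystem.get()/getLocal() return instances from Hadoop's JVM-wide
    // cache; close() also evicts the instance from that cache, so subsequent
    // callers obtain a fresh file system rather than a closed one.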

    /**
     * Returns the threads running inside the current JVM.
     * 
     * @return running threads
     */
    static Thread[] threads() {
        // Could have used the code below, but it is slow since it captures a full
        // stack trace for every thread
        // Set<Thread> threadSet = Thread.getAllStackTraces().keySet();

        // Get the current thread group 
        ThreadGroup tg = Thread.currentThread().getThreadGroup();
        // Find the root thread group
        while (tg.getParent() != null) {
            tg = tg.getParent();
        }

        int threadCountGuess = tg.activeCount() + 50;
        Thread[] threads = new Thread[threadCountGuess];
        int threadCountActual = tg.enumerate(threads);
        // Make sure we don't miss any threads
        while (threadCountActual == threadCountGuess) {
            threadCountGuess *= 2;
            threads = new Thread[threadCountGuess];
            // Note tg.enumerate(Thread[]) silently ignores any threads that
            // can't fit into the array 
            threadCountActual = tg.enumerate(threads);
        }

        // note: the returned array may contain trailing null entries - callers must null-check
        return threads;
    }

    /**
     * Forces an early DFS write so the client-side lease renewal thread is started
     * while the system classloader (rather than the job classloader) is the TCCL.
     */
    static void earlyLeaseDaemonInit(Configuration config) throws IOException {
        ClassLoader cl = config.getClassLoader();
        if (cl instanceof ParentLastURLClassLoader) {
            if (log.isDebugEnabled()) {
                log.debug("Preventing DFS LeaseDaemon TCCL leak");
            }

            FileSystem fs = FileSystem.get(config);
            Thread.currentThread().setContextClassLoader(ClassLoader.getSystemClassLoader());
            Path p = new Path("/tmp/shdp-lease-early-init-" + UUID.randomUUID().toString());
            // create/delete a dummy file - the first write spawns the lease daemon,
            // which inherits the current (system) TCCL
            fs.create(p).close();
            fs.delete(p, false);
        }
    }
}
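
For context, here is a minimal, hypothetical sketch of how a caller might combine these utilities around the execution of a job jar's main class (the actual orchestration in Spring for Apache Hadoop lives in the jar-executing runner classes of the same package). Since ExecutionUtils and its methods are package-private, the sketch assumes a helper in the same package; the class name, the jobJar resource and the argument handling are illustrative assumptions, not part of the library.

package org.springframework.data.hadoop.mapreduce;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

import org.apache.hadoop.conf.Configuration;
import org.springframework.core.io.Resource;

// Hypothetical helper - a usage sketch, not part of Spring for Apache Hadoop
class ExecutionUtilsUsageSketch {

    static int runJarMain(Resource jobJar, String[] args, Configuration cfg) throws Exception {
        ClassLoader original = Thread.currentThread().getContextClassLoader();
        // parent-last loader so the jar's bundled dependencies win over the host's
        ClassLoader jobCl = ExecutionUtils.createParentLastClassLoader(jobJar, original, cfg);
        Thread.currentThread().setContextClassLoader(jobCl);

        // intercept System.exit() calls made by the jar
        ExecutionUtils.disableSystemExitCall();
        try {
            String mainClass = ExecutionUtils.mainClass(jobJar);
            if (mainClass == null) {
                throw new IllegalArgumentException("no Main-Class manifest entry in " + jobJar);
            }
            Class<?> clazz = Class.forName(mainClass, true, jobCl);
            Method main = clazz.getMethod("main", String[].class);
            main.invoke(null, (Object) args);
            return 0;
        } catch (InvocationTargetException ite) {
            // a trapped System.exit() surfaces as the cause of the reflection exception
            if (ite.getCause() instanceof ExecutionUtils.ExitTrapped) {
                return ((ExecutionUtils.ExitTrapped) ite.getCause()).getExitCode();
            }
            throw ite;
        } finally {
            ExecutionUtils.enableSystemExitCall();
            Thread.currentThread().setContextClassLoader(original);
            // undo the classloader and file-system leaks caused by the foreign code
            ExecutionUtils.patchLeakedClassLoader(jobCl, original);
            ExecutionUtils.shutdownFileSystem(cfg);
        }
    }
}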