org.springframework.data.hadoop.scripting.HdfsScriptRunner.java Source code

Java tutorial

Introduction

Here is the source code for org.springframework.data.hadoop.scripting.HdfsScriptRunner.java

Source

/*
 * Copyright 2011-2013 the original author or authors.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.data.hadoop.scripting;

import java.io.IOException;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.BeanFactoryUtils;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.data.hadoop.fs.DistCp;
import org.springframework.data.hadoop.fs.FsShell;
import org.springframework.data.hadoop.fs.HdfsResourceLoader;
import org.springframework.data.hadoop.fs.SimplerFileSystem;
import org.springframework.util.Assert;

/**
 * Hadoop-customized runner that exposes Hadoop specific variables to scripting languages.
 * 
 * The exposed instances are reused from the enclosing context (using naming conventions or autowiring strategies)
 * or created on demand (in case of lightweight objects).
 * <p/>
 * These are :
 * 
 * <table>
 *  <tr>
 *    <th>Name</th><th>Type</th><th>Description</th>
 *  </tr>
 *  <tr><td>cfg</td><td>org.apache.hadoop.conf.Configuration</td><td>Hadoop Configuration (relies on 'hadoop-configuration' bean or singleton type match)</td></tr>
 *  <tr><td>cl</td><td>java.lang.ClassLoader</td><td>ClassLoader used for executing this script</td></tr>
 *  <tr><td>ctx</td><td>org.springframework.context.ApplicationContext</td><td>Enclosing application context</td></tr>
 *  <tr><td>ctxRL</td><td>org.springframework.io.support.ResourcePatternResolver</td><td>Enclosing application context ResourceLoader (same as ctx)</td></tr>
 *  <tr><td>distcp</td><td>org.springframework.data.hadoop.fs.DistributedCopyUtil</td><td>programmatic access to DistCp</td></tr>
 *    <tr><td>fs</td><td>org.apache.hadoop.fs.FileSystem</td><td>Hadoop File System (relies on 'hadoop-fs' bean or singleton type match, falls back to creating one based on 'cfg')</td></tr>
 *  <tr><td>fsh</td><td>org.springframework.data.hadoop.fs.FsShell</td><td>File System shell, exposing hadoop 'fs' commands as an API</td></tr>
 *  <tr><td>hdfsRL</td><td>org.springframework.data.hadoop.io.HdfsResourceLoader</td><td>HdfsResourceLoader (relies on 'hadoop-resource-loader' or singleton type match, falls back to creating one automatically based on 'cfg')</td></tr>
 * </table>
 * 
 * <p/>
 * Note that the above variables are added only if found (have a non-null value) and the keys are not bound already.
 * To make the runner execute at startup, use {@link #setRunAtStartup(boolean)}.
 * 
 * @see HdfsResourceLoader
 * @see FsShell
 * @see ApplicationContext
 * @author Costin Leau
 */
public class HdfsScriptRunner extends Jsr223ScriptRunner implements ApplicationContextAware {

    private static final Log log = LogFactory.getLog(HdfsScriptRunner.class);

    private ApplicationContext ctx;
    private Configuration configuration;

    /**
     * Binds the Hadoop-specific variables ({@code cfg}, {@code hdfsRL}, {@code fs},
     * {@code distcp}, {@code fsh}, {@code cl}, {@code ctxRL}, {@code ctx}) into the
     * script argument map. Instance lookup is postponed as much as possible and
     * skipped entirely when the caller already bound (and correctly typed) the key.
     *
     * @param args script variable bindings to augment (never overwritten, only added to)
     */
    @Override
    protected void postProcess(Map<String, Object> args) {
        // rather ugly initialization
        // forced to postpone as much as possible the instance lookup
        // if not needed

        String name = "cfg";

        if (!hasBinding(args, name, Configuration.class)) {
            putIfAbsent(args, name, detectCfg(name));
        }

        // may still be null if no Configuration could be detected
        Configuration cfg = (Configuration) args.get(name);

        name = "hdfsRL";

        if (!hasBinding(args, name, HdfsResourceLoader.class)) {
            putIfAbsent(args, name, detectHdfsRL(name, cfg));
        }

        name = "fs";

        if (!hasBinding(args, name, FileSystem.class)) {
            putIfAbsent(args, name, detectFS(name, cfg));
        }

        FileSystem fs = (FileSystem) args.get(name);

        name = "distcp";

        if (!hasBinding(args, name, DistCp.class)) {
            if (cfg == null) {
                log.warn(String.format(
                        "No Hadoop Configuration detected; not binding DistCp as variable '%s' to script", name));
            } else {
                putIfAbsent(args, name, new DistCp(cfg));
            }
        }

        name = "fsh";

        if (!hasBinding(args, name, FsShell.class)) {
            if (cfg == null) {
                log.warn(String.format(
                        "No Hadoop Configuration detected; not binding FsShell as variable '%s' to script", name));
            } else {
                putIfAbsent(args, name, new FsShell(cfg, fs));
            }
        }

        putIfAbsent(args, "cl", ctx.getClassLoader());
        putIfAbsent(args, "ctxRL", ctx);
        putIfAbsent(args, "ctx", ctx);
    }

    /**
     * Returns whether the given key is already bound in the argument map,
     * asserting that the bound value is of the expected type.
     *
     * @param args script variable bindings
     * @param key  variable name to check
     * @param type expected type of the bound value
     * @return {@code true} if the key is bound (to a value of the given type),
     *         {@code false} otherwise
     */
    private boolean hasBinding(Map<String, Object> args, String key, Class<?> type) {
        if (args.containsKey(key)) {
            Assert.isInstanceOf(type, args.get(key), "Invalid property '" + key + "' ");
            // FIX: the original fell through and returned false even when the key
            // was bound, which made every !hasBinding(...) guard in postProcess()
            // a no-op and caused detection to run needlessly for bound keys
            return true;
        }
        return false;
    }

    /**
     * Detects the Hadoop {@link Configuration} to expose: the explicitly set one,
     * the conventional 'hadoopConfiguration' bean, or a unique singleton by type.
     * Logs a warning and returns {@code null} when none can be found.
     */
    private Configuration detectCfg(String variableName) {
        if (configuration != null) {
            return configuration;
        }

        String defaultName = "hadoopConfiguration";
        Class<Configuration> defaultType = Configuration.class;

        if (ctx.containsBean(defaultName))
            return ctx.getBean(defaultName, defaultType);
        String[] names = BeanFactoryUtils.beanNamesForTypeIncludingAncestors(ctx, defaultType);
        // only use a type match when it is unambiguous
        if (names != null && names.length == 1) {
            return ctx.getBean(names[0], defaultType);
        }

        log.warn(String.format(
                "No Hadoop Configuration detected; not binding Configuration as variable '%s' to script",
                variableName));
        return null;
    }

    /**
     * Detects the {@link HdfsResourceLoader} to expose: the conventional
     * 'hadoopResourceLoader' bean, a unique singleton by type, or a fresh
     * instance created from the given configuration. Returns {@code null}
     * (with a warning) when neither a bean nor a configuration is available.
     */
    private HdfsResourceLoader detectHdfsRL(String variableName, Configuration cfg) {
        String defaultName = "hadoopResourceLoader";
        Class<HdfsResourceLoader> defaultType = HdfsResourceLoader.class;

        if (ctx.containsBean(defaultName))
            return ctx.getBean(defaultName, defaultType);
        String[] names = BeanFactoryUtils.beanNamesForTypeIncludingAncestors(ctx, defaultType);
        // only use a type match when it is unambiguous
        if (names != null && names.length == 1) {
            return ctx.getBean(names[0], defaultType);
        }

        // sanity check
        if (cfg == null) {
            log.warn(String.format(
                    "No Hadoop Configuration or ResourceLoader detected; not binding variable '%s' to script",
                    variableName));
            return null;
        }

        // create one instance
        return new HdfsResourceLoader(cfg);
    }

    /**
     * Detects the {@link FileSystem} to expose: the conventional 'hadoopFs' bean,
     * a unique singleton by type, or one created from the given configuration.
     * Beans found under the conventional name and freshly created file systems are
     * wrapped in {@link SimplerFileSystem} (unless they already are one).
     * Returns {@code null} (with a warning) when detection and creation both fail.
     */
    private Object detectFS(String variableName, Configuration detectedCfg) {
        String defaultName = "hadoopFs";
        Class<?> defaultType = FileSystem.class;

        if (ctx.containsBean(defaultName)) {
            FileSystem fs = (FileSystem) ctx.getBean(defaultName, defaultType);
            return (fs instanceof SimplerFileSystem ? fs : new SimplerFileSystem(fs));
        }

        String[] names = BeanFactoryUtils.beanNamesForTypeIncludingAncestors(ctx, defaultType);
        // only use a type match when it is unambiguous
        if (names != null && names.length == 1) {
            return ctx.getBean(names[0], defaultType);
        }

        // sanity check
        if (detectedCfg == null) {
            log.warn(String.format(
                    "No Hadoop Configuration or FileSystem detected; not binding variable '%s' to script",
                    variableName));
            return null;
        }

        try {
            FileSystem fs = FileSystem.get(detectedCfg);
            return (fs instanceof SimplerFileSystem ? fs : new SimplerFileSystem(fs));
        } catch (IOException ex) {
            // FIX: the original format string had a single '%s' but was given three
            // arguments (and contained a stray quote), so the message logged the
            // bean name instead of the script variable name
            log.warn(String.format("Cannot create HDFS file system; not binding variable '%s' to script",
                    variableName), ex);
        }

        return null;
    }

    /**
     * Adds the key/value pair only when the value is non-null and the key is not
     * already bound — existing script bindings are never overwritten.
     */
    private void putIfAbsent(Map<String, Object> arguments, String key, Object value) {
        if (value != null && !arguments.containsKey(key)) {
            arguments.put(key, value);
        }
    }

    @Override
    public void afterPropertiesSet() throws Exception {
        super.afterPropertiesSet();
        Assert.notNull(ctx, "an ApplicationContext is required");
    }

    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        this.ctx = applicationContext;
    }

    /**
     * Sets the Hadoop configuration to use.
     * 
     * @param configuration The configuration to set.
     */
    public void setConfiguration(Configuration configuration) {
        this.configuration = configuration;
    }

}