voldemort.store.readwrite.mr.HadoopRWStoreJobRunner.java Source code

Introduction

Here is the source code for voldemort.store.readwrite.mr.HadoopRWStoreJobRunner.java, a command-line runner for building read-write Voldemort stores with Hadoop.

Source

/*
 * Copyright 2010 LinkedIn, Inc
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package voldemort.store.readwrite.mr;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import joptsimple.OptionParser;
import joptsimple.OptionSet;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.jdom.JDOMException;

import voldemort.cluster.Cluster;
import voldemort.server.VoldemortConfig;
import voldemort.store.StoreDefinition;
import voldemort.utils.CmdUtils;
import voldemort.utils.ReflectUtils;
import voldemort.xml.ClusterMapper;
import voldemort.xml.StoreDefinitionsMapper;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableCollection;

/**
 * A runner class to facilitate launching HadoopRWStoreBuilder from the
 * command line.
 */
@SuppressWarnings("deprecation")
public class HadoopRWStoreJobRunner extends Configured implements Tool {

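    /**
     * Prints the command-line usage, including the generic Hadoop options, to
     * standard error. If a non-null exception is supplied, its stack trace is
     * printed after the usage text.
     */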
    private static void printUsage(OptionParser parser, Exception e) throws IOException {
        System.err.println("Usage: $VOLDEMORT_HOME/bin/hadoop-build-readwrite-store.sh \\");
        System.err.println("          [genericOptions] [options]\n");
        System.err.println("Options:");
        parser.printHelpOn(System.err);
        System.err.println();
        ToolRunner.printGenericCommandUsage(System.err);

        if (e != null) {
            System.err.println("\nAn exception ocurred:");
            e.printStackTrace(System.err);
        }
    }

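    /**
     * Builds the jopt-simple parser describing all command-line options
     * supported by this runner.
     */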
    private static OptionParser configureParser() {
        OptionParser parser = new OptionParser();
        parser.accepts("input", "input file(s) for the Map step.").withRequiredArg();
        parser.accepts("temp", "temporary directory for the Reduce dump.").withRequiredArg();
        parser.accepts("mapper", "store builder mapper class.").withRequiredArg();
        parser.accepts("cluster", "local path to cluster.xml.").withRequiredArg();
        parser.accepts("storedefinitions", "local path to stores.xml.").withRequiredArg();
        parser.accepts("storename", "store name from store definition.").withRequiredArg();
        parser.accepts("inputformat", "JavaClassName (default=text).").withRequiredArg();
        parser.accepts("reducerspernode", "number of reducers per node (default=1)").withRequiredArg()
                .ofType(Integer.class);
        parser.accepts("jar", "mapper class jar if not in $HADOOP_CLASSPATH.").withRequiredArg();
        parser.accepts("vectornodeid", "node id whose vector clock to set (default=master)").withRequiredArg()
                .ofType(Integer.class);
        parser.accepts("vectorversion", "version of vector clock (default=1)").withRequiredArg().ofType(Long.class);
        parser.accepts("help", "print usage information");
        return parser;
    }

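    /**
     * Parses the command-line options, loads the cluster and store
     * definitions, resolves the mapper and input format classes, registers the
     * dependency jars, and launches the HadoopRWStoreBuilder job.
     */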
    @SuppressWarnings("unchecked")
    public int run(String[] args) throws Exception {

        OptionParser parser = configureParser();
        OptionSet options = parser.parse(args);

        if (options.has("help")) {
            printUsage(parser, null);
            System.exit(0);
        }

        Set<String> missing = CmdUtils.missing(options, "input", "temp", "mapper", "cluster", "storedefinitions",
                "storename");

        if (missing.size() > 0) {
            System.err.println("Missing required arguments: " + Joiner.on(", ").join(missing) + "\n");
            printUsage(parser, null);
            System.exit(1);
        }

        File clusterFile = new File((String) options.valueOf("cluster"));
        Cluster cluster = new ClusterMapper().readCluster(new BufferedReader(new FileReader(clusterFile)));
        File storeDefFile = new File((String) options.valueOf("storedefinitions"));
        String storeName = (String) options.valueOf("storename");
        Path inputPath = new Path((String) options.valueOf("input"));
        Path tempPath = new Path((String) options.valueOf("temp"));

        List<StoreDefinition> stores;
        stores = new StoreDefinitionsMapper().readStoreList(new BufferedReader(new FileReader(storeDefFile)));
        StoreDefinition storeDef = null;
        for (StoreDefinition def : stores) {
            if (def.getName().equals(storeName))
                storeDef = def;
        }

        if (storeDef == null) {
            System.err.println("Missing store definition for store name '" + storeName + "'");
            printUsage(parser, null);
            System.exit(1);
        }

        List<String> addJars = new ArrayList<String>();
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
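        // If an explicit jar is given, load the mapper class from it and make
        // sure the jar itself is shipped with the job as well.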
        if (options.has("jar")) {
            String jar = (String) options.valueOf("jar");
            URL[] urls = new URL[1];
            urls[0] = new File(jar).toURI().toURL();
            cl = new URLClassLoader(urls);
            addJars.add(jar);
        }

        Class<? extends AbstractRWHadoopStoreBuilderMapper<?, ?>> mapperClass = (Class<? extends AbstractRWHadoopStoreBuilderMapper<?, ?>>) ReflectUtils
                .loadClass((String) options.valueOf("mapper"), cl);

        Class<? extends InputFormat<?, ?>> inputFormatClass = TextInputFormat.class;
        if (options.has("inputformat")) {
            String inputFormatClassName = (String) options.valueOf("inputformat");
            if (!inputFormatClassName.equalsIgnoreCase("TextInputFormat")) {
                inputFormatClass = (Class<? extends InputFormat<?, ?>>) ReflectUtils.loadClass(inputFormatClassName,
                        cl);
            }
        }

        if (inputFormatClass == null) {
            inputFormatClass = TextInputFormat.class;
        }

        int vectorNodeId;
        if (options.has("vectornodeid")) {
            vectorNodeId = (Integer) options.valueOf("vectornodeid");
        } else {
            vectorNodeId = -1; // To denote master
        }

        long vectorNodeVersion;
        if (options.has("vectorversion")) {
            vectorNodeVersion = (Long) options.valueOf("vectorversion");
        } else {
            vectorNodeVersion = 1L;
        }

        int reducersPerNode;
        if (options.has("reducerspernode")) {
            reducersPerNode = (Integer) options.valueOf("reducerspernode");
        } else {
            reducersPerNode = 1;
        }

        Configuration conf = getConf();

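        // Classes whose containing jars must be visible to the map/reduce
        // tasks; the jars are added to the job's "tmpjars" setting below.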
        Class<?>[] deps = new Class<?>[] { ImmutableCollection.class, JDOMException.class, VoldemortConfig.class,
                HadoopRWStoreJobRunner.class, mapperClass, com.google.protobuf.Message.class,
                org.jdom.Content.class, com.google.common.collect.ImmutableList.class,
                org.apache.commons.io.IOUtils.class };

        addDepJars(conf, deps, addJars);

        HadoopRWStoreBuilder builder = new HadoopRWStoreBuilder(conf, mapperClass, inputFormatClass, cluster,
                storeDef, reducersPerNode, vectorNodeId, vectorNodeVersion, tempPath, inputPath);

        builder.build();
        return 0;
    }

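    /**
     * Looks up the given class on this class's own classloader and returns
     * the jar file or base directory that contains it, or null if it cannot
     * be found.
     */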
    public static String findInClasspath(String className) {
        return findInClasspath(className, HadoopRWStoreJobRunner.class.getClassLoader());
    }

    /**
     * @return a jar file path or a base directory or null if not found.
     */
    public static String findInClasspath(String className, ClassLoader loader) {

        String relPath = className;
        relPath = relPath.replace('.', '/');
        relPath += ".class";
        java.net.URL classUrl = loader.getResource(relPath);

        String codePath;
        if (classUrl != null) {
            boolean inJar = classUrl.getProtocol().equals("jar");
            codePath = classUrl.toString();
            if (codePath.startsWith("jar:")) {
                codePath = codePath.substring("jar:".length());
            }
            if (codePath.startsWith("file:")) { // can have both
                codePath = codePath.substring("file:".length());
            }
            if (inJar) {
                // A jar spec: remove class suffix in
                // /path/my.jar!/package/Class
                int bang = codePath.lastIndexOf('!');
                codePath = codePath.substring(0, bang);
            } else {
                // A class spec: remove the /my/package/Class.class portion
                int pos = codePath.lastIndexOf(relPath);
                if (pos == -1) {
                    throw new IllegalArgumentException(
                            "invalid codePath: className=" + className + " codePath=" + codePath);
                }
                codePath = codePath.substring(0, pos);
            }
        } else {
            codePath = null;
        }
        return codePath;
    }

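    /**
     * Locates the jar (or directory) providing each dependency class, adds
     * the additional jars, and merges everything into the job's "tmpjars"
     * configuration so that Hadoop ships the jars along with the job.
     */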
    public static void addDepJars(Configuration conf, Class<?>[] deps, List<String> additionalJars)
            throws IOException {
        FileSystem localFs = FileSystem.getLocal(conf);
        Set<String> depJars = new HashSet<String>();
        for (Class<?> dep : deps) {
            String tmp = findInClasspath(dep.getCanonicalName());
            if (tmp != null) {
                Path path = new Path(tmp);
                depJars.add(path.makeQualified(localFs).toString());
            }
        }

        for (String additional : additionalJars) {
            Path path = new Path(additional);
            depJars.add(path.makeQualified(localFs).toString());
        }

        String[] tmpjars = conf.get("tmpjars", "").split(",");
        for (String tmpjar : tmpjars) {
            if (!StringUtils.isEmpty(tmpjar)) {
                depJars.add(tmpjar.trim());
            }
        }
        conf.set("tmpjars", StringUtils.join(depJars.iterator(), ','));
    }

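    /**
     * Entry point: runs this tool via ToolRunner so that the generic Hadoop
     * options are honored, and exits with the tool's return code.
     */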
    public static void main(String[] args) {
        int res;
        try {
            res = ToolRunner.run(new Configuration(), new HadoopRWStoreJobRunner(), args);
            System.exit(res);
        } catch (Exception e) {
            e.printStackTrace();
            System.err.print("\nTry '--help' for more information.");
            System.exit(1);
        }
    }
}
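
Usage example

The runner is normally invoked through $VOLDEMORT_HOME/bin/hadoop-build-readwrite-store.sh, as the usage message above shows. For reference, here is a minimal sketch of driving it programmatically through ToolRunner; the paths, mapper class, and store name below are hypothetical placeholders and must be replaced with values that match your own cluster.xml and stores.xml.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

import voldemort.store.readwrite.mr.HadoopRWStoreJobRunner;

public class HadoopRWStoreJobRunnerExample {

    public static void main(String[] args) throws Exception {
        // Hypothetical arguments: substitute real input/temp paths, a mapper
        // class extending AbstractRWHadoopStoreBuilderMapper, and a store
        // name that exists in stores.xml.
        String[] runnerArgs = new String[] { "--input", "/data/input",
                "--temp", "/data/temp",
                "--mapper", "com.example.MyRWStoreBuilderMapper",
                "--cluster", "/etc/voldemort/cluster.xml",
                "--storedefinitions", "/etc/voldemort/stores.xml",
                "--storename", "my-store" };

        // ToolRunner parses the generic Hadoop options (-D, -libjars, ...)
        // before handing the remaining arguments to run().
        int exitCode = ToolRunner.run(new Configuration(),
                new HadoopRWStoreJobRunner(), runnerArgs);
        System.exit(exitCode);
    }
}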