com.cloudera.branchreduce.BranchReduceJob.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.branchreduce.BranchReduceJob.java

Source

/**
 * Copyright (c) 2012, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */
package com.cloudera.branchreduce;

import java.io.File;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

import com.cloudera.branchreduce.impl.distributed.TaskSupplier;
import com.cloudera.branchreduce.impl.local.SingleThreadedBranchReduceEngine;
import com.cloudera.branchreduce.impl.thrift.LocalRealisticBranchReduceEngine;
import com.cloudera.branchreduce.impl.thrift.ThriftBranchReduceEngine;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

/**
 *
 */
public class BranchReduceJob<T extends Writable, G extends GlobalState<G>> implements BranchReduceConfig {

    private static final Log LOG = LogFactory.getLog(BranchReduceJob.class);

    private static final String DEFAULT_KITTEN_CONFIG_FILE = "lua/branchreduce.lua";

    private final Configuration conf;
    private final boolean runLocally;

    private final Map<String, Object> env;
    private final Map<String, String> resources;
    private String kittenConfigFile;
    private String kittenJob = "branchreduce";

    public BranchReduceJob(boolean runLocally) {
        this(runLocally, new Configuration(), DEFAULT_KITTEN_CONFIG_FILE);
    }

    public BranchReduceJob(boolean runLocally, Configuration conf) {
        this(runLocally, conf, DEFAULT_KITTEN_CONFIG_FILE);
    }

    public BranchReduceJob(boolean runLocally, Configuration conf, String kittenConfigFile) {
        this.conf = conf;
        this.env = Maps.newHashMap();
        this.resources = Maps.newHashMap();
        this.runLocally = runLocally;
        this.kittenConfigFile = kittenConfigFile;
        initEnv();
    }

    private void initEnv() {
        env.put(NUM_THREADS, 1);
        env.put(NUM_WORKERS, 2);
        env.put(MEMORY_PER_TASK, 512);
        env.put(JOB_NAME, "BranchReduce Job");
    }

    public Configuration getConfiguration() {
        return conf;
    }

    public BranchReduceJob<T, G> addFileResource(File file) {
        return addResource(file.getName(), file.getAbsolutePath());
    }

    public BranchReduceJob<T, G> addResource(String name, String filePath) {
        this.resources.put(name, filePath);
        return this;
    }

    public BranchReduceJob<T, G> setHadoopHome(String hadoopHome) {
        env.put(HADOOP_HOME, hadoopHome);
        return this;
    }

    public BranchReduceJob<T, G> setJarByClass(Class<?> clazz) {
        String jarPath = JobConf.findContainingJar(clazz);
        if (jarPath != null) {
            File jarFile = new File(jarPath);
            env.put(BRANCH_REDUCE_JAR_PATH, jarPath);
            env.put(BRANCH_REDUCE_JAR, jarFile.getName());
        } else {
            LOG.warn("branch reduce jar file not set; class files may not be found");
        }
        return this;
    }

    public BranchReduceJob<T, G> setProcessorClass(Class<? extends Processor<T, G>> processorClass) {
        conf.setClass(PROCESSOR_CLASS, processorClass, Processor.class);
        return this;
    }

    public BranchReduceJob<T, G> setTaskClass(Class<T> taskClass) {
        conf.setClass(TASK_CLASS, taskClass, Writable.class);
        return this;
    }

    public BranchReduceJob<T, G> setGlobalStateClass(Class<G> globalStateClass) {
        conf.setClass(GLOBAL_STATE_CLASS, globalStateClass, GlobalState.class);
        return this;
    }

    public BranchReduceJob<T, G> setTaskSupplierClass(Class<? extends TaskSupplier<T, G>> taskSupplierClass) {
        conf.setClass(TASK_SUPPLIER_CLASS, taskSupplierClass, TaskSupplier.class);
        return this;
    }

    public BranchReduceJob<T, G> setNumWorkers(int numWorkers) {
        Preconditions.checkArgument(numWorkers > 0);
        env.put(NUM_WORKERS, numWorkers);
        return this;
    }

    public BranchReduceJob<T, G> setNumThreads(int numThreads) {
        Preconditions.checkArgument(numThreads > 0);
        env.put(NUM_THREADS, numThreads);
        return this;
    }

    public BranchReduceJob<T, G> setMemory(int megabytes) {
        Preconditions.checkArgument(megabytes > 0);
        env.put(MEMORY_PER_TASK, megabytes);
        return this;
    }

    public BranchReduceJob<T, G> setJobName(String jobName) {
        env.put(JOB_NAME, jobName);
        return this;
    }

    public boolean isLocal() {
        return runLocally;
    }

    public BranchReduceContext<T, G> submit() {
        BranchReduceEngine engine = null;
        if (runLocally) {
            int threads = (Integer) env.get(NUM_THREADS);
            engine = threads == 1 ? new SingleThreadedBranchReduceEngine()
                    : new LocalRealisticBranchReduceEngine(threads, conf);
        } else {
            engine = new ThriftBranchReduceEngine(kittenConfigFile, kittenJob, env, resources);
        }

        return engine.run(this);
    }

    @SuppressWarnings("unchecked")
    public Class<G> getGlobalStateClass() {
        return (Class<G>) conf.getClass(GLOBAL_STATE_CLASS, null);
    }

    @SuppressWarnings("unchecked")
    public Class<T> getTaskClass() {
        return (Class<T>) conf.getClass(TASK_CLASS, null);
    }

    @SuppressWarnings("unchecked")
    public Processor<T, G> constructProcessor() {
        return (Processor<T, G>) ReflectionUtils.newInstance(conf.getClass(PROCESSOR_CLASS, null), conf);
    }

    @SuppressWarnings("unchecked")
    public T constructInitialTask() {
        return (T) ReflectionUtils.newInstance(conf.getClass(TASK_CLASS, null), conf);
    }

    @SuppressWarnings("unchecked")
    public TaskSupplier<T, G> constructTaskSupplier() {
        return (TaskSupplier<T, G>) ReflectionUtils
                .newInstance(conf.getClass(TASK_SUPPLIER_CLASS, DEFAULT_TASK_SUPPLIER_CLASS), conf);
    }

    @SuppressWarnings("unchecked")
    public G constructGlobalState() {
        return (G) ReflectionUtils.newInstance(conf.getClass(GLOBAL_STATE_CLASS, null), conf);
    }
}