com.facebook.hiveio.common.HadoopUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.hiveio.common.HadoopUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.hiveio.common;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * Static helpers for dealing with Hadoop {@link Configuration} objects and
 * related classes.
 *
 * <p>Most methods are thin wrappers that read or write a single, well-known
 * configuration key so that callers do not have to repeat the key strings.
 */
public class HadoopUtils {
    /** Logger */
    private static final Logger LOG = LoggerFactory.getLogger(HadoopUtils.class);

    /** Key in Configuration for output directory */
    private static final String OUTPUT_DIR_KEY = "mapred.output.dir";

    /** Key in Configuration for the per-task work output directory */
    private static final String WORK_OUTPUT_DIR_KEY = "mapred.work.output.dir";

    /** Key in Configuration for input directory */
    private static final String INPUT_DIR_KEY = "mapred.input.dir";

    /** Key in Configuration for maximum number of map task attempts */
    private static final String MAP_MAX_ATTEMPTS_KEY = "mapred.map.max.attempts";

    /** Key in Configuration for the scheduler pool */
    private static final String POOL_KEY = "mapred.fairscheduler.pool";

    /** Key in Configuration controlling whether a success marker is written */
    private static final String MARK_SUCCESSFUL_JOBS_KEY =
        "mapreduce.fileoutputcommitter.marksuccessfuljobs";

    /** Key in Configuration for output key class */
    private static final String OUTPUT_KEY_CLASS_KEY = "mapred.output.key.class";

    /** Key in Configuration for output value class */
    private static final String OUTPUT_VALUE_CLASS_KEY = "mapred.output.value.class";

    /** Don't construct, allow inheritance */
    protected HadoopUtils() {
    }

    /**
     * Set Configuration if object is Configurable. Objects that are not
     * {@link Configurable} (including null) are left untouched.
     *
     * @param object Object to check
     * @param conf Configuration to set
     */
    public static void setConfIfPossible(Object object, Configuration conf) {
        if (object instanceof Configurable) {
            ((Configurable) object).setConf(conf);
        }
    }

    /**
     * Set JobConf if object is JobConfigurable. Objects that are not
     * {@link JobConfigurable} (including null) are left untouched.
     *
     * @param object Object to check
     * @param jobConf JobConf to set
     */
    public static void setJobConfIfPossible(Object object, JobConf jobConf) {
        if (object instanceof JobConfigurable) {
            ((JobConfigurable) object).configure(jobConf);
        }
    }

    /**
     * Hack to configure InputFormats before they get used.
     *
     * <p>A fresh {@link JobConf} is built from {@code conf} and handed to the
     * input format.
     *
     * @param inputFormat InputFormat to configure
     * @param conf Configuration to use
     */
    public static void configureInputFormat(InputFormat inputFormat, Configuration conf) {
        JobConf jobConf = new JobConf(conf);
        setJobConfIfPossible(inputFormat, jobConf);
        // TextInputFormat is not always JobConfigurable, so we need to explicitly
        // call this here to make sure it gets configured with the
        // compression codecs.
        if (inputFormat instanceof TextInputFormat) {
            ((TextInputFormat) inputFormat).configure(jobConf);
        }
    }

    /**
     * Get Configuration if object is Configurable
     *
     * @param object Object to check
     * @return Configuration if object is Configurable, else null
     */
    public static Configuration getConfIfPossible(Object object) {
        if (object instanceof Configurable) {
            return ((Configurable) object).getConf();
        }
        return null;
    }

    /**
     * Get output directory
     *
     * @param conf Configuration to use
     * @return output directory as stored under {@code mapred.output.dir};
     *         may be null when it has not been set (callers such as
     *         {@link #setWorkOutputDir(TaskAttemptContext)} check for this)
     */
    public static String getOutputDir(Configuration conf) {
        return conf.get(OUTPUT_DIR_KEY);
    }

    /**
     * Get path to output directory as Path.
     *
     * <p>The output directory must already be set in {@code conf}; this
     * method does not guard against a missing value.
     *
     * @param conf Configuration to use
     * @return Path to output directory
     */
    public static Path getOutputPath(Configuration conf) {
        return new Path(getOutputDir(conf));
    }

    /**
     * Set output directory to use
     *
     * @param conf Configuration to use
     * @param path output directory
     */
    public static void setOutputDir(Configuration conf, String path) {
        conf.set(OUTPUT_DIR_KEY, path);
    }

    /**
     * Recursively delete output directory for this job
     *
     * @param conf Configuration to use
     * @throws IOException I/O errors
     */
    public static void deleteOutputDir(Configuration conf) throws IOException {
        Path outputPath = getOutputPath(conf);
        outputPath.getFileSystem(conf).delete(outputPath, true);
    }

    /**
     * Set number of map task attempts
     *
     * @param conf Configuration
     * @param numAttempts number of attempts
     */
    public static void setMapAttempts(Configuration conf, int numAttempts) {
        conf.setInt(MAP_MAX_ATTEMPTS_KEY, numAttempts);
    }

    /**
     * Set Hadoop Pool to use
     *
     * @param conf Configuration
     * @param pool Hadoop pool to use
     */
    public static void setPool(Configuration conf, String pool) {
        conf.set(POOL_KEY, pool);
    }

    /**
     * Check if output committer needs success marker
     *
     * @param conf Configuration to use
     * @return true if success marker required; false when the setting is
     *         absent from {@code conf}
     */
    public static boolean needSuccessMarker(Configuration conf) {
        return conf.getBoolean(MARK_SUCCESSFUL_JOBS_KEY, false);
    }

    /**
     * Set Hadoop Output Key class
     *
     * @param conf Configuration to use
     * @param writableClass Class that is Writable
     */
    public static void setOutputKeyWritableClass(Configuration conf, Class<? extends Writable> writableClass) {
        conf.set(OUTPUT_KEY_CLASS_KEY, writableClass.getName());
    }

    /**
     * Set Hadoop Output Value class
     *
     * @param conf Configuration to use
     * @param writableClass Class that is Writable
     */
    public static void setOutputValueWritableClass(Configuration conf, Class<? extends Writable> writableClass) {
        // Must be set(), not get(): get(name, default) only reads the key and
        // would silently leave the output value class unconfigured.
        conf.set(OUTPUT_VALUE_CLASS_KEY, writableClass.getName());
    }

    /**
     * Set worker output directory.
     *
     * <p>Does nothing when no output directory is configured. Otherwise the
     * task's work path is obtained from a {@link FileOutputCommitter}, created
     * on its file system, and stored under {@code mapred.work.output.dir}.
     *
     * @param context Task context
     * @throws IOException I/O errors
     */
    public static void setWorkOutputDir(TaskAttemptContext context) throws IOException {
        Configuration conf = context.getConfiguration();
        String outputDir = getOutputDir(conf);
        if (outputDir == null) {
            return;
        }
        // we need to do this to get the task path and set it for mapred
        // implementation since it can't be done automatically because of
        // mapreduce->mapred abstraction
        FileOutputCommitter committer = new FileOutputCommitter(new Path(outputDir), context);
        Path workPath = committer.getWorkPath();
        FileSystem fs = workPath.getFileSystem(conf);
        fs.mkdirs(workPath);
        conf.set(WORK_OUTPUT_DIR_KEY, workPath.toString());
        LOG.info("Setting mapred.work.output.dir to {}", workPath);
    }

    /**
     * Set MapReduce input directory
     *
     * @param conf Configuration to use
     * @param path path to set
     */
    public static void setInputDir(Configuration conf, String path) {
        conf.set(INPUT_DIR_KEY, path);
    }
}