Java tutorial: Pentaho's ConfigurationProxyV2, a wrapper around a Hadoop MapReduce Job/JobConf
/*! ******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.hadoop.mapreduce.YarnQueueAclsException;
import org.pentaho.hadoop.mapreduce.YarnQueueAclsVerifier;
import org.pentaho.hadoop.shim.api.Configuration;
import org.pentaho.hadoop.shim.api.mapred.RunningJob;

import java.io.IOException;

/**
 * User: Dzmitry Stsiapanau Date: 7/22/14 Time: 11:59 AM
 */
public class ConfigurationProxyV2 implements Configuration {

  protected Job job;

  public ConfigurationProxyV2() throws IOException {
    job = Job.getInstance();
    addConfigsForJobConf();
  }

  @VisibleForTesting
  void addConfigsForJobConf() {
    job.getConfiguration().addResource("hdfs-site.xml");
    job.getConfiguration().addResource("hive-site.xml");
    job.getConfiguration().addResource("hbase-site.xml");
  }

  public JobConf getJobConf() {
    return (JobConf) job.getConfiguration();
  }

  public Job getJob() {
    return job;
  }

  /**
   * Sets the MapReduce job name.
   *
   * @param jobName Name of job
   */
  @Override
  public void setJobName(String jobName) {
    getJob().setJobName(jobName);
  }

  /**
   * Sets the property {@code name}'s value to {@code value}.
   *
   * @param name  Name of property
   * @param value Value of property
   */
  @Override
  public void set(String name, String value) {
    getJobConf().set(name, value);
  }

  /**
   * Look up the value of a property.
   *
   * @param name Name of property
   * @return Value of the property named {@code name}
   */
  @Override
  public String get(String name) {
    return getJobConf().get(name);
  }

  /**
   * Look up the value of a property, optionally returning a default value if the property is not set.
   *
   * @param name         Name of property
   * @param defaultValue Value to return if the property is not set
   * @return Value of property named {@code name} or {@code defaultValue} if {@code name} is not set
   */
  @Override
  public String get(String name, String defaultValue) {
    return getJobConf().get(name, defaultValue);
  }

  /**
   * Set the key class for the map output data.
   *
   * @param c the map output key class
   */
  @Override
  public void setMapOutputKeyClass(Class<?> c) {
    getJob().setMapOutputKeyClass(c);
  }

  /**
   * Set the value class for the map output data.
   *
   * @param c the map output value class
   */
  @Override
  public void setMapOutputValueClass(Class<?> c) {
    getJob().setMapOutputValueClass(c);
  }

  // Dispatches on the supplied class: old-API (mapred) mappers switch the job to the
  // old map API on the JobConf, new-API (mapreduce) mappers go through the Job object.
  @SuppressWarnings("unchecked")
  @Override
  public void setMapperClass(Class<?> c) {
    if (org.apache.hadoop.mapred.Mapper.class.isAssignableFrom(c)) {
      setUseOldMapApi();
      getJobConf().setMapperClass((Class<? extends org.apache.hadoop.mapred.Mapper>) c);
    } else if (org.apache.hadoop.mapreduce.Mapper.class.isAssignableFrom(c)) {
      getJob().setMapperClass((Class<? extends org.apache.hadoop.mapreduce.Mapper>) c);
    }
  }

  private void setUseOldMapApi() {
    set("mapred.mapper.new-api", "false");
  }

  @SuppressWarnings("unchecked")
  @Override
  public void setCombinerClass(Class<?> c) {
    if (org.apache.hadoop.mapred.Reducer.class.isAssignableFrom(c)) {
      setUseOldRedApi();
      getJobConf().setCombinerClass((Class<? extends org.apache.hadoop.mapred.Reducer>) c);
    } else if (org.apache.hadoop.mapreduce.Reducer.class.isAssignableFrom(c)) {
      getJob().setCombinerClass((Class<? extends org.apache.hadoop.mapreduce.Reducer>) c);
    }
  }

  private void setUseOldRedApi() {
    set("mapred.reducer.new-api", "false");
  }

  @SuppressWarnings("unchecked")
  @Override
  public void setReducerClass(Class<?> c) {
    if (org.apache.hadoop.mapred.Reducer.class.isAssignableFrom(c)) {
      setUseOldRedApi();
      getJobConf().setReducerClass((Class<? extends org.apache.hadoop.mapred.Reducer>) c);
    } else if (org.apache.hadoop.mapreduce.Reducer.class.isAssignableFrom(c)) {
      getJob().setReducerClass((Class<? extends org.apache.hadoop.mapreduce.Reducer>) c);
    }
  }

  @Override
  public void setOutputKeyClass(Class<?> c) {
    getJob().setOutputKeyClass(c);
  }

  @Override
  public void setOutputValueClass(Class<?> c) {
    getJob().setOutputValueClass(c);
  }

  @SuppressWarnings("unchecked")
  @Override
  public void setMapRunnerClass(Class<?> c) {
    if (org.apache.hadoop.mapred.MapRunnable.class.isAssignableFrom(c)) {
      getJobConf().setMapRunnerClass((Class<? extends org.apache.hadoop.mapred.MapRunnable>) c);
    }
  }

  @SuppressWarnings("unchecked")
  @Override
  public void setInputFormat(Class<?> inputFormat) {
    if (org.apache.hadoop.mapred.InputFormat.class.isAssignableFrom(inputFormat)) {
      setUseOldMapApi();
      getJobConf().setInputFormat((Class<? extends org.apache.hadoop.mapred.InputFormat>) inputFormat);
    } else if (org.apache.hadoop.mapreduce.InputFormat.class.isAssignableFrom(inputFormat)) {
      getJob().setInputFormatClass((Class<? extends org.apache.hadoop.mapreduce.InputFormat>) inputFormat);
    }
  }

  @SuppressWarnings("unchecked")
  @Override
  public void setOutputFormat(Class<?> outputFormat) {
    if (org.apache.hadoop.mapred.OutputFormat.class.isAssignableFrom(outputFormat)) {
      setUseOldRedApi();
      // For a map-only job (or when an old-API partitioner is configured), the map phase
      // must also use the old API so the old-API output format can be applied to it.
      if (getJobConf().getNumReduceTasks() == 0 || get("mapred.partitioner.class") != null) {
        setUseOldMapApi();
      }
      getJobConf().setOutputFormat((Class<? extends org.apache.hadoop.mapred.OutputFormat>) outputFormat);
    } else if (org.apache.hadoop.mapreduce.OutputFormat.class.isAssignableFrom(outputFormat)) {
      getJob().setOutputFormatClass((Class<? extends org.apache.hadoop.mapreduce.OutputFormat>) outputFormat);
    }
  }

  @Override
  public void setInputPaths(org.pentaho.hadoop.shim.api.fs.Path... paths) {
    if (paths == null) {
      return;
    }
    Path[] actualPaths = new Path[paths.length];
    for (int i = 0; i < paths.length; i++) {
      actualPaths[i] = ShimUtils.asPath(paths[i]);
    }
    try {
      FileInputFormat.setInputPaths(getJob(), actualPaths);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  @Override
  public void setOutputPath(org.pentaho.hadoop.shim.api.fs.Path path) {
    FileOutputFormat.setOutputPath(getJob(), ShimUtils.asPath(path));
  }

  @Override
  public void setJarByClass(Class<?> c) {
    getJob().setJarByClass(c);
  }

  @Override
  public void setJar(String url) {
    getJob().setJar(url);
  }

  /**
   * Provide a hint to Hadoop for the number of map tasks to start for the MapReduce job submitted with this
   * configuration.
   *
   * @param n the number of map tasks for this job
   */
  @Override
  public void setNumMapTasks(int n) {
    getJobConf().setNumMapTasks(n);
  }

  /**
   * Sets the required number of reduce tasks for the MapReduce job submitted with this configuration.
   * <p>If {@code n} is zero there will be no reduce (or sort/shuffle) phase and the output of the map tasks will be
   * written directly to the distributed file system under the path specified via
   * {@link #setOutputPath(org.pentaho.hadoop.shim.api.fs.Path)}.</p>
   *
   * @param n the number of reduce tasks required for this job
   */
  @Override
  public void setNumReduceTasks(int n) {
    getJob().setNumReduceTasks(n);
  }

  /**
   * Set the array of string values for the <code>name</code> property as comma delimited values.
   *
   * @param name   property name
   * @param values the values
   */
  @Override
  public void setStrings(String name, String... values) {
    getJobConf().setStrings(name, values);
  }

  /**
   * Get the default file system URL as stored in this configuration.
   *
   * @return the default URL if it was set, otherwise empty string
   */
  @Override
  public String getDefaultFileSystemURL() {
    return get("fs.default.name", "");
  }

  /**
   * Hack: return this configuration as the requested delegate class, if possible.
   *
   * @param delegate class of desired return object
   * @return this configuration as the delegate type, or {@code null} if the conversion is not possible
   */
  @Override
  public <T> T getAsDelegateConf(Class<T> delegate) {
    if (delegate.isAssignableFrom(this.getClass())) {
      return (T) this;
    } else if (delegate.isAssignableFrom(JobConf.class)) {
      return (T) getJobConf();
    } else {
      return null;
    }
  }

  /**
   * Submit the job for the current configuration provided by this implementation.
   *
   * @return RunningJob implementation
   */
  @Override
  public RunningJob submit() throws IOException, ClassNotFoundException, InterruptedException {
    // Verify that the current user may submit to a YARN queue before actually submitting the job.
    if (YarnQueueAclsVerifier
        .verify(createClusterDescription(getJob().getConfiguration()).getQueueAclsForCurrentUser())) {
      getJob().submit();
      return new RunningJobProxyV2(getJob());
    } else {
      throw new YarnQueueAclsException(
          BaseMessages.getString(ConfigurationProxy.class, "ConfigurationProxy.UserHasNoPermissions",
              UserGroupInformation.getCurrentUser().getUserName()));
    }
  }

  Cluster createClusterDescription(org.apache.hadoop.conf.Configuration configuration) throws IOException {
    return new Cluster(configuration);
  }
}
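To make the API surface of the class above concrete, here is a minimal usage sketch. It is not part of the Pentaho sources: the example class name is made up, the mapper/combiner/reducer are Hadoop's stock TokenCounterMapper and IntSumReducer (chosen only so the snippet is self-contained), and input/output path wiring is omitted because setInputPaths()/setOutputPath() take Pentaho shim Path objects that a real caller would obtain from the shim's file system abstraction. Treat it as an illustration of how the setters route classes to the wrapped Job, not as a runnable job.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
import org.pentaho.hadoop.shim.api.mapred.RunningJob;
import org.pentaho.hadoop.shim.common.ConfigurationProxyV2;

public class WordCountSubmitExample {

  public static void main(String[] args) throws Exception {
    // The constructor wraps a new Hadoop Job and layers hdfs-site.xml,
    // hive-site.xml and hbase-site.xml onto its configuration.
    ConfigurationProxyV2 conf = new ConfigurationProxyV2();

    conf.setJobName("word-count-example");
    conf.setJarByClass(WordCountSubmitExample.class);

    // The setters accept either old-API (mapred) or new-API (mapreduce) classes;
    // the proxy inspects each class and flips the mapred.*.new-api flags as needed.
    // TokenCounterMapper and IntSumReducer are new-API classes shipped with Hadoop.
    conf.setMapperClass(TokenCounterMapper.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setReducerClass(IntSumReducer.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setNumReduceTasks(1);

    // Arbitrary Hadoop properties go through the generic setter.
    conf.set("mapreduce.job.queuename", "default");

    // Input/output paths are omitted in this sketch; a real job would call
    // setInputPaths(...) and setOutputPath(...) with shim Path objects here.

    // submit() checks the current user's YARN queue ACLs and then submits the
    // underlying Job, returning a RunningJob handle.
    RunningJob runningJob = conf.submit();
    System.out.println("Submitted job, handle: " + runningJob);
  }
}

Note that callers never touch the mapred.mapper.new-api / mapred.reducer.new-api flags themselves: the proxy detects which Hadoop API a supplied mapper, reducer, input format, or output format belongs to and records the choice on the underlying JobConf.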