org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java — source code

Java tutorial

Introduction

Below is the source code for org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java.

Source

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.mapreduce;

import com.thoughtworks.xstream.XStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Reporter;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.variables.Variables;
import org.pentaho.di.trans.RowProducer;
import org.pentaho.di.trans.Trans;
import org.pentaho.hadoop.mapreduce.converter.spi.ITypeConverter;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.UUID;

import static org.pentaho.hadoop.shim.api.Configuration.STRING_COMBINE_SINGLE_THREADED;
import static org.pentaho.hadoop.shim.api.Configuration.STRING_REDUCE_SINGLE_THREADED;

@SuppressWarnings("deprecation")
public class PentahoMapReduceBase<K, V> extends MapReduceBase {

    /** Record-level counters reported back to the Hadoop framework. */
    protected static enum Counter {
        INPUT_RECORDS, OUTPUT_RECORDS, OUT_RECORD_WITH_NULL_KEY, OUT_RECORD_WITH_NULL_VALUE
    }

    // Job-configuration keys with this prefix are copied into Java system
    // properties with the prefix stripped.
    private final String ENVIRONMENT_VARIABLE_PREFIX = "java.system.";
    // Kettle variables (KETTLE_*) are mirrored into system properties verbatim.
    private final String KETTLE_VARIABLE_PREFIX = "KETTLE_";

    // Serialized transformation XML for each phase, read from the job config.
    protected String transMapXml;
    protected String transCombinerXml;
    protected String transReduceXml;

    // Injector (input) step names for each phase's transformation.
    protected String mapInputStepName;
    protected String combinerInputStepName;
    protected String reduceInputStepName;

    // Output step names for each phase's transformation.
    protected String mapOutputStepName;
    protected String combinerOutputStepName;
    protected String reduceOutputStepName;

    // Variable space deserialized from the job config (or empty if absent).
    protected VariableSpace variableSpace = null;

    // Output key/value classes resolved from the job configuration.
    protected Class<K> outClassK;
    protected Class<V> outClassV;

    // Unique identifier for this task instance.
    protected String id = UUID.randomUUID().toString();

    // True when the job config has debug="true"; enables verbose status output.
    protected boolean debug = false;

    protected LogLevel logLevel;

    //  the transformation that will be used as a mapper or reducer
    protected Trans trans;

    //  One of these is what trans is to be used as
    public static enum MROperations {
        Map, Combine, Reduce
    }

    //  we set this to what this object is being used for - map or reduce
    protected MROperations mrOperation;

    protected OutputCollectorRowListener<K, V> rowCollector;
    protected boolean combineSingleThreaded;
    protected boolean reduceSingleThreaded;

    public PentahoMapReduceBase() throws KettleException {
    }

    /**
     * Reads the transformation XML, step names, variable space, and log level
     * from the job configuration, mirrors environment/Kettle variables into
     * system properties, resolves the output key/value classes for the current
     * MR operation, and finally creates the transformation.
     *
     * @param job the Hadoop job configuration
     * @throws IllegalArgumentException if {@link #mrOperation} is unsupported
     * @throws RuntimeException if {@code setMRType} was never called
     */
    @SuppressWarnings("unchecked")
    @Override
    public void configure(JobConf job) {
        super.configure(job);

        // Fail fast with the same message createTrans uses, instead of an
        // opaque NPE from the switch below.
        if (mrOperation == null) {
            throw new RuntimeException(
                    "Map or reduce operation has not been specified.  Call setMRType from implementing classes constructor.");
        }

        debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

        transMapXml = job.get("transformation-map-xml");
        transCombinerXml = job.get("transformation-combiner-xml");
        transReduceXml = job.get("transformation-reduce-xml");
        mapInputStepName = job.get("transformation-map-input-stepname");
        mapOutputStepName = job.get("transformation-map-output-stepname");
        combinerInputStepName = job.get("transformation-combiner-input-stepname");
        combinerOutputStepName = job.get("transformation-combiner-output-stepname");
        combineSingleThreaded = isCombinerSingleThreaded(job);
        reduceInputStepName = job.get("transformation-reduce-input-stepname");
        reduceOutputStepName = job.get("transformation-reduce-output-stepname");
        reduceSingleThreaded = isReducerSingleThreaded(job);
        String xmlVariableSpace = job.get("variableSpace");

        if (!Const.isEmpty(xmlVariableSpace)) {
            setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job.  The contents: ");

            //  deserialize from xml to variable space
            XStream xStream = new XStream();

            setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: ");
            variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

            for (String variableName : variableSpace.listVariables()) {
                if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                    String variableValue = variableSpace.getVariable(variableName);
                    // System.setProperty throws NPE on a null value; skip unset variables.
                    if (variableValue != null) {
                        System.setProperty(variableName, variableValue);
                    }
                }
            }
        } else {
            setDebugStatus(
                    "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration.");
            variableSpace = new Variables();
        }

        // Check for environment variables in the userDefined variables
        Iterator<Entry<String, String>> iter = job.iterator();
        while (iter.hasNext()) {
            Entry<String, String> entry = iter.next();
            if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
                System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                        entry.getValue());
            } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
                System.setProperty(entry.getKey(), entry.getValue());
            }
        }

        MRUtil.passInformationToTransformation(variableSpace, job);

        switch (mrOperation) {
        case Map:
            // A mapper's (and combiner's) output types are the job's intermediate
            // (map output) key/value classes. Previously Map fell into the default
            // branch and threw, even though createTrans supports Map.
        case Combine:
            outClassK = (Class<K>) job.getMapOutputKeyClass();
            outClassV = (Class<V>) job.getMapOutputValueClass();
            break;
        case Reduce:
            outClassK = (Class<K>) job.getOutputKeyClass();
            outClassV = (Class<V>) job.getOutputValueClass();
            break;
        default:
            throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation);
        }

        if (debug) {
            System.out.println("Job configuration>");
            System.out.println("Output key class: " + outClassK.getName());
            System.out.println("Output value class: " + outClassV.getName());
        }

        //  set the log level to what the level of the job is
        String stringLogLevel = job.get("logLevel");
        if (!Const.isEmpty(stringLogLevel)) {
            logLevel = LogLevel.valueOf(stringLogLevel);
            setDebugStatus("Log level set to " + stringLogLevel);
        } else {
            System.out.println(
                    "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
        }

        createTrans(job);
    }

    @Override
    public void close() throws IOException {
        super.close();
    }

    /**
     * @deprecated Use the other injectValue method - The paramters have been arranged to be more uniform
     */
    @Deprecated
    public void injectValue(Object key, ITypeConverter inConverterK, ITypeConverter inConverterV,
            RowMeta injectorRowMeta, RowProducer rowProducer, Object value, Reporter reporter) throws Exception {
        injectValue(key, inConverterK, value, inConverterV, injectorRowMeta, rowProducer, reporter);
    }

    /**
     * Injects a key/value pair as a row at the default ordinals (key at 0,
     * value at 1).
     */
    public void injectValue(Object key, ITypeConverter inConverterK, Object value, ITypeConverter inConverterV,
            RowMetaInterface injectorRowMeta, RowProducer rowProducer, Reporter reporter) throws Exception {

        injectValue(key, 0, inConverterK, value, 1, inConverterV, injectorRowMeta, rowProducer, reporter);
    }

    /**
     * Converts the key and value with the supplied converters (when non-null)
     * and places them at the given ordinals of a new row, which is then pushed
     * into the transformation via the row producer.
     *
     * @param key             raw key from the MapReduce framework
     * @param keyOrdinal      index of the key field in the injector row
     * @param inConverterK    optional converter for the key (null = use as-is)
     * @param value           raw value from the MapReduce framework
     * @param valueOrdinal    index of the value field in the injector row
     * @param inConverterV    optional converter for the value (null = use as-is)
     * @param injectorRowMeta row metadata of the injector step
     * @param rowProducer     producer feeding the transformation's injector step
     * @param reporter        Hadoop reporter used for debug status
     * @throws Exception if a converter fails
     */
    public void injectValue(Object key, int keyOrdinal, ITypeConverter inConverterK, Object value, int valueOrdinal,
            ITypeConverter inConverterV, RowMetaInterface injectorRowMeta, RowProducer rowProducer,
            Reporter reporter) throws Exception {
        Object[] row = new Object[injectorRowMeta.size()];
        row[keyOrdinal] = inConverterK != null ? inConverterK.convert(injectorRowMeta.getValueMeta(keyOrdinal), key)
                : key;
        row[valueOrdinal] = inConverterV != null
                ? inConverterV.convert(injectorRowMeta.getValueMeta(valueOrdinal), value)
                : value;

        if (debug) {
            setDebugStatus(reporter,
                    "Injecting input record [" + row[keyOrdinal] + "] - [" + row[valueOrdinal] + "]");
        }

        rowProducer.putRow(injectorRowMeta, row);
    }

    /**
     * Creates the transformation for the configured MR operation from the
     * corresponding XML. Combiners and reducers may run single-threaded when
     * the respective configuration flag is set.
     *
     * @param conf the job configuration
     * @throws RuntimeException if the operation was never set or the
     *                          transformation cannot be loaded
     */
    protected void createTrans(final Configuration conf) {

        if (mrOperation == null) {
            throw new RuntimeException(
                    "Map or reduce operation has not been specified.  Call setMRType from implementing classes constructor.");
        }

        try {
            if (mrOperation.equals(MROperations.Map)) {
                setDebugStatus("Creating a transformation for a map.");
                trans = MRUtil.getTrans(conf, transMapXml, false);
            } else if (mrOperation.equals(MROperations.Combine)) {
                setDebugStatus("Creating a transformation for a combiner.");
                trans = MRUtil.getTrans(conf, transCombinerXml, isCombinerSingleThreaded(conf));
            } else if (mrOperation.equals(MROperations.Reduce)) {
                setDebugStatus("Creating a transformation for a reduce.");
                trans = MRUtil.getTrans(conf, transReduceXml, isReducerSingleThreaded(conf));
            }
        } catch (KettleException ke) {
            throw new RuntimeException("Error loading transformation for " + mrOperation, ke); //$NON-NLS-1$
        }

    }

    private boolean isCombinerSingleThreaded(final Configuration conf) {
        return "true".equalsIgnoreCase(conf.get(STRING_COMBINE_SINGLE_THREADED));
    }

    private boolean isReducerSingleThreaded(final Configuration conf) {
        return "true".equalsIgnoreCase(conf.get(STRING_REDUCE_SINGLE_THREADED));
    }

    /** Sets whether this instance acts as a Map, Combine, or Reduce phase. */
    public void setMRType(MROperations mrOperation) {
        this.mrOperation = mrOperation;
    }

    public String getTransMapXml() {
        return transMapXml;
    }

    public void setTransMapXml(String transMapXml) {
        this.transMapXml = transMapXml;
    }

    public String getTransCombinerXml() {
        return transCombinerXml;
    }

    /**
     * Misnamed legacy setter kept for backward compatibility; prefer
     * {@link #setTransCombinerXml(String)}.
     */
    public void setCombinerMapXml(String transCombinerXml) {
        this.transCombinerXml = transCombinerXml;
    }

    /** Sets the combiner transformation XML (naming consistent with the other setters). */
    public void setTransCombinerXml(String transCombinerXml) {
        this.transCombinerXml = transCombinerXml;
    }

    public String getTransReduceXml() {
        return transReduceXml;
    }

    public void setTransReduceXml(String transReduceXml) {
        this.transReduceXml = transReduceXml;
    }

    public String getMapInputStepName() {
        return mapInputStepName;
    }

    public void setMapInputStepName(String mapInputStepName) {
        this.mapInputStepName = mapInputStepName;
    }

    public String getMapOutputStepName() {
        return mapOutputStepName;
    }

    public void setMapOutputStepName(String mapOutputStepName) {
        this.mapOutputStepName = mapOutputStepName;
    }

    public String getCombinerInputStepName() {
        return combinerInputStepName;
    }

    public void setCombinerInputStepName(String combinerInputStepName) {
        this.combinerInputStepName = combinerInputStepName;
    }

    public String getCombinerOutputStepName() {
        return combinerOutputStepName;
    }

    public void setCombinerOutputStepName(String combinerOutputStepName) {
        this.combinerOutputStepName = combinerOutputStepName;
    }

    public String getReduceInputStepName() {
        return reduceInputStepName;
    }

    public void setReduceInputStepName(String reduceInputStepName) {
        this.reduceInputStepName = reduceInputStepName;
    }

    public String getReduceOutputStepName() {
        return reduceOutputStepName;
    }

    public void setReduceOutputStepName(String reduceOutputStepName) {
        this.reduceOutputStepName = reduceOutputStepName;
    }

    public Class<?> getOutClassK() {
        return outClassK;
    }

    public void setOutClassK(Class<K> outClassK) {
        this.outClassK = outClassK;
    }

    public Class<?> getOutClassV() {
        return outClassV;
    }

    public void setOutClassV(Class<V> outClassV) {
        this.outClassV = outClassV;
    }

    public Trans getTrans() {
        return trans;
    }

    public void setTrans(Trans trans) {
        this.trans = trans;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    /** Returns any exception captured by the row collector, or null. */
    public Exception getException() {
        return rowCollector != null ? rowCollector.getException() : null;
    }

    /** Prints the message and reports it as task status, but only in debug mode. */
    public void setDebugStatus(Reporter reporter, String message) {
        if (debug) {
            System.out.println(message);
            reporter.setStatus(message);
        }
    }

    /** Prints the message to stdout, but only in debug mode. */
    private void setDebugStatus(String message) {
        if (debug) {
            System.out.println(message);
        }
    }
}