com.ery.hadoop.mrddx.hive.HiveOutputFormat.java Source code

Introduction

Here is the source code for com.ery.hadoop.mrddx.hive.HiveOutputFormat.java. The class extends Hadoop's FileOutputFormat to write DBRecord keys into Hive-backed delimited files, validates the Hive output configuration before the job runs, and can execute DDL HQL against Hive up front. A hedged usage sketch follows the listing.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ery.hadoop.mrddx.hive;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.ery.hadoop.mrddx.DBGroupReducer;
import com.ery.hadoop.mrddx.DBPartitionReducer;
import com.ery.hadoop.mrddx.DBRecord;
import com.ery.hadoop.mrddx.DBReducer;
import com.ery.hadoop.mrddx.IHandleFormat;
import com.ery.hadoop.mrddx.MRConfiguration;
import com.ery.hadoop.mrddx.db.mapreduce.FileWritable;
import com.ery.hadoop.mrddx.log.MRLog;
import com.ery.hadoop.mrddx.util.HDFSUtils;

/**
 * OutputFormat that writes MapReduce output into a Hive table.
 * 
 * @createDate 2013-1-18
 * @version v1.0
 * @param <K> the output key type; must implement FileWritable
 * @param <V> the output value type
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HiveOutputFormat<K extends FileWritable, V> extends FileOutputFormat<K, NullWritable>
        implements IHandleFormat {
    // Logger for this output format.
    private static final Log LOG = LogFactory.getLog(HiveOutputFormat.class);

    @Override
    public RecordWriter<K, NullWritable> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
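        // Hand off to HiveRecordWriter, which is constructed from the task context.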
        return new HiveRecordWriter<K, NullWritable>(context, this);
        // Legacy implementation (old mapred API), kept commented out for reference:
        //
        // public RecordWriter<K, NullWritable> getRecordWriter(FileSystem ignored,
        //         JobConf job, String name, Progressable progress) throws IOException {
        //
        // HiveConfiguration hiveConf = new HiveConfiguration(context.getConfiguration());
        //
        // String fieldSeparator = hiveConf.getOutputHiveFileFieldSplitChars();
        // String rowSeparator = hiveConf.getOutputHiveFileRowsSplitChars();
        // String[] fieldNames = hiveConf.getOutputFieldNames();
        // boolean isCompressed = hiveConf.getOutputHiveCompress();
        //
        // // Uncompressed output:
        // if (!isCompressed) {
        //     // Path file = FileOutputFormat.getTaskOutputPath(context);
        //     String name = context.getConfiguration().get(name, "outData");
        //     String extension = ".data";
        //     Path file = null;
        //     file = FileOutputFormat.getPathForWorkFile((TaskInputOutputContext) context, name, extension);
        //     file = this.getDefaultWorkFile(context, extension);
        //
        //     FileSystem fs = FileSystem.get(context.getConfiguration()); // file.getFileSystem(job);
        //     FSDataOutputStream fileOut = fs.create(file, context);
        // }
        //
        // // Compressed output: resolve the codec and name the file with its
        // // default extension.
        // String compresseCodec = hiveConf.getOutputHiveCompressCodec();
        // CompressionCodec codec = HDFSUtils.getCompressCodec(compresseCodec, context.getConfiguration());
        // String name = context.getConfiguration().get(getOutputHiveOrderFileNamePrefix, "outData");
        // String extension = codec.getDefaultExtension();
        // Path file = null;
        // file = FileOutputFormat.getPathForWorkFile((TaskInputOutputContext) context, name, extension);
        // file = this.getDefaultWorkFile(context, extension);
        //
        // // Build the filename including the extension:
        // Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        // FileSystem fs = file.getFileSystem(job);
        // FSDataOutputStream fileOut = fs.create(file, progress);
        // DataOutputStream dos = new DataOutputStream(codec.createOutputStream(fileOut));
        // return new HiveRecordWriter<K, NullWritable>(job, dos, fieldSeparator, rowSeparator, fieldNames);
    }

    /**
     * Sets the Hive output parameters on the job configuration and executes
     * the supplied DDL HQL immediately.
     * 
     * @param job the job configuration
     * @param compress whether to compress the output files
     * @param compressCodec the compression codec class name
     * @param fieldSplitChars the field separator for output records
     * @param rowsSplitChars the row separator for output records
     * @param ddlHQL DDL HQL to execute before the MapReduce job runs
     */
    public static void setOutputParameter(Configuration job, boolean compress, String compressCodec,
            String fieldSplitChars, String rowsSplitChars, String ddlHQL) {
        HiveConfiguration hiveConf = new HiveConfiguration(job);
        hiveConf.setOutputHiveCompress(compress);
        hiveConf.setOutputHiveCompressCodec(compressCodec);
        hiveConf.setOutputHiveFileFieldSplitChars(fieldSplitChars);
        hiveConf.setOutputHiveFileRowsSplitChars(rowsSplitChars);
        hiveConf.setOutputHiveExecuteDDLHQL(ddlHQL);
        try {
            executeDDLHQL(hiveConf);
            MRLog.info(LOG, "execute ddl hive sql success!");
        } catch (SQLException e) {
            MRLog.errorException(LOG, "execute ddl hive sql error!", e);
        }
    }

    /**
     * Configures the job to write its output through HiveOutputFormat and
     * records the target table name.
     * 
     * @param job the job
     * @param tableName the Hive table to insert data into
     */
    public static void setOutput(Job job, String tableName) {
        job.setOutputFormatClass(HiveOutputFormat.class);
        job.setReduceSpeculativeExecution(false);
        HiveConfiguration dbConf = new HiveConfiguration(job.getConfiguration());
        dbConf.setOutputHiveTableName(tableName);
    }

    /**
     * Executes the configured DDL HQL statements against Hive, splitting the
     * configured string on ";".
     * 
     * @param hiveConf the Hive configuration
     * @throws SQLException if a statement cannot be created
     */
    public static void executeDDLHQL(HiveConfiguration hiveConf) throws SQLException {
        String ddls = hiveConf.getOutputHiveExecuteDDLHQL();
        if (null == ddls || ddls.trim().length() <= 0) {
            return;
        }
        // Note: a plain split on ";" will break statements that contain
        // semicolons inside quoted literals.
        String ddl[] = ddls.split(";");
        Connection conn = null;
        try {
            conn = hiveConf.getOutputConnection();
        } catch (ClassNotFoundException e) {
            MRLog.errorException(LOG, "create hive conn error!", e);
            return;
        }

        Statement stat = conn.createStatement();
        for (int i = 0; i < ddl.length; i++) {
            try {
                // DDL returns no result set, so use execute() rather than executeQuery().
                stat.execute(ddl[i]);
            } catch (Exception e) {
                MRLog.errorException(LOG, "execute ddl error, hql:" + ddl[i], e);
            }
        }
        stat.close();

        // Release the connection.
        close(conn);
    }

    /**
     * Closes the given JDBC connection, logging (not rethrowing) any SQLException.
     * 
     * @param conn the connection to close; may be null
     */
    public static void close(Connection conn) {
        if (null != conn) {
            try {
                conn.close();
            } catch (SQLException e) {
                MRLog.error(LOG, "Close connection error!");
            }
        }
    }

    /**
     * Executes a single DDL HQL statement against Hive.
     * 
     * @param hiveConf the Hive configuration
     * @param ddl the DDL HQL statement to execute
     * @throws SQLException if a statement cannot be created
     */
    public static void executeDDLHQL(HiveConfiguration hiveConf, String ddl) throws SQLException {
        if (null == ddl || ddl.trim().length() <= 0) {
            return;
        }
        Connection conn = null;
        try {
            conn = hiveConf.getOutputConnection();
        } catch (ClassNotFoundException e) {
            MRLog.errorException(LOG, "create hive conn error!", e);
            return;
        }

        Statement stat = conn.createStatement();
        try {
            stat.execute(ddl);
        } catch (Exception e) {
            MRLog.errorException(LOG, "execute ddl error, hql:" + ddl, e);
        }
        stat.close();

        // Release the connection.
        close(conn);
    }

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException {
        // Intentionally a no-op: the output configuration is validated in
        // handle() rather than through FileOutputFormat's output-path checks.
    }

    @Override
    public void handle(Job conf) throws Exception {
        /**
         * Validate the Hive output configuration before the job is submitted.
         */
        HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());
        // Row separator for the output files.
        String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
        if (null == outRowChars || outRowChars.length() <= 0) {
            String meg = "[MR ERROR]The row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Field separator for the output files.
        String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
        if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
            String meg = "[MR ERROR]The field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        boolean para = hconf.getOutputHiveCompress();
        // Compression codec (one of HDFSUtils.CompressCodec); must be valid
        // when compression is enabled.
        String outCompressCodec = hconf.getOutputHiveCompressCodec();
        if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
            String meg = "[MR ERROR]The compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                    + "> is invalid.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        // Target path to which the MR output is moved.
        String outTargetpath = hconf.getOutputTargetFilePath();
        hconf.setOutputTargetPath(outTargetpath);
        if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
            MRLog.warn(LOG,
                    "[MR WARN]The MR output target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
        }

        // Hive JDBC connection URL.
        String hiveUrl = hconf.getOutPutHiveConfigUrl();
        if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
            String meg = "[MR ERROR]The Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL
                    + "> is not set.";
            LOG.error(meg);
            throw new Exception(meg);
        }

        // Hive connection user name.
        String hiveUser = hconf.getOutPutHiveConfigUser();
        if (null == hiveUser || hiveUser.trim().length() <= 0) {
            LOG.warn("[MR WARN]The Hive user name <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
        }

        // Hive connection password.
        String hivePwd = hconf.getOutPutHiveConfigPassword();
        if (null == hivePwd || hivePwd.trim().length() <= 0) {
            LOG.warn("[MR WARN]The Hive password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
        }

        // Target Hive table name.
        String tableName = hconf.getOutputHiveTableName();
        if (null == tableName || tableName.trim().length() <= 0) {
            String meg = "[MR ERROR]The Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
            LOG.error(meg);
            throw new Exception(meg);
        }

        // Partition fields (optional).
        String partitionField[] = hconf.getOutputHivePartitionField();
        if (null != partitionField && partitionField.length > 0) {
            // Output field names must be configured when partition fields are used.
            String[] outputFieldName = hconf.getOutputFieldNames();
            if (null == outputFieldName || outputFieldName.length <= 0) {
                String meg = "[MR ERROR]The output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + "> are not set.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }

            for (int i = 0; i < partitionField.length; i++) {
                boolean isExist = false;
                for (String s : outputFieldName) {
                    if (s.equals(partitionField[i])) {
                        isExist = true;
                        break;
                    }
                }

                if (!isExist) {
                    String meg = "[MR ERROR]The partition field " + partitionField[i] + " <"
                            + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + "> is not among the output field names <"
                            + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
                    MRLog.error(LOG, meg);
                    throw new Exception(meg);
                }
            }

            String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
            if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
                String meg = "[MR ERROR]The order output temp path <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }

            String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
            if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
                String meg = "[MR WARN]The order output file name prefix <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
                MRLog.warn(LOG, meg);
            }

            long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
            if (orderOutputFileMaxCount == 0) {
                String meg = "[MR ERROR]The max file count <" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                        + "> must be greater than 0, or -1 for unlimited.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        // DDL HQL to execute against Hive before the job runs (optional).
        String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
        if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
            LOG.warn("[MR WARN]The Hive DDL HQL <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
        }

        try {
            executeDDLHQL(hconf);
            MRLog.info(LOG, "execute ddl hive sql success!");
        } catch (SQLException e) {
            MRLog.errorException(LOG, "execute ddl hive sql error!", e);
        }

        conf.setReduceSpeculativeExecution(false);
        conf.setOutputFormatClass(HiveOutputFormat.class);
        conf.setOutputKeyClass(DBRecord.class);
        conf.setOutputValueClass(NullWritable.class);
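        // Choose reducers: DBGroupReducer combines in both cases; the
        // partition-aware DBPartitionReducer is used when partition fields are set.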
        if (null != partitionField && partitionField.length > 0) {
            conf.setCombinerClass(DBGroupReducer.class);
            conf.setReducerClass(DBPartitionReducer.class);
        } else {
            conf.setCombinerClass(DBGroupReducer.class);
            conf.setReducerClass(DBReducer.class);
        }
    }
}
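
A minimal driver sketch showing how this output format might be wired into a job. The table name, separators, and DDL string below are hypothetical placeholders, and setOutputParameter will try to open a Hive connection when it runs the DDL, so treat this as illustrative rather than a working driver:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.ery.hadoop.mrddx.hive.HiveOutputFormat;

public class HiveOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "hive-output-example");

        // Register HiveOutputFormat and the target table; as in setOutput()
        // above, this also disables reduce-side speculative execution.
        HiveOutputFormat.setOutput(job, "example_table"); // hypothetical table name

        // Store the file-format parameters and run the DDL up front: no
        // compression, tab-separated fields, newline-separated rows, and a
        // placeholder CREATE TABLE statement.
        HiveOutputFormat.setOutputParameter(job.getConfiguration(),
                false,  // compress
                null,   // compressCodec (ignored when compress is false)
                "\t",   // fieldSplitChars
                "\n",   // rowsSplitChars
                "CREATE TABLE IF NOT EXISTS example_table (id INT, name STRING)"); // hypothetical DDL

        // Mapper, input format, and input paths would be configured here,
        // followed by job.waitForCompletion(true).
    }
}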