org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.hcat;

import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
import org.apache.sqoop.lib.SqoopRecord;
import org.apache.sqoop.mapreduce.ExportJobBase;
import org.apache.sqoop.mapreduce.ImportJobBase;

/**
 * Helper class for Sqoop HCat Integration export jobs.
 *
 * <p>Converts an {@link HCatRecord} into the {@link SqoopRecord} class
 * configured for the export table, coercing each field from its HCatalog
 * type to the Java type recorded for the corresponding database column.
 *
 * <p>A single {@link SqoopRecord} instance is created in the constructor and
 * reused (overwritten) by every call to
 * {@link #convertToSqoopRecord(HCatRecord)}.
 */
public class SqoopHCatExportHelper {
    public static final Log LOG = LogFactory.getLog(SqoopHCatExportHelper.class.getName());

    // Java type names as they appear in the column-type map loaded from the
    // job configuration.
    private static final String TIMESTAMP_TYPE = "java.sql.Timestamp";
    private static final String TIME_TYPE = "java.sql.Time";
    private static final String DATE_TYPE = "java.sql.Date";
    private static final String BIG_DECIMAL_TYPE = "java.math.BigDecimal";
    private static final String FLOAT_TYPE = "Float";
    private static final String DOUBLE_TYPE = "Double";
    private static final String BYTE_TYPE = "Byte";
    private static final String SHORT_TYPE = "Short";
    private static final String INTEGER_TYPE = "Integer";
    private static final String LONG_TYPE = "Long";
    private static final String BOOLEAN_TYPE = "Boolean";
    private static final String STRING_TYPE = "String";
    private static final String BYTESWRITABLE = "org.apache.hadoop.io.BytesWritable";

    // Compiled once: these are evaluated for every string-typed field of
    // every exported record.
    /** Date in yyyy-mm-dd format. */
    private static final Pattern DATE_PATTERN = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}$");
    /** Time in hh:mm:ss format. */
    private static final Pattern TIME_PATTERN = Pattern.compile("^\\d{2}:\\d{2}:\\d{2}$");
    /**
     * Timestamp in yyyy-mm-dd hh:mm:ss[.f...] format. The separator is a
     * literal dot and the fraction has 1 to 9 digits, which is the range
     * accepted by {@link Timestamp#valueOf(String)}.
     */
    private static final Pattern TIMESTAMP_PATTERN = Pattern
            .compile("^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}(\\.\\d{1,9})?$");

    private final InputJobInfo jobInfo;
    private final HCatSchema hCatFullTableSchema;
    private final SqoopRecord sqoopRecord;
    private final boolean bigDecimalFormatString;
    private final boolean debugHCatExportMapper;
    private final MapWritable colTypesJava;
    private final MapWritable colTypesSql;

    /**
     * Builds a helper from the job configuration.
     *
     * <p>Loads the column-type maps, instantiates the export record class and
     * assembles the full HCatalog schema (data columns followed by partition
     * columns) from the serialized job info stored in the configuration.
     *
     * @param conf the job configuration
     * @throws IOException if the export record class name or the HCatalog job
     *         info is missing, or the record class cannot be loaded or
     *         instantiated
     * @throws InterruptedException declared for caller compatibility
     */
    public SqoopHCatExportHelper(Configuration conf) throws IOException, InterruptedException {

        colTypesJava = DefaultStringifier.load(conf, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA,
                MapWritable.class);
        colTypesSql = DefaultStringifier.load(conf, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL,
                MapWritable.class);

        String recordClassName = conf.get(ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY);
        if (null == recordClassName) {
            throw new IOException(
                    "Export table class name (" + ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY + ") is not set!");
        }

        bigDecimalFormatString = conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
                ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);

        debugHCatExportMapper = conf.getBoolean(SqoopHCatUtilities.DEBUG_HCAT_EXPORT_MAPPER_PROP, false);

        // Instantiate a copy of the user's class to hold and parse the record.
        SqoopRecord record;
        try {
            Class<?> cls = Class.forName(recordClassName, true, Thread.currentThread().getContextClassLoader());
            record = (SqoopRecord) ReflectionUtils.newInstance(cls, conf);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
        if (null == record) {
            throw new IOException("Could not instantiate object of type " + recordClassName);
        }
        sqoopRecord = record;

        String inputJobInfoStr = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
        jobInfo = (InputJobInfo) HCatUtil.deserialize(inputJobInfoStr);
        if (null == jobInfo) {
            // Fail with a descriptive error instead of an NPE on the next line.
            throw new IOException("HCatalog job info (" + HCatConstants.HCAT_KEY_JOB_INFO + ") is not set!");
        }
        HCatSchema tableSchema = jobInfo.getTableInfo().getDataColumns();
        HCatSchema partitionSchema = jobInfo.getTableInfo().getPartitionColumns();
        hCatFullTableSchema = new HCatSchema(tableSchema.getFields());
        for (HCatFieldSchema hfs : partitionSchema.getFields()) {
            hCatFullTableSchema.append(hfs);
        }
    }

    /**
     * Copies every field of the given HCatalog record into the shared Sqoop
     * record, converting each value to the Java type of the target column.
     *
     * <p>Columns are looked up in HCatalog by the lower-cased Sqoop field
     * name. A value that cannot be converted is stored as {@code null} (an
     * error is logged by the conversion routine).
     *
     * @param hcr the HCatalog record to convert
     * @return the helper's Sqoop record instance, populated from {@code hcr};
     *         the same instance is returned on every call
     * @throws IOException if a column has no recorded Java/SQL type, or its
     *         HCatalog type is a complex or unknown type
     */
    public SqoopRecord convertToSqoopRecord(HCatRecord hcr) throws IOException {
        Text key = new Text();
        for (Map.Entry<String, Object> e : sqoopRecord.getFieldMap().entrySet()) {
            String colName = e.getKey();
            String hfn = colName.toLowerCase();
            key.set(hfn);

            Object javaTypeEntry = colTypesJava.get(key);
            Object sqlTypeEntry = colTypesSql.get(key);
            if (javaTypeEntry == null || sqlTypeEntry == null) {
                throw new IOException("No column type information found for export column " + colName);
            }
            String javaColType = javaTypeEntry.toString();
            int sqlType = ((IntWritable) sqlTypeEntry).get();

            HCatFieldSchema field = hCatFullTableSchema.get(hfn);
            HCatFieldSchema.Type fieldType = field.getType();
            Object hCatVal = hcr.get(hfn, hCatFullTableSchema);
            String hCatTypeString = field.getTypeString();
            Object sqlVal = convertToSqoop(hCatVal, fieldType, javaColType, hCatTypeString);
            if (debugHCatExportMapper) {
                LOG.debug("hCatVal " + hCatVal + " of type "
                        + (hCatVal == null ? null : hCatVal.getClass().getName()) + ",sqlVal " + sqlVal
                        + " of type " + (sqlVal == null ? null : sqlVal.getClass().getName()) + ",java type "
                        + javaColType + ", sql type = " + SqoopHCatUtilities.sqlTypeString(sqlType));
            }
            sqoopRecord.setField(colName, sqlVal);
        }
        return sqoopRecord;
    }

    /**
     * Converts one HCatalog value to the requested Java column type.
     *
     * @param val the HCatalog value, may be {@code null}
     * @param fieldType the HCatalog type of the field
     * @param javaColType the Java type name of the target column
     * @param hCatTypeString the HCatalog type string, used only in the error
     *        log message
     * @return the converted value; {@code null} if {@code val} is
     *         {@code null} or no conversion to {@code javaColType} exists (in
     *         which case an error is logged)
     * @throws IOException for ARRAY, MAP, STRUCT and any unrecognized type
     */
    private Object convertToSqoop(Object val, HCatFieldSchema.Type fieldType, String javaColType,
            String hCatTypeString) throws IOException {

        if (val == null) {
            return null;
        }

        Object converted;
        switch (fieldType) {
        case INT:
        case TINYINT:
        case SMALLINT:
        case FLOAT:
        case DOUBLE:
            converted = convertNumberTypes(val, javaColType);
            break;
        case BOOLEAN:
            converted = convertBooleanTypes(val, javaColType);
            break;
        case BIGINT:
            // A BIGINT feeding a date/time column is used as epoch millis.
            if (isTemporalType(javaColType)) {
                converted = temporalFromMillis((Long) val, javaColType);
            } else {
                converted = convertNumberTypes(val, javaColType);
            }
            break;
        case DATE: {
            Date date = (Date) val;
            converted = javaColType.equals(DATE_TYPE) ? date : temporalFromMillis(date.getTime(), javaColType);
            break;
        }
        case TIMESTAMP: {
            Timestamp ts = (Timestamp) val;
            // Return the original instance for timestamp targets so the
            // nanosecond component is kept.
            converted = javaColType.equals(TIMESTAMP_TYPE) ? ts : temporalFromMillis(ts.getTime(), javaColType);
            break;
        }
        case STRING:
        case VARCHAR:
        case CHAR:
            converted = convertStringTypes(val, javaColType);
            break;
        case BINARY:
            converted = convertBinaryTypes(val, javaColType);
            break;
        case DECIMAL:
            converted = convertDecimalTypes(val, javaColType);
            break;
        case ARRAY:
        case MAP:
        case STRUCT:
        default:
            throw new IOException("Cannot convert HCatalog type " + fieldType);
        }

        if (converted == null) {
            LOG.error("Cannot convert HCatalog object of " + " type " + hCatTypeString + " to java object type "
                    + javaColType);
        }
        return converted;
    }

    /** Returns whether the Java column type is Date, Time or Timestamp. */
    private static boolean isTemporalType(String javaColType) {
        return javaColType.equals(DATE_TYPE) || javaColType.equals(TIME_TYPE)
                || javaColType.equals(TIMESTAMP_TYPE);
    }

    /**
     * Creates a Date, Time or Timestamp (as selected by {@code javaColType})
     * from epoch milliseconds; returns {@code null} for any other type.
     */
    private static Object temporalFromMillis(long millis, String javaColType) {
        if (javaColType.equals(DATE_TYPE)) {
            return new Date(millis);
        } else if (javaColType.equals(TIME_TYPE)) {
            return new Time(millis);
        } else if (javaColType.equals(TIMESTAMP_TYPE)) {
            return new Timestamp(millis);
        }
        return null;
    }

    /**
     * Converts a {@link HiveDecimal} to a BigDecimal or String column value.
     *
     * <p>For String targets the plain (non-scientific) form is used when the
     * big-decimal format option is enabled.
     *
     * @return the converted value, or {@code null} for unsupported targets
     */
    private Object convertDecimalTypes(Object val, String javaColType) {
        HiveDecimal hd = (HiveDecimal) val;
        BigDecimal bd = hd.bigDecimalValue();

        if (javaColType.equals(BIG_DECIMAL_TYPE)) {
            return bd;
        } else if (javaColType.equals(STRING_TYPE)) {
            return bigDecimalFormatString ? bd.toPlainString() : bd.toString();
        }
        return null;
    }

    /**
     * Wraps a byte array in a {@link BytesWritable}.
     *
     * @return the converted value, or {@code null} for unsupported targets
     */
    private Object convertBinaryTypes(Object val, String javaColType) {
        byte[] bb = (byte[]) val;
        if (javaColType.equals(BYTESWRITABLE)) {
            BytesWritable bw = new BytesWritable();
            bw.set(bb, 0, bb.length);
            return bw;
        }
        return null;
    }

    /**
     * Parses the string form of a value into the requested Java column type.
     *
     * @return the converted value, or {@code null} if the target type is not
     *         supported or a date/time string has an unrecognized format
     * @throws NumberFormatException if a numeric target cannot be parsed
     * @throws IllegalArgumentException if a date/time string has the right
     *         shape but is rejected by the JDBC {@code valueOf} parser
     */
    private Object convertStringTypes(Object val, String javaColType) {
        String valStr = val.toString();
        if (javaColType.equals(BIG_DECIMAL_TYPE)) {
            return new BigDecimal(valStr);
        } else if (isTemporalType(javaColType)) {
            return convertStringToTemporal(valStr, javaColType);
        } else if (javaColType.equals(STRING_TYPE)) {
            return valStr;
        } else if (javaColType.equals(BOOLEAN_TYPE)) {
            return Boolean.valueOf(valStr);
        } else if (javaColType.equals(BYTE_TYPE)) {
            return Byte.parseByte(valStr);
        } else if (javaColType.equals(SHORT_TYPE)) {
            return Short.parseShort(valStr);
        } else if (javaColType.equals(INTEGER_TYPE)) {
            return Integer.parseInt(valStr);
        } else if (javaColType.equals(LONG_TYPE)) {
            return Long.parseLong(valStr);
        } else if (javaColType.equals(FLOAT_TYPE)) {
            return Float.parseFloat(valStr);
        } else if (javaColType.equals(DOUBLE_TYPE)) {
            return Double.parseDouble(valStr);
        }
        return null;
    }

    /**
     * Parses a date, time or timestamp string into the requested temporal
     * column type.
     *
     * <p>Oracle expects timestamps for Date also by default based on version,
     * so all date types are treated as assignment compatible: any of the
     * three string formats may feed any of the three target types.
     *
     * @return the parsed value, or {@code null} if the string matches none of
     *         the supported formats
     */
    private static Object convertStringToTemporal(String valStr, String javaColType) {
        if (DATE_PATTERN.matcher(valStr).matches()) {
            Date d = Date.valueOf(valStr);
            return javaColType.equals(DATE_TYPE) ? d : temporalFromMillis(d.getTime(), javaColType);
        }
        if (TIME_PATTERN.matcher(valStr).matches()) {
            Time t = Time.valueOf(valStr);
            return javaColType.equals(TIME_TYPE) ? t : temporalFromMillis(t.getTime(), javaColType);
        }
        if (TIMESTAMP_PATTERN.matcher(valStr).matches()) {
            Timestamp ts = Timestamp.valueOf(valStr);
            return javaColType.equals(TIMESTAMP_TYPE) ? ts : temporalFromMillis(ts.getTime(), javaColType);
        }
        return null;
    }

    /**
     * Converts a Boolean to the requested Java column type; numeric targets
     * receive 1 for {@code true} and 0 for {@code false}.
     *
     * @return the converted value, or {@code null} for unsupported targets
     */
    private Object convertBooleanTypes(Object val, String javaColType) {
        boolean b = (Boolean) val;
        int flag = b ? 1 : 0;
        if (javaColType.equals(BOOLEAN_TYPE)) {
            return val;
        } else if (javaColType.equals(BYTE_TYPE)) {
            return (byte) flag;
        } else if (javaColType.equals(SHORT_TYPE)) {
            return (short) flag;
        } else if (javaColType.equals(INTEGER_TYPE)) {
            return flag;
        } else if (javaColType.equals(LONG_TYPE)) {
            return (long) flag;
        } else if (javaColType.equals(FLOAT_TYPE)) {
            return (float) flag;
        } else if (javaColType.equals(DOUBLE_TYPE)) {
            return (double) flag;
        } else if (javaColType.equals(BIG_DECIMAL_TYPE)) {
            return new BigDecimal(flag);
        } else if (javaColType.equals(STRING_TYPE)) {
            return val.toString();
        }
        return null;
    }

    /**
     * Converts a {@link Number} to the requested Java column type.
     *
     * @return the converted value, or {@code null} for unsupported targets
     */
    private Object convertNumberTypes(Object val, String javaColType) {
        Number n = (Number) val;
        if (javaColType.equals(BYTE_TYPE)) {
            return n.byteValue();
        } else if (javaColType.equals(SHORT_TYPE)) {
            return n.shortValue();
        } else if (javaColType.equals(INTEGER_TYPE)) {
            return n.intValue();
        } else if (javaColType.equals(LONG_TYPE)) {
            return n.longValue();
        } else if (javaColType.equals(FLOAT_TYPE)) {
            return n.floatValue();
        } else if (javaColType.equals(DOUBLE_TYPE)) {
            return n.doubleValue();
        } else if (javaColType.equals(BIG_DECIMAL_TYPE)) {
            return toBigDecimal(n);
        } else if (javaColType.equals(BOOLEAN_TYPE)) {
            return isNonZero(n) ? Boolean.TRUE : Boolean.FALSE;
        } else if (javaColType.equals(STRING_TYPE)) {
            return n.toString();
        }
        return null;
    }

    /**
     * Converts a number to a BigDecimal without going through a lossy
     * {@code double}: integral types are converted exactly, and Float/Double
     * use their canonical decimal string (so 0.1 becomes 0.1 rather than the
     * full binary expansion of the nearest double).
     */
    private static BigDecimal toBigDecimal(Number n) {
        if (n instanceof Byte || n instanceof Short || n instanceof Integer || n instanceof Long) {
            return BigDecimal.valueOf(n.longValue());
        }
        if (n instanceof Float || n instanceof Double) {
            return new BigDecimal(n.toString());
        }
        return new BigDecimal(n.doubleValue());
    }

    /**
     * Returns whether the number is different from zero, testing the full
     * value rather than only its low-order byte (so 256 and 0.5 are non-zero).
     */
    private static boolean isNonZero(Number n) {
        if (n instanceof Float || n instanceof Double) {
            return n.doubleValue() != 0.0d;
        }
        return n.longValue() != 0L;
    }
}