hydrograph.engine.cascading.scheme.hive.parquet.ParquetWritableUtils.java Source code

Java tutorial

Introduction

Here is the source code for hydrograph.engine.cascading.scheme.hive.parquet.ParquetWritableUtils.java

Source

/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.cascading.scheme.hive.parquet;

import cascading.tuple.Tuple;

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.*;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.ShortWritable;

import parquet.io.api.Binary;

import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Static helpers that convert an object, described by a Hive {@link ObjectInspector}, into a
 * tree of Hadoop {@link Writable}s.
 *
 * <p>
 * Structs, lists and maps become (nested) {@link ArrayWritable}s; primitives become the matching
 * primitive writable. A {@code null} result means "no value" for that position.
 */
public class ParquetWritableUtils {
    // Shared name constants; none of them is referenced inside this class.
    public static final Text MAP_KEY = new Text("key");
    public static final Text MAP_VALUE = new Text("value");
    public static final Text MAP = new Text("map");
    public static final Text ARRAY = new Text("bag");

    /**
     * Converts a struct into an {@link ArrayWritable} holding one converted entry per field.
     *
     * @param obj
     *            the struct object, read through {@code inspector}
     * @param inspector
     *            inspector describing the struct's fields
     * @return an {@link ArrayWritable} whose entries follow the order of
     *         {@link StructObjectInspector#getAllStructFieldRefs()}; individual entries may be
     *         {@code null}
     * @throws SerDeException
     *             if a field has a category or primitive type that is not handled
     */
    public static ArrayWritable createStruct(final Object obj, final StructObjectInspector inspector)
            throws SerDeException {
        final List<? extends StructField> fields = inspector.getAllStructFieldRefs();
        final Writable[] converted = new Writable[fields.size()];
        for (int i = 0; i < converted.length; i++) {
            final StructField field = fields.get(i);
            final Object fieldData = inspector.getStructFieldData(obj, field);
            converted[i] = createObject(fieldData, field.getFieldObjectInspector());
        }
        return new ArrayWritable(Writable.class, converted);
    }

    /**
     * Converts a map into an {@link ArrayWritable} that wraps a single inner
     * {@link ArrayWritable} of two-element (key, value) entries.
     *
     * <p>
     * Entries whose converted key is {@code null} are dropped; a {@code null} converted value is
     * kept.
     *
     * @return the wrapped entries, or {@code null} when the map is {@code null} or yields no
     *         entries
     * @throws SerDeException
     *             if a key or value cannot be converted
     */
    private static Writable createMap(final Object obj, final MapObjectInspector inspector) throws SerDeException {
        final Map<?, ?> sourceMap = inspector.getMap(obj);
        final ObjectInspector keyInspector = inspector.getMapKeyObjectInspector();
        final ObjectInspector valueInspector = inspector.getMapValueObjectInspector();
        final List<ArrayWritable> entries = new ArrayList<>();

        if (sourceMap != null) {
            for (final Entry<?, ?> entry : sourceMap.entrySet()) {
                final Writable key = createObject(entry.getKey(), keyInspector);
                final Writable value = createObject(entry.getValue(), valueInspector);
                if (key != null) {
                    entries.add(new ArrayWritable(Writable.class, new Writable[] { key, value }));
                }
            }
        }
        if (entries.isEmpty()) {
            return null;
        }
        final ArrayWritable pairs = new ArrayWritable(ArrayWritable.class,
                entries.toArray(new ArrayWritable[entries.size()]));
        return new ArrayWritable(Writable.class, new Writable[] { pairs });
    }

    /**
     * Converts a list, supplied as a Cascading {@link Tuple}, into an {@link ArrayWritable} that
     * wraps a single inner {@link ArrayWritable} of the converted elements.
     *
     * <p>
     * The inner array's value class is taken from the first non-null converted element and falls
     * back to {@link Writable} when every element is {@code null}.
     *
     * @param obj
     *            the list value; must be a {@link Tuple} or {@code null}
     * @return the wrapped elements, or {@code null} when {@code obj} is {@code null} or empty
     * @throws SerDeException
     *             if an element cannot be converted
     */
    private static ArrayWritable createArray(final Object obj, final ListObjectInspector inspector)
            throws SerDeException {
        // A missing list is "no value", matching how null maps and null primitives are handled.
        if (obj == null) {
            return null;
        }
        final ObjectInspector elementInspector = inspector.getListElementObjectInspector();
        final Tuple tuple = (Tuple) obj;
        final Writable[] elements = new Writable[tuple.size()];
        Class<? extends Writable> elementClass = null;
        for (int i = 0; i < elements.length; i++) {
            elements[i] = createObject(tuple.getObject(i), elementInspector);
            // Elements may legitimately convert to null, so do not assume the first one is usable.
            if (elementClass == null && elements[i] != null) {
                elementClass = elements[i].getClass();
            }
        }

        if (elements.length == 0) {
            return null;
        }
        if (elementClass == null) {
            elementClass = Writable.class;
        }
        final ArrayWritable inner = new ArrayWritable(elementClass, elements);
        return new ArrayWritable(Writable.class, new Writable[] { inner });
    }

    /**
     * Converts a primitive value into the writable matching the inspector's primitive category.
     *
     * <p>
     * For the numeric, boolean, timestamp and date categories the raw value is cast to the
     * expected Java type, wrapped in a writable and read back through the inspector; a value of
     * an unexpected Java type therefore fails with a {@link ClassCastException}.
     *
     * @param obj
     *            the raw value; {@code null} yields {@code null}
     * @param inspector
     *            inspector whose primitive category selects the conversion
     * @return the converted writable, or {@code null} for a {@code null} value or the
     *         {@code VOID} category
     * @throws SerDeException
     *             if the primitive category is not handled
     */
    private static Writable createPrimitive(final Object obj, final PrimitiveObjectInspector inspector)
            throws SerDeException {
        if (obj == null) {
            return null;
        }

        switch (inspector.getPrimitiveCategory()) {
        case VOID:
            return null;
        case BOOLEAN:
            return new BooleanWritable(
                    ((BooleanObjectInspector) inspector).get(new BooleanWritable((boolean) obj)));
        case BYTE:
            return new ByteWritable(((ByteObjectInspector) inspector).get(new ByteWritable((byte) obj)));
        case DOUBLE:
            return new DoubleWritable(((DoubleObjectInspector) inspector).get(new DoubleWritable((double) obj)));
        case FLOAT:
            return new FloatWritable(((FloatObjectInspector) inspector).get(new FloatWritable((float) obj)));
        case INT:
            return new IntWritable(((IntObjectInspector) inspector).get(new IntWritable((int) obj)));
        case LONG:
            return new LongWritable(((LongObjectInspector) inspector).get(new LongWritable((long) obj)));
        case SHORT:
            return new ShortWritable(((ShortObjectInspector) inspector).get(new ShortWritable((short) obj)));
        case STRING:
            return createString(obj, inspector);
        case DECIMAL:
            return new HiveDecimalWritable(toHiveDecimal(obj));
        case TIMESTAMP:
            // The raw value is a long passed to the Timestamp constructor (epoch milliseconds).
            return new TimestampWritable(((TimestampObjectInspector) inspector)
                    .getPrimitiveJavaObject(new TimestampWritable(new Timestamp((long) obj))));
        case DATE:
            // The raw value is a long passed to the java.sql.Date constructor (epoch milliseconds).
            return new DateWritable(((DateObjectInspector) inspector)
                    .getPrimitiveJavaObject(new DateWritable(new Date((long) obj))));
        case CHAR:
            final String strippedValue = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(obj)
                    .getStrippedValue();
            return new BytesWritable(Binary.fromString(strippedValue).getBytes());
        case VARCHAR:
            final String value = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(obj).getValue();
            return new BytesWritable(Binary.fromString(value).getBytes());
        default:
            throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
        }
    }

    /**
     * Renders a STRING-category value as UTF-8 bytes.
     *
     * <p>
     * A {@link Long} is formatted as a {@code yyyy-MM-dd} date, a {@link BigDecimal} uses its
     * {@code toString()} form, and anything else is read through the inspector, which must then
     * be a {@link StringObjectInspector}.
     */
    private static Writable createString(final Object obj, final PrimitiveObjectInspector inspector) {
        final String text;
        if (obj instanceof Long) {
            // SimpleDateFormat is not thread-safe, so use a fresh instance per call. It formats
            // in the JVM's default time zone.
            final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
            text = dateFormat.format(new Date((Long) obj));
        } else if (obj instanceof BigDecimal) {
            text = obj.toString();
        } else {
            text = ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
        }
        return new BytesWritable(text.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Builds a {@link HiveDecimal} from a DECIMAL-category value.
     *
     * <p>
     * A {@link Double} goes through {@link BigDecimal#valueOf(double)} so that, for example,
     * {@code 0.1} becomes the decimal {@code 0.1} rather than the exact binary expansion that
     * {@code new BigDecimal(double)} would produce.
     */
    private static HiveDecimal toHiveDecimal(final Object obj) {
        final BigDecimal decimal;
        if (obj instanceof Double) {
            decimal = BigDecimal.valueOf((Double) obj);
        } else if (obj instanceof BigDecimal) {
            decimal = (BigDecimal) obj;
        } else {
            // For any other valid number, toString() gives its decimal representation, from
            // which a BigDecimal can be parsed.
            decimal = new BigDecimal(obj.toString());
        }
        return HiveDecimal.create(decimal);
    }

    /**
     * Dispatches on the inspector's category to the matching conversion.
     *
     * @throws SerDeException
     *             if the category is not STRUCT, LIST, MAP or PRIMITIVE, or a nested conversion
     *             fails
     */
    private static Writable createObject(final Object obj, final ObjectInspector inspector) throws SerDeException {
        switch (inspector.getCategory()) {
        case STRUCT:
            return createStruct(obj, (StructObjectInspector) inspector);
        case LIST:
            return createArray(obj, (ListObjectInspector) inspector);
        case MAP:
            return createMap(obj, (MapObjectInspector) inspector);
        case PRIMITIVE:
            return createPrimitive(obj, (PrimitiveObjectInspector) inspector);
        default:
            throw new SerDeException("Unknown data type : " + inspector.getCategory());
        }
    }

}