com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java Source code

Java tutorial

Introduction

Here is the source code for com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ebay.nest.io.sede.binarysortable;

import java.io.IOException;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

import com.ebay.nest.io.nestfile.HiveDecimal;
import com.ebay.nest.io.sede.AbstractSerDe;
import com.ebay.nest.io.sede.ByteWritable;
import com.ebay.nest.io.sede.DateWritable;
import com.ebay.nest.io.sede.DoubleWritable;
import com.ebay.nest.io.sede.HiveDecimalWritable;
import com.ebay.nest.io.sede.HiveVarcharWritable;
import com.ebay.nest.io.sede.SerDeException;
import com.ebay.nest.io.sede.SerDeStats;
import com.ebay.nest.io.sede.ShortWritable;
import com.ebay.nest.io.sede.TimestampWritable;
import com.ebay.nest.io.sede.serdeConstants;
import com.ebay.nest.io.sede.objectinspector.ListObjectInspector;
import com.ebay.nest.io.sede.objectinspector.MapObjectInspector;
import com.ebay.nest.io.sede.objectinspector.ObjectInspector;
import com.ebay.nest.io.sede.objectinspector.PrimitiveObjectInspector;
import com.ebay.nest.io.sede.objectinspector.StandardUnionObjectInspector.StandardUnion;
import com.ebay.nest.io.sede.objectinspector.StructField;
import com.ebay.nest.io.sede.objectinspector.StructObjectInspector;
import com.ebay.nest.io.sede.objectinspector.UnionObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.BinaryObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.BooleanObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.ByteObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.DateObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.DoubleObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.FloatObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.HiveDecimalObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.HiveVarcharObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.IntObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.LongObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.ShortObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.StringObjectInspector;
import com.ebay.nest.io.sede.objectinspector.primitive.TimestampObjectInspector;
import com.ebay.nest.io.sede.typeinfo.ListTypeInfo;
import com.ebay.nest.io.sede.typeinfo.MapTypeInfo;
import com.ebay.nest.io.sede.typeinfo.PrimitiveTypeInfo;
import com.ebay.nest.io.sede.typeinfo.StructTypeInfo;
import com.ebay.nest.io.sede.typeinfo.TypeInfo;
import com.ebay.nest.io.sede.typeinfo.TypeInfoFactory;
import com.ebay.nest.io.sede.typeinfo.TypeInfoUtils;
import com.ebay.nest.io.sede.typeinfo.UnionTypeInfo;
import com.ebay.nest.io.sede.typeinfo.VarcharTypeParams;

/**
 * BinarySortableSerDe can be used to write data in a way that the data can be
 * compared byte-by-byte with the same order.
 *
 * The data format:
 * <ul>
 * <li>NULL: a single byte \0.</li>
 * <li>NON-NULL primitives: ALWAYS prepend a single byte \1, and then:
 * <ul>
 * <li>Boolean: FALSE = \1, TRUE = \2.</li>
 * <li>Byte, Short, Int, Long: flip the sign bit to make sure negative comes
 * before positive.</li>
 * <li>Float, Double: flip the sign bit for positive values, and all bits for
 * negative values.</li>
 * <li>String: NULL-terminated UTF-8 string, with NULL (\0) escaped to \1 \1,
 * and \1 escaped to \1 \2.</li>
 * </ul>
 * </li>
 * <li>NON-NULL complex types: ALWAYS prepend a single byte \1, and then:
 * <ul>
 * <li>Struct: one field by one field.</li>
 * <li>List: a \1 before each element, and a \0 to terminate.</li>
 * <li>Map: a \1 before each entry (the key, then the value), and a \0 to
 * terminate.</li>
 * </ul>
 * </li>
 * </ul>
 *
 * This SerDe takes an additional parameter SERIALIZATION_SORT_ORDER, which is a
 * string containing only "+" and "-". The length of the string should equal
 * the number of fields in the top-level struct for serialization. "+" means the
 * field should be sorted in ascending order, and "-" means descending order.
 * The sub-fields in the same top-level field will have the same sort order.
 *
 */
public class BinarySortableSerDe extends AbstractSerDe {

    /** Logger for this class. */
    public static final Log LOG = LogFactory.getLog(BinarySortableSerDe.class.getName());

    // Names and types of the top-level columns; populated by initialize() from the table properties.
    List<String> columnNames;
    List<TypeInfo> columnTypes;

    // Struct type describing a whole row, and the standard writable object inspector built from it.
    TypeInfo rowTypeInfo;
    StructObjectInspector rowObjectInspector;

    // One flag per top-level column: true where the sort-order character for that column is '-'.
    boolean[] columnSortOrderIsDesc;

    // Scratch byte buffer for DECIMAL digit decoding.
    // NOTE(review): this is static, mutable and not synchronized, so it is shared by every
    // instance in the JVM; any concurrent use of it would race.
    private static byte[] decimalBuffer = null;
    // Charset used when converting DECIMAL digit strings to and from bytes.
    private static Charset decimalCharSet = Charset.forName("US-ASCII");

    /**
     * Configures this SerDe from the table properties: column names, column
     * types and the per-column sort order.
     *
     * A missing (null) column name or column type property is treated like an
     * empty one. Columns for which the sort-order string supplies no character
     * (property absent or shorter than the column count) are sorted ascending.
     *
     * @param conf job configuration (not read by this method)
     * @param tbl  table properties holding LIST_COLUMNS, LIST_COLUMN_TYPES and
     *             optionally SERIALIZATION_SORT_ORDER
     * @throws SerDeException if the number of column names differs from the
     *                        number of column types
     */
    @Override
    public void initialize(Configuration conf, Properties tbl) throws SerDeException {

        // Get column names and types. Properties.getProperty returns null for a
        // missing key, so guard against that before calling length().
        String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
        String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
        if (columnNameProperty == null || columnNameProperty.length() == 0) {
            columnNames = new ArrayList<String>();
        } else {
            columnNames = Arrays.asList(columnNameProperty.split(","));
        }
        if (columnTypeProperty == null || columnTypeProperty.length() == 0) {
            columnTypes = new ArrayList<TypeInfo>();
        } else {
            columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        }
        // Fail fast on inconsistent metadata; an assert would be skipped when
        // assertions are disabled (the JVM default).
        if (columnNames.size() != columnTypes.size()) {
            throw new SerDeException(new IllegalArgumentException(
                    "Number of column names (" + columnNames.size()
                            + ") does not match number of column types (" + columnTypes.size() + ")"));
        }

        // Create row related objects
        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        rowObjectInspector = (StructObjectInspector) TypeInfoUtils
                .getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
        row = new ArrayList<Object>(columnNames.size());
        for (int i = 0; i < columnNames.size(); i++) {
            row.add(null);
        }

        // Get the sort order. Positions beyond the end of the string default to
        // ascending instead of throwing StringIndexOutOfBoundsException.
        String columnSortOrder = tbl.getProperty(serdeConstants.SERIALIZATION_SORT_ORDER);
        columnSortOrderIsDesc = new boolean[columnNames.size()];
        for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
            columnSortOrderIsDesc[i] = columnSortOrder != null
                    && i < columnSortOrder.length()
                    && columnSortOrder.charAt(i) == '-';
        }
    }

    /**
     * Returns the Writable class used for serialized rows, which is
     * {@link BytesWritable}.
     */
    @Override
    public Class<? extends Writable> getSerializedClass() {
        return BytesWritable.class;
    }

    /**
     * Returns the struct object inspector for rows of this SerDe. The field
     * is assigned in initialize(), so it is null before that call.
     */
    @Override
    public ObjectInspector getObjectInspector() throws SerDeException {
        return rowObjectInspector;
    }

    // Row container reused by deserialize(Writable): the same list instance is
    // overwritten and returned on every call.
    ArrayList<Object> row;
    // Reusable read buffer, reset onto the bytes of each record being deserialized.
    InputByteBuffer inputByteBuffer = new InputByteBuffer();

    /**
     * Decodes one serialized record into the reusable row list.
     *
     * The returned list is the same instance on every call; each slot is
     * overwritten with the freshly decoded column value, and the previous slot
     * content is handed to the decoder for reuse.
     *
     * @param blob the serialized record; must be a {@link BytesWritable}
     * @return the shared row list, one entry per top-level column
     * @throws SerDeException wrapping any IOException raised while reading
     */
    @Override
    public Object deserialize(Writable blob) throws SerDeException {
        final BytesWritable encoded = (BytesWritable) blob;
        inputByteBuffer.reset(encoded.getBytes(), 0, encoded.getLength());

        try {
            int col = 0;
            while (col < columnNames.size()) {
                final TypeInfo columnType = columnTypes.get(col);
                final boolean descending = columnSortOrderIsDesc[col];
                final Object previous = row.get(col);
                final Object decoded = deserialize(inputByteBuffer, columnType, descending, previous);
                row.set(col, decoded);
                col++;
            }
        } catch (IOException ioe) {
            throw new SerDeException(ioe);
        }

        return row;
    }

    /**
     * Decodes a single value of the given type from the buffer, recursing
     * into the elements of complex types.
     *
     * The value starts with a marker byte: 0 means null, anything else is
     * expected to be 1 and is followed by the type-specific encoding.
     *
     * @param buffer source buffer positioned at the value's marker byte
     * @param type   type of the value to decode
     * @param invert flag passed to every buffer read; true for columns sorted
     *               in descending order
     * @param reuse  object from a previous call that may be overwritten and
     *               returned instead of allocating, or null
     * @return the decoded value; null for a null marker or a VOID primitive
     * @throws IOException      propagated from the buffer reads
     * @throws RuntimeException if the type category is not recognized
     */
    static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse)
            throws IOException {

        // Is this field a null?
        byte isNull = buffer.read(invert);
        if (isNull == 0) {
            return null;
        }
        assert (isNull == 1);

        switch (type.getCategory()) {
        case PRIMITIVE: {
            PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
            switch (ptype.getPrimitiveCategory()) {
            case VOID: {
                return null;
            }
            case BOOLEAN: {
                BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
                byte b = buffer.read(invert);
                assert (b == 1 || b == 2);
                r.set(b == 2);
                return r;
            }
            case BYTE: {
                ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
                // Undo the sign-bit flip applied during serialization.
                r.set((byte) (buffer.read(invert) ^ 0x80));
                return r;
            }
            case SHORT: {
                ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
                // Big-endian, sign bit of the first byte flipped.
                int v = buffer.read(invert) ^ 0x80;
                v = (v << 8) + (buffer.read(invert) & 0xff);
                r.set((short) v);
                return r;
            }
            case INT: {
                IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
                r.set(deserializeInt(buffer, invert));
                return r;
            }
            case LONG: {
                LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
                // Big-endian, sign bit of the first byte flipped.
                long v = buffer.read(invert) ^ 0x80;
                for (int i = 0; i < 7; i++) {
                    v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                r.set(v);
                return r;
            }
            case FLOAT: {
                FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
                int v = 0;
                for (int i = 0; i < 4; i++) {
                    v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                if ((v & (1 << 31)) == 0) {
                    // negative number, flip all bits
                    v = ~v;
                } else {
                    // positive number, flip the first bit
                    v = v ^ (1 << 31);
                }
                r.set(Float.intBitsToFloat(v));
                return r;
            }
            case DOUBLE: {
                DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
                long v = 0;
                for (int i = 0; i < 8; i++) {
                    v = (v << 8) + (buffer.read(invert) & 0xff);
                }
                if ((v & (1L << 63)) == 0) {
                    // negative number, flip all bits
                    v = ~v;
                } else {
                    // positive number, flip the first bit
                    v = v ^ (1L << 63);
                }
                r.set(Double.longBitsToDouble(v));
                return r;
            }
            case STRING: {
                Text r = reuse == null ? new Text() : (Text) reuse;
                return deserializeText(buffer, invert, r);
            }

            case VARCHAR: {
                HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
                // Use HiveVarchar's internal Text member to read the value.
                deserializeText(buffer, invert, r.getTextValue());
                // If we cache helper data for deserialization we could avoid having
                // to call getVarcharMaxLength() on every deserialize call.
                r.enforceMaxLength(getVarcharMaxLength(type));
                return r;
            }

            case BINARY: {
                BytesWritable bw = new BytesWritable();
                // First pass: count the decoded length. An escape pair (\1 x)
                // decodes to one byte; the loop also consumes the \0 terminator.
                int start = buffer.tell();
                int length = 0;
                do {
                    byte b = buffer.read(invert);
                    if (b == 0) {
                        // end of value
                        break;
                    }
                    if (b == 1) {
                        // the last byte is an escape byte. read the actual byte
                        buffer.read(invert);
                    }
                    length++;
                } while (true);

                // Size the result (and copy the raw bytes) in one call.
                bw.set(buffer.getData(), start, length);

                // The raw copy is already correct only when nothing was escaped
                // (exactly length + 1 encoded bytes, counting the terminator) AND
                // the bytes are not stored inverted. This relies on a
                // non-inverting read returning the backing bytes unchanged.
                boolean escaped = (buffer.tell() - start) != length + 1;
                if (invert || escaped) {
                    // Second pass: decode byte-by-byte over the raw copy.
                    buffer.seek(start);
                    byte[] rdata = bw.getBytes();
                    for (int i = 0; i < length; i++) {
                        byte b = buffer.read(invert);
                        if (b == 1) {
                            // The last byte is an escape byte, read the actual byte.
                            // The serialization format escapes \0 to \1 \1, and \1 to
                            // \1 \2, to make sure the value is null-terminated.
                            b = (byte) (buffer.read(invert) - 1);
                        }
                        rdata[i] = b;
                    }
                    // Read the null terminator again.
                    byte b = buffer.read(invert);
                    assert (b == 0);
                }
                return bw;
            }

            case DATE: {
                DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
                d.set(deserializeInt(buffer, invert));
                return d;
            }

            case TIMESTAMP: {
                TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
                byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];

                for (int i = 0; i < bytes.length; i++) {
                    bytes[i] = buffer.read(invert);
                }
                t.setBinarySortable(bytes, 0);
                return t;
            }

            case DECIMAL: {
                // Layout: sign byte, 4-byte factor, then the \0-terminated digit
                // string (stored with the opposite inversion for negatives).

                HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);

                int sign = buffer.read(invert) - 1;
                assert (sign == 1 || sign == -1 || sign == 0);
                boolean positive = sign != -1;

                int factor = deserializeInt(buffer, invert);

                if (!positive) {
                    factor = -factor;
                }

                // Digits of a negative number were written with the opposite inversion.
                boolean digitInvert = positive ? invert : !invert;

                // First pass: count the digits up to the terminator.
                int start = buffer.tell();
                int length = 0;

                do {
                    byte digit = buffer.read(digitInvert);
                    assert (digit != 1);

                    if (digit == 0) {
                        // end of digits
                        break;
                    }

                    length++;
                } while (true);

                // Second pass into a local array: this method is static, so a
                // shared static scratch buffer would race between threads.
                byte[] digitBytes = new byte[length];
                buffer.seek(start);
                for (int i = 0; i < length; ++i) {
                    digitBytes[i] = buffer.read(digitInvert);
                }

                // read the null byte again
                buffer.read(digitInvert);

                String digits = new String(digitBytes, decimalCharSet);
                BigInteger bi = new BigInteger(digits);
                HiveDecimal bd = new HiveDecimal(bi).scaleByPowerOfTen(factor - length);

                if (!positive) {
                    bd = bd.negate();
                }

                bdw.set(bd);
                return bdw;
            }

            default: {
                throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
            }
            }
        }

        case LIST: {
            ListTypeInfo ltype = (ListTypeInfo) type;
            TypeInfo etype = ltype.getListElementTypeInfo();

            // Create the list if needed
            ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;

            // Read the list
            int size = 0;
            while (true) {
                int more = buffer.read(invert);
                if (more == 0) {
                    // \0 to terminate
                    break;
                }
                // \1 followed by each element
                assert (more == 1);
                if (size == r.size()) {
                    r.add(null);
                }
                r.set(size, deserialize(buffer, etype, invert, r.get(size)));
                size++;
            }
            // Remove additional elements if the list is reused
            while (r.size() > size) {
                r.remove(r.size() - 1);
            }
            return r;
        }
        case MAP: {
            MapTypeInfo mtype = (MapTypeInfo) type;
            TypeInfo ktype = mtype.getMapKeyTypeInfo();
            TypeInfo vtype = mtype.getMapValueTypeInfo();

            // Create the map if needed
            Map<Object, Object> r;
            if (reuse == null) {
                r = new HashMap<Object, Object>();
            } else {
                r = (HashMap<Object, Object>) reuse;
                r.clear();
            }

            while (true) {
                int more = buffer.read(invert);
                if (more == 0) {
                    // \0 to terminate
                    break;
                }
                // \1 followed by each key and then each value
                assert (more == 1);
                // Keys and values are never reused: the map was cleared above.
                Object k = deserialize(buffer, ktype, invert, null);
                Object v = deserialize(buffer, vtype, invert, null);
                r.put(k, v);
            }
            return r;
        }
        case STRUCT: {
            StructTypeInfo stype = (StructTypeInfo) type;
            List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
            int size = fieldTypes.size();
            // Create the struct if needed
            ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
            assert (r.size() <= size);
            // Set the size of the struct
            while (r.size() < size) {
                r.add(null);
            }
            // Read one field by one field
            for (int eid = 0; eid < size; eid++) {
                r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
            }
            return r;
        }
        case UNION: {
            UnionTypeInfo utype = (UnionTypeInfo) type;
            StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
            // Read the tag, which selects the type of the wrapped value
            byte tag = buffer.read(invert);
            r.setTag(tag);
            r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
            return r;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + type.getCategory());
        }
        }
    }

    /**
     * Reads a 4-byte big-endian int whose sign bit was flipped on write.
     *
     * @param buffer source buffer positioned at the first of the four bytes
     * @param invert flag passed to every buffer read
     * @return the decoded int
     * @throws IOException propagated from the buffer reads
     */
    private static int deserializeInt(InputByteBuffer buffer, boolean invert) throws IOException {
        // Most significant byte first; XOR with 0x80 restores the original sign bit.
        int result = buffer.read(invert) ^ 0x80;
        int remaining = 3;
        while (remaining-- > 0) {
            // After the shift the low 8 bits are zero, so OR-ing in the next byte
            // is the same as adding it.
            result = (result << 8) | (buffer.read(invert) & 0xff);
        }
        return result;
    }

    /**
     * Looks up the maximum length declared by a varchar type.
     *
     * @param type a primitive type info whose type params, if present, are
     *             {@link VarcharTypeParams}
     * @return the declared length, or -1 when the type carries no type params
     */
    static int getVarcharMaxLength(TypeInfo type) {
        final PrimitiveTypeInfo primitive = (PrimitiveTypeInfo) type;
        final VarcharTypeParams params = (VarcharTypeParams) primitive.getTypeParams();
        return params == null ? -1 : params.length;
    }

    /**
     * Decodes a \0-terminated, escaped byte string from the buffer into the
     * given Text.
     *
     * In the encoded form, \1 \1 stands for a \0 byte and \1 \2 stands for a
     * \1 byte; every other byte stands for itself.
     *
     * @param buffer source buffer positioned at the first encoded byte
     * @param invert flag passed to every buffer read; true for columns sorted
     *               in descending order
     * @param r      Text to overwrite with the decoded bytes
     * @return {@code r}
     * @throws IOException propagated from the buffer reads
     */
    static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) throws IOException {
        // First pass: count the decoded length. An escape pair decodes to one
        // byte; the loop also consumes the \0 terminator.
        int start = buffer.tell();
        int length = 0;
        do {
            byte b = buffer.read(invert);
            if (b == 0) {
                // end of string
                break;
            }
            if (b == 1) {
                // the last char is an escape char. read the actual char
                buffer.read(invert);
            }
            length++;
        } while (true);

        // Size the Text (and copy the raw bytes) in one call.
        r.set(buffer.getData(), start, length);

        // The raw copy is already the final value only when nothing was escaped
        // (exactly length + 1 encoded bytes, counting the terminator) AND the
        // bytes are not stored inverted. The previous check compared against
        // length, which can never match once the terminator has been consumed,
        // and it ignored inversion. This relies on a non-inverting read
        // returning the backing bytes unchanged.
        boolean escaped = (buffer.tell() - start) != length + 1;
        if (invert || escaped) {
            // Second pass: decode byte-by-byte over the raw copy.
            buffer.seek(start);
            byte[] rdata = r.getBytes();
            for (int i = 0; i < length; i++) {
                byte b = buffer.read(invert);
                if (b == 1) {
                    // The last char is an escape char, read the actual char.
                    // The serialization format escapes \0 to \1 \1, and \1 to
                    // \1 \2, to make sure the string is null-terminated.
                    b = (byte) (buffer.read(invert) - 1);
                }
                rdata[i] = b;
            }
            // Read the null terminator again.
            byte b = buffer.read(invert);
            assert (b == 0);
        }
        return r;
    }

    // Result holder reused by serialize(Object, ObjectInspector): the same
    // instance is refilled and returned on every call.
    BytesWritable serializeBytesWritable = new BytesWritable();
    // Reusable output buffer, reset at the start of every serialize call.
    OutputByteBuffer outputByteBuffer = new OutputByteBuffer();

    /**
     * Encodes a row into the binary-sortable format.
     *
     * The returned {@link BytesWritable} is the same instance on every call
     * and is overwritten by the next call.
     *
     * @param obj          the row object
     * @param objInspector inspector for {@code obj}; must be a
     *                     {@link StructObjectInspector}
     * @return the shared writable holding the encoded row
     * @throws SerDeException propagated from encoding the individual fields
     */
    @Override
    public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
        outputByteBuffer.reset();
        final StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
        final List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();

        // Encode the top-level columns in order, each with its own sort direction.
        int col = 0;
        while (col < columnNames.size()) {
            final StructField field = fieldRefs.get(col);
            final Object fieldData = rowInspector.getStructFieldData(obj, field);
            serialize(outputByteBuffer, fieldData, field.getFieldObjectInspector(), columnSortOrderIsDesc[col]);
            col++;
        }

        final int encodedLength = outputByteBuffer.getLength();
        serializeBytesWritable.set(outputByteBuffer.getData(), 0, encodedLength);
        return serializeBytesWritable;
    }

    /**
     * Encodes a single value, recursing into the elements of complex types.
     *
     * A null value is written as a single 0 byte. Any other value is written
     * as a 1 byte followed by its type-specific encoding.
     *
     * @param buffer destination buffer
     * @param o      the value to encode; may be null
     * @param oi     object inspector describing {@code o}
     * @param invert flag passed to every buffer write; true for columns sorted
     *               in descending order
     * @throws SerDeException   declared for callers; not thrown directly here
     * @throws RuntimeException if the type category is not recognized
     */
    static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert)
            throws SerDeException {
        // Is this field a null?
        if (o == null) {
            buffer.write((byte) 0, invert);
            return;
        }
        // This field is not a null.
        buffer.write((byte) 1, invert);

        switch (oi.getCategory()) {
        case PRIMITIVE: {
            PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
            switch (poi.getPrimitiveCategory()) {
            case VOID: {
                return;
            }
            case BOOLEAN: {
                boolean v = ((BooleanObjectInspector) poi).get(o);
                buffer.write((byte) (v ? 2 : 1), invert);
                return;
            }
            case BYTE: {
                ByteObjectInspector boi = (ByteObjectInspector) poi;
                byte v = boi.get(o);
                // Flip the sign bit so negative values sort before positive ones.
                buffer.write((byte) (v ^ 0x80), invert);
                return;
            }
            case SHORT: {
                ShortObjectInspector spoi = (ShortObjectInspector) poi;
                short v = spoi.get(o);
                // Big-endian with the sign bit flipped.
                buffer.write((byte) ((v >> 8) ^ 0x80), invert);
                buffer.write((byte) v, invert);
                return;
            }
            case INT: {
                IntObjectInspector ioi = (IntObjectInspector) poi;
                int v = ioi.get(o);
                serializeInt(buffer, v, invert);
                return;
            }
            case LONG: {
                LongObjectInspector loi = (LongObjectInspector) poi;
                long v = loi.get(o);
                // Big-endian with the sign bit flipped.
                buffer.write((byte) ((v >> 56) ^ 0x80), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
            }
            case FLOAT: {
                FloatObjectInspector foi = (FloatObjectInspector) poi;
                int v = Float.floatToIntBits(foi.get(o));
                if ((v & (1 << 31)) != 0) {
                    // negative number, flip all bits
                    v = ~v;
                } else {
                    // positive number, flip the first bit
                    v = v ^ (1 << 31);
                }
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
            }
            case DOUBLE: {
                DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                long v = Double.doubleToLongBits(doi.get(o));
                if ((v & (1L << 63)) != 0) {
                    // negative number, flip all bits
                    v = ~v;
                } else {
                    // positive number, flip the first bit
                    v = v ^ (1L << 63);
                }
                buffer.write((byte) (v >> 56), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
            }
            case STRING: {
                StringObjectInspector soi = (StringObjectInspector) poi;
                Text t = soi.getPrimitiveWritableObject(o);
                serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                return;
            }

            case VARCHAR: {
                HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
                HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
                // use varchar's text field directly
                Text t = hc.getTextValue();
                serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                return;
            }

            case BINARY: {
                BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
                BytesWritable ba = baoi.getPrimitiveWritableObject(o);
                // serializeBytes only reads the first getLength() bytes, so the
                // backing array can be passed as-is without a trimmed copy.
                serializeBytes(buffer, ba.getBytes(), ba.getLength(), invert);
                return;
            }
            case DATE: {
                DateObjectInspector doi = (DateObjectInspector) poi;
                int v = doi.getPrimitiveWritableObject(o).getDays();
                serializeInt(buffer, v, invert);
                return;
            }
            case TIMESTAMP: {
                TimestampObjectInspector toi = (TimestampObjectInspector) poi;
                TimestampWritable t = toi.getPrimitiveWritableObject(o);
                byte[] data = t.getBinarySortable();
                for (int i = 0; i < data.length; i++) {
                    buffer.write(data[i], invert);
                }
                return;
            }
            case DECIMAL: {
                // decimals are encoded in three pieces:
                // sign: 1, 2 or 3 for smaller, equal or larger than 0 respectively
                // factor: Number that indicates the amount of digits you have to move
                // the decimal point left or right until the resulting number is smaller
                // than one but has something other than 0 as the first digit.
                // digits: which is a string of all the digits in the decimal. If the number
                // is negative the binary string will be inverted to get the correct ordering.
                // Example: 0.00123
                // Sign is 3 (bigger than 0)
                // Factor is -2 (move decimal point 2 positions right)
                // Digits are: 123

                HiveDecimalObjectInspector boi = (HiveDecimalObjectInspector) poi;
                HiveDecimal dec = boi.getPrimitiveJavaObject(o);

                // get the sign of the big decimal
                int sign = dec.compareTo(HiveDecimal.ZERO);

                // we'll encode the absolute value (sign is separate)
                dec = dec.abs();

                // get the scale factor to turn big decimal into a decimal < 1
                int factor = dec.precision() - dec.scale();
                factor = sign == 1 ? factor : -factor;

                // convert the absolute big decimal to string
                // NOTE(review): the result of this call is discarded. If HiveDecimal
                // is immutable (abs() and negate() are used as value-returning
                // elsewhere in this file) the call has no effect on the digits
                // below -- confirm and remove.
                dec.scaleByPowerOfTen(Math.abs(dec.scale()));
                String digits = dec.unscaledValue().toString();

                // finally write out the pieces (sign, scale, digits)
                buffer.write((byte) (sign + 1), invert);
                buffer.write((byte) ((factor >> 24) ^ 0x80), invert);
                buffer.write((byte) (factor >> 16), invert);
                buffer.write((byte) (factor >> 8), invert);
                buffer.write((byte) factor, invert);
                // Digits of a negative number use the opposite inversion so that
                // larger magnitudes sort first.
                serializeBytes(buffer, digits.getBytes(decimalCharSet), digits.length(),
                        sign == -1 ? !invert : invert);
                return;
            }

            default: {
                throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
            }
            }
        }
        case LIST: {
            ListObjectInspector loi = (ListObjectInspector) oi;
            ObjectInspector eoi = loi.getListElementObjectInspector();

            // \1 followed by each element
            int size = loi.getListLength(o);
            for (int eid = 0; eid < size; eid++) {
                buffer.write((byte) 1, invert);
                serialize(buffer, loi.getListElement(o, eid), eoi, invert);
            }
            // and \0 to terminate
            buffer.write((byte) 0, invert);
            return;
        }
        case MAP: {
            MapObjectInspector moi = (MapObjectInspector) oi;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();

            // \1 followed by each key and then each value
            Map<?, ?> map = moi.getMap(o);
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                buffer.write((byte) 1, invert);
                serialize(buffer, entry.getKey(), koi, invert);
                serialize(buffer, entry.getValue(), voi, invert);
            }
            // and \0 to terminate
            buffer.write((byte) 0, invert);
            return;
        }
        case STRUCT: {
            StructObjectInspector soi = (StructObjectInspector) oi;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();

            // Fields are written back to back, each with its own null marker.
            for (int i = 0; i < fields.size(); i++) {
                serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(),
                        invert);
            }
            return;
        }
        case UNION: {
            UnionObjectInspector uoi = (UnionObjectInspector) oi;
            // The tag byte selects which object inspector describes the wrapped value.
            byte tag = uoi.getTag(o);
            buffer.write(tag, invert);
            serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert);
            return;
        }
        default: {
            throw new RuntimeException("Unrecognized type: " + oi.getCategory());
        }
        }

    }

    /**
     * Writes the first {@code length} bytes of {@code data} as an escaped,
     * \0-terminated sequence.
     *
     * A \0 byte is written as \1 \1 and a \1 byte as \1 \2, so the only \0 in
     * the output is the terminator.
     *
     * @param buffer destination buffer
     * @param data   source bytes; only indices below {@code length} are read
     * @param length number of bytes of {@code data} to encode
     * @param invert flag passed to every buffer write
     */
    private static void serializeBytes(OutputByteBuffer buffer, byte[] data, int length, boolean invert) {
        int pos = 0;
        while (pos < length) {
            final byte current = data[pos++];
            if (current != 0 && current != 1) {
                // Ordinary byte: written unchanged.
                buffer.write(current, invert);
                continue;
            }
            // Escape: marker byte, then the original value shifted up by one.
            buffer.write((byte) 1, invert);
            buffer.write((byte) (current + 1), invert);
        }
        // Terminator.
        buffer.write((byte) 0, invert);
    }

    /**
     * Writes an int as four big-endian bytes with the sign bit flipped, so
     * that negative values compare lower than positive ones byte-by-byte.
     *
     * @param buffer destination buffer
     * @param v      the value to write
     * @param invert flag passed to every buffer write
     */
    private static void serializeInt(OutputByteBuffer buffer, int v, boolean invert) {
        // Most significant byte carries the flipped sign bit.
        final byte top = (byte) ((v >> 24) ^ 0x80);
        buffer.write(top, invert);
        // Remaining three bytes, high to low.
        for (int shift = 16; shift >= 0; shift -= 8) {
            buffer.write((byte) (v >> shift), invert);
        }
    }

    /**
     * Statistics are not supported by this SerDe.
     *
     * @return always null
     */
    @Override
    public SerDeStats getSerDeStats() {
        // no support for statistics
        return null;
    }
}