Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.uci.ics.hivesterix.serde.lazy; import java.io.DataOutput; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.WritableUtils; import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyObjectInspectorFactory; import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.PrimitiveObjectInspectorFactory; /** * LazyUtils. */ public final class LazyUtils { /** * Convert the byte array to an int starting from the given offset. Refer to * code by aeden on DZone Snippets: * * @param b * the byte array * @param offset * the array offset * @return the integer */ public static int byteArrayToInt(byte[] b, int offset) { int value = 0; for (int i = 0; i < 4; i++) { int shift = (4 - 1 - i) * 8; value += (b[i + offset] & 0x000000FF) << shift; } return value; } /** * Convert the byte array to a long starting from the given offset. * * @param b * the byte array * @param offset * the array offset * @return the long */ public static long byteArrayToLong(byte[] b, int offset) { long value = 0; for (int i = 0; i < 8; i++) { int shift = (8 - 1 - i) * 8; value += ((long) (b[i + offset] & 0x00000000000000FF)) << shift; } return value; } /** * Convert the byte array to a short starting from the given offset. * * @param b * the byte array * @param offset * the array offset * @return the short */ public static short byteArrayToShort(byte[] b, int offset) { short value = 0; value += (b[offset] & 0x000000FF) << 8; value += (b[offset + 1] & 0x000000FF); return value; } /** * Record is the unit that data is serialized in. A record includes two * parts. The first part stores the size of the element and the second part * stores the real element. size element record -> * |----|-------------------------| * A RecordInfo stores two information of a record, the size of the "size" * part which is the element offset and the size of the element part which * is element size. */ public static class RecordInfo { public RecordInfo() { elementOffset = 0; elementSize = 0; } public byte elementOffset; public int elementSize; @Override public String toString() { return "(" + elementOffset + ", " + elementSize + ")"; } } static VInt vInt = new LazyUtils.VInt(); /** * Check a particular field and set its size and offset in bytes based on * the field type and the bytes arrays. * For void, boolean, byte, short, int, long, float and double, there is no * offset and the size is fixed. For string, map, list, struct, the first * four bytes are used to store the size. So the offset is 4 and the size is * computed by concating the first four bytes together. The first four bytes * are defined with respect to the offset in the bytes arrays. * * @param objectInspector * object inspector of the field * @param bytes * bytes arrays store the table row * @param offset * offset of this field * @param recordInfo * modify this byteinfo object and return it */ public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo) { Category category = objectInspector.getCategory(); switch (category) { case PRIMITIVE: PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector) .getPrimitiveCategory(); switch (primitiveCategory) { case VOID: recordInfo.elementOffset = 0; recordInfo.elementSize = 0; break; case BOOLEAN: case BYTE: recordInfo.elementOffset = 0; recordInfo.elementSize = 1; break; case SHORT: recordInfo.elementOffset = 0; recordInfo.elementSize = 2; break; case FLOAT: recordInfo.elementOffset = 0; recordInfo.elementSize = 4; break; case DOUBLE: recordInfo.elementOffset = 0; recordInfo.elementSize = 8; break; case INT: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case LONG: recordInfo.elementOffset = 0; recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); break; case STRING: // using vint instead of 4 bytes LazyUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; default: { throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory); } } break; case LIST: case MAP: case STRUCT: recordInfo.elementOffset = 4; recordInfo.elementSize = LazyUtils.byteArrayToInt(bytes, offset); break; default: { throw new RuntimeException("Unrecognized non-primitive type: " + category); } } } /** * A zero-compressed encoded long. */ public static class VLong { public VLong() { value = 0; length = 0; } public long value; public byte length; }; /** * Reads a zero-compressed encoded long from a byte array and returns it. * * @param bytes * the byte array * @param offset * offset of the array to read from * @param vlong * storing the deserialized long and its size in byte */ public static void readVLong(byte[] bytes, int offset, VLong vlong) { byte firstByte = bytes[offset]; vlong.length = (byte) WritableUtils.decodeVIntSize(firstByte); if (vlong.length == 1) { vlong.value = firstByte; return; } long i = 0; for (int idx = 0; idx < vlong.length - 1; idx++) { byte b = bytes[offset + 1 + idx]; i = i << 8; i = i | (b & 0xFF); } vlong.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i); } /** * A zero-compressed encoded integer. */ public static class VInt implements Serializable { private static final long serialVersionUID = 1L; public VInt() { value = 0; length = 0; } public int value; public byte length; }; /** * Reads a zero-compressed encoded int from a byte array and returns it. * * @param bytes * the byte array * @param offset * offset of the array to read from * @param vInt * storing the deserialized int and its size in byte */ public static void readVInt(byte[] bytes, int offset, VInt vInt) { byte firstByte = bytes[offset]; vInt.length = (byte) WritableUtils.decodeVIntSize(firstByte); if (vInt.length == 1) { vInt.value = firstByte; return; } int i = 0; for (int idx = 0; idx < vInt.length - 1; idx++) { byte b = bytes[offset + 1 + idx]; i = i << 8; i = i | (b & 0xFF); } vInt.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i); } /** * Writes a zero-compressed encoded int to a byte array. * * @param byteStream * the byte array/stream * @param i * the int */ public static void writeVInt(Output byteStream, int i) { writeVLong(byteStream, i); } /** * Write a zero-compressed encoded long to a byte array. * * @param byteStream * the byte array/stream * @param l * the long */ public static void writeVLong(Output byteStream, long l) { if (l >= -112 && l <= 127) { byteStream.write((byte) l); return; } int len = -112; if (l < 0) { l ^= -1L; // take one's complement' len = -120; } long tmp = l; while (tmp != 0) { tmp = tmp >> 8; len--; } byteStream.write((byte) len); len = (len < -120) ? -(len + 120) : -(len + 112); for (int idx = len; idx != 0; idx--) { int shiftbits = (idx - 1) * 8; long mask = 0xFFL << shiftbits; byteStream.write((byte) ((l & mask) >> shiftbits)); } } static Map<TypeInfo, ObjectInspector> cachedLazyObjectInspector = new ConcurrentHashMap<TypeInfo, ObjectInspector>(); /** * Returns the lazy binary object inspector that can be used to inspect an * lazy binary object of that typeInfo * For primitive types, we use the standard writable object inspector. */ public static ObjectInspector getLazyObjectInspectorFromTypeInfo(TypeInfo typeInfo, boolean topLevel) { if (typeInfo == null) throw new IllegalStateException("illegal type null "); ObjectInspector result = cachedLazyObjectInspector.get(typeInfo); if (result == null) { switch (typeInfo.getCategory()) { case PRIMITIVE: { result = PrimitiveObjectInspectorFactory .getPrimitiveLazyObjectInspector(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); break; } case LIST: { ObjectInspector elementObjectInspector = getLazyObjectInspectorFromTypeInfo( ((ListTypeInfo) typeInfo).getListElementTypeInfo(), false); result = LazyObjectInspectorFactory.getLazyListObjectInspector(elementObjectInspector); break; } case MAP: { MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; ObjectInspector keyObjectInspector = getLazyObjectInspectorFromTypeInfo( mapTypeInfo.getMapKeyTypeInfo(), false); ObjectInspector valueObjectInspector = getLazyObjectInspectorFromTypeInfo( mapTypeInfo.getMapValueTypeInfo(), false); result = LazyObjectInspectorFactory.getLazyMapObjectInspector(keyObjectInspector, valueObjectInspector); break; } case STRUCT: { StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; List<String> fieldNames = structTypeInfo.getAllStructFieldNames(); List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size()); for (int i = 0; i < fieldTypeInfos.size(); i++) { fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(fieldTypeInfos.get(i), false)); } // if it is top level then create columnar if (topLevel) result = LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames, fieldObjectInspectors); // if it is not top level then create struct else result = LazyObjectInspectorFactory.getLazyStructObjectInspector(fieldNames, fieldObjectInspectors); break; } default: { result = null; } } cachedLazyObjectInspector.put(typeInfo, result); } return result; } /** * get top-level lazy object inspector * * @param fieldNames * @param fieldTypeInfos * @return */ public static ObjectInspector getLazyObjectInspector(List<String> fieldNames, List<TypeInfo> fieldTypeInfos) { List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size()); for (int i = 0; i < fieldTypeInfos.size(); i++) { fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(fieldTypeInfos.get(i), false)); } return LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames, fieldObjectInspectors); } private LazyUtils() { // prevent instantiation } /** * Returns -1 if the first byte sequence is lexicographically less than the * second; returns +1 if the second byte sequence is lexicographically less * than the first; otherwise return 0. */ public static int compare(byte[] b1, int start1, int length1, byte[] b2, int start2, int length2) { int min = Math.min(length1, length2); for (int i = 0; i < min; i++) { if (b1[start1 + i] == b2[start2 + i]) { continue; } if (b1[start1 + i] < b2[start2 + i]) { return -1; } else { return 1; } } if (length1 < length2) { return -1; } if (length1 > length2) { return 1; } return 0; } public static int hashBytes(byte[] data, int start, int len) { int hash = 1; for (int i = start; i < len; i++) { hash = (31 * hash) + data[i]; } return hash; } /** * Writes a zero-compressed encoded int to a byte array. * * @param byteStream * the byte array/stream * @param i * the int */ public static void writeVInt(DataOutput byteStream, int i) throws IOException { writeVLong(byteStream, i); } /** * Write a zero-compressed encoded long to a byte array. * * @param byteStream * the byte array/stream * @param l * the long */ public static void writeVLong(DataOutput byteStream, long l) throws IOException { if (l >= -112 && l <= 127) { byteStream.write((byte) l); return; } int len = -112; if (l < 0) { l ^= -1L; // take one's complement' len = -120; } long tmp = l; while (tmp != 0) { tmp = tmp >> 8; len--; } byteStream.write((byte) len); len = (len < -120) ? -(len + 120) : -(len + 112); for (int idx = len; idx != 0; idx--) { int shiftbits = (idx - 1) * 8; long mask = 0xFFL << shiftbits; byteStream.write((byte) ((l & mask) >> shiftbits)); } } }