// Java tutorial
/** * (c) Copyright 2013 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.moz.fiji.hive.io; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.sql.Timestamp; import java.util.List; import java.util.Map; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableFactories; import org.apache.hadoop.io.WritableUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.moz.fiji.hive.utils.AvroTypeAdapter; import com.moz.fiji.schema.FijiCell; /** * Writable version of the data stored within a FijiCell. 
*/ public class FijiCellWritable implements Writable { private static final Logger LOG = LoggerFactory.getLogger(FijiCellWritable.class); private long mTimestamp; private Schema mSchema; private Object mData; /** Required so that this can be built by WritableFactories. */ public FijiCellWritable() { } /** * Constructs a FijiCellWritable from an existing FijiCell. * * @param fijiCell from a FijiRowData. */ public FijiCellWritable(FijiCell fijiCell) { mTimestamp = fijiCell.getTimestamp(); mSchema = fijiCell.getWriterSchema(); mData = fijiCell.getData(); } /** * Constructor for a FijiCellWritable from a Hive representation of a cell. This is * typically a Hive struct containing two fields, one for the timestamp and one for the object. * * @param timestampedCellObjectInspector StructObjectInspector for this Hive object * @param hiveObj representing a struct containing a Hive timestamp and data pair. */ public FijiCellWritable(StructObjectInspector timestampedCellObjectInspector, Object hiveObj) { List<Object> timestampedCellFields = timestampedCellObjectInspector.getStructFieldsDataAsList(hiveObj); if (timestampedCellFields.isEmpty()) { LOG.warn("Passed in Hive object is empty. Returning an empty FijiCellWritable"); mData = null; } else { Preconditions.checkState(timestampedCellFields.size() == 2, "FijiCellWritable must be created with exactly 2 fields. Found %s", timestampedCellFields.size()); Timestamp timestampObject = (Timestamp) timestampedCellFields.get(0); mTimestamp = timestampObject.getTime(); mData = timestampedCellFields.get(1); } StructField dataStructField = timestampedCellObjectInspector.getAllStructFieldRefs().get(1); mSchema = AvroTypeAdapter.get().toAvroSchema(dataStructField.getFieldObjectInspector()); } /** * @return The timestamp associated with this cell. */ public long getTimestamp() { return mTimestamp; } /** * @return the schema associated with this cell. */ public Schema getSchema() { return mSchema; } /** * @return the cell content. 
*/ public Object getData() { return mData; } /** * @return if this cell has data in it. */ public boolean hasData() { return null != mData; } @Override public void write(DataOutput out) throws IOException { WritableUtils.writeVLong(out, mTimestamp); WritableUtils.writeString(out, mSchema.toString()); writeData(out, mData, mSchema); } @Override public void readFields(DataInput in) throws IOException { mTimestamp = WritableUtils.readVLong(in); String schemaString = WritableUtils.readString(in); mSchema = new Schema.Parser().parse(schemaString); mData = readData(in, mSchema); } /** * Reads and converts data according to the specified schema. * * @param out DataOutput to serialize this object into. * @param data data to be serialized. * @param schema Schema to be used for serializing this data. * @throws IOException if there was an error writing. */ private static void writeData(DataOutput out, Object data, Schema schema) throws IOException { switch (schema.getType()) { case INT: Integer intData = (Integer) data; WritableUtils.writeVInt(out, intData); break; case LONG: Long longData = (Long) data; WritableUtils.writeVLong(out, longData); break; case DOUBLE: Double doubleData = (Double) data; DoubleWritable doubleWritable = new DoubleWritable(doubleData); doubleWritable.write(out); break; case ENUM: case STRING: String stringData = data.toString(); WritableUtils.writeString(out, stringData); break; case FLOAT: Float floatData = (Float) data; FloatWritable floatWritable = new FloatWritable(floatData); floatWritable.write(out); break; case ARRAY: List<Object> listData = (List<Object>) data; WritableUtils.writeVInt(out, listData.size()); for (Object listElement : listData) { writeData(out, listElement, schema.getElementType()); } break; case RECORD: IndexedRecord recordData = (IndexedRecord) data; WritableUtils.writeVInt(out, schema.getFields().size()); for (Schema.Field field : schema.getFields()) { WritableUtils.writeString(out, field.name()); writeData(out, 
recordData.get(field.pos()), field.schema()); } break; case MAP: Map<String, Object> mapData = (Map<String, Object>) data; WritableUtils.writeVInt(out, mapData.size()); for (Map.Entry<String, Object> entry : mapData.entrySet()) { WritableUtils.writeString(out, entry.getKey()); writeData(out, entry.getValue(), schema.getValueType()); } break; case UNION: final Integer tag = GenericData.get().resolveUnion(schema, data); WritableUtils.writeVInt(out, tag); Schema unionSubSchema = schema.getTypes().get(tag); writeData(out, data, unionSubSchema); break; case BYTES: byte[] bytesData = (byte[]) data; WritableUtils.writeCompressedByteArray(out, bytesData); break; case BOOLEAN: Boolean booleanData = (Boolean) data; BooleanWritable booleanWritable = new BooleanWritable(booleanData); booleanWritable.write(out); break; case NULL: // Don't need to write anything for null. break; case FIXED: default: throw new UnsupportedOperationException("Unsupported type: " + schema.getType()); } } /** * Reads and converts data according to the specified schema. * * @param in DataInput to deserialize this object from. * @param schema Schema to be used for deserializing this data. * @return the data read and converted according to the schema. * @throws IOException if there was an error reading. 
*/ private static Object readData(DataInput in, Schema schema) throws IOException { switch (schema.getType()) { case INT: Integer intData = WritableUtils.readVInt(in); return intData; case LONG: Long longData = WritableUtils.readVLong(in); return longData; case DOUBLE: DoubleWritable doubleWritable = (DoubleWritable) WritableFactories.newInstance(DoubleWritable.class); doubleWritable.readFields(in); return doubleWritable.get(); case ENUM: case STRING: String stringData = WritableUtils.readString(in); return stringData; case FLOAT: FloatWritable floatWritable = (FloatWritable) WritableFactories.newInstance(FloatWritable.class); floatWritable.readFields(in); return floatWritable.get(); case ARRAY: List<Object> listData = Lists.newArrayList(); Integer numElements = WritableUtils.readVInt(in); for (int c = 0; c < numElements; c++) { Object listElement = readData(in, schema.getElementType()); listData.add(listElement); } return listData; case RECORD: GenericRecord recordData = new GenericData.Record(schema); Integer numFields = WritableUtils.readVInt(in); for (int c = 0; c < numFields; c++) { String fieldName = WritableUtils.readString(in); Object fieldData = readData(in, schema.getField(fieldName).schema()); recordData.put(fieldName, fieldData); } return recordData; case MAP: Map<String, Object> mapData = Maps.newHashMap(); Integer numEntries = WritableUtils.readVInt(in); for (int c = 0; c < numEntries; c++) { String key = WritableUtils.readString(in); Object value = readData(in, schema.getValueType()); mapData.put(key, value); } return mapData; case UNION: Integer tag = WritableUtils.readVInt(in); Schema unionSubSchema = schema.getTypes().get(tag); Object unionData = readData(in, unionSubSchema); return unionData; case BYTES: byte[] bytesData = WritableUtils.readCompressedByteArray(in); return bytesData; case BOOLEAN: BooleanWritable booleanWritable = (BooleanWritable) WritableFactories .newInstance(BooleanWritable.class); booleanWritable.readFields(in); return 
booleanWritable.get(); case NULL: return null; default: throw new UnsupportedOperationException("Unsupported type: " + schema.getType()); } } }