Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gora.util;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecord;
import org.apache.avro.util.ByteBufferInputStream;
import org.apache.avro.util.ByteBufferOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

/**
 * A utility class for I/O related functionality: Java, Hadoop and Avro
 * (de)serialization helpers, compact boolean/String array encoding, and
 * storing objects in a Hadoop {@link Configuration}.
 */
public class IOUtils {

  /** Size, in bytes, of the chunks used by {@link #readFully(InputStream)}. */
  public static final int BUFFER_SIZE = 8192;

  /**
   * Per-thread Avro decoder handed back to {@link DecoderFactory} for reuse.
   * A single shared decoder would be reconfigured concurrently by callers on
   * different threads, so each thread keeps its own instance.
   */
  private static final ThreadLocal<BinaryDecoder> DECODER =
      new ThreadLocal<BinaryDecoder>();

  /** Returns {@code conf}, or a fresh default configuration when it is null. */
  private static Configuration getOrCreateConf(Configuration conf) {
    return conf != null ? conf : new Configuration();
  }

  /** Returns a decoder over {@code is}, reusing this thread's previous decoder. */
  private static BinaryDecoder decoderFor(InputStream is) {
    BinaryDecoder reused = DecoderFactory.get().binaryDecoder(is, DECODER.get());
    DECODER.set(reused);
    return reused;
  }

  /** Returns a decoder over {@code bytes}, reusing this thread's previous decoder. */
  private static BinaryDecoder decoderFor(byte[] bytes) {
    BinaryDecoder reused = DecoderFactory.get().binaryDecoder(bytes, DECODER.get());
    DECODER.set(reused);
    return reused;
  }

  /**
   * Reads an object using Java native serialization.
   *
   * <p>NOTE(security): Java native deserialization must not be used on
   * untrusted input; callers are responsible for only passing trusted data.
   *
   * @param in an {@link ObjectInput}, or an {@link InputStream} that is
   *        wrapped in a new {@link ObjectInputStream}
   * @return the object read
   * @throws ClassNotFoundException if the class of the object cannot be found
   * @throws IOException if reading fails or {@code in} is of an unsupported type
   */
  public static Object readObject(DataInput in)
      throws ClassNotFoundException, IOException {
    if (in instanceof ObjectInput) {
      return ((ObjectInput) in).readObject();
    }
    if (in instanceof InputStream) {
      // The wrapper is intentionally not closed: closing it would also close
      // the caller's underlying stream.
      ObjectInput objIn = new ObjectInputStream((InputStream) in);
      return objIn.readObject();
    }
    throw new IOException("cannot read from DataInput of instance:"
        + in.getClass());
  }

  /**
   * Writes an object using Java native serialization.
   *
   * @param out an {@link ObjectOutput}, or an {@link OutputStream} that is
   *        wrapped in a new {@link ObjectOutputStream}
   * @param obj the object to write
   * @throws IOException if writing fails or {@code out} is of an unsupported type
   */
  public static void writeObject(DataOutput out, Object obj)
      throws IOException {
    if (out instanceof ObjectOutput) {
      ((ObjectOutput) out).writeObject(obj);
      return;
    }
    if (out instanceof OutputStream) {
      ObjectOutput objOut = new ObjectOutputStream((OutputStream) out);
      objOut.writeObject(obj);
      // ObjectOutputStream buffers internally; flush so the data actually
      // reaches the caller's stream. The wrapper is not closed on purpose.
      objOut.flush();
      return;
    }
    throw new IOException("cannot write to DataOutput of instance:"
        + out.getClass());
  }

  /**
   * Serializes the object to the given data output using available Hadoop
   * serializations. The payload is written as a VInt length followed by the
   * serialized bytes.
   *
   * @param conf the configuration listing the serializations, or null for defaults
   * @param out the output to write to
   * @param obj the object to serialize
   * @param objClass the class used to look up the serializer
   * @throws IOException if no serializer is available or writing fails
   */
  public static <T> void serialize(Configuration conf, DataOutput out,
      T obj, Class<T> objClass) throws IOException {
    SerializationFactory serializationFactory =
        new SerializationFactory(getOrCreateConf(conf));
    Serializer<T> serializer = serializationFactory.getSerializer(objClass);
    if (serializer == null) {
      throw new IOException("no Hadoop serializer available for class: "
          + objClass.getName());
    }

    ByteBufferOutputStream os = new ByteBufferOutputStream();
    try {
      serializer.open(os);
      serializer.serialize(obj);

      List<ByteBuffer> buffers = os.getBufferList();
      int length = 0;
      for (ByteBuffer buffer : buffers) {
        length += buffer.remaining();
      }

      WritableUtils.writeVInt(out, length);
      for (ByteBuffer buffer : buffers) {
        // Write exactly the readable region [position, limit) of each buffer.
        out.write(buffer.array(),
            buffer.arrayOffset() + buffer.position(), buffer.remaining());
      }
    } finally {
      try {
        serializer.close();
      } finally {
        os.close();
      }
    }
  }

  /**
   * Serializes the object to the given data output using available Hadoop
   * serializations. The object's class name is written first so that
   * {@link #deserialize(Configuration, DataInput, Object)} can restore it.
   *
   * @throws IOException if no serializer is available or writing fails
   */
  @SuppressWarnings("unchecked")
  public static <T> void serialize(Configuration conf, DataOutput out, T obj)
      throws IOException {
    Text.writeString(out, obj.getClass().getName());
    serialize(conf, out, obj, (Class<T>) obj.getClass());
  }

  /**
   * Serializes the object to a byte array using available Hadoop
   * serializations.
   *
   * @return exactly the serialized bytes (class name, length and payload)
   * @throws IOException if no serializer is available or writing fails
   */
  public static <T> byte[] serialize(Configuration conf, T obj)
      throws IOException {
    DataOutputBuffer buffer = new DataOutputBuffer();
    serialize(conf, buffer, obj);
    // getData() exposes the whole backing array; only the first getLength()
    // bytes are valid, so return a trimmed copy.
    return Arrays.copyOf(buffer.getData(), buffer.getLength());
  }

  /**
   * Serializes the field object using the datumWriter.
   *
   * @param os the stream to write the Avro binary encoding to
   * @param datumWriter the writer used to encode the object
   * @param schema not used by this method
   * @param object the record to serialize
   */
  public static <T extends SpecificRecord> void serialize(OutputStream os,
      SpecificDatumWriter<T> datumWriter, Schema schema, T object)
      throws IOException {
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(os, null);
    datumWriter.write(object, encoder);
    encoder.flush();
  }

  /**
   * Serializes the field object using the datumWriter.
   *
   * @param os the stream to write the Avro binary encoding to
   * @param datumWriter the writer used to encode the object
   * @param schema not used by this method
   * @param object the object to serialize
   */
  public static <T> void serialize(OutputStream os,
      SpecificDatumWriter<T> datumWriter, Schema schema, T object)
      throws IOException {
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(os, null);
    datumWriter.write(object, encoder);
    encoder.flush();
  }

  /**
   * Serializes the field object using the datumWriter.
   *
   * @return the Avro binary encoding of the record
   */
  public static <T extends SpecificRecord> byte[] serialize(
      SpecificDatumWriter<T> datumWriter, Schema schema, T object)
      throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    serialize(os, datumWriter, schema, object);
    return os.toByteArray();
  }

  /**
   * Serializes the field object using the datumWriter.
   *
   * @return the Avro binary encoding of the object
   */
  public static <T> byte[] serialize(SpecificDatumWriter<T> datumWriter,
      Schema schema, T object) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    serialize(os, datumWriter, schema, object);
    return os.toByteArray();
  }

  /**
   * Deserializes the object in the given data input using available Hadoop
   * serializations.
   *
   * @param objClass the name of the class to load and deserialize as
   * @throws IOException if no deserializer is available or reading fails
   * @throws ClassNotFoundException if {@code objClass} cannot be loaded
   */
  @SuppressWarnings("unchecked")
  public static <T> T deserialize(Configuration conf, DataInput in,
      T obj, String objClass) throws IOException, ClassNotFoundException {
    Class<T> c = (Class<T>) ClassLoadingUtils.loadClass(objClass);
    return deserialize(conf, in, obj, c);
  }

  /**
   * Deserializes the object in the given data input using available Hadoop
   * serializations. Expects the layout written by
   * {@link #serialize(Configuration, DataOutput, Object, Class)}: a VInt
   * length followed by that many payload bytes.
   *
   * @param conf the configuration listing the serializations, or null for defaults
   * @param in the input to read from
   * @param obj an instance passed to the deserializer for possible reuse
   * @param objClass the class used to look up the deserializer
   * @return the deserialized object
   * @throws IOException if no deserializer is available or reading fails
   */
  public static <T> T deserialize(Configuration conf, DataInput in,
      T obj, Class<T> objClass) throws IOException {
    SerializationFactory serializationFactory =
        new SerializationFactory(getOrCreateConf(conf));
    Deserializer<T> deserializer = serializationFactory.getDeserializer(objClass);
    if (deserializer == null) {
      throw new IOException("no Hadoop deserializer available for class: "
          + objClass.getName());
    }

    int length = WritableUtils.readVInt(in);
    byte[] arr = new byte[checkLength(length)];
    in.readFully(arr);

    List<ByteBuffer> list = new ArrayList<ByteBuffer>();
    list.add(ByteBuffer.wrap(arr));
    ByteBufferInputStream is = new ByteBufferInputStream(list);
    try {
      deserializer.open(is);
      return deserializer.deserialize(obj);
    } finally {
      try {
        deserializer.close();
      } finally {
        is.close();
      }
    }
  }

  /**
   * Deserializes the object in the given data input using available Hadoop
   * serializations. The class name is read from the input first, as written
   * by {@link #serialize(Configuration, DataOutput, Object)}.
   *
   * @throws IOException if no deserializer is available or reading fails
   * @throws ClassNotFoundException if the recorded class cannot be loaded
   */
  @SuppressWarnings("unchecked")
  public static <T> T deserialize(Configuration conf, DataInput in, T obj)
      throws IOException, ClassNotFoundException {
    String clazz = Text.readString(in);
    Class<T> c = (Class<T>) ClassLoadingUtils.loadClass(clazz);
    return deserialize(conf, in, obj, c);
  }

  /**
   * Deserializes the object in the given byte array using available Hadoop
   * serializations.
   *
   * @throws IOException if no deserializer is available or reading fails
   * @throws ClassNotFoundException if the recorded class cannot be loaded
   */
  public static <T> T deserialize(Configuration conf, byte[] in, T obj)
      throws IOException, ClassNotFoundException {
    DataInputBuffer buffer = new DataInputBuffer();
    buffer.reset(in, in.length);
    return deserialize(conf, buffer, obj);
  }

  /**
   * Deserializes the field object using the datumReader.
   *
   * @param is the stream holding the Avro binary encoding
   * @param datumReader the reader used to decode the object
   * @param schema not used by this method
   * @param object an instance passed to the reader for possible reuse
   */
  public static <K, T extends SpecificRecord> T deserialize(InputStream is,
      SpecificDatumReader<T> datumReader, Schema schema, T object)
      throws IOException {
    return datumReader.read(object, decoderFor(is));
  }

  /**
   * Deserializes the field object using the datumReader.
   *
   * @param bytes the Avro binary encoding
   * @param datumReader the reader used to decode the object
   * @param schema not used by this method
   * @param object an instance passed to the reader for possible reuse
   */
  public static <K, T extends SpecificRecord> T deserialize(byte[] bytes,
      SpecificDatumReader<T> datumReader, Schema schema, T object)
      throws IOException {
    return datumReader.read(object, decoderFor(bytes));
  }

  /**
   * Deserializes the field object using the datumReader.
   *
   * @param bytes the Avro binary encoding
   * @param datumReader the reader used to decode the object
   * @param schema not used by this method
   * @param object an instance passed to the reader for possible reuse
   */
  public static <K, T> T deserialize(byte[] bytes,
      SpecificDatumReader<T> datumReader, Schema schema, T object)
      throws IOException {
    return datumReader.read(object, decoderFor(bytes));
  }

  /**
   * Writes a byte[] to the output, representing whether each given field is
   * null or not. A VInt and ceil( fields.length / 8 ) bytes are written to
   * the output.
   *
   * @param out the output to write to
   * @param fields the fields to check for null
   * @see #readNullFieldsInfo(DataInput)
   */
  public static void writeNullFieldsInfo(DataOutput out, Object... fields)
      throws IOException {
    boolean[] isNull = new boolean[fields.length];
    for (int i = 0; i < fields.length; i++) {
      isNull[i] = (fields[i] == null);
    }
    writeBoolArray(out, isNull);
  }

  /**
   * Reads the data written by {@link #writeNullFieldsInfo(DataOutput, Object...)}
   * and returns a boolean array representing whether each field is null or not.
   *
   * @param in the input to read from
   * @return a boolean[] representing whether each field is null or not.
   */
  public static boolean[] readNullFieldsInfo(DataInput in) throws IOException {
    return readBoolArray(in);
  }

  /**
   * Writes a boolean[] to the output as a VInt element count followed by
   * ceil( length / 8 ) bytes; element {@code i} is stored in bit
   * {@code i % 8} (least significant first) of byte {@code i / 8}.
   * Nothing but the count is written for an empty array.
   *
   * @see #readBoolArray(DataInput)
   */
  public static void writeBoolArray(DataOutput out, boolean[] boolArray)
      throws IOException {
    WritableUtils.writeVInt(out, boolArray.length);
    out.write(packBits(boolArray));
  }

  /**
   * Reads a boolean[] written by {@link #writeBoolArray(DataOutput, boolean[])}.
   *
   * @throws IOException if reading fails or the recorded length is negative
   */
  public static boolean[] readBoolArray(DataInput in) throws IOException {
    int length = checkLength(WritableUtils.readVInt(in));
    byte[] packed = new byte[packedLength(length)];
    in.readFully(packed);
    return unpackBits(packed, length);
  }

  /**
   * Writes a boolean[] to the encoder as an int element count followed by a
   * fixed of ceil( length / 8 ) bytes, using the same bit layout as
   * {@link #writeBoolArray(DataOutput, boolean[])}.
   *
   * @see #readBoolArray(Decoder)
   */
  public static void writeBoolArray(Encoder out, boolean[] boolArray)
      throws IOException {
    out.writeInt(boolArray.length);
    out.writeFixed(packBits(boolArray));
  }

  /**
   * Reads a boolean[] written by {@link #writeBoolArray(Encoder, boolean[])}.
   *
   * @throws IOException if reading fails or the recorded length is negative
   */
  public static boolean[] readBoolArray(Decoder in) throws IOException {
    int length = checkLength(in.readInt());
    byte[] packed = new byte[packedLength(length)];
    in.readFixed(packed);
    return unpackBits(packed, length);
  }

  /** Rejects a negative length read from a (corrupt) stream. */
  private static int checkLength(int length) throws IOException {
    if (length < 0) {
      throw new IOException("negative length read from input: " + length);
    }
    return length;
  }

  /** Returns ceil(bitCount / 8) for a non-negative bitCount, without overflow. */
  private static int packedLength(int bitCount) {
    return (bitCount + 7) >>> 3;
  }

  /** Packs the booleans eight per byte, least significant bit first. */
  private static byte[] packBits(boolean[] bits) {
    byte[] packed = new byte[packedLength(bits.length)];
    for (int i = 0; i < bits.length; i++) {
      if (bits[i]) {
        packed[i >>> 3] |= (byte) (1 << (i & 7));
      }
    }
    return packed;
  }

  /** Inverse of {@link #packBits(boolean[])} for the first {@code length} bits. */
  private static boolean[] unpackBits(byte[] packed, int length) {
    boolean[] bits = new boolean[length];
    for (int i = 0; i < length; i++) {
      bits[i] = (packed[i >>> 3] & (1 << (i & 7))) != 0;
    }
    return bits;
  }

  /**
   * Writes the String array to the given DataOutput.
   *
   * @param out the data output to write to
   * @param arr the array to write
   * @see #readStringArray(DataInput)
   */
  public static void writeStringArray(DataOutput out, String[] arr)
      throws IOException {
    WritableUtils.writeVInt(out, arr.length);
    for (String str : arr) {
      Text.writeString(out, str);
    }
  }

  /**
   * Reads and returns a String array that is written by
   * {@link #writeStringArray(DataOutput, String[])}.
   *
   * @param in the data input to read from
   * @return read String[]
   * @throws IOException if reading fails or the recorded length is negative
   */
  public static String[] readStringArray(DataInput in) throws IOException {
    int len = checkLength(WritableUtils.readVInt(in));
    String[] arr = new String[len];
    for (int i = 0; i < len; i++) {
      arr[i] = Text.readString(in);
    }
    return arr;
  }

  /**
   * Stores the given object in the configuration under the given dataKey.
   * The object's class name is stored under {@code dataKey + "._class"}.
   *
   * @param obj the object to store
   * @param conf the configuration to store the object into
   * @param dataKey the key to store the data
   */
  public static <T> void storeToConf(T obj, Configuration conf, String dataKey)
      throws IOException {
    String classKey = dataKey + "._class";
    conf.set(classKey, obj.getClass().getName());
    DefaultStringifier.store(conf, obj, dataKey);
  }

  /**
   * Loads the object stored by {@link #storeToConf(Object, Configuration, String)}
   * method from the configuration under the given dataKey.
   *
   * @param conf the configuration to read from
   * @param dataKey the key to get the data from
   * @return the stored object
   * @throws IOException wrapping any failure to load the class or the data
   */
  @SuppressWarnings("unchecked")
  public static <T> T loadFromConf(Configuration conf, String dataKey)
      throws IOException {
    String classKey = dataKey + "._class";
    String className = conf.get(classKey);
    try {
      return (T) DefaultStringifier.load(conf, dataKey,
          ClassLoadingUtils.loadClass(className));
    } catch (Exception ex) {
      throw new IOException(ex);
    }
  }

  /**
   * Copies the remaining contents of the buffers into a single byte[].
   * Each buffer is read with a relative get, so its position is advanced to
   * its limit as a side effect.
   */
  //TODO: not tested
  public static byte[] getAsBytes(List<ByteBuffer> buffers) {
    // find total size
    int size = 0;
    for (ByteBuffer buffer : buffers) {
      size += buffer.remaining();
    }

    byte[] arr = new byte[size];
    int offset = 0;
    for (ByteBuffer buffer : buffers) {
      int len = buffer.remaining();
      buffer.get(arr, offset, len);
      offset += len;
    }
    return arr;
  }

  /**
   * Reads until the end of the input stream, and returns the contents as a
   * byte[]. The stream is not closed.
   *
   * @param in the stream to drain
   * @return all bytes read before end of stream
   */
  public static byte[] readFully(InputStream in) throws IOException {
    ByteArrayOutputStream contents = new ByteArrayOutputStream(BUFFER_SIZE);
    byte[] chunk = new byte[BUFFER_SIZE];
    int count;
    // A short read does not mean end of stream; only -1 does.
    while ((count = in.read(chunk, 0, chunk.length)) != -1) {
      contents.write(chunk, 0, count);
    }
    return contents.toByteArray();
  }
}