org.apache.gora.util.ByteUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.gora.util.ByteUtils.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gora.util;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecord;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.io.WritableUtils;

//  This code is copied almost directly from HBase project's Bytes class.
/**
 * Utility class that handles byte arrays, conversions to/from other types.
 *
 */
public class ByteUtils {

    /**
     * Size of a boolean in bytes as encoded by this class
     * ({@code Byte.SIZE / Byte.SIZE} evaluates to 1).
     */
    public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;

    /**
     * Size of a byte in bytes (same value as {@link #SIZEOF_BOOLEAN}).
     */
    public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;

    /**
     * Size of a char in bytes, derived from {@link Character#SIZE}.
     */
    public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;

    /**
     * Size of a double in bytes, derived from {@link Double#SIZE}.
     */
    public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;

    /**
     * Size of a float in bytes, derived from {@link Float#SIZE}.
     */
    public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;

    /**
     * Size of an int in bytes, derived from {@link Integer#SIZE}.
     */
    public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;

    /**
     * Size of a long in bytes, derived from {@link Long#SIZE}.
     */
    public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;

    /**
     * Size of a short in bytes, derived from {@link Short#SIZE}.
     */
    public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;

    /**
     * Copies a range of one byte array into another.
     *
     * @param tgtBytes destination array
     * @param tgtOffset index in {@code tgtBytes} where the first byte is written
     * @param srcBytes array to copy from
     * @param srcOffset index in {@code srcBytes} of the first byte to copy
     * @param srcLength number of bytes to copy
     * @return {@code tgtOffset + srcLength}, the index just past the written range
     */
    public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes, int srcOffset, int srcLength) {
        final int nextOffset = tgtOffset + srcLength;
        System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
        return nextOffset;
    }

    /**
     * Stores one byte in an array and reports the following index.
     *
     * @param bytes array to write into
     * @param offset index at which {@code b} is stored
     * @param b value to store
     * @return {@code offset + 1}
     */
    public static int putByte(byte[] bytes, int offset, byte b) {
        bytes[offset++] = b;
        return offset;
    }

    /**
     * Returns a new byte array holding the buffer's content from index 0 up to
     * (but excluding) its limit.
     *
     * <p>The copy is made through a rewound duplicate, so the caller's
     * position, limit and mark are left untouched and buffers without an
     * accessible backing array (direct or read-only buffers) are supported
     * as well.
     *
     * @param bb buffer to copy from; must not be null
     * @return a freshly allocated array of {@code bb.limit()} bytes
     */
    public static byte[] toBytes(ByteBuffer bb) {
        // duplicate() shares content but has independent position/limit/mark.
        ByteBuffer view = bb.duplicate();
        view.rewind();
        byte[] result = new byte[view.remaining()];
        view.get(result);
        return result;
    }

    /**
     * Decodes a whole byte array as UTF-8 text.
     *
     * @param b presumed UTF-8 encoded byte array; may be null
     * @return the decoded string, or null when {@code b} is null
     */
    public static String toString(final byte[] b) {
        return (b == null) ? null : toString(b, 0, b.length);
    }

    /**
     * Decodes two byte arrays as UTF-8 text and joins them with a separator.
     *
     * @param b1 presumed UTF-8 encoded bytes placed before the separator; must not be null
     * @param sep text placed between the two decoded strings
     * @param b2 presumed UTF-8 encoded bytes placed after the separator; must not be null
     * @return decoded {@code b1}, then {@code sep}, then decoded {@code b2}
     */
    public static String toString(final byte[] b1, String sep, final byte[] b2) {
        final String head = toString(b1, 0, b1.length);
        final String tail = toString(b2, 0, b2.length);
        return head + sep + tail;
    }

    /**
     * Decodes a range of a byte array as UTF-8 text.
     *
     * @param b presumed UTF-8 encoded byte array; may be null
     * @param off index of the first byte to decode
     * @param len number of bytes to decode
     * @return null when {@code b} is null, the empty string when {@code len}
     *         is 0, otherwise the decoded string
     * @throws IllegalStateException if the JVM does not provide UTF-8 (not
     *         expected on a compliant Java platform)
     */
    public static String toString(final byte[] b, int off, int len) {
        if (b == null) {
            return null;
        }
        if (len == 0) {
            return "";
        }
        try {
            return new String(b, off, len, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Fail loudly instead of printing a stack trace and returning null.
            throw new IllegalStateException("UTF-8 encoding is not supported", e);
        }
    }

    /**
     * Converts a string to a UTF-8 byte array.
     *
     * @param s string to encode; must not be null
     * @return the UTF-8 encoding of {@code s}
     * @throws IllegalArgumentException if {@code s} is null
     * @throws IllegalStateException if the JVM does not provide UTF-8 (not
     *         expected on a compliant Java platform)
     */
    public static byte[] toBytes(String s) {
        if (s == null) {
            throw new IllegalArgumentException("string cannot be null");
        }
        try {
            return s.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Fail loudly instead of printing a stack trace and returning null.
            throw new IllegalStateException("UTF-8 encoding is not supported", e);
        }
    }

    /**
     * Encodes a boolean as a single byte.
     *
     * @param b value to encode
     * @return a one-element array holding {@code -1} for true and {@code 0} for false
     */
    public static byte[] toBytes(final boolean b) {
        final byte encoded = (byte) (b ? -1 : 0);
        return new byte[] { encoded };
    }

    /**
     * Decodes a single-byte array as a boolean.
     *
     * @param b array of exactly one byte
     * @return false when the byte is 0, true for any other value
     * @throws IllegalArgumentException if {@code b} is null or its length is not 1
     */
    public static boolean toBoolean(final byte[] b) {
        // Reject empty arrays too; otherwise b[0] below would throw
        // ArrayIndexOutOfBoundsException instead of the documented exception.
        if (b == null || b.length != 1) {
            throw new IllegalArgumentException("Array is wrong size");
        }
        return b[0] != (byte) 0;
    }

    /**
     * Encodes a long as 8 bytes, most significant byte first.
     *
     * @param val value to encode
     * @return a new 8-byte array
     */
    public static byte[] toBytes(long val) {
        final byte[] out = new byte[8];
        for (int idx = 0; idx < 8; idx++) {
            // Byte idx holds bits [8 * (7 - idx), 8 * (7 - idx) + 7] of val.
            out[idx] = (byte) (val >>> (8 * (7 - idx)));
        }
        return out;
    }

    /**
     * Decodes the first {@link #SIZEOF_LONG} bytes of an array as a long.
     *
     * @param bytes array holding the encoded value
     * @return the decoded long; see {@link #toLong(byte[], int, int)} for the
     *         result on invalid input
     */
    public static long toLong(byte[] bytes) {
        return toLong(bytes, 0, SIZEOF_LONG);
    }

    /**
     * Decodes {@link #SIZEOF_LONG} bytes starting at an offset as a long.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @return the decoded long; see {@link #toLong(byte[], int, int)} for the
     *         result on invalid input
     */
    public static long toLong(byte[] bytes, int offset) {
        final int width = SIZEOF_LONG;
        return toLong(bytes, offset, width);
    }

    /**
     * Decodes a big-endian long from a range of a byte array.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @param length number of bytes to read; must equal {@link #SIZEOF_LONG}
     * @return the decoded long, or {@code -1L} when {@code bytes} is null,
     *         {@code length} is not {@link #SIZEOF_LONG}, or the range runs
     *         past the end of the array (indistinguishable from an encoded -1)
     */
    public static long toLong(byte[] bytes, int offset, final int length) {
        if (bytes == null) {
            return -1L;
        }
        if (length != SIZEOF_LONG || offset + length > bytes.length) {
            return -1L;
        }
        long value = 0;
        for (int pos = offset, end = offset + length; pos < end; pos++) {
            // Shift in the next byte as an unsigned 8-bit quantity.
            value = (value << 8) | (bytes[pos] & 0xFFL);
        }
        return value;
    }

    /**
     * Decodes a float from the start of a byte array; the bytes are presumed
     * to hold the IEEE 754 "single format" bit pattern.
     *
     * @param bytes array holding the encoded value
     * @return the float whose bit pattern is {@code toInt(bytes, 0)}
     */
    public static float toFloat(byte[] bytes) {
        return Float.intBitsToFloat(toInt(bytes, 0));
    }

    /**
     * Decodes a float from a byte array at an offset; the bytes are presumed
     * to hold the IEEE 754 "single format" bit pattern.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @return the float whose bit pattern is {@code toInt(bytes, offset)}
     */
    public static float toFloat(byte[] bytes, int offset) {
        return Float.intBitsToFloat(toInt(bytes, offset));
    }

    /**
     * Encodes a float by writing its raw int bit pattern
     * ({@link Float#floatToRawIntBits(float)}) with {@link #toBytes(int)}.
     *
     * @param f value to encode
     * @return the float represented as byte []
     */
    public static byte[] toBytes(final float f) {
        return toBytes(Float.floatToRawIntBits(f));
    }

    /**
     * Decodes a double from the start of a byte array.
     *
     * @param bytes array holding the encoded value
     * @return the double whose bit pattern is {@code toLong(bytes, 0)}
     */
    public static double toDouble(final byte[] bytes) {
        return Double.longBitsToDouble(toLong(bytes, 0));
    }

    /**
     * Decodes a double from a byte array at an offset.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @return the double whose bit pattern is {@code toLong(bytes, offset)}
     */
    public static double toDouble(final byte[] bytes, final int offset) {
        return Double.longBitsToDouble(toLong(bytes, offset));
    }

    /**
     * Encodes a double by writing its raw long bit pattern
     * ({@link Double#doubleToRawLongBits(double)}) with {@link #toBytes(long)}.
     *
     * @param d value to encode
     * @return the double represented as byte []
     */
    public static byte[] toBytes(final double d) {
        return toBytes(Double.doubleToRawLongBits(d));
    }

    /**
     * Encodes an int as 4 bytes, most significant byte first.
     *
     * @param val value to encode
     * @return a new 4-byte array
     */
    public static byte[] toBytes(int val) {
        final byte[] out = new byte[4];
        for (int idx = 0; idx < 4; idx++) {
            // Byte idx holds bits [8 * (3 - idx), 8 * (3 - idx) + 7] of val.
            out[idx] = (byte) (val >>> (8 * (3 - idx)));
        }
        return out;
    }

    /**
     * Decodes the first {@link #SIZEOF_INT} bytes of an array as an int.
     *
     * @param bytes array holding the encoded value
     * @return the decoded int; see {@link #toInt(byte[], int, int)} for the
     *         result on invalid input
     */
    public static int toInt(byte[] bytes) {
        return toInt(bytes, 0, SIZEOF_INT);
    }

    /**
     * Decodes {@link #SIZEOF_INT} bytes starting at an offset as an int.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @return the decoded int; see {@link #toInt(byte[], int, int)} for the
     *         result on invalid input
     */
    public static int toInt(byte[] bytes, int offset) {
        final int width = SIZEOF_INT;
        return toInt(bytes, offset, width);
    }

    /**
     * Decodes a big-endian int from a range of a byte array.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @param length number of bytes to read; must equal {@link #SIZEOF_INT}
     * @return the decoded int, or {@code -1} when {@code bytes} is null,
     *         {@code length} is not {@link #SIZEOF_INT}, or the range runs
     *         past the end of the array (indistinguishable from an encoded -1)
     */
    public static int toInt(byte[] bytes, int offset, final int length) {
        if (bytes == null) {
            return -1;
        }
        if (length != SIZEOF_INT || offset + length > bytes.length) {
            return -1;
        }
        int value = 0;
        for (int pos = offset, end = offset + length; pos < end; pos++) {
            // Shift in the next byte as an unsigned 8-bit quantity.
            value = (value << 8) | (bytes[pos] & 0xFF);
        }
        return value;
    }

    /**
     * Encodes a short as {@link #SIZEOF_SHORT} bytes, high byte first.
     *
     * @param val value to encode
     * @return a new array holding the high byte at index 0 and the low byte at index 1
     */
    public static byte[] toBytes(short val) {
        final byte[] out = new byte[SIZEOF_SHORT];
        out[0] = (byte) (val >> 8);
        out[1] = (byte) val;
        return out;
    }

    /**
     * Decodes the first {@link #SIZEOF_SHORT} bytes of an array as a short.
     *
     * @param bytes array holding the encoded value
     * @return the decoded short; see {@link #toShort(byte[], int, int)} for
     *         the result on invalid input
     */
    public static short toShort(byte[] bytes) {
        return toShort(bytes, 0, SIZEOF_SHORT);
    }

    /**
     * Decodes {@link #SIZEOF_SHORT} bytes starting at an offset as a short.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @return the decoded short; see {@link #toShort(byte[], int, int)} for
     *         the result on invalid input
     */
    public static short toShort(byte[] bytes, int offset) {
        final int width = SIZEOF_SHORT;
        return toShort(bytes, offset, width);
    }

    /**
     * Decodes a big-endian short from a range of a byte array.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the high byte
     * @param length number of bytes to read; must equal {@link #SIZEOF_SHORT}
     * @return the decoded short, or {@code -1} when {@code bytes} is null,
     *         {@code length} is not {@link #SIZEOF_SHORT}, or the range runs
     *         past the end of the array (indistinguishable from an encoded -1)
     */
    public static short toShort(byte[] bytes, int offset, final int length) {
        if (bytes == null) {
            return -1;
        }
        if (length != SIZEOF_SHORT || offset + length > bytes.length) {
            return -1;
        }
        final int high = bytes[offset] & 0xFF;
        final int low = bytes[offset + 1] & 0xFF;
        return (short) ((high << 8) | low);
    }

    /**
     * Encodes a char as {@link #SIZEOF_CHAR} bytes, high byte first.
     *
     * @param val value to encode
     * @return a new array holding the high byte at index 0 and the low byte at index 1
     */
    public static byte[] toBytes(char val) {
        final byte[] out = new byte[SIZEOF_CHAR];
        out[0] = (byte) (val >> 8);
        out[1] = (byte) val;
        return out;
    }

    /**
     * Decodes the first {@link #SIZEOF_CHAR} bytes of an array as a char.
     *
     * @param bytes array holding the encoded value
     * @return the decoded char; see {@link #toChar(byte[], int, int)} for the
     *         result on invalid input
     */
    public static char toChar(byte[] bytes) {
        return toChar(bytes, 0, SIZEOF_CHAR);
    }

    /**
     * Decodes {@link #SIZEOF_CHAR} bytes starting at an offset as a char.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the first encoded byte
     * @return the decoded char; see {@link #toChar(byte[], int, int)} for the
     *         result on invalid input
     */
    public static char toChar(byte[] bytes, int offset) {
        final int width = SIZEOF_CHAR;
        return toChar(bytes, offset, width);
    }

    /**
     * Decodes a big-endian char from a range of a byte array.
     *
     * @param bytes array holding the encoded value
     * @param offset index of the high byte
     * @param length number of bytes to read; must equal {@link #SIZEOF_CHAR}
     * @return the decoded char, or {@code (char) -1} when {@code bytes} is
     *         null, {@code length} is not {@link #SIZEOF_CHAR}, or the range
     *         runs past the end of the array
     */
    public static char toChar(byte[] bytes, int offset, final int length) {
        if (bytes == null) {
            return (char) -1;
        }
        if (length != SIZEOF_CHAR || offset + length > bytes.length) {
            return (char) -1;
        }
        final int high = bytes[offset] & 0xFF;
        final int low = bytes[offset + 1] & 0xFF;
        return (char) ((high << 8) | low);
    }

    /**
     * Decodes a whole byte array as a sequence of two-byte chars.
     *
     * @param bytes array holding the encoded chars; must not be null
     * @return the decoded chars; see {@link #toChars(byte[], int, int)} for
     *         the result on invalid input
     */
    public static char[] toChars(byte[] bytes) {
        final int total = bytes.length;
        return toChars(bytes, 0, total);
    }

    /**
     * Decodes a byte array, from an offset to its end, as a sequence of
     * two-byte chars.
     *
     * @param bytes array holding the encoded chars; must not be null
     * @param offset index of the first encoded byte
     * @return the decoded chars; see {@link #toChars(byte[], int, int)} for
     *         the result on invalid input
     */
    public static char[] toChars(byte[] bytes, int offset) {
        final int remaining = bytes.length - offset;
        return toChars(bytes, offset, remaining);
    }

    /**
     * Decodes a range of a byte array as a sequence of big-endian two-byte chars.
     *
     * @param bytes array holding the encoded chars
     * @param offset index of the first encoded byte
     * @param length number of bytes to decode; must be non-negative and even
     * @return a new array of {@code length / 2} chars, or null when
     *         {@code bytes} is null, {@code length} is negative or odd, or
     *         the range runs past the end of the array
     */
    public static char[] toChars(byte[] bytes, int offset, final int length) {
        // "% 2 != 0" (rather than "== 1") also catches negative odd lengths,
        // and the explicit sign check keeps negative even lengths from
        // reaching the array allocation below.
        if (bytes == null || length < 0 || length % 2 != 0 || offset + length > bytes.length) {
            return null;
        }

        char[] chars = new char[length / 2];
        for (int i = 0; i < chars.length; i++) {
            int j = offset + 2 * i;
            chars[i] = (char) (((bytes[j] & 0xFF) << 8) | (bytes[j + 1] & 0xFF));
        }
        return chars;
    }

    /**
     * Encodes a long in a variable-length format: values in [-112, 127] take
     * one byte; any other value is written as a marker byte followed by its
     * magnitude bytes, most significant first.
     *
     * @param vint value to encode
     * @return the encoded bytes in a new array sized by
     *         {@code WritableUtils.getVIntSize}
     */
    public static byte[] vintToBytes(final long vint) {
        long i = vint;
        // Presumably the total encoded length (marker plus payload) - confirm
        // against the Hadoop WritableUtils documentation.
        int size = WritableUtils.getVIntSize(i);
        byte[] result = new byte[size];
        int offset = 0;
        // Small values are stored directly in a single byte.
        if (i >= -112 && i <= 127) {
            result[offset] = ((byte) i);
            return result;
        }

        // The marker starts at -112 (non-negative) or -120 (negative) and is
        // decremented once for every payload byte.
        int len = -112;
        if (i < 0) {
            i ^= -1L; // take one's complement
            len = -120;
        }

        // Count how many bytes are needed to hold the (now non-negative) value.
        long tmp = i;
        while (tmp != 0) {
            tmp = tmp >> 8;
            len--;
        }

        result[offset++] = (byte) len;

        // Recover the payload byte count from the marker value.
        len = (len < -120) ? -(len + 120) : -(len + 112);

        // Emit the payload from the most significant byte down to the least.
        for (int idx = len; idx != 0; idx--) {
            int shiftbits = (idx - 1) * 8;
            long mask = 0xFFL << shiftbits;
            result[offset++] = (byte) ((i & mask) >> shiftbits);
        }
        return result;
    }

    /**
     * Decodes a variable-length encoded long that starts at index 0 of a
     * byte array.
     *
     * @param buffer array whose first byte is the value itself or a length marker
     * @return the decoded long
     */
    public static long bytesToVlong(final byte[] buffer) {
        final byte marker = buffer[0];
        final int total = WritableUtils.decodeVIntSize(marker);
        if (total == 1) {
            // Single-byte encoding: the first byte is the value.
            return marker;
        }
        long magnitude = 0;
        for (int pos = 1; pos < total; pos++) {
            magnitude = (magnitude << 8) | (buffer[pos] & 0xFF);
        }
        // Negative values are stored as their one's complement.
        return WritableUtils.isNegativeVInt(marker) ? ~magnitude : magnitude;
    }

    /**
     * Decodes a variable-length encoded value that starts at index 0 of a
     * byte array and narrows it to an int.
     *
     * @param buffer array whose first byte is the value itself or a length marker
     * @return the decoded value cast to int (high bits are discarded if the
     *         encoded value does not fit)
     */
    public static int bytesToVint(final byte[] buffer) {
        // Same decoding as bytesToVlong; only the final narrowing cast differs.
        return (int) bytesToVlong(buffer);
    }

    /**
     * Decodes a variable-length encoded long from a byte array at an offset.
     *
     * @param buffer binary array
     * @param offset index in {@code buffer} at which the encoded value begins
     * @throws java.io.IOException declared for callers; this implementation
     *         never throws it
     * @return the decoded long
     */
    public static long readVLong(final byte[] buffer, final int offset) throws IOException {
        final byte marker = buffer[offset];
        final int total = WritableUtils.decodeVIntSize(marker);
        if (total == 1) {
            // Single-byte encoding: the first byte is the value.
            return marker;
        }
        long magnitude = 0;
        for (int pos = offset + 1, end = offset + total; pos < end; pos++) {
            magnitude = (magnitude << 8) | (buffer[pos] & 0xFF);
        }
        // Negative values are stored as their one's complement.
        return WritableUtils.isNegativeVInt(marker) ? ~magnitude : magnitude;
    }

    /**
     * Compares two whole byte arrays; both must be non-null.
     *
     * @param left first array
     * @param right second array
     * @return 0 if equal, &lt; 0 if left is less than right, &gt; 0 otherwise
     */
    public static int compareTo(final byte[] left, final byte[] right) {
        final int leftLength = left.length;
        final int rightLength = right.length;
        return compareTo(left, 0, leftLength, right, 0, rightLength);
    }

    /**
     * Compares two byte ranges lexicographically, treating each byte as an
     * unsigned value; when one range is a prefix of the other, the shorter
     * range sorts first.
     *
     * @param b1 left buffer
     * @param s1 where to start comparing in the left buffer
     * @param l1 how much to compare from the left buffer
     * @param b2 right buffer
     * @param s2 where to start comparing in the right buffer
     * @param l2 how much to compare from the right buffer
     * @return 0 if equal, &lt; 0 if left is less than right, &gt; 0 otherwise
     */
    public static int compareTo(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        final int stop1 = s1 + l1;
        final int stop2 = s2 + l2;
        int p = s1;
        int q = s2;
        while (p < stop1 && q < stop2) {
            final int diff = (b1[p++] & 0xff) - (b2[q++] & 0xff);
            if (diff != 0) {
                return diff;
            }
        }
        // All shared positions match: order by length.
        return l1 - l2;
    }

    /**
     * Tests two byte arrays for equality of content.
     *
     * @param left first array; may be null
     * @param right second array; may be null
     * @return true if both are null, or both are non-null with the same
     *         length and the same bytes
     */
    public static boolean equals(final byte[] left, final byte[] right) {
        if (left == null && right == null) {
            return true;
        }
        if (left == null || right == null || left.length != right.length) {
            return false;
        }
        return compareTo(left, right) == 0;
    }

    /**
     * Decodes a byte array into a value of the type described by an Avro schema.
     *
     * <p>ENUM reads the symbol index from the first byte (treated as
     * unsigned); STRING becomes a {@link Utf8}; BYTES is wrapped in a
     * {@link ByteBuffer} without copying; INT and LONG use the
     * variable-length decoders; FLOAT and DOUBLE use the fixed-width
     * decoders; BOOLEAN is true when the first byte is non-zero; RECORD, MAP
     * and ARRAY are delegated to {@code IOUtils.deserialize}.
     *
     * @param val encoded bytes
     * @param schema schema whose type selects the decoding
     * @param datumReader reader handed to {@code IOUtils.deserialize} for
     *        RECORD, MAP and ARRAY; unused for other types
     * @param object instance handed to {@code IOUtils.deserialize} for
     *        RECORD, MAP and ARRAY; unused for other types
     * @return the decoded value, cast unchecked to {@code T}
     * @throws IOException declared for the {@code IOUtils.deserialize} call
     * @throws RuntimeException if the schema type is not one of those listed above
     */
    @SuppressWarnings("unchecked")
    public static <T> T fromBytes(byte[] val, Schema schema, SpecificDatumReader<T> datumReader, T object)
            throws IOException {
        Type type = schema.getType();
        switch (type) {
        case ENUM:
            // The ordinal is written as a single byte; mask it so ordinals
            // 128..255 are not sign-extended into a negative list index.
            String symbol = schema.getEnumSymbols().get(val[0] & 0xFF);
            return (T) Enum.valueOf(ReflectData.get().getClass(schema), symbol);
        case STRING:
            return (T) new Utf8(toString(val));
        case BYTES:
            return (T) ByteBuffer.wrap(val);
        case INT:
            return (T) Integer.valueOf(bytesToVint(val));
        case LONG:
            return (T) Long.valueOf(bytesToVlong(val));
        case FLOAT:
            return (T) Float.valueOf(toFloat(val));
        case DOUBLE:
            return (T) Double.valueOf(toDouble(val));
        case BOOLEAN:
            return (T) Boolean.valueOf(val[0] != 0);
        case RECORD: //fall
        case MAP:
        case ARRAY:
            return (T) IOUtils.deserialize(val, (SpecificDatumReader<SpecificRecord>) datumReader, schema,
                    (SpecificRecord) object);
        default:
            throw new RuntimeException("Unknown type: " + type);
        }
    }

    /**
     * Encodes a value as bytes according to the type of an Avro schema.
     *
     * <p>STRING is written as the UTF-8 bytes of the {@link Utf8} value;
     * BYTES as a copy of the {@link ByteBuffer} content from index 0 to its
     * limit; INT and LONG with the variable-length encoder; FLOAT and DOUBLE
     * as their fixed-width bit patterns; BOOLEAN as a single byte (1 or 0);
     * ENUM as a single byte holding the ordinal (so ordinals above 255 cannot
     * be represented); RECORD, MAP and ARRAY are delegated to
     * {@code IOUtils.serialize}.
     *
     * @param o value to encode; its runtime class must match the schema type
     * @param schema schema whose type selects the encoding
     * @param datumWriter writer handed to {@code IOUtils.serialize} for
     *        RECORD, MAP and ARRAY; unused for other types
     * @return the encoded bytes
     * @throws IOException declared for the {@code IOUtils.serialize} call
     * @throws RuntimeException if the schema type is not one of those listed above
     */
    @SuppressWarnings("unchecked")
    public static <T> byte[] toBytes(T o, Schema schema, SpecificDatumWriter<T> datumWriter) throws IOException {
        Type type = schema.getType();
        switch (type) {
        case STRING:
            return toBytes(((Utf8) o).toString()); // TODO: maybe ((Utf8)o).getBytes(); ?
        case BYTES:
            // ByteBuffer.array() would ignore arrayOffset/limit and hand out
            // the buffer's own backing array; copy the content instead.
            return toBytes((ByteBuffer) o);
        case INT:
            return vintToBytes((Integer) o);
        case LONG:
            return vintToBytes((Long) o);
        case FLOAT:
            return toBytes((Float) o);
        case DOUBLE:
            return toBytes((Double) o);
        case BOOLEAN:
            return (Boolean) o ? new byte[] { 1 } : new byte[] { 0 };
        case ENUM:
            return new byte[] { (byte) ((Enum<?>) o).ordinal() };
        case RECORD: //fall
        case MAP:
        case ARRAY:
            return IOUtils.serialize((SpecificDatumWriter<SpecificRecord>) datumWriter, schema, (SpecificRecord) o);
        default:
            throw new RuntimeException("Unknown type: " + type);
        }
    }
}