Android UTF8 Encode bytes2StringUTF8(byte[] buf, int bufOffset, int bufLength, boolean bigEndian)

Here you can find the source of bytes2StringUTF8(byte[] buf, int bufOffset, int bufLength, boolean bigEndian)

Description

Decodes a range of UTF-8 encoded bytes (byte[]) into a String.

Parameter

Parameter Description
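buf the array holding the UTF-8 encoded bytes
bufOffset the index in buf of the first byte to decode
bufLength the number of bytes to decode, starting at bufOffset
bigEndian true to store each decoded character as is; false to swap the high and low byte of each character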

Declaration

public static String bytes2StringUTF8(byte[] buf, int bufOffset,
        int bufLength, boolean bigEndian) 

Method Source Code

//package com.java2s;

public class Main {
    /** Low byte of the character ('?') emitted in place of a malformed or truncated sequence. */
    private static final int REPLACEMENT = 0x3f;

    /**
     * Decodes a range of UTF-8 encoded bytes into a String.
     *
     * <p>Only one-, two- and three-byte sequences (U+0000 to U+FFFF) are decoded.
     * A two- or three-byte sequence whose continuation bytes are missing or
     * malformed is replaced by {@code '?'}. Stray continuation bytes and the
     * bytes of four-byte (or longer) sequences are skipped without producing
     * any output.
     *
     * @param buf       the array holding the UTF-8 encoded bytes
     * @param bufOffset index in {@code buf} of the first byte to decode
     * @param bufLength number of bytes to decode, starting at {@code bufOffset}
     * @param bigEndian {@code true} to store each decoded 16-bit value as is;
     *                  {@code false} to swap its high and low byte before storing
     * @return the decoded text
     */
    public static String bytes2StringUTF8(byte[] buf, int bufOffset,
            int bufLength, boolean bigEndian) {
        // Upper bound on the number of chars; the decoder reports the exact count.
        char[] cbuf = new char[bytesUTF8len(buf, bufOffset, bufLength)];
        int len = bytes2charsUTF8(buf, bufOffset, bufLength, cbuf, bigEndian);
        return new String(cbuf, 0, len);
    }

    /**
     * Decodes a whole array of UTF-8 encoded bytes into a String, storing each
     * decoded character unswapped (same as passing {@code bigEndian = true}).
     *
     * @param buf the UTF-8 encoded bytes
     * @return the decoded text
     */
    public static String bytes2StringUTF8(byte[] buf) {
        return bytes2StringUTF8(buf, 0, buf.length, true);
    }

    /**
     * Counts the bytes in the range that can start a character: single-byte
     * values ({@code 0xxxxxxx}) and lead bytes ({@code 11xxxxxx}).
     *
     * <p>The result is an upper bound on the number of chars the decoder emits,
     * because every emitted char begins at one of the counted bytes.
     *
     * @param buf       the array holding the UTF-8 encoded bytes
     * @param bufOffset index of the first byte to inspect
     * @param bufLength number of bytes to inspect
     * @return the number of single-byte and lead bytes in the range
     */
    private static int bytesUTF8len(byte[] buf, int bufOffset, int bufLength) {
        int end = bufOffset + bufLength;
        int len = 0;
        for (int i = bufOffset; i < end; i++) {
            int b = buf[i];
            if ((b & 0x80) == 0x00 || (b & 0xc0) == 0xc0) {
                len++;
            }
        }
        return len;
    }

    /**
     * Decodes the UTF-8 bytes in the range into {@code cbuf}.
     *
     * @param buf       the array holding the UTF-8 encoded bytes
     * @param bufOffset index of the first byte to decode
     * @param bufLength number of bytes to decode
     * @param cbuf      destination; must have room for every decoded char
     * @param bigEndian {@code false} to swap the two bytes of every stored char
     * @return the number of chars written to {@code cbuf}
     */
    private static int bytes2charsUTF8(byte[] buf, int bufOffset,
            int bufLength, char[] cbuf, boolean bigEndian) {
        int end = bufOffset + bufLength;
        int pos = bufOffset;
        int cpos = 0;
        while (pos < end) {
            int lead = buf[pos] & 0xff;
            int unit;
            if ((lead & 0x80) == 0x00) {
                // U+0000 - U+007F: 0xxxxxxx
                unit = lead;
                pos++;
            } else if ((lead & 0xe0) == 0xc0) {
                // U+0080 - U+07FF: 110xxxxx 10xxxxxx
                if (isContinuation(buf, pos + 1, end)) {
                    unit = ((lead & 0x1f) << 6) | (buf[pos + 1] & 0x3f);
                    pos += 2;
                } else {
                    // Missing or malformed continuation byte: emit '?' and
                    // resume decoding at the very next byte.
                    unit = REPLACEMENT;
                    pos += 1;
                }
            } else if ((lead & 0xf0) == 0xe0) {
                // U+0800 - U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
                boolean first = isContinuation(buf, pos + 1, end);
                if (first && isContinuation(buf, pos + 2, end)) {
                    unit = ((lead & 0x0f) << 12)
                            | ((buf[pos + 1] & 0x3f) << 6)
                            | (buf[pos + 2] & 0x3f);
                    pos += 3;
                } else {
                    // Emit '?' and skip the lead byte plus the one valid
                    // continuation byte, if there was one.
                    unit = REPLACEMENT;
                    pos += first ? 2 : 1;
                }
            } else {
                // Stray continuation byte, or the lead byte of a sequence
                // longer than three bytes: skipped without output.
                pos++;
                continue;
            }
            cbuf[cpos++] = toChar(unit, bigEndian);
        }
        return cpos;
    }

    /**
     * Tells whether {@code buf[index]} lies before {@code end} and is a UTF-8
     * continuation byte ({@code 10xxxxxx}).
     */
    private static boolean isContinuation(byte[] buf, int index, int end) {
        return index < end && (buf[index] & 0xc0) == 0x80;
    }

    /**
     * Converts a decoded 16-bit value to a char, swapping its two bytes when
     * {@code bigEndian} is {@code false}.
     */
    private static char toChar(int unit, boolean bigEndian) {
        if (bigEndian) {
            return (char) unit;
        }
        return (char) (((unit & 0xff) << 8) | ((unit >> 8) & 0xff));
    }
}

Related

  1. utf8Encode(String str)
  2. utf8Encode(String str, String defultReturn)
  3. toUtf8(String str)
  4. bytesUtf8(int c)
  5. bytes2StringUTF8(byte[] buf)
  6. bytes2charsUTF8(byte[] buf, int bufOffset, int bufLength, char[] cbuf, boolean bigEndian)
  7. bytesUTF8len(byte[] buf, int bufOffset, int bufLength)
  8. char2ByteUTF8(String input, int inOff, int inEnd, byte[] output, int outOff, int outEnd, boolean getLengthFlag)
  9. getStringInUtf8(final String str)