Android UTF-8 to String conversion: utf8BytesToString(byte[] bytes, int start, int length)

Here you can find the source of utf8BytesToString(byte[] bytes, int start, int length)

Description

Converts an array of UTF-8 bytes into a string.

License

Apache License

Parameter

Parameter Description
bytes: non-null; the bytes to convert
start: the start index of the UTF-8 string to convert
length: the length of the UTF-8 string to convert, not including any null-terminator that might be present

Return

non-null; the converted string

Declaration

public static String utf8BytesToString(byte[] bytes, int start,
        int length) 

Method Source Code

/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

public class Main{
    /**
     * Per-thread scratch array that the decoder writes into before building
     * the result string. It is swapped for a larger array whenever a call
     * asks for more characters than the current one can hold.
     */
    private static final ThreadLocal<char[]> localBuffer = new ThreadLocal<char[]>() {
        @Override
        protected char[] initialValue() {
            // Starting capacity; grown on demand by utf8BytesToString.
            return new char[256];
        }
    };

    /**
     * Decodes a run of UTF-8 bytes into a {@code String}.
     *
     * <p>Only one-, two- and three-byte sequences are accepted. A lone
     * {@code 0x00} byte is rejected, while the two-byte sequence that decodes
     * to U+0000 is accepted. Over-long two- and three-byte forms, stray
     * continuation bytes and lead bytes of the form {@code 1111XXXX} are
     * rejected. No explicit check of {@code start}/{@code length} against
     * {@code bytes.length} is made here.
     *
     * @param bytes non-null; the bytes to convert
     * @param start the start index of the utf8 string to convert
     * @param length the length of the utf8 string to convert, not including any null-terminator that might be present
     * @return non-null; the converted string
     * @throws IllegalArgumentException if a malformed or unsupported byte is
     *         encountered, or a multi-byte sequence runs past {@code length}
     */
    public static String utf8BytesToString(byte[] bytes, int start,
            int length) {
        // At most one char is produced per input byte, so `length` chars suffice.
        char[] buffer = localBuffer.get();
        if (buffer == null || buffer.length < length) {
            buffer = new char[length];
            localBuffer.set(buffer);
        }

        int pos = start;          // index of the next byte to read
        int remaining = length;   // bytes of the input range not yet consumed
        int written = 0;          // chars stored in buffer so far

        while (remaining > 0) {
            int lead = bytes[pos] & 0xFF;
            int decoded;

            if ((lead & 0x80) == 0) {
                // 0XXXXXXX -- one byte
                if (lead == 0) {
                    // A bare zero byte is not a legal encoding here.
                    return throwBadUtf8(lead, pos);
                }
                decoded = lead;
                pos += 1;
                remaining -= 1;
            } else if ((lead & 0xe0) == 0xc0) {
                // 110XXXXX 10XXXXXX -- two bytes
                if (remaining < 2) {
                    return throwBadUtf8(lead, pos);
                }
                int second = continuationByte(bytes, pos + 1);
                decoded = ((lead & 0x1f) << 6) | (second & 0x3f);
                if (decoded != 0 && decoded < 0x80) {
                    // Over-long: fits in a single byte (zero is the one exception).
                    return throwBadUtf8(second, pos + 1);
                }
                pos += 2;
                remaining -= 2;
            } else if ((lead & 0xf0) == 0xe0) {
                // 1110XXXX 10XXXXXX 10XXXXXX -- three bytes
                if (remaining < 3) {
                    return throwBadUtf8(lead, pos);
                }
                int second = continuationByte(bytes, pos + 1);
                int third = continuationByte(bytes, pos + 2);
                decoded = ((lead & 0x0f) << 12)
                        | ((second & 0x3f) << 6)
                        | (third & 0x3f);
                if (decoded < 0x800) {
                    // Over-long: fits in one or two bytes.
                    return throwBadUtf8(third, pos + 2);
                }
                pos += 3;
                remaining -= 3;
            } else {
                // 10XXXXXX or 1111XXXX as a lead byte -- not accepted
                return throwBadUtf8(lead, pos);
            }

            buffer[written] = (char) decoded;
            written += 1;
        }

        return new String(buffer, 0, written);
    }

    /**
     * Reads the byte at {@code index} and verifies that it has the
     * {@code 10XXXXXX} continuation form.
     *
     * @param bytes the input being decoded
     * @param index position of the expected continuation byte
     * @return the unsigned value of the byte
     * @throws IllegalArgumentException if the byte is not a continuation byte
     */
    private static int continuationByte(byte[] bytes, int index) {
        int unit = bytes[index] & 0xFF;
        if ((unit & 0xc0) != 0x80) {
            throwBadUtf8(unit, index);
        }
        return unit;
    }

    /**
     * Helper for {@link #utf8BytesToString} that reports a bogus utf-8 byte.
     * Declared to return {@code String} so callers can write
     * {@code return throwBadUtf8(...)}; it never actually returns.
     *
     * @param value the byte value
     * @param offset the file offset
     * @return never
     * @throws IllegalArgumentException always thrown
     */
    private static String throwBadUtf8(int value, int offset) {
        String message = "bad utf-8 byte "
                + Hex.u1(value) + " at offset " + Hex.u4(offset);
        throw new IllegalArgumentException(message);
    }
}

Related

  1. utf8BytesWithUtf16LengthToString( @Nonnull byte[] bytes, int start, int utf16Length)
  2. utf8BytesWithUtf16LengthToString( @Nonnull byte[] bytes, int start, int utf16Length, @Nullable int[] readLength)
  3. utf8decoder(byte[] data, int offset, int length)