Java UTF8 Convert To UTF8ToCodePoint(byte[] b, int s)

Here you can find the source of UTF8ToCodePoint(byte[] b, int s)

Description

UTF-8 to code point: decodes the UTF-8 byte sequence that starts at index s of b and returns its code point.

License

Apache License

Declaration

public static int UTF8ToCodePoint(byte[] b, int s) 

Method Source Code

//package com.java2s;
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

public class Main {
    /**
     * Decodes the UTF-8 encoded sequence that begins at {@code b[s]} and returns its value.
     *
     * <p>The length of the sequence is taken from the lead byte {@code b[s]}:
     * <ul>
     *   <li>{@code 0xxxxxxx} - 1 byte, returned unchanged;</li>
     *   <li>{@code 110xxxxx} - 2 bytes;</li>
     *   <li>{@code 1110xxxx} - 3 bytes;</li>
     *   <li>{@code 11110xxx} - 4 bytes;</li>
     *   <li>{@code 111110xx} - 5 bytes (legacy form);</li>
     *   <li>{@code 1111110x} - 6 bytes (legacy form).</li>
     * </ul>
     *
     * <p>Decoding is lenient: following bytes are not checked for the {@code 10xxxxxx}
     * continuation pattern (only their low six bits are used), and overlong or
     * out-of-range results are not rejected.
     *
     * @param b the array holding the encoded bytes
     * @param s the index of the lead byte within {@code b}
     * @return the decoded value, or {@code 0} when {@code b[s]} is not a valid lead byte
     *         (a continuation byte {@code 10xxxxxx}, or {@code 0xFE}/{@code 0xFF})
     * @throws ArrayIndexOutOfBoundsException if {@code s}, or the index of any byte the
     *         lead byte calls for, lies outside {@code b}
     */
    public static int UTF8ToCodePoint(byte[] b, int s) {
        final int lead = b[s];

        // A non-negative byte has its top bit clear: a plain 7-bit value.
        if (lead >= 0) {
            return lead;
        }

        // Move the byte into the top 8 bits and invert it, so the run of leading
        // 1-bits in the lead byte becomes a run of leading zeros we can count.
        // That run length is the total number of bytes in the sequence.
        final int length = Integer.numberOfLeadingZeros(~(lead << 24));
        if (length < 2 || length > 6) {
            // One leading 1-bit is a stray continuation byte; seven or eight is 0xFE/0xFF.
            return 0;
        }

        // The lead byte contributes the bits below its length marker and the 0 that ends it.
        int codePoint = lead & (0x7f >> length);

        // Each following byte contributes its low six bits, most significant first.
        for (int offset = 1; offset < length; offset++) {
            codePoint = (codePoint << 6) | (b[s + offset] & 0x3f);
        }
        return codePoint;
    }
}

Related

  1. getBytesUtf8(String str)
  2. getBytesUtf8(String string)
  3. getBytesUtf8(String string)
  4. getBytesUTF8(String string)
  5. getBytesUtf8(String string)
  6. utf8ToCodePoint(int b1, int b2, int b3, int b4)
  7. utf8Togb2312(String str)
  8. utf8ToString(byte[] src, int stPos, int utf8Len)
  9. utf8ToUnicode(String inStr)