Example usage for java.nio.charset MalformedInputException MalformedInputException

List of usage examples for java.nio.charset MalformedInputException MalformedInputException

Introduction

On this page you can find example usages of the java.nio.charset.MalformedInputException constructor MalformedInputException(int inputLength).

Prototype

public MalformedInputException(int inputLength) 

Source Link

Document

Constructs a MalformedInputException with the given length.

Usage

From source file:org.apache.arrow.vector.util.Text.java

/**
 * Check to see if a byte array is valid utf-8
 *
 * @param utf8/*from   www.j a v a 2s  . c  o m*/
 *          the array of bytes
 * @param start
 *          the offset of the first byte in the array
 * @param len
 *          the length of the byte sequence
 * @throws MalformedInputException
 *           if the byte array contains invalid bytes
 */
public static void validateUTF8(byte[] utf8, int start, int len) throws MalformedInputException {
    int count = start;
    int leadByte = 0;
    int length = 0;
    int state = LEAD_BYTE;
    while (count < start + len) {
        int aByte = utf8[count] & 0xFF;

        switch (state) {
        case LEAD_BYTE:
            leadByte = aByte;
            length = bytesFromUTF8[aByte];

            switch (length) {
            case 0: // check for ASCII
                if (leadByte > 0x7F) {
                    throw new MalformedInputException(count);
                }
                break;
            case 1:
                if (leadByte < 0xC2 || leadByte > 0xDF) {
                    throw new MalformedInputException(count);
                }
                state = TRAIL_BYTE_1;
                break;
            case 2:
                if (leadByte < 0xE0 || leadByte > 0xEF) {
                    throw new MalformedInputException(count);
                }
                state = TRAIL_BYTE_1;
                break;
            case 3:
                if (leadByte < 0xF0 || leadByte > 0xF4) {
                    throw new MalformedInputException(count);
                }
                state = TRAIL_BYTE_1;
                break;
            default:
                // too long! Longest valid UTF-8 is 4 bytes (lead + three)
                // or if < 0 we got a trail byte in the lead byte position
                throw new MalformedInputException(count);
            } // switch (length)
            break;

        case TRAIL_BYTE_1:
            if (leadByte == 0xF0 && aByte < 0x90) {
                throw new MalformedInputException(count);
            }
            if (leadByte == 0xF4 && aByte > 0x8F) {
                throw new MalformedInputException(count);
            }
            if (leadByte == 0xE0 && aByte < 0xA0) {
                throw new MalformedInputException(count);
            }
            if (leadByte == 0xED && aByte > 0x9F) {
                throw new MalformedInputException(count);
            }
            // falls through to regular trail-byte test!!
        case TRAIL_BYTE:
            if (aByte < 0x80 || aByte > 0xBF) {
                throw new MalformedInputException(count);
            }
            if (--length == 0) {
                state = LEAD_BYTE;
            } else {
                state = TRAIL_BYTE;
            }
            break;
        default:
            break;
        } // switch (state)
        count++;
    }
}

From source file:io.Text.java

/**
 * Check to see if a range of a byte array is valid utf-8.
 *
 * <p>Each lead byte determines, via {@code bytesFromUTF8}, how many trail bytes must follow.
 * The first trail byte is checked against lead-byte-specific limits (rejecting overlong
 * forms, surrogates and values above U+10FFFF) and every trail byte must be in
 * {@code 0x80..0xBF}.
 *
 * @param utf8 the array of bytes
 * @param start the offset of the first byte in the array
 * @param len the length of the byte sequence
 * @throws MalformedInputException if the byte range contains invalid bytes or stops in the
 *         middle of a multi-byte sequence; the value handed to the exception is the array
 *         offset at which the problem was detected
 */
public static void validateUTF8(byte[] utf8, int start, int len) throws MalformedInputException {
    final int end = start + len;
    int count = start;
    int leadByte = 0;
    int length = 0; // trail bytes still expected for the current sequence
    int state = LEAD_BYTE;
    while (count < end) {
        int aByte = utf8[count] & 0xFF; // treat the byte as unsigned

        switch (state) {
        case LEAD_BYTE:
            leadByte = aByte;
            length = bytesFromUTF8[aByte];

            switch (length) {
            case 0: // check for ASCII
                if (leadByte > 0x7F) {
                    throw new MalformedInputException(count);
                }
                break;
            case 1:
                // 0xC0 and 0xC1 would only encode overlong forms
                if (leadByte < 0xC2 || leadByte > 0xDF) {
                    throw new MalformedInputException(count);
                }
                state = TRAIL_BYTE_1;
                break;
            case 2:
                if (leadByte < 0xE0 || leadByte > 0xEF) {
                    throw new MalformedInputException(count);
                }
                state = TRAIL_BYTE_1;
                break;
            case 3:
                // lead bytes above 0xF4 would encode values beyond U+10FFFF
                if (leadByte < 0xF0 || leadByte > 0xF4) {
                    throw new MalformedInputException(count);
                }
                state = TRAIL_BYTE_1;
                break;
            default:
                // too long! Longest valid UTF-8 is 4 bytes (lead + three)
                // or if < 0 we got a trail byte in the lead byte position
                throw new MalformedInputException(count);
            } // switch (length)
            break;

        case TRAIL_BYTE_1:
            if (leadByte == 0xF0 && aByte < 0x90) { // overlong 4-byte form
                throw new MalformedInputException(count);
            }
            if (leadByte == 0xF4 && aByte > 0x8F) { // above U+10FFFF
                throw new MalformedInputException(count);
            }
            if (leadByte == 0xE0 && aByte < 0xA0) { // overlong 3-byte form
                throw new MalformedInputException(count);
            }
            if (leadByte == 0xED && aByte > 0x9F) { // UTF-16 surrogate range
                throw new MalformedInputException(count);
            }
            // falls through to regular trail-byte test!!
        case TRAIL_BYTE:
            if (aByte < 0x80 || aByte > 0xBF) {
                throw new MalformedInputException(count);
            }
            if (--length == 0) {
                state = LEAD_BYTE;
            } else {
                state = TRAIL_BYTE;
            }
            break;
        } // switch (state)
        count++;
    }

    // If the range ended while trail bytes were still expected, the final
    // sequence is truncated and therefore malformed.
    if (state != LEAD_BYTE) {
        throw new MalformedInputException(count);
    }
}