Java UTF8 GetUtf8Bytes(String str, boolean replace)

Here you can find the source of GetUtf8Bytes(String str, boolean replace)

Description

Encodes a string in UTF-8 as a byte array.

License

Creative Commons License

Parameter

Parameter Description
str A text string.
replace If true, replaces unpaired surrogate code points with the replacement character (U+FFFD). If false, stops processing when an unpaired surrogate code point is seen.

Exception

Parameter Description
NullPointerException The parameter str is null.
IllegalArgumentException The string contains an unpaired surrogate code point and replace is false, or an internal error occurred.

Return

The string encoded in UTF-8.

Declaration

public static byte[] GetUtf8Bytes(String str, boolean replace) 

Method Source Code


//package com.java2s;
import java.io.*;

public class Main {
    /** Size, in bytes, of the scratch buffer used to batch writes to the output stream. */
    private static final int STREAMED_STRING_BUFFER_LENGTH = 4096;

    /**
     * Encodes a string in UTF-8 as a byte array. Line breaks are written unchanged.
     * @param str A text string.
     * @param replace If true, replaces unpaired surrogate code points with the
     * replacement character (U+FFFD). If false, an unpaired surrogate code
     * point causes an exception.
     * @return The string encoded in UTF-8.
     * @throws NullPointerException The parameter {@code str} is null.
     * @throws IllegalArgumentException The string contains an unpaired surrogate code
     * point and {@code replace} is false, or an internal error occurred.
     */
    public static byte[] GetUtf8Bytes(String str, boolean replace) {
        return GetUtf8Bytes(str, replace, false);
    }

    /**
     * Encodes a string in UTF-8 as a byte array, optionally normalizing line breaks.
     * @param str A text string.
     * @param replace If true, replaces unpaired surrogate code points with the
     * replacement character (U+FFFD). If false, an unpaired surrogate code
     * point causes an exception.
     * @param lenientLineBreaks If true, replaces carriage return (CR) not followed
     * by line feed (LF) and LF not preceded by CR with CR-LF pairs.
     * @return The string encoded in UTF-8.
     * @throws NullPointerException The parameter {@code str} is null.
     * @throws IllegalArgumentException The string contains an unpaired surrogate code
     * point and {@code replace} is false, or an internal error occurred.
     */
    public static byte[] GetUtf8Bytes(String str, boolean replace, boolean lenientLineBreaks) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        // ByteArrayOutputStream holds no system resources and its close() is a
        // no-op, so no try/finally cleanup is required here.
        ByteArrayOutputStream ms = new ByteArrayOutputStream();
        try {
            // Forward lenientLineBreaks to the encoder; the shorter WriteUtf8
            // overloads always disable line break normalization.
            if (WriteUtf8(str, 0, str.length(), ms, replace, lenientLineBreaks) != 0) {
                throw new IllegalArgumentException("Unpaired surrogate code point");
            }
        } catch (IOException ex) {
            throw new IllegalArgumentException("I/O error occurred", ex);
        }
        return ms.toByteArray();
    }

    /**
     * Writes a portion of a string in UTF-8 encoding to a data stream. Line
     * breaks are written unchanged.
     * @param str A string to write.
     * @param offset The zero-based index where the string portion to write begins.
     * @param length The length of the string portion to write.
     * @param stream A writable data stream.
     * @param replace If true, replaces unpaired surrogate code points with the
     * replacement character (U+FFFD). If false, stops processing when an
     * unpaired surrogate code point is seen.
     * @return 0 if the entire string portion was written; or -1 if the string
     * portion contains an unpaired surrogate code point and {@code replace}
     * is false.
     * @throws NullPointerException The parameter {@code str} is null or {@code
     * stream} is null.
     * @throws IllegalArgumentException The parameter {@code offset} is less than 0,
     * {@code length} is less than 0, or {@code offset} plus {@code length}
     * is greater than the string's length.
     * @throws java.io.IOException An I/O error occurred.
     */
    public static int WriteUtf8(String str, int offset, int length, OutputStream stream, boolean replace)
            throws java.io.IOException {
        return WriteUtf8(str, offset, length, stream, replace, false);
    }

    /**
     * Writes a portion of a string in UTF-8 encoding to a data stream.
     * Output is batched through an internal buffer; when an unpaired surrogate
     * stops processing, the bytes encoded before it are still written.
     * @param str A string to write.
     * @param offset The zero-based index where the string portion to write begins.
     * @param length The length of the string portion to write.
     * @param stream A writable data stream.
     * @param replace If true, replaces unpaired surrogate code points with the
     * replacement character (U+FFFD). If false, stops processing when an
     * unpaired surrogate code point is seen.
     * @param lenientLineBreaks If true, replaces carriage return (CR) not followed
     * by line feed (LF) and LF not preceded by CR with CR-LF pairs.
     * @return 0 if the entire string portion was written; or -1 if the string
     * portion contains an unpaired surrogate code point and {@code replace}
     * is false.
     * @throws NullPointerException The parameter {@code str} is null or {@code
     * stream} is null.
     * @throws IllegalArgumentException The parameter {@code offset} is less than 0,
     * {@code length} is less than 0, or {@code offset} plus {@code length}
     * is greater than the string's length.
     * @throws java.io.IOException An I/O error occurred.
     */
    public static int WriteUtf8(String str, int offset, int length, OutputStream stream, boolean replace,
            boolean lenientLineBreaks) throws java.io.IOException {
        if (stream == null) {
            throw new NullPointerException("stream");
        }
        if (str == null) {
            throw new NullPointerException("str");
        }
        if (offset < 0) {
            throw new IllegalArgumentException("offset (" + offset + ") is less than 0");
        }
        if (offset > str.length()) {
            throw new IllegalArgumentException("offset (" + offset + ") is more than " + str.length());
        }
        if (length < 0) {
            throw new IllegalArgumentException("length (" + length + ") is less than 0");
        }
        if (length > str.length()) {
            throw new IllegalArgumentException("length (" + length + ") is more than " + str.length());
        }
        if (str.length() - offset < length) {
            throw new IllegalArgumentException(
                    "str.length() minus offset (" + (str.length() - offset) + ") is less than " + length);
        }
        byte[] bytes = new byte[STREAMED_STRING_BUFFER_LENGTH];
        int byteIndex = 0;
        int retval = 0;
        int endIndex = offset + length;
        for (int index = offset; index < endIndex; ++index) {
            int c = str.charAt(index);
            if (c <= 0x7f) {
                if (lenientLineBreaks && (c == 0x0d || c == 0x0a)) {
                    // A bare CR, a bare LF, and a CR-LF pair each become exactly one CR-LF.
                    if (c == 0x0d && index + 1 < endIndex && str.charAt(index + 1) == 0x0a) {
                        ++index; // consume the LF of an existing CR-LF pair
                    }
                    byteIndex = reserve(stream, bytes, byteIndex, 2);
                    bytes[byteIndex++] = 0x0d;
                    bytes[byteIndex++] = 0x0a;
                    continue;
                }
                byteIndex = reserve(stream, bytes, byteIndex, 1);
                bytes[byteIndex++] = (byte) c;
            } else if (c <= 0x7ff) {
                byteIndex = reserve(stream, bytes, byteIndex, 2);
                bytes[byteIndex++] = (byte) (0xc0 | ((c >> 6) & 0x1f));
                bytes[byteIndex++] = (byte) (0x80 | (c & 0x3f));
            } else {
                if ((c & 0xfc00) == 0xd800 && index + 1 < endIndex && str.charAt(index + 1) >= 0xdc00
                        && str.charAt(index + 1) <= 0xdfff) {
                    // High surrogate followed by a low surrogate inside the portion:
                    // combine them into a supplementary code point.
                    c = 0x10000 + ((c - 0xd800) << 10) + (str.charAt(index + 1) - 0xdc00);
                    ++index;
                } else if ((c & 0xf800) == 0xd800) {
                    // Unpaired surrogate (either half).
                    if (!replace) {
                        retval = -1;
                        break; // the bytes encoded so far are still written below
                    }
                    c = 0xfffd;
                }
                if (c <= 0xffff) {
                    byteIndex = reserve(stream, bytes, byteIndex, 3);
                    bytes[byteIndex++] = (byte) (0xe0 | ((c >> 12) & 0x0f));
                    bytes[byteIndex++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                    bytes[byteIndex++] = (byte) (0x80 | (c & 0x3f));
                } else {
                    byteIndex = reserve(stream, bytes, byteIndex, 4);
                    bytes[byteIndex++] = (byte) (0xf0 | ((c >> 18) & 0x07));
                    bytes[byteIndex++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                    bytes[byteIndex++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                    bytes[byteIndex++] = (byte) (0x80 | (c & 0x3f));
                }
            }
        }
        stream.write(bytes, 0, byteIndex);
        return retval;
    }

    /**
     * Writes a string in UTF-8 encoding to a data stream. Line breaks are
     * written unchanged.
     * @param str A string to write.
     * @param stream A writable data stream.
     * @param replace If true, replaces unpaired surrogate code points with the
     * replacement character (U+FFFD). If false, stops processing when an
     * unpaired surrogate code point is seen.
     * @return 0 if the entire string was written; or -1 if the string contains an
     * unpaired surrogate code point and {@code replace} is false.
     * @throws NullPointerException The parameter {@code str} is null or {@code
     * stream} is null.
     * @throws java.io.IOException An I/O error occurred.
     */
    public static int WriteUtf8(String str, OutputStream stream, boolean replace) throws java.io.IOException {
        if (str == null) {
            throw new NullPointerException("str");
        }
        return WriteUtf8(str, 0, str.length(), stream, replace);
    }

    /**
     * Makes sure the buffer can take {@code needed} more bytes, writing the
     * buffered bytes to the stream first when it cannot.
     * @param stream The stream that receives the buffered bytes.
     * @param buffer The scratch buffer.
     * @param used The number of bytes currently held in {@code buffer}.
     * @param needed The number of bytes about to be appended.
     * @return The buffer position to append at: 0 if the buffer was written
     * out, otherwise {@code used}.
     * @throws java.io.IOException An I/O error occurred.
     */
    private static int reserve(OutputStream stream, byte[] buffer, int used, int needed)
            throws java.io.IOException {
        if (used + needed > buffer.length) {
            stream.write(buffer, 0, used);
            return 0;
        }
        return used;
    }
}

Related

  1. getUTF8()
  2. getUtf8()
  3. getUTF8(byte[] data, int offset, int length)
  4. getUTF8Bytes(String s)
  5. getUtf8Bytes(String s)
  6. getUTF8Bytes(String string)
  7. getUTF8BytesFromString(String str)
  8. getUtf8Decoder()
  9. getUtf8OrDefault()