Java Byte Array to String by Charset charSetForBOM(byte[] buffer, int offset)

Here you can find the source of charSetForBOM(byte[] buffer, int offset)

Description

Determine UTF character set via byte order mark

License

Open Source License

Parameter

Parameter Description
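buffer the byte array to inspect for a byte order mark
offset the index in buffer at which the byte order mark is expected to start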

Return

UTF_16BE, UTF_16LE or UTF_8 if a BOM is found, otherwise an empty Optional

Declaration

public static Optional<Charset> charSetForBOM(byte[] buffer, int offset) 

Method Source Code

//package com.java2s;
/**
 *    Copyright 2015 Sven Ewald
 *
 *    This file is part of JSONBeam.
 *
 *    JSONBeam is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, any
 *    later version.
 *
 *    JSONBeam is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with JSONBeam.  If not, see <http://www.gnu.org/licenses/>.
 */

import java.nio.charset.Charset;

import java.nio.charset.StandardCharsets;
import java.util.Optional;

public class Main {
    /**
     * Determines the UTF character set from a byte order mark (BOM) located at
     * a given position in a byte buffer.
     *
     * <p>The following marks are recognised, checked in this order:
     * <ul>
     *   <li>{@code FE FF} - UTF-16 big endian</li>
     *   <li>{@code FF FE} - UTF-16 little endian</li>
     *   <li>{@code EF BB BF} - UTF-8</li>
     * </ul>
     * Only these three marks are checked. A UTF-32 little endian mark
     * ({@code FF FE 00 00}) starts with the UTF-16 little endian mark and is
     * therefore reported as {@code UTF_16LE}.
     *
     * @param buffer the bytes to inspect; must not be {@code null}
     * @param offset index in {@code buffer} at which the BOM is expected to
     *        start; a negative offset, or one that leaves fewer than two bytes
     *        available, yields an empty result
     * @return UTF_16BE, UTF_16LE or UTF_8 if a BOM is found, else an empty
     *         {@link Optional}
     * @throws NullPointerException if {@code buffer} is {@code null}
     */
    public static Optional<Charset> charSetForBOM(byte[] buffer, int offset) {
        // Compare via subtraction: "2 + offset" overflows for offsets close to
        // Integer.MAX_VALUE and would let an out-of-range index slip through.
        if (offset < 0 || buffer.length - offset < 2) {
            return Optional.empty();
        }
        final int available = buffer.length - offset;
        final byte first = buffer[offset];
        final byte second = buffer[offset + 1];

        if (first == (byte) 0xFE && second == (byte) 0xFF) {
            return Optional.of(StandardCharsets.UTF_16BE);
        }
        if (first == (byte) 0xFF && second == (byte) 0xFE) {
            return Optional.of(StandardCharsets.UTF_16LE);
        }
        // The UTF-8 mark is three bytes long, so it needs one more byte than
        // the UTF-16 marks checked above.
        if (available >= 3
                && first == (byte) 0xEF
                && second == (byte) 0xBB
                && buffer[offset + 2] == (byte) 0xBF) {
            return Optional.of(StandardCharsets.UTF_8);
        }
        return Optional.empty();
    }
}

Related

  1. bytesToString(byte[] bytes, Charset charset)
  2. byteToString(byte[] b, String charset)
  3. byteToString(Charset charset, byte[] data)
  4. changeEncoding(byte[] byteArray, Charset charsetFrom, Charset charsetTo)
  5. charSequence2Bytes(CharSequence sb, Charset charset)
  6. convertCharset(byte[] content, Charset fromCharset, Charset toCharset)
  7. convertToBytes(final String str, final Charset charset)
  8. convertToChars(final byte[] bytes, final Charset charset)
  9. convertToString(byte[] bytes, Charset charset)