Java Text File Read by Charset createBOMStrippedReader(InputStream stream, String defaultCharset)

Here you can find the source of createBOMStrippedReader(InputStream stream, String defaultCharset)

Description

Create BOM stripped reader from the stream.

License

Open Source License

Parameter

Parameter Description
stream input stream
defaultCharset default charset

Exception

Exception Description
IOException if I/O exception occurred

Return

reader for the stream without BOM

Declaration

public static Reader createBOMStrippedReader(InputStream stream, String defaultCharset) throws IOException 

Method Source Code


//package com.java2s;
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

import java.io.BufferedInputStream;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

public class Main {
    /** Number of leading bytes inspected; the longest BOM recognised here is UTF-8's EF BB BF. */
    private static final int MAX_BOM_LENGTH = 3;

    /**
     * Create BOM stripped reader from the stream.
     * Charset of the reader is set to UTF-8, UTF-16 or system's default.
     * @param stream input stream
     * @return reader for the stream without BOM
     * @throws IOException if I/O exception occurred
     */
    public static Reader createBOMStrippedReader(InputStream stream) throws IOException {
        return createBOMStrippedReader(stream, Charset.defaultCharset().name());
    }

    /**
     * Create BOM stripped reader from the stream.
     * <p>
     * The first bytes of the stream are inspected for a byte order mark:
     * <ul>
     *   <li>{@code EF BB BF} selects UTF-8; the BOM bytes are consumed here
     *       so they never reach the decoder;</li>
     *   <li>{@code FE FF} or {@code FF FE} selects UTF-16; the stream is
     *       rewound so the UTF-16 decoder can read the BOM itself to pick the
     *       byte order;</li>
     *   <li>anything else (including a stream shorter than a BOM) rewinds the
     *       stream and selects {@code defaultCharset}.</li>
     * </ul>
     * If the stream does not support mark/reset it is wrapped in a
     * {@link BufferedInputStream}.
     *
     * @param stream input stream
     * @param defaultCharset name of the charset to use when no BOM is found
     * @return reader for the stream without BOM
     * @throws IOException if I/O exception occurred, or if the chosen charset
     *         name is not supported
     */
    public static Reader createBOMStrippedReader(InputStream stream, String defaultCharset) throws IOException {
        InputStream in = stream.markSupported() ? stream : new BufferedInputStream(stream);

        in.mark(MAX_BOM_LENGTH);
        byte[] head = new byte[MAX_BOM_LENGTH];
        int br = readHead(in, head);

        boolean utf8Bom = br >= 3
                && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF;
        if (utf8Bom) {
            // InputStreamReader does not discard the BOM on UTF-8 streams,
            // so leave the stream positioned just after it (no reset).
            return new InputStreamReader(in, StandardCharsets.UTF_8.name());
        }

        // Every other case decodes from the very first byte again.
        in.reset();

        // The length guard must cover both byte orders, hence the explicit parentheses.
        boolean utf16Bom = br >= 2
                && ((head[0] == (byte) 0xFE && head[1] == (byte) 0xFF)
                        || (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE));

        return new InputStreamReader(in, utf16Bom ? "UTF-16" : defaultCharset);
    }

    /**
     * Fill {@code buffer} from {@code in}, tolerating short reads.
     * A single {@code read} call may return fewer bytes than requested even
     * when more data follows, so keep reading until the buffer is full or the
     * end of the stream is reached.
     *
     * @param in stream to read from
     * @param buffer destination buffer
     * @return number of bytes actually stored in {@code buffer} (0 at end of stream)
     * @throws IOException if I/O exception occurred
     */
    private static int readHead(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }
}

Related

  1. asReader(InputStream input, Charset charset)
  2. convert(File file, Charset from, String toEncoding, ByteArrayOutputStream bytearray, boolean headersOn, int totalLinesToRead)
  3. copy(Reader input, OutputStream output, Charset encoding)
  4. createBufferedReaderWithGuessedCharset(File file)
  5. createInputStreamReader(File file, String charsetName)
  6. createReader(Path p, Charset cs)
  7. getDecoder(Charset charset, ThreadLocal> localDecoder)