UTF8BOMSkipper.java :  » HTML-Parser » nekohtml » org » cyberneko » html » Java Open Source

Java Open Source » HTML Parser » nekohtml 
nekohtml » org » cyberneko » html » UTF8BOMSkipper.java
/* 
 * Copyright 2005-2008 Andy Clark
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.cyberneko.html;

import java.io.*;

/**
 * This class is an input stream filter that skips the first
 * three bytes read if they match the UTF-8 byte order mark,
 * 0xEFBBBF. The UTF-8 BOM is most often generated by Windows
 * tools.
 * <p>
 * The check for the BOM is performed lazily, on the first call to
 * {@link #read()}, {@link #read(byte[],int,int)} (with a non-zero
 * length) or {@link #skip(long)} (with a positive count). Bytes that
 * were consumed from the wrapped stream while probing for the BOM but
 * turned out not to be part of one are kept in a small push-back
 * buffer and are returned to the caller before any further byte is
 * read from the wrapped stream.
 * <p>
 * Instances perform no synchronization of their own.
 * 
 * @author Andy Clark
 */
public class UTF8BOMSkipper 
    extends FilterInputStream {

    //
    // Constants
    //

    /** The UTF-8 byte order mark, one unsigned byte value per element. */
    private static final int[] BOM = { 0xEF, 0xBB, 0xBF };

    //
    // Data
    //

    /** Start of reading: true until the BOM check has been performed. */
    private boolean fStart = true;

    /** Index of the next pushed-back byte to return from {@link #fFirst3Bytes}. */
    private int fOffset;

    /** 
     * Pushed-back bytes: the (at most three) leading bytes that were
     * consumed while probing for the BOM and still have to be returned
     * to the caller. Only real byte values (0-255) are stored, never the
     * end-of-stream marker. Null when nothing is pending.
     */
    private int[] fFirst3Bytes;

    //
    // Constructors
    //

    /** 
     * Constructs a UTF-8 BOM skipper. 
     * 
     * @param stream The stream to wrap. Nothing is read from it until
     *               the first read or skip request.
     */
    public UTF8BOMSkipper(InputStream stream) {
        super(stream);
    } // <init>(InputStream)

    //
    // InputStream methods
    //

    /** 
     * Returns the next byte, skipping a leading UTF-8 BOM if present.
     * 
     * @return The next byte as a value in the range 0-255, or -1 if
     *         the end of the stream has been reached.
     * 
     * @throws IOException Thrown if the wrapped stream fails.
     */
    public int read() throws IOException {

        // skip UTF-8 BOM, if present
        if (fStart) {
            checkForBOM();
        }

        // return pushed-back bytes first
        if (fFirst3Bytes != null) {
            int b = fFirst3Bytes[fOffset++];
            if (fOffset == fFirst3Bytes.length) {
                fFirst3Bytes = null;
            }
            return b;
        }

        // return next byte
        return super.read();

    } // read():int

    /** 
     * Reads bytes into specified buffer and returns total bytes read. 
     * <p>
     * Pushed-back bytes are delivered first; if the request is larger
     * than what is pending, the remainder is requested from the wrapped
     * stream with a single bulk read.
     * 
     * @param buffer The destination buffer.
     * @param offset The index in the buffer of the first byte to store.
     * @param length The maximum number of bytes to read.
     * 
     * @return The number of bytes stored in the buffer, or -1 if no
     *         byte could be read because the end of the stream has
     *         been reached.
     * 
     * @throws IOException Thrown if the wrapped stream fails.
     * @throws NullPointerException Thrown if the buffer is null.
     * @throws IndexOutOfBoundsException Thrown if offset and length do 
     *         not denote a valid range of the buffer.
     */
    public int read(byte[] buffer, int offset, int length) throws IOException {

        // validate before any stream state is consumed
        if (buffer == null) {
            throw new NullPointerException("buffer");
        }
        if (offset < 0 || length < 0 || length > buffer.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        // skip UTF-8 BOM, if present
        if (fStart) {
            checkForBOM();
        }

        // nothing pending: plain delegation
        if (fFirst3Bytes == null) {
            return super.read(buffer, offset, length);
        }

        // deliver pushed-back bytes
        int copied = Math.min(length, fFirst3Bytes.length - fOffset);
        for (int i = 0; i < copied; i++) {
            buffer[offset + i] = (byte)fFirst3Bytes[fOffset + i];
        }
        fOffset += copied;
        if (fOffset == fFirst3Bytes.length) {
            fFirst3Bytes = null;
        }

        // fill the rest of the request from the wrapped stream
        if (copied < length) {
            int count = super.read(buffer, offset + copied, length - copied);
            if (count > 0) {
                copied += count;
            }
        }
        return copied;

    } // read(byte[],int,int):int

    /** 
     * Skips over bytes of this stream. A leading UTF-8 BOM is never
     * counted as skipped data, and pushed-back bytes are skipped before
     * any byte of the wrapped stream.
     * 
     * @param n The number of bytes to skip.
     * 
     * @return The number of bytes actually skipped; zero if n is not
     *         positive.
     * 
     * @throws IOException Thrown if the wrapped stream fails.
     */
    public long skip(long n) throws IOException {

        if (n <= 0) {
            return 0;
        }

        // skip UTF-8 BOM, if present
        if (fStart) {
            checkForBOM();
        }

        // consume pushed-back bytes first
        long skipped = 0;
        if (fFirst3Bytes != null) {
            int count = (int)Math.min(n, (long)(fFirst3Bytes.length - fOffset));
            fOffset += count;
            if (fOffset == fFirst3Bytes.length) {
                fFirst3Bytes = null;
            }
            skipped = count;
        }

        // skip the remainder in the wrapped stream
        if (skipped < n) {
            skipped += super.skip(n - skipped);
        }
        return skipped;

    } // skip(long):long

    /** Mark is not supported for this input stream. */
    public boolean markSupported() {
        return false;
    } // markSupported():boolean

    /** 
     * Returns the number of bytes available: the pending pushed-back 
     * bytes plus whatever the wrapped stream reports. 
     * <p>
     * This method never reads from the wrapped stream. Consequently, if
     * it is called before the first read or skip request, the value is
     * the wrapped stream's own estimate and may still include the bytes
     * of a BOM that will be skipped later.
     * 
     * @return An estimate of the number of bytes that can be read.
     * 
     * @throws IOException Thrown if the wrapped stream fails.
     */
    public int available() throws IOException {
        int available = super.available();
        if (fFirst3Bytes != null) {
            int pending = fFirst3Bytes.length - fOffset;
            // guard against int overflow of the sum
            if (pending > Integer.MAX_VALUE - available) {
                return Integer.MAX_VALUE;
            }
            return pending + available;
        }
        return available;
    } // available():int

    //
    // Private methods
    //

    /** 
     * Probes the start of the wrapped stream for the UTF-8 BOM. 
     * <p>
     * Bytes are read one at a time and probing stops at the first byte
     * that does not continue the BOM (or at the end of the stream), so
     * no more bytes than necessary are requested from the wrapped 
     * stream. If the complete BOM is found it is discarded; otherwise
     * the bytes consumed so far are pushed back in their original order.
     * 
     * @throws IOException Thrown if the wrapped stream fails.
     */
    private void checkForBOM() throws IOException {

        fStart = false;

        int[] consumed = new int[BOM.length];
        int count = 0;
        boolean matched = true;
        while (matched && count < BOM.length) {
            int b = super.read();
            if (b == -1) {
                // end of stream is not a byte: do not push it back
                matched = false;
            }
            else {
                consumed[count] = b;
                matched = b == BOM[count];
                count++;
            }
        }

        // not a BOM: hand the consumed bytes back to the caller
        if (!matched && count > 0) {
            fFirst3Bytes = new int[count];
            System.arraycopy(consumed, 0, fFirst3Bytes, 0, count);
            fOffset = 0;
        }

    } // checkForBOM()

} // class UTF8BOMSkipper
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.