Example usage for java.io PushbackInputStream read

List of usage examples for java.io PushbackInputStream read


On this page you can find example usages of the java.io PushbackInputStream read method.


public int read(byte[] b, int off, int len) throws IOException 

Source Link


Reads up to len bytes of data from this input stream into an array of bytes.


From source file:Main.java

public static void main(String[] args) {

    byte[] arrByte = new byte[1024];

    byte[] byteArray = new byte[] { 'j', 'a', 'v', 'a', '2', 's', '.', 'c', 'o', 'm' };

    // create object of PushbackInputStream class for specified stream
    InputStream is = new ByteArrayInputStream(byteArray);
    PushbackInputStream pis = new PushbackInputStream(is);

    try {//from w  w  w. j a va 2s .co  m

        // read a char into our array
        pis.read(arrByte, 0, 3);

        // print arrByte
        for (int i = 0; i < 3; i++) {
            System.out.println((char) arrByte[i]);

    } catch (Exception ex) {

From source file:Main.java

/**
 * Creates a reader that reads the contents of the specified text source.
 * <p>This method implements the detection of the encoding.
 * <p>Note that the detection of the encoding always yields a result
 * because it uses a fallback value.
 * @param in the text source
 * @param fallbackEncoding the encoding to use when none is detected.
 * May be <code>null</code>, in which case UTF-8 is used.
 * @param encoding the detected encoding is copied into element 0 of this array.
 * May be <code>null</code>.
 * @return a reader that reads the contents of the text source.
 * This reader will automatically skip the BOM if any.
 * @exception IOException if there is an I/O problem
 */
public static Reader createReader(InputStream in, String fallbackEncoding, String[] encoding)
        throws IOException {
    byte[] bytes = new byte[1024];
    int byteCount = -1;

    PushbackInputStream in2 = new PushbackInputStream(in, bytes.length);
    try {
        int count = in2.read(bytes, 0, bytes.length);
        if (count > 0) {
            in2.unread(bytes, 0, count);
        byteCount = count;
    } catch (IOException ignored) {

    String charset = null;

    if (byteCount > 0) {
        if (byteCount >= 2) {
            // Use BOM ---

            int b0 = (bytes[0] & 0xFF);
            int b1 = (bytes[1] & 0xFF);

            switch ((b0 << 8) | b1) {
            case 0xFEFF:
                charset = "UTF-16BE";
                // We don't want to read the BOM.
            case 0xFFFE:
                charset = "UTF-16LE";
            case 0xEFBB:
                if (byteCount >= 3 && (bytes[2] & 0xFF) == 0xBF) {
                    charset = "UTF-8";

        if (charset == null) {
            // Unsupported characters are replaced by U+FFFD.
            String text = new String(bytes, 0, byteCount, "US-ASCII");

            if (text.startsWith("<?xml")) {
                Pattern pattern = Pattern.compile("encoding\\s*=\\s*['\"]([^'\"]+)");
                Matcher matcher = pattern.matcher(text);
                if (matcher.find()) {
                    charset = matcher.group(1);
                } else {
                    charset = "UTF-8";

    if (charset == null) {
        charset = fallbackEncoding;
        if (charset == null) {
            charset = "UTF-8";

    if (encoding != null) {
        encoding[0] = charset;
    return new InputStreamReader(in2, charset);

From source file:XmlReader.java

private void useEncodingDecl(PushbackInputStream pb, String encoding) throws IOException {
    byte buffer[] = new byte[MAXPUSHBACK];
    int len;//  w  w  w . j a  v a2 s  .c om
    Reader r;
    int c;

    // Buffer up a bunch of input, and set up to read it in
    // the specified encoding ... we can skip the first four
    // bytes since we know that "<?xm" was read to determine
    // what encoding to use!
    len = pb.read(buffer, 0, buffer.length);
    pb.unread(buffer, 0, len);
    r = new InputStreamReader(new ByteArrayInputStream(buffer, 4, len), encoding);

    // Next must be "l" (and whitespace) else we conclude
    // error and choose UTF-8.
    if ((c = r.read()) != 'l') {
        setEncoding(pb, "UTF-8");

    // Then, we'll skip any
    //  S version="..."   [or single quotes]
    // bit and get any subsequent 
    //  S encoding="..."  [or single quotes]
    // We put an arbitrary size limit on how far we read; lots
    // of space will break this algorithm.
    StringBuffer buf = new StringBuffer();
    StringBuffer keyBuf = null;
    String key = null;
    boolean sawEq = false;
    char quoteChar = 0;
    boolean sawQuestion = false;

    XmlDecl: for (int i = 0; i < MAXPUSHBACK - 5; ++i) {
        if ((c = r.read()) == -1)

        // ignore whitespace before/between "key = 'value'"
        if (c == ' ' || c == '\t' || c == '\n' || c == '\r')

        // ... but require at least a little!
        if (i == 0)

        // terminate the loop ASAP
        if (c == '?')
            sawQuestion = true;
        else if (sawQuestion) {
            if (c == '>')
            sawQuestion = false;

        // did we get the "key =" bit yet?
        if (key == null || !sawEq) {
            if (keyBuf == null) {
                if (Character.isWhitespace((char) c))
                keyBuf = buf;
                buf.append((char) c);
                sawEq = false;
            } else if (Character.isWhitespace((char) c)) {
                key = keyBuf.toString();
            } else if (c == '=') {
                if (key == null)
                    key = keyBuf.toString();
                sawEq = true;
                keyBuf = null;
                quoteChar = 0;
            } else
                keyBuf.append((char) c);

        // space before quoted value
        if (Character.isWhitespace((char) c))
        if (c == '"' || c == '\'') {
            if (quoteChar == 0) {
                quoteChar = (char) c;
            } else if (c == quoteChar) {
                if ("encoding".equals(key)) {
                    assignedEncoding = buf.toString();

                    // [81] Encname ::= [A-Za-z] ([A-Za-z0-9._]|'-')*
                    for (i = 0; i < assignedEncoding.length(); i++) {
                        c = assignedEncoding.charAt(i);
                        if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
                        if (i == 0)
                            break XmlDecl;
                        if (i > 0 && (c == '-' || (c >= '0' && c <= '9') || c == '.' || c == '_'))
                        // map illegal names to UTF-8 default
                        break XmlDecl;

                    setEncoding(pb, assignedEncoding);

                } else {
                    key = null;
        buf.append((char) c);

    setEncoding(pb, "UTF-8");

From source file:com.digitalpebble.storm.crawler.protocol.http.HttpResponse.java

private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[HttpProtocol.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(HttpProtocol.BUFFER_SIZE);

    while (!doneChunks) {
        if (HttpProtocol.LOGGER.isTraceEnabled()) {
            HttpProtocol.LOGGER.trace("Http: starting chunk");
        }/*  ww w . java  2s. co  m*/

        readLine(in, line, false);

        String chunkLenStr;
        // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line +
        // "'"); }

        int pos = line.indexOf(";");
        if (pos < 0) {
            chunkLenStr = line.toString();
        } else {
            chunkLenStr = line.substring(0, pos);
            // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " +
            // line.substring(pos+1)); }
        chunkLenStr = chunkLenStr.trim();
        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());

        if (chunkLen == 0) {
            doneChunks = true;

        if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent())
            chunkLen = http.getMaxContent() - contentBytesRead;

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < HttpProtocol.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : HttpProtocol.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            // DANGER!!! Will printed GZIPed stuff right to your
            // terminal!
            // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new
            // String(bytes, 0, len)); }

            out.write(bytes, 0, len);
            chunkBytesRead += len;

        readLine(in, line, false);


    if (!doneChunks) {
        if (contentBytesRead != http.getMaxContent())
            throw new HttpException("chunk eof: !doneChunk && didn't max out");

    content = out.toByteArray();
    parseHeaders(in, line);


From source file:org.adl.parsers.dom.ADLDOMParser.java

/**
 * Sets up the input source for the test subject file.
 * @param iFileName path or http(s) URL of the file to set up an input source for.
 * @return InputSource
 */
private InputSource setupFileSource(String iFileName) {
    String msgText;
    boolean defaultEncoding = true;
    String encoding = null;
    PushbackInputStream inputStream;
    FileInputStream inFile;

    try {
        File xmlFile = new File(iFileName);

        if (xmlFile.isFile()) {
            InputSource is = null;

            defaultEncoding = true;
            if (xmlFile.length() > 1) {
                inFile = new FileInputStream(xmlFile);
                inputStream = new PushbackInputStream(inFile, 4);

                // Reads the initial 4 bytes of the file to check for a Byte
                // Order Mark and determine the encoding

                byte bom[] = new byte[4];
                int n, pushBack;
                n = inputStream.read(bom, 0, bom.length);

                // UTF-8 Encoded
                if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
                    encoding = "UTF-8";
                    defaultEncoding = false;
                    pushBack = n - 3;
                // UTF-16 Big Endian Encoded
                else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
                    encoding = "UTF-16BE";
                    defaultEncoding = false;
                    pushBack = n - 2;
                // UTF-16 Little Endian Encoded               
                else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
                    encoding = "UTF-16LE";
                    defaultEncoding = false;
                    pushBack = n - 2;
                // Default encoding
                else {
                    // Unicode BOM mark not found, unread all bytes                  
                    pushBack = n;

                // Place any non-BOM bytes back into the stream
                if (pushBack > 0) {
                    inputStream.unread(bom, (n - pushBack), pushBack);

                if (defaultEncoding == true) { //Reads in ASCII file.
                    FileReader fr = new FileReader(xmlFile);
                    is = new InputSource(fr);
                // Reads the file in the determined encoding
                else {
                    //Creates a buffer with the size of the xml encoded file
                    BufferedReader inStream = new BufferedReader(new InputStreamReader(inputStream, encoding));
                    StringBuffer dataString = new StringBuffer();
                    String s = "";

                    //Builds the encoded file to be parsed
                    while ((s = inStream.readLine()) != null) {

                    is = new InputSource(new StringReader(dataString.toString()));
            return is;
        } else if ((iFileName.length() > 6)
                && (iFileName.substring(0, 5).equals("http:") || iFileName.substring(0, 6).equals("https:"))) {
            URL xmlURL = new URL(iFileName);
            InputStream xmlIS = xmlURL.openStream();
            InputSource is = new InputSource(xmlIS);
            return is;
        } else {
            msgText = "XML File: " + iFileName + " is not a file or URL";
    } catch (NullPointerException npe) {
        msgText = "Null pointer exception" + npe;
    } catch (SecurityException se) {
        msgText = "Security Exception" + se;
    } catch (FileNotFoundException fnfe) {
        msgText = "File Not Found Exception" + fnfe;
    } catch (Exception e) {
        msgText = "General Exception" + e;


    return new InputSource();

From source file:org.apache.axis2.builder.BuilderUtil.java

/**
 * Uses the byte order mark (BOM) to identify the encoding to be used. Falls back to the
 * specified default encoding if no BOM is present.
 *
 * @param is2             PushBackInputStream (it must be a pushback input stream so that we can
 *                        unread the bytes that are not part of the BOM)
 * @param defaultEncoding default encoding style if no BOM
 * @return the selected character set encoding
 * @throws java.io.IOException if reading from or unreading to the stream fails
 */
public static String getCharSetEncoding(PushbackInputStream is2, String defaultEncoding) throws IOException {
    String encoding;
    byte bom[] = new byte[BOM_SIZE];
    int n, unread;

    n = is2.read(bom, 0, bom.length);

    if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
        encoding = "UTF-8";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        unread = n - 3;
    } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
        encoding = "UTF-16BE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        unread = n - 2;
    } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
        encoding = "UTF-16LE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        unread = n - 2;
    } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE)
            && (bom[3] == (byte) 0xFF)) {
        encoding = "UTF-32BE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        unread = n - 4;
    } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00)
            && (bom[3] == (byte) 0x00)) {
        encoding = "UTF-32LE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        unread = n - 4;
    } else {

        // Unicode BOM mark not found, unread all bytes
        encoding = defaultEncoding;
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from default =" + encoding);
        unread = n;

    if (unread > 0) {
        is2.unread(bom, (n - unread), unread);
    return encoding;

From source file:org.apache.cocoon.components.flow.javascript.fom.FOM_JavaScriptInterpreter.java

/**
 * Find the encoding of the stream, or null if not specified
 */
String findEncoding(PushbackInputStream is) throws IOException {
    // Read some bytes
    byte[] buffer = new byte[ENCODING_BUF_SIZE];
    int len = is.read(buffer, 0, buffer.length);
    // and push them back
    is.unread(buffer, 0, len);

    // Interpret them as an ASCII string
    String str = new String(buffer, 0, len, "ASCII");
    RE re = new RE(encodingRE);
    if (re.match(str)) {
        return re.getParen(1);
    return null;

From source file:org.apache.nutch.protocol.htmlunit.HttpResponse.java

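/**
 * Reads a response body that is sent as a sequence of chunks, each preceded
 * by a line giving its length in hexadecimal.
 *
 * @param in the stream to read the chunks from
 * @param line buffer that receives each line read from the stream
 * @throws HttpException if a chunk length cannot be parsed or the stream ends inside a chunk
 * @throws IOException if reading from the stream fails
 */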
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[Http.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);

    while (!doneChunks) {
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace("Http: starting chunk");

        readLine(in, line, false);

        String chunkLenStr;
        // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); }

        int pos = line.indexOf(";");
        if (pos < 0) {
            chunkLenStr = line.toString();
        } else {
            chunkLenStr = line.substring(0, pos);
            // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + line.substring(pos+1)); }
        chunkLenStr = chunkLenStr.trim();
        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());

        if (chunkLen == 0) {
            doneChunks = true;

        if ((contentBytesRead + chunkLen) > http.getMaxContent())
            chunkLen = http.getMaxContent() - contentBytesRead;

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : Http.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            // DANGER!!! Will printed GZIPed stuff right to your
            // terminal!
            // if (LOG.isTraceEnabled()) { LOG.trace("read: " +  new String(bytes, 0, len)); }

            out.write(bytes, 0, len);
            chunkBytesRead += len;

        readLine(in, line, false);


    if (!doneChunks) {
        if (contentBytesRead != http.getMaxContent())
            throw new HttpException("chunk eof: !doneChunk && didn't max out");

    content = out.toByteArray();
    parseHeaders(in, line);


From source file:org.apache.nutch.protocol.http.HttpResponse.java

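/**
 * Reads a response body that is sent as a sequence of chunks, each preceded
 * by a line giving its length in hexadecimal.
 *
 * @param in the stream to read the chunks from
 * @param line buffer that receives each line read from the stream
 * @throws HttpException if a chunk length cannot be parsed or the stream ends inside a chunk
 * @throws IOException if reading from the stream fails
 */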
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[Http.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);

    while (!doneChunks) {
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace("Http: starting chunk");

        readLine(in, line, false);

        String chunkLenStr;
        // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'");
        // }

        int pos = line.indexOf(";");
        if (pos < 0) {
            chunkLenStr = line.toString();
        } else {
            chunkLenStr = line.substring(0, pos);
            // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " +
            // line.substring(pos+1)); }
        chunkLenStr = chunkLenStr.trim();
        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());

        if (chunkLen == 0) {
            doneChunks = true;

        if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent())
            chunkLen = http.getMaxContent() - contentBytesRead;

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : Http.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            // DANGER!!! Will printed GZIPed stuff right to your
            // terminal!
            // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0,
            // len)); }

            out.write(bytes, 0, len);
            chunkBytesRead += len;

        readLine(in, line, false);


    if (!doneChunks) {
        if (contentBytesRead != http.getMaxContent())
            throw new HttpException("chunk eof: !doneChunk && didn't max out");

    content = out.toByteArray();
    parseHeaders(in, line, null);


From source file:org.apache.nutch.protocol.s2jh.HttpResponse.java

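/**
 * Reads a response body that is sent as a sequence of chunks, each preceded
 * by a line giving its length in hexadecimal.
 *
 * @param in the stream to read the chunks from
 * @param line buffer that receives each line read from the stream
 * @throws HttpException if a chunk length cannot be parsed or the stream ends inside a chunk
 * @throws IOException if reading from the stream fails
 */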
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[Http.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);

    while (!doneChunks) {
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace("Http: starting chunk");

        readLine(in, line, false);

        String chunkLenStr;
        // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'");
        // }

        int pos = line.indexOf(";");
        if (pos < 0) {
            chunkLenStr = line.toString();
        } else {
            chunkLenStr = line.substring(0, pos);
            // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " +
            // line.substring(pos+1)); }
        chunkLenStr = chunkLenStr.trim();
        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());

        if (chunkLen == 0) {
            doneChunks = true;

        if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent())
            chunkLen = http.getMaxContent() - contentBytesRead;

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : Http.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            // DANGER!!! Will printed GZIPed stuff right to your
            // terminal!
            // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0,
            // len)); }

            out.write(bytes, 0, len);
            chunkBytesRead += len;

        readLine(in, line, false);

    if (!doneChunks) {
        if (contentBytesRead != http.getMaxContent())
            throw new HttpException("chunk eof: !doneChunk && didn't max out");

    content = out.toByteArray();
    parseHeaders(in, line);
