Java tutorial
/***************************************************************************
   Copyright 2014 Emily Estes

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
***************************************************************************/
package net.metanotion.json;

import java.io.IOException;
import java.io.BufferedReader;
import java.io.Reader;

import org.apache.commons.text.StringEscapeUtils;

/** This class consumes a stream of text representing JSON encoded data and emits events to a provided
Handler implementation. */
public final class StreamingParser {
    /** Read-ahead limit passed to {@link Reader#mark(int)}; the lexer only ever peeks one character
    past a mark before resetting. */
    private static final int MAX_BUFFER = 12;

    /** A lexical token together with its decoded value (null for punctuation and the null literal). */
    private static final class Lexeme {
        public final Token type;
        public final Object value;

        public Lexeme(final Token type, final Object value) {
            this.type = type;
            this.value = value;
        }
    }

    /** The kinds of token produced by the lexer. */
    private enum Token {
        START_OBJ, END_OBJ, START_LIST, END_LIST, COMMA, COLON, STRING, BOOL, NUL, INT, FLOAT
    }

    // Value-less tokens are shared instances.
    private static final Lexeme BEG_OBJ = new Lexeme(Token.START_OBJ, null);
    private static final Lexeme END_OBJ = new Lexeme(Token.END_OBJ, null);
    private static final Lexeme BEG_LST = new Lexeme(Token.START_LIST, null);
    private static final Lexeme END_LST = new Lexeme(Token.END_LIST, null);
    private static final Lexeme COMMA = new Lexeme(Token.COMMA, null);
    private static final Lexeme COLON = new Lexeme(Token.COLON, null);
    private static final Lexeme NULL = new Lexeme(Token.NUL, null);

    /** Parse a JSON encoded stream and emit the events to the handler provided and return the result of
    finishing the stream.
        @param <T> The type of value produced by the final call to the handler.
        @param file The stream of JSON encoded text to parse. If it does not support mark/reset it is
            wrapped in a BufferedReader.
        @param handler The handler to emit events to.
        @return The final value produced by the call to .finish().
        @throws IOException if an IO error occurs while reading from the reader.
    */
    public <T> T parse(final Reader file, final Handler<T> handler) throws IOException {
        // The lexer relies on mark()/reset() to peek at the next character.
        final Reader in = file.markSupported() ? file : new BufferedReader(file);
        return parseJson(in, handler.start()).finish();
    }

    /*
        json = obj | list | value
        objVal = String COLON json
        obj = START_OBJ [ objVal ( COMMA objVal )* ] END_OBJ
        list = START_LIST [ json ( COMMA json )* ] END_LIST
    */

    /** Skip whitespace and lex the next token from the stream.
        @param in The stream to read from.
        @return The next token.
        @throws IOException if an IO error occurs while reading.
    */
    private Lexeme lexToken(final Reader in) throws IOException {
        final int c = skipWhitespace(in);
        switch (c) {
            case -1:
                throw new ParserException("Unexpected end of Stream");
            case '{':
                return BEG_OBJ;
            case '}':
                return END_OBJ;
            case '[':
                return BEG_LST;
            case ']':
                return END_LST;
            case ',':
                return COMMA;
            case ':':
                return COLON;
            case '"':
                return lexString(in);
            case 't':
                return maybeTrue(in);
            case 'f':
                return maybeFalse(in);
            case 'n':
                return maybeNull(in);
            default:
                return maybeNumber(in, c);
        }
    }

    private static final int BUFFER_LEN3 = 3;
    private static final int BUFFER_LEN4 = 4;

    /** Read up to {@code len} characters, looping because a single Reader.read(char[]) call is allowed
    to return fewer characters than requested before the end of the stream.
        @param in The stream to read from.
        @param len The number of characters wanted.
        @return The characters actually read; shorter than {@code len} only if the stream ended.
        @throws IOException if an IO error occurs while reading.
    */
    private String readUpTo(final Reader in, final int len) throws IOException {
        final char[] cbuf = new char[len];
        int filled = 0;
        while (filled < len) {
            final int ct = in.read(cbuf, filled, len - filled);
            if (ct < 0) {
                break;
            }
            filled += ct;
        }
        return new String(cbuf, 0, filled);
    }

    /** Lex the remainder of the literal {@code true}; the leading 't' has already been consumed. */
    private Lexeme maybeTrue(final Reader in) throws IOException {
        final String rest = readUpTo(in, BUFFER_LEN3);
        if ("rue".equals(rest)) {
            return new Lexeme(Token.BOOL, true);
        }
        throw new ParserException("Expected 'true', instead found: t" + rest);
    }

    /** Lex the remainder of the literal {@code false}; the leading 'f' has already been consumed. */
    private Lexeme maybeFalse(final Reader in) throws IOException {
        final String rest = readUpTo(in, BUFFER_LEN4);
        if ("alse".equals(rest)) {
            return new Lexeme(Token.BOOL, false);
        }
        throw new ParserException("Expected 'false', instead found: f" + rest);
    }

    /** Lex the remainder of the literal {@code null}; the leading 'n' has already been consumed. */
    private Lexeme maybeNull(final Reader in) throws IOException {
        final String rest = readUpTo(in, BUFFER_LEN3);
        if ("ull".equals(rest)) {
            return NULL;
        }
        throw new ParserException("Expected 'null', instead found: n" + rest);
    }

    /** Lex a number whose first character has already been consumed.
        @param in The stream to read from.
        @param firstChar The character already read from the stream.
        @return An INT token holding a Long, or a FLOAT token holding a Double when a fraction or
            exponent is present.
        @throws IOException if an IO error occurs while reading.
    */
    private Lexeme maybeNumber(final Reader in, int firstChar) throws IOException {
        // this might be a number, if it is, lex it and return the token, otherwise throw an exception.
        final String integer = lexInt(in, firstChar);
        in.mark(MAX_BUFFER);
        final int c = in.read();
        if (c == '.') {
            final String decimal = integer + lexFraction(in);
            return new Lexeme(Token.FLOAT, Double.valueOf(decimal));
        } else if (Character.toLowerCase(c) == 'e') {
            // Push the 'e' back so lexExp sees the whole exponent.
            in.reset();
            final String decimal = integer + lexExp(in);
            return new Lexeme(Token.FLOAT, Double.valueOf(decimal));
        } else {
            // Not part of the number; leave it for the next token.
            in.reset();
            return new Lexeme(Token.INT, Long.valueOf(integer));
        }
    }

    /** Lex the digits after a decimal point (already consumed) plus an optional exponent. */
    private String lexFraction(final Reader in) throws IOException {
        return "." + lexDigits(in) + lexExp(in);
    }

    private static final String QUOTE = "'";

    /** Lex an optional exponent.
        @param in The stream to read from.
        @return The exponent text normalised to a lower case 'e' (e.g. "e+10", "e-3", "e5"), or the
            empty string if the next character does not start an exponent (in which case nothing is
            consumed).
        @throws IOException if an IO error occurs while reading.
    */
    private String lexExp(final Reader in) throws IOException {
        in.mark(MAX_BUFFER);
        int c = in.read();
        if (Character.toLowerCase(c) != 'e') {
            in.reset();
            return "";
        }
        c = in.read();
        if (c == '+') {
            return "e+" + lexDigits(in);
        } else if (c == '-') {
            return "e-" + lexDigits(in);
        } else if (Character.isDigit(c)) {
            // Unsigned exponent: keep the 'e' and the digit just read, then take any further digits
            // (a single-digit exponent such as "1e5" is valid).
            final StringBuilder sb = new StringBuilder("e").append((char) c);
            appendDigits(in, sb);
            return sb.toString();
        } else if (c == -1) {
            throw new ParserException("Unexpected end of stream");
        } else {
            throw new ParserException(
                "Expected exponent, instead found: '" + (new String(Character.toChars(c))) + QUOTE);
        }
    }

    private static final String EXPECTED_DIGIT = "Expected at least one digit [0-9]";

    /** Append consecutive digits from the stream to {@code sb}, leaving the first non-digit unread. */
    private void appendDigits(final Reader in, final StringBuilder sb) throws IOException {
        while (true) {
            in.mark(MAX_BUFFER);
            final int c = in.read();
            if (!Character.isDigit(c)) {
                in.reset();
                return;
            }
            sb.append((char) c);
        }
    }

    /** Lex the integer part of a number: an optional leading '-' followed by at least one digit.
        @param in The stream to read from.
        @param firstChar The character already read from the stream ('-' or the first digit).
        @return The text of the integer part.
        @throws IOException if an IO error occurs while reading.
    */
    private String lexInt(final Reader in, final int firstChar) throws IOException {
        final StringBuilder sb = new StringBuilder();
        final boolean negative = (firstChar == '-');
        if (negative) {
            sb.append("-");
        } else if (Character.isDigit(firstChar)) {
            sb.append((char) firstChar);
        } else {
            final String found = new String(Character.toChars(firstChar));
            throw new ParserException("Expecting a number, instead found: '" + found + QUOTE);
        }
        appendDigits(in, sb);
        // A lone '-' is the only way to get here without having seen a digit.
        if (negative && (sb.length() == 1)) {
            throw new ParserException(EXPECTED_DIGIT);
        }
        return sb.toString();
    }

    /** Lex one or more consecutive digits.
        @param in The stream to read from.
        @return The digits read.
        @throws IOException if an IO error occurs while reading.
    */
    private String lexDigits(final Reader in) throws IOException {
        final StringBuilder sb = new StringBuilder();
        appendDigits(in, sb);
        if (sb.length() == 0) {
            throw new ParserException(EXPECTED_DIGIT);
        }
        return sb.toString();
    }

    /** Lex a string whose opening quote has already been consumed, up to and including the closing
    quote, and unescape it.
        @param in The stream to read from.
        @return A STRING token holding the unescaped text.
        @throws IOException if an IO error occurs while reading.
    */
    private Lexeme lexString(final Reader in) throws IOException {
        final StringBuilder sb = new StringBuilder();
        boolean escape = false;
        while (true) {
            final int c = in.read();
            if (c == -1) {
                throw new ParserException("Expected a character instead of end of stream");
            }
            final char ch = (char) c;
            if ((ch == '"') && !escape) {
                return new Lexeme(Token.STRING, StringEscapeUtils.unescapeJson(sb.toString()));
            }
            // A backslash only starts an escape when it is not itself the escaped character, so that
            // "\\" followed by a quote correctly terminates the string.
            escape = (ch == '\\') && !escape;
            sb.append(ch);
        }
    }

    /** Consume whitespace and return the first non-whitespace character, or -1 at end of stream. */
    private int skipWhitespace(final Reader in) throws IOException {
        int c;
        do {
            c = in.read();
        } while (Character.isWhitespace((char) c));
        return c;
    }

    /** Parse one JSON value (object, list or primitive) and emit it to the handler.
        @param <T> The type of value produced by the handler.
        @param in The stream to read from.
        @param handler The handler to emit events to.
        @return The handler returned by the last event emitted.
        @throws IOException if an IO error occurs while reading.
    */
    private <T> Handler<T> parseJson(final Reader in, final Handler<T> handler) throws IOException {
        return emitValue(in, handler, lexToken(in),
            "Parse Error: expected primitive value or object or list, instead found: ");
    }

    /** Emit the value that starts with the given token, recursing into objects and lists.
        @param <T> The type of value produced by the handler.
        @param in The stream to read the rest of the value from.
        @param handler The handler to emit events to.
        @param tok The first token of the value.
        @param errorPrefix Message prefix used when {@code tok} cannot start a value.
        @return The handler returned by the last event emitted.
        @throws IOException if an IO error occurs while reading.
    */
    private <T> Handler<T> emitValue(final Reader in,
            final Handler<T> handler,
            final Lexeme tok,
            final String errorPrefix) throws IOException {
        switch (tok.type) {
            case START_OBJ:
                return objVal(in, handler.startObject());
            case START_LIST:
                return listVal(in, handler.startList());
            case STRING:
                return handler.string((String) tok.value);
            case BOOL:
                return handler.bool((Boolean) tok.value);
            case NUL:
                return handler.jsonNull();
            case INT:
                return handler.integer((Long) tok.value);
            case FLOAT:
                return handler.decimal((Double) tok.value);
            default:
                throw new ParserException(errorPrefix + tok.type);
        }
    }

    private static final String UNEXPECTED = "Unexpected token: ";
    private static final String EXPECTED = "Expected ':', instead found: ";

    /** Parse the members of an object whose opening brace has already been consumed, through the
    closing brace.
        @param <T> The type of value produced by the handler.
        @param in The stream to read from.
        @param handler The handler returned by startObject().
        @return The handler returned by endObject().
        @throws IOException if an IO error occurs while reading.
    */
    private <T> Handler<T> objVal(final Reader in, Handler<T> handler) throws IOException {
        Lexeme tok = lexToken(in);
        if (tok.type == Token.END_OBJ) {
            // Empty object.
            return handler.endObject();
        }
        while (true) {
            // tok is the first token of a member: it must be the key.
            if (tok.type != Token.STRING) {
                throw new ParserException(UNEXPECTED + tok.type);
            }
            handler = handler.key(tok.value.toString());
            tok = lexToken(in);
            if (tok.type != Token.COLON) {
                throw new ParserException(EXPECTED + tok.type);
            }
            handler = parseJson(in, handler);

            tok = lexToken(in);
            if (tok.type == Token.END_OBJ) {
                return handler.endObject();
            }
            if (tok.type != Token.COMMA) {
                throw new ParserException("Expected ',' or '}', instead found: " + tok.type);
            }
            tok = lexToken(in);
        }
    }

    /** Parse the elements of a list whose opening bracket has already been consumed, through the
    closing bracket. A trailing comma before the closing bracket is rejected, matching the grammar
    and the behaviour of objects.
        @param <T> The type of value produced by the handler.
        @param in The stream to read from.
        @param handler The handler returned by startList().
        @return The handler returned by endList().
        @throws IOException if an IO error occurs while reading.
    */
    private <T> Handler<T> listVal(final Reader in, Handler<T> handler) throws IOException {
        Lexeme tok = lexToken(in);
        if (tok.type == Token.END_LIST) {
            // Empty list.
            return handler.endList();
        }
        String errorPrefix = "Expected JSON value or ']', instead found: ";
        while (true) {
            handler = emitValue(in, handler, tok, errorPrefix);

            final Lexeme separator = lexToken(in);
            if (separator.type == Token.END_LIST) {
                return handler.endList();
            }
            if (separator.type != Token.COMMA) {
                throw new ParserException("Expected ',' or ']', instead found: " + separator.type);
            }
            // After a comma another value is mandatory; ']' is no longer acceptable.
            errorPrefix = "Expected JSON value, instead found: ";
            tok = lexToken(in);
        }
    }
}