Playlist Generator  1.0
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Properties Defines
SBJsonTokeniser.m
Go to the documentation of this file.
00001 /*
00002  Copyright (c) 2010-2011, Stig Brautaset. All rights reserved.
00003 
00004  Redistribution and use in source and binary forms, with or without
00005  modification, are permitted provided that the following conditions are
00006  met:
00007 
00008  Redistributions of source code must retain the above copyright
00009  notice, this list of conditions and the following disclaimer.
00010 
00011  Redistributions in binary form must reproduce the above copyright
00012  notice, this list of conditions and the following disclaimer in the
00013  documentation and/or other materials provided with the distribution.
00014 
00015  Neither the name of the author nor the names of its contributors
00016  may be used to endorse or promote products derived from this software
00017  without specific prior written permission.
00018 
00019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00020  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00021  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00022  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00023  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00025  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00026  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00027  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00029  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  */
00031 
00032 #import "SBJsonTokeniser.h"
00033 #import "SBJsonUTF8Stream.h"
00034 
00035 #define SBStringIsIllegalSurrogateHighCharacter(character) (((character) >= 0xD800UL) && ((character) <= 0xDFFFUL))
00036 #define SBStringIsSurrogateLowCharacter(character) ((character >= 0xDC00UL) && (character <= 0xDFFFUL))
00037 #define SBStringIsSurrogateHighCharacter(character) ((character >= 0xD800UL) && (character <= 0xDBFFUL))
00038 
00039 @implementation SBJsonTokeniser
00040 
00041 @synthesize error = _error;
00042 @synthesize stream = _stream;
00043 
/**
 Initialises the tokeniser and gives it a freshly allocated
 SBJsonUTF8Stream, to which -appendData: forwards incoming bytes.

 @return The initialised receiver, or nil when [super init] returned nil.
 */
- (id)init {
    if (!(self = [super init]))
        return nil;

    _stream = [[SBJsonUTF8Stream alloc] init];
    return self;
}
00053 
00054 
/**
 Hands another chunk of input to the underlying stream.

 @param moreData Bytes forwarded unchanged to the stream's -appendData:.
 */
- (void)appendData:(NSData *)moreData {
    [_stream appendData:moreData];
}
00058 
00059 
/**
 Tries to consume a fixed literal (used for "null", "true" and "false").

 @param pattern NUL-terminated literal expected at the current position.
 @param len     Number of characters in pattern.
 @param token   Token to hand back when the literal was consumed.
 @return token on success; sbjson_token_eof when the stream reports fewer
         than len characters remaining; sbjson_token_error (with self.error
         set) when the stream could not skip over the literal.
 */
- (sbjson_token_t)match:(const char *)pattern length:(NSUInteger)len retval:(sbjson_token_t)token {
    BOOL enoughInput = [_stream haveRemainingCharacters:len];
    if (!enoughInput)
        return sbjson_token_eof;

    BOOL consumed = [_stream skipCharacters:pattern length:len];
    if (!consumed) {
        // "%.1s" prints just the first character of the literal.
        self.error = [NSString stringWithFormat:@"Expected '%s' after initial '%.1s'", pattern, pattern];
        return sbjson_token_error;
    }

    return token;
}
00070 
/**
 Translates the character following a backslash into the character it
 stands for.

 @param ch      The character that came after the backslash.
 @param decoded Receives the translated character. Left untouched when the
                escape is not recognised.
 @return YES for one of \\ \/ \" \b \n \r \t \f; NO otherwise, in which case
         self.error is set.
 */
- (BOOL)decodeEscape:(unichar)ch into:(unichar*)decoded {
    unichar translated;

    switch (ch) {
        // These three escape to themselves.
        case '\\':
        case '/':
        case '"':
            translated = ch;
            break;

        case 'b': translated = '\b'; break;
        case 'n': translated = '\n'; break;
        case 'r': translated = '\r'; break;
        case 't': translated = '\t'; break;
        case 'f': translated = '\f'; break;

        default:
            self.error = @"Illegal escape character";
            return NO;
    }

    *decoded = translated;
    return YES;
}
00106 
/**
 Reads four characters via -getNextUnichar: and interprets them as one
 big-endian hexadecimal number (the XXXX of a \uXXXX escape).

 @param quad Receives the decoded 16-bit value. Only written on success.
 @return YES when all four characters were read and were hex digits
         (either case); NO when a character is not a hex digit or when the
         stream could not supply a character.
 */
- (BOOL)decodeHexQuad:(unichar*)quad {
    unichar value = 0;

    for (int i = 0; i < 4; i++) {
        unichar c;

        // Never interpret `c` unless the stream actually delivered it;
        // otherwise we would decode an uninitialised or stale character.
        if (![_stream getNextUnichar:&c])
            return NO;

        value *= 16;
        switch (c) {
            case '0' ... '9':
                value += c - '0';
                break;

            case 'a' ... 'f':
                value += 10 + c - 'a';
                break;

            case 'A' ... 'F':
                value += 10 + c - 'A';
                break;

            default:
                return NO;
        }
    }

    *quad = value;
    return YES;
}
00133 
/**
 Tokenises a JSON string. Called with the stream positioned on the opening
 double quote.

 The string is assembled from fragments returned by the stream's
 -getRetainedStringFragment: (presumably runs of ordinary characters --
 confirm against SBJsonUTF8Stream) interleaved with escape sequences that
 are decoded here.

 @param token Receives the decoded string when sbjson_token_string is
              returned.
 @return sbjson_token_string on success; sbjson_token_eof when the stream
         ran out of characters before the closing quote (the caller is
         expected to retry with more data); sbjson_token_error, with
         self.error set, for malformed input.
 */
- (sbjson_token_t)getStringToken:(NSObject**)token {
    // Stays nil until the string turns out to need more than one fragment.
    NSMutableString *acc = nil;

    for (;;) {
        // Step over the opening quote on the first pass, and over the last
        // character handled by the previous pass on later ones.
        [_stream skip];

        NSMutableString *string = nil;
        if (![_stream getRetainedStringFragment:&string])
            return sbjson_token_eof;

        if (!string) {
            self.error = @"Broken Unicode encoding";
            return sbjson_token_error;
        }

        // The character that ended the fragment decides what happens next.
        unichar ch;
        if (![_stream getUnichar:&ch])
            return sbjson_token_eof;

        if (acc) {
            [acc appendString:string];

        } else if (ch == '"') {
            // Fast path: the whole string was a single fragment.
            *token = [string copy];
            [_stream skip];
            return sbjson_token_string;

        } else {
            acc = [string mutableCopy];
        }

        switch (ch) {
            case 0 ... 0x1F:
                self.error = [NSString stringWithFormat:@"Unescaped control character [0x%0.2X]", (int)ch];
                return sbjson_token_error;

            case '"':
                *token = acc;
                [_stream skip];
                return sbjson_token_string;

            case '\\':
                if (![_stream getNextUnichar:&ch])
                    return sbjson_token_eof;

                if (ch == 'u') {
                    // Positioned on the 'u': require it plus four hex digits.
                    if (![_stream haveRemainingCharacters:5])
                        return sbjson_token_eof;

                    unichar hi;
                    if (![self decodeHexQuad:&hi]) {
                        self.error = @"Invalid hex quad";
                        return sbjson_token_error;
                    }

                    if (SBStringIsSurrogateHighCharacter(hi)) {
                        // A high surrogate must be followed directly by a
                        // second "\uXXXX" holding the low surrogate. Every
                        // read is checked so that a short buffer results in
                        // eof (retry later) instead of decoding from
                        // characters that were never delivered.
                        unichar marker;
                        if (![_stream getNextUnichar:&ch] || ![_stream getNextUnichar:&marker])
                            return sbjson_token_eof;

                        if (ch != '\\' || marker != 'u') {
                            self.error = @"Missing low character in surrogate pair";
                            return sbjson_token_error;
                        }

                        // Same guard as for the first quad: we are on the
                        // 'u' and need it plus four hex digits.
                        if (![_stream haveRemainingCharacters:5])
                            return sbjson_token_eof;

                        unichar lo;
                        if (![self decodeHexQuad:&lo]) {
                            self.error = @"Missing low character in surrogate pair";
                            return sbjson_token_error;
                        }

                        if (!SBStringIsSurrogateLowCharacter(lo)) {
                            self.error = @"Invalid low character in surrogate pair";
                            return sbjson_token_error;
                        }

                        [acc appendFormat:@"%C%C", hi, lo];

                    } else if (SBStringIsIllegalSurrogateHighCharacter(hi)) {
                        // Not a high surrogate but still in the surrogate
                        // range, i.e. a low surrogate with no partner.
                        self.error = @"Invalid high character in surrogate pair";
                        return sbjson_token_error;

                    } else {
                        [acc appendFormat:@"%C", hi];
                    }

                } else {
                    unichar decoded;
                    if (![self decodeEscape:ch into:&decoded])
                        return sbjson_token_error;
                    [acc appendFormat:@"%C", decoded];
                }

                break;

            default:
                self.error = [NSString stringWithFormat:@"Invalid UTF-8: '%x'", (int)ch];
                return sbjson_token_error;
        }
    }

    // Not reached: the loop above only exits through a return.
    return sbjson_token_eof;
}
00245 
/// JSON digits are the ASCII characters '0'-'9' only.
static inline BOOL SBJsonTokeniserIsDigit(unichar ch) {
    return ch >= '0' && ch <= '9';
}

/**
 Tokenises a JSON number: optional minus, integer part, optional fraction,
 optional exponent.

 @param token Receives an NSNumber (plain integers with fewer than 19
              digits) or an NSDecimalNumber (everything else) when
              sbjson_token_number is returned.
 @return sbjson_token_number on success; sbjson_token_eof whenever the
         stream runs out while the number could still continue (including
         directly after its last digit); sbjson_token_error, with
         self.error set, for malformed numbers.
 */
- (sbjson_token_t)getNumberToken:(NSObject**)token {

    NSUInteger numberStart = _stream.index;

    unichar ch;
    if (![_stream getUnichar:&ch])
        return sbjson_token_eof;

    BOOL isNegative = NO;
    if (ch == '-') {
        isNegative = YES;
        if (![_stream getNextUnichar:&ch])
            return sbjson_token_eof;
    }

    // mantissa collects every digit of the integer and fraction parts;
    // mantissa_length counts them so we know whether it still fits.
    unsigned long long mantissa = 0;
    int mantissa_length = 0;

    if (ch == '0') {
        mantissa_length++;
        if (![_stream getNextUnichar:&ch])
            return sbjson_token_eof;

        if (SBJsonTokeniserIsDigit(ch)) {
            self.error = @"Leading zero is illegal in number";
            return sbjson_token_error;
        }
    }

    while (SBJsonTokeniserIsDigit(ch)) {
        mantissa *= 10;
        mantissa += (ch - '0');
        mantissa_length++;

        if (![_stream getNextUnichar:&ch])
            return sbjson_token_eof;
    }

    // The integer part is mandatory. Checking here, before fraction digits
    // are added to mantissa_length, rejects input such as "-.5".
    if (isNegative && !mantissa_length) {
        self.error = @"No digits after initial minus";
        return sbjson_token_error;
    }

    // NOTE(review): both exponents are accumulated in a short; a very long
    // exponent may not fit -- confirm how such input should be handled.
    short exponent = 0;
    BOOL isFloat = NO;

    if (ch == '.') {
        isFloat = YES;
        if (![_stream getNextUnichar:&ch])
            return sbjson_token_eof;

        // Each fraction digit extends the mantissa and shifts the decimal
        // exponent down by one.
        while (SBJsonTokeniserIsDigit(ch)) {
            mantissa *= 10;
            mantissa += (ch - '0');
            mantissa_length++;
            exponent--;

            if (![_stream getNextUnichar:&ch])
                return sbjson_token_eof;
        }

        if (!exponent) {
            self.error = @"No digits after decimal point";
            return sbjson_token_error;
        }
    }

    BOOL hasExponent = NO;
    if (ch == 'e' || ch == 'E') {
        hasExponent = YES;

        if (![_stream getNextUnichar:&ch])
            return sbjson_token_eof;

        BOOL expIsNegative = NO;
        if (ch == '-') {
            expIsNegative = YES;
            if (![_stream getNextUnichar:&ch])
                return sbjson_token_eof;

        } else if (ch == '+') {
            if (![_stream getNextUnichar:&ch])
                return sbjson_token_eof;
        }

        short explicit_exponent = 0;
        short explicit_exponent_length = 0;
        while (SBJsonTokeniserIsDigit(ch)) {
            explicit_exponent *= 10;
            explicit_exponent += (ch - '0');
            explicit_exponent_length++;

            if (![_stream getNextUnichar:&ch])
                return sbjson_token_eof;
        }

        if (explicit_exponent_length == 0) {
            self.error = @"No digits in exponent";
            return sbjson_token_error;
        }

        if (expIsNegative)
            exponent -= explicit_exponent;
        else
            exponent += explicit_exponent;
    }

    if (mantissa_length >= 19) {
        // Too many digits to trust the unsigned long long accumulator;
        // let NSDecimalNumber parse the original text instead.
        NSString *number = [_stream stringWithRange:NSMakeRange(numberStart, _stream.index - numberStart)];
        *token = [NSDecimalNumber decimalNumberWithString:number];

    } else if (!isFloat && !hasExponent) {
        if (!isNegative)
            *token = [NSNumber numberWithUnsignedLongLong:mantissa];
        else
            // Fewer than 19 digits, so the value fits in a long long.
            *token = [NSNumber numberWithLongLong:-(long long)mantissa];
    } else {
        *token = [NSDecimalNumber decimalNumberWithMantissa:mantissa
                                                   exponent:exponent
                                                 isNegative:isNegative];
    }

    return sbjson_token_number;
}
00371 
/**
 Produces the next token from the stream.

 Leading whitespace is skipped, then the first character selects the kind
 of token to read.

 @param token Receives the value for string and number tokens; not written
              for any other token type.
 @return The token type. sbjson_token_eof means the stream ended before a
         complete token was available; in that case the stream index is
         reset to where the token began so the call can be repeated once
         more data has been appended. sbjson_token_error means the input is
         malformed and self.error describes why.
 */
- (sbjson_token_t)getToken:(NSObject **)token {

    [_stream skipWhitespace];

    unichar first;
    if (![_stream getUnichar:&first])
        return sbjson_token_eof;

    NSUInteger tokenStart = _stream.index;
    sbjson_token_t result = sbjson_token_error;

    // Structural tokens are exactly one character long: identify the token,
    // consume the character, done.
    BOOL isSingleCharacter = YES;
    switch (first) {
        case '[': result = sbjson_token_array_start;      break;
        case ']': result = sbjson_token_array_end;        break;
        case '{': result = sbjson_token_object_start;     break;
        case '}': result = sbjson_token_object_end;       break;
        case ':': result = sbjson_token_keyval_separator; break;
        case ',': result = sbjson_token_separator;        break;
        default:  isSingleCharacter = NO;                 break;
    }

    if (isSingleCharacter) {
        [_stream skip];
        return result;
    }

    // Everything else spans several characters and may hit the end of the
    // buffered data part-way through.
    switch (first) {
        case 'n':
            result = [self match:"null" length:4 retval:sbjson_token_null];
            break;

        case 't':
            result = [self match:"true" length:4 retval:sbjson_token_true];
            break;

        case 'f':
            result = [self match:"false" length:5 retval:sbjson_token_false];
            break;

        case '"':
            result = [self getStringToken:token];
            break;

        case '-':
        case '0' ... '9':
            result = [self getNumberToken:token];
            break;

        case '+':
            self.error = @"Leading + is illegal in number";
            result = sbjson_token_error;
            break;

        default:
            self.error = [NSString stringWithFormat:@"Illegal start of token [%c]", first];
            result = sbjson_token_error;
            break;
    }

    // Running dry in the middle of a token cannot be resumed mid-flight, so
    // rewind to the token's first character and let the next call start
    // over, by which time more data has hopefully arrived.
    if (result == sbjson_token_eof)
        _stream.index = tokenStart;

    return result;
}
00456 
00457 
00458 @end