Austin Schuh | f417eaf | 2019-09-16 21:58:36 -0700 | [diff] [blame] | 1 | // JSON Tokenizer. Copyright (c) 2012, Rasmus Andersson. All rights reserved. |
| 2 | // Use of this source code is governed by a MIT-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | #include <stdlib.h> |
| 5 | #include <stdint.h> |
| 6 | #include <stdbool.h> |
| 7 | #include <limits.h> |
| 8 | #include <ctype.h> // isdigit |
| 9 | #include <errno.h> |
| 10 | #include <string.h> |
| 11 | #include <math.h> |
| 12 | #include <assert.h> |
| 13 | |
// Error info: the messages stored in ctx->error_info when tokenizing fails.
// When JSONT_ERRINFO_CUSTOM is defined this whole section is skipped, so
// jsont_err_t and the JSONT_ERRINFO_* names must then come from elsewhere.
#ifndef JSONT_ERRINFO_CUSTOM
#define jsont_err_t const char*
static jsont_err_t JSONT_ERRINFO_STACK_SIZE =
    "Stack size limit exceeded";
static jsont_err_t JSONT_ERRINFO_UNEXPECTED_OBJECT_END =
    "Unexpected end of object while not in an object";
static jsont_err_t JSONT_ERRINFO_UNEXPECTED_ARRAY_END =
    "Unexpected end of array while not in an array";
static jsont_err_t JSONT_ERRINFO_UNEXPECTED_COMMA =
    "Unexpected \",\"";
static jsont_err_t JSONT_ERRINFO_UNEXPECTED_COLON =
    "Unexpected \":\"";
static jsont_err_t JSONT_ERRINFO_UNEXPECTED =
    "Unexpected input";
static jsont_err_t JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ =
    "Malformed unicode encoded sequence in string";
#endif
| 28 | |
// Size of stack used for structures (in/out array and objects). This value
// is a balance between memory size of a ctx and how many levels deep the
// tokenizer can go.
#define _STRUCT_TYPE_STACK_SIZE 512
// Smallest capacity, in bytes, given to a context's value buffer the first
// time it is allocated.
#define _VALUE_BUF_MIN_SIZE 64
| 34 | |
// Hex digit lookup, indexed by (byte - '0') for bytes in the range '0'..'f'.
// Slots that do not correspond to a hexadecimal digit hold 0xff.
static const uint8_t kHexValueTable[55] = {
  // '0'-'9'
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
  // ':' through '@'
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  // 'A'-'F'
  10, 11, 12, 13, 14, 15,
  // 'G' through '`'
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  // 'a'-'f'
  10, 11, 12, 13, 14, 15
};
| 43 | |
// Token identifier. The JSONT_* values stored in it are presumably declared
// in jsont.h -- TODO confirm.
typedef uint8_t jsont_tok_t;

// Tokenizer state for one input buffer.
typedef struct jsont_ctx {
  void* user_data;                      // opaque pointer given to jsont_create()
  const uint8_t* input_buf;             // start of the current input
  const uint8_t* input_buf_ptr;         // next byte to be read from the input
  size_t input_len;                     // total number of input bytes
  const uint8_t* input_buf_value_start; // first byte of the current value in the input
  const uint8_t* input_buf_value_end;   // one past the last byte of the current value
  // Heap buffer holding the decoded value when a string contains escapes.
  struct {
    uint8_t* data;   // allocated storage (0 until first needed)
    size_t size;     // allocated capacity in bytes
    size_t length;   // number of bytes currently stored
    bool inuse;      // true when the current value lives here, not in the input
  } value_buf;
  jsont_err_t error_info;               // description of the last error, or 0
  jsont_tok_t curr_tok;                 // most recently produced token
  size_t st_stack_size;                 // capacity of st_stack
  size_t st_stack_len;                  // number of currently open objects/arrays
  jsont_tok_t st_stack[_STRUCT_TYPE_STACK_SIZE]; // open structure types, innermost last
} jsont_ctx_t;
| 65 | |
| 66 | #define _JSONT_IN_SOURCE |
| 67 | #include <jsont.h> |
| 68 | |
| 69 | unsigned long _hex_str_to_ul(const uint8_t* bytes, size_t len) { |
| 70 | unsigned long value = 0; |
| 71 | unsigned long cutoff = ULONG_MAX / 16; |
| 72 | int cutoff_digit = (int)(ULONG_MAX - cutoff * 16); |
| 73 | |
| 74 | for (size_t i = 0; i != len; ++i) { |
| 75 | uint8_t b = bytes[i]; |
| 76 | int digit = (b > '0'-1 && b < 'f'+1) ? kHexValueTable[b-'0'] : -1; |
Austin Schuh | 3e95e5d | 2019-09-20 00:08:54 -0700 | [diff] [blame] | 77 | if (b == 0xff || // bad digit |
Austin Schuh | f417eaf | 2019-09-16 21:58:36 -0700 | [diff] [blame] | 78 | (value > cutoff) || // overflow |
| 79 | ((value == cutoff) && (digit > cutoff_digit)) ) { |
| 80 | return ULONG_MAX; |
| 81 | } else { |
| 82 | value = (value * 16) + digit; |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | return value; |
| 87 | } |
| 88 | |
| 89 | jsont_ctx_t* jsont_create(void* user_data) { |
| 90 | jsont_ctx_t* ctx = (jsont_ctx_t*)calloc(1, sizeof(jsont_ctx_t)); |
| 91 | ctx->user_data = user_data; |
| 92 | ctx->st_stack_size = _STRUCT_TYPE_STACK_SIZE; |
| 93 | return ctx; |
| 94 | } |
| 95 | |
| 96 | void jsont_destroy(jsont_ctx_t* ctx) { |
| 97 | if (ctx->value_buf.data != 0) { |
| 98 | free(ctx->value_buf.data); |
| 99 | } |
| 100 | free(ctx); |
| 101 | } |
| 102 | |
| 103 | void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) { |
| 104 | ctx->input_buf_ptr = ctx->input_buf = bytes; |
| 105 | ctx->input_len = length; |
| 106 | ctx->st_stack_len = 0; |
| 107 | ctx->curr_tok = JSONT_END; |
| 108 | ctx->input_buf_value_start = 0; |
| 109 | ctx->input_buf_value_end = 0; |
| 110 | ctx->value_buf.length = 0; |
| 111 | ctx->value_buf.inuse = false; |
| 112 | ctx->error_info = 0; |
| 113 | } |
| 114 | |
| 115 | jsont_tok_t jsont_current(const jsont_ctx_t* ctx) { |
| 116 | return ctx->curr_tok; |
| 117 | } |
| 118 | |
| 119 | void* jsont_user_data(const jsont_ctx_t* ctx) { |
| 120 | return ctx->user_data; |
| 121 | } |
| 122 | |
| 123 | // Get the current/last byte read. Suitable for debugging JSONT_ERR |
| 124 | uint8_t jsont_current_byte(jsont_ctx_t* ctx) { |
| 125 | return (ctx->input_buf_ptr == 0) ? 0 : *(ctx->input_buf_ptr-1); |
| 126 | } |
| 127 | |
| 128 | size_t jsont_current_offset(jsont_ctx_t* ctx) { |
| 129 | return ctx->input_buf_ptr - ctx->input_buf; |
| 130 | } |
| 131 | |
| 132 | jsont_err_t jsont_error_info(jsont_ctx_t* ctx) { |
| 133 | return ctx->error_info; |
| 134 | } |
| 135 | |
| 136 | inline static bool _no_value(jsont_ctx_t* ctx) { |
| 137 | return ctx->input_buf_value_start == 0 |
| 138 | || ctx->curr_tok < _JSONT_VALUES_START |
| 139 | || ctx->curr_tok > _JSONT_VALUES_END; |
| 140 | } |
| 141 | |
| 142 | inline static size_t _input_avail(jsont_ctx_t* ctx) { |
| 143 | return ctx->input_len - (ctx->input_buf_ptr - ctx->input_buf); |
| 144 | } |
| 145 | |
| 146 | inline static uint8_t _next_byte(jsont_ctx_t* ctx) { |
| 147 | return (_input_avail(ctx) == 0) ? 0 : *(ctx->input_buf_ptr++); |
| 148 | } |
| 149 | |
| 150 | inline static jsont_tok_t _st_stack_top(const jsont_ctx_t* ctx) { |
| 151 | return (ctx->st_stack_len != 0) ? ctx->st_stack[ctx->st_stack_len-1] |
| 152 | : JSONT_END; |
| 153 | } |
| 154 | |
| 155 | size_t jsont_data_value(jsont_ctx_t* ctx, const uint8_t** bytes) { |
| 156 | if (_no_value(ctx)) { |
| 157 | return 0; |
| 158 | } else { |
| 159 | if (ctx->value_buf.inuse) { |
| 160 | *bytes = ctx->value_buf.data; |
| 161 | return ctx->value_buf.length; |
| 162 | } else { |
| 163 | *bytes = ctx->input_buf_value_start; |
| 164 | return ctx->input_buf_value_end - ctx->input_buf_value_start; |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | bool jsont_data_equals(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) { |
| 170 | if (ctx->value_buf.inuse) { |
| 171 | return (ctx->value_buf.length == length) && |
| 172 | (memcmp((const void*)ctx->value_buf.data, |
| 173 | (const void*)bytes, length) == 0); |
| 174 | } else { |
Austin Schuh | 3e95e5d | 2019-09-20 00:08:54 -0700 | [diff] [blame] | 175 | return (ctx->input_buf_value_end - ctx->input_buf_value_start == |
| 176 | (ssize_t)length) && |
| 177 | (memcmp((const void *)ctx->input_buf_value_start, |
| 178 | (const void *)bytes, length) == 0); |
Austin Schuh | f417eaf | 2019-09-16 21:58:36 -0700 | [diff] [blame] | 179 | } |
| 180 | } |
| 181 | |
| 182 | char* jsont_strcpy_value(jsont_ctx_t* ctx) { |
| 183 | if (_no_value(ctx)) { |
| 184 | return 0; |
| 185 | } else { |
| 186 | const uint8_t* bytes = 0; |
| 187 | size_t len = jsont_data_value(ctx, &bytes); |
| 188 | char* buf = (char*)malloc(len+1); |
| 189 | if (memcpy((void*)buf, (const void*)bytes, len) != buf) { |
| 190 | return 0; |
| 191 | } |
| 192 | buf[len] = 0; |
| 193 | return buf; |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | int64_t jsont_int_value(jsont_ctx_t* ctx) { |
| 198 | if (_no_value(ctx)) { |
| 199 | return INT64_MIN; |
| 200 | } |
| 201 | |
| 202 | const uint8_t* start = 0; |
| 203 | size_t len = jsont_data_value(ctx, &start); |
| 204 | if (len == 0) { |
| 205 | return INT64_MIN; |
| 206 | } |
| 207 | const uint8_t* end = start + len + 1; |
| 208 | |
| 209 | bool negative; |
| 210 | uint8_t b = *start++; |
| 211 | const int base = 10; |
| 212 | |
| 213 | if (b == '-') { |
| 214 | negative = true; |
| 215 | b = *start++; |
| 216 | if (start == end) { |
| 217 | errno = EINVAL; |
| 218 | return INT64_MIN; |
| 219 | } |
| 220 | } else { |
| 221 | negative = false; |
| 222 | if (b == '+') { |
| 223 | b = *start++; |
| 224 | if (start == end) { |
| 225 | errno = EINVAL; |
| 226 | return INT64_MIN; |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | uint64_t acc = 0; |
| 232 | int any = 0; |
| 233 | uint64_t cutoff = negative |
| 234 | ? (uint64_t)-(INT64_MIN + INT64_MAX) + INT64_MAX |
| 235 | : INT64_MAX; |
| 236 | int cutlim = cutoff % base; |
| 237 | cutoff /= base; |
| 238 | for ( ; start != end; b = *start++) { |
| 239 | if (b >= '0' && b <= '9') b -= '0'; else break; |
| 240 | if (any < 0 || acc > cutoff || (acc == cutoff && b > cutlim)) { |
| 241 | any = -1; |
| 242 | } else { |
| 243 | any = 1; |
| 244 | acc *= base; |
| 245 | acc += b; |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | if (any < 0) { |
| 250 | acc = negative ? INT64_MIN : INT64_MAX; |
| 251 | errno = ERANGE; |
| 252 | } else if (!any) { |
| 253 | errno = EINVAL; |
| 254 | return INT64_MIN; |
| 255 | } else if (negative) { |
| 256 | acc = -acc; |
| 257 | } |
| 258 | |
| 259 | return (int64_t)acc; |
| 260 | } |
| 261 | |
// Quiet NaN used as the "no value" result of jsont_float_value(). Prefer the
// NAN macro from <math.h>; otherwise fall back to nan(), which takes a string
// argument (an empty string, not a null pointer).
#ifdef NAN
#define _JSONT_NAN NAN
#else
#define _JSONT_NAN nan("")
#endif
| 267 | |
| 268 | double jsont_float_value(jsont_ctx_t* ctx) { |
| 269 | // Note: This might cause a segfault if the input is at the end, so we cause |
| 270 | // an error if we try to read a float value while at the end of the input. |
| 271 | if (_no_value(ctx) || _input_avail(ctx) == 0) { |
| 272 | errno = EINVAL; |
| 273 | return _JSONT_NAN; |
| 274 | } |
| 275 | |
| 276 | const uint8_t* bytes = 0; |
| 277 | size_t len = jsont_data_value(ctx, &bytes); |
| 278 | if (len == 0) { |
| 279 | return _JSONT_NAN; |
| 280 | } |
| 281 | return atof((const char*)bytes); |
| 282 | } |
| 283 | |
| 284 | inline static jsont_tok_t _set_tok(jsont_ctx_t* ctx, jsont_tok_t tok) { |
| 285 | ctx->curr_tok = tok; |
| 286 | |
| 287 | if (tok != JSONT_END) { |
| 288 | if (tok == JSONT_OBJECT_START) { |
| 289 | if (ctx->st_stack_len == ctx->st_stack_size) { |
| 290 | ctx->error_info = JSONT_ERRINFO_STACK_SIZE; |
| 291 | return ctx->curr_tok = JSONT_ERR; // TODO: Grow st_stack |
| 292 | } |
| 293 | ctx->st_stack[ctx->st_stack_len++] = JSONT_OBJECT_START; |
| 294 | |
| 295 | } else if (tok == JSONT_OBJECT_END) { |
| 296 | if (_st_stack_top(ctx) != JSONT_OBJECT_START) { |
| 297 | ctx->error_info = JSONT_ERRINFO_UNEXPECTED_OBJECT_END; |
| 298 | return ctx->curr_tok = JSONT_ERR; |
| 299 | } |
| 300 | --ctx->st_stack_len; |
| 301 | |
| 302 | } else if (tok == JSONT_ARRAY_START) { |
| 303 | if (ctx->st_stack_len == ctx->st_stack_size) { |
| 304 | ctx->error_info = JSONT_ERRINFO_STACK_SIZE; |
| 305 | return ctx->curr_tok = JSONT_ERR; |
| 306 | } |
| 307 | ctx->st_stack[ctx->st_stack_len++] = JSONT_ARRAY_START; |
| 308 | |
| 309 | } else if (tok == JSONT_ARRAY_END) { |
| 310 | if (_st_stack_top(ctx) != JSONT_ARRAY_START) { |
| 311 | ctx->error_info = JSONT_ERRINFO_UNEXPECTED_ARRAY_END; |
| 312 | return ctx->curr_tok = JSONT_ERR; |
| 313 | } |
| 314 | --ctx->st_stack_len; |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | return tok; |
| 319 | } |
| 320 | inline static void _rewind_one_byte(jsont_ctx_t* ctx) { |
| 321 | --ctx->input_buf_ptr; |
| 322 | } |
| 323 | inline static void _rewind_bytes(jsont_ctx_t* ctx, size_t n) { |
| 324 | ctx->input_buf_ptr -= n; |
| 325 | } |
| 326 | inline static void _skip_bytes(jsont_ctx_t* ctx, size_t n) { |
| 327 | ctx->input_buf_ptr += n; |
| 328 | } |
| 329 | inline static uint8_t _read_atom(jsont_ctx_t* ctx, size_t slacklen, |
| 330 | jsont_tok_t tok) { |
| 331 | if (_input_avail(ctx) < slacklen) { |
| 332 | // rewind and wait for buffer fill |
| 333 | _rewind_one_byte(ctx); |
| 334 | return _set_tok(ctx, JSONT_END); |
| 335 | } else { |
| 336 | _skip_bytes(ctx, slacklen); // e.g. "ull" after "n" or "alse" after "f" |
| 337 | return _set_tok(ctx, tok); |
| 338 | } |
| 339 | } |
| 340 | inline static bool _expects_field_name(jsont_ctx_t* ctx) { |
| 341 | return ( ctx->curr_tok == JSONT_OBJECT_START |
| 342 | || ( ctx->curr_tok == _JSONT_COMMA |
| 343 | && _st_stack_top(ctx) == JSONT_OBJECT_START) ); |
| 344 | } |
| 345 | |
| 346 | static void _value_buf_append(jsont_ctx_t* ctx, const uint8_t* data, size_t len) { |
| 347 | //printf("_value_buf_append(<ctx>, %p, %zu)\n", data, len); |
| 348 | if (ctx->value_buf.size == 0) { |
| 349 | assert(ctx->value_buf.data == 0); |
| 350 | ctx->value_buf.length = len; |
| 351 | ctx->value_buf.size = len * 2; |
| 352 | if (ctx->value_buf.size < _VALUE_BUF_MIN_SIZE) { |
| 353 | ctx->value_buf.size = _VALUE_BUF_MIN_SIZE; |
| 354 | } |
| 355 | ctx->value_buf.data = (uint8_t*)malloc(ctx->value_buf.size); |
| 356 | if (len != 0) { |
| 357 | memcpy(ctx->value_buf.data, data, len); |
| 358 | } |
| 359 | } else { |
| 360 | if (ctx->value_buf.length + len > ctx->value_buf.size) { |
| 361 | size_t new_size = ctx->value_buf.size + (len * 2); |
| 362 | ctx->value_buf.data = realloc(ctx->value_buf.data, new_size); |
| 363 | assert(ctx->value_buf.data != 0); |
| 364 | ctx->value_buf.size = new_size; |
| 365 | } |
| 366 | memcpy(ctx->value_buf.data + ctx->value_buf.length, data, len); |
| 367 | ctx->value_buf.length += len; |
| 368 | } |
| 369 | ctx->value_buf.inuse = true; |
| 370 | } |
| 371 | |
| 372 | jsont_tok_t jsont_next(jsont_ctx_t* ctx) { |
| 373 | // |
| 374 | // { } [ ] n t f " |
| 375 | // | | | | |
| 376 | // | | | +- /[^"]*/ " |
| 377 | // | | +- a l s e |
| 378 | // | +- r u e |
| 379 | // +- u l l |
| 380 | // |
| 381 | while (1) { |
| 382 | uint8_t b = _next_byte(ctx); |
| 383 | switch (b) { |
| 384 | case '{': return _set_tok(ctx, JSONT_OBJECT_START); |
| 385 | case '}': return _set_tok(ctx, JSONT_OBJECT_END); |
| 386 | case '[': return _set_tok(ctx, JSONT_ARRAY_START); |
| 387 | case ']': return _set_tok(ctx, JSONT_ARRAY_END); |
| 388 | case 'n': return _read_atom(ctx, 3, JSONT_NULL); |
| 389 | case 't': return _read_atom(ctx, 3, JSONT_TRUE); |
| 390 | case 'f': return _read_atom(ctx, 4, JSONT_FALSE); |
| 391 | case '"': { |
| 392 | ctx->input_buf_value_start = ctx->input_buf_ptr; |
| 393 | ctx->value_buf.inuse = false; |
| 394 | ctx->value_buf.length = 0; |
| 395 | uint8_t prev_b = 0; |
| 396 | while (1) { |
| 397 | b = _next_byte(ctx); |
| 398 | |
| 399 | if (b == '\\') { |
| 400 | if (prev_b == '\\') { |
| 401 | // This is an actual '\'. |
| 402 | assert(ctx->value_buf.inuse == true); // should be buffering |
| 403 | _value_buf_append(ctx, ctx->input_buf_ptr-1, 1); // append "\" |
| 404 | } else { |
| 405 | // Okay, this is an escape prefix. Move to buffering value. |
| 406 | if (ctx->value_buf.inuse == 0) { |
| 407 | _value_buf_append(ctx, |
| 408 | ctx->input_buf_value_start, |
| 409 | // any data before the "\": |
| 410 | (ctx->input_buf_ptr-1 - ctx->input_buf_value_start) ); |
| 411 | } |
| 412 | } |
| 413 | } else { |
| 414 | // Any byte except '\' |
| 415 | |
| 416 | if (prev_b == '\\') { |
| 417 | // Currently just after an escape character |
| 418 | assert(ctx->value_buf.inuse == true); // should be buffering |
| 419 | |
| 420 | // JSON specifies a few "magic" characters that have a different |
| 421 | // meaning than their value: |
| 422 | switch (b) { |
| 423 | case 'b': |
| 424 | _value_buf_append(ctx, (const uint8_t*)"\b", 1); |
| 425 | break; |
| 426 | case 'f': |
| 427 | _value_buf_append(ctx, (const uint8_t*)"\f", 1); |
| 428 | break; |
| 429 | case 'n': |
| 430 | _value_buf_append(ctx, (const uint8_t*)"\n", 1); |
| 431 | break; |
| 432 | case 'r': |
| 433 | _value_buf_append(ctx, (const uint8_t*)"\r", 1); |
| 434 | break; |
| 435 | case 't': |
| 436 | _value_buf_append(ctx, (const uint8_t*)"\t", 1); |
| 437 | break; |
| 438 | case 'u': { |
| 439 | // 4 hex digits should follow |
| 440 | if (_input_avail(ctx) < 4) { |
| 441 | _rewind_bytes(ctx, |
| 442 | ctx->input_buf_ptr - (ctx->input_buf_value_start-1)); |
| 443 | return _set_tok(ctx, JSONT_END); |
| 444 | } |
| 445 | unsigned long utf16cp = _hex_str_to_ul(ctx->input_buf_ptr, 4); |
| 446 | ctx->input_buf_ptr += 4; |
| 447 | if (utf16cp == ULONG_MAX) { |
| 448 | ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ; |
| 449 | return _set_tok(ctx, JSONT_ERR); |
| 450 | } |
| 451 | |
| 452 | uint32_t cp = (uint16_t)(0xffff & utf16cp); |
| 453 | |
| 454 | // Is lead surrogate? |
| 455 | if (cp >= 0xd800u && cp <= 0xdbffu) { |
| 456 | // TODO: Implement pairs by reading another "\uHHHH" |
| 457 | ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ; |
| 458 | return _set_tok(ctx, JSONT_ERR); |
| 459 | } |
| 460 | |
| 461 | // Append UTF-8 byte(s) representing the Unicode codepoint `cp` |
| 462 | if (cp < 0x80) { |
| 463 | uint8_t cp8 = ((uint8_t)cp); |
| 464 | _value_buf_append(ctx, (const uint8_t*)&cp8, 1); |
| 465 | } else if (cp < 0x800) { |
| 466 | uint8_t cp8 = (uint8_t)((cp >> 6) | 0xc0); |
| 467 | _value_buf_append(ctx, (const uint8_t*)&cp8, 1); |
| 468 | cp8 = (uint8_t)((cp & 0x3f) | 0x80); |
| 469 | _value_buf_append(ctx, (const uint8_t*)&cp8, 1); |
| 470 | } else { |
| 471 | uint8_t cp8 = (uint8_t)((cp >> 12) | 0xe0); |
| 472 | _value_buf_append(ctx, (const uint8_t*)&cp8, 1); |
| 473 | cp8 = (uint8_t)(((cp >> 6) & 0x3f) | 0x80); |
| 474 | _value_buf_append(ctx, (const uint8_t*)&cp8, 1); |
| 475 | cp8 = (uint8_t)((cp & 0x3f) | 0x80); |
| 476 | _value_buf_append(ctx, (const uint8_t*)&cp8, 1); |
| 477 | } |
| 478 | |
| 479 | break; |
| 480 | } |
| 481 | default: { |
| 482 | _value_buf_append(ctx, &b, 1); |
| 483 | break; |
| 484 | } |
| 485 | } // switch |
| 486 | |
| 487 | } else { |
| 488 | // Previous character was NOT an escape character |
| 489 | |
| 490 | if (b == '"') { |
| 491 | // Well, this marks the end of a string |
| 492 | ctx->input_buf_value_end = ctx->input_buf_ptr-1; |
| 493 | return _set_tok(ctx, _expects_field_name(ctx) |
| 494 | ? JSONT_FIELD_NAME : JSONT_STRING); |
| 495 | break; |
| 496 | } else if (b == 0) { |
| 497 | // Input buffer ends in the middle of a string |
| 498 | _rewind_bytes(ctx, |
| 499 | ctx->input_buf_ptr - (ctx->input_buf_value_start-1)); |
| 500 | return _set_tok(ctx, JSONT_END); |
| 501 | } else { |
| 502 | if (ctx->value_buf.inuse) { |
| 503 | _value_buf_append(ctx, &b, 1); |
| 504 | } |
| 505 | } |
| 506 | } |
| 507 | } |
| 508 | |
| 509 | prev_b = b; |
| 510 | } |
| 511 | } |
| 512 | case ',': |
| 513 | if ( ctx->curr_tok == JSONT_OBJECT_START |
| 514 | || ctx->curr_tok == JSONT_ARRAY_START |
| 515 | || ctx->curr_tok == JSONT_END |
| 516 | || ctx->curr_tok == JSONT_ERR) { |
| 517 | if (ctx->curr_tok != JSONT_ERR) |
| 518 | ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COMMA; |
| 519 | return _set_tok(ctx, JSONT_ERR); |
| 520 | } |
| 521 | _set_tok(ctx, _JSONT_COMMA); |
| 522 | // read next by simply letting the outer "while" do its thing |
| 523 | break; |
| 524 | |
| 525 | case ':': |
| 526 | if (ctx->curr_tok != JSONT_FIELD_NAME) { |
| 527 | ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COLON; |
| 528 | return _set_tok(ctx, JSONT_ERR); |
| 529 | } |
| 530 | // let the outer "while" do its thing |
| 531 | break; |
| 532 | |
| 533 | case ' ': case '\r': case '\n': case '\t': |
| 534 | // ignore whitespace and let the outer "while" do its thing |
| 535 | break; |
| 536 | |
| 537 | case 0: |
| 538 | //printf("** %d\n", __LINE__); |
| 539 | return _set_tok(ctx, JSONT_END); |
| 540 | |
| 541 | default: |
| 542 | if (isdigit((int)b) || b == '+' || b == '-') { |
| 543 | // We are reading a number |
| 544 | ctx->input_buf_value_start = ctx->input_buf_ptr-1; |
| 545 | //uint8_t prev_b = 0; |
| 546 | bool is_float = false; |
| 547 | while (1) { |
| 548 | b = _next_byte(ctx); |
| 549 | if (b == '.') { |
| 550 | is_float = true; |
| 551 | } else if (!isdigit((int)b)) { |
| 552 | _rewind_one_byte(ctx); |
| 553 | ctx->input_buf_value_end = ctx->input_buf_ptr; |
| 554 | return _set_tok(ctx, is_float ? JSONT_NUMBER_FLOAT |
| 555 | : JSONT_NUMBER_INT); |
| 556 | } else if (b == 0) { |
| 557 | // Input buffer ends before we know that the number-value ended |
| 558 | _rewind_bytes(ctx, ctx->input_buf_ptr |
| 559 | - (ctx->input_buf_value_start-1)); |
| 560 | return _set_tok(ctx, JSONT_END); |
| 561 | } |
| 562 | } |
| 563 | } |
| 564 | |
| 565 | ctx->error_info = JSONT_ERRINFO_UNEXPECTED; |
| 566 | return _set_tok(ctx, JSONT_ERR); |
| 567 | } |
| 568 | } // while (1) |
| 569 | } |
| 570 | |