Squashed 'third_party/jsont/' content from commit 1536152d7
Change-Id: I51a80190772b74ca0d45fd3fadc130e872b57cc0
git-subtree-dir: third_party/jsont
git-subtree-split: 1536152d7c1926448d42e4a691acd9a15940b20c
diff --git a/jsont.c b/jsont.c
new file mode 100644
index 0000000..5863c7a
--- /dev/null
+++ b/jsont.c
@@ -0,0 +1,569 @@
+// JSON Tokenizer. Copyright (c) 2012, Rasmus Andersson. All rights reserved.
+// Use of this source code is governed by a MIT-style license that can be
+// found in the LICENSE file.
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <ctype.h> // isdigit
+#include <errno.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+// Built-in error messages; skipped when JSONT_ERRINFO_CUSTOM is defined.
+#ifndef JSONT_ERRINFO_CUSTOM
+#define jsont_err_t const char*
+#define DEF_EM(NAME, msg) static jsont_err_t JSONT_ERRINFO_##NAME = msg
+DEF_EM(STACK_SIZE, "Stack size limit exceeded");
+DEF_EM(UNEXPECTED_OBJECT_END,
+ "Unexpected end of object while not in an object");
+DEF_EM(UNEXPECTED_ARRAY_END, "Unexpected end of array while not in an array");
+DEF_EM(UNEXPECTED_COMMA, "Unexpected \",\"");
+DEF_EM(UNEXPECTED_COLON, "Unexpected \":\"");
+DEF_EM(UNEXPECTED, "Unexpected input");
+DEF_EM(UNEXPECTED_UNICODE_SEQ, "Malformed unicode encoded sequence in string");
+#undef DEF_EM
+#endif // JSONT_ERRINFO_CUSTOM
+
+// Size of stack used for structures (in/out array and objects). This value
+// is a balance between memory size of a ctx and how many levels deep the
+// tokenizer can go.
+#define _STRUCT_TYPE_STACK_SIZE 512
+#define _VALUE_BUF_MIN_SIZE 64 // smallest first allocation of value_buf, bytes
+
+static const uint8_t kHexValueTable[55] = { // indexed by (byte - '0')
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // 0-9
+ -1, -1, -1, -1, -1, -1, -1, // ':'..'@'; uint8_t elements hold each -1 as 255
+ 10, 11, 12, 13, 14, 15, // A-F
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, // 'G'..'`' (26 non-hex bytes over two lines)
+ 10, 11, 12, 13, 14, 15 // a-f
+};
+
+typedef uint8_t jsont_tok_t; // token code, stored in one byte
+
+typedef struct jsont_ctx { // tokenizer state
+ void* user_data; // opaque pointer given to jsont_create()
+ const uint8_t* input_buf; // start of the current input
+ const uint8_t* input_buf_ptr; // next byte to read
+ size_t input_len; // number of bytes in input_buf
+ const uint8_t* input_buf_value_start; // first byte of the current value
+ const uint8_t* input_buf_value_end; // one past the last byte of the value
+ struct { // holds string values that contained escapes
+ uint8_t* data; // heap storage, 0 until first use
+ size_t size; // allocated bytes
+ size_t length; // bytes in use
+ bool inuse; // true when the current value lives here, not in the input
+ } value_buf;
+ jsont_err_t error_info; // error description, 0 after jsont_reset()
+ jsont_tok_t curr_tok; // most recent token
+ size_t st_stack_size; // capacity of st_stack
+ size_t st_stack_len; // number of currently open structures
+ jsont_tok_t st_stack[_STRUCT_TYPE_STACK_SIZE]; // open *_START tokens
+} jsont_ctx_t;
+
+#define _JSONT_IN_SOURCE
+#include <jsont.h>
+
+// Parse exactly `len` hexadecimal digits from `bytes`. Returns ULONG_MAX if a
+// byte is not a hex digit or the value would not fit in an unsigned long.
+unsigned long _hex_str_to_ul(const uint8_t* bytes, size_t len) {
+  const unsigned long cutoff = ULONG_MAX / 16;
+  const int cutoff_digit = (int)(ULONG_MAX - cutoff * 16);
+  unsigned long value = 0;
+  for (size_t i = 0; i != len; ++i) {
+    uint8_t b = bytes[i];
+    // Table misses are stored as (uint8_t)-1 == 255, hence the `> 15` test.
+    int digit = (b >= '0' && b <= 'f') ? kHexValueTable[b - '0'] : -1;
+    if (digit < 0 || digit > 15 ||                  // bad digit
+        value > cutoff ||                           // overflow
+        (value == cutoff && digit > cutoff_digit)) {
+      return ULONG_MAX;
+    }
+    value = (value * 16) + (unsigned long)digit;
+  }
+  return value;
+}
+
+jsont_ctx_t* jsont_create(void* user_data) {  // returns NULL if out of memory
+  jsont_ctx_t* ctx = (jsont_ctx_t*)calloc(1, sizeof(jsont_ctx_t));
+  if (ctx == 0) return 0;  // calloc failed: there is nothing to initialise
+  ctx->user_data = user_data; ctx->st_stack_size = _STRUCT_TYPE_STACK_SIZE;
+  return ctx;
+}
+
+// Free a context and its value buffer. Passing NULL is a no-op.
+void jsont_destroy(jsont_ctx_t* ctx) {
+  if (ctx == 0) return;
+  free(ctx->value_buf.data);  // free(NULL) does nothing, so no guard is needed
+  free(ctx);
+}
+
+// Start tokenizing `bytes[0..length)`; clears token, stack, value and error.
+void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) {
+  ctx->input_buf = ctx->input_buf_ptr = bytes;
+  ctx->input_len = length;
+  ctx->input_buf_value_start = ctx->input_buf_value_end = 0;
+  ctx->value_buf.inuse = false;
+  ctx->value_buf.length = 0;
+  ctx->st_stack_len = 0;
+  ctx->error_info = 0;
+  ctx->curr_tok = JSONT_END;
+}
+
+jsont_tok_t jsont_current(const jsont_ctx_t* ctx) { // most recent token
+ return ctx->curr_tok;
+}
+
+void* jsont_user_data(const jsont_ctx_t* ctx) { // pointer given to jsont_create
+ return ctx->user_data;
+}
+
+// Get the last byte read, or 0 if none was read yet. Useful with JSONT_ERR.
+uint8_t jsont_current_byte(jsont_ctx_t* ctx) {
+  return (ctx->input_buf_ptr == ctx->input_buf) ? 0 : ctx->input_buf_ptr[-1];
+}
+
+size_t jsont_current_offset(jsont_ctx_t* ctx) { // read cursor, bytes from start
+ return ctx->input_buf_ptr - ctx->input_buf;
+}
+
+jsont_err_t jsont_error_info(jsont_ctx_t* ctx) { // 0 right after jsont_reset()
+ return ctx->error_info;
+}
+
+inline static bool _no_value(jsont_ctx_t* ctx) {  // true: no value to read
+  jsont_tok_t tok = ctx->curr_tok;
+  if (tok < _JSONT_VALUES_START || tok > _JSONT_VALUES_END) return true;
+  return ctx->input_buf_value_start == 0;
+}
+
+inline static size_t _input_avail(jsont_ctx_t* ctx) { // bytes not yet read
+ return ctx->input_len - (ctx->input_buf_ptr - ctx->input_buf);
+}
+
+inline static uint8_t _next_byte(jsont_ctx_t* ctx) { // 0 when nothing is left
+ return (_input_avail(ctx) == 0) ? 0 : *(ctx->input_buf_ptr++);
+}
+
+inline static jsont_tok_t _st_stack_top(const jsont_ctx_t* ctx) {
+  if (ctx->st_stack_len == 0) return JSONT_END;  // not inside any structure
+  return ctx->st_stack[ctx->st_stack_len - 1];   // innermost open structure
+}
+
+// Expose the current value's bytes without copying. Returns the byte count, or
+// 0 (leaving `*bytes` untouched) when there is no current value.
+size_t jsont_data_value(jsont_ctx_t* ctx, const uint8_t** bytes) {
+  if (_no_value(ctx)) {
+    return 0;
+  }
+  if (!ctx->value_buf.inuse) {  // the value sits in the input as-is
+    *bytes = ctx->input_buf_value_start;
+    return ctx->input_buf_value_end - ctx->input_buf_value_start;
+  }
+  *bytes = ctx->value_buf.data;
+  return ctx->value_buf.length;
+}
+
+// True when the current value is exactly the `length` bytes at `bytes`.
+bool jsont_data_equals(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) {
+  const bool buffered = ctx->value_buf.inuse;
+  const uint8_t* value = buffered ? ctx->value_buf.data
+                                  : ctx->input_buf_value_start;
+  size_t value_len = buffered
+      ? ctx->value_buf.length
+      : (size_t)(ctx->input_buf_value_end - ctx->input_buf_value_start);
+  if (value_len != length) return false;
+  return memcmp((const void*)value, (const void*)bytes, length) == 0;
+}
+
+// Copy the current value into a new NUL-terminated string owned by the caller
+// (release with free()). Returns NULL if there is no value or malloc fails.
+char* jsont_strcpy_value(jsont_ctx_t* ctx) {
+  if (_no_value(ctx)) {
+    return 0;
+  }
+  const uint8_t* bytes = 0;
+  size_t len = jsont_data_value(ctx, &bytes);
+  char* buf = (char*)malloc(len + 1);
+  if (buf == 0) return 0;  // never hand a failed allocation to memcpy
+  if (len != 0) memcpy(buf, bytes, len);  // nothing to copy for an empty value
+  buf[len] = 0;
+  return buf;
+}
+
+// Parse the current value as a base-10 signed 64-bit integer: an optional
+// leading '+' or '-' followed by digits; parsing stops at the first other
+// byte. Returns INT64_MIN when there is no value, and INT64_MIN with
+// errno = EINVAL when no digit is found. On overflow the result saturates to
+// INT64_MIN / INT64_MAX and errno is set to ERANGE.
+int64_t jsont_int_value(jsont_ctx_t* ctx) {
+  if (_no_value(ctx)) {
+    return INT64_MIN;
+  }
+
+  const uint8_t* bytes = 0;
+  size_t len = jsont_data_value(ctx, &bytes);
+  if (len == 0) {
+    return INT64_MIN;
+  }
+
+  // Walk the value by index so that no byte at or beyond bytes[len] is ever
+  // read; the value is not terminated and may end where its buffer ends.
+  size_t pos = 0;
+  const bool negative = (bytes[0] == '-');
+  if (negative || bytes[0] == '+') {
+    pos = 1;
+    if (len == 1) {
+      // A sign with nothing after it
+      errno = EINVAL;
+      return INT64_MIN;
+    }
+  }
+
+  // Largest magnitude that fits: 2^63 for negative values, 2^63-1 otherwise.
+  // Splitting it into cutoff/cutlim detects overflow before it happens.
+  const int base = 10;
+  uint64_t cutoff = negative ? (uint64_t)INT64_MAX + 1u : (uint64_t)INT64_MAX;
+  const int cutlim = (int)(cutoff % base);
+  cutoff /= base;
+
+  uint64_t acc = 0;
+  int any = 0; // 0: no digit seen, 1: digits accumulated, -1: overflowed
+  for (; pos != len; ++pos) {
+    if (bytes[pos] < '0' || bytes[pos] > '9') {
+      break;
+    }
+    const int digit = bytes[pos] - '0';
+    if (any < 0 || acc > cutoff || (acc == cutoff && digit > cutlim)) {
+      any = -1;
+    } else {
+      any = 1;
+      acc = (acc * base) + (uint64_t)digit;
+    }
+  }
+
+  if (any < 0) {
+    errno = ERANGE;
+    return negative ? INT64_MIN : INT64_MAX;
+  }
+  if (any == 0) {
+    errno = EINVAL;
+    return INT64_MIN;
+  }
+  if (negative) {
+    acc = 0 - acc; // negate as unsigned; the cast below yields the signed value
+  }
+  return (int64_t)acc;
+}
+
+#ifdef NAN
+  #define _JSONT_NAN NAN
+#else
+  #define _JSONT_NAN nan("") // nan() takes a string tag, not an integer
+#endif
+
+// Parse the current value as a double; NaN + errno (EINVAL/ENOMEM) on failure.
+double jsont_float_value(jsont_ctx_t* ctx) {
+  const uint8_t* bytes = 0;
+  size_t len = _no_value(ctx) ? 0 : jsont_data_value(ctx, &bytes);
+  if (len == 0) { errno = EINVAL; return _JSONT_NAN; }
+  // The value bytes are not NUL-terminated, so parse a terminated copy rather
+  // than letting the C library scan past the end of the value.
+  char* text = (char*)malloc(len + 1);
+  if (text == 0) { errno = ENOMEM; return _JSONT_NAN; }
+  memcpy(text, bytes, len);
+  text[len] = 0;
+  double value = strtod(text, 0);
+  free(text);
+  return value;
+}
+
+// Make `tok` the current token and keep the structure stack in step with it:
+// a *_START token is pushed, a *_END token must match the stack top and pops
+// it. Returns `tok`, or JSONT_ERR (error_info set) on overflow or mismatch.
+inline static jsont_tok_t _set_tok(jsont_ctx_t* ctx, jsont_tok_t tok) {
+  ctx->curr_tok = tok;
+  if (tok == JSONT_END) {
+    return tok;
+  }
+
+  if (tok == JSONT_OBJECT_START || tok == JSONT_ARRAY_START) {
+    // Entering a structure: push its kind, provided the stack has room.
+    if (ctx->st_stack_len == ctx->st_stack_size) {
+      ctx->error_info = JSONT_ERRINFO_STACK_SIZE;
+      return ctx->curr_tok = JSONT_ERR; // TODO: Grow st_stack
+    }
+    ctx->st_stack[ctx->st_stack_len] = tok;
+    ctx->st_stack_len += 1;
+    return tok;
+  }
+
+  // Leaving a structure: the closer must pair with the innermost opener.
+  if (tok == JSONT_OBJECT_END) {
+    if (_st_stack_top(ctx) != JSONT_OBJECT_START) {
+      ctx->error_info = JSONT_ERRINFO_UNEXPECTED_OBJECT_END;
+      return ctx->curr_tok = JSONT_ERR;
+    }
+    ctx->st_stack_len -= 1;
+  } else if (tok == JSONT_ARRAY_END) {
+    if (_st_stack_top(ctx) != JSONT_ARRAY_START) {
+      ctx->error_info = JSONT_ERRINFO_UNEXPECTED_ARRAY_END;
+      return ctx->curr_tok = JSONT_ERR;
+    }
+    ctx->st_stack_len -= 1;
+  }
+  return tok;
+}
+inline static void _rewind_one_byte(jsont_ctx_t* ctx) { // cursor back by one
+ --ctx->input_buf_ptr;
+}
+inline static void _rewind_bytes(jsont_ctx_t* ctx, size_t n) { // cursor -= n
+ ctx->input_buf_ptr -= n;
+}
+inline static void _skip_bytes(jsont_ctx_t* ctx, size_t n) { // cursor += n
+ ctx->input_buf_ptr += n;
+}
+// Finish a literal (null/true/false) whose first byte was already consumed.
+// NOTE: the `slacklen` trailing bytes are skipped without being compared.
+inline static uint8_t _read_atom(jsont_ctx_t* ctx, size_t slacklen,
+                                 jsont_tok_t tok) {
+  if (_input_avail(ctx) >= slacklen) {
+    _skip_bytes(ctx, slacklen); // e.g. "ull" after "n" or "alse" after "f"
+    return _set_tok(ctx, tok);
+  }
+  _rewind_one_byte(ctx); // rewind and wait for buffer fill
+  return _set_tok(ctx, JSONT_END);
+}
+inline static bool _expects_field_name(jsont_ctx_t* ctx) {  // is a key due?
+  jsont_tok_t prev = ctx->curr_tok;
+  if (prev == JSONT_OBJECT_START) return true;
+  return prev == _JSONT_COMMA && _st_stack_top(ctx) == JSONT_OBJECT_START;
+}
+
+// Append `len` bytes from `data` to the value buffer and mark it as holding
+// the current value. The first call allocates at least _VALUE_BUF_MIN_SIZE
+// bytes; later calls grow the allocation when the new bytes would not fit.
+static void _value_buf_append(jsont_ctx_t* ctx, const uint8_t* data, size_t len) {
+  if (ctx->value_buf.size != 0) {
+    size_t needed = ctx->value_buf.length + len;
+    if (needed > ctx->value_buf.size) {
+      size_t grown = ctx->value_buf.size + (len * 2);
+      ctx->value_buf.data = realloc(ctx->value_buf.data, grown);
+      assert(ctx->value_buf.data != 0);
+      ctx->value_buf.size = grown;
+    }
+    memcpy(ctx->value_buf.data + ctx->value_buf.length, data, len);
+    ctx->value_buf.length = needed;
+  } else {  // first use: nothing has been allocated yet
+    assert(ctx->value_buf.data == 0);
+    size_t initial = len * 2;
+    if (initial < _VALUE_BUF_MIN_SIZE) initial = _VALUE_BUF_MIN_SIZE;
+    ctx->value_buf.size = initial;
+    ctx->value_buf.data = (uint8_t*)malloc(initial);
+    ctx->value_buf.length = len;
+    if (len != 0) memcpy(ctx->value_buf.data, data, len);
+  }
+  ctx->value_buf.inuse = true;
+}
+
+// Advance to the next token and return it. Commas, colons and whitespace are
+// consumed here. JSONT_END means the input ran out, possibly inside a string,
+// number or literal, in which case the cursor is rewound to the start of that
+// token. JSONT_ERR leaves a message in error_info.
+jsont_tok_t jsont_next(jsont_ctx_t* ctx) {
+  //
+  // { } [ ] n t f "
+  //         | | | |
+  //         | | | +- /[^"]*/ "
+  //         | | +- a l s e
+  //         | +- r u e
+  //         +- u l l
+  //
+  while (1) {
+    uint8_t b = _next_byte(ctx);
+    switch (b) {
+      case '{': return _set_tok(ctx, JSONT_OBJECT_START);
+      case '}': return _set_tok(ctx, JSONT_OBJECT_END);
+      case '[': return _set_tok(ctx, JSONT_ARRAY_START);
+      case ']': return _set_tok(ctx, JSONT_ARRAY_END);
+      case 'n': return _read_atom(ctx, 3, JSONT_NULL);
+      case 't': return _read_atom(ctx, 3, JSONT_TRUE);
+      case 'f': return _read_atom(ctx, 4, JSONT_FALSE);
+      case '"': {
+        ctx->input_buf_value_start = ctx->input_buf_ptr;
+        ctx->value_buf.inuse = false;
+        ctx->value_buf.length = 0;
+        uint8_t prev_b = 0;
+        while (1) {
+          b = _next_byte(ctx);
+
+          if (b == '\\') {
+            if (prev_b == '\\') {
+              // An escaped backslash ("\\"): emit one '\' and clear `b` so
+              // the byte that follows is not treated as an escape code.
+              assert(ctx->value_buf.inuse == true); // should be buffering
+              _value_buf_append(ctx, ctx->input_buf_ptr-1, 1); // append "\"
+              b = 0;
+            } else {
+              // Okay, this is an escape prefix. Move to buffering value.
+              if (ctx->value_buf.inuse == 0) {
+                // Copy any data before the "\":
+                _value_buf_append(ctx, ctx->input_buf_value_start,
+                    (ctx->input_buf_ptr-1 - ctx->input_buf_value_start));
+              }
+            }
+          } else {
+            // Any byte except '\'
+
+            if (prev_b == '\\') {
+              // Currently just after an escape character
+              assert(ctx->value_buf.inuse == true); // should be buffering
+
+              // JSON specifies a few "magic" characters that have a different
+              // meaning than their value:
+              switch (b) {
+                case 'b':
+                  _value_buf_append(ctx, (const uint8_t*)"\b", 1);
+                  break;
+                case 'f':
+                  _value_buf_append(ctx, (const uint8_t*)"\f", 1);
+                  break;
+                case 'n':
+                  _value_buf_append(ctx, (const uint8_t*)"\n", 1);
+                  break;
+                case 'r':
+                  _value_buf_append(ctx, (const uint8_t*)"\r", 1);
+                  break;
+                case 't':
+                  _value_buf_append(ctx, (const uint8_t*)"\t", 1);
+                  break;
+                case 'u': {
+                  // 4 hex digits should follow
+                  if (_input_avail(ctx) < 4) {
+                    _rewind_bytes(ctx,
+                      ctx->input_buf_ptr - (ctx->input_buf_value_start-1));
+                    return _set_tok(ctx, JSONT_END);
+                  }
+                  unsigned long utf16cp = _hex_str_to_ul(ctx->input_buf_ptr, 4);
+                  ctx->input_buf_ptr += 4;
+                  if (utf16cp == ULONG_MAX) {
+                    ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ;
+                    return _set_tok(ctx, JSONT_ERR);
+                  }
+
+                  uint32_t cp = (uint16_t)(0xffff & utf16cp);
+
+                  // Is lead surrogate?
+                  if (cp >= 0xd800u && cp <= 0xdbffu) {
+                    // TODO: Implement pairs by reading another "\uHHHH"
+                    ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ;
+                    return _set_tok(ctx, JSONT_ERR);
+                  }
+
+                  // Append UTF-8 byte(s) representing the Unicode codepoint `cp`
+                  if (cp < 0x80) {
+                    uint8_t cp8 = ((uint8_t)cp);
+                    _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                  } else if (cp < 0x800) {
+                    uint8_t cp8 = (uint8_t)((cp >> 6) | 0xc0);
+                    _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                    cp8 = (uint8_t)((cp & 0x3f) | 0x80);
+                    _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                  } else {
+                    uint8_t cp8 = (uint8_t)((cp >> 12) | 0xe0);
+                    _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                    cp8 = (uint8_t)(((cp >> 6) & 0x3f) | 0x80);
+                    _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                    cp8 = (uint8_t)((cp & 0x3f) | 0x80);
+                    _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                  }
+                  break;
+                }
+                default: {
+                  _value_buf_append(ctx, &b, 1);
+                  break;
+                }
+              } // switch
+            } else {
+              // Previous character was NOT an escape character
+              if (b == '"') {
+                // Well, this marks the end of a string
+                ctx->input_buf_value_end = ctx->input_buf_ptr-1;
+                return _set_tok(ctx, _expects_field_name(ctx)
+                    ? JSONT_FIELD_NAME : JSONT_STRING);
+              } else if (b == 0) {
+                // Input buffer ends in the middle of a string
+                _rewind_bytes(ctx,
+                  ctx->input_buf_ptr - (ctx->input_buf_value_start-1));
+                return _set_tok(ctx, JSONT_END);
+              } else {
+                if (ctx->value_buf.inuse) {
+                  _value_buf_append(ctx, &b, 1);
+                }
+              }
+            }
+          }
+
+          prev_b = b;
+        }
+      }
+      case ',':
+        if ( ctx->curr_tok == JSONT_OBJECT_START
+          || ctx->curr_tok == JSONT_ARRAY_START
+          || ctx->curr_tok == JSONT_END
+          || ctx->curr_tok == JSONT_ERR) {
+          if (ctx->curr_tok != JSONT_ERR)
+            ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COMMA;
+          return _set_tok(ctx, JSONT_ERR);
+        }
+        _set_tok(ctx, _JSONT_COMMA);
+        // read next by simply letting the outer "while" do its thing
+        break;
+
+      case ':':
+        if (ctx->curr_tok != JSONT_FIELD_NAME) {
+          ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COLON;
+          return _set_tok(ctx, JSONT_ERR);
+        }
+        // let the outer "while" do its thing
+        break;
+
+      case ' ': case '\r': case '\n': case '\t':
+        // ignore whitespace and let the outer "while" do its thing
+        break;
+
+      case 0:
+        return _set_tok(ctx, JSONT_END);
+
+      default:
+        if (isdigit((int)b) || b == '+' || b == '-') {
+          // We are reading a number
+          ctx->input_buf_value_start = ctx->input_buf_ptr-1;
+          bool is_float = false;
+          while (1) {
+            b = _next_byte(ctx);
+            if (b == '.') {
+              is_float = true;
+            } else if (b == 0) {
+              // Input buffer ends before we know that the number-value ended.
+              // Tested before the non-digit case; rewind to the first digit.
+              _rewind_bytes(ctx,
+                ctx->input_buf_ptr - ctx->input_buf_value_start);
+              return _set_tok(ctx, JSONT_END);
+            } else if (!isdigit((int)b)) {
+              _rewind_one_byte(ctx);
+              ctx->input_buf_value_end = ctx->input_buf_ptr;
+              return _set_tok(ctx, is_float ? JSONT_NUMBER_FLOAT
+                                            : JSONT_NUMBER_INT);
+            }
+          }
+        }
+
+        ctx->error_info = JSONT_ERRINFO_UNEXPECTED;
+        return _set_tok(ctx, JSONT_ERR);
+    }
+  } // while (1)
+}
+