Blame - third_party/jsont/jsont.c - RealtimeRoboticsGroup/test

blob: b318494327b31804ab2b5ae5f462de7232e80485 [file] [log] [blame]

Austin Schuh	f417eaf	2019-09-16 21:58:36 -0700	[diff] [blame]	1	// JSON Tokenizer. Copyright (c) 2012, Rasmus Andersson. All rights reserved.
				2	// Use of this source code is governed by a MIT-style license that can be
				3	// found in the LICENSE file.
				4	#include <stdlib.h>
				5	#include <stdint.h>
				6	#include <stdbool.h>
				7	#include <limits.h>
				8	#include <ctype.h> // isdigit
				9	#include <errno.h>
				10	#include <string.h>
				11	#include <math.h>
				12	#include <assert.h>
				13
				14	// Error info
				15	#ifndef JSONT_ERRINFO_CUSTOM
				16	#define jsont_err_t const char*
				17	#define DEF_EM(NAME, msg) static jsont_err_t JSONT_ERRINFO_##NAME = msg
				18	DEF_EM(STACK_SIZE, "Stack size limit exceeded");
				19	DEF_EM(UNEXPECTED_OBJECT_END,
				20	"Unexpected end of object while not in an object");
				21	DEF_EM(UNEXPECTED_ARRAY_END, "Unexpected end of array while not in an array");
				22	DEF_EM(UNEXPECTED_COMMA, "Unexpected \",\"");
				23	DEF_EM(UNEXPECTED_COLON, "Unexpected \":\"");
				24	DEF_EM(UNEXPECTED, "Unexpected input");
				25	DEF_EM(UNEXPECTED_UNICODE_SEQ, "Malformed unicode encoded sequence in string");
				26	#undef DEF_EM
				27	#endif
				28
				29	// Size of stack used for structures (in/out array and objects). This value
				30	// is a balance between memory size of a ctx and how many levels deep the
				31	// tokenizer can go.
				32	#define _STRUCT_TYPE_STACK_SIZE 512
				33	#define _VALUE_BUF_MIN_SIZE 64
				34
				35	static const uint8_t kHexValueTable[55] = {
				36	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // 0-0
				37	-1, -1, -1, -1, -1, -1, -1,
				38	10, 11, 12, 13, 14, 15, // A-F
				39	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				40	-1, -1, -1, -1, -1, -1, -1,
				41	10, 11, 12, 13, 14, 15 // a-f
				42	};
				43
				44	typedef uint8_t jsont_tok_t;
				45
				46	typedef struct jsont_ctx {
				47	void* user_data;
				48	const uint8_t* input_buf;
				49	const uint8_t* input_buf_ptr;
				50	size_t input_len;
				51	const uint8_t* input_buf_value_start;
				52	const uint8_t* input_buf_value_end;
				53	struct {
				54	uint8_t* data;
				55	size_t size;
				56	size_t length;
				57	bool inuse;
				58	} value_buf;
				59	jsont_err_t error_info;
				60	jsont_tok_t curr_tok;
				61	size_t st_stack_size;
				62	size_t st_stack_len;
				63	jsont_tok_t st_stack[_STRUCT_TYPE_STACK_SIZE];
				64	} jsont_ctx_t;
				65
				66	#define _JSONT_IN_SOURCE
				67	#include <jsont.h>
				68
				69	unsigned long _hex_str_to_ul(const uint8_t* bytes, size_t len) {
				70	unsigned long value = 0;
				71	unsigned long cutoff = ULONG_MAX / 16;
				72	int cutoff_digit = (int)(ULONG_MAX - cutoff * 16);
				73
				74	for (size_t i = 0; i != len; ++i) {
				75	uint8_t b = bytes[i];
				76	int digit = (b > '0'-1 && b < 'f'+1) ? kHexValueTable[b-'0'] : -1;
Austin Schuh	3e95e5d	2019-09-20 00:08:54 -0700	[diff] [blame]	77	if (b == 0xff \|\| // bad digit
Austin Schuh	f417eaf	2019-09-16 21:58:36 -0700	[diff] [blame]	78	(value > cutoff) \|\| // overflow
				79	((value == cutoff) && (digit > cutoff_digit)) ) {
				80	return ULONG_MAX;
				81	} else {
				82	value = (value * 16) + digit;
				83	}
				84	}
				85
				86	return value;
				87	}
				88
				89	jsont_ctx_t* jsont_create(void* user_data) {
				90	jsont_ctx_t* ctx = (jsont_ctx_t*)calloc(1, sizeof(jsont_ctx_t));
				91	ctx->user_data = user_data;
				92	ctx->st_stack_size = _STRUCT_TYPE_STACK_SIZE;
				93	return ctx;
				94	}
				95
				96	void jsont_destroy(jsont_ctx_t* ctx) {
				97	if (ctx->value_buf.data != 0) {
				98	free(ctx->value_buf.data);
				99	}
				100	free(ctx);
				101	}
				102
				103	void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) {
				104	ctx->input_buf_ptr = ctx->input_buf = bytes;
				105	ctx->input_len = length;
				106	ctx->st_stack_len = 0;
				107	ctx->curr_tok = JSONT_END;
				108	ctx->input_buf_value_start = 0;
				109	ctx->input_buf_value_end = 0;
				110	ctx->value_buf.length = 0;
				111	ctx->value_buf.inuse = false;
				112	ctx->error_info = 0;
				113	}
				114
				115	jsont_tok_t jsont_current(const jsont_ctx_t* ctx) {
				116	return ctx->curr_tok;
				117	}
				118
				119	void* jsont_user_data(const jsont_ctx_t* ctx) {
				120	return ctx->user_data;
				121	}
				122
				123	// Get the current/last byte read. Suitable for debugging JSONT_ERR
				124	uint8_t jsont_current_byte(jsont_ctx_t* ctx) {
				125	return (ctx->input_buf_ptr == 0) ? 0 : *(ctx->input_buf_ptr-1);
				126	}
				127
				128	size_t jsont_current_offset(jsont_ctx_t* ctx) {
				129	return ctx->input_buf_ptr - ctx->input_buf;
				130	}
				131
				132	jsont_err_t jsont_error_info(jsont_ctx_t* ctx) {
				133	return ctx->error_info;
				134	}
				135
				136	inline static bool _no_value(jsont_ctx_t* ctx) {
				137	return ctx->input_buf_value_start == 0
				138	\|\| ctx->curr_tok < _JSONT_VALUES_START
				139	\|\| ctx->curr_tok > _JSONT_VALUES_END;
				140	}
				141
				142	inline static size_t _input_avail(jsont_ctx_t* ctx) {
				143	return ctx->input_len - (ctx->input_buf_ptr - ctx->input_buf);
				144	}
				145
				146	inline static uint8_t _next_byte(jsont_ctx_t* ctx) {
				147	return (_input_avail(ctx) == 0) ? 0 : *(ctx->input_buf_ptr++);
				148	}
				149
				150	inline static jsont_tok_t _st_stack_top(const jsont_ctx_t* ctx) {
				151	return (ctx->st_stack_len != 0) ? ctx->st_stack[ctx->st_stack_len-1]
				152	: JSONT_END;
				153	}
				154
				155	size_t jsont_data_value(jsont_ctx_t* ctx, const uint8_t** bytes) {
				156	if (_no_value(ctx)) {
				157	return 0;
				158	} else {
				159	if (ctx->value_buf.inuse) {
				160	*bytes = ctx->value_buf.data;
				161	return ctx->value_buf.length;
				162	} else {
				163	*bytes = ctx->input_buf_value_start;
				164	return ctx->input_buf_value_end - ctx->input_buf_value_start;
				165	}
				166	}
				167	}
				168
				169	bool jsont_data_equals(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) {
				170	if (ctx->value_buf.inuse) {
				171	return (ctx->value_buf.length == length) &&
				172	(memcmp((const void*)ctx->value_buf.data,
				173	(const void*)bytes, length) == 0);
				174	} else {
Austin Schuh	3e95e5d	2019-09-20 00:08:54 -0700	[diff] [blame]	175	return (ctx->input_buf_value_end - ctx->input_buf_value_start ==
				176	(ssize_t)length) &&
				177	(memcmp((const void *)ctx->input_buf_value_start,
				178	(const void *)bytes, length) == 0);
Austin Schuh	f417eaf	2019-09-16 21:58:36 -0700	[diff] [blame]	179	}
				180	}
				181
				182	char* jsont_strcpy_value(jsont_ctx_t* ctx) {
				183	if (_no_value(ctx)) {
				184	return 0;
				185	} else {
				186	const uint8_t* bytes = 0;
				187	size_t len = jsont_data_value(ctx, &bytes);
				188	char* buf = (char*)malloc(len+1);
				189	if (memcpy((void)buf, (const void)bytes, len) != buf) {
				190	return 0;
				191	}
				192	buf[len] = 0;
				193	return buf;
				194	}
				195	}
				196
				197	int64_t jsont_int_value(jsont_ctx_t* ctx) {
				198	if (_no_value(ctx)) {
				199	return INT64_MIN;
				200	}
				201
				202	const uint8_t* start = 0;
				203	size_t len = jsont_data_value(ctx, &start);
				204	if (len == 0) {
				205	return INT64_MIN;
				206	}
				207	const uint8_t* end = start + len + 1;
				208
				209	bool negative;
				210	uint8_t b = *start++;
				211	const int base = 10;
				212
				213	if (b == '-') {
				214	negative = true;
				215	b = *start++;
				216	if (start == end) {
				217	errno = EINVAL;
				218	return INT64_MIN;
				219	}
				220	} else {
				221	negative = false;
				222	if (b == '+') {
				223	b = *start++;
				224	if (start == end) {
				225	errno = EINVAL;
				226	return INT64_MIN;
				227	}
				228	}
				229	}
				230
				231	uint64_t acc = 0;
				232	int any = 0;
				233	uint64_t cutoff = negative
				234	? (uint64_t)-(INT64_MIN + INT64_MAX) + INT64_MAX
				235	: INT64_MAX;
				236	int cutlim = cutoff % base;
				237	cutoff /= base;
				238	for ( ; start != end; b = *start++) {
				239	if (b >= '0' && b <= '9') b -= '0'; else break;
				240	if (any < 0 \|\| acc > cutoff \|\| (acc == cutoff && b > cutlim)) {
				241	any = -1;
				242	} else {
				243	any = 1;
				244	acc *= base;
				245	acc += b;
				246	}
				247	}
				248
				249	if (any < 0) {
				250	acc = negative ? INT64_MIN : INT64_MAX;
				251	errno = ERANGE;
				252	} else if (!any) {
				253	errno = EINVAL;
				254	return INT64_MIN;
				255	} else if (negative) {
				256	acc = -acc;
				257	}
				258
				259	return (int64_t)acc;
				260	}
				261
				262	#ifdef NAN
				263	#define _JSONT_NAN NAN
				264	#else
				265	#define _JSONT_NAN nan(0)
				266	#endif
				267
				268	double jsont_float_value(jsont_ctx_t* ctx) {
				269	// Note: This might cause a segfault if the input is at the end, so we cause
				270	// an error if we try to read a float value while at the end of the input.
				271	if (_no_value(ctx) \|\| _input_avail(ctx) == 0) {
				272	errno = EINVAL;
				273	return _JSONT_NAN;
				274	}
				275
				276	const uint8_t* bytes = 0;
				277	size_t len = jsont_data_value(ctx, &bytes);
				278	if (len == 0) {
				279	return _JSONT_NAN;
				280	}
				281	return atof((const char*)bytes);
				282	}
				283
				284	inline static jsont_tok_t _set_tok(jsont_ctx_t* ctx, jsont_tok_t tok) {
				285	ctx->curr_tok = tok;
				286
				287	if (tok != JSONT_END) {
				288	if (tok == JSONT_OBJECT_START) {
				289	if (ctx->st_stack_len == ctx->st_stack_size) {
				290	ctx->error_info = JSONT_ERRINFO_STACK_SIZE;
				291	return ctx->curr_tok = JSONT_ERR; // TODO: Grow st_stack
				292	}
				293	ctx->st_stack[ctx->st_stack_len++] = JSONT_OBJECT_START;
				294
				295	} else if (tok == JSONT_OBJECT_END) {
				296	if (_st_stack_top(ctx) != JSONT_OBJECT_START) {
				297	ctx->error_info = JSONT_ERRINFO_UNEXPECTED_OBJECT_END;
				298	return ctx->curr_tok = JSONT_ERR;
				299	}
				300	--ctx->st_stack_len;
				301
				302	} else if (tok == JSONT_ARRAY_START) {
				303	if (ctx->st_stack_len == ctx->st_stack_size) {
				304	ctx->error_info = JSONT_ERRINFO_STACK_SIZE;
				305	return ctx->curr_tok = JSONT_ERR;
				306	}
				307	ctx->st_stack[ctx->st_stack_len++] = JSONT_ARRAY_START;
				308
				309	} else if (tok == JSONT_ARRAY_END) {
				310	if (_st_stack_top(ctx) != JSONT_ARRAY_START) {
				311	ctx->error_info = JSONT_ERRINFO_UNEXPECTED_ARRAY_END;
				312	return ctx->curr_tok = JSONT_ERR;
				313	}
				314	--ctx->st_stack_len;
				315	}
				316	}
				317
				318	return tok;
				319	}
				320	inline static void _rewind_one_byte(jsont_ctx_t* ctx) {
				321	--ctx->input_buf_ptr;
				322	}
				323	inline static void _rewind_bytes(jsont_ctx_t* ctx, size_t n) {
				324	ctx->input_buf_ptr -= n;
				325	}
				326	inline static void _skip_bytes(jsont_ctx_t* ctx, size_t n) {
				327	ctx->input_buf_ptr += n;
				328	}
				329	inline static uint8_t _read_atom(jsont_ctx_t* ctx, size_t slacklen,
				330	jsont_tok_t tok) {
				331	if (_input_avail(ctx) < slacklen) {
				332	// rewind and wait for buffer fill
				333	_rewind_one_byte(ctx);
				334	return _set_tok(ctx, JSONT_END);
				335	} else {
				336	_skip_bytes(ctx, slacklen); // e.g. "ull" after "n" or "alse" after "f"
				337	return _set_tok(ctx, tok);
				338	}
				339	}
				340	inline static bool _expects_field_name(jsont_ctx_t* ctx) {
				341	return ( ctx->curr_tok == JSONT_OBJECT_START
				342	\|\| ( ctx->curr_tok == _JSONT_COMMA
				343	&& _st_stack_top(ctx) == JSONT_OBJECT_START) );
				344	}
				345
				346	static void _value_buf_append(jsont_ctx_t* ctx, const uint8_t* data, size_t len) {
				347	//printf("_value_buf_append(<ctx>, %p, %zu)\n", data, len);
				348	if (ctx->value_buf.size == 0) {
				349	assert(ctx->value_buf.data == 0);
				350	ctx->value_buf.length = len;
				351	ctx->value_buf.size = len * 2;
				352	if (ctx->value_buf.size < _VALUE_BUF_MIN_SIZE) {
				353	ctx->value_buf.size = _VALUE_BUF_MIN_SIZE;
				354	}
				355	ctx->value_buf.data = (uint8_t*)malloc(ctx->value_buf.size);
				356	if (len != 0) {
				357	memcpy(ctx->value_buf.data, data, len);
				358	}
				359	} else {
				360	if (ctx->value_buf.length + len > ctx->value_buf.size) {
				361	size_t new_size = ctx->value_buf.size + (len * 2);
				362	ctx->value_buf.data = realloc(ctx->value_buf.data, new_size);
				363	assert(ctx->value_buf.data != 0);
				364	ctx->value_buf.size = new_size;
				365	}
				366	memcpy(ctx->value_buf.data + ctx->value_buf.length, data, len);
				367	ctx->value_buf.length += len;
				368	}
				369	ctx->value_buf.inuse = true;
				370	}
				371
				372	jsont_tok_t jsont_next(jsont_ctx_t* ctx) {
				373	//
				374	// { } [ ] n t f "
				375	// \| \| \| \|
				376	// \| \| \| +- /[^"]*/ "
				377	// \| \| +- a l s e
				378	// \| +- r u e
				379	// +- u l l
				380	//
				381	while (1) {
				382	uint8_t b = _next_byte(ctx);
				383	switch (b) {
				384	case '{': return _set_tok(ctx, JSONT_OBJECT_START);
				385	case '}': return _set_tok(ctx, JSONT_OBJECT_END);
				386	case '[': return _set_tok(ctx, JSONT_ARRAY_START);
				387	case ']': return _set_tok(ctx, JSONT_ARRAY_END);
				388	case 'n': return _read_atom(ctx, 3, JSONT_NULL);
				389	case 't': return _read_atom(ctx, 3, JSONT_TRUE);
				390	case 'f': return _read_atom(ctx, 4, JSONT_FALSE);
				391	case '"': {
				392	ctx->input_buf_value_start = ctx->input_buf_ptr;
				393	ctx->value_buf.inuse = false;
				394	ctx->value_buf.length = 0;
				395	uint8_t prev_b = 0;
				396	while (1) {
				397	b = _next_byte(ctx);
				398
				399	if (b == '\\') {
				400	if (prev_b == '\\') {
				401	// This is an actual '\'.
				402	assert(ctx->value_buf.inuse == true); // should be buffering
				403	_value_buf_append(ctx, ctx->input_buf_ptr-1, 1); // append "\"
				404	} else {
				405	// Okay, this is an escape prefix. Move to buffering value.
				406	if (ctx->value_buf.inuse == 0) {
				407	_value_buf_append(ctx,
				408	ctx->input_buf_value_start,
				409	// any data before the "\":
				410	(ctx->input_buf_ptr-1 - ctx->input_buf_value_start) );
				411	}
				412	}
				413	} else {
				414	// Any byte except '\'
				415
				416	if (prev_b == '\\') {
				417	// Currently just after an escape character
				418	assert(ctx->value_buf.inuse == true); // should be buffering
				419
				420	// JSON specifies a few "magic" characters that have a different
				421	// meaning than their value:
				422	switch (b) {
				423	case 'b':
				424	_value_buf_append(ctx, (const uint8_t*)"\b", 1);
				425	break;
				426	case 'f':
				427	_value_buf_append(ctx, (const uint8_t*)"\f", 1);
				428	break;
				429	case 'n':
				430	_value_buf_append(ctx, (const uint8_t*)"\n", 1);
				431	break;
				432	case 'r':
				433	_value_buf_append(ctx, (const uint8_t*)"\r", 1);
				434	break;
				435	case 't':
				436	_value_buf_append(ctx, (const uint8_t*)"\t", 1);
				437	break;
				438	case 'u': {
				439	// 4 hex digits should follow
				440	if (_input_avail(ctx) < 4) {
				441	_rewind_bytes(ctx,
				442	ctx->input_buf_ptr - (ctx->input_buf_value_start-1));
				443	return _set_tok(ctx, JSONT_END);
				444	}
				445	unsigned long utf16cp = _hex_str_to_ul(ctx->input_buf_ptr, 4);
				446	ctx->input_buf_ptr += 4;
				447	if (utf16cp == ULONG_MAX) {
				448	ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ;
				449	return _set_tok(ctx, JSONT_ERR);
				450	}
				451
				452	uint32_t cp = (uint16_t)(0xffff & utf16cp);
				453
				454	// Is lead surrogate?
				455	if (cp >= 0xd800u && cp <= 0xdbffu) {
				456	// TODO: Implement pairs by reading another "\uHHHH"
				457	ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ;
				458	return _set_tok(ctx, JSONT_ERR);
				459	}
				460
				461	// Append UTF-8 byte(s) representing the Unicode codepoint `cp`
				462	if (cp < 0x80) {
				463	uint8_t cp8 = ((uint8_t)cp);
				464	_value_buf_append(ctx, (const uint8_t*)&cp8, 1);
				465	} else if (cp < 0x800) {
				466	uint8_t cp8 = (uint8_t)((cp >> 6) \| 0xc0);
				467	_value_buf_append(ctx, (const uint8_t*)&cp8, 1);
				468	cp8 = (uint8_t)((cp & 0x3f) \| 0x80);
				469	_value_buf_append(ctx, (const uint8_t*)&cp8, 1);
				470	} else {
				471	uint8_t cp8 = (uint8_t)((cp >> 12) \| 0xe0);
				472	_value_buf_append(ctx, (const uint8_t*)&cp8, 1);
				473	cp8 = (uint8_t)(((cp >> 6) & 0x3f) \| 0x80);
				474	_value_buf_append(ctx, (const uint8_t*)&cp8, 1);
				475	cp8 = (uint8_t)((cp & 0x3f) \| 0x80);
				476	_value_buf_append(ctx, (const uint8_t*)&cp8, 1);
				477	}
				478
				479	break;
				480	}
				481	default: {
				482	_value_buf_append(ctx, &b, 1);
				483	break;
				484	}
				485	} // switch
				486
				487	} else {
				488	// Previous character was NOT an escape character
				489
				490	if (b == '"') {
				491	// Well, this marks the end of a string
				492	ctx->input_buf_value_end = ctx->input_buf_ptr-1;
				493	return _set_tok(ctx, _expects_field_name(ctx)
				494	? JSONT_FIELD_NAME : JSONT_STRING);
				495	break;
				496	} else if (b == 0) {
				497	// Input buffer ends in the middle of a string
				498	_rewind_bytes(ctx,
				499	ctx->input_buf_ptr - (ctx->input_buf_value_start-1));
				500	return _set_tok(ctx, JSONT_END);
				501	} else {
				502	if (ctx->value_buf.inuse) {
				503	_value_buf_append(ctx, &b, 1);
				504	}
				505	}
				506	}
				507	}
				508
				509	prev_b = b;
				510	}
				511	}
				512	case ',':
				513	if ( ctx->curr_tok == JSONT_OBJECT_START
				514	\|\| ctx->curr_tok == JSONT_ARRAY_START
				515	\|\| ctx->curr_tok == JSONT_END
				516	\|\| ctx->curr_tok == JSONT_ERR) {
				517	if (ctx->curr_tok != JSONT_ERR)
				518	ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COMMA;
				519	return _set_tok(ctx, JSONT_ERR);
				520	}
				521	_set_tok(ctx, _JSONT_COMMA);
				522	// read next by simply letting the outer "while" do its thing
				523	break;
				524
				525	case ':':
				526	if (ctx->curr_tok != JSONT_FIELD_NAME) {
				527	ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COLON;
				528	return _set_tok(ctx, JSONT_ERR);
				529	}
				530	// let the outer "while" do its thing
				531	break;
				532
				533	case ' ': case '\r': case '\n': case '\t':
				534	// ignore whitespace and let the outer "while" do its thing
				535	break;
				536
				537	case 0:
				538	//printf("** %d\n", __LINE__);
				539	return _set_tok(ctx, JSONT_END);
				540
				541	default:
				542	if (isdigit((int)b) \|\| b == '+' \|\| b == '-') {
				543	// We are reading a number
				544	ctx->input_buf_value_start = ctx->input_buf_ptr-1;
				545	//uint8_t prev_b = 0;
				546	bool is_float = false;
				547	while (1) {
				548	b = _next_byte(ctx);
				549	if (b == '.') {
				550	is_float = true;
				551	} else if (!isdigit((int)b)) {
				552	_rewind_one_byte(ctx);
				553	ctx->input_buf_value_end = ctx->input_buf_ptr;
				554	return _set_tok(ctx, is_float ? JSONT_NUMBER_FLOAT
				555	: JSONT_NUMBER_INT);
				556	} else if (b == 0) {
				557	// Input buffer ends before we know that the number-value ended
				558	_rewind_bytes(ctx, ctx->input_buf_ptr
				559	- (ctx->input_buf_value_start-1));
				560	return _set_tok(ctx, JSONT_END);
				561	}
				562	}
				563	}
				564
				565	ctx->error_info = JSONT_ERRINFO_UNEXPECTED;
				566	return _set_tok(ctx, JSONT_ERR);
				567	}
				568	} // while (1)
				569	}
				570