Blame - aos/json_tokenizer.cc - RealtimeRoboticsGroup/test

blob: 38ff4e368b086bf5d33af141936ab64e2151c756 [file] [log] [blame]

Austin Schuh	d7e252d	2019-10-06 13:51:02 -0700	[diff] [blame]	1	#include "aos/json_tokenizer.h"
				2
				3	namespace aos {
				4
				5	void Tokenizer::ConsumeWhitespace() {
				6	while (true) {
				7	if (AtEnd()) {
				8	return;
				9	}
				10	// Skip any whitespace.
				11	if (Char() == ' ' \|\| Char() == '\r' \|\| Char() == '\t') {
				12	ConsumeChar();
				13	} else if (Char() == '\n') {
				14	ConsumeChar();
				15	++linenumber_;
				16	} else {
				17	// There is no fail. Once we are out of whitespace (including 0 of it),
				18	// declare success.
				19	return;
				20	}
				21	}
				22	}
				23
				24	bool Tokenizer::Consume(const char *token) {
				25	const absl::string_view original = data_;
				26	while (true) {
				27	// Finishing the token is success.
				28	if (*token == '\0') {
				29	return true;
				30	}
				31
				32	// But finishing the data first is failure.
				33	if (AtEnd()) {
				34	data_ = original;
				35	return false;
				36	}
				37
				38	// Missmatch is failure.
				39	if (*token != Char()) {
				40	data_ = original;
				41	return false;
				42	}
				43
				44	ConsumeChar();
				45	++token;
				46	}
				47	}
				48
				49	bool Tokenizer::ConsumeString(::std::string *s) {
				50	// Under no conditions is it acceptible to run out of data while parsing a
				51	// string. Any AtEnd checks should confirm that.
				52	const absl::string_view original = data_;
				53	if (AtEnd()) {
				54	return false;
				55	}
				56
				57	// Expect the leading "
				58	if (Char() != '"') {
				59	return false;
				60	}
				61
				62	ConsumeChar();
				63	absl::string_view last_parsed_data = data_;
				64	*s = ::std::string();
				65
				66	while (true) {
				67	if (AtEnd()) {
				68	data_ = original;
				69	return false;
				70	}
				71
				72	// If we get an end or an escape, do something special.
				73	if (Char() == '"' \|\| Char() == '\\') {
				74	// Save what we found up until now, not including this character.
				75	*s += ::std::string(
				76	last_parsed_data.substr(0, last_parsed_data.size() - data_.size()));
				77
				78	// Update the pointer.
				79	last_parsed_data = data_;
				80
				81	// " is the end, declare victory.
				82	if (Char() == '"') {
				83	ConsumeChar();
				84	return true;
				85	} else {
				86	ConsumeChar();
				87	// Now consume valid escape characters and add their representation onto
				88	// the output string.
				89	if (AtEnd()) {
				90	data_ = original;
				91	return false;
				92	} else if (Char() == '"') {
				93	*s += "\"";
				94	} else if (Char() == '\\') {
				95	*s += "\\";
				96	} else if (Char() == '/') {
				97	*s += "/";
				98	} else if (Char() == 'b') {
				99	*s += "\b";
				100	} else if (Char() == 'f') {
				101	*s += "\f";
				102	} else if (Char() == 'n') {
				103	*s += "\n";
				104	} else if (Char() == 'r') {
				105	*s += "\r";
				106	} else if (Char() == 't') {
				107	*s += "\t";
				108	} else if (Char() == 'u') {
				109	// TODO(austin): Unicode should be valid, but I really don't care to
				110	// do this now...
				111	fprintf(stderr, "Unexpected unicode on line %d\n", linenumber_);
				112	data_ = original;
				113	return false;
				114	}
				115	}
				116	// And skip the escaped character.
				117	last_parsed_data = data_.substr(1);
				118	}
				119
				120	ConsumeChar();
				121	}
				122	}
				123
				124	bool Tokenizer::ConsumeNumber(::std::string *s) {
				125	// Under no conditions is it acceptible to run out of data while parsing a
				126	// number. Any AtEnd() checks should confirm that.
				127	*s = ::std::string();
				128	const absl::string_view original = data_;
				129
				130	// Consume the leading - unconditionally.
				131	Consume("-");
				132
				133	// Then, we either get a 0, or we get a nonzero. Only nonzero can be followed
				134	// by a second number.
				135	if (!Consume("0")) {
				136	if (AtEnd()) {
				137	return false;
				138	} else if (Char() >= '1' && Char() <= '9') {
				139	// This wasn't a zero, but was a valid digit. Consume it.
				140	ConsumeChar();
				141	} else {
				142	return false;
				143	}
				144
				145	// Now consume any number of any digits.
				146	while (true) {
				147	if (AtEnd()) {
				148	data_ = original;
				149	return false;
				150	}
				151	if (Char() < '0' \|\| Char() > '9') {
				152	break;
				153	}
				154	ConsumeChar();
				155	}
				156	}
				157
				158	// We could now have a decimal.
				159	if (Char() == '.') {
				160	ConsumeChar();
				161	while (true) {
				162	if (AtEnd()) {
				163	data_ = original;
				164	return false;
				165	}
				166	// And any number of digits.
				167	if (Char() < '0' \|\| Char() > '9') {
				168	break;
				169	}
				170	ConsumeChar();
				171	}
				172	}
				173
				174	// And now an exponent.
				175	if (Char() == 'e' \|\| Char() == 'E') {
				176	ConsumeChar();
				177	if (AtEnd()) {
				178	data_ = original;
				179	return false;
				180	}
				181
				182	// Which could have a +-
				183	if (Char() == '+' \|\| Char() == '-') {
				184	ConsumeChar();
				185	}
				186	int count = 0;
				187	while (true) {
				188	if (AtEnd()) {
				189	data_ = original;
				190	return false;
				191	}
				192	// And digits.
				193	if (Char() < '0' \|\| Char() > '9') {
				194	break;
				195	}
				196	ConsumeChar();
				197	++count;
				198	}
				199	// But, it is an error to have an exponent and nothing following it.
				200	if (count == 0) {
				201	data_ = original;
				202	return false;
				203	}
				204	}
				205
				206	*s = ::std::string(original.substr(0, original.size() - data_.size()));
				207	return true;
				208	}
				209
				210	Tokenizer::TokenType Tokenizer::Next() {
				211	switch (state_) {
				212	case State::kExpectObjectStart:
				213	// We should always start out with a {
				214	if (!Consume("{")) return TokenType::kError;
				215
				216	// Document that we just started an object.
				217	object_type_.push_back(ObjectType::kObject);
				218
				219	ConsumeWhitespace();
				220
				221	if (Consume("}")) {
				222	ConsumeWhitespace();
				223	state_ = State::kExpectObjectEnd;
				224	} else {
				225	state_ = State::kExpectField;
				226	}
				227	return TokenType::kStartObject;
				228
				229	case State::kExpectField: {
				230	// Fields are built up of strings, whitespace, and then a : (followed by
				231	// whitespace...)
				232	::std::string s;
				233	if (!ConsumeString(&s)) {
				234	fprintf(stderr, "Error on line %d, expected string for field name.\n",
				235	linenumber_);
				236	return TokenType::kError;
				237	}
				238	field_name_ = ::std::move(s);
				239
				240	ConsumeWhitespace();
				241
				242	if (!Consume(":")) {
				243	fprintf(stderr, "Error on line %d\n", linenumber_);
				244	return TokenType::kError;
				245	}
				246
				247	ConsumeWhitespace();
				248
				249	state_ = State::kExpectValue;
				250
				251	return TokenType::kField;
				252	} break;
				253	case State::kExpectValue: {
				254	TokenType result = TokenType::kError;
				255
				256	::std::string s;
				257	if (Consume("{")) {
				258	// Fields are in objects. Record and recurse.
				259	object_type_.push_back(ObjectType::kObject);
				260
				261	ConsumeWhitespace();
				262
				263	state_ = State::kExpectField;
				264	return TokenType::kStartObject;
				265	} else if (Consume("[")) {
				266	// Values are in arrays. Record and recurse.
				267	object_type_.push_back(ObjectType::kArray);
				268
				269	ConsumeWhitespace();
				270	state_ = State::kExpectValue;
				271	return TokenType::kStartArray;
				272	} else if (ConsumeString(&s)) {
				273	// Parsed as a string, grab it.
				274	field_value_ = ::std::move(s);
				275	result = TokenType::kStringValue;
				276	} else if (ConsumeNumber(&s)) {
				277	// Parsed as a number, grab it.
				278	field_value_ = ::std::move(s);
				279	result = TokenType::kNumberValue;
				280	} else if (Consume("true")) {
				281	// Parsed as a true, grab it.
				282	field_value_ = "true";
				283	result = TokenType::kTrueValue;
				284	} else if (Consume("false")) {
				285	// Parsed as a false, grab it.
				286	field_value_ = "false";
				287	result = TokenType::kFalseValue;
				288	} else {
				289	// Couldn't parse, so we have a syntax error.
				290	fprintf(stderr, "Error line %d, invalid field value.\n", linenumber_);
				291	}
				292
				293	ConsumeWhitespace();
				294
				295	// After a field, we either have a , and another field (or value if we are
				296	// in an array), or we should be closing out the object (or array).
				297	if (Consume(",")) {
				298	ConsumeWhitespace();
				299	switch (object_type_.back()) {
				300	case ObjectType::kObject:
				301	state_ = State::kExpectField;
				302	break;
				303	case ObjectType::kArray:
				304	state_ = State::kExpectValue;
				305	break;
				306	}
				307	} else {
				308	// Sanity check that the stack is deep enough.
				309	if (object_type_.size() == 0) {
				310	fprintf(stderr, "Error on line %d\n", linenumber_);
				311	return TokenType::kError;
				312	}
				313
				314	// And then require closing out the object.
				315	switch (object_type_.back()) {
				316	case ObjectType::kObject:
				317	if (Consume("}")) {
				318	ConsumeWhitespace();
				319	state_ = State::kExpectObjectEnd;
				320	} else {
				321	return TokenType::kError;
				322	}
				323	break;
				324	case ObjectType::kArray:
				325	if (Consume("]")) {
				326	ConsumeWhitespace();
				327	state_ = State::kExpectArrayEnd;
				328	} else {
				329	return TokenType::kError;
				330	}
				331	break;
				332	}
				333	}
				334	return result;
				335	} break;
				336
				337	case State::kExpectArrayEnd:
				338	case State::kExpectObjectEnd: {
				339	const TokenType result = state_ == State::kExpectArrayEnd
				340	? TokenType::kEndArray
				341	: TokenType::kEndObject;
				342	// This is a transient state so we can send 2 tokens out in a row. We
				343	// discover the object or array end at the end of reading the value.
				344	object_type_.pop_back();
				345	if (object_type_.size() == 0) {
				346	// We unwound the outer object. We should send kEnd next.
				347	state_ = State::kExpectEnd;
				348	} else if (object_type_.back() == ObjectType::kObject) {
				349	// If we are going into an object, it should either have another field
				350	// or end.
				351	if (Consume(",")) {
				352	ConsumeWhitespace();
				353	state_ = State::kExpectField;
				354	} else if (Consume("}")) {
				355	ConsumeWhitespace();
				356	state_ = State::kExpectObjectEnd;
				357	} else {
				358	return TokenType::kError;
				359	}
				360	} else if (object_type_.back() == ObjectType::kArray) {
				361	// If we are going into an array, it should either have another value
				362	// or end.
				363	if (Consume(",")) {
				364	ConsumeWhitespace();
				365	state_ = State::kExpectValue;
				366	} else if (Consume("]")) {
				367	ConsumeWhitespace();
				368	state_ = State::kExpectArrayEnd;
				369	} else {
				370	return TokenType::kError;
				371	}
				372	}
				373	// And then send out the correct token.
				374	return result;
				375	}
				376	case State::kExpectEnd:
				377	// If we are supposed to be done, confirm nothing is after the end.
				378	if (AtEnd()) {
				379	return TokenType::kEnd;
				380	} else {
				381	fprintf(stderr, "Data past end at line %d\n", linenumber_);
				382	return TokenType::kError;
				383	}
				384	}
				385	return TokenType::kError;
				386	}
				387
				388	bool Tokenizer::FieldAsInt(long long *value) {
				389	const char *pos = field_value().c_str();
				390	errno = 0;
				391	value = strtoll(field_value().c_str(), const_cast<char *>(&pos), 10);
				392	if (pos != field_value().c_str() + field_value().size() \|\| errno != 0) {
				393	return false;
				394	}
				395	return true;
				396	}
				397
				398	bool Tokenizer::FieldAsDouble(double *value) {
				399	const char *pos = field_value().c_str();
				400	errno = 0;
				401	value = strtod(field_value().c_str(), const_cast<char *>(&pos));
				402
				403	if (pos != field_value().c_str() + field_value().size() \|\| errno != 0) {
				404	return false;
				405	}
				406	return true;
				407	}
				408
				409	} // namespace aos