Blame - aos/json_tokenizer.cc - RealtimeRoboticsGroup/test

blob: 78bf46ef64edd7e7cb79c8cb2724c99d5c22e131 [file] [log] [blame]

Austin Schuh	d7e252d	2019-10-06 13:51:02 -0700	[diff] [blame]	1	#include "aos/json_tokenizer.h"
				2
				3	namespace aos {
				4
				5	void Tokenizer::ConsumeWhitespace() {
				6	while (true) {
				7	if (AtEnd()) {
				8	return;
				9	}
				10	// Skip any whitespace.
				11	if (Char() == ' ' \|\| Char() == '\r' \|\| Char() == '\t') {
				12	ConsumeChar();
				13	} else if (Char() == '\n') {
				14	ConsumeChar();
				15	++linenumber_;
Austin Schuh	81da4b2	2019-10-06 14:03:24 -0700	[diff] [blame]	16	} else if (Consume("/*")) {
				17	while (!Consume("*/")) {
				18	if (Char() == '\n') {
				19	++linenumber_;
				20	}
				21	ConsumeChar();
				22	}
Austin Schuh	d7e252d	2019-10-06 13:51:02 -0700	[diff] [blame]	23	} else {
				24	// There is no fail. Once we are out of whitespace (including 0 of it),
				25	// declare success.
				26	return;
				27	}
				28	}
				29	}
				30
				31	bool Tokenizer::Consume(const char *token) {
				32	const absl::string_view original = data_;
				33	while (true) {
				34	// Finishing the token is success.
				35	if (*token == '\0') {
				36	return true;
				37	}
				38
				39	// But finishing the data first is failure.
				40	if (AtEnd()) {
				41	data_ = original;
				42	return false;
				43	}
				44
				45	// Missmatch is failure.
				46	if (*token != Char()) {
				47	data_ = original;
				48	return false;
				49	}
				50
				51	ConsumeChar();
				52	++token;
				53	}
				54	}
				55
				56	bool Tokenizer::ConsumeString(::std::string *s) {
				57	// Under no conditions is it acceptible to run out of data while parsing a
				58	// string. Any AtEnd checks should confirm that.
				59	const absl::string_view original = data_;
				60	if (AtEnd()) {
				61	return false;
				62	}
				63
				64	// Expect the leading "
				65	if (Char() != '"') {
				66	return false;
				67	}
				68
				69	ConsumeChar();
				70	absl::string_view last_parsed_data = data_;
				71	*s = ::std::string();
				72
				73	while (true) {
				74	if (AtEnd()) {
				75	data_ = original;
				76	return false;
				77	}
				78
				79	// If we get an end or an escape, do something special.
				80	if (Char() == '"' \|\| Char() == '\\') {
				81	// Save what we found up until now, not including this character.
				82	*s += ::std::string(
				83	last_parsed_data.substr(0, last_parsed_data.size() - data_.size()));
				84
				85	// Update the pointer.
				86	last_parsed_data = data_;
				87
				88	// " is the end, declare victory.
				89	if (Char() == '"') {
				90	ConsumeChar();
				91	return true;
				92	} else {
				93	ConsumeChar();
				94	// Now consume valid escape characters and add their representation onto
				95	// the output string.
				96	if (AtEnd()) {
				97	data_ = original;
				98	return false;
				99	} else if (Char() == '"') {
				100	*s += "\"";
				101	} else if (Char() == '\\') {
				102	*s += "\\";
				103	} else if (Char() == '/') {
				104	*s += "/";
				105	} else if (Char() == 'b') {
				106	*s += "\b";
				107	} else if (Char() == 'f') {
				108	*s += "\f";
				109	} else if (Char() == 'n') {
				110	*s += "\n";
				111	} else if (Char() == 'r') {
				112	*s += "\r";
				113	} else if (Char() == 't') {
				114	*s += "\t";
				115	} else if (Char() == 'u') {
				116	// TODO(austin): Unicode should be valid, but I really don't care to
				117	// do this now...
				118	fprintf(stderr, "Unexpected unicode on line %d\n", linenumber_);
				119	data_ = original;
				120	return false;
				121	}
				122	}
				123	// And skip the escaped character.
				124	last_parsed_data = data_.substr(1);
				125	}
				126
				127	ConsumeChar();
				128	}
				129	}
				130
				131	bool Tokenizer::ConsumeNumber(::std::string *s) {
				132	// Under no conditions is it acceptible to run out of data while parsing a
				133	// number. Any AtEnd() checks should confirm that.
				134	*s = ::std::string();
				135	const absl::string_view original = data_;
				136
				137	// Consume the leading - unconditionally.
				138	Consume("-");
				139
				140	// Then, we either get a 0, or we get a nonzero. Only nonzero can be followed
				141	// by a second number.
				142	if (!Consume("0")) {
				143	if (AtEnd()) {
				144	return false;
				145	} else if (Char() >= '1' && Char() <= '9') {
				146	// This wasn't a zero, but was a valid digit. Consume it.
				147	ConsumeChar();
				148	} else {
				149	return false;
				150	}
				151
				152	// Now consume any number of any digits.
				153	while (true) {
				154	if (AtEnd()) {
				155	data_ = original;
				156	return false;
				157	}
				158	if (Char() < '0' \|\| Char() > '9') {
				159	break;
				160	}
				161	ConsumeChar();
				162	}
				163	}
				164
				165	// We could now have a decimal.
				166	if (Char() == '.') {
				167	ConsumeChar();
				168	while (true) {
				169	if (AtEnd()) {
				170	data_ = original;
				171	return false;
				172	}
				173	// And any number of digits.
				174	if (Char() < '0' \|\| Char() > '9') {
				175	break;
				176	}
				177	ConsumeChar();
				178	}
				179	}
				180
				181	// And now an exponent.
				182	if (Char() == 'e' \|\| Char() == 'E') {
				183	ConsumeChar();
				184	if (AtEnd()) {
				185	data_ = original;
				186	return false;
				187	}
				188
				189	// Which could have a +-
				190	if (Char() == '+' \|\| Char() == '-') {
				191	ConsumeChar();
				192	}
				193	int count = 0;
				194	while (true) {
				195	if (AtEnd()) {
				196	data_ = original;
				197	return false;
				198	}
				199	// And digits.
				200	if (Char() < '0' \|\| Char() > '9') {
				201	break;
				202	}
				203	ConsumeChar();
				204	++count;
				205	}
				206	// But, it is an error to have an exponent and nothing following it.
				207	if (count == 0) {
				208	data_ = original;
				209	return false;
				210	}
				211	}
				212
				213	*s = ::std::string(original.substr(0, original.size() - data_.size()));
				214	return true;
				215	}
				216
				217	Tokenizer::TokenType Tokenizer::Next() {
				218	switch (state_) {
				219	case State::kExpectObjectStart:
				220	// We should always start out with a {
				221	if (!Consume("{")) return TokenType::kError;
				222
				223	// Document that we just started an object.
				224	object_type_.push_back(ObjectType::kObject);
				225
				226	ConsumeWhitespace();
				227
				228	if (Consume("}")) {
				229	ConsumeWhitespace();
				230	state_ = State::kExpectObjectEnd;
				231	} else {
				232	state_ = State::kExpectField;
				233	}
				234	return TokenType::kStartObject;
				235
				236	case State::kExpectField: {
				237	// Fields are built up of strings, whitespace, and then a : (followed by
				238	// whitespace...)
				239	::std::string s;
				240	if (!ConsumeString(&s)) {
				241	fprintf(stderr, "Error on line %d, expected string for field name.\n",
				242	linenumber_);
				243	return TokenType::kError;
				244	}
				245	field_name_ = ::std::move(s);
				246
				247	ConsumeWhitespace();
				248
				249	if (!Consume(":")) {
				250	fprintf(stderr, "Error on line %d\n", linenumber_);
				251	return TokenType::kError;
				252	}
				253
				254	ConsumeWhitespace();
				255
				256	state_ = State::kExpectValue;
				257
				258	return TokenType::kField;
				259	} break;
				260	case State::kExpectValue: {
				261	TokenType result = TokenType::kError;
				262
				263	::std::string s;
				264	if (Consume("{")) {
				265	// Fields are in objects. Record and recurse.
				266	object_type_.push_back(ObjectType::kObject);
				267
				268	ConsumeWhitespace();
				269
				270	state_ = State::kExpectField;
				271	return TokenType::kStartObject;
				272	} else if (Consume("[")) {
				273	// Values are in arrays. Record and recurse.
				274	object_type_.push_back(ObjectType::kArray);
				275
				276	ConsumeWhitespace();
				277	state_ = State::kExpectValue;
				278	return TokenType::kStartArray;
				279	} else if (ConsumeString(&s)) {
				280	// Parsed as a string, grab it.
				281	field_value_ = ::std::move(s);
				282	result = TokenType::kStringValue;
				283	} else if (ConsumeNumber(&s)) {
				284	// Parsed as a number, grab it.
				285	field_value_ = ::std::move(s);
				286	result = TokenType::kNumberValue;
				287	} else if (Consume("true")) {
				288	// Parsed as a true, grab it.
				289	field_value_ = "true";
				290	result = TokenType::kTrueValue;
				291	} else if (Consume("false")) {
				292	// Parsed as a false, grab it.
				293	field_value_ = "false";
				294	result = TokenType::kFalseValue;
				295	} else {
				296	// Couldn't parse, so we have a syntax error.
				297	fprintf(stderr, "Error line %d, invalid field value.\n", linenumber_);
				298	}
				299
				300	ConsumeWhitespace();
				301
				302	// After a field, we either have a , and another field (or value if we are
				303	// in an array), or we should be closing out the object (or array).
				304	if (Consume(",")) {
				305	ConsumeWhitespace();
				306	switch (object_type_.back()) {
				307	case ObjectType::kObject:
				308	state_ = State::kExpectField;
				309	break;
				310	case ObjectType::kArray:
				311	state_ = State::kExpectValue;
				312	break;
				313	}
				314	} else {
				315	// Sanity check that the stack is deep enough.
				316	if (object_type_.size() == 0) {
				317	fprintf(stderr, "Error on line %d\n", linenumber_);
				318	return TokenType::kError;
				319	}
				320
				321	// And then require closing out the object.
				322	switch (object_type_.back()) {
				323	case ObjectType::kObject:
				324	if (Consume("}")) {
				325	ConsumeWhitespace();
				326	state_ = State::kExpectObjectEnd;
				327	} else {
				328	return TokenType::kError;
				329	}
				330	break;
				331	case ObjectType::kArray:
				332	if (Consume("]")) {
				333	ConsumeWhitespace();
				334	state_ = State::kExpectArrayEnd;
				335	} else {
				336	return TokenType::kError;
				337	}
				338	break;
				339	}
				340	}
				341	return result;
				342	} break;
				343
				344	case State::kExpectArrayEnd:
				345	case State::kExpectObjectEnd: {
				346	const TokenType result = state_ == State::kExpectArrayEnd
				347	? TokenType::kEndArray
				348	: TokenType::kEndObject;
				349	// This is a transient state so we can send 2 tokens out in a row. We
				350	// discover the object or array end at the end of reading the value.
				351	object_type_.pop_back();
				352	if (object_type_.size() == 0) {
				353	// We unwound the outer object. We should send kEnd next.
				354	state_ = State::kExpectEnd;
				355	} else if (object_type_.back() == ObjectType::kObject) {
				356	// If we are going into an object, it should either have another field
				357	// or end.
				358	if (Consume(",")) {
				359	ConsumeWhitespace();
				360	state_ = State::kExpectField;
				361	} else if (Consume("}")) {
				362	ConsumeWhitespace();
				363	state_ = State::kExpectObjectEnd;
				364	} else {
				365	return TokenType::kError;
				366	}
				367	} else if (object_type_.back() == ObjectType::kArray) {
				368	// If we are going into an array, it should either have another value
				369	// or end.
				370	if (Consume(",")) {
				371	ConsumeWhitespace();
				372	state_ = State::kExpectValue;
				373	} else if (Consume("]")) {
				374	ConsumeWhitespace();
				375	state_ = State::kExpectArrayEnd;
				376	} else {
				377	return TokenType::kError;
				378	}
				379	}
				380	// And then send out the correct token.
				381	return result;
				382	}
				383	case State::kExpectEnd:
				384	// If we are supposed to be done, confirm nothing is after the end.
				385	if (AtEnd()) {
				386	return TokenType::kEnd;
				387	} else {
				388	fprintf(stderr, "Data past end at line %d\n", linenumber_);
				389	return TokenType::kError;
				390	}
				391	}
				392	return TokenType::kError;
				393	}
				394
				395	bool Tokenizer::FieldAsInt(long long *value) {
				396	const char *pos = field_value().c_str();
				397	errno = 0;
				398	value = strtoll(field_value().c_str(), const_cast<char *>(&pos), 10);
				399	if (pos != field_value().c_str() + field_value().size() \|\| errno != 0) {
				400	return false;
				401	}
				402	return true;
				403	}
				404
				405	bool Tokenizer::FieldAsDouble(double *value) {
				406	const char *pos = field_value().c_str();
				407	errno = 0;
				408	value = strtod(field_value().c_str(), const_cast<char *>(&pos));
				409
				410	if (pos != field_value().c_str() + field_value().size() \|\| errno != 0) {
				411	return false;
				412	}
				413	return true;
				414	}
				415
				416	} // namespace aos