Factor tokenizer out to a separate file.
json_to_flatbuffer.{cc,h} was getting too big.
Change-Id: I38c10001b15da7fa2c0bfef66be5e640d1fa446f
diff --git a/aos/BUILD b/aos/BUILD
index 0631e66..95418ea 100644
--- a/aos/BUILD
+++ b/aos/BUILD
@@ -426,11 +426,21 @@
)
cc_library(
+ name = "json_tokenizer",
+ srcs = ["json_tokenizer.cc"],
+ hdrs = ["json_tokenizer.h"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_library(
name = "json_to_flatbuffer",
srcs = ["json_to_flatbuffer.cc"],
hdrs = ["json_to_flatbuffer.h"],
deps = [
":flatbuffer_utils",
+ ":json_tokenizer",
"//aos/logging",
"@com_github_google_flatbuffers//:flatbuffers",
"@com_google_absl//absl/strings",
diff --git a/aos/json_to_flatbuffer.cc b/aos/json_to_flatbuffer.cc
index 68d3b03..85e2d7f 100644
--- a/aos/json_to_flatbuffer.cc
+++ b/aos/json_to_flatbuffer.cc
@@ -6,6 +6,7 @@
#include "absl/strings/string_view.h"
#include "aos/flatbuffer_utils.h"
#include "aos/logging/logging.h"
+#include "aos/json_tokenizer.h"
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/minireflect.h"
@@ -729,408 +730,4 @@
return tostring_visitor.s;
}
-void Tokenizer::ConsumeWhitespace() {
- while (true) {
- if (AtEnd()) {
- return;
- }
- // Skip any whitespace.
- if (Char() == ' ' || Char() == '\r' || Char() == '\t') {
- ConsumeChar();
- } else if (Char() == '\n') {
- ConsumeChar();
- ++linenumber_;
- } else {
- // There is no fail. Once we are out of whitespace (including 0 of it),
- // declare success.
- return;
- }
- }
-}
-
-bool Tokenizer::Consume(const char *token) {
- const absl::string_view original = data_;
- while (true) {
- // Finishing the token is success.
- if (*token == '\0') {
- return true;
- }
-
- // But finishing the data first is failure.
- if (AtEnd()) {
- data_ = original;
- return false;
- }
-
- // Missmatch is failure.
- if (*token != Char()) {
- data_ = original;
- return false;
- }
-
- ConsumeChar();
- ++token;
- }
-}
-
-bool Tokenizer::ConsumeString(::std::string *s) {
- // Under no conditions is it acceptible to run out of data while parsing a
- // string. Any AtEnd checks should confirm that.
- const absl::string_view original = data_;
- if (AtEnd()) {
- return false;
- }
-
- // Expect the leading "
- if (Char() != '"') {
- return false;
- }
-
- ConsumeChar();
- absl::string_view last_parsed_data = data_;
- *s = ::std::string();
-
- while (true) {
- if (AtEnd()) {
- data_ = original;
- return false;
- }
-
- // If we get an end or an escape, do something special.
- if (Char() == '"' || Char() == '\\') {
- // Save what we found up until now, not including this character.
- *s += ::std::string(
- last_parsed_data.substr(0, last_parsed_data.size() - data_.size()));
-
- // Update the pointer.
- last_parsed_data = data_;
-
- // " is the end, declare victory.
- if (Char() == '"') {
- ConsumeChar();
- return true;
- } else {
- ConsumeChar();
- // Now consume valid escape characters and add their representation onto
- // the output string.
- if (AtEnd()) {
- data_ = original;
- return false;
- } else if (Char() == '"') {
- *s += "\"";
- } else if (Char() == '\\') {
- *s += "\\";
- } else if (Char() == '/') {
- *s += "/";
- } else if (Char() == 'b') {
- *s += "\b";
- } else if (Char() == 'f') {
- *s += "\f";
- } else if (Char() == 'n') {
- *s += "\n";
- } else if (Char() == 'r') {
- *s += "\r";
- } else if (Char() == 't') {
- *s += "\t";
- } else if (Char() == 'u') {
- // TODO(austin): Unicode should be valid, but I really don't care to
- // do this now...
- fprintf(stderr, "Unexpected unicode on line %d\n", linenumber_);
- data_ = original;
- return false;
- }
- }
- // And skip the escaped character.
- last_parsed_data = data_.substr(1);
- }
-
- ConsumeChar();
- }
-}
-
-bool Tokenizer::ConsumeNumber(::std::string *s) {
- // Under no conditions is it acceptible to run out of data while parsing a
- // number. Any AtEnd() checks should confirm that.
- *s = ::std::string();
- const absl::string_view original = data_;
-
- // Consume the leading - unconditionally.
- Consume("-");
-
- // Then, we either get a 0, or we get a nonzero. Only nonzero can be followed
- // by a second number.
- if (!Consume("0")) {
- if (AtEnd()) {
- return false;
- } else if (Char() >= '1' && Char() <= '9') {
- // This wasn't a zero, but was a valid digit. Consume it.
- ConsumeChar();
- } else {
- return false;
- }
-
- // Now consume any number of any digits.
- while (true) {
- if (AtEnd()) {
- data_ = original;
- return false;
- }
- if (Char() < '0' || Char() > '9') {
- break;
- }
- ConsumeChar();
- }
- }
-
- // We could now have a decimal.
- if (Char() == '.') {
- ConsumeChar();
- while (true) {
- if (AtEnd()) {
- data_ = original;
- return false;
- }
- // And any number of digits.
- if (Char() < '0' || Char() > '9') {
- break;
- }
- ConsumeChar();
- }
- }
-
- // And now an exponent.
- if (Char() == 'e' || Char() == 'E') {
- ConsumeChar();
- if (AtEnd()) {
- data_ = original;
- return false;
- }
-
- // Which could have a +-
- if (Char() == '+' || Char() == '-') {
- ConsumeChar();
- }
- int count = 0;
- while (true) {
- if (AtEnd()) {
- data_ = original;
- return false;
- }
- // And digits.
- if (Char() < '0' || Char() > '9') {
- break;
- }
- ConsumeChar();
- ++count;
- }
- // But, it is an error to have an exponent and nothing following it.
- if (count == 0) {
- data_ = original;
- return false;
- }
- }
-
- *s = ::std::string(original.substr(0, original.size() - data_.size()));
- return true;
-}
-
-Tokenizer::TokenType Tokenizer::Next() {
- switch (state_) {
- case State::kExpectObjectStart:
- // We should always start out with a {
- if (!Consume("{")) return TokenType::kError;
-
- // Document that we just started an object.
- object_type_.push_back(ObjectType::kObject);
-
- ConsumeWhitespace();
-
- if (Consume("}")) {
- ConsumeWhitespace();
- state_ = State::kExpectObjectEnd;
- } else {
- state_ = State::kExpectField;
- }
- return TokenType::kStartObject;
-
- case State::kExpectField: {
- // Fields are built up of strings, whitespace, and then a : (followed by
- // whitespace...)
- ::std::string s;
- if (!ConsumeString(&s)) {
- fprintf(stderr, "Error on line %d, expected string for field name.\n",
- linenumber_);
- return TokenType::kError;
- }
- field_name_ = ::std::move(s);
-
- ConsumeWhitespace();
-
- if (!Consume(":")) {
- fprintf(stderr, "Error on line %d\n", linenumber_);
- return TokenType::kError;
- }
-
- ConsumeWhitespace();
-
- state_ = State::kExpectValue;
-
- return TokenType::kField;
- } break;
- case State::kExpectValue: {
- TokenType result = TokenType::kError;
-
- ::std::string s;
- if (Consume("{")) {
- // Fields are in objects. Record and recurse.
- object_type_.push_back(ObjectType::kObject);
-
- ConsumeWhitespace();
-
- state_ = State::kExpectField;
- return TokenType::kStartObject;
- } else if (Consume("[")) {
- // Values are in arrays. Record and recurse.
- object_type_.push_back(ObjectType::kArray);
-
- ConsumeWhitespace();
- state_ = State::kExpectValue;
- return TokenType::kStartArray;
- } else if (ConsumeString(&s)) {
- // Parsed as a string, grab it.
- field_value_ = ::std::move(s);
- result = TokenType::kStringValue;
- } else if (ConsumeNumber(&s)) {
- // Parsed as a number, grab it.
- field_value_ = ::std::move(s);
- result = TokenType::kNumberValue;
- } else if (Consume("true")) {
- // Parsed as a true, grab it.
- field_value_ = "true";
- result = TokenType::kTrueValue;
- } else if (Consume("false")) {
- // Parsed as a false, grab it.
- field_value_ = "false";
- result = TokenType::kFalseValue;
- } else {
- // Couldn't parse, so we have a syntax error.
- fprintf(stderr, "Error line %d, invalid field value.\n", linenumber_);
- }
-
- ConsumeWhitespace();
-
- // After a field, we either have a , and another field (or value if we are
- // in an array), or we should be closing out the object (or array).
- if (Consume(",")) {
- ConsumeWhitespace();
- switch (object_type_.back()) {
- case ObjectType::kObject:
- state_ = State::kExpectField;
- break;
- case ObjectType::kArray:
- state_ = State::kExpectValue;
- break;
- }
- } else {
- // Sanity check that the stack is deep enough.
- if (object_type_.size() == 0) {
- fprintf(stderr, "Error on line %d\n", linenumber_);
- return TokenType::kError;
- }
-
- // And then require closing out the object.
- switch (object_type_.back()) {
- case ObjectType::kObject:
- if (Consume("}")) {
- ConsumeWhitespace();
- state_ = State::kExpectObjectEnd;
- } else {
- return TokenType::kError;
- }
- break;
- case ObjectType::kArray:
- if (Consume("]")) {
- ConsumeWhitespace();
- state_ = State::kExpectArrayEnd;
- } else {
- return TokenType::kError;
- }
- break;
- }
- }
- return result;
- } break;
-
- case State::kExpectArrayEnd:
- case State::kExpectObjectEnd: {
- const TokenType result = state_ == State::kExpectArrayEnd
- ? TokenType::kEndArray
- : TokenType::kEndObject;
- // This is a transient state so we can send 2 tokens out in a row. We
- // discover the object or array end at the end of reading the value.
- object_type_.pop_back();
- if (object_type_.size() == 0) {
- // We unwound the outer object. We should send kEnd next.
- state_ = State::kExpectEnd;
- } else if (object_type_.back() == ObjectType::kObject) {
- // If we are going into an object, it should either have another field
- // or end.
- if (Consume(",")) {
- ConsumeWhitespace();
- state_ = State::kExpectField;
- } else if (Consume("}")) {
- ConsumeWhitespace();
- state_ = State::kExpectObjectEnd;
- } else {
- return TokenType::kError;
- }
- } else if (object_type_.back() == ObjectType::kArray) {
- // If we are going into an array, it should either have another value
- // or end.
- if (Consume(",")) {
- ConsumeWhitespace();
- state_ = State::kExpectValue;
- } else if (Consume("]")) {
- ConsumeWhitespace();
- state_ = State::kExpectArrayEnd;
- } else {
- return TokenType::kError;
- }
- }
- // And then send out the correct token.
- return result;
- }
- case State::kExpectEnd:
- // If we are supposed to be done, confirm nothing is after the end.
- if (AtEnd()) {
- return TokenType::kEnd;
- } else {
- fprintf(stderr, "Data past end at line %d\n", linenumber_);
- return TokenType::kError;
- }
- }
- return TokenType::kError;
-}
-
-bool Tokenizer::FieldAsInt(long long *value) {
- const char *pos = field_value().c_str();
- errno = 0;
- *value = strtoll(field_value().c_str(), const_cast<char **>(&pos), 10);
- if (pos != field_value().c_str() + field_value().size() || errno != 0) {
- return false;
- }
- return true;
-}
-
-bool Tokenizer::FieldAsDouble(double *value) {
- const char *pos = field_value().c_str();
- errno = 0;
- *value = strtod(field_value().c_str(), const_cast<char **>(&pos));
-
- if (pos != field_value().c_str() + field_value().size() || errno != 0) {
- return false;
- }
- return true;
-}
-
} // namespace aos
diff --git a/aos/json_to_flatbuffer.h b/aos/json_to_flatbuffer.h
index 6a12a56..78d0703 100644
--- a/aos/json_to_flatbuffer.h
+++ b/aos/json_to_flatbuffer.h
@@ -20,98 +20,6 @@
const flatbuffers::TypeTable *typetable,
bool multi_line = false);
-// This class implements the state machine at json.org
-class Tokenizer {
- public:
- Tokenizer(const absl::string_view data) : data_(data) {}
-
- enum class TokenType {
- kEnd,
- kError,
- kStartObject,
- kEndObject,
- kStartArray,
- kEndArray,
- kField,
- kNumberValue,
- kStringValue,
- kTrueValue,
- kFalseValue,
- };
-
- // Returns the next token.
- TokenType Next();
-
- // Returns the last field_name and field_value. These are only valid when
- // Next returns them.
- const ::std::string &field_name() const { return field_name_; }
- const ::std::string &field_value() const { return field_value_; }
-
- // Parses the current field value as a long long. Returns false if it failed
- // to parse.
- bool FieldAsInt(long long *value);
- // Parses the current field value as a double. Returns false if it failed
- // to parse.
- bool FieldAsDouble(double *value);
-
- // Returns true if we are at the end of the input.
- bool AtEnd() { return data_.size() == 0; }
-
- const absl::string_view data_left() const { return data_; }
-
- private:
- // Consumes a single character.
- void ConsumeChar() { data_ = data_.substr(1); }
-
- // Returns the current character.
- char Char() const { return data_[0]; }
-
- // Consumes a string out of data_. Populates s with the string. Returns true
- // if a valid string was found, and false otherwise.
- // data_ is updated only on success.
- bool ConsumeString(::std::string *s);
- // Consumes a number out of data_. Populates s with the string containing the
- // number. Returns true if a valid number was found, and false otherwise.
- // data_ is updated only on success.
- bool ConsumeNumber(::std::string *s);
- // Consumes a fixed token out of data_. Returns true if the string was found,
- // and false otherwise.
- // data_ is updated only on success.
- bool Consume(const char* token);
- // Consumes whitespace out of data_. Returns true if the string was found,
- // and false otherwise.
- // data_ is unconditionally updated.
- void ConsumeWhitespace();
-
- // State for the parsing state machine.
- enum class State {
- kExpectField,
- kExpectObjectStart,
- kExpectObjectEnd,
- kExpectArrayEnd,
- kExpectValue,
- kExpectEnd,
- };
-
- State state_ = State::kExpectObjectStart;
-
- // Data pointer.
- absl::string_view data_;
- // Current line number used for printing debug.
- int linenumber_ = 0;
-
- // Stack used to track which object type we were in when we recursed.
- enum class ObjectType {
- kObject,
- kArray,
- };
- ::std::vector<ObjectType> object_type_;
-
- // Last field name.
- ::std::string field_name_;
- // Last field value.
- ::std::string field_value_;
-};
} // namespace aos
#endif // AOS_JSON_TO_FLATBUFFER_H_
diff --git a/aos/json_tokenizer.cc b/aos/json_tokenizer.cc
new file mode 100644
index 0000000..38ff4e3
--- /dev/null
+++ b/aos/json_tokenizer.cc
@@ -0,0 +1,414 @@
+#include "aos/json_tokenizer.h"
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+namespace aos {
+
+void Tokenizer::ConsumeWhitespace() {
+ while (true) {
+ if (AtEnd()) {
+ return;
+ }
+ // Skip any whitespace.
+ if (Char() == ' ' || Char() == '\r' || Char() == '\t') {
+ ConsumeChar();
+ } else if (Char() == '\n') {
+ ConsumeChar();
+ ++linenumber_;
+ } else {
+ // There is no fail. Once we are out of whitespace (including 0 of it),
+ // declare success.
+ return;
+ }
+ }
+}
+
+bool Tokenizer::Consume(const char *token) {
+ const absl::string_view original = data_;
+ while (true) {
+ // Finishing the token is success.
+ if (*token == '\0') {
+ return true;
+ }
+
+ // But finishing the data first is failure.
+ if (AtEnd()) {
+ data_ = original;
+ return false;
+ }
+
+    // Mismatch is failure.
+ if (*token != Char()) {
+ data_ = original;
+ return false;
+ }
+
+ ConsumeChar();
+ ++token;
+ }
+}
+
+bool Tokenizer::ConsumeString(::std::string *s) {
+  // Under no conditions is it acceptable to run out of data while parsing a
+ // string. Any AtEnd checks should confirm that.
+ const absl::string_view original = data_;
+ if (AtEnd()) {
+ return false;
+ }
+
+ // Expect the leading "
+ if (Char() != '"') {
+ return false;
+ }
+
+ ConsumeChar();
+ absl::string_view last_parsed_data = data_;
+ *s = ::std::string();
+
+ while (true) {
+ if (AtEnd()) {
+ data_ = original;
+ return false;
+ }
+
+ // If we get an end or an escape, do something special.
+ if (Char() == '"' || Char() == '\\') {
+ // Save what we found up until now, not including this character.
+ *s += ::std::string(
+ last_parsed_data.substr(0, last_parsed_data.size() - data_.size()));
+
+ // Update the pointer.
+ last_parsed_data = data_;
+
+ // " is the end, declare victory.
+ if (Char() == '"') {
+ ConsumeChar();
+ return true;
+ } else {
+ ConsumeChar();
+ // Now consume valid escape characters and add their representation onto
+ // the output string.
+ if (AtEnd()) {
+ data_ = original;
+ return false;
+ } else if (Char() == '"') {
+ *s += "\"";
+ } else if (Char() == '\\') {
+ *s += "\\";
+ } else if (Char() == '/') {
+ *s += "/";
+ } else if (Char() == 'b') {
+ *s += "\b";
+ } else if (Char() == 'f') {
+ *s += "\f";
+ } else if (Char() == 'n') {
+ *s += "\n";
+ } else if (Char() == 'r') {
+ *s += "\r";
+ } else if (Char() == 't') {
+ *s += "\t";
+ } else if (Char() == 'u') {
+ // TODO(austin): Unicode should be valid, but I really don't care to
+ // do this now...
+ fprintf(stderr, "Unexpected unicode on line %d\n", linenumber_);
+ data_ = original;
+ return false;
+ }
+ }
+ // And skip the escaped character.
+ last_parsed_data = data_.substr(1);
+ }
+
+ ConsumeChar();
+ }
+}
+
+bool Tokenizer::ConsumeNumber(::std::string *s) {
+  // Under no conditions is it acceptable to run out of data while parsing a
+ // number. Any AtEnd() checks should confirm that.
+ *s = ::std::string();
+ const absl::string_view original = data_;
+
+ // Consume the leading - unconditionally.
+ Consume("-");
+
+ // Then, we either get a 0, or we get a nonzero. Only nonzero can be followed
+ // by a second number.
+ if (!Consume("0")) {
+ if (AtEnd()) {
+ return false;
+ } else if (Char() >= '1' && Char() <= '9') {
+ // This wasn't a zero, but was a valid digit. Consume it.
+ ConsumeChar();
+ } else {
+ return false;
+ }
+
+ // Now consume any number of any digits.
+ while (true) {
+ if (AtEnd()) {
+ data_ = original;
+ return false;
+ }
+ if (Char() < '0' || Char() > '9') {
+ break;
+ }
+ ConsumeChar();
+ }
+ }
+
+ // We could now have a decimal.
+ if (Char() == '.') {
+ ConsumeChar();
+ while (true) {
+ if (AtEnd()) {
+ data_ = original;
+ return false;
+ }
+ // And any number of digits.
+ if (Char() < '0' || Char() > '9') {
+ break;
+ }
+ ConsumeChar();
+ }
+ }
+
+ // And now an exponent.
+ if (Char() == 'e' || Char() == 'E') {
+ ConsumeChar();
+ if (AtEnd()) {
+ data_ = original;
+ return false;
+ }
+
+ // Which could have a +-
+ if (Char() == '+' || Char() == '-') {
+ ConsumeChar();
+ }
+ int count = 0;
+ while (true) {
+ if (AtEnd()) {
+ data_ = original;
+ return false;
+ }
+ // And digits.
+ if (Char() < '0' || Char() > '9') {
+ break;
+ }
+ ConsumeChar();
+ ++count;
+ }
+ // But, it is an error to have an exponent and nothing following it.
+ if (count == 0) {
+ data_ = original;
+ return false;
+ }
+ }
+
+ *s = ::std::string(original.substr(0, original.size() - data_.size()));
+ return true;
+}
+
+Tokenizer::TokenType Tokenizer::Next() {
+ switch (state_) {
+ case State::kExpectObjectStart:
+ // We should always start out with a {
+ if (!Consume("{")) return TokenType::kError;
+
+ // Document that we just started an object.
+ object_type_.push_back(ObjectType::kObject);
+
+ ConsumeWhitespace();
+
+ if (Consume("}")) {
+ ConsumeWhitespace();
+ state_ = State::kExpectObjectEnd;
+ } else {
+ state_ = State::kExpectField;
+ }
+ return TokenType::kStartObject;
+
+ case State::kExpectField: {
+ // Fields are built up of strings, whitespace, and then a : (followed by
+ // whitespace...)
+ ::std::string s;
+ if (!ConsumeString(&s)) {
+ fprintf(stderr, "Error on line %d, expected string for field name.\n",
+ linenumber_);
+ return TokenType::kError;
+ }
+ field_name_ = ::std::move(s);
+
+ ConsumeWhitespace();
+
+ if (!Consume(":")) {
+ fprintf(stderr, "Error on line %d\n", linenumber_);
+ return TokenType::kError;
+ }
+
+ ConsumeWhitespace();
+
+ state_ = State::kExpectValue;
+
+ return TokenType::kField;
+ } break;
+ case State::kExpectValue: {
+ TokenType result = TokenType::kError;
+
+ ::std::string s;
+ if (Consume("{")) {
+ // Fields are in objects. Record and recurse.
+ object_type_.push_back(ObjectType::kObject);
+
+ ConsumeWhitespace();
+
+ state_ = State::kExpectField;
+ return TokenType::kStartObject;
+ } else if (Consume("[")) {
+ // Values are in arrays. Record and recurse.
+ object_type_.push_back(ObjectType::kArray);
+
+ ConsumeWhitespace();
+ state_ = State::kExpectValue;
+ return TokenType::kStartArray;
+ } else if (ConsumeString(&s)) {
+ // Parsed as a string, grab it.
+ field_value_ = ::std::move(s);
+ result = TokenType::kStringValue;
+ } else if (ConsumeNumber(&s)) {
+ // Parsed as a number, grab it.
+ field_value_ = ::std::move(s);
+ result = TokenType::kNumberValue;
+ } else if (Consume("true")) {
+ // Parsed as a true, grab it.
+ field_value_ = "true";
+ result = TokenType::kTrueValue;
+ } else if (Consume("false")) {
+ // Parsed as a false, grab it.
+ field_value_ = "false";
+ result = TokenType::kFalseValue;
+ } else {
+ // Couldn't parse, so we have a syntax error.
+ fprintf(stderr, "Error line %d, invalid field value.\n", linenumber_);
+ }
+
+ ConsumeWhitespace();
+
+ // After a field, we either have a , and another field (or value if we are
+ // in an array), or we should be closing out the object (or array).
+ if (Consume(",")) {
+ ConsumeWhitespace();
+ switch (object_type_.back()) {
+ case ObjectType::kObject:
+ state_ = State::kExpectField;
+ break;
+ case ObjectType::kArray:
+ state_ = State::kExpectValue;
+ break;
+ }
+ } else {
+ // Sanity check that the stack is deep enough.
+ if (object_type_.size() == 0) {
+ fprintf(stderr, "Error on line %d\n", linenumber_);
+ return TokenType::kError;
+ }
+
+ // And then require closing out the object.
+ switch (object_type_.back()) {
+ case ObjectType::kObject:
+ if (Consume("}")) {
+ ConsumeWhitespace();
+ state_ = State::kExpectObjectEnd;
+ } else {
+ return TokenType::kError;
+ }
+ break;
+ case ObjectType::kArray:
+ if (Consume("]")) {
+ ConsumeWhitespace();
+ state_ = State::kExpectArrayEnd;
+ } else {
+ return TokenType::kError;
+ }
+ break;
+ }
+ }
+ return result;
+ } break;
+
+ case State::kExpectArrayEnd:
+ case State::kExpectObjectEnd: {
+ const TokenType result = state_ == State::kExpectArrayEnd
+ ? TokenType::kEndArray
+ : TokenType::kEndObject;
+ // This is a transient state so we can send 2 tokens out in a row. We
+ // discover the object or array end at the end of reading the value.
+ object_type_.pop_back();
+ if (object_type_.size() == 0) {
+ // We unwound the outer object. We should send kEnd next.
+ state_ = State::kExpectEnd;
+ } else if (object_type_.back() == ObjectType::kObject) {
+ // If we are going into an object, it should either have another field
+ // or end.
+ if (Consume(",")) {
+ ConsumeWhitespace();
+ state_ = State::kExpectField;
+ } else if (Consume("}")) {
+ ConsumeWhitespace();
+ state_ = State::kExpectObjectEnd;
+ } else {
+ return TokenType::kError;
+ }
+ } else if (object_type_.back() == ObjectType::kArray) {
+ // If we are going into an array, it should either have another value
+ // or end.
+ if (Consume(",")) {
+ ConsumeWhitespace();
+ state_ = State::kExpectValue;
+ } else if (Consume("]")) {
+ ConsumeWhitespace();
+ state_ = State::kExpectArrayEnd;
+ } else {
+ return TokenType::kError;
+ }
+ }
+ // And then send out the correct token.
+ return result;
+ }
+ case State::kExpectEnd:
+ // If we are supposed to be done, confirm nothing is after the end.
+ if (AtEnd()) {
+ return TokenType::kEnd;
+ } else {
+ fprintf(stderr, "Data past end at line %d\n", linenumber_);
+ return TokenType::kError;
+ }
+ }
+ return TokenType::kError;
+}
+
+bool Tokenizer::FieldAsInt(long long *value) {
+ const char *pos = field_value().c_str();
+ errno = 0;
+ *value = strtoll(field_value().c_str(), const_cast<char **>(&pos), 10);
+ if (pos != field_value().c_str() + field_value().size() || errno != 0) {
+ return false;
+ }
+ return true;
+}
+
+bool Tokenizer::FieldAsDouble(double *value) {
+ const char *pos = field_value().c_str();
+ errno = 0;
+ *value = strtod(field_value().c_str(), const_cast<char **>(&pos));
+
+ if (pos != field_value().c_str() + field_value().size() || errno != 0) {
+ return false;
+ }
+ return true;
+}
+
+} // namespace aos
diff --git a/aos/json_tokenizer.h b/aos/json_tokenizer.h
new file mode 100644
index 0000000..3058d7d
--- /dev/null
+++ b/aos/json_tokenizer.h
@@ -0,0 +1,109 @@
+#ifndef AOS_JSON_TOKENIZER_H_
+#define AOS_JSON_TOKENIZER_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+
+namespace aos {
+
+// This class implements the state machine at json.org
+//
+// NOTE: C-style /* */ comments are not actually supported yet;
+// ConsumeWhitespace() only skips spaces, tabs, '\r', and '\n'.
+class Tokenizer {
+ public:
+ Tokenizer(const absl::string_view data) : data_(data) {}
+
+ enum class TokenType {
+ kEnd,
+ kError,
+ kStartObject,
+ kEndObject,
+ kStartArray,
+ kEndArray,
+ kField,
+ kNumberValue,
+ kStringValue,
+ kTrueValue,
+ kFalseValue,
+ };
+
+ // Returns the next token.
+ TokenType Next();
+
+ // Returns the last field_name and field_value. These are only valid when
+ // Next returns them.
+ const ::std::string &field_name() const { return field_name_; }
+ const ::std::string &field_value() const { return field_value_; }
+
+ // Parses the current field value as a long long. Returns false if it failed
+ // to parse.
+ bool FieldAsInt(long long *value);
+ // Parses the current field value as a double. Returns false if it failed
+ // to parse.
+ bool FieldAsDouble(double *value);
+
+ // Returns true if we are at the end of the input.
+ bool AtEnd() { return data_.size() == 0; }
+
+ const absl::string_view data_left() const { return data_; }
+
+ private:
+ // Consumes a single character.
+ void ConsumeChar() { data_ = data_.substr(1); }
+
+ // Returns the current character.
+ char Char() const { return data_[0]; }
+
+ // Consumes a string out of data_. Populates s with the string. Returns true
+ // if a valid string was found, and false otherwise.
+ // data_ is updated only on success.
+ bool ConsumeString(::std::string *s);
+ // Consumes a number out of data_. Populates s with the string containing the
+ // number. Returns true if a valid number was found, and false otherwise.
+ // data_ is updated only on success.
+ bool ConsumeNumber(::std::string *s);
+ // Consumes a fixed token out of data_. Returns true if the string was found,
+ // and false otherwise.
+ // data_ is updated only on success.
+ bool Consume(const char *token);
+  // Consumes any amount of whitespace (possibly none) out of data_,
+  // incrementing linenumber_ on newlines.  It cannot fail.
+  // data_ is unconditionally updated.
+ void ConsumeWhitespace();
+
+ // State for the parsing state machine.
+ enum class State {
+ kExpectField,
+ kExpectObjectStart,
+ kExpectObjectEnd,
+ kExpectArrayEnd,
+ kExpectValue,
+ kExpectEnd,
+ };
+
+ State state_ = State::kExpectObjectStart;
+
+ // Data pointer.
+ absl::string_view data_;
+ // Current line number used for printing debug.
+ int linenumber_ = 0;
+
+ // Stack used to track which object type we were in when we recursed.
+ enum class ObjectType {
+ kObject,
+ kArray,
+ };
+ ::std::vector<ObjectType> object_type_;
+
+ // Last field name.
+ ::std::string field_name_;
+ // Last field value.
+ ::std::string field_value_;
+};
+
+} // namespace aos
+
+#endif // AOS_JSON_TOKENIZER_H_