Austin Schuh | d7e252d | 2019-10-06 13:51:02 -0700 | [diff] [blame] | 1 | #ifndef AOS_JSON_TOKENIZER_H_ |
| 2 | #define AOS_JSON_TOKENIZER_H_ |
| 3 | |
| 4 | #include <string> |
James Kuszmaul | 3ae4226 | 2019-11-08 12:33:41 -0800 | [diff] [blame] | 5 | #include <string_view> |
Austin Schuh | d7e252d | 2019-10-06 13:51:02 -0700 | [diff] [blame] | 6 | #include <vector> |
| 7 | |
Austin Schuh | d7e252d | 2019-10-06 13:51:02 -0700 | [diff] [blame] | 8 | namespace aos { |
| 9 | |
// This class implements the state machine at json.org
//
// The only modification is that it supports C-style block comments
// (/* ... */) in all whitespace.
//
// The tokenizer keeps a std::string_view into the input rather than a copy,
// so the buffer passed to the constructor must outlive the Tokenizer.
class Tokenizer {
 public:
  // Starts tokenizing at the beginning of data. The data is not copied.
  Tokenizer(std::string_view data) : data_(data) {}

  // Kinds of token that Next() can report.
  enum class TokenType {
    kEnd,
    kError,
    kStartObject,
    kEndObject,
    kStartArray,
    kEndArray,
    kField,
    kNumberValue,
    kStringValue,
    kTrueValue,
    kFalseValue,
  };

  // Returns the next token.
  TokenType Next();

  // Returns the last field_name and field_value.  These are only valid when
  // Next returns them.
  const ::std::string &field_name() const { return field_name_; }
  const ::std::string &field_value() const { return field_value_; }

  // Parses the current field value as a long long.  Returns false if it failed
  // to parse.
  bool FieldAsInt(long long *value);
  // Parses the current field value as a double.  Returns false if it failed
  // to parse.
  bool FieldAsDouble(double *value);

  // Returns true if we are at the end of the input (no unconsumed data left).
  bool AtEnd() const { return data_.empty(); }

  // Returns a view of the input which has not been consumed yet.
  std::string_view data_left() const { return data_; }

 private:
  // Consumes a single character.
  // data_ must not be empty: substr(1) on an empty view throws
  // std::out_of_range.
  void ConsumeChar() { data_ = data_.substr(1); }

  // Returns the current character.
  // data_ must not be empty: indexing an empty view is undefined behavior.
  char Char() const { return data_[0]; }

  // Consumes a string out of data_.  Populates s with the string.  Returns true
  // if a valid string was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeString(::std::string *s);
  // Consumes a number out of data_.  Populates s with the string containing the
  // number.  Returns true if a valid number was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeNumber(::std::string *s);
  // Consumes a fixed token out of data_. Returns true if the string was found,
  // and false otherwise.
  // data_ is updated only on success.
  bool Consume(const char *token);
  // Consumes whitespace out of data_.  Nothing is returned.
  // data_ is unconditionally updated.
  // NOTE(review): presumably this is also where comments are skipped, given
  // the class comment above -- confirm against the implementation.
  void ConsumeWhitespace();

  // State for the parsing state machine.
  enum class State {
    kExpectField,
    kExpectObjectStart,
    kExpectObjectEnd,
    kExpectArrayEnd,
    kExpectValue,
    kExpectEnd,
  };

  // Parsing starts out expecting the start of an object.
  State state_ = State::kExpectObjectStart;

  // Remaining (unconsumed) input.  Non-owning.
  std::string_view data_;
  // Current line number used for printing debug.
  int linenumber_ = 0;

  // Stack used to track which object type we were in when we recursed.
  enum class ObjectType {
    kObject,
    kArray,
  };
  ::std::vector<ObjectType> object_type_;

  // Last field name.
  ::std::string field_name_;
  // Last field value.
  ::std::string field_value_;
};
| 105 | |
| 106 | } // namespace aos |
| 107 | |
| 108 | #endif // AOS_JSON_TOKENIZER_H_ |