Austin Schuh | d7e252d | 2019-10-06 13:51:02 -0700 | [diff] [blame] | 1 | #ifndef AOS_JSON_TOKENIZER_H_ |
| 2 | #define AOS_JSON_TOKENIZER_H_ |
| 3 | |
| 4 | #include <string> |
James Kuszmaul | 3ae4226 | 2019-11-08 12:33:41 -0800 | [diff] [blame] | 5 | #include <string_view> |
Austin Schuh | d7e252d | 2019-10-06 13:51:02 -0700 | [diff] [blame] | 6 | #include <vector> |
Pallavi Madhukar | e2eb281 | 2022-07-19 09:56:09 -0700 | [diff] [blame] | 7 | #include "flatbuffers/util.h" |
Austin Schuh | d7e252d | 2019-10-06 13:51:02 -0700 | [diff] [blame] | 8 | |
Austin Schuh | d7e252d | 2019-10-06 13:51:02 -0700 | [diff] [blame] | 9 | namespace aos { |
| 10 | |
// Tokenizer for JSON text, implementing the state machine described at
// json.org.
//
// The only extension over standard JSON is that block comments of the form
// /* ... */ are accepted anywhere whitespace is allowed.
//
// The tokenizer does not own the text it parses: it stores a
// std::string_view, so the underlying buffer must outlive the Tokenizer.
class Tokenizer {
 public:
  // Creates a tokenizer over data.  The constructor is intentionally left
  // implicit so existing callers that pass a string or string literal keep
  // compiling.
  Tokenizer(const std::string_view data) : data_(data) {}

  // Kinds of tokens produced by Next().
  enum class TokenType {
    kEnd,
    kError,
    kStartObject,
    kEndObject,
    kStartArray,
    kEndArray,
    kField,
    kNumberValue,
    kStringValue,
    kTrueValue,
    kFalseValue,
  };

  // Returns the next token.
  TokenType Next();

  // Returns the last field_name and field_value.  These are only valid when
  // Next returns them.
  const ::std::string &field_name() const { return field_name_; }
  const ::std::string &field_value() const { return field_value_; }

  // Parses the current field value as a long long.  Returns false if it failed
  // to parse.
  bool FieldAsInt(long long *value);
  // Parses the current field value as a double.  Returns false if it failed
  // to parse.
  bool FieldAsDouble(double *value);

  // Returns true if we are at the end of the input, i.e. there is no
  // unconsumed data left.
  bool AtEnd() const { return data_.empty(); }

  // Returns a view of the input which has not been consumed yet.
  std::string_view data_left() const { return data_; }

 private:
  // Consumes a single character.
  // data_ must not be empty: std::string_view::substr(1) throws
  // std::out_of_range on an empty view.
  void ConsumeChar() { data_ = data_.substr(1); }

  // Returns the current character.
  // data_ must not be empty: indexing an empty std::string_view is undefined
  // behavior.
  char Char() const { return data_[0]; }

  // Consumes a string out of data_.  Populates s with the string.  Returns true
  // if a valid string was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeString(::std::string *s);
  // Consumes a number out of data_.  Populates s with the string containing the
  // number.  Returns true if a valid number was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeNumber(::std::string *s);
  // Consumes a fixed token out of data_.  Returns true if the string was found,
  // and false otherwise.
  // data_ is updated only on success.
  bool Consume(const char *token);
  // Consumes whitespace out of data_.  Nothing is returned.
  // data_ is unconditionally updated.
  void ConsumeWhitespace();
  // Consumes a unicode out of data_.  Populates s with the unicode.  Returns
  // true if a valid unicode was found, and false otherwise.  data_ is updated
  // only on success.
  bool ConsumeUnicode(::std::string *s);

  // State for the parsing state machine.
  enum class State {
    kExpectField,
    kExpectObjectStart,
    kExpectObjectEnd,
    kExpectArrayEnd,
    kExpectValue,
    kExpectEnd,
  };

  // Parsing starts out expecting the opening of an object.
  State state_ = State::kExpectObjectStart;

  // Remaining, not yet consumed, input.
  std::string_view data_;
  // Current line number used for printing debug.
  int linenumber_ = 0;

  // Surrogate pairs, i.e. a high surrogate (U+D800 - U+DBFF) combined with a
  // low surrogate (U+DC00 - U+DFFF), cannot be interpreted when the two halves
  // do not appear together as a pair.
  // NOTE(review): -1 presumably means "no high surrogate pending"; confirm
  // against the implementation of ConsumeUnicode.
  int unicode_high_surrogate_ = -1;

  // Stack used to track which object type we were in when we recursed.
  enum class ObjectType {
    kObject,
    kArray,
  };
  ::std::vector<ObjectType> object_type_;

  // Last field name.
  ::std::string field_name_;
  // Last field value.
  ::std::string field_value_;
};
| 115 | |
| 116 | } // namespace aos |
| 117 | |
| 118 | #endif // AOS_JSON_TOKENIZER_H_ |