blob: 892c575d9460a4eff9da07cb6489f589a8ded258 [file] [log] [blame]
Austin Schuhd7e252d2019-10-06 13:51:02 -07001#ifndef AOS_JSON_TOKENIZER_H_
2#define AOS_JSON_TOKENIZER_H_
3
4#include <string>
James Kuszmaul3ae42262019-11-08 12:33:41 -08005#include <string_view>
Austin Schuhd7e252d2019-10-06 13:51:02 -07006#include <vector>
Philipp Schrader790cb542023-07-05 21:06:52 -07007
Pallavi Madhukare2eb2812022-07-19 09:56:09 -07008#include "flatbuffers/util.h"
Austin Schuhd7e252d2019-10-06 13:51:02 -07009
Austin Schuhd7e252d2019-10-06 13:51:02 -070010namespace aos {
11
// This class implements the JSON state machine described at json.org.
//
// The only modification is that it supports C-style block comments (/* */)
// in all whitespace.
//
// The tokenizer does not own its input: it keeps a std::string_view onto the
// buffer handed to the constructor, so that buffer must outlive the Tokenizer.
class Tokenizer {
 public:
  // Creates a tokenizer over data. Only a view is stored (see the class
  // comment for the lifetime requirement).
  // NOTE(review): this constructor is not `explicit`; it is left that way so
  // that any existing implicit conversions at call sites keep compiling.
  Tokenizer(const std::string_view data) : data_(data) {}

  // The kinds of token which Next() can report.
  enum class TokenType {
    kEnd,
    kError,
    kStartObject,
    kEndObject,
    kStartArray,
    kEndArray,
    kField,
    kNumberValue,
    kStringValue,
    kTrueValue,
    kFalseValue,
  };

  // Returns the next token.
  TokenType Next();

  // Returns the last field_name and field_value.  These are only valid when
  // Next returns them.
  const ::std::string &field_name() const { return field_name_; }
  const ::std::string &field_value() const { return field_value_; }

  // Parses the current field value as a long long.  Returns false if it failed
  // to parse.
  bool FieldAsInt(long long *value);
  // Parses the current field value as a double.  Returns false if it failed
  // to parse.
  bool FieldAsDouble(double *value);

  // Returns true if we are at the end of the input, i.e. there is no
  // unconsumed data left.  Const so it can be used on a const Tokenizer just
  // like data_left().
  bool AtEnd() const { return data_.empty(); }

  // Returns a view of the input which has not been consumed yet.
  const std::string_view data_left() const { return data_; }

 private:
  // Consumes a single character.
  // data_ must not be empty: substr(1) on an empty view throws
  // std::out_of_range.
  void ConsumeChar() { data_ = data_.substr(1); }

  // Returns the current character.
  // data_ must not be empty: indexing an empty view is undefined behavior.
  char Char() const { return data_[0]; }

  // Consumes a string out of data_.  Populates s with the string.  Returns true
  // if a valid string was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeString(::std::string *s);
  // Consumes a number out of data_.  Populates s with the string containing the
  // number.  Returns true if a valid number was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeNumber(::std::string *s);
  // Consumes a fixed token out of data_. Returns true if the string was found,
  // and false otherwise.
  // data_ is updated only on success.
  bool Consume(const char *token);
  // Consumes whitespace out of data_.  Nothing is returned.
  // data_ is unconditionally updated.
  void ConsumeWhitespace();
  // Consumes a unicode out of data_. Populates s with the unicode. Returns
  // true if a valid unicode was found, and false otherwise. data_ is updated
  // only on success.
  bool ConsumeUnicode(::std::string *s);

  // State for the parsing state machine.
  enum class State {
    kExpectField,
    kExpectObjectStart,
    kExpectObjectEnd,
    kExpectArrayEnd,
    kExpectValue,
    kExpectEnd,
  };

  // Current state; a fresh tokenizer starts in kExpectObjectStart.
  State state_ = State::kExpectObjectStart;

  // Data pointer: the not-yet-consumed remainder of the input.
  std::string_view data_;
  // Current line number used for printing debug.
  int linenumber_ = 0;

  // Surrogate pairs i.e. high surrogates (\ud800 - \udbff) combined
  // with low surrogates (\udc00 - \udfff) cannot be interpreted when
  // they do not appear as a part of the pair.
  // NOTE(review): the ranges above are the ones Unicode defines; confirm the
  // implementation checks the same bounds.  -1 is presumably the "no high
  // surrogate pending" value.
  int unicode_high_surrogate_ = -1;

  // Stack used to track which object type we were in when we recursed.
  enum class ObjectType {
    kObject,
    kArray,
  };
  ::std::vector<ObjectType> object_type_;

  // Last field name.
  ::std::string field_name_;
  // Last field value.
  ::std::string field_value_;
};
116
117} // namespace aos
118
119#endif // AOS_JSON_TOKENIZER_H_