blob: 5b41da176a2e4c6178f8118d39379ad4d386a08c [file] [log] [blame]
Austin Schuhd7e252d2019-10-06 13:51:02 -07001#ifndef AOS_JSON_TOKENIZER_H_
2#define AOS_JSON_TOKENIZER_H_
3
4#include <string>
James Kuszmaul3ae42262019-11-08 12:33:41 -08005#include <string_view>
Austin Schuhd7e252d2019-10-06 13:51:02 -07006#include <vector>
7
namespace aos {

// Tokenizer for JSON text, following the state machine described at json.org.
// Each call to Next() consumes input and reports what kind of token was found.
//
// The only extension over that grammar is that C-style block comments
// (/* ... */) are accepted anywhere whitespace is allowed.
//
// The tokenizer stores only a std::string_view of its input, so the buffer
// behind that view must stay alive for as long as the tokenizer is in use.
class Tokenizer {
 public:
  // Creates a tokenizer that reads from data. No copy of the text is made.
  // NOTE(review): deliberately not marked explicit so that any existing call
  // site relying on the implicit conversion keeps compiling.
  Tokenizer(const std::string_view data) : data_(data) {}

  // Kinds of token that Next() can report.
  enum class TokenType {
    kEnd,
    kError,
    kStartObject,
    kEndObject,
    kStartArray,
    kEndArray,
    kField,
    kNumberValue,
    kStringValue,
    kTrueValue,
    kFalseValue,
  };

  // Returns the next token.
  TokenType Next();

  // Returns the last field name and field value. These are only valid when
  // Next returns them.
  const std::string &field_name() const { return field_name_; }
  const std::string &field_value() const { return field_value_; }

  // Parses the current field value as a long long. Returns false if it failed
  // to parse.
  bool FieldAsInt(long long *value);
  // Parses the current field value as a double. Returns false if it failed
  // to parse.
  bool FieldAsDouble(double *value);

  // Returns true if we are at the end of the input, i.e. no unconsumed
  // characters remain.
  bool AtEnd() const { return data_.empty(); }

  // Returns a view of the input that has not been consumed yet.
  std::string_view data_left() const { return data_; }

 private:
  // Consumes a single character.
  // Precondition: !AtEnd(). On an empty view, substr(1) throws
  // std::out_of_range.
  void ConsumeChar() { data_ = data_.substr(1); }

  // Returns the current character.
  // Precondition: !AtEnd(). Indexing an empty view is undefined behavior.
  char Char() const { return data_[0]; }

  // Consumes a string out of data_. Populates s with the string. Returns true
  // if a valid string was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeString(std::string *s);
  // Consumes a number out of data_. Populates s with the string containing the
  // number. Returns true if a valid number was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeNumber(std::string *s);
  // Consumes a fixed token out of data_. Returns true if the string was found,
  // and false otherwise.
  // data_ is updated only on success.
  bool Consume(const char *token);
  // Consumes whitespace out of data_. Nothing is returned.
  // data_ is unconditionally updated.
  void ConsumeWhitespace();

  // State for the parsing state machine.
  enum class State {
    kExpectField,
    kExpectObjectStart,
    kExpectObjectEnd,
    kExpectArrayEnd,
    kExpectValue,
    kExpectEnd,
  };

  // Parsing starts out expecting the opening of an object.
  State state_ = State::kExpectObjectStart;

  // Unconsumed remainder of the input.
  std::string_view data_;
  // Current line number, used when printing debug output.
  int linenumber_ = 0;

  // Stack used to track which object type we were in when we recursed.
  enum class ObjectType {
    kObject,
    kArray,
  };
  std::vector<ObjectType> object_type_;

  // Last field name.
  std::string field_name_;
  // Last field value.
  std::string field_value_;
};

}  // namespace aos
107
108#endif // AOS_JSON_TOKENIZER_H_