blob: dec2a3255f73792ebe9187f9de1777f06a60521f [file] [log] [blame]
Austin Schuhd7e252d2019-10-06 13:51:02 -07001#ifndef AOS_JSON_TOKENIZER_H_
2#define AOS_JSON_TOKENIZER_H_
3
4#include <string>
James Kuszmaul3ae42262019-11-08 12:33:41 -08005#include <string_view>
Austin Schuhd7e252d2019-10-06 13:51:02 -07006#include <vector>
Pallavi Madhukare2eb2812022-07-19 09:56:09 -07007#include "flatbuffers/util.h"
Austin Schuhd7e252d2019-10-06 13:51:02 -07008
Austin Schuhd7e252d2019-10-06 13:51:02 -07009namespace aos {
10
// Streaming tokenizer which implements the JSON state machine described at
// json.org.
//
// The only modification is that it also supports C-style block comments
// (/* ... */) in all whitespace.
//
// The tokenizer stores a std::string_view of the input rather than a copy, so
// the underlying buffer must outlive the Tokenizer.
class Tokenizer {
 public:
  // Creates a tokenizer over data.  Only a view of data is stored; no copy is
  // made.
  Tokenizer(const std::string_view data) : data_(data) {}

  // The kinds of tokens which Next() can return.
  enum class TokenType {
    kEnd,
    kError,
    kStartObject,
    kEndObject,
    kStartArray,
    kEndArray,
    kField,
    kNumberValue,
    kStringValue,
    kTrueValue,
    kFalseValue,
  };

  // Returns the next token.
  TokenType Next();

  // Returns the last field_name and field_value.  These are only valid when
  // Next returns them.
  const ::std::string &field_name() const { return field_name_; }
  const ::std::string &field_value() const { return field_value_; }

  // Parses the current field value as a long long.  Returns false if it failed
  // to parse.
  bool FieldAsInt(long long *value);
  // Parses the current field value as a double.  Returns false if it failed
  // to parse.
  bool FieldAsDouble(double *value);

  // Returns true if we are at the end of the input.  This does not modify the
  // tokenizer, so it is callable on a const Tokenizer.
  bool AtEnd() const { return data_.empty(); }

  // Returns the part of the input which has not been consumed yet.
  const std::string_view data_left() const { return data_; }

 private:
  // Consumes a single character.
  // Must not be called when AtEnd(): substr(1) on an empty view throws
  // std::out_of_range.
  void ConsumeChar() { data_ = data_.substr(1); }

  // Returns the current character.
  // Must not be called when AtEnd(): indexing an empty view is undefined
  // behavior.
  char Char() const { return data_[0]; }

  // Consumes a string out of data_.  Populates s with the string.  Returns true
  // if a valid string was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeString(::std::string *s);
  // Consumes a number out of data_.  Populates s with the string containing the
  // number.  Returns true if a valid number was found, and false otherwise.
  // data_ is updated only on success.
  bool ConsumeNumber(::std::string *s);
  // Consumes a fixed token out of data_.  Returns true if the string was found,
  // and false otherwise.
  // data_ is updated only on success.
  bool Consume(const char *token);
  // Consumes whitespace out of data_.  Nothing is returned, so callers cannot
  // tell whether any whitespace was actually present.
  // data_ is unconditionally updated.
  void ConsumeWhitespace();
  // Consumes a unicode out of data_.  Populates s with the unicode.  Returns
  // true if a valid unicode was found, and false otherwise.  data_ is updated
  // only on success.
  bool ConsumeUnicode(::std::string *s);

  // State for the parsing state machine.
  enum class State {
    kExpectField,
    kExpectObjectStart,
    kExpectObjectEnd,
    kExpectArrayEnd,
    kExpectValue,
    kExpectEnd,
  };

  // Parsing starts out expecting the start of an object.
  State state_ = State::kExpectObjectStart;

  // Data pointer.  This is a non-owning view of the input which has not been
  // consumed yet.
  std::string_view data_;
  // Current line number used for printing debug.
  int linenumber_ = 0;

  // Surrogate pairs, i.e. high surrogates (\ud800 - \udbff) combined
  // with low surrogates (\udc00 - \udfff), cannot be interpreted when
  // they do not appear as a part of the pair.
  // NOTE(review): presumably this holds a pending high surrogate, with -1
  // meaning that none is pending -- confirm against the implementation.
  int unicode_high_surrogate_ = -1;

  // Stack used to track which object type we were in when we recursed.
  enum class ObjectType {
    kObject,
    kArray,
  };
  ::std::vector<ObjectType> object_type_;

  // Last field name.
  ::std::string field_name_;
  // Last field value.
  ::std::string field_value_;
};
115
116} // namespace aos
117
118#endif // AOS_JSON_TOKENIZER_H_