blob: 3058d7dc38295440737737bb277d91dd8988a17f [file] [log] [blame]
Austin Schuhd7e252d2019-10-06 13:51:02 -07001#ifndef AOS_JSON_TOKENIZER_H_
2#define AOS_JSON_TOKENIZER_H_
3
4#include <string>
5#include <vector>
6
7#include "absl/strings/string_view.h"
8
9namespace aos {
10
11// This class implements the state machine at json.org
12//
// The only modification is that it supports C-style block comments (/* */)
// anywhere whitespace is allowed.
15class Tokenizer {
16 public:
17 Tokenizer(const absl::string_view data) : data_(data) {}
18
19 enum class TokenType {
20 kEnd,
21 kError,
22 kStartObject,
23 kEndObject,
24 kStartArray,
25 kEndArray,
26 kField,
27 kNumberValue,
28 kStringValue,
29 kTrueValue,
30 kFalseValue,
31 };
32
33 // Returns the next token.
34 TokenType Next();
35
36 // Returns the last field_name and field_value. These are only valid when
37 // Next returns them.
38 const ::std::string &field_name() const { return field_name_; }
39 const ::std::string &field_value() const { return field_value_; }
40
41 // Parses the current field value as a long long. Returns false if it failed
42 // to parse.
43 bool FieldAsInt(long long *value);
44 // Parses the current field value as a double. Returns false if it failed
45 // to parse.
46 bool FieldAsDouble(double *value);
47
48 // Returns true if we are at the end of the input.
49 bool AtEnd() { return data_.size() == 0; }
50
51 const absl::string_view data_left() const { return data_; }
52
53 private:
54 // Consumes a single character.
55 void ConsumeChar() { data_ = data_.substr(1); }
56
57 // Returns the current character.
58 char Char() const { return data_[0]; }
59
60 // Consumes a string out of data_. Populates s with the string. Returns true
61 // if a valid string was found, and false otherwise.
62 // data_ is updated only on success.
63 bool ConsumeString(::std::string *s);
64 // Consumes a number out of data_. Populates s with the string containing the
65 // number. Returns true if a valid number was found, and false otherwise.
66 // data_ is updated only on success.
67 bool ConsumeNumber(::std::string *s);
68 // Consumes a fixed token out of data_. Returns true if the string was found,
69 // and false otherwise.
70 // data_ is updated only on success.
71 bool Consume(const char *token);
72 // Consumes whitespace out of data_. Returns true if the string was found,
73 // and false otherwise.
74 // data_ is unconditionally updated.
75 void ConsumeWhitespace();
76
77 // State for the parsing state machine.
78 enum class State {
79 kExpectField,
80 kExpectObjectStart,
81 kExpectObjectEnd,
82 kExpectArrayEnd,
83 kExpectValue,
84 kExpectEnd,
85 };
86
87 State state_ = State::kExpectObjectStart;
88
89 // Data pointer.
90 absl::string_view data_;
91 // Current line number used for printing debug.
92 int linenumber_ = 0;
93
94 // Stack used to track which object type we were in when we recursed.
95 enum class ObjectType {
96 kObject,
97 kArray,
98 };
99 ::std::vector<ObjectType> object_type_;
100
101 // Last field name.
102 ::std::string field_name_;
103 // Last field value.
104 ::std::string field_value_;
105};
106
107} // namespace aos
108
109#endif // AOS_JSON_TOKENIZER_H_