Factor tokenizer out to a separate file.

json_to_flatbuffer.{cc,h} was getting too big.

Change-Id: I38c10001b15da7fa2c0bfef66be5e640d1fa446f
diff --git a/aos/json_tokenizer.h b/aos/json_tokenizer.h
new file mode 100644
index 0000000..3058d7d
--- /dev/null
+++ b/aos/json_tokenizer.h
@@ -0,0 +1,109 @@
+#ifndef AOS_JSON_TOKENIZER_H_
+#define AOS_JSON_TOKENIZER_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+
+namespace aos {
+
+// This class implements the state machine at json.org
+//
+// The only modification is that it supports C++ comments /**/ in all
+// whitespace.
+class Tokenizer {
+ public:
+  Tokenizer(const absl::string_view data) : data_(data) {}
+
+  enum class TokenType {
+    kEnd,
+    kError,
+    kStartObject,
+    kEndObject,
+    kStartArray,
+    kEndArray,
+    kField,
+    kNumberValue,
+    kStringValue,
+    kTrueValue,
+    kFalseValue,
+  };
+
+  // Returns the next token.
+  TokenType Next();
+
+  // Returns the last field_name and field_value.  These are only valid when
+  // Next returns them.
+  const ::std::string &field_name() const { return field_name_; }
+  const ::std::string &field_value() const { return field_value_; }
+
+  // Parses the current field value as a long long.  Returns false if it failed
+  // to parse.
+  bool FieldAsInt(long long *value);
+  // Parses the current field value as a double.  Returns false if it failed
+  // to parse.
+  bool FieldAsDouble(double *value);
+
+  // Returns true if we are at the end of the input.
+  bool AtEnd() { return data_.size() == 0; }
+
+  const absl::string_view data_left() const { return data_; }
+
+ private:
+  // Consumes a single character.
+  void ConsumeChar() { data_ = data_.substr(1); }
+
+  // Returns the current character.
+  char Char() const { return data_[0]; }
+
+  // Consumes a string out of data_.  Populates s with the string.  Returns true
+  // if a valid string was found, and false otherwise.
+  // data_ is updated only on success.
+  bool ConsumeString(::std::string *s);
+  // Consumes a number out of data_.  Populates s with the string containing the
+  // number.  Returns true if a valid number was found, and false otherwise.
+  // data_ is updated only on success.
+  bool ConsumeNumber(::std::string *s);
+  // Consumes a fixed token out of data_. Returns true if the string was found,
+  // and false otherwise.
+  // data_ is updated only on success.
+  bool Consume(const char *token);
+  // Consumes whitespace out of data_. Returns true if the string was found,
+  // and false otherwise.
+  // data_ is unconditionally updated.
+  void ConsumeWhitespace();
+
+  // State for the parsing state machine.
+  enum class State {
+    kExpectField,
+    kExpectObjectStart,
+    kExpectObjectEnd,
+    kExpectArrayEnd,
+    kExpectValue,
+    kExpectEnd,
+  };
+
+  State state_ = State::kExpectObjectStart;
+
+  // Data pointer.
+  absl::string_view data_;
+  // Current line number used for printing debug.
+  int linenumber_ = 0;
+
+  // Stack used to track which object type we were in when we recursed.
+  enum class ObjectType {
+    kObject,
+    kArray,
+  };
+  ::std::vector<ObjectType> object_type_;
+
+  // Last field name.
+  ::std::string field_name_;
+  // Last field value.
+  ::std::string field_value_;
+};
+
+}  // namespace aos
+
+#endif  // AOS_JSON_TOKENIZER_H_