Add unicode parsing for JSON strings

Parses unicode escape sequences in JSON strings. Each escape must have
the form \uxxxx, where xxxx is four hexadecimal digits. Also checks the
validity of the unicode.

Change-Id: Ie6aa16ef1a67110c02f0374fe04edd9004a12c9b
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/json_tokenizer.h b/aos/json_tokenizer.h
index 5b41da1..dec2a32 100644
--- a/aos/json_tokenizer.h
+++ b/aos/json_tokenizer.h
@@ -4,6 +4,7 @@
 #include <string>
 #include <string_view>
 #include <vector>
+#include "flatbuffers/util.h"
 
 namespace aos {
 
@@ -72,6 +73,10 @@
   // and false otherwise.
   // data_ is unconditionally updated.
   void ConsumeWhitespace();
+  // Consumes a unicode out of data_.  Populates s with the unicode.  Returns
+  // true if a valid unicode was found, and false otherwise. data_ is updated
+  // only on success.
+  bool ConsumeUnicode(::std::string *s);
 
   // State for the parsing state machine.
   enum class State {
@@ -90,6 +95,11 @@
   // Current line number used for printing debug.
   int linenumber_ = 0;
 
+  // Surrogate pairs, i.e. high surrogates (\ud800 - \udbff) combined
+  // with low surrogates (\udc00 - \udfff), cannot be interpreted when
+  // they do not appear as a part of the pair.
+  int unicode_high_surrogate_ = -1;
+
   // Stack used to track which object type we were in when we recursed.
   enum class ObjectType {
     kObject,