Add unicode parsing for JSON strings
Parses unicode escape sequences in JSON strings. The syntax of an escape
in the string should be \uxxxx. Also checks the validity of the unicode.
Change-Id: Ie6aa16ef1a67110c02f0374fe04edd9004a12c9b
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/json_tokenizer.h b/aos/json_tokenizer.h
index 5b41da1..dec2a32 100644
--- a/aos/json_tokenizer.h
+++ b/aos/json_tokenizer.h
@@ -4,6 +4,7 @@
#include <string>
#include <string_view>
#include <vector>
+#include "flatbuffers/util.h"
namespace aos {
@@ -72,6 +73,10 @@
// and false otherwise.
// data_ is unconditionally updated.
void ConsumeWhitespace();
+ // Consumes a unicode out of data_. Populates s with the unicode. Returns
+ // true if a valid unicode was found, and false otherwise. data_ is updated
+ // only on success.
+ bool ConsumeUnicode(::std::string *s);
// State for the parsing state machine.
enum class State {
@@ -90,6 +95,11 @@
// Current line number used for printing debug.
int linenumber_ = 0;
+ // Surrogate pairs, i.e. high surrogates (\ud800 - \udbff) combined
+ // with low surrogates (\udc00 - \udfff), cannot be interpreted when
+ // they do not appear as part of a pair.
+ int unicode_high_surrogate_ = -1;
+
// Stack used to track which object type we were in when we recursed.
enum class ObjectType {
kObject,