Factor tokenizer out to a separate file.

json_to_flatbuffer.{cc,h} was getting too big.

Change-Id: I38c10001b15da7fa2c0bfef66be5e640d1fa446f

commit: d7e252dfc4c196a328d11ea4e10ae60a18920141 [log] [tgz]
author: Austin Schuh <austin.linux@gmail.com> Sun Oct 06 13:51:02 2019 -0700
committer: Austin Schuh <austin.linux@gmail.com> Sun Oct 06 13:59:24 2019 -0700
tree: 1f4aca8aa21943ed2748d32a0c27bd8eba0a6f2f
parent: d339a9bc566bc721695e8110ad302367dbd27c7f [diff] [blame]
diff --git a/aos/json_to_flatbuffer.cc b/aos/json_to_flatbuffer.cc
index 68d3b03..85e2d7f 100644
--- a/aos/json_to_flatbuffer.cc
+++ b/aos/json_to_flatbuffer.cc

@@ -6,6 +6,7 @@
 #include "absl/strings/string_view.h"
 #include "aos/flatbuffer_utils.h"
 #include "aos/logging/logging.h"
+#include "aos/json_tokenizer.h"
 #include "flatbuffers/flatbuffers.h"
 #include "flatbuffers/minireflect.h"
 
@@ -729,408 +730,4 @@
   return tostring_visitor.s;
 }
 
-void Tokenizer::ConsumeWhitespace() {
-  while (true) {
-    if (AtEnd()) {
-      return;
-    }
-    // Skip any whitespace.
-    if (Char() == ' ' || Char() == '\r' || Char() == '\t') {
-      ConsumeChar();
-    } else if (Char() == '\n') {
-      ConsumeChar();
-      ++linenumber_;
-    } else {
-      // There is no fail.  Once we are out of whitespace (including 0 of it),
-      // declare success.
-      return;
-    }
-  }
-}
-
-bool Tokenizer::Consume(const char *token) {
-  const absl::string_view original = data_;
-  while (true) {
-    // Finishing the token is success.
-    if (*token == '\0') {
-      return true;
-    }
-
-    // But finishing the data first is failure.
-    if (AtEnd()) {
-      data_ = original;
-      return false;
-    }
-
-    // Missmatch is failure.
-    if (*token != Char()) {
-      data_ = original;
-      return false;
-    }
-
-    ConsumeChar();
-    ++token;
-  }
-}
-
-bool Tokenizer::ConsumeString(::std::string *s) {
-  // Under no conditions is it acceptible to run out of data while parsing a
-  // string.  Any AtEnd checks should confirm that.
-  const absl::string_view original = data_;
-  if (AtEnd()) {
-    return false;
-  }
-
-  // Expect the leading "
-  if (Char() != '"') {
-    return false;
-  }
-
-  ConsumeChar();
-  absl::string_view last_parsed_data = data_;
-  *s = ::std::string();
-
-  while (true) {
-    if (AtEnd()) {
-      data_ = original;
-      return false;
-    }
-
-    // If we get an end or an escape, do something special.
-    if (Char() == '"' || Char() == '\\') {
-      // Save what we found up until now, not including this character.
-      *s += ::std::string(
-          last_parsed_data.substr(0, last_parsed_data.size() - data_.size()));
-
-      // Update the pointer.
-      last_parsed_data = data_;
-
-      // " is the end, declare victory.
-      if (Char() == '"') {
-        ConsumeChar();
-        return true;
-      } else {
-        ConsumeChar();
-        // Now consume valid escape characters and add their representation onto
-        // the output string.
-        if (AtEnd()) {
-          data_ = original;
-          return false;
-        } else if (Char() == '"') {
-          *s += "\"";
-        } else if (Char() == '\\') {
-          *s += "\\";
-        } else if (Char() == '/') {
-          *s += "/";
-        } else if (Char() == 'b') {
-          *s += "\b";
-        } else if (Char() == 'f') {
-          *s += "\f";
-        } else if (Char() == 'n') {
-          *s += "\n";
-        } else if (Char() == 'r') {
-          *s += "\r";
-        } else if (Char() == 't') {
-          *s += "\t";
-        } else if (Char() == 'u') {
-          // TODO(austin): Unicode should be valid, but I really don't care to
-          // do this now...
-          fprintf(stderr, "Unexpected unicode on line %d\n", linenumber_);
-          data_ = original;
-          return false;
-        }
-      }
-      // And skip the escaped character.
-      last_parsed_data = data_.substr(1);
-    }
-
-    ConsumeChar();
-  }
-}
-
-bool Tokenizer::ConsumeNumber(::std::string *s) {
-  // Under no conditions is it acceptible to run out of data while parsing a
-  // number.  Any AtEnd() checks should confirm that.
-  *s = ::std::string();
-  const absl::string_view original = data_;
-
-  // Consume the leading - unconditionally.
-  Consume("-");
-
-  // Then, we either get a 0, or we get a nonzero.  Only nonzero can be followed
-  // by a second number.
-  if (!Consume("0")) {
-    if (AtEnd()) {
-      return false;
-    } else if (Char() >= '1' && Char() <= '9') {
-      // This wasn't a zero, but was a valid digit.  Consume it.
-      ConsumeChar();
-    } else {
-      return false;
-    }
-
-    // Now consume any number of any digits.
-    while (true) {
-      if (AtEnd()) {
-        data_ = original;
-        return false;
-      }
-      if (Char() < '0' || Char() > '9') {
-        break;
-      }
-      ConsumeChar();
-    }
-  }
-
-  // We could now have a decimal.
-  if (Char() == '.') {
-    ConsumeChar();
-    while (true) {
-      if (AtEnd()) {
-        data_ = original;
-        return false;
-      }
-      // And any number of digits.
-      if (Char() < '0' || Char() > '9') {
-        break;
-      }
-      ConsumeChar();
-    }
-  }
-
-  // And now an exponent.
-  if (Char() == 'e' || Char() == 'E') {
-    ConsumeChar();
-    if (AtEnd()) {
-      data_ = original;
-      return false;
-    }
-
-    // Which could have a +-
-    if (Char() == '+' || Char() == '-') {
-      ConsumeChar();
-    }
-    int count = 0;
-    while (true) {
-      if (AtEnd()) {
-        data_ = original;
-        return false;
-      }
-      // And digits.
-      if (Char() < '0' || Char() > '9') {
-        break;
-      }
-      ConsumeChar();
-      ++count;
-    }
-    // But, it is an error to have an exponent and nothing following it.
-    if (count == 0) {
-      data_ = original;
-      return false;
-    }
-  }
-
-  *s = ::std::string(original.substr(0, original.size() - data_.size()));
-  return true;
-}
-
-Tokenizer::TokenType Tokenizer::Next() {
-  switch (state_) {
-    case State::kExpectObjectStart:
-      // We should always start out with a {
-      if (!Consume("{")) return TokenType::kError;
-
-      // Document that we just started an object.
-      object_type_.push_back(ObjectType::kObject);
-
-      ConsumeWhitespace();
-
-      if (Consume("}")) {
-        ConsumeWhitespace();
-        state_ = State::kExpectObjectEnd;
-      } else {
-        state_ = State::kExpectField;
-      }
-      return TokenType::kStartObject;
-
-    case State::kExpectField: {
-      // Fields are built up of strings, whitespace, and then a : (followed by
-      // whitespace...)
-      ::std::string s;
-      if (!ConsumeString(&s)) {
-        fprintf(stderr, "Error on line %d, expected string for field name.\n",
-                linenumber_);
-        return TokenType::kError;
-      }
-      field_name_ = ::std::move(s);
-
-      ConsumeWhitespace();
-
-      if (!Consume(":")) {
-        fprintf(stderr, "Error on line %d\n", linenumber_);
-        return TokenType::kError;
-      }
-
-      ConsumeWhitespace();
-
-      state_ = State::kExpectValue;
-
-      return TokenType::kField;
-    } break;
-    case State::kExpectValue: {
-      TokenType result = TokenType::kError;
-
-      ::std::string s;
-      if (Consume("{")) {
-        // Fields are in objects.  Record and recurse.
-        object_type_.push_back(ObjectType::kObject);
-
-        ConsumeWhitespace();
-
-        state_ = State::kExpectField;
-        return TokenType::kStartObject;
-      } else if (Consume("[")) {
-        // Values are in arrays.  Record and recurse.
-        object_type_.push_back(ObjectType::kArray);
-
-        ConsumeWhitespace();
-        state_ = State::kExpectValue;
-        return TokenType::kStartArray;
-      } else if (ConsumeString(&s)) {
-        // Parsed as a string, grab it.
-        field_value_ = ::std::move(s);
-        result = TokenType::kStringValue;
-      } else if (ConsumeNumber(&s)) {
-        // Parsed as a number, grab it.
-        field_value_ = ::std::move(s);
-        result = TokenType::kNumberValue;
-      } else if (Consume("true")) {
-        // Parsed as a true, grab it.
-        field_value_ = "true";
-        result = TokenType::kTrueValue;
-      } else if (Consume("false")) {
-        // Parsed as a false, grab it.
-        field_value_ = "false";
-        result = TokenType::kFalseValue;
-      } else {
-        // Couldn't parse, so we have a syntax error.
-        fprintf(stderr, "Error line %d, invalid field value.\n", linenumber_);
-      }
-
-      ConsumeWhitespace();
-
-      // After a field, we either have a , and another field (or value if we are
-      // in an array), or we should be closing out the object (or array).
-      if (Consume(",")) {
-        ConsumeWhitespace();
-        switch (object_type_.back()) {
-          case ObjectType::kObject:
-            state_ = State::kExpectField;
-            break;
-          case ObjectType::kArray:
-            state_ = State::kExpectValue;
-            break;
-        }
-      } else {
-        // Sanity check that the stack is deep enough.
-        if (object_type_.size() == 0) {
-          fprintf(stderr, "Error on line %d\n", linenumber_);
-          return TokenType::kError;
-        }
-
-        // And then require closing out the object.
-        switch (object_type_.back()) {
-          case ObjectType::kObject:
-            if (Consume("}")) {
-              ConsumeWhitespace();
-              state_ = State::kExpectObjectEnd;
-            } else {
-              return TokenType::kError;
-            }
-            break;
-          case ObjectType::kArray:
-            if (Consume("]")) {
-              ConsumeWhitespace();
-              state_ = State::kExpectArrayEnd;
-            } else {
-              return TokenType::kError;
-            }
-            break;
-        }
-      }
-      return result;
-    } break;
-
-    case State::kExpectArrayEnd:
-    case State::kExpectObjectEnd: {
-      const TokenType result = state_ == State::kExpectArrayEnd
-                                   ? TokenType::kEndArray
-                                   : TokenType::kEndObject;
-      // This is a transient state so we can send 2 tokens out in a row.  We
-      // discover the object or array end at the end of reading the value.
-      object_type_.pop_back();
-      if (object_type_.size() == 0) {
-        // We unwound the outer object.  We should send kEnd next.
-        state_ = State::kExpectEnd;
-      } else if (object_type_.back() == ObjectType::kObject) {
-        // If we are going into an object, it should either have another field
-        // or end.
-        if (Consume(",")) {
-          ConsumeWhitespace();
-          state_ = State::kExpectField;
-        } else if (Consume("}")) {
-          ConsumeWhitespace();
-          state_ = State::kExpectObjectEnd;
-        } else {
-          return TokenType::kError;
-        }
-      } else if (object_type_.back() == ObjectType::kArray) {
-        // If we are going into an array, it should either have another value
-        // or end.
-        if (Consume(",")) {
-          ConsumeWhitespace();
-          state_ = State::kExpectValue;
-        } else if (Consume("]")) {
-          ConsumeWhitespace();
-          state_ = State::kExpectArrayEnd;
-        } else {
-          return TokenType::kError;
-        }
-      }
-      // And then send out the correct token.
-      return result;
-    }
-    case State::kExpectEnd:
-      // If we are supposed to be done, confirm nothing is after the end.
-      if (AtEnd()) {
-        return TokenType::kEnd;
-      } else {
-        fprintf(stderr, "Data past end at line %d\n", linenumber_);
-        return TokenType::kError;
-      }
-  }
-  return TokenType::kError;
-}
-
-bool Tokenizer::FieldAsInt(long long *value) {
-  const char *pos = field_value().c_str();
-  errno = 0;
-  *value = strtoll(field_value().c_str(), const_cast<char **>(&pos), 10);
-  if (pos != field_value().c_str() + field_value().size() || errno != 0) {
-    return false;
-  }
-  return true;
-}
-
-bool Tokenizer::FieldAsDouble(double *value) {
-  const char *pos = field_value().c_str();
-  errno = 0;
-  *value = strtod(field_value().c_str(), const_cast<char **>(&pos));
-
-  if (pos != field_value().c_str() + field_value().size() || errno != 0) {
-    return false;
-  }
-  return true;
-}
-
 }  // namespace aos
commit	d7e252dfc4c196a328d11ea4e10ae60a18920141	[log] [tgz]
author	Austin Schuh <austin.linux@gmail.com>	Sun Oct 06 13:51:02 2019 -0700
committer	Austin Schuh <austin.linux@gmail.com>	Sun Oct 06 13:59:24 2019 -0700
tree	1f4aca8aa21943ed2748d32a0c27bd8eba0a6f2f
parent	d339a9bc566bc721695e8110ad302367dbd27c7f [diff] [blame]