Squashed 'third_party/flatbuffers/' content from commit acc9990ab
Change-Id: I48550d40d78fea996ebe74e9723a5d1f910de491
git-subtree-dir: third_party/flatbuffers
git-subtree-split: acc9990abd2206491480291b0f85f925110102ea
diff --git a/tests/fuzzer/flatbuffers_scalar_fuzzer.cc b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc
new file mode 100644
index 0000000..074a488
--- /dev/null
+++ b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc
@@ -0,0 +1,351 @@
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <algorithm>
+#include <clocale>
+#include <memory>
+#include <regex>
+#include <string>
+
+#include "flatbuffers/idl.h"
+#include "test_init.h"
+
+static constexpr uint8_t flags_scalar_type = 0x0F; // mask: low 4 bits of the flags byte hold the scalar type code
+static constexpr uint8_t flags_quotes_kind = 0x10; // mask: bit set -> quote with ", bit clear -> quote with '
+// Reserved for future use: JSON {named} or [unnamed] form.
+// static constexpr uint8_t flags_json_bracer = 0x20;
+
+// Overwrite the first char of every (possibly overlapping) 'subj' match in 's':
+// BreakSequence("testest","tes",'X') -> "XesXest"; ("xxx","xx",'Y') -> "YYx".
+// Null/empty 'subj' is a no-op (an empty needle would also match at s.size()).
+static void BreakSequence(std::string &s, const char *subj, char repl) {
+  if (!subj || !*subj) return;
+  for (auto pos = s.find(subj); pos != std::string::npos;
+       pos = s.find(subj, pos + 1)) {
+    s[pos] = repl;
+  }
+}
+
+// Trim from both ends of 's' every character contained in 'pattern'.
+// StripString("xy{xy}y", "xy") -> "{xy}"
+// When 'pos' is given it receives the offset of the first kept character,
+// or 0 if nothing is left.
+static std::string StripString(const std::string &s, const char *pattern,
+                               size_t *pos = nullptr) {
+  const auto head = s.find_first_not_of(pattern);
+  const bool nothing_left = (std::string::npos == head);
+  if (pos) *pos = nothing_left ? 0 : head;
+  if (nothing_left) return "";
+  const auto tail = s.find_last_not_of(pattern);
+  assert(tail < s.length());
+  assert(head <= tail);
+  return s.substr(head, tail - head + 1);
+}
+
+// Regex-based reference checker; derived classes supply MatchNumber().
+class RegexMatcher {
+ protected:
+  // Returns true if 'input' (already trimmed and unquoted) is a valid literal.
+  virtual bool MatchNumber(const std::string &input) const = 0;
+
+ public:
+  virtual ~RegexMatcher() = default;
+
+  // Outcome of Match().
+  struct MatchResult {
+    size_t pos{ 0 };       // offset of the trimmed text inside the input
+    size_t len{ 0 };       // length of the trimmed text, quotes included
+    bool res{ false };     // MatchNumber() verdict
+    bool quoted{ false };  // trimmed text is enclosed in matching ' or "
+  };
+
+  // Trims 'input', drops one pair of enclosing quotes and runs MatchNumber().
+  MatchResult Match(const std::string &input) const {
+    MatchResult result;
+    // Drop the leading and trailing "spaces" accepted by flatbuffers.
+    auto body = StripString(input, "\t\r\n ", &result.pos);
+    result.len = body.size();
+    if (body.size() >= 2) {
+      const auto open = body.front();
+      const auto close = body.back();
+      result.quoted = (open == close) && (open == '\'' || open == '\"');
+      // The regex test is applied to the text between the quotes.
+      if (result.quoted) body = body.substr(1, body.size() - 2);
+    }
+    // Fast check: a string with a valid scalar is not empty and has no
+    // non-ascii or non-printable symbols.
+    const auto is_bad = [](char c) { return (c < ' ') || (c > '~'); };
+    if (body.empty() || std::any_of(body.begin(), body.end(), is_bad)) {
+      return result;
+    }
+    result.res = MatchNumber(body);
+    return result;
+  }
+
+  // Returns true if 'input', with surrounding ' ' removed, fully matches any
+  // regex of 're_list'.
+  bool MatchRegexList(const std::string &input,
+                      const std::vector<std::regex> &re_list) const {
+    const auto text = StripString(input, " ");
+    if (text.empty()) return false;
+    return std::any_of(re_list.begin(), re_list.end(),
+                       [&text](const std::regex &re) {
+                         return std::regex_match(text, re);
+                       });
+  }
+};
+
+// Signed integers: decimal or 0x-prefixed hexadecimal with an optional sign.
+class IntegerRegex : public RegexMatcher {
+ protected:
+  bool MatchNumber(const std::string &input) const override {
+    const auto opt = std::regex_constants::optimize;
+    static const std::vector<std::regex> patterns = {
+      std::regex{ R"(^[-+]?[0-9]+$)", opt },             // decimal
+      std::regex{ R"(^[-+]?0[xX][0-9a-fA-F]+$)", opt }   // hexadecimal
+    };
+    return MatchRegexList(input, patterns);
+  }
+
+ public:
+  IntegerRegex() = default;
+  ~IntegerRegex() override = default;
+};
+
+// Unsigned integers: a minus sign is accepted only in front of a zero value.
+class UIntegerRegex : public RegexMatcher {
+ protected:
+  bool MatchNumber(const std::string &input) const override {
+    const auto opt = std::regex_constants::optimize;
+    static const std::vector<std::regex> patterns = {
+      std::regex{ R"(^[+]?[0-9]+$)", opt },               // decimal
+      std::regex{ R"(^[+]?0[xX][0-9a-fA-F]+$)", opt },    // hexadecimal
+      std::regex{ R"(^[-](?:0[xX])?0+$)", opt }           // accept -0 number
+    };
+    return MatchRegexList(input, patterns);
+  }
+
+ public:
+  UIntegerRegex() = default;
+  ~UIntegerRegex() override = default;
+};
+
+class BooleanRegex : public IntegerRegex {
+ protected:
+  bool MatchNumber(const std::string &input) const override {
+    const bool is_keyword = (input == "true") || (input == "false");
+    return is_keyword || IntegerRegex::MatchNumber(input);  // else an integer
+  }
+
+ public:
+  BooleanRegex() = default;
+  ~BooleanRegex() override = default;
+};
+
+// Floating-point literals: hex-float, decimal float, nan and infinity.
+class FloatRegex : public RegexMatcher {
+ protected:
+  bool MatchNumber(const std::string &input) const override {
+    const auto opt = std::regex_constants::optimize;
+    static const std::vector<std::regex> patterns = {
+      // hex-float, the binary exponent is mandatory
+      std::regex{
+          R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)", opt },
+      // dec-float, the decimal exponent is optional
+      std::regex{
+          R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)", opt },
+      // nan / inf / infinity, case-insensitive
+      std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)",
+                  opt | std::regex_constants::icase }
+    };
+    return MatchRegexList(input, patterns);
+  }
+
+ public:
+  FloatRegex() = default;
+  ~FloatRegex() override = default;
+};
+
+// Couples the schema type name selected by a type code with the verdict of the
+// matching reference regex for a given input text.
+class ScalarReferenceResult {
+ private:
+  // Instances are created only by Check().
+  ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
+      : type(_type), matched(_matched) {}
+
+ public:
+  // Decode scalar type and check if the input string satisfies the scalar type.
+  // Codes above 0xA fall back to "float".
+  static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
+    static const char *const names[] = { "double", "float",  "int8",   "int16",
+                                         "int32",  "int64",  "uint8",  "uint16",
+                                         "uint32", "uint64", "bool" };
+    if (code > 0xA) return { "float", FloatRegex().Match(input) };
+    const char *const name = names[code];
+    // 0x0-0x1: floats, 0x2-0x5: signed, 0x6-0x9: unsigned, 0xA: bool.
+    if (code <= 0x1) return { name, FloatRegex().Match(input) };
+    if (code <= 0x5) return { name, IntegerRegex().Match(input) };
+    if (code <= 0x9) return { name, UIntegerRegex().Match(input) };
+    return { name, BooleanRegex().Match(input) };
+  }
+
+  const char *type;                         // schema name of the scalar type
+  const RegexMatcher::MatchResult matched;  // verdict of the reference regex
+};
+
+bool Parse(flatbuffers::Parser &parser, const std::string &json,
+ std::string *_text) {
+ auto done = parser.Parse(json.c_str());
+ if (done) {
+ TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
+ true);
+ } else {
+ *_text = parser.error_;
+ }
+ return done;
+}
+
+// Static member definition; presumably runs one-time setup (see test_init.h).
+OneTimeTestInit OneTimeTestInit::one_time_init_;
+
+// Fuzz target. data[0]: flags (scalar type, quote kind), data[1]: extra
+// repetition counter, data[2..]: text checked by both parser and regex.
+// llvm std::regex have problem with stack overflow, limit maximum length.
+// ./scalar_fuzzer -max_len=3000
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Reserve one byte for Parser flags and one byte for repetition counter.
+  if (size < 3) return 0;
+  const uint8_t flags = data[0];
+  // normalize to ascii alphabet
+  const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0;
+  data += 2;
+  size -= 2;  // bypass
+
+  // Guarantee 0-termination.
+  const std::string original(reinterpret_cast<const char *>(data), size);
+  auto input = std::string(original.c_str());  // until '\0'
+  if (input.empty()) return 0;
+
+  // Break comments in json to avoid complexity with regex matcher.
+  // The parser would accept " 12345 /* text */" as the value of a field, but
+  // such strings complicate the regex matcher. They are rejected by the
+  // transformation of "/* text */ 12345" to "@* text */ 12345".
+  BreakSequence(input, "//", '@');  // "//" -> "@/"
+  BreakSequence(input, "/*", '@');  // "/*" -> "@*"
+  // Break all known scalar functions (todo: add them to regex?):
+  for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
+    BreakSequence(input, f, '_');  // ident -> _dent
+  }
+
+  // Check the input with the reference regex of the type encoded in 'flags'.
+  const auto ref_res =
+      ScalarReferenceResult::Check(flags & flags_scalar_type, input);
+  auto &recheck = ref_res.matched;
+
+  // Create parser
+  flatbuffers::IDLOptions opts;
+  opts.force_defaults = true;
+  opts.output_default_scalars_in_json = true;
+  opts.indent_step = -1;
+  opts.strict_json = true;
+
+  flatbuffers::Parser parser(opts);
+  auto schema =
+      "table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
+  TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
+
+  // The fuzzer can adjust the number repetition if a side-effects have found.
+  // Each test should pass at least two times to ensure that the parser doesn't
+  // have any hidden-states or locale-depended effects.
+  for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
+    // Each even run (0,2,4..) will test locale independed code.
+    auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
+    // Set new locale. The call must not live inside FLATBUFFERS_ASSERT: if
+    // that macro expands to nothing (e.g. assert() with NDEBUG) it is skipped.
+    if (use_locale) {
+      TEST_EQ_FUNC(
+          nullptr != setlocale(LC_ALL, OneTimeTestInit::test_locale()), true);
+    }
+
+    // Parse original input as-is.
+    auto orig_scalar = "{ \"Y\" : " + input + " }";
+    std::string orig_back;
+    auto orig_done = Parse(parser, orig_scalar, &orig_back);
+
+    if (recheck.res != orig_done) {
+      // A regex-valid number may still "not fit" the type or be "out of range".
+      const auto not_fit =
+          recheck.res &&
+          ((orig_back.find("does not fit") != std::string::npos) ||
+           (orig_back.find("out of range") != std::string::npos));
+
+      if (false == not_fit) {
+        TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
+                         recheck.res);
+        TEST_EQ_STR(orig_back.c_str(),
+                    input.substr(recheck.pos, recheck.len).c_str());
+        TEST_EQ_FUNC(orig_done, recheck.res);
+      }
+    }
+
+    // Try to make quoted string and test it.
+    std::string quoted_input;
+    if (true == recheck.quoted) {
+      // Quotes can't simply be removed: they may be nested ("'12'") and an
+      // invalid JSON string may become valid afterwards. Leave it as is.
+      assert(recheck.len >= 2);
+    } else {
+      const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
+      quoted_input = input;  // copy
+      quoted_input.insert(recheck.pos + recheck.len, 1, quote);
+      quoted_input.insert(recheck.pos, 1, quote);
+    }
+
+    // Test quoted version of the string
+    if (!quoted_input.empty()) {
+      auto fix_scalar = "{ \"Y\" : " + quoted_input + " }";
+      std::string fix_back;
+      auto fix_done = Parse(parser, fix_scalar, &fix_back);
+
+      if (orig_done != fix_done) {
+        TEST_OUTPUT_LINE("Stage 2 failed: Quoted(%d) != Original(%d)", fix_done,
+                         orig_done);
+        TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
+      }
+      if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
+      TEST_EQ_FUNC(fix_done, orig_done);
+    }
+
+    // Create new parser and test default value
+    if (true == orig_done) {
+      flatbuffers::Parser def_parser(opts);  // re-use options
+      auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
+                        input + "; } root_type X;" +
+                        "{}";  // <- with empty json {}!
+
+      auto def_done = def_parser.Parse(def_schema.c_str());
+      if (false == def_done) {
+        TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
+                         def_parser.error_.c_str());
+        TEST_EQ_FUNC(def_done, true);
+      } else {
+        // Compare with print.
+        std::string ref_string, def_string;
+        const auto ref_buf = parser.builder_.GetBufferPointer();
+        const auto def_buf = def_parser.builder_.GetBufferPointer();
+        TEST_EQ_FUNC(GenerateText(parser, ref_buf, &ref_string), true);
+        TEST_EQ_FUNC(GenerateText(def_parser, def_buf, &def_string), true);
+        if (ref_string != def_string) {
+          TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'",
+                           def_string.c_str(), ref_string.c_str());
+          TEST_EQ_STR(def_string.c_str(), ref_string.c_str());
+        }
+      }
+    }
+
+    // Restore locale.
+    if (use_locale) { TEST_EQ_FUNC(nullptr != setlocale(LC_ALL, "C"), true); }
+  }
+  return 0;
+}