Austin Schuh | 58b9b47 | 2020-11-25 19:12:44 -0800 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2014 Google Inc. All rights reserved. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Austin Schuh | e89fa2d | 2019-08-14 20:24:23 -0700 | [diff] [blame] | 17 | #include <assert.h> |
| 18 | #include <stddef.h> |
| 19 | #include <stdint.h> |
Austin Schuh | 58b9b47 | 2020-11-25 19:12:44 -0800 | [diff] [blame^] | 20 | |
Austin Schuh | e89fa2d | 2019-08-14 20:24:23 -0700 | [diff] [blame] | 21 | #include <algorithm> |
| 22 | #include <clocale> |
| 23 | #include <memory> |
| 24 | #include <regex> |
| 25 | #include <string> |
| 26 | |
| 27 | #include "flatbuffers/idl.h" |
| 28 | #include "test_init.h" |
| 29 | |
// Bit masks applied to the 'flags' byte taken from the fuzzer input.
static constexpr uint8_t flags_scalar_type{ 0x0F };  // type of scalar value
static constexpr uint8_t flags_quotes_kind{ 0x10 };  // quote " or '
// reserved for future: json {named} or [unnamed]
// static constexpr uint8_t flags_json_bracer = 0x20;
| 34 | |
// Find every occurrence of 'subj' in 's' and overwrite the first character of
// each occurrence with 'repl'. The search resumes one character past each hit,
// so overlapping occurrences are broken as well.
// BreakSequence("testest","tes", 'X') -> "XesXest".
// BreakSequence("xxx","xx", 'Y') -> "YYx".
// A null or empty 'subj' matches nothing and leaves 's' untouched.
static void BreakSequence(std::string &s, const char *subj, char repl) {
  // An empty needle "matches" at every offset, including s.size(), where there
  // is no character to overwrite (an s.at() there throws std::out_of_range).
  if (subj == nullptr || *subj == '\0') return;
  for (size_t pos = s.find(subj); pos != std::string::npos;
       pos = s.find(subj, pos + 1)) {
    s[pos] = repl;  // 'pos' comes from find(), so it is always a valid index
  }
}
| 45 | |
// Trim 's' on both ends: every leading and trailing character that belongs to
// the 'pattern' character set is dropped.
// StripString("xy{xy}y", "xy") -> "{xy}"
// If 'pos' is given, it receives the offset in 's' of the first kept
// character, or 0 when nothing is kept.
static std::string StripString(const std::string &s, const char *pattern,
                               size_t *pos = nullptr) {
  const auto begin = s.find_first_not_of(pattern);
  const bool all_stripped = (begin == std::string::npos);
  if (pos != nullptr) *pos = all_stripped ? 0 : begin;
  if (all_stripped) return std::string();
  // At least one character survives, so the backward search must succeed.
  const auto end = s.find_last_not_of(pattern);
  assert(end < s.length());
  assert(begin <= end);
  const auto count = end - begin + 1;
  return s.substr(begin, count);
}
| 61 | |
| 62 | class RegexMatcher { |
| 63 | protected: |
| 64 | virtual bool MatchNumber(const std::string &input) const = 0; |
| 65 | |
| 66 | public: |
| 67 | virtual ~RegexMatcher() = default; |
| 68 | |
| 69 | struct MatchResult { |
| 70 | size_t pos{ 0 }; |
| 71 | size_t len{ 0 }; |
| 72 | bool res{ false }; |
| 73 | bool quoted{ false }; |
| 74 | }; |
| 75 | |
| 76 | MatchResult Match(const std::string &input) const { |
| 77 | MatchResult r; |
| 78 | // strip leading and trailing "spaces" accepted by flatbuffer |
| 79 | auto test = StripString(input, "\t\r\n ", &r.pos); |
| 80 | r.len = test.size(); |
| 81 | // check quotes |
| 82 | if (test.size() >= 2) { |
| 83 | auto fch = test.front(); |
| 84 | auto lch = test.back(); |
| 85 | r.quoted = (fch == lch) && (fch == '\'' || fch == '\"'); |
| 86 | if (r.quoted) { |
| 87 | // remove quotes for regex test |
| 88 | test = test.substr(1, test.size() - 2); |
| 89 | } |
| 90 | } |
| 91 | // Fast check: |
| 92 | if (test.empty()) return r; |
| 93 | // A string with a valid scalar shouldn't have non-ascii or non-printable |
| 94 | // symbols. |
| 95 | for (auto c : test) { |
| 96 | if ((c < ' ') || (c > '~')) return r; |
| 97 | } |
| 98 | // Check with regex |
| 99 | r.res = MatchNumber(test); |
| 100 | return r; |
| 101 | } |
| 102 | |
| 103 | bool MatchRegexList(const std::string &input, |
| 104 | const std::vector<std::regex> &re_list) const { |
| 105 | auto str = StripString(input, " "); |
| 106 | if (str.empty()) return false; |
| 107 | for (auto &re : re_list) { |
| 108 | std::smatch match; |
| 109 | if (std::regex_match(str, match, re)) return true; |
| 110 | } |
| 111 | return false; |
| 112 | } |
| 113 | }; |
| 114 | |
| 115 | class IntegerRegex : public RegexMatcher { |
| 116 | protected: |
| 117 | bool MatchNumber(const std::string &input) const override { |
| 118 | static const std::vector<std::regex> re_list = { |
| 119 | std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize }, |
| 120 | |
Austin Schuh | 272c613 | 2020-11-14 16:37:52 -0800 | [diff] [blame] | 121 | std::regex{ R"(^[-+]?0[xX][0-9a-fA-F]+$)", |
| 122 | std::regex_constants::optimize } |
Austin Schuh | e89fa2d | 2019-08-14 20:24:23 -0700 | [diff] [blame] | 123 | }; |
| 124 | return MatchRegexList(input, re_list); |
| 125 | } |
| 126 | |
| 127 | public: |
| 128 | IntegerRegex() = default; |
| 129 | virtual ~IntegerRegex() = default; |
| 130 | }; |
| 131 | |
| 132 | class UIntegerRegex : public RegexMatcher { |
| 133 | protected: |
| 134 | bool MatchNumber(const std::string &input) const override { |
| 135 | static const std::vector<std::regex> re_list = { |
| 136 | std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize }, |
Austin Schuh | 272c613 | 2020-11-14 16:37:52 -0800 | [diff] [blame] | 137 | std::regex{ R"(^[+]?0[xX][0-9a-fA-F]+$)", |
| 138 | std::regex_constants::optimize }, |
Austin Schuh | e89fa2d | 2019-08-14 20:24:23 -0700 | [diff] [blame] | 139 | // accept -0 number |
| 140 | std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize } |
| 141 | }; |
| 142 | return MatchRegexList(input, re_list); |
| 143 | } |
| 144 | |
| 145 | public: |
| 146 | UIntegerRegex() = default; |
| 147 | virtual ~UIntegerRegex() = default; |
| 148 | }; |
| 149 | |
| 150 | class BooleanRegex : public IntegerRegex { |
| 151 | protected: |
| 152 | bool MatchNumber(const std::string &input) const override { |
| 153 | if (input == "true" || input == "false") return true; |
| 154 | return IntegerRegex::MatchNumber(input); |
| 155 | } |
| 156 | |
| 157 | public: |
| 158 | BooleanRegex() = default; |
| 159 | virtual ~BooleanRegex() = default; |
| 160 | }; |
| 161 | |
| 162 | class FloatRegex : public RegexMatcher { |
| 163 | protected: |
| 164 | bool MatchNumber(const std::string &input) const override { |
| 165 | static const std::vector<std::regex> re_list = { |
| 166 | // hex-float |
| 167 | std::regex{ |
| 168 | R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)", |
| 169 | std::regex_constants::optimize }, |
| 170 | // dec-float |
| 171 | std::regex{ |
| 172 | R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)", |
| 173 | std::regex_constants::optimize }, |
| 174 | |
| 175 | std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)", |
| 176 | std::regex_constants::optimize | std::regex_constants::icase } |
| 177 | }; |
| 178 | return MatchRegexList(input, re_list); |
| 179 | } |
| 180 | |
| 181 | public: |
| 182 | FloatRegex() = default; |
| 183 | virtual ~FloatRegex() = default; |
| 184 | }; |
| 185 | |
| 186 | class ScalarReferenceResult { |
| 187 | private: |
| 188 | ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched) |
| 189 | : type(_type), matched(_matched) {} |
| 190 | |
| 191 | public: |
| 192 | // Decode scalar type and check if the input string satisfies the scalar type. |
| 193 | static ScalarReferenceResult Check(uint8_t code, const std::string &input) { |
| 194 | switch (code) { |
| 195 | case 0x0: return { "double", FloatRegex().Match(input) }; |
| 196 | case 0x1: return { "float", FloatRegex().Match(input) }; |
| 197 | case 0x2: return { "int8", IntegerRegex().Match(input) }; |
| 198 | case 0x3: return { "int16", IntegerRegex().Match(input) }; |
| 199 | case 0x4: return { "int32", IntegerRegex().Match(input) }; |
| 200 | case 0x5: return { "int64", IntegerRegex().Match(input) }; |
| 201 | case 0x6: return { "uint8", UIntegerRegex().Match(input) }; |
| 202 | case 0x7: return { "uint16", UIntegerRegex().Match(input) }; |
| 203 | case 0x8: return { "uint32", UIntegerRegex().Match(input) }; |
| 204 | case 0x9: return { "uint64", UIntegerRegex().Match(input) }; |
| 205 | case 0xA: return { "bool", BooleanRegex().Match(input) }; |
| 206 | default: return { "float", FloatRegex().Match(input) }; |
| 207 | }; |
| 208 | } |
| 209 | |
| 210 | const char *type; |
| 211 | const RegexMatcher::MatchResult matched; |
| 212 | }; |
| 213 | |
| 214 | bool Parse(flatbuffers::Parser &parser, const std::string &json, |
| 215 | std::string *_text) { |
Austin Schuh | 58b9b47 | 2020-11-25 19:12:44 -0800 | [diff] [blame^] | 216 | auto done = parser.ParseJson(json.c_str()); |
Austin Schuh | e89fa2d | 2019-08-14 20:24:23 -0700 | [diff] [blame] | 217 | if (done) { |
| 218 | TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text), |
| 219 | true); |
| 220 | } else { |
| 221 | *_text = parser.error_; |
| 222 | } |
| 223 | return done; |
| 224 | } |
| 225 | |
| 226 | // Utility for test run. |
| 227 | OneTimeTestInit OneTimeTestInit::one_time_init_; |
| 228 | |
| 229 | // llvm std::regex have problem with stack overflow, limit maximum length. |
| 230 | // ./scalar_fuzzer -max_len=3000 |
| 231 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |
| 232 | // Reserve one byte for Parser flags and one byte for repetition counter. |
| 233 | if (size < 3) return 0; |
| 234 | const uint8_t flags = data[0]; |
| 235 | // normalize to ascii alphabet |
Austin Schuh | 272c613 | 2020-11-14 16:37:52 -0800 | [diff] [blame] | 236 | const int extra_rep_number = |
| 237 | std::max(5, (data[1] < '0' ? (data[1] - '0') : 0)); |
Austin Schuh | e89fa2d | 2019-08-14 20:24:23 -0700 | [diff] [blame] | 238 | data += 2; |
| 239 | size -= 2; // bypass |
| 240 | |
| 241 | // Guarantee 0-termination. |
| 242 | const std::string original(reinterpret_cast<const char *>(data), size); |
| 243 | auto input = std::string(original.c_str()); // until '\0' |
| 244 | if (input.empty()) return 0; |
| 245 | |
| 246 | // Break comments in json to avoid complexity with regex matcher. |
| 247 | // The string " 12345 /* text */" will be accepted if insert it to string |
| 248 | // expression: "table X { Y: " + " 12345 /* text */" + "; }. |
| 249 | // But strings like this will complicate regex matcher. |
| 250 | // We reject this by transform "/* text */ 12345" to "@* text */ 12345". |
| 251 | BreakSequence(input, "//", '@'); // "//" -> "@/" |
| 252 | BreakSequence(input, "/*", '@'); // "/*" -> "@*" |
Austin Schuh | 272c613 | 2020-11-14 16:37:52 -0800 | [diff] [blame] | 253 | // { "$schema: "text" } is exceptional case. |
| 254 | // This key:value ignored by the parser. Numbers can not have $. |
| 255 | BreakSequence(input, "$schema", '@'); // "$schema" -> "@schema" |
Austin Schuh | e89fa2d | 2019-08-14 20:24:23 -0700 | [diff] [blame] | 256 | // Break all known scalar functions (todo: add them to regex?): |
| 257 | for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) { |
| 258 | BreakSequence(input, f, '_'); // ident -> ident |
| 259 | } |
| 260 | |
| 261 | // Extract type of scalar from 'flags' and check if the input string satisfies |
| 262 | // the scalar type. |
| 263 | const auto ref_res = |
| 264 | ScalarReferenceResult::Check(flags & flags_scalar_type, input); |
| 265 | auto &recheck = ref_res.matched; |
| 266 | |
| 267 | // Create parser |
| 268 | flatbuffers::IDLOptions opts; |
| 269 | opts.force_defaults = true; |
| 270 | opts.output_default_scalars_in_json = true; |
| 271 | opts.indent_step = -1; |
| 272 | opts.strict_json = true; |
| 273 | |
| 274 | flatbuffers::Parser parser(opts); |
| 275 | auto schema = |
| 276 | "table X { Y: " + std::string(ref_res.type) + "; } root_type X;"; |
| 277 | TEST_EQ_FUNC(parser.Parse(schema.c_str()), true); |
| 278 | |
| 279 | // The fuzzer can adjust the number repetition if a side-effects have found. |
| 280 | // Each test should pass at least two times to ensure that the parser doesn't |
| 281 | // have any hidden-states or locale-depended effects. |
| 282 | for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) { |
| 283 | // Each even run (0,2,4..) will test locale independed code. |
| 284 | auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2)); |
| 285 | // Set new locale. |
| 286 | if (use_locale) { |
| 287 | FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale())); |
| 288 | } |
| 289 | |
| 290 | // Parse original input as-is. |
| 291 | auto orig_scalar = "{ \"Y\" : " + input + " }"; |
| 292 | std::string orig_back; |
| 293 | auto orig_done = Parse(parser, orig_scalar, &orig_back); |
| 294 | |
| 295 | if (recheck.res != orig_done) { |
| 296 | // look for "does not fit" or "doesn't fit" or "out of range" |
| 297 | auto not_fit = |
| 298 | (true == recheck.res) |
| 299 | ? ((orig_back.find("does not fit") != std::string::npos) || |
| 300 | (orig_back.find("out of range") != std::string::npos)) |
| 301 | : false; |
| 302 | |
| 303 | if (false == not_fit) { |
| 304 | TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done, |
| 305 | recheck.res); |
| 306 | TEST_EQ_STR(orig_back.c_str(), |
| 307 | input.substr(recheck.pos, recheck.len).c_str()); |
| 308 | TEST_EQ_FUNC(orig_done, recheck.res); |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | // Try to make quoted string and test it. |
| 313 | std::string qouted_input; |
| 314 | if (true == recheck.quoted) { |
| 315 | // we can't simply remove quotes, they may be nested "'12'". |
| 316 | // Original string "\'12\'" converted to "'12'". |
| 317 | // The string can be an invalid string by JSON rules, but after quotes |
| 318 | // removed can transform to valid. |
| 319 | assert(recheck.len >= 2); |
| 320 | } else { |
| 321 | const auto quote = (flags & flags_quotes_kind) ? '\"' : '\''; |
| 322 | qouted_input = input; // copy |
| 323 | qouted_input.insert(recheck.pos + recheck.len, 1, quote); |
| 324 | qouted_input.insert(recheck.pos, 1, quote); |
| 325 | } |
| 326 | |
| 327 | // Test quoted version of the string |
| 328 | if (!qouted_input.empty()) { |
| 329 | auto fix_scalar = "{ \"Y\" : " + qouted_input + " }"; |
| 330 | std::string fix_back; |
| 331 | auto fix_done = Parse(parser, fix_scalar, &fix_back); |
| 332 | |
| 333 | if (orig_done != fix_done) { |
| 334 | TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done, |
| 335 | orig_done); |
| 336 | TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); |
| 337 | } |
| 338 | if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); } |
| 339 | TEST_EQ_FUNC(fix_done, orig_done); |
| 340 | } |
| 341 | |
| 342 | // Create new parser and test default value |
| 343 | if (true == orig_done) { |
| 344 | flatbuffers::Parser def_parser(opts); // re-use options |
| 345 | auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " + |
| 346 | input + "; } root_type X;" + |
| 347 | "{}"; // <- with empty json {}! |
| 348 | |
| 349 | auto def_done = def_parser.Parse(def_schema.c_str()); |
| 350 | if (false == def_done) { |
| 351 | TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s", |
| 352 | def_parser.error_.c_str()); |
| 353 | FLATBUFFERS_ASSERT(false); |
| 354 | } |
| 355 | // Compare with print. |
| 356 | std::string ref_string, def_string; |
| 357 | FLATBUFFERS_ASSERT(GenerateText( |
| 358 | parser, parser.builder_.GetBufferPointer(), &ref_string)); |
| 359 | FLATBUFFERS_ASSERT(GenerateText( |
| 360 | def_parser, def_parser.builder_.GetBufferPointer(), &def_string)); |
| 361 | if (ref_string != def_string) { |
| 362 | TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(), |
| 363 | ref_string.c_str()); |
| 364 | FLATBUFFERS_ASSERT(false); |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | // Restore locale. |
| 369 | if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); } |
| 370 | } |
| 371 | return 0; |
| 372 | } |