blob: 3c711fcb3e5fc22dcf645799c2e1fab49ac4ba47 [file] [log] [blame]
Austin Schuh58b9b472020-11-25 19:12:44 -08001/*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Austin Schuhe89fa2d2019-08-14 20:24:23 -070017#include <assert.h>
18#include <stddef.h>
19#include <stdint.h>
Austin Schuh58b9b472020-11-25 19:12:44 -080020
Austin Schuhe89fa2d2019-08-14 20:24:23 -070021#include <algorithm>
22#include <clocale>
23#include <memory>
24#include <regex>
25#include <string>
26
27#include "flatbuffers/idl.h"
28#include "test_init.h"
29
// Bit masks applied to the "flags" byte of the fuzzer input.
// Low four bits: code of the scalar type under test.
static constexpr uint8_t flags_scalar_type{ 0x0F };
// Quote character used when the value gets wrapped: " if set, ' if clear.
static constexpr uint8_t flags_quotes_kind{ 0x10 };
// Reserved for future use: json {named} or [unnamed].
// static constexpr uint8_t flags_json_bracer = 0x20;
34
// Find every occurrence of 'subj' in 's' (overlapping ones included) and
// overwrite the first character of each occurrence with 'repl'.
// The search resumes one character after each hit, so a character that has
// already been replaced takes part in the following comparisons.
//   BreakSequence("testest", "tes", 'X') -> "XesXest".
//   BreakSequence("xxx", "xx", 'Y') -> "YYx".
// A null or empty 'subj' leaves 's' untouched: an empty pattern "matches" at
// every offset, including s.size(), where there is no character to overwrite.
static void BreakSequence(std::string &s, const char *subj, char repl) {
  if (!subj || '\0' == *subj) return;
  for (auto pos = s.find(subj); pos != std::string::npos;
       pos = s.find(subj, pos + 1)) {
    s[pos] = repl;  // 'pos' is a valid index: a non-empty match starts there
  }
}
45
// Return a copy of 's' without the leading and trailing characters that
// belong to the character set 'pattern'.
//   StripString("xy{xy}y", "xy") -> "{xy}"
// If 'pos' is not null it receives the offset in 's' of the first character
// that was kept, or 0 when nothing is left after stripping.
static std::string StripString(const std::string &s, const char *pattern,
                               size_t *pos = nullptr) {
  const auto begin = s.find_first_not_of(pattern);
  const bool nothing_left = (std::string::npos == begin);
  if (pos) *pos = nothing_left ? 0 : begin;
  if (nothing_left) return std::string();
  // One past the last character to keep. A kept character exists, so the
  // backward search can not fail here.
  const auto end = s.find_last_not_of(pattern) + 1;
  assert(end <= s.length());
  assert(begin < end);
  return std::string(s, begin, end - begin);
}
61
62class RegexMatcher {
63 protected:
64 virtual bool MatchNumber(const std::string &input) const = 0;
65
66 public:
67 virtual ~RegexMatcher() = default;
68
69 struct MatchResult {
70 size_t pos{ 0 };
71 size_t len{ 0 };
72 bool res{ false };
73 bool quoted{ false };
74 };
75
76 MatchResult Match(const std::string &input) const {
77 MatchResult r;
78 // strip leading and trailing "spaces" accepted by flatbuffer
79 auto test = StripString(input, "\t\r\n ", &r.pos);
80 r.len = test.size();
81 // check quotes
82 if (test.size() >= 2) {
83 auto fch = test.front();
84 auto lch = test.back();
85 r.quoted = (fch == lch) && (fch == '\'' || fch == '\"');
86 if (r.quoted) {
87 // remove quotes for regex test
88 test = test.substr(1, test.size() - 2);
89 }
90 }
91 // Fast check:
92 if (test.empty()) return r;
93 // A string with a valid scalar shouldn't have non-ascii or non-printable
94 // symbols.
95 for (auto c : test) {
96 if ((c < ' ') || (c > '~')) return r;
97 }
98 // Check with regex
99 r.res = MatchNumber(test);
100 return r;
101 }
102
103 bool MatchRegexList(const std::string &input,
104 const std::vector<std::regex> &re_list) const {
105 auto str = StripString(input, " ");
106 if (str.empty()) return false;
107 for (auto &re : re_list) {
108 std::smatch match;
109 if (std::regex_match(str, match, re)) return true;
110 }
111 return false;
112 }
113};
114
115class IntegerRegex : public RegexMatcher {
116 protected:
117 bool MatchNumber(const std::string &input) const override {
118 static const std::vector<std::regex> re_list = {
119 std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize },
120
Austin Schuh272c6132020-11-14 16:37:52 -0800121 std::regex{ R"(^[-+]?0[xX][0-9a-fA-F]+$)",
122 std::regex_constants::optimize }
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700123 };
124 return MatchRegexList(input, re_list);
125 }
126
127 public:
128 IntegerRegex() = default;
129 virtual ~IntegerRegex() = default;
130};
131
132class UIntegerRegex : public RegexMatcher {
133 protected:
134 bool MatchNumber(const std::string &input) const override {
135 static const std::vector<std::regex> re_list = {
136 std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize },
Austin Schuh272c6132020-11-14 16:37:52 -0800137 std::regex{ R"(^[+]?0[xX][0-9a-fA-F]+$)",
138 std::regex_constants::optimize },
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700139 // accept -0 number
140 std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize }
141 };
142 return MatchRegexList(input, re_list);
143 }
144
145 public:
146 UIntegerRegex() = default;
147 virtual ~UIntegerRegex() = default;
148};
149
150class BooleanRegex : public IntegerRegex {
151 protected:
152 bool MatchNumber(const std::string &input) const override {
153 if (input == "true" || input == "false") return true;
154 return IntegerRegex::MatchNumber(input);
155 }
156
157 public:
158 BooleanRegex() = default;
159 virtual ~BooleanRegex() = default;
160};
161
162class FloatRegex : public RegexMatcher {
163 protected:
164 bool MatchNumber(const std::string &input) const override {
165 static const std::vector<std::regex> re_list = {
166 // hex-float
167 std::regex{
168 R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
169 std::regex_constants::optimize },
170 // dec-float
171 std::regex{
172 R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
173 std::regex_constants::optimize },
174
175 std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)",
176 std::regex_constants::optimize | std::regex_constants::icase }
177 };
178 return MatchRegexList(input, re_list);
179 }
180
181 public:
182 FloatRegex() = default;
183 virtual ~FloatRegex() = default;
184};
185
186class ScalarReferenceResult {
187 private:
188 ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
189 : type(_type), matched(_matched) {}
190
191 public:
192 // Decode scalar type and check if the input string satisfies the scalar type.
193 static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
194 switch (code) {
195 case 0x0: return { "double", FloatRegex().Match(input) };
196 case 0x1: return { "float", FloatRegex().Match(input) };
197 case 0x2: return { "int8", IntegerRegex().Match(input) };
198 case 0x3: return { "int16", IntegerRegex().Match(input) };
199 case 0x4: return { "int32", IntegerRegex().Match(input) };
200 case 0x5: return { "int64", IntegerRegex().Match(input) };
201 case 0x6: return { "uint8", UIntegerRegex().Match(input) };
202 case 0x7: return { "uint16", UIntegerRegex().Match(input) };
203 case 0x8: return { "uint32", UIntegerRegex().Match(input) };
204 case 0x9: return { "uint64", UIntegerRegex().Match(input) };
205 case 0xA: return { "bool", BooleanRegex().Match(input) };
206 default: return { "float", FloatRegex().Match(input) };
207 };
208 }
209
210 const char *type;
211 const RegexMatcher::MatchResult matched;
212};
213
214bool Parse(flatbuffers::Parser &parser, const std::string &json,
215 std::string *_text) {
Austin Schuh58b9b472020-11-25 19:12:44 -0800216 auto done = parser.ParseJson(json.c_str());
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700217 if (done) {
218 TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
219 true);
220 } else {
221 *_text = parser.error_;
222 }
223 return done;
224}
225
226// Utility for test run.
227OneTimeTestInit OneTimeTestInit::one_time_init_;
228
229// llvm std::regex have problem with stack overflow, limit maximum length.
230// ./scalar_fuzzer -max_len=3000
231extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
232 // Reserve one byte for Parser flags and one byte for repetition counter.
233 if (size < 3) return 0;
234 const uint8_t flags = data[0];
235 // normalize to ascii alphabet
Austin Schuh272c6132020-11-14 16:37:52 -0800236 const int extra_rep_number =
237 std::max(5, (data[1] < '0' ? (data[1] - '0') : 0));
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700238 data += 2;
239 size -= 2; // bypass
240
241 // Guarantee 0-termination.
242 const std::string original(reinterpret_cast<const char *>(data), size);
243 auto input = std::string(original.c_str()); // until '\0'
244 if (input.empty()) return 0;
245
246 // Break comments in json to avoid complexity with regex matcher.
247 // The string " 12345 /* text */" will be accepted if insert it to string
248 // expression: "table X { Y: " + " 12345 /* text */" + "; }.
249 // But strings like this will complicate regex matcher.
250 // We reject this by transform "/* text */ 12345" to "@* text */ 12345".
251 BreakSequence(input, "//", '@'); // "//" -> "@/"
252 BreakSequence(input, "/*", '@'); // "/*" -> "@*"
Austin Schuh272c6132020-11-14 16:37:52 -0800253 // { "$schema: "text" } is exceptional case.
254 // This key:value ignored by the parser. Numbers can not have $.
255 BreakSequence(input, "$schema", '@'); // "$schema" -> "@schema"
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700256 // Break all known scalar functions (todo: add them to regex?):
257 for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
258 BreakSequence(input, f, '_'); // ident -> ident
259 }
260
261 // Extract type of scalar from 'flags' and check if the input string satisfies
262 // the scalar type.
263 const auto ref_res =
264 ScalarReferenceResult::Check(flags & flags_scalar_type, input);
265 auto &recheck = ref_res.matched;
266
267 // Create parser
268 flatbuffers::IDLOptions opts;
269 opts.force_defaults = true;
270 opts.output_default_scalars_in_json = true;
271 opts.indent_step = -1;
272 opts.strict_json = true;
273
274 flatbuffers::Parser parser(opts);
275 auto schema =
276 "table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
277 TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
278
279 // The fuzzer can adjust the number repetition if a side-effects have found.
280 // Each test should pass at least two times to ensure that the parser doesn't
281 // have any hidden-states or locale-depended effects.
282 for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
283 // Each even run (0,2,4..) will test locale independed code.
284 auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
285 // Set new locale.
286 if (use_locale) {
287 FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
288 }
289
290 // Parse original input as-is.
291 auto orig_scalar = "{ \"Y\" : " + input + " }";
292 std::string orig_back;
293 auto orig_done = Parse(parser, orig_scalar, &orig_back);
294
295 if (recheck.res != orig_done) {
296 // look for "does not fit" or "doesn't fit" or "out of range"
297 auto not_fit =
298 (true == recheck.res)
299 ? ((orig_back.find("does not fit") != std::string::npos) ||
300 (orig_back.find("out of range") != std::string::npos))
301 : false;
302
303 if (false == not_fit) {
304 TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
305 recheck.res);
306 TEST_EQ_STR(orig_back.c_str(),
307 input.substr(recheck.pos, recheck.len).c_str());
308 TEST_EQ_FUNC(orig_done, recheck.res);
309 }
310 }
311
312 // Try to make quoted string and test it.
313 std::string qouted_input;
314 if (true == recheck.quoted) {
315 // we can't simply remove quotes, they may be nested "'12'".
316 // Original string "\'12\'" converted to "'12'".
317 // The string can be an invalid string by JSON rules, but after quotes
318 // removed can transform to valid.
319 assert(recheck.len >= 2);
320 } else {
321 const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
322 qouted_input = input; // copy
323 qouted_input.insert(recheck.pos + recheck.len, 1, quote);
324 qouted_input.insert(recheck.pos, 1, quote);
325 }
326
327 // Test quoted version of the string
328 if (!qouted_input.empty()) {
329 auto fix_scalar = "{ \"Y\" : " + qouted_input + " }";
330 std::string fix_back;
331 auto fix_done = Parse(parser, fix_scalar, &fix_back);
332
333 if (orig_done != fix_done) {
334 TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
335 orig_done);
336 TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
337 }
338 if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
339 TEST_EQ_FUNC(fix_done, orig_done);
340 }
341
342 // Create new parser and test default value
343 if (true == orig_done) {
344 flatbuffers::Parser def_parser(opts); // re-use options
345 auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
346 input + "; } root_type X;" +
347 "{}"; // <- with empty json {}!
348
349 auto def_done = def_parser.Parse(def_schema.c_str());
350 if (false == def_done) {
351 TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
352 def_parser.error_.c_str());
353 FLATBUFFERS_ASSERT(false);
354 }
355 // Compare with print.
356 std::string ref_string, def_string;
357 FLATBUFFERS_ASSERT(GenerateText(
358 parser, parser.builder_.GetBufferPointer(), &ref_string));
359 FLATBUFFERS_ASSERT(GenerateText(
360 def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
361 if (ref_string != def_string) {
362 TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
363 ref_string.c_str());
364 FLATBUFFERS_ASSERT(false);
365 }
366 }
367
368 // Restore locale.
369 if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
370 }
371 return 0;
372}