Blame - tests/fuzzer/flatbuffers_scalar_fuzzer.cc - RealtimeRoboticsGroup/test

blob: 074a488854f25b1b4a0184571bf451ceab9f807f [file] [log] [blame]

Austin Schuh	e89fa2d	2019-08-14 20:24:23 -0700	[diff] [blame^]	1	#include <assert.h>
				2	#include <stddef.h>
				3	#include <stdint.h>
				4	#include <algorithm>
				5	#include <clocale>
				6	#include <memory>
				7	#include <regex>
				8	#include <string>
				9
				10	#include "flatbuffers/idl.h"
				11	#include "test_init.h"
				12
				13	static constexpr uint8_t flags_scalar_type = 0x0F; // type of scalar value
				14	static constexpr uint8_t flags_quotes_kind = 0x10; // quote " or '
				15	// reserved for future: json {named} or [unnamed]
				16	// static constexpr uint8_t flags_json_bracer = 0x20;
				17
				18	// Find all 'subj' sub-strings and replace first character of sub-string.
				19	// BreakSequence("testest","tes", 'X') -> "XesXest".
				20	// BreakSequence("xxx","xx", 'Y') -> "YYx".
				21	static void BreakSequence(std::string &s, const char *subj, char repl) {
				22	size_t pos = 0;
				23	while (pos = s.find(subj, pos), pos != std::string::npos) {
				24	s.at(pos) = repl;
				25	pos++;
				26	}
				27	}
				28
				29	// Remove all leading and trailing symbols matched with pattern set.
				30	// StripString("xy{xy}y", "xy") -> "{xy}"
				31	static std::string StripString(const std::string &s, const char *pattern,
				32	size_t *pos = nullptr) {
				33	if (pos) *pos = 0;
				34	// leading
				35	auto first = s.find_first_not_of(pattern);
				36	if (std::string::npos == first) return "";
				37	if (pos) *pos = first;
				38	// trailing
				39	auto last = s.find_last_not_of(pattern);
				40	assert(last < s.length());
				41	assert(first <= last);
				42	return s.substr(first, last - first + 1);
				43	}
				44
				45	class RegexMatcher {
				46	protected:
				47	virtual bool MatchNumber(const std::string &input) const = 0;
				48
				49	public:
				50	virtual ~RegexMatcher() = default;
				51
				52	struct MatchResult {
				53	size_t pos{ 0 };
				54	size_t len{ 0 };
				55	bool res{ false };
				56	bool quoted{ false };
				57	};
				58
				59	MatchResult Match(const std::string &input) const {
				60	MatchResult r;
				61	// strip leading and trailing "spaces" accepted by flatbuffer
				62	auto test = StripString(input, "\t\r\n ", &r.pos);
				63	r.len = test.size();
				64	// check quotes
				65	if (test.size() >= 2) {
				66	auto fch = test.front();
				67	auto lch = test.back();
				68	r.quoted = (fch == lch) && (fch == '\'' \|\| fch == '\"');
				69	if (r.quoted) {
				70	// remove quotes for regex test
				71	test = test.substr(1, test.size() - 2);
				72	}
				73	}
				74	// Fast check:
				75	if (test.empty()) return r;
				76	// A string with a valid scalar shouldn't have non-ascii or non-printable
				77	// symbols.
				78	for (auto c : test) {
				79	if ((c < ' ') \|\| (c > '~')) return r;
				80	}
				81	// Check with regex
				82	r.res = MatchNumber(test);
				83	return r;
				84	}
				85
				86	bool MatchRegexList(const std::string &input,
				87	const std::vector<std::regex> &re_list) const {
				88	auto str = StripString(input, " ");
				89	if (str.empty()) return false;
				90	for (auto &re : re_list) {
				91	std::smatch match;
				92	if (std::regex_match(str, match, re)) return true;
				93	}
				94	return false;
				95	}
				96	};
				97
				98	class IntegerRegex : public RegexMatcher {
				99	protected:
				100	bool MatchNumber(const std::string &input) const override {
				101	static const std::vector<std::regex> re_list = {
				102	std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize },
				103
				104	std::regex{
				105	R"(^[-+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize }
				106	};
				107	return MatchRegexList(input, re_list);
				108	}
				109
				110	public:
				111	IntegerRegex() = default;
				112	virtual ~IntegerRegex() = default;
				113	};
				114
				115	class UIntegerRegex : public RegexMatcher {
				116	protected:
				117	bool MatchNumber(const std::string &input) const override {
				118	static const std::vector<std::regex> re_list = {
				119	std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize },
				120	std::regex{
				121	R"(^[+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize },
				122	// accept -0 number
				123	std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize }
				124	};
				125	return MatchRegexList(input, re_list);
				126	}
				127
				128	public:
				129	UIntegerRegex() = default;
				130	virtual ~UIntegerRegex() = default;
				131	};
				132
				133	class BooleanRegex : public IntegerRegex {
				134	protected:
				135	bool MatchNumber(const std::string &input) const override {
				136	if (input == "true" \|\| input == "false") return true;
				137	return IntegerRegex::MatchNumber(input);
				138	}
				139
				140	public:
				141	BooleanRegex() = default;
				142	virtual ~BooleanRegex() = default;
				143	};
				144
				145	class FloatRegex : public RegexMatcher {
				146	protected:
				147	bool MatchNumber(const std::string &input) const override {
				148	static const std::vector<std::regex> re_list = {
				149	// hex-float
				150	std::regex{
				151	R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)\|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)\|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
				152	std::regex_constants::optimize },
				153	// dec-float
				154	std::regex{
				155	R"(^[-+]?(?:(?:[.][0-9]+)\|(?:[0-9]+[.][0-9]*)\|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
				156	std::regex_constants::optimize },
				157
				158	std::regex{ R"(^[-+]?(?:nan\|inf\|infinity)$)",
				159	std::regex_constants::optimize \| std::regex_constants::icase }
				160	};
				161	return MatchRegexList(input, re_list);
				162	}
				163
				164	public:
				165	FloatRegex() = default;
				166	virtual ~FloatRegex() = default;
				167	};
				168
				169	class ScalarReferenceResult {
				170	private:
				171	ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
				172	: type(_type), matched(_matched) {}
				173
				174	public:
				175	// Decode scalar type and check if the input string satisfies the scalar type.
				176	static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
				177	switch (code) {
				178	case 0x0: return { "double", FloatRegex().Match(input) };
				179	case 0x1: return { "float", FloatRegex().Match(input) };
				180	case 0x2: return { "int8", IntegerRegex().Match(input) };
				181	case 0x3: return { "int16", IntegerRegex().Match(input) };
				182	case 0x4: return { "int32", IntegerRegex().Match(input) };
				183	case 0x5: return { "int64", IntegerRegex().Match(input) };
				184	case 0x6: return { "uint8", UIntegerRegex().Match(input) };
				185	case 0x7: return { "uint16", UIntegerRegex().Match(input) };
				186	case 0x8: return { "uint32", UIntegerRegex().Match(input) };
				187	case 0x9: return { "uint64", UIntegerRegex().Match(input) };
				188	case 0xA: return { "bool", BooleanRegex().Match(input) };
				189	default: return { "float", FloatRegex().Match(input) };
				190	};
				191	}
				192
				193	const char *type;
				194	const RegexMatcher::MatchResult matched;
				195	};
				196
				197	bool Parse(flatbuffers::Parser &parser, const std::string &json,
				198	std::string *_text) {
				199	auto done = parser.Parse(json.c_str());
				200	if (done) {
				201	TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
				202	true);
				203	} else {
				204	*_text = parser.error_;
				205	}
				206	return done;
				207	}
				208
// Utility for test run.
// Out-of-class definition of the static data member
// OneTimeTestInit::one_time_init_; the class itself is declared outside this
// file. NOTE(review): presumably declared in "test_init.h" and meant to run
// the one-time test setup from its constructor - confirm against the header.
OneTimeTestInit OneTimeTestInit::one_time_init_;
				211
				212	// llvm std::regex have problem with stack overflow, limit maximum length.
				213	// ./scalar_fuzzer -max_len=3000
				214	extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
				215	// Reserve one byte for Parser flags and one byte for repetition counter.
				216	if (size < 3) return 0;
				217	const uint8_t flags = data[0];
				218	// normalize to ascii alphabet
				219	const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0;
				220	data += 2;
				221	size -= 2; // bypass
				222
				223	// Guarantee 0-termination.
				224	const std::string original(reinterpret_cast<const char *>(data), size);
				225	auto input = std::string(original.c_str()); // until '\0'
				226	if (input.empty()) return 0;
				227
				228	// Break comments in json to avoid complexity with regex matcher.
				229	// The string " 12345 /* text */" will be accepted if insert it to string
				230	// expression: "table X { Y: " + " 12345 /* text */" + "; }.
				231	// But strings like this will complicate regex matcher.
				232	// We reject this by transform "/* text / 12345" to "@ text */ 12345".
				233	BreakSequence(input, "//", '@'); // "//" -> "@/"
				234	BreakSequence(input, "/", '@'); // "/" -> "@*"
				235	// Break all known scalar functions (todo: add them to regex?):
				236	for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
				237	BreakSequence(input, f, '_'); // ident -> ident
				238	}
				239
				240	// Extract type of scalar from 'flags' and check if the input string satisfies
				241	// the scalar type.
				242	const auto ref_res =
				243	ScalarReferenceResult::Check(flags & flags_scalar_type, input);
				244	auto &recheck = ref_res.matched;
				245
				246	// Create parser
				247	flatbuffers::IDLOptions opts;
				248	opts.force_defaults = true;
				249	opts.output_default_scalars_in_json = true;
				250	opts.indent_step = -1;
				251	opts.strict_json = true;
				252
				253	flatbuffers::Parser parser(opts);
				254	auto schema =
				255	"table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
				256	TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
				257
				258	// The fuzzer can adjust the number repetition if a side-effects have found.
				259	// Each test should pass at least two times to ensure that the parser doesn't
				260	// have any hidden-states or locale-depended effects.
				261	for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
				262	// Each even run (0,2,4..) will test locale independed code.
				263	auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
				264	// Set new locale.
				265	if (use_locale) {
				266	FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
				267	}
				268
				269	// Parse original input as-is.
				270	auto orig_scalar = "{ \"Y\" : " + input + " }";
				271	std::string orig_back;
				272	auto orig_done = Parse(parser, orig_scalar, &orig_back);
				273
				274	if (recheck.res != orig_done) {
				275	// look for "does not fit" or "doesn't fit" or "out of range"
				276	auto not_fit =
				277	(true == recheck.res)
				278	? ((orig_back.find("does not fit") != std::string::npos) \|\|
				279	(orig_back.find("out of range") != std::string::npos))
				280	: false;
				281
				282	if (false == not_fit) {
				283	TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
				284	recheck.res);
				285	TEST_EQ_STR(orig_back.c_str(),
				286	input.substr(recheck.pos, recheck.len).c_str());
				287	TEST_EQ_FUNC(orig_done, recheck.res);
				288	}
				289	}
				290
				291	// Try to make quoted string and test it.
				292	std::string qouted_input;
				293	if (true == recheck.quoted) {
				294	// we can't simply remove quotes, they may be nested "'12'".
				295	// Original string "\'12\'" converted to "'12'".
				296	// The string can be an invalid string by JSON rules, but after quotes
				297	// removed can transform to valid.
				298	assert(recheck.len >= 2);
				299	} else {
				300	const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
				301	qouted_input = input; // copy
				302	qouted_input.insert(recheck.pos + recheck.len, 1, quote);
				303	qouted_input.insert(recheck.pos, 1, quote);
				304	}
				305
				306	// Test quoted version of the string
				307	if (!qouted_input.empty()) {
				308	auto fix_scalar = "{ \"Y\" : " + qouted_input + " }";
				309	std::string fix_back;
				310	auto fix_done = Parse(parser, fix_scalar, &fix_back);
				311
				312	if (orig_done != fix_done) {
				313	TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
				314	orig_done);
				315	TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
				316	}
				317	if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
				318	TEST_EQ_FUNC(fix_done, orig_done);
				319	}
				320
				321	// Create new parser and test default value
				322	if (true == orig_done) {
				323	flatbuffers::Parser def_parser(opts); // re-use options
				324	auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
				325	input + "; } root_type X;" +
				326	"{}"; // <- with empty json {}!
				327
				328	auto def_done = def_parser.Parse(def_schema.c_str());
				329	if (false == def_done) {
				330	TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
				331	def_parser.error_.c_str());
				332	FLATBUFFERS_ASSERT(false);
				333	}
				334	// Compare with print.
				335	std::string ref_string, def_string;
				336	FLATBUFFERS_ASSERT(GenerateText(
				337	parser, parser.builder_.GetBufferPointer(), &ref_string));
				338	FLATBUFFERS_ASSERT(GenerateText(
				339	def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
				340	if (ref_string != def_string) {
				341	TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
				342	ref_string.c_str());
				343	FLATBUFFERS_ASSERT(false);
				344	}
				345	}
				346
				347	// Restore locale.
				348	if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
				349	}
				350	return 0;
				351	}