blob: 66883faaa1f3db21e6cb4119d5f00634f1e97e87 [file] [log] [blame]
#include "fuzz_test.h"

#include <vector>

#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/idl.h"
#include "test_assert.h"
7namespace flatbuffers {
8namespace tests {
9namespace {
10
// Simple deterministic random number generator, kept local so fuzz results
// are reproducible across platforms and standard libraries.
// http://en.wikipedia.org/wiki/Park%E2%80%93Miller_random_number_generator
uint32_t lcg_seed = 48271;
// Advances the generator state and returns the new value. The multiply is
// widened to 64 bits so the product cannot overflow before the reduction.
uint32_t lcg_rand() {
  const uint64_t product = static_cast<uint64_t>(lcg_seed) * 279470273UL;
  lcg_seed = static_cast<uint32_t>(product % 4294967291UL);
  return lcg_seed;
}
// Restores the initial seed so a second pass replays the same sequence.
void lcg_reset() { lcg_seed = 48271; }
20
21template<typename T>
22static void CompareTableFieldValue(flatbuffers::Table *table,
23 flatbuffers::voffset_t voffset, T val) {
24 T read = table->GetField(voffset, static_cast<T>(0));
25 TEST_EQ(read, val);
26}
27
28} // namespace
29
30// Low level stress/fuzz test: serialize/deserialize a variety of
31// different kinds of data in different combinations
32void FuzzTest1() {
33 // Values we're testing against: chosen to ensure no bits get chopped
34 // off anywhere, and also be different from eachother.
35 const uint8_t bool_val = true;
36 const int8_t char_val = -127; // 0x81
37 const uint8_t uchar_val = 0xFF;
38 const int16_t short_val = -32222; // 0x8222;
39 const uint16_t ushort_val = 0xFEEE;
40 const int32_t int_val = 0x83333333;
41 const uint32_t uint_val = 0xFDDDDDDD;
42 const int64_t long_val = 0x8444444444444444LL;
43 const uint64_t ulong_val = 0xFCCCCCCCCCCCCCCCULL;
44 const float float_val = 3.14159f;
45 const double double_val = 3.14159265359;
46
47 const int test_values_max = 11;
48 const flatbuffers::voffset_t fields_per_object = 4;
49 const int num_fuzz_objects = 10000; // The higher, the more thorough :)
50
51 flatbuffers::FlatBufferBuilder builder;
52
53 lcg_reset(); // Keep it deterministic.
54
55 flatbuffers::uoffset_t objects[num_fuzz_objects];
56
57 // Generate num_fuzz_objects random objects each consisting of
58 // fields_per_object fields, each of a random type.
59 for (int i = 0; i < num_fuzz_objects; i++) {
60 auto start = builder.StartTable();
61 for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
62 int choice = lcg_rand() % test_values_max;
63 auto off = flatbuffers::FieldIndexToOffset(f);
64 switch (choice) {
65 case 0: builder.AddElement<uint8_t>(off, bool_val, 0); break;
66 case 1: builder.AddElement<int8_t>(off, char_val, 0); break;
67 case 2: builder.AddElement<uint8_t>(off, uchar_val, 0); break;
68 case 3: builder.AddElement<int16_t>(off, short_val, 0); break;
69 case 4: builder.AddElement<uint16_t>(off, ushort_val, 0); break;
70 case 5: builder.AddElement<int32_t>(off, int_val, 0); break;
71 case 6: builder.AddElement<uint32_t>(off, uint_val, 0); break;
72 case 7: builder.AddElement<int64_t>(off, long_val, 0); break;
73 case 8: builder.AddElement<uint64_t>(off, ulong_val, 0); break;
74 case 9: builder.AddElement<float>(off, float_val, 0); break;
75 case 10: builder.AddElement<double>(off, double_val, 0); break;
76 }
77 }
78 objects[i] = builder.EndTable(start);
79 }
80 builder.PreAlign<flatbuffers::largest_scalar_t>(0); // Align whole buffer.
81
82 lcg_reset(); // Reset.
83
84 uint8_t *eob = builder.GetCurrentBufferPointer() + builder.GetSize();
85
86 // Test that all objects we generated are readable and return the
87 // expected values. We generate random objects in the same order
88 // so this is deterministic.
89 for (int i = 0; i < num_fuzz_objects; i++) {
90 auto table = reinterpret_cast<flatbuffers::Table *>(eob - objects[i]);
91 for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
92 int choice = lcg_rand() % test_values_max;
93 flatbuffers::voffset_t off = flatbuffers::FieldIndexToOffset(f);
94 switch (choice) {
95 case 0: CompareTableFieldValue(table, off, bool_val); break;
96 case 1: CompareTableFieldValue(table, off, char_val); break;
97 case 2: CompareTableFieldValue(table, off, uchar_val); break;
98 case 3: CompareTableFieldValue(table, off, short_val); break;
99 case 4: CompareTableFieldValue(table, off, ushort_val); break;
100 case 5: CompareTableFieldValue(table, off, int_val); break;
101 case 6: CompareTableFieldValue(table, off, uint_val); break;
102 case 7: CompareTableFieldValue(table, off, long_val); break;
103 case 8: CompareTableFieldValue(table, off, ulong_val); break;
104 case 9: CompareTableFieldValue(table, off, float_val); break;
105 case 10: CompareTableFieldValue(table, off, double_val); break;
106 }
107 }
108 }
109}
110
// High level stress/fuzz test: generate a big schema and
// matching json data in random combinations, then parse both,
// generate json back from the binary, and compare with the original.
// The write and read sides both depend on replaying the same lcg_rand()
// sequence, so the order of random calls below must not change.
void FuzzTest2() {
  lcg_reset();  // Keep it deterministic.

  const int num_definitions = 30;
  const int num_struct_definitions = 5;  // Subset of num_definitions.
  const int fields_per_definition = 15;
  const int instances_per_definition = 5;
  const int deprecation_rate = 10;  // 1 in deprecation_rate fields will
                                    // be deprecated.

  std::string schema = "namespace test;\n\n";

  struct RndDef {
    // Parallel JSON instances for this definition; all get the same
    // structure, but each scalar field receives its own random value.
    std::string instances[instances_per_definition];

    // Since we're generating schema and corresponding data in tandem,
    // this convenience function adds strings to both at once.
    static void Add(RndDef (&definitions_l)[num_definitions],
                    std::string &schema_l, const int instances_per_definition_l,
                    const char *schema_add, const char *instance_add,
                    int definition) {
      schema_l += schema_add;
      for (int i = 0; i < instances_per_definition_l; i++)
        definitions_l[definition].instances[i] += instance_add;
    }
  };

  // NOTE: these macros capture `definitions`, `schema` and `definition`
  // from the expansion site, and are not #undef'd, so they remain visible
  // for the rest of the translation unit.
  // clang-format off
  #define AddToSchemaAndInstances(schema_add, instance_add) \
    RndDef::Add(definitions, schema, instances_per_definition, \
                schema_add, instance_add, definition)

  #define Dummy() \
    RndDef::Add(definitions, schema, instances_per_definition, \
                "byte", "1", definition)
  // clang-format on

  RndDef definitions[num_definitions];

  // We are going to generate num_definitions, the first
  // num_struct_definitions will be structs, the rest tables. For each
  // generate random fields, some of which may be struct/table types
  // referring to previously generated structs/tables.
  // Simultanenously, we generate instances_per_definition JSON data
  // definitions, which will have identical structure to the schema
  // being generated. We generate multiple instances such that when creating
  // hierarchy, we get some variety by picking one randomly.
  for (int definition = 0; definition < num_definitions; definition++) {
    std::string definition_name = "D" + flatbuffers::NumToString(definition);

    bool is_struct = definition < num_struct_definitions;

    AddToSchemaAndInstances(
        ((is_struct ? "struct " : "table ") + definition_name + " {\n").c_str(),
        "{\n");

    for (int field = 0; field < fields_per_definition; field++) {
      const bool is_last_field = field == fields_per_definition - 1;

      // Deprecate 1 in deprecation_rate fields. Only table fields can be
      // deprecated.
      // Don't deprecate the last field to avoid dangling commas in JSON.
      const bool deprecated =
          !is_struct && !is_last_field && (lcg_rand() % deprecation_rate == 0);

      std::string field_name = "f" + flatbuffers::NumToString(field);
      // Deprecated fields still appear in the schema but contribute no JSON.
      AddToSchemaAndInstances((" " + field_name + ":").c_str(),
                              deprecated ? "" : (field_name + ": ").c_str());
      // Pick random type:
      auto base_type = static_cast<flatbuffers::BaseType>(
          lcg_rand() % (flatbuffers::BASE_TYPE_UNION + 1));
      switch (base_type) {
        case flatbuffers::BASE_TYPE_STRING:
          if (is_struct) {
            Dummy();  // No strings in structs.
          } else {
            AddToSchemaAndInstances("string", deprecated ? "" : "\"hi\"");
          }
          break;
        case flatbuffers::BASE_TYPE_VECTOR:
          if (is_struct) {
            Dummy();  // No vectors in structs.
          } else {
            AddToSchemaAndInstances("[ubyte]",
                                    deprecated ? "" : "[\n0,\n1,\n255\n]");
          }
          break;
        case flatbuffers::BASE_TYPE_NONE:
        case flatbuffers::BASE_TYPE_UTYPE:
        case flatbuffers::BASE_TYPE_STRUCT:
        case flatbuffers::BASE_TYPE_UNION:
          if (definition) {
            // Pick a random previous definition and random data instance of
            // that definition.
            int defref = lcg_rand() % definition;
            int instance = lcg_rand() % instances_per_definition;
            AddToSchemaAndInstances(
                ("D" + flatbuffers::NumToString(defref)).c_str(),
                deprecated ? ""
                           : definitions[defref].instances[instance].c_str());
          } else {
            // If this is the first definition, we have no definition we can
            // refer to.
            Dummy();
          }
          break;
        case flatbuffers::BASE_TYPE_BOOL:
          AddToSchemaAndInstances(
              "bool", deprecated ? "" : (lcg_rand() % 2 ? "true" : "false"));
          break;
        case flatbuffers::BASE_TYPE_ARRAY:
          if (!is_struct) {
            AddToSchemaAndInstances(
                "ubyte",
                deprecated ? "" : "255");  // No fixed-length arrays in tables.
          } else {
            AddToSchemaAndInstances("[int:3]", deprecated ? "" : "[\n,\n,\n]");
          }
          break;
        default:
          // All the scalar types.
          schema += flatbuffers::kTypeNames[base_type];

          if (!deprecated) {
            // We want each instance to use its own random value.
            for (int inst = 0; inst < instances_per_definition; inst++)
              definitions[definition].instances[inst] +=
                  flatbuffers::IsFloat(base_type)
                      ? flatbuffers::NumToString<double>(lcg_rand() % 128)
                            .c_str()
                      : flatbuffers::NumToString<int>(lcg_rand() % 128).c_str();
          }
      }
      // Close the field in the schema; in JSON, emit the separator (comma,
      // or newline for the last field), or nothing if deprecated.
      AddToSchemaAndInstances(deprecated ? "(deprecated);\n" : ";\n",
                              deprecated ? ""
                              : is_last_field ? "\n"
                                              : ",\n");
    }
    AddToSchemaAndInstances("}\n\n", "}");
  }

  schema += "root_type D" + flatbuffers::NumToString(num_definitions - 1);
  schema += ";\n";

  flatbuffers::Parser parser;

  // Will not compare against the original if we don't write defaults
  parser.builder_.ForceDefaults(true);

  // Parse the schema, parse the generated data, then generate text back
  // from the binary and compare against the original.
  TEST_EQ(parser.Parse(schema.c_str()), true);

  // Const-ref binding extends the lifetime of the concatenated temporary.
  const std::string &json =
      definitions[num_definitions - 1].instances[0] + "\n";

  TEST_EQ(parser.Parse(json.c_str()), true);

  std::string jsongen;
  parser.opts.indent_step = 0;
  auto result =
      GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
  TEST_EQ(result, true);

  if (jsongen != json) {
    // These strings are larger than a megabyte, so we show the bytes around
    // the first bytes that are different rather than the whole string.
    size_t len = std::min(json.length(), jsongen.length());
    for (size_t i = 0; i < len; i++) {
      if (json[i] != jsongen[i]) {
        // `i` is rewound for context, then reused to print the window.
        i -= std::min(static_cast<size_t>(10), i);  // show some context;
        size_t end = std::min(len, i + 20);
        for (; i < end; i++)
          TEST_OUTPUT_LINE("at %d: found \"%c\", expected \"%c\"\n",
                           static_cast<int>(i), jsongen[i], json[i]);
        break;
      }
    }
    // Deliberately fail the test run after reporting the mismatch.
    TEST_NOTNULL(nullptr);  //-V501 (this comment suppresses CWE-570 warning)
  }

  // clang-format off
  #ifdef FLATBUFFERS_TEST_VERBOSE
    TEST_OUTPUT_LINE("%dk schema tested with %dk of json\n",
                     static_cast<int>(schema.length() / 1024),
                     static_cast<int>(json.length() / 1024));
  #endif
  // clang-format on
}
303
304} // namespace tests
305} // namespace flatbuffers