#include "fuzz_test.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/idl.h"
#include "test_assert.h"
| 8 | |
| 9 | namespace flatbuffers { |
| 10 | namespace tests { |
| 11 | namespace { |
| 12 | |
// Include simple random number generator to ensure results will be the
// same cross platform.
// http://en.wikipedia.org/wiki/Park%E2%80%93Miller_random_number_generator
uint32_t lcg_seed = 48271;

// Advances the generator by one step and returns the new state.
// The multiplication is carried out in 64 bits so it cannot wrap before the
// modulo is applied; the remainder is always below the modulus (2^32 - 5),
// so narrowing it back to 32 bits never loses information.
uint32_t lcg_rand() {
  const uint64_t next =
      (static_cast<uint64_t>(lcg_seed) * 279470273ULL) % 4294967291ULL;
  lcg_seed = static_cast<uint32_t>(next);
  return lcg_seed;
}

// Rewinds the generator to its initial seed so that the exact same sequence
// can be replayed.
void lcg_reset() { lcg_seed = 48271; }
| 22 | |
| 23 | template<typename T> |
| 24 | static void CompareTableFieldValue(flatbuffers::Table *table, |
| 25 | flatbuffers::voffset_t voffset, T val) { |
| 26 | T read = table->GetField(voffset, static_cast<T>(0)); |
| 27 | TEST_EQ(read, val); |
| 28 | } |
| 29 | |
| 30 | } // namespace |
| 31 | |
| 32 | // Low level stress/fuzz test: serialize/deserialize a variety of |
| 33 | // different kinds of data in different combinations |
| 34 | void FuzzTest1() { |
| 35 | // Values we're testing against: chosen to ensure no bits get chopped |
| 36 | // off anywhere, and also be different from eachother. |
| 37 | const uint8_t bool_val = true; |
| 38 | const int8_t char_val = -127; // 0x81 |
| 39 | const uint8_t uchar_val = 0xFF; |
| 40 | const int16_t short_val = -32222; // 0x8222; |
| 41 | const uint16_t ushort_val = 0xFEEE; |
| 42 | const int32_t int_val = 0x83333333; |
| 43 | const uint32_t uint_val = 0xFDDDDDDD; |
| 44 | const int64_t long_val = 0x8444444444444444LL; |
| 45 | const uint64_t ulong_val = 0xFCCCCCCCCCCCCCCCULL; |
| 46 | const float float_val = 3.14159f; |
| 47 | const double double_val = 3.14159265359; |
| 48 | |
| 49 | const int test_values_max = 11; |
| 50 | const flatbuffers::voffset_t fields_per_object = 4; |
| 51 | const int num_fuzz_objects = 10000; // The higher, the more thorough :) |
| 52 | |
| 53 | flatbuffers::FlatBufferBuilder builder; |
| 54 | |
| 55 | lcg_reset(); // Keep it deterministic. |
| 56 | |
| 57 | flatbuffers::uoffset_t objects[num_fuzz_objects]; |
| 58 | |
| 59 | // Generate num_fuzz_objects random objects each consisting of |
| 60 | // fields_per_object fields, each of a random type. |
| 61 | for (int i = 0; i < num_fuzz_objects; i++) { |
| 62 | auto start = builder.StartTable(); |
| 63 | for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) { |
| 64 | int choice = lcg_rand() % test_values_max; |
| 65 | auto off = flatbuffers::FieldIndexToOffset(f); |
| 66 | switch (choice) { |
| 67 | case 0: builder.AddElement<uint8_t>(off, bool_val, 0); break; |
| 68 | case 1: builder.AddElement<int8_t>(off, char_val, 0); break; |
| 69 | case 2: builder.AddElement<uint8_t>(off, uchar_val, 0); break; |
| 70 | case 3: builder.AddElement<int16_t>(off, short_val, 0); break; |
| 71 | case 4: builder.AddElement<uint16_t>(off, ushort_val, 0); break; |
| 72 | case 5: builder.AddElement<int32_t>(off, int_val, 0); break; |
| 73 | case 6: builder.AddElement<uint32_t>(off, uint_val, 0); break; |
| 74 | case 7: builder.AddElement<int64_t>(off, long_val, 0); break; |
| 75 | case 8: builder.AddElement<uint64_t>(off, ulong_val, 0); break; |
| 76 | case 9: builder.AddElement<float>(off, float_val, 0); break; |
| 77 | case 10: builder.AddElement<double>(off, double_val, 0); break; |
| 78 | } |
| 79 | } |
| 80 | objects[i] = builder.EndTable(start); |
| 81 | } |
| 82 | builder.PreAlign<flatbuffers::largest_scalar_t>(0); // Align whole buffer. |
| 83 | |
| 84 | lcg_reset(); // Reset. |
| 85 | |
| 86 | uint8_t *eob = builder.GetCurrentBufferPointer() + builder.GetSize(); |
| 87 | |
| 88 | // Test that all objects we generated are readable and return the |
| 89 | // expected values. We generate random objects in the same order |
| 90 | // so this is deterministic. |
| 91 | for (int i = 0; i < num_fuzz_objects; i++) { |
| 92 | auto table = reinterpret_cast<flatbuffers::Table *>(eob - objects[i]); |
| 93 | for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) { |
| 94 | int choice = lcg_rand() % test_values_max; |
| 95 | flatbuffers::voffset_t off = flatbuffers::FieldIndexToOffset(f); |
| 96 | switch (choice) { |
| 97 | case 0: CompareTableFieldValue(table, off, bool_val); break; |
| 98 | case 1: CompareTableFieldValue(table, off, char_val); break; |
| 99 | case 2: CompareTableFieldValue(table, off, uchar_val); break; |
| 100 | case 3: CompareTableFieldValue(table, off, short_val); break; |
| 101 | case 4: CompareTableFieldValue(table, off, ushort_val); break; |
| 102 | case 5: CompareTableFieldValue(table, off, int_val); break; |
| 103 | case 6: CompareTableFieldValue(table, off, uint_val); break; |
| 104 | case 7: CompareTableFieldValue(table, off, long_val); break; |
| 105 | case 8: CompareTableFieldValue(table, off, ulong_val); break; |
| 106 | case 9: CompareTableFieldValue(table, off, float_val); break; |
| 107 | case 10: CompareTableFieldValue(table, off, double_val); break; |
| 108 | } |
| 109 | } |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | // High level stress/fuzz test: generate a big schema and |
| 114 | // matching json data in random combinations, then parse both, |
| 115 | // generate json back from the binary, and compare with the original. |
| 116 | void FuzzTest2() { |
| 117 | lcg_reset(); // Keep it deterministic. |
| 118 | |
| 119 | const int num_definitions = 30; |
| 120 | const int num_struct_definitions = 5; // Subset of num_definitions. |
| 121 | const int fields_per_definition = 15; |
| 122 | const int instances_per_definition = 5; |
| 123 | const int deprecation_rate = 10; // 1 in deprecation_rate fields will |
| 124 | // be deprecated. |
| 125 | |
| 126 | std::string schema = "namespace test;\n\n"; |
| 127 | |
| 128 | struct RndDef { |
| 129 | std::string instances[instances_per_definition]; |
| 130 | |
| 131 | // Since we're generating schema and corresponding data in tandem, |
| 132 | // this convenience function adds strings to both at once. |
| 133 | static void Add(RndDef (&definitions_l)[num_definitions], |
| 134 | std::string &schema_l, const int instances_per_definition_l, |
| 135 | const char *schema_add, const char *instance_add, |
| 136 | int definition) { |
| 137 | schema_l += schema_add; |
| 138 | for (int i = 0; i < instances_per_definition_l; i++) |
| 139 | definitions_l[definition].instances[i] += instance_add; |
| 140 | } |
| 141 | }; |
| 142 | |
| 143 | // clang-format off |
| 144 | #define AddToSchemaAndInstances(schema_add, instance_add) \ |
| 145 | RndDef::Add(definitions, schema, instances_per_definition, \ |
| 146 | schema_add, instance_add, definition) |
| 147 | |
| 148 | #define Dummy() \ |
| 149 | RndDef::Add(definitions, schema, instances_per_definition, \ |
| 150 | "byte", "1", definition) |
| 151 | // clang-format on |
| 152 | |
| 153 | RndDef definitions[num_definitions]; |
| 154 | |
| 155 | // We are going to generate num_definitions, the first |
| 156 | // num_struct_definitions will be structs, the rest tables. For each |
| 157 | // generate random fields, some of which may be struct/table types |
| 158 | // referring to previously generated structs/tables. |
| 159 | // Simultanenously, we generate instances_per_definition JSON data |
| 160 | // definitions, which will have identical structure to the schema |
| 161 | // being generated. We generate multiple instances such that when creating |
| 162 | // hierarchy, we get some variety by picking one randomly. |
| 163 | for (int definition = 0; definition < num_definitions; definition++) { |
| 164 | std::string definition_name = "D" + flatbuffers::NumToString(definition); |
| 165 | |
| 166 | bool is_struct = definition < num_struct_definitions; |
| 167 | |
| 168 | AddToSchemaAndInstances( |
| 169 | ((is_struct ? "struct " : "table ") + definition_name + " {\n").c_str(), |
| 170 | "{\n"); |
| 171 | |
| 172 | for (int field = 0; field < fields_per_definition; field++) { |
| 173 | const bool is_last_field = field == fields_per_definition - 1; |
| 174 | |
| 175 | // Deprecate 1 in deprecation_rate fields. Only table fields can be |
| 176 | // deprecated. |
| 177 | // Don't deprecate the last field to avoid dangling commas in JSON. |
| 178 | const bool deprecated = |
| 179 | !is_struct && !is_last_field && (lcg_rand() % deprecation_rate == 0); |
| 180 | |
| 181 | std::string field_name = "f" + flatbuffers::NumToString(field); |
| 182 | AddToSchemaAndInstances((" " + field_name + ":").c_str(), |
| 183 | deprecated ? "" : (field_name + ": ").c_str()); |
| 184 | // Pick random type: |
| 185 | auto base_type = static_cast<flatbuffers::BaseType>( |
| 186 | lcg_rand() % (flatbuffers::BASE_TYPE_UNION + 1)); |
| 187 | switch (base_type) { |
| 188 | case flatbuffers::BASE_TYPE_STRING: |
| 189 | if (is_struct) { |
| 190 | Dummy(); // No strings in structs. |
| 191 | } else { |
| 192 | AddToSchemaAndInstances("string", deprecated ? "" : "\"hi\""); |
| 193 | } |
| 194 | break; |
| 195 | case flatbuffers::BASE_TYPE_VECTOR: |
| 196 | if (is_struct) { |
| 197 | Dummy(); // No vectors in structs. |
| 198 | } else { |
| 199 | AddToSchemaAndInstances("[ubyte]", |
| 200 | deprecated ? "" : "[\n0,\n1,\n255\n]"); |
| 201 | } |
| 202 | break; |
| 203 | case flatbuffers::BASE_TYPE_NONE: |
| 204 | case flatbuffers::BASE_TYPE_UTYPE: |
| 205 | case flatbuffers::BASE_TYPE_STRUCT: |
| 206 | case flatbuffers::BASE_TYPE_UNION: |
| 207 | if (definition) { |
| 208 | // Pick a random previous definition and random data instance of |
| 209 | // that definition. |
| 210 | int defref = lcg_rand() % definition; |
| 211 | int instance = lcg_rand() % instances_per_definition; |
| 212 | AddToSchemaAndInstances( |
| 213 | ("D" + flatbuffers::NumToString(defref)).c_str(), |
| 214 | deprecated ? "" |
| 215 | : definitions[defref].instances[instance].c_str()); |
| 216 | } else { |
| 217 | // If this is the first definition, we have no definition we can |
| 218 | // refer to. |
| 219 | Dummy(); |
| 220 | } |
| 221 | break; |
| 222 | case flatbuffers::BASE_TYPE_BOOL: |
| 223 | AddToSchemaAndInstances( |
| 224 | "bool", deprecated ? "" : (lcg_rand() % 2 ? "true" : "false")); |
| 225 | break; |
| 226 | case flatbuffers::BASE_TYPE_ARRAY: |
| 227 | if (!is_struct) { |
| 228 | AddToSchemaAndInstances( |
| 229 | "ubyte", |
| 230 | deprecated ? "" : "255"); // No fixed-length arrays in tables. |
| 231 | } else { |
| 232 | AddToSchemaAndInstances("[int:3]", deprecated ? "" : "[\n,\n,\n]"); |
| 233 | } |
| 234 | break; |
| 235 | default: |
| 236 | // All the scalar types. |
| 237 | schema += flatbuffers::kTypeNames[base_type]; |
| 238 | |
| 239 | if (!deprecated) { |
| 240 | // We want each instance to use its own random value. |
| 241 | for (int inst = 0; inst < instances_per_definition; inst++) |
| 242 | definitions[definition].instances[inst] += |
| 243 | flatbuffers::IsFloat(base_type) |
| 244 | ? flatbuffers::NumToString<double>(lcg_rand() % 128) |
| 245 | .c_str() |
| 246 | : flatbuffers::NumToString<int>(lcg_rand() % 128).c_str(); |
| 247 | } |
| 248 | } |
| 249 | AddToSchemaAndInstances(deprecated ? "(deprecated);\n" : ";\n", |
| 250 | deprecated ? "" |
| 251 | : is_last_field ? "\n" |
| 252 | : ",\n"); |
| 253 | } |
| 254 | AddToSchemaAndInstances("}\n\n", "}"); |
| 255 | } |
| 256 | |
| 257 | schema += "root_type D" + flatbuffers::NumToString(num_definitions - 1); |
| 258 | schema += ";\n"; |
| 259 | |
| 260 | flatbuffers::Parser parser; |
| 261 | |
| 262 | // Will not compare against the original if we don't write defaults |
| 263 | parser.builder_.ForceDefaults(true); |
| 264 | |
| 265 | // Parse the schema, parse the generated data, then generate text back |
| 266 | // from the binary and compare against the original. |
| 267 | TEST_EQ(parser.Parse(schema.c_str()), true); |
| 268 | |
| 269 | const std::string &json = |
| 270 | definitions[num_definitions - 1].instances[0] + "\n"; |
| 271 | |
| 272 | TEST_EQ(parser.Parse(json.c_str()), true); |
| 273 | |
| 274 | std::string jsongen; |
| 275 | parser.opts.indent_step = 0; |
| 276 | auto result = |
| 277 | GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); |
| 278 | TEST_EQ(result, true); |
| 279 | |
| 280 | if (jsongen != json) { |
| 281 | // These strings are larger than a megabyte, so we show the bytes around |
| 282 | // the first bytes that are different rather than the whole string. |
| 283 | size_t len = std::min(json.length(), jsongen.length()); |
| 284 | for (size_t i = 0; i < len; i++) { |
| 285 | if (json[i] != jsongen[i]) { |
| 286 | i -= std::min(static_cast<size_t>(10), i); // show some context; |
| 287 | size_t end = std::min(len, i + 20); |
| 288 | for (; i < end; i++) |
| 289 | TEST_OUTPUT_LINE("at %d: found \"%c\", expected \"%c\"\n", |
| 290 | static_cast<int>(i), jsongen[i], json[i]); |
| 291 | break; |
| 292 | } |
| 293 | } |
| 294 | TEST_NOTNULL(nullptr); //-V501 (this comment suppresses CWE-570 warning) |
| 295 | } |
| 296 | |
| 297 | // clang-format off |
| 298 | #ifdef FLATBUFFERS_TEST_VERBOSE |
| 299 | TEST_OUTPUT_LINE("%dk schema tested with %dk of json\n", |
| 300 | static_cast<int>(schema.length() / 1024), |
| 301 | static_cast<int>(json.length() / 1024)); |
| 302 | #endif |
| 303 | // clang-format on |
| 304 | } |
| 305 | |
| 306 | } // namespace tests |
James Kuszmaul | 3b15b0c | 2022-11-08 14:03:16 -0800 | [diff] [blame] | 307 | } // namespace flatbuffers |