Squashed 'third_party/flatbuffers/' changes from e5f331db9..bc44fad35
bc44fad35 UnPackTo disable merge by default (#7527)
4fca4dc60 [TS/JS] Move TS tests to dedicated folder and deps upgrade (#7508)
036032373 Bump junit from 4.13 to 4.13.1 in /java (#7526)
89dfb43f3 Replace `bash JavaTest.sh` with `mvn test` (#7500)
c49aff4b6 enabled cpp17 tests in CI (#7524)
56e60223c prevent force_align attribute on enums (#7523)
89b1f5aa1 remove travis config (#7522)
b90159823 [Java][Flexbuffers] Add API to add nullables into the buffer. (#7521)
8cdc6a288 Install BuildFlatBuffers.cmake (#7519)
a67e35aff Moves all of the swift test code into tests/swift (#7509)
f124e41ae Updated Readme
4c954181c [Java][FlexBuffers] throwing exception for untyped fixed vectors (#7507)
7f7547737 [Android] Remove maven dependency of flatbuffers and use source folder (#7503)
a79d61ea8 Fixes issue with cocoapods failing to be published because of docc (#7505)
d465b39c3 [CMake]: fix breaking find_package change (#7499) (#7502)
c5a609dc2 [C#] Prepares for official Nuget release (#7496)
5634dc3d0 [ISSUE-6268] returns NaN insteadof nan (#7498)
37e37b8ca Updates cocoapods version (#7497)
8fd4534fb update android multidex setting (#7495)
d5427da52 Disable Android Build (#7494)
06c5c7ed0 FlatBuffers Version 2.0.8 (#7492)
b190ce11b Verifier Refinements (#7490)
bf5d23230 Namer applied to Typescript generator (#7488)
ce382d6dd [TS/JS] Add rollup and config to generate iife flatbuffers bundle (#7449)
41d9add7e C++: Add option to skip verifying nested flatbuffers (#7489)
6a8742754 [C++] support native_inline attribute for vector of tables (#7479)
694add668 Refactor test.cpp (#7487)
7edf8c908 Update scorecard to 1.1.2
b86387442 Fix typos (#7483)
e2eb5ee67 Include <array> head in stl_emulation.h (#7480)
994502b6d Version number in file package.json updated to 2.0.7 (#7476)
fa41e8367 [C++] Fixed crash when copying table with empty shared strings (#7477)
799cc8f7b Use type traits for specialization (#7475)
b7eb44147 Disable RTTI and rework use in idl_gen_ts.cpp (#7474)
8d01c5859 CMake project version detection made more robust (#7473)
237e8b71f Moved compiler warnings around (#7471)
eeb8fd60d Include builder.addOffset for vector of structs (#7470)
fef2ffc4d Use schema include name for keep-prefix (#7469)
8367664f1 Flatbuffers Version 2.0.7 (#7462)
d6f06c33f Reworked keep prefix (#7456)
627e8bf36 update grpc version (#7457)
883c42b7d disabling unpackto optimization (#7459)
7aae0af30 Remove old GRPC bash script and convert to python3 (#7454)
b057aa917 Grouped anonymous namespaces together, (#7455)
f1b26ff7f Change to GetTypeName (#7453)
9610a666b Generate SLSA signatures for Released zip files (#7450)
1e0f75a64 [WIP] speedup (#7452)
82b75407a Wrap types in namespace for --ts-flat-files and --gen-all (#7451)
f7c511957 Audit and fixups for GCC and Clang (#7212)
a66de58af Partial support for --ts-flat-files and --gen-all (#7446)
a3508f36d [Kotlin] Make sure namespace path exist for code generation (#7357)
137fec716 Stop using __has_trivial_copy on recent clang versions. (#7443)
214125e41 [C#] Rework how sorted vectors are looked up (#7441)
44a7dc999 Define minimum buffer size (#7440)
3cc2daa78 make_span overloads for pointer to vector (#7374) (#7435)
fa1174aa7 [TypeScript] Fix namespaceless schema generation (#7432)
83d4e2a10 Add checks to verifier (#7438)
8a09f3fb0 Fix FlexBuffers JS/TS bug https://github.com/google/flatbuffers/issues/6934 (#7434)
9dbe819ef Add flatc python tests to CI (#7437)
67c414958 Update TypeScriptTest.py to work better cross platform (#7436)
8b8c7dbdf Update gitingore to reflect name change (#7431)
2ee20a5f3 Remove auto including locale functions (#7430)
4be605604 [C++] Set StructDef::has_key property when deserializing from binary schema (#7386) (#7428)
fc5d86f1e [C++] Make template parameter in stl_emulation.h more explicit to avoid conflicts with cpprestsdk U macro (#7424)
9dce287ad Issue#6959 :Updated Automatically generated rust files. (#7425)
7798be3bb avoid zero-as-null-pointer warning (#7423)
966362e07 [C++] Vector of Tables equality (#7415)
a89c279ed [golang] Perform keyword escaping after case conversion (#7421)
a212b3c03 Turn of fail fast for C++ CI
9230f600d Remove stringop-overflow from error (#7422)
c79362156 [golang] Add support for text parsing with json struct tags (#7353)
ee2ced236 Moved TypeScriptTests to python script (#7411)
468c00a3f Rebased: grpc/compiler: Respect filename suffix and extension during code generation (#7414)
47c757f71 Add tests for flatc (#7405)
9a5ff8900 Add FLATBUFFERS_STRICT_MODE (#7408)
950444a34 [TS] Use TextEncoder and TextDecoder (#7400)
30d76198c Compilation issue msys2 #7399 (#7409)
cce3a66f0 Delete .travis directory
8d1cc6ac7 Revert "Compilation issue msys2 (#7403)" (#7407)
5b207639a Update readme.md
359e0f9d6 Revert "grpc/compiler: Respect filename suffix and extension during code generation (#7343)" (#7406)
ebbed0513 Delete cpp-linter.yml
aa395e5a5 (#7323) Rename CMake files according to project name (#7378)
32328075d Fix error msg format when generate GRPC failed (#7350)
97e89c5ac grpc/compiler: Respect filename suffix and extension during code generation (#7343)
5f6672be4 Fix Clang-Cl compile on Windows (#7308)
28e858c85 [TS/Bazel] Minor improvements to typescript.bzl (#7300)
987bebe67 [TS] fix incorrect reverse when writting array of structs (#7271)
ec0129369 Fix FlexBuffers Verifier tracking vectors reuse at wrong offset
50dd385b3 Add missing const (#7401)
da702cfd8 Compilation issue msys2 (#7403)
6e2791640 keep-prefix keeps relative pathing (#7394)
52fce5e53 fix(#7360): grpc used deprecated functions (#7361)
b7f13cd8e cpp_generator: comment out unused parameter to avoid warnings (#7381)
e42985e5a Updated Newtonsoft.Json to 13.0.1 (#7393)
0a8064637 Fix references to LICENSE file (#7377)
b9eea76a8 [Dart] Implement putBool to fix errors when serializing structs with bools (#7359)
1b9030015 Bump Newtonsoft.Json from 12.0.3 to 13.0.1 in /tests/FlatBuffers.Test (#7363)
83a43fc79 Reenable optional json (#7352)
5f0137602 Only include direct included filed (#7348)
9a1913a87 Revert "Implement optional scalars for JSON (#7322)" (#7351)
b4647beb8 Revert "Move reflection_ts_fbs into a separate directory (#7342)" (#7349)
d6060977a Remove asserting in verifier for flattests
987aa5b5e move -Wextra-semi to GCC 8.0+
42acdb63c [TS] Don't generate self-imports with --ts-flat-file (#7340)
0cc1aeb8c [golang] Create missing namespace directory structure (#7324) (#7325)
ba6c67170 [Kotlin] Remove download benchmark files dependency (#7314)
d2f33fc45 Disable Android on Linux CI build
0d1b72cbc [TS] fix ts import path issue (#7298)
9fce2fbf2 replace io/ioutil to os (#7281)
a18ea40d6 Implement optional scalars for JSON (#7322)
090caa280 Move reflection_ts_fbs into a separate directory (#7342)
49e1ea333 Implement optional scalars for Python (#7318)
11a198870 Started implementation for private flags in rust (#7269)
967df08b1 Adds full supposed for Wasm in the swift lib (#7328)
9aa08a429 Use keep case for Rust union discriminant type. (#7321)
9e8c758f5 Add explicit return types to lobster generated code (#7312)
74a25536b Add size check to fix out of bounds read risk (#7304)
12917af8a Update Rust docs page (#7296)
1ea2472f7 [swift] add had<ArrayName> property for arrays to check presence in a message (#7280)
0fe13cb28 Remove span ConstIterator/cbegin()/cend(). (#7295)
385dddc66 Namerkot (#7245)
750dde766 Make `flatc` generate Rust files not requiring `std` (#7273)
9917a168c [swift] Make swift module public (#7274)
76d3cca19 Rust: fix a name conflict when building with "no_std" feature (#7268)
c86e6d0e3 json inf parsing
d34dc32c2 fix include order
234d86c92 fixed off-by-one in parser
746c73b91 Add Annotations for Monster schema and example buffer
0bbfd4b2e fixes for annotator
716521953 Update readme.md (#7257)
a45f564cf [performance] Add aggressive systematic inlining in ByteBuffer and FlatBufferBuilder (#7253)
9d45a6403 more google merge fixes
ccfb4c20b Handle +/-inf in protos (#7256)
7bcd857b8 Specialize CreateVector with std::initializer_list (#7254)
23c8ab34c Swift update performance benchmark infrastructure (#7255)
70002dc5c various fixes for google merge
6e0e79f24 Add test for nested buffer verifier (#7252)
b856368d7 Turn off go modules temporary until we get a proper fix (#7251)
e37156a30 Keep the underlying storage capacity when clearing the FlatBufferBuilder. Gives a significant performance boost for serialisation of many small messages. (#7250)
a10b0e546 Java namer variable keep case (#7249)
275b73994 allow overriding FLATBUFFERS_MAX_ALIGNMENT
9d1ce9a10 Add parameter back to EndVector (#7246)
79afe6c3d Make Java namespaces keep case by default (#7243)
c6dbb2230 Add write permissions for labeller
18bacd3ea Expand test to make sure {}-initializers are properly understood by template. (#7242)
a2c913aec Add -Wnon-virtual-dtor
67b33b294 set workflows permissions to read-only (#7239)
7b5fd2bd0 [Kotlin] Fix key lookup returning null clashing with default value (#7237)
7181d7770 [Java] Fix key lookup returning null clashing with default value (#7236)
7f663b120 Allow CreateVectorOfStrings() to work with any string-type. (#7238)
173ebb694 Fixes a bug where the create function doesnt optional + required items (#7228)
d65823948 [Kotlin] Update gradle to 7.4.1 and simplify config files. (#7231)
ab4bf59e8 remove toascii (#7234)
eee44bbb2 disable cpp-linter (#7229)
a63fa51a1 Create cpp-linter.yml (#7208)
2049e5210 Adds a way to verify/exposes Entities ids (#7221)
832c618f5 Adds implementation flag for swift (#7202)
14615699f Started to migrate to target_compile_options (#7222)
20aad0c41 [C++] stl_emulation span::count_ is not const anymore (#7226) (#7227)
f083b33f2 code gen flexbuffer verifier (#7207)
bf17df346 [C++] generate sorted #include directives (#7213)
35281dedb Fix for [C++] flatc generates invalid Code in the default constructor for structs, when --cpp-field-case-style is used #7209 (#7211)
c9651b742 Add overloads for C# ByteBuffer/FlatBufferBuilder to allow adding vector blocks from ArraySegments or IntPtr (#7193)
26c3b3ada Update codeql.yml
da6e1b985 Update codeql.yml
ad27d751e Added Oss fuzz badge
0aab623cb Create codeql.yml
6a446bdd8 maximize parallel builds in CI (#7206)
21fb5cbbc Create scorecards.yml
0da6f9486 [C++] Static assert on Flatbuffers Version (#7203)
59e971308 reduce fuzzing time to 1 minute in CI
40866a892 fixed padding in struct for annotated binary (#7199)
b71d968fa Apply Namer prefix/suffix to other generators (#7197)
fac0d7be0 Apply Namer to Java. (#7194)
6c5603fd9 [C#] Fix collision of field name and type name (#7149)
2d21853a7 monster fuzzer fix for json default scalars
fec1a8d01 [swift] Add bazel configuration for Swift (#7195)
7fd857623 structured comments (#7192)
a4cb1599d Namerdart (#7187)
ae4ce7265 fuzzed binary annotator (#7188)
e2be0c0b0 Handle root offset and root table vtable invalidation (#7177)
2ad408697 [TS] Fix generation of struct members in object api (#7148)
4213d9105 VerifySizePrefixed (reflection::Schema) and GetAnySizePrefixedRoot added (#7181)
5a13f622c Correctly parse lists of enums in Dart generated code (#7157)
23a7e4e0b Adds no-includes flags to the swift code generator (#7182)
eeb49c275 Move flatbuffer_ts_library to typescript.bzl (#7183)
824763b31 Typo in flatc options (warning-as-errors instead of warnings-as-errors) (#7180)
d3aeee32b Annotated Flatbuffer Binary (#7174)
0bceba24d [Lua] Apply Namer to Lua (#7171)
b8c77d404 Make inclusion of header <optional> opt-out via macro (#7168)
8468eab83 Namersw (#7167)
2b2e8d4ae Nameroverloads (#7164)
b80b32bfa Use DESCRIPTION only if CMake version >= 3.9 (#7166)
Change-Id: Ic2681dabb1a798b7515e62753ee06aecb9933260
git-subtree-dir: third_party/flatbuffers
git-subtree-split: bc44fad35271e43fd7a79b4d691ac9e41708797f
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/src/binary_annotator.cpp b/src/binary_annotator.cpp
new file mode 100644
index 0000000..dd0b454
--- /dev/null
+++ b/src/binary_annotator.cpp
@@ -0,0 +1,1419 @@
+#include "binary_annotator.h"
+
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "flatbuffers/reflection.h"
+#include "flatbuffers/verifier.h"
+
+namespace flatbuffers {
+namespace {
+
+// Strict-weak ordering used to sort BinaryRegions by their start offset.
+static bool BinaryRegionSort(const BinaryRegion &a, const BinaryRegion &b) {
+  return a.offset < b.offset;
+}
+
+static void SetError(BinaryRegionComment &comment, BinaryRegionStatus status,
+ std::string message = "") {
+ comment.status = status;
+ comment.status_message = message;
+}
+
+// Factory for BinaryRegion values, setting each member explicitly.
+// `comment` is taken by value WITHOUT `const` so that the std::move below is
+// a real move; with the previous `const` qualifier std::move silently
+// degraded to a copy. Top-level const on a by-value parameter is not part of
+// the signature, so all callers are unaffected.
+static BinaryRegion MakeBinaryRegion(
+    const uint64_t offset = 0, const uint64_t length = 0,
+    const BinaryRegionType type = BinaryRegionType::Unknown,
+    const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
+    BinaryRegionComment comment = {}) {
+  BinaryRegion region;
+  region.offset = offset;
+  region.length = length;
+  region.type = type;
+  region.array_length = array_length;
+  region.points_to_offset = points_to_offset;
+  region.comment = std::move(comment);
+  return region;
+}
+
+// Factory for BinarySection values. `regions` is taken by value WITHOUT
+// `const` so the std::move below actually moves the vector into the section;
+// the previous `const` qualifier made the move degrade to a full copy of the
+// region list. Callers are unaffected (top-level const is not part of the
+// signature) and may still pass either lvalues or rvalues.
+static BinarySection MakeBinarySection(
+    const std::string &name, const BinarySectionType type,
+    std::vector<BinaryRegion> regions) {
+  BinarySection section;
+  section.name = name;
+  section.type = type;
+  section.regions = std::move(regions);
+  return section;
+}
+
+// Convenience wrapper: builds a BinarySection containing exactly one region.
+static BinarySection MakeSingleRegionBinarySection(const std::string &name,
+                                                   const BinarySectionType type,
+                                                   const BinaryRegion &region) {
+  std::vector<BinaryRegion> regions;
+  regions.push_back(region);
+  return MakeBinarySection(name, type, std::move(regions));
+}
+
+// Returns true if any byte in [offset, offset + length) of `binary` is
+// non-zero. Used to distinguish padding (all zeros) from unknown data.
+// Caller must ensure the range lies within the binary.
+static bool IsNonZeroRegion(const uint64_t offset, const uint64_t length,
+                            const uint8_t *const binary) {
+  for (uint64_t i = offset; i < offset + length; ++i) {
+    if (binary[i] != 0) { return true; }
+  }
+  return false;
+}
+
+// Returns true if every byte in [offset, offset + length) of `binary` is a
+// printable character, suggesting the region holds text (e.g. the file
+// identifier). Caller must ensure the range lies within the binary.
+// NOTE(review): relies on isprint() being available via a transitive include
+// (<cctype> is not included directly) and on the current locale — confirm.
+static bool IsPrintableRegion(const uint64_t offset, const uint64_t length,
+                              const uint8_t *const binary) {
+  for (uint64_t i = offset; i < offset + length; ++i) {
+    if (!isprint(binary[i])) { return false; }
+  }
+  return true;
+}
+
+// Builds a section describing a gap between two known sections: either
+// padding (all zero bytes) or an "unknown" region (non-zero bytes that no
+// known section references). Warnings are attached when the gap looks
+// suspicious (too long for padding, or non-zero padding bytes).
+static BinarySection GenerateMissingSection(const uint64_t offset,
+                                            const uint64_t length,
+                                            const uint8_t *const binary) {
+  std::vector<BinaryRegion> regions;
+
+  // Check if the region is all zeros or not, as that can tell us if it is
+  // padding or not.
+  if (IsNonZeroRegion(offset, length, binary)) {
+    // Some of the padding bytes are non-zero, so this might be an unknown
+    // section of the binary.
+    // TODO(dbaileychess): We could be a bit smarter with different sized
+    // alignments. For now, the 8 byte check encompasses all the smaller
+    // alignments.
+    BinaryRegionComment comment;
+    comment.type = BinaryRegionCommentType::Unknown;
+    if (length >= 8) {
+      SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
+    } else {
+      SetError(comment, BinaryRegionStatus::WARN_CORRUPTED_PADDING);
+    }
+
+    regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
+                                       BinaryRegionType::Unknown, length, 0,
+                                       comment));
+
+    return MakeBinarySection("no known references", BinarySectionType::Unknown,
+                             std::move(regions));
+  }
+
+  BinaryRegionComment comment;
+  comment.type = BinaryRegionCommentType::Padding;
+  if (length >= 8) {
+    // Longer than any alignment padding should be; flag it for review.
+    SetError(comment, BinaryRegionStatus::WARN_PADDING_LENGTH);
+  }
+
+  // This region is most likely padding.
+  regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
+                                     BinaryRegionType::Uint8, length, 0,
+                                     comment));
+
+  return MakeBinarySection("", BinarySectionType::Padding, std::move(regions));
+}
+
+} // namespace
+
+// Entry point: annotates the whole binary. Verifies the reflection schema,
+// walks the buffer starting from the root table, then back-fills any gaps
+// between the discovered sections. Returns a map keyed by section start
+// offset; empty if the schema or the binary is unreadable.
+std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
+  // Ensure the provided bfbs schema is itself a valid reflection buffer.
+  flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
+  if (!reflection::VerifySchemaBuffer(verifier)) { return {}; }
+
+  // The binary is too short to read as a flatbuffers.
+  // TODO(dbaileychess): We could spit out the annotated buffer sections, but
+  // I'm not sure if it is worth it.
+  if (binary_length_ < 4) { return {}; }
+
+  // Make sure we start with a clean slate.
+  vtables_.clear();
+  sections_.clear();
+
+  // First parse the header region which always starts at offset 0.
+  // The returned offset will point to the root_table location.
+  const uint64_t root_table_offset = BuildHeader(0);
+
+  if (IsValidOffset(root_table_offset)) {
+    // Build the root table, and all else will be referenced from it.
+    BuildTable(root_table_offset, BinarySectionType::RootTable,
+               schema_->root_table());
+  }
+
+  // Now that all the sections are built, make sure the binary sections are
+  // contiguous.
+  FixMissingRegions();
+
+  // Then scan the area between BinarySections and insert padding sections
+  // that are implied.
+  FixMissingSections();
+
+  return sections_;
+}
+
+// Annotates the buffer header at `header_offset`: the 4-byte root-table
+// uoffset and, when present and printable, the 4-byte file identifier.
+// Returns the absolute offset of the root table, or uint64_t max if the
+// header cannot be read.
+uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
+  const auto root_table_offset = ReadScalar<uint32_t>(header_offset);
+
+  if (!root_table_offset.has_value()) {
+    // This shouldn't occur, since we validate the min size of the buffer
+    // before. But for completeness' sake, we shouldn't read past the binary
+    // end.
+    return std::numeric_limits<uint64_t>::max();
+  }
+
+  std::vector<BinaryRegion> regions;
+  uint64_t offset = header_offset;
+  // TODO(dbaileychess): sized prefixed value
+
+  BinaryRegionComment root_offset_comment;
+  root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
+  root_offset_comment.name = schema_->root_table()->name()->str();
+
+  if (!IsValidOffset(root_table_offset.value())) {
+    SetError(root_offset_comment,
+             BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
+  }
+
+  regions.push_back(
+      MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+                       root_table_offset.value(), root_offset_comment));
+  offset += sizeof(uint32_t);
+
+  if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
+      IsPrintableRegion(offset, flatbuffers::kFileIdentifierLength, binary_)) {
+    BinaryRegionComment comment;
+    comment.type = BinaryRegionCommentType::FileIdentifier;
+    // Check if the file identifier region has non-zero data, and assume it's
+    // the file identifier. Otherwise, it will get filled in with padding
+    // later.
+    regions.push_back(MakeBinaryRegion(
+        offset, flatbuffers::kFileIdentifierLength * sizeof(uint8_t),
+        BinaryRegionType::Char, flatbuffers::kFileIdentifierLength, 0,
+        comment));
+  }
+
+  AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
+                                              std::move(regions)));
+
+  return root_table_offset.value();
+}
+
+// Annotates the vtable at `vtable_offset` that is referenced by the table
+// located at `offset_of_referring_table`. Layout parsed here: uint16_t vtable
+// size, uint16_t referring-table size, then one uint16_t field offset per
+// field id. On success the parsed entries are cached in vtables_ so a vtable
+// shared by multiple tables is only processed once. Truncated or
+// inconsistent vtables are recorded as error-annotated sections and parsing
+// of this vtable stops.
+void BinaryAnnotator::BuildVTable(const uint64_t vtable_offset,
+                                  const reflection::Object *const table,
+                                  const uint64_t offset_of_referring_table) {
+  // First see if we have used this vtable before, if so skip building it
+  // again.
+  auto it = vtables_.find(vtable_offset);
+  if (it != vtables_.end()) { return; }
+
+  if (ContainsSection(vtable_offset)) { return; }
+
+  BinaryRegionComment vtable_size_comment;
+  vtable_size_comment.type = BinaryRegionCommentType::VTableSize;
+
+  const auto vtable_length = ReadScalar<uint16_t>(vtable_offset);
+  if (!vtable_length.has_value()) {
+    // Not enough bytes left to read the vtable size itself.
+    const uint64_t remaining = RemainingBytes(vtable_offset);
+
+    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+             "2");
+
+    AddSection(vtable_offset,
+               MakeSingleRegionBinarySection(
+                   table->name()->str(), BinarySectionType::VTable,
+                   MakeBinaryRegion(vtable_offset, remaining,
+                                    BinaryRegionType::Unknown, remaining, 0,
+                                    vtable_size_comment)));
+    return;
+  }
+
+  // Vtables start with the size of the vtable
+  const uint16_t vtable_size = vtable_length.value();
+
+  if (!IsValidOffset(vtable_offset + vtable_size - 1)) {
+    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
+    // The vtable_size points to off the end of the binary.
+    AddSection(vtable_offset,
+               MakeSingleRegionBinarySection(
+                   table->name()->str(), BinarySectionType::VTable,
+                   MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
+                                    BinaryRegionType::Uint16, 0, 0,
+                                    vtable_size_comment)));
+
+    return;
+  } else if (vtable_size < 2 * sizeof(uint16_t)) {
+    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
+             "4");
+    // The size includes itself and the table size which are both uint16_t.
+    AddSection(vtable_offset,
+               MakeSingleRegionBinarySection(
+                   table->name()->str(), BinarySectionType::VTable,
+                   MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
+                                    BinaryRegionType::Uint16, 0, 0,
+                                    vtable_size_comment)));
+    return;
+  }
+
+  std::vector<BinaryRegion> regions;
+
+  regions.push_back(MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
+                                     BinaryRegionType::Uint16, 0, 0,
+                                     vtable_size_comment));
+  uint64_t offset = vtable_offset + sizeof(uint16_t);
+
+  BinaryRegionComment ref_table_len_comment;
+  ref_table_len_comment.type =
+      BinaryRegionCommentType::VTableRefferingTableLength;
+
+  // Ensure we can read the next uint16_t field, which is the size of the
+  // referring table.
+  const auto table_length = ReadScalar<uint16_t>(offset);
+
+  if (!table_length.has_value()) {
+    const uint64_t remaining = RemainingBytes(offset);
+    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+             "2");
+
+    AddSection(offset, MakeSingleRegionBinarySection(
+                           table->name()->str(), BinarySectionType::VTable,
+                           MakeBinaryRegion(
+                               offset, remaining, BinaryRegionType::Unknown,
+                               remaining, 0, ref_table_len_comment)));
+    return;
+  }
+
+  // Then they have the size of the table they reference.
+  const uint16_t table_size = table_length.value();
+
+  if (!IsValidOffset(offset_of_referring_table + table_size - 1)) {
+    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
+  } else if (table_size < 4) {
+    // A table must at least hold its own soffset back to the vtable.
+    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
+             "4");
+  }
+
+  regions.push_back(MakeBinaryRegion(offset, sizeof(uint16_t),
+                                     BinaryRegionType::Uint16, 0, 0,
+                                     ref_table_len_comment));
+  offset += sizeof(uint16_t);
+
+  const uint64_t offset_start = offset;
+
+  // A mapping between field (and its id) to the relative offset (uint16_t)
+  // from the start of the table.
+  std::map<uint16_t, VTable::Entry> fields;
+
+  // Counter for determining if the binary has more vtable entries than the
+  // schema provided. This can occur if the binary was created at a newer schema
+  // version and is being processed with an older one.
+  uint16_t fields_processed = 0;
+
+  // Loop over all the fields.
+  ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
+    const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);
+
+    if (field_offset >= vtable_offset + vtable_size) {
+      // This field_offset is too large for this vtable, so it must come from a
+      // newer schema than the binary was created with or the binary writer did
+      // not write it. For either case, it is safe to ignore.
+
+      // TODO(dbaileychess): We could show which fields are not set and their
+      // default values if we want. We just need a way to make it obvious that
+      // it isn't part of the buffer.
+      return;
+    }
+
+    BinaryRegionComment field_comment;
+    field_comment.type = BinaryRegionCommentType::VTableFieldOffset;
+    field_comment.name = std::string(field->name()->c_str()) +
+                         "` (id: " + std::to_string(field->id()) + ")";
+
+    const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
+
+    if (!offset_from_table.has_value()) {
+      const uint64_t remaining = RemainingBytes(field_offset);
+
+      SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
+      regions.push_back(MakeBinaryRegion(field_offset, remaining,
+                                         BinaryRegionType::Unknown, remaining,
+                                         0, field_comment));
+
+      return;
+    }
+
+    if (!IsValidOffset(offset_of_referring_table + offset_from_table.value() -
+                       1)) {
+      SetError(field_comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
+      regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
+                                         BinaryRegionType::VOffset, 0, 0,
+                                         field_comment));
+      return;
+    }
+
+    VTable::Entry entry;
+    entry.field = field;
+    entry.offset_from_table = offset_from_table.value();
+    fields.insert(std::make_pair(field->id(), entry));
+
+    std::string default_label;
+    if (offset_from_table.value() == 0) {
+      // Not present, so could be default or be optional.
+      if (field->required()) {
+        SetError(field_comment,
+                 BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT);
+        // If this is a required field, make it known this is an error.
+        regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
+                                           BinaryRegionType::VOffset, 0, 0,
+                                           field_comment));
+        return;
+      } else {
+        // It's an optional field, so get the default value and provide an
+        // annotation for it.
+        if (IsScalar(field->type()->base_type())) {
+          default_label += "<defaults to ";
+          default_label += IsFloat(field->type()->base_type())
+                               ? std::to_string(field->default_real())
+                               : std::to_string(field->default_integer());
+          default_label += "> (";
+        } else {
+          default_label += "<null> (";
+        }
+        default_label +=
+            reflection::EnumNameBaseType(field->type()->base_type());
+        default_label += ")";
+      }
+    }
+    field_comment.default_value = default_label;
+
+    regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
+                                       BinaryRegionType::VOffset, 0, 0,
+                                       field_comment));
+
+    fields_processed++;
+  });
+
+  // Check if we covered all the expected fields. If not, we need to add them
+  // as unknown fields.
+  uint16_t expectant_vtable_fields =
+      (vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);
+
+  // Prevent a bad binary from declaring a really large vtable_size, that we
+  // can not independently verify.
+  expectant_vtable_fields = std::min(
+      static_cast<uint16_t>(fields_processed * 3), expectant_vtable_fields);
+
+  for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
+    const uint64_t field_offset = offset_start + id * sizeof(uint16_t);
+
+    const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
+
+    BinaryRegionComment field_comment;
+    field_comment.type = BinaryRegionCommentType::VTableUnknownFieldOffset;
+    field_comment.index = id;
+
+    if (!offset_from_table.has_value()) {
+      const uint64_t remaining = RemainingBytes(field_offset);
+      SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
+      regions.push_back(MakeBinaryRegion(field_offset, remaining,
+                                         BinaryRegionType::Unknown, remaining,
+                                         0, field_comment));
+      continue;
+    }
+
+    VTable::Entry entry;
+    entry.field = nullptr;  // No field to reference.
+    entry.offset_from_table = offset_from_table.value();
+    fields.insert(std::make_pair(id, entry));
+
+    regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
+                                       BinaryRegionType::VOffset, 0, 0,
+                                       field_comment));
+  }
+
+  sections_[vtable_offset] = MakeBinarySection(
+      table->name()->str(), BinarySectionType::VTable, std::move(regions));
+
+  // Cache the parsed vtable so referring tables (and future lookups) can use
+  // it without re-parsing.
+  VTable vtable;
+  vtable.fields = std::move(fields);
+  vtable.table_size = table_size;
+  vtable.vtable_size = vtable_size;
+
+  vtables_[vtable_offset] = vtable;
+}
+
+void BinaryAnnotator::BuildTable(const uint64_t table_offset,
+ const BinarySectionType type,
+ const reflection::Object *const table) {
+ if (ContainsSection(table_offset)) { return; }
+
+ BinaryRegionComment vtable_offset_comment;
+ vtable_offset_comment.type = BinaryRegionCommentType::TableVTableOffset;
+
+ const auto vtable_soffset = ReadScalar<int32_t>(table_offset);
+
+ if (!vtable_soffset.has_value()) {
+ const uint64_t remaining = RemainingBytes(table_offset);
+ SetError(vtable_offset_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+ "4");
+
+ AddSection(
+ table_offset,
+ MakeSingleRegionBinarySection(
+ table->name()->str(), type,
+ MakeBinaryRegion(table_offset, remaining, BinaryRegionType::Unknown,
+ remaining, 0, vtable_offset_comment)));
+
+ // If there aren't enough bytes left to read the vtable offset, there is
+ // nothing we can do.
+ return;
+ }
+
+ // Tables start with the vtable
+ const uint64_t vtable_offset = table_offset - vtable_soffset.value();
+
+ if (!IsValidOffset(vtable_offset)) {
+ SetError(vtable_offset_comment,
+ BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
+
+ AddSection(table_offset,
+ MakeSingleRegionBinarySection(
+ table->name()->str(), type,
+ MakeBinaryRegion(table_offset, sizeof(int32_t),
+ BinaryRegionType::SOffset, 0, vtable_offset,
+ vtable_offset_comment)));
+
+ // There isn't much to do with an invalid vtable offset, as we won't be able
+ // to intepret the rest of the table fields.
+ return;
+ }
+
+ std::vector<BinaryRegion> regions;
+ regions.push_back(MakeBinaryRegion(table_offset, sizeof(int32_t),
+ BinaryRegionType::SOffset, 0,
+ vtable_offset, vtable_offset_comment));
+
+ // Parse the vtable first so we know what the rest of the fields in the table
+ // are.
+ BuildVTable(vtable_offset, table, table_offset);
+
+ auto vtable_entry = vtables_.find(vtable_offset);
+ if (vtable_entry == vtables_.end()) {
+ // There is no valid vtable for this table, so we cannot process the rest of
+ // the table entries.
+ return;
+ }
+
+ const VTable &vtable = vtable_entry->second;
+
+ // This is the size and length of this table.
+ const uint16_t table_size = vtable.table_size;
+ uint64_t table_end_offset = table_offset + table_size;
+
+ if (!IsValidOffset(table_end_offset - 1)) {
+ // We already validated the table size in BuildVTable, but we have to make
+ // sure we don't use a bad value here.
+ table_end_offset = binary_length_;
+ }
+
+ // We need to iterate over the vtable fields by their offset in the binary,
+ // not by their IDs. So copy them over to another vector that we can sort on
+ // the offset_from_table property.
+ std::vector<VTable::Entry> fields;
+ for (const auto &vtable_field : vtable.fields) {
+ fields.push_back(vtable_field.second);
+ }
+
+ std::stable_sort(fields.begin(), fields.end(),
+ [](const VTable::Entry &a, const VTable::Entry &b) {
+ return a.offset_from_table < b.offset_from_table;
+ });
+
+ // Iterate over all the fields by order of their offset.
+ for (size_t i = 0; i < fields.size(); ++i) {
+ const reflection::Field *field = fields[i].field;
+ const uint16_t offset_from_table = fields[i].offset_from_table;
+
+ if (offset_from_table == 0) {
+ // Skip non-present fields.
+ continue;
+ }
+
+ // The field offsets are relative to the start of the table.
+ const uint64_t field_offset = table_offset + offset_from_table;
+
+ if (!IsValidOffset(field_offset)) {
+ // The field offset is larger than the binary, nothing we can do.
+ continue;
+ }
+
+ // We have a vtable entry for a non-existant field, that means its a binary
+ // generated by a newer schema than we are currently processing.
+ if (field == nullptr) {
+ // Calculate the length of this unknown field.
+ const uint64_t unknown_field_length =
+ // Check if there is another unknown field after this one.
+ ((i + 1 < fields.size())
+ ? table_offset + fields[i + 1].offset_from_table
+ // Otherwise use the known end of the table.
+ : table_end_offset) -
+ field_offset;
+
+ if (unknown_field_length == 0) { continue; }
+
+ std::string hint;
+
+ if (unknown_field_length == 4) {
+ const auto relative_offset = ReadScalar<uint32_t>(field_offset);
+ if (relative_offset.has_value()) {
+ // The field is 4 in length, so it could be an offset? Provide a hint.
+ hint += "<possibly an offset? Check Loc: +0x";
+ hint += ToHex(field_offset + relative_offset.value());
+ hint += ">";
+ }
+ }
+
+ BinaryRegionComment unknown_field_comment;
+ unknown_field_comment.type = BinaryRegionCommentType::TableUnknownField;
+
+ if (!IsValidRead(field_offset, unknown_field_length)) {
+ const uint64_t remaining = RemainingBytes(field_offset);
+
+ SetError(unknown_field_comment,
+ BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+ std::to_string(unknown_field_length));
+
+ regions.push_back(MakeBinaryRegion(field_offset, remaining,
+ BinaryRegionType::Unknown, remaining,
+ 0, unknown_field_comment));
+ continue;
+ }
+
+ unknown_field_comment.default_value = hint;
+
+ regions.push_back(MakeBinaryRegion(
+ field_offset, unknown_field_length, BinaryRegionType::Unknown,
+ unknown_field_length, 0, unknown_field_comment));
+ continue;
+ }
+
+ if (IsScalar(field->type()->base_type())) {
+ // These are the raw values store in the table.
+ const uint64_t type_size = GetTypeSize(field->type()->base_type());
+ const BinaryRegionType region_type =
+ GetRegionType(field->type()->base_type());
+
+ BinaryRegionComment scalar_field_comment;
+ scalar_field_comment.type = BinaryRegionCommentType::TableField;
+ scalar_field_comment.name =
+ std::string(field->name()->c_str()) + "` (" +
+ reflection::EnumNameBaseType(field->type()->base_type()) + ")";
+
+ if (!IsValidRead(field_offset, type_size)) {
+ const uint64_t remaining = RemainingBytes(field_offset);
+ SetError(scalar_field_comment,
+ BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+ std::to_string(type_size));
+
+ regions.push_back(MakeBinaryRegion(field_offset, remaining,
+ BinaryRegionType::Unknown, remaining,
+ 0, scalar_field_comment));
+ continue;
+ }
+
+ if (IsUnionType(field)) {
+ // This is a type for a union. Validate the value
+ const auto enum_value = ReadScalar<uint8_t>(field_offset);
+
+ // This should always have a value, due to the IsValidRead check above.
+ if (!IsValidUnionValue(field, enum_value.value())) {
+ SetError(scalar_field_comment,
+ BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
+
+ regions.push_back(MakeBinaryRegion(field_offset, type_size,
+ region_type, 0, 0,
+ scalar_field_comment));
+ continue;
+ }
+ }
+
+ regions.push_back(MakeBinaryRegion(field_offset, type_size, region_type,
+ 0, 0, scalar_field_comment));
+ continue;
+ }
+
+ // Read the offset
+ const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
+ uint64_t offset_of_next_item = 0;
+ BinaryRegionComment offset_field_comment;
+ offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
+ offset_field_comment.name = field->name()->c_str();
+ const std::string offset_prefix =
+ "offset to field `" + std::string(field->name()->c_str()) + "`";
+
+ // Validate any field that isn't inline (i.e., non-structs).
+ if (!IsInlineField(field)) {
+ if (!offset_from_field.has_value()) {
+ const uint64_t remaining = RemainingBytes(field_offset);
+
+ SetError(offset_field_comment,
+ BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
+
+ regions.push_back(MakeBinaryRegion(field_offset, remaining,
+ BinaryRegionType::Unknown, remaining,
+ 0, offset_field_comment));
+ continue;
+ }
+
+ offset_of_next_item = field_offset + offset_from_field.value();
+
+ if (!IsValidOffset(offset_of_next_item)) {
+ SetError(offset_field_comment,
+ BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
+ regions.push_back(MakeBinaryRegion(
+ field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+ offset_of_next_item, offset_field_comment));
+ continue;
+ }
+ }
+
+ switch (field->type()->base_type()) {
+ case reflection::BaseType::Obj: {
+ const reflection::Object *next_object =
+ schema_->objects()->Get(field->type()->index());
+
+ if (next_object->is_struct()) {
+ // Structs are stored inline.
+ BuildStruct(field_offset, regions, next_object);
+ } else {
+ offset_field_comment.default_value = "(table)";
+
+ regions.push_back(MakeBinaryRegion(
+ field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+ offset_of_next_item, offset_field_comment));
+
+ BuildTable(offset_of_next_item, BinarySectionType::Table,
+ next_object);
+ }
+ } break;
+
+ case reflection::BaseType::String: {
+ offset_field_comment.default_value = "(string)";
+ regions.push_back(MakeBinaryRegion(
+ field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+ offset_of_next_item, offset_field_comment));
+ BuildString(offset_of_next_item, table, field);
+ } break;
+
+ case reflection::BaseType::Vector: {
+ offset_field_comment.default_value = "(vector)";
+ regions.push_back(MakeBinaryRegion(
+ field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+ offset_of_next_item, offset_field_comment));
+ BuildVector(offset_of_next_item, table, field, table_offset, vtable);
+ } break;
+
+ case reflection::BaseType::Union: {
+ const uint64_t union_offset = offset_of_next_item;
+
+ // The union type field is always one less than the union itself.
+ const uint16_t union_type_id = field->id() - 1;
+
+ auto vtable_field = vtable.fields.find(union_type_id);
+ if (vtable_field == vtable.fields.end()) {
+ // TODO(dbaileychess): need to capture this error condition.
+ break;
+ }
+ offset_field_comment.default_value = "(union)";
+
+ const uint64_t type_offset =
+ table_offset + vtable_field->second.offset_from_table;
+
+ const auto realized_type = ReadScalar<uint8_t>(type_offset);
+ if (!realized_type.has_value()) {
+ const uint64_t remaining = RemainingBytes(type_offset);
+ SetError(offset_field_comment,
+ BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
+ regions.push_back(MakeBinaryRegion(
+ type_offset, remaining, BinaryRegionType::Unknown, remaining, 0,
+ offset_field_comment));
+ continue;
+ }
+
+ if (!IsValidUnionValue(field, realized_type.value())) {
+ // We already export an error in the union type field, so just skip
+ // building the union itself and it will default to an unreference
+ // Binary section.
+ continue;
+ }
+
+ const std::string enum_type =
+ BuildUnion(union_offset, realized_type.value(), field);
+
+ offset_field_comment.default_value =
+ "(union of type `" + enum_type + "`)";
+
+ regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint32_t),
+ BinaryRegionType::UOffset, 0,
+ union_offset, offset_field_comment));
+
+ } break;
+
+ default: break;
+ }
+ }
+
+ // Handle the case where there is padding after the last known binary
+ // region. Calculate where we left off towards the expected end of the
+ // table.
+ const uint64_t i = regions.back().offset + regions.back().length + 1;
+
+ if (i < table_end_offset) {
+ const uint64_t pad_bytes = table_end_offset - i + 1;
+
+ BinaryRegionComment padding_comment;
+ padding_comment.type = BinaryRegionCommentType::Padding;
+
+ regions.push_back(MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
+ BinaryRegionType::Uint8, pad_bytes, 0,
+ padding_comment));
+ }
+
+ AddSection(table_offset,
+ MakeBinarySection(table->name()->str(), type, std::move(regions)));
+}
+
+// Annotates an inline struct located at `struct_offset`, appending one
+// region per leaf field (scalars, nested structs, and fixed-size arrays)
+// to `regions`. Returns the offset just past the struct, or `struct_offset`
+// unchanged when `object` is not actually a struct.
+//
+// Fix: the `regions` parameter declaration was mojibake-corrupted
+// (`®ions`, an HTML-entity mangling of `&regions`) and would not compile.
+uint64_t BinaryAnnotator::BuildStruct(const uint64_t struct_offset,
+                                      std::vector<BinaryRegion> &regions,
+                                      const reflection::Object *const object) {
+  if (!object->is_struct()) { return struct_offset; }
+  uint64_t offset = struct_offset;
+
+  // Loop over all the fields in increasing order
+  ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
+    if (IsScalar(field->type()->base_type())) {
+      // Structure Field value
+      const uint64_t type_size = GetTypeSize(field->type()->base_type());
+      const BinaryRegionType region_type =
+          GetRegionType(field->type()->base_type());
+
+      BinaryRegionComment comment;
+      comment.type = BinaryRegionCommentType::StructField;
+      comment.name =
+          std::string(object->name()->c_str()) + "." + field->name()->c_str();
+      comment.default_value = "(" +
+                              std::string(reflection::EnumNameBaseType(
+                                  field->type()->base_type())) +
+                              ")";
+
+      if (!IsValidRead(offset, type_size)) {
+        const uint64_t remaining = RemainingBytes(offset);
+        SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+                 std::to_string(type_size));
+        regions.push_back(MakeBinaryRegion(offset, remaining,
+                                           BinaryRegionType::Unknown, remaining,
+                                           0, comment));
+
+        // TODO(dbaileychess): Should I bail out here? This sets offset to the
+        // end of the binary. So all other reads in the loop should fail.
+        offset += remaining;
+        return;
+      }
+
+      regions.push_back(
+          MakeBinaryRegion(offset, type_size, region_type, 0, 0, comment));
+      offset += type_size;
+    } else if (field->type()->base_type() == reflection::BaseType::Obj) {
+      // Structs are stored inline, even when nested.
+      offset = BuildStruct(offset, regions,
+                           schema_->objects()->Get(field->type()->index()));
+    } else if (field->type()->base_type() == reflection::BaseType::Array) {
+      const bool is_scalar = IsScalar(field->type()->element());
+      const uint64_t type_size = GetTypeSize(field->type()->element());
+      const BinaryRegionType region_type =
+          GetRegionType(field->type()->element());
+
+      // Arrays are just repeated structures.
+      for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
+        if (is_scalar) {
+          BinaryRegionComment array_comment;
+          array_comment.type = BinaryRegionCommentType::ArrayField;
+          array_comment.name = std::string(object->name()->c_str()) + "." +
+                               field->name()->c_str();
+          array_comment.index = i;
+          array_comment.default_value =
+              "(" +
+              std::string(
+                  reflection::EnumNameBaseType(field->type()->element())) +
+              ")";
+
+          if (!IsValidRead(offset, type_size)) {
+            const uint64_t remaining = RemainingBytes(offset);
+
+            SetError(array_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+                     std::to_string(type_size));
+
+            regions.push_back(MakeBinaryRegion(offset, remaining,
+                                               BinaryRegionType::Unknown,
+                                               remaining, 0, array_comment));
+
+            // TODO(dbaileychess): Should I bail out here? This sets offset to
+            // the end of the binary. So all other reads in the loop should
+            // fail.
+            offset += remaining;
+            break;
+          }
+
+          regions.push_back(MakeBinaryRegion(offset, type_size, region_type, 0,
+                                             0, array_comment));
+
+          offset += type_size;
+        } else {
+          // Array of Structs.
+          //
+          // TODO(dbaileychess): This works, but the comments on the fields lose
+          // some context. Need to figure a way how to plumb the nested arrays
+          // comments together that isn't too confusing.
+          offset = BuildStruct(offset, regions,
+                               schema_->objects()->Get(field->type()->index()));
+        }
+      }
+    }
+
+    // Insert any padding after this field.
+    const uint16_t padding = field->padding();
+    if (padding > 0 && IsValidOffset(offset + padding)) {
+      BinaryRegionComment padding_comment;
+      padding_comment.type = BinaryRegionCommentType::Padding;
+
+      regions.push_back(MakeBinaryRegion(offset, padding,
+                                         BinaryRegionType::Uint8, padding, 0,
+                                         padding_comment));
+      offset += padding;
+    }
+  });
+
+  return offset;
+}
+
+// Annotates the string at `string_offset`: the 4-byte length prefix, the
+// character payload, and the null terminator, emitting them as one String
+// binary section named `<table>.<field>`.
+void BinaryAnnotator::BuildString(const uint64_t string_offset,
+                                  const reflection::Object *const table,
+                                  const reflection::Field *const field) {
+  // Shared strings may be referenced from several fields; annotate only the
+  // first time this offset is seen.
+  if (ContainsSection(string_offset)) { return; }
+
+  std::vector<BinaryRegion> regions;
+
+  BinaryRegionComment length_comment;
+  length_comment.type = BinaryRegionCommentType::StringLength;
+
+  const auto string_length = ReadScalar<uint32_t>(string_offset);
+
+  if (!string_length.has_value()) {
+    // Not enough bytes remain to read even the 4-byte length prefix.
+    const uint64_t remaining = RemainingBytes(string_offset);
+
+    SetError(length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
+
+    regions.push_back(MakeBinaryRegion(string_offset, remaining,
+                                       BinaryRegionType::Unknown, remaining, 0,
+                                       length_comment));
+  } else {
+    const uint32_t char_count = string_length.value();
+
+    // Length prefix + characters + the trailing null terminator.
+    const uint64_t string_end =
+        string_offset + sizeof(uint32_t) + char_count + sizeof(char);
+
+    if (!IsValidOffset(string_end - 1)) {
+      // The declared length runs past the end of the binary; only the length
+      // prefix itself can be annotated.
+      SetError(length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
+
+      regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
+                                         BinaryRegionType::Uint32, 0, 0,
+                                         length_comment));
+    } else {
+      regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
+                                         BinaryRegionType::Uint32, 0, 0,
+                                         length_comment));
+
+      BinaryRegionComment value_comment;
+      value_comment.type = BinaryRegionCommentType::StringValue;
+
+      regions.push_back(MakeBinaryRegion(string_offset + sizeof(uint32_t),
+                                         char_count, BinaryRegionType::Char,
+                                         char_count, 0, value_comment));
+
+      BinaryRegionComment terminator_comment;
+      terminator_comment.type = BinaryRegionCommentType::StringTerminator;
+
+      regions.push_back(MakeBinaryRegion(
+          string_offset + sizeof(uint32_t) + char_count, sizeof(char),
+          BinaryRegionType::Char, 0, 0, terminator_comment));
+    }
+  }
+
+  AddSection(string_offset,
+             MakeBinarySection(std::string(table->name()->c_str()) + "." +
+                                   field->name()->c_str(),
+                               BinarySectionType::String, std::move(regions)));
+}
+
+// Annotates the vector referenced by `field`, starting at `vector_offset`
+// (the location of the 4-byte length prefix). `parent_table_offset` and
+// `vtable` are used to locate the companion union-type vector when this is
+// a vector of unions. Emits one Vector binary section named
+// `<table>.<field>`.
+//
+// Fix: in the vector-of-unions loop, the out-of-binary-offset and
+// missing-realized-type error paths `continue`d without advancing `offset`,
+// unlike every sibling loop, so later iterations re-read the same bytes and
+// emitted duplicate regions. Both paths now consume the element slot.
+void BinaryAnnotator::BuildVector(const uint64_t vector_offset,
+                                  const reflection::Object *const table,
+                                  const reflection::Field *const field,
+                                  const uint64_t parent_table_offset,
+                                  const VTable &vtable) {
+  // Skip shared vectors that have already been annotated.
+  if (ContainsSection(vector_offset)) { return; }
+
+  BinaryRegionComment vector_length_comment;
+  vector_length_comment.type = BinaryRegionCommentType::VectorLength;
+
+  const auto vector_length = ReadScalar<uint32_t>(vector_offset);
+  if (!vector_length.has_value()) {
+    const uint64_t remaining = RemainingBytes(vector_offset);
+    SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+             "4");
+
+    AddSection(
+        vector_offset,
+        MakeSingleRegionBinarySection(
+            std::string(table->name()->c_str()) + "." + field->name()->c_str(),
+            BinarySectionType::Vector,
+            MakeBinaryRegion(vector_offset, remaining,
+                             BinaryRegionType::Unknown, remaining, 0,
+                             vector_length_comment)));
+    return;
+  }
+
+  // Validate there are enough bytes left in the binary to process all the
+  // items.
+  const uint64_t last_item_offset =
+      vector_offset + sizeof(uint32_t) +
+      vector_length.value() * GetElementSize(field);
+
+  if (!IsValidOffset(last_item_offset - 1)) {
+    SetError(vector_length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
+    AddSection(
+        vector_offset,
+        MakeSingleRegionBinarySection(
+            std::string(table->name()->c_str()) + "." + field->name()->c_str(),
+            BinarySectionType::Vector,
+            MakeBinaryRegion(vector_offset, sizeof(uint32_t),
+                             BinaryRegionType::Uint32, 0, 0,
+                             vector_length_comment)));
+
+    return;
+  }
+
+  std::vector<BinaryRegion> regions;
+
+  regions.push_back(MakeBinaryRegion(vector_offset, sizeof(uint32_t),
+                                     BinaryRegionType::Uint32, 0, 0,
+                                     vector_length_comment));
+
+  // `offset` tracks the current element slot; every path that consumes a
+  // slot (success or error-per-element) must advance it.
+  uint64_t offset = vector_offset + sizeof(uint32_t);
+
+  switch (field->type()->element()) {
+    case reflection::BaseType::Obj: {
+      const reflection::Object *object =
+          schema_->objects()->Get(field->type()->index());
+
+      if (object->is_struct()) {
+        // Vector of structs
+        for (size_t i = 0; i < vector_length.value(); ++i) {
+          // Structs are inline to the vector.
+          const uint64_t next_offset = BuildStruct(offset, regions, object);
+          if (next_offset == offset) { break; }
+          offset = next_offset;
+        }
+      } else {
+        // Vector of objects
+        for (size_t i = 0; i < vector_length.value(); ++i) {
+          BinaryRegionComment vector_object_comment;
+          vector_object_comment.type =
+              BinaryRegionCommentType::VectorTableValue;
+          vector_object_comment.index = i;
+
+          const auto table_relative_offset = ReadScalar<uint32_t>(offset);
+          if (!table_relative_offset.has_value()) {
+            const uint64_t remaining = RemainingBytes(offset);
+            SetError(vector_object_comment,
+                     BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
+
+            regions.push_back(
+                MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
+                                 remaining, 0, vector_object_comment));
+            break;
+          }
+
+          // The table offset is relative from the offset location itself.
+          const uint64_t table_offset = offset + table_relative_offset.value();
+
+          if (!IsValidOffset(table_offset)) {
+            SetError(vector_object_comment,
+                     BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
+            regions.push_back(MakeBinaryRegion(
+                offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+                table_offset, vector_object_comment));
+
+            offset += sizeof(uint32_t);
+            continue;
+          }
+
+          if (table_offset == parent_table_offset) {
+            SetError(vector_object_comment,
+                     BinaryRegionStatus::ERROR_CYCLE_DETECTED);
+            // A cycle detected where a table vector field is pointing to
+            // itself. This should only happen in corrupted files.
+            regions.push_back(MakeBinaryRegion(
+                offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+                table_offset, vector_object_comment));
+
+            offset += sizeof(uint32_t);
+            continue;
+          }
+
+          regions.push_back(MakeBinaryRegion(
+              offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+              table_offset, vector_object_comment));
+
+          offset += sizeof(uint32_t);
+
+          BuildTable(table_offset, BinarySectionType::Table, object);
+        }
+      }
+    } break;
+    case reflection::BaseType::String: {
+      // Vector of strings
+      for (size_t i = 0; i < vector_length.value(); ++i) {
+        BinaryRegionComment vector_object_comment;
+        vector_object_comment.type = BinaryRegionCommentType::VectorStringValue;
+        vector_object_comment.index = i;
+
+        const auto string_relative_offset = ReadScalar<uint32_t>(offset);
+        if (!string_relative_offset.has_value()) {
+          const uint64_t remaining = RemainingBytes(offset);
+
+          SetError(vector_object_comment,
+                   BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
+
+          regions.push_back(
+              MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
+                               remaining, 0, vector_object_comment));
+          break;
+        }
+
+        // The string offset is relative from the offset location itself.
+        const uint64_t string_offset = offset + string_relative_offset.value();
+
+        if (!IsValidOffset(string_offset)) {
+          SetError(vector_object_comment,
+                   BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
+          regions.push_back(MakeBinaryRegion(
+              offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+              string_offset, vector_object_comment));
+
+          offset += sizeof(uint32_t);
+          continue;
+        }
+
+        regions.push_back(MakeBinaryRegion(
+            offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
+            string_offset, vector_object_comment));
+
+        BuildString(string_offset, table, field);
+
+        offset += sizeof(uint32_t);
+      }
+    } break;
+    case reflection::BaseType::Union: {
+      // Vector of unions
+      // Unions have both their realized type (uint8_t for now) that are
+      // stored separately. These are stored in the field->index() - 1
+      // location.
+      const uint16_t union_type_vector_id = field->id() - 1;
+
+      auto vtable_entry = vtable.fields.find(union_type_vector_id);
+      if (vtable_entry == vtable.fields.end()) {
+        // TODO(dbaileychess): need to capture this error condition.
+        break;
+      }
+
+      const uint64_t union_type_vector_field_offset =
+          parent_table_offset + vtable_entry->second.offset_from_table;
+
+      const auto union_type_vector_field_relative_offset =
+          ReadScalar<uint16_t>(union_type_vector_field_offset);
+
+      if (!union_type_vector_field_relative_offset.has_value()) {
+        const uint64_t remaining = RemainingBytes(offset);
+        BinaryRegionComment vector_union_comment;
+        vector_union_comment.type = BinaryRegionCommentType::VectorUnionValue;
+        SetError(vector_union_comment,
+                 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
+
+        regions.push_back(MakeBinaryRegion(offset, remaining,
+                                           BinaryRegionType::Unknown, remaining,
+                                           0, vector_union_comment));
+
+        break;
+      }
+
+      // Get the offset to the first type (the + sizeof(uint32_t) is to skip
+      // over the vector length which we already know). Validation happens
+      // within the loop below.
+      const uint64_t union_type_vector_data_offset =
+          union_type_vector_field_offset +
+          union_type_vector_field_relative_offset.value() + sizeof(uint32_t);
+
+      for (size_t i = 0; i < vector_length.value(); ++i) {
+        BinaryRegionComment comment;
+        comment.type = BinaryRegionCommentType::VectorUnionValue;
+        comment.index = i;
+
+        const auto union_relative_offset = ReadScalar<uint32_t>(offset);
+        if (!union_relative_offset.has_value()) {
+          const uint64_t remaining = RemainingBytes(offset);
+
+          SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
+
+          regions.push_back(MakeBinaryRegion(offset, remaining,
+                                             BinaryRegionType::Unknown,
+                                             remaining, 0, comment));
+
+          break;
+        }
+
+        // The union offset is relative from the offset location itself.
+        const uint64_t union_offset = offset + union_relative_offset.value();
+
+        if (!IsValidOffset(union_offset)) {
+          SetError(comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
+
+          regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
+                                             BinaryRegionType::UOffset, 0,
+                                             union_offset, comment));
+          // Consume this element's offset slot before moving on (matches the
+          // table- and string-vector error handling above).
+          offset += sizeof(uint32_t);
+          continue;
+        }
+
+        const auto realized_type =
+            ReadScalar<uint8_t>(union_type_vector_data_offset + i);
+
+        if (!realized_type.has_value()) {
+          SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
+          regions.push_back(MakeBinaryRegion(
+              offset, 0, BinaryRegionType::Unknown, 0, 0, comment));
+          // Consume this element's offset slot before moving on.
+          offset += sizeof(uint32_t);
+          continue;
+        }
+
+        if (!IsValidUnionValue(vtable_entry->second.field->type()->index(),
+                               realized_type.value())) {
+          // We already export an error in the union type field, so just skip
+          // building the union itself and it will default to an unreference
+          // Binary section.
+          offset += sizeof(uint32_t);
+          continue;
+        }
+
+        const std::string enum_type =
+            BuildUnion(union_offset, realized_type.value(), field);
+
+        comment.default_value = "(`" + enum_type + "`)";
+        regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
+                                           BinaryRegionType::UOffset, 0,
+                                           union_offset, comment));
+
+        offset += sizeof(uint32_t);
+      }
+    } break;
+    default: {
+      if (IsScalar(field->type()->element())) {
+        const BinaryRegionType binary_region_type =
+            GetRegionType(field->type()->element());
+
+        const uint64_t type_size = GetTypeSize(field->type()->element());
+
+        // TODO(dbaileychess): It might be nicer to user the
+        // BinaryRegion.array_length field to indicate this.
+        for (size_t i = 0; i < vector_length.value(); ++i) {
+          BinaryRegionComment vector_scalar_comment;
+          vector_scalar_comment.type = BinaryRegionCommentType::VectorValue;
+          vector_scalar_comment.index = i;
+
+          if (!IsValidRead(offset, type_size)) {
+            const uint64_t remaining = RemainingBytes(offset);
+
+            SetError(vector_scalar_comment,
+                     BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
+                     std::to_string(type_size));
+
+            regions.push_back(
+                MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
+                                 remaining, 0, vector_scalar_comment));
+            break;
+          }
+
+          if (IsUnionType(field->type()->element())) {
+            // This is a type for a union. Validate the value
+            const auto enum_value = ReadScalar<uint8_t>(offset);
+
+            // This should always have a value, due to the IsValidRead check
+            // above.
+            if (!IsValidUnionValue(field->type()->index(),
+                                   enum_value.value())) {
+              SetError(vector_scalar_comment,
+                       BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
+              regions.push_back(MakeBinaryRegion(offset, type_size,
+                                                 binary_region_type, 0, 0,
+                                                 vector_scalar_comment));
+              offset += type_size;
+              continue;
+            }
+          }
+
+          regions.push_back(MakeBinaryRegion(offset, type_size,
+                                             binary_region_type, 0, 0,
+                                             vector_scalar_comment));
+          offset += type_size;
+        }
+      }
+    } break;
+  }
+  AddSection(vector_offset,
+             MakeBinarySection(std::string(table->name()->c_str()) + "." +
+                                   field->name()->c_str(),
+                               BinarySectionType::Vector, std::move(regions)));
+}
+
+// Annotates the union value at `union_offset` whose concrete variant is
+// given by `realized_type` (the value read from the companion union-type
+// field). Returns the name of the realized enum variant.
+std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset,
+                                        const uint8_t realized_type,
+                                        const reflection::Field *const field) {
+  const reflection::Enum *next_enum =
+      schema_->enums()->Get(field->type()->index());
+
+  // NOTE(review): indexes values() directly with the realized type, which
+  // assumes union enum values are dense starting at 0; callers validate the
+  // value via IsValidUnionValue first -- TODO confirm for sparse enums.
+  const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);
+
+  // A shared/already-annotated union only needs its variant name returned.
+  if (ContainsSection(union_offset)) { return enum_val->name()->c_str(); }
+
+  const reflection::Type *union_type = enum_val->union_type();
+
+  if (union_type->base_type() == reflection::BaseType::Obj) {
+    const reflection::Object *object =
+        schema_->objects()->Get(union_type->index());
+
+    if (object->is_struct()) {
+      // Unions of structs are stored inline; give them their own Union
+      // binary section named `<object>.<field>`.
+      std::vector<BinaryRegion> regions;
+
+      BuildStruct(union_offset, regions, object);
+
+      AddSection(
+          union_offset,
+          MakeBinarySection(std::string(object->name()->c_str()) + "." +
+                                field->name()->c_str(),
+                            BinarySectionType::Union, std::move(regions)));
+    } else {
+      // Union of tables: annotate the referenced table as its own section.
+      BuildTable(union_offset, BinarySectionType::Table, object);
+    }
+  }
+  // TODO(dbaileychess): handle the other union types.
+
+  return enum_val->name()->c_str();
+}
+
+// Walks every annotated section and fills gaps between consecutive regions.
+// Gaps containing only zero bytes are emitted as Uint8 padding; gaps with
+// non-zero bytes are flagged as unreferenced Unknown data.
+//
+// Fix: three reference declarations were mojibake-corrupted HTML entities
+// (`¤t_section`, `§ion`, `®ion` for `&current_section`, `&section`,
+// `&region`) and would not compile.
+void BinaryAnnotator::FixMissingRegions() {
+  std::vector<BinaryRegion> regions_to_insert;
+  for (auto &current_section : sections_) {
+    BinarySection &section = current_section.second;
+    if (section.regions.empty()) {
+      // TODO(dbaileychess): is this possible?
+      continue;
+    }
+
+    // Track the end of the last seen region; any gap before the next region
+    // is unaccounted-for bytes.
+    uint64_t offset = section.regions[0].offset + section.regions[0].length;
+    for (size_t i = 1; i < section.regions.size(); ++i) {
+      BinaryRegion &region = section.regions[i];
+
+      const uint64_t next_offset = region.offset;
+      if (!IsValidOffset(next_offset)) {
+        // TODO(dbaileychess): figure out how we get into this situation.
+        continue;
+      }
+
+      if (offset < next_offset) {
+        const uint64_t padding_bytes = next_offset - offset;
+
+        BinaryRegionComment comment;
+        comment.type = BinaryRegionCommentType::Padding;
+
+        if (IsNonZeroRegion(offset, padding_bytes, binary_)) {
+          // Non-zero bytes in a gap are data nothing references, not padding.
+          SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
+          regions_to_insert.push_back(
+              MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Unknown,
+                               padding_bytes, 0, comment));
+        } else {
+          regions_to_insert.push_back(
+              MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Uint8,
+                               padding_bytes, 0, comment));
+        }
+      }
+      offset = next_offset + region.length;
+    }
+
+    if (!regions_to_insert.empty()) {
+      // Append the gap-fillers and re-sort so the section's regions stay in
+      // offset order.
+      section.regions.insert(section.regions.end(), regions_to_insert.begin(),
+                             regions_to_insert.end());
+      std::stable_sort(section.regions.begin(), section.regions.end(),
+                       BinaryRegionSort);
+      regions_to_insert.clear();
+    }
+  }
+}
+
+// Inserts synthetic "missing" sections for byte ranges of the binary that no
+// annotated section covers, including any trailing bytes after the last
+// section.
+//
+// Fix: the reference declarations were mojibake-corrupted HTML entities
+// (`¤t_section`, `§ion`, `§ion_to_insert`) and would not compile.
+void BinaryAnnotator::FixMissingSections() {
+  uint64_t offset = 0;
+
+  std::vector<BinarySection> sections_to_insert;
+
+  for (auto &current_section : sections_) {
+    BinarySection &section = current_section.second;
+    const uint64_t section_start_offset = current_section.first;
+    const uint64_t section_end_offset =
+        section.regions.back().offset + section.regions.back().length;
+
+    if (offset < section_start_offset) {
+      // We are at an offset that is less than the current section.
+      const uint64_t pad_bytes = section_start_offset - offset + 1;
+
+      // NOTE(review): `offset - 1` would underflow if the very first section
+      // did not start at 0; presumably a section at offset 0 (the header)
+      // always exists -- TODO confirm.
+      sections_to_insert.push_back(
+          GenerateMissingSection(offset - 1, pad_bytes, binary_));
+    }
+    offset = section_end_offset + 1;
+  }
+
+  // Handle the case where there are still bytes left in the binary that are
+  // unaccounted for.
+  if (offset < binary_length_) {
+    const uint64_t pad_bytes = binary_length_ - offset + 1;
+    sections_to_insert.push_back(
+        GenerateMissingSection(offset - 1, pad_bytes, binary_));
+  }
+
+  for (const BinarySection &section_to_insert : sections_to_insert) {
+    AddSection(section_to_insert.regions[0].offset, section_to_insert);
+  }
+}
+
+// Returns true if `offset` falls within any already-annotated section:
+// either a section starts exactly at `offset`, or the nearest preceding
+// section extends past it.
+bool BinaryAnnotator::ContainsSection(const uint64_t offset) {
+  // First section starting at or after `offset`.
+  auto next = sections_.lower_bound(offset);
+
+  // Exact hit: a section begins precisely at this offset.
+  if (next != sections_.end() && next->first == offset) { return true; }
+
+  // Nothing precedes `offset`, so no earlier section can cover it.
+  if (next == sections_.begin()) { return false; }
+
+  // Otherwise only the immediately preceding section can span `offset`;
+  // compare against the end of its last region.
+  auto prev = next;
+  --prev;
+
+  const uint64_t prev_end =
+      prev->second.regions.back().offset + prev->second.regions.back().length;
+
+  return offset >= prev->first && offset < prev_end;
+}
+
+} // namespace flatbuffers
\ No newline at end of file