blob: dd0b4549e683d989e13b212145a3e7894ab91848 [file] [log] [blame]
Austin Schuh2dd86a92022-09-14 21:19:23 -07001#include "binary_annotator.h"
2
3#include <limits>
4#include <string>
5#include <vector>
6
7#include "flatbuffers/reflection.h"
8#include "flatbuffers/verifier.h"
9
10namespace flatbuffers {
11namespace {
12
13static bool BinaryRegionSort(const BinaryRegion &a, const BinaryRegion &b) {
14 return a.offset < b.offset;
15}
16
17static void SetError(BinaryRegionComment &comment, BinaryRegionStatus status,
18 std::string message = "") {
19 comment.status = status;
20 comment.status_message = message;
21}
22
23static BinaryRegion MakeBinaryRegion(
24 const uint64_t offset = 0, const uint64_t length = 0,
25 const BinaryRegionType type = BinaryRegionType::Unknown,
26 const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
27 const BinaryRegionComment comment = {}) {
28 BinaryRegion region;
29 region.offset = offset;
30 region.length = length;
31 region.type = type;
32 region.array_length = array_length;
33 region.points_to_offset = points_to_offset;
34 region.comment = std::move(comment);
35 return region;
36}
37
38static BinarySection MakeBinarySection(
39 const std::string &name, const BinarySectionType type,
40 const std::vector<BinaryRegion> regions) {
41 BinarySection section;
42 section.name = name;
43 section.type = type;
44 section.regions = std::move(regions);
45 return section;
46}
47
48static BinarySection MakeSingleRegionBinarySection(const std::string &name,
49 const BinarySectionType type,
50 const BinaryRegion &region) {
51 std::vector<BinaryRegion> regions;
52 regions.push_back(region);
53 return MakeBinarySection(name, type, std::move(regions));
54}
55
// Returns true if any byte of `binary` in [offset, offset + length) is
// non-zero. An empty range yields false. The caller is responsible for the
// range being readable.
static bool IsNonZeroRegion(const uint64_t offset, const uint64_t length,
                            const uint8_t *const binary) {
  const uint64_t end = offset + length;
  uint64_t pos = offset;
  while (pos < end) {
    if (binary[pos] != 0) { return true; }
    ++pos;
  }
  return false;
}
63
// Returns true if every byte of `binary` in [offset, offset + length) is a
// printable character according to isprint(). An empty range yields true. The
// caller is responsible for the range being readable.
static bool IsPrintableRegion(const uint64_t offset, const uint64_t length,
                              const uint8_t *const binary) {
  const uint64_t end = offset + length;
  uint64_t pos = offset;
  while (pos < end) {
    if (isprint(binary[pos]) == 0) { return false; }
    ++pos;
  }
  return true;
}
71
72static BinarySection GenerateMissingSection(const uint64_t offset,
73 const uint64_t length,
74 const uint8_t *const binary) {
75 std::vector<BinaryRegion> regions;
76
77 // Check if the region is all zeros or not, as that can tell us if it is
78 // padding or not.
79 if (IsNonZeroRegion(offset, length, binary)) {
80 // Some of the padding bytes are non-zero, so this might be an unknown
81 // section of the binary.
82 // TODO(dbaileychess): We could be a bit smarter with different sized
83 // alignments. For now, the 8 byte check encompasses all the smaller
84 // alignments.
85 BinaryRegionComment comment;
86 comment.type = BinaryRegionCommentType::Unknown;
87 if (length >= 8) {
88 SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
89 } else {
90 SetError(comment, BinaryRegionStatus::WARN_CORRUPTED_PADDING);
91 }
92
93 regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
94 BinaryRegionType::Unknown, length, 0,
95 comment));
96
97 return MakeBinarySection("no known references", BinarySectionType::Unknown,
98 std::move(regions));
99 }
100
101 BinaryRegionComment comment;
102 comment.type = BinaryRegionCommentType::Padding;
103 if (length >= 8) {
104 SetError(comment, BinaryRegionStatus::WARN_PADDING_LENGTH);
105 }
106
107 // This region is most likely padding.
108 regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
109 BinaryRegionType::Uint8, length, 0,
110 comment));
111
112 return MakeBinarySection("", BinarySectionType::Padding, std::move(regions));
113}
114
115} // namespace
116
117std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
118 flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
119 if (!reflection::VerifySchemaBuffer(verifier)) { return {}; }
120
121 // The binary is too short to read as a flatbuffers.
122 // TODO(dbaileychess): We could spit out the annotated buffer sections, but
123 // I'm not sure if it is worth it.
124 if (binary_length_ < 4) { return {}; }
125
126 // Make sure we start with a clean slate.
127 vtables_.clear();
128 sections_.clear();
129
130 // First parse the header region which always start at offset 0.
131 // The returned offset will point to the root_table location.
132 const uint64_t root_table_offset = BuildHeader(0);
133
134 if (IsValidOffset(root_table_offset)) {
135 // Build the root table, and all else will be referenced from it.
136 BuildTable(root_table_offset, BinarySectionType::RootTable,
137 schema_->root_table());
138 }
139
140 // Now that all the sections are built, make sure the binary sections are
141 // contiguous.
142 FixMissingRegions();
143
144 // Then scan the area between BinarySections insert padding sections that are
145 // implied.
146 FixMissingSections();
147
148 return sections_;
149}
150
151uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
152 const auto root_table_offset = ReadScalar<uint32_t>(header_offset);
153
154 if (!root_table_offset.has_value()) {
155 // This shouldn't occur, since we validate the min size of the buffer
156 // before. But for completion sake, we shouldn't read passed the binary end.
157 return std::numeric_limits<uint64_t>::max();
158 }
159
160 std::vector<BinaryRegion> regions;
161 uint64_t offset = header_offset;
162 // TODO(dbaileychess): sized prefixed value
163
164 BinaryRegionComment root_offset_comment;
165 root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
166 root_offset_comment.name = schema_->root_table()->name()->str();
167
168 if (!IsValidOffset(root_table_offset.value())) {
169 SetError(root_offset_comment,
170 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
171 }
172
173 regions.push_back(
174 MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
175 root_table_offset.value(), root_offset_comment));
176 offset += sizeof(uint32_t);
177
178 if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
179 IsPrintableRegion(offset, flatbuffers::kFileIdentifierLength, binary_)) {
180 BinaryRegionComment comment;
181 comment.type = BinaryRegionCommentType::FileIdentifier;
182 // Check if the file identifier region has non-zero data, and assume its
183 // the file identifier. Otherwise, it will get filled in with padding
184 // later.
185 regions.push_back(MakeBinaryRegion(
186 offset, flatbuffers::kFileIdentifierLength * sizeof(uint8_t),
187 BinaryRegionType::Char, flatbuffers::kFileIdentifierLength, 0,
188 comment));
189 }
190
191 AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
192 std::move(regions)));
193
194 return root_table_offset.value();
195}
196
197void BinaryAnnotator::BuildVTable(const uint64_t vtable_offset,
198 const reflection::Object *const table,
199 const uint64_t offset_of_referring_table) {
200 // First see if we have used this vtable before, if so skip building it again.
201 auto it = vtables_.find(vtable_offset);
202 if (it != vtables_.end()) { return; }
203
204 if (ContainsSection(vtable_offset)) { return; }
205
206 BinaryRegionComment vtable_size_comment;
207 vtable_size_comment.type = BinaryRegionCommentType::VTableSize;
208
209 const auto vtable_length = ReadScalar<uint16_t>(vtable_offset);
210 if (!vtable_length.has_value()) {
211 const uint64_t remaining = RemainingBytes(vtable_offset);
212
213 SetError(vtable_size_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
214 "2");
215
216 AddSection(vtable_offset,
217 MakeSingleRegionBinarySection(
218 table->name()->str(), BinarySectionType::VTable,
219 MakeBinaryRegion(vtable_offset, remaining,
220 BinaryRegionType::Unknown, remaining, 0,
221 vtable_size_comment)));
222 return;
223 }
224
225 // Vtables start with the size of the vtable
226 const uint16_t vtable_size = vtable_length.value();
227
228 if (!IsValidOffset(vtable_offset + vtable_size - 1)) {
229 SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
230 // The vtable_size points to off the end of the binary.
231 AddSection(vtable_offset,
232 MakeSingleRegionBinarySection(
233 table->name()->str(), BinarySectionType::VTable,
234 MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
235 BinaryRegionType::Uint16, 0, 0,
236 vtable_size_comment)));
237
238 return;
239 } else if (vtable_size < 2 * sizeof(uint16_t)) {
240 SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
241 "4");
242 // The size includes itself and the table size which are both uint16_t.
243 AddSection(vtable_offset,
244 MakeSingleRegionBinarySection(
245 table->name()->str(), BinarySectionType::VTable,
246 MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
247 BinaryRegionType::Uint16, 0, 0,
248 vtable_size_comment)));
249 return;
250 }
251
252 std::vector<BinaryRegion> regions;
253
254 regions.push_back(MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
255 BinaryRegionType::Uint16, 0, 0,
256 vtable_size_comment));
257 uint64_t offset = vtable_offset + sizeof(uint16_t);
258
259 BinaryRegionComment ref_table_len_comment;
260 ref_table_len_comment.type =
261 BinaryRegionCommentType::VTableRefferingTableLength;
262
263 // Ensure we can read the next uint16_t field, which is the size of the
264 // referring table.
265 const auto table_length = ReadScalar<uint16_t>(offset);
266
267 if (!table_length.has_value()) {
268 const uint64_t remaining = RemainingBytes(offset);
269 SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
270 "2");
271
272 AddSection(offset, MakeSingleRegionBinarySection(
273 table->name()->str(), BinarySectionType::VTable,
274 MakeBinaryRegion(
275 offset, remaining, BinaryRegionType::Unknown,
276 remaining, 0, ref_table_len_comment)));
277 return;
278 }
279
280 // Then they have the size of the table they reference.
281 const uint16_t table_size = table_length.value();
282
283 if (!IsValidOffset(offset_of_referring_table + table_size - 1)) {
284 SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
285 } else if (table_size < 4) {
286 SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
287 "4");
288 }
289
290 regions.push_back(MakeBinaryRegion(offset, sizeof(uint16_t),
291 BinaryRegionType::Uint16, 0, 0,
292 ref_table_len_comment));
293 offset += sizeof(uint16_t);
294
295 const uint64_t offset_start = offset;
296
297 // A mapping between field (and its id) to the relative offset (uin16_t) from
298 // the start of the table.
299 std::map<uint16_t, VTable::Entry> fields;
300
301 // Counter for determining if the binary has more vtable entries than the
302 // schema provided. This can occur if the binary was created at a newer schema
303 // version and is being processed with an older one.
304 uint16_t fields_processed = 0;
305
306 // Loop over all the fields.
307 ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
308 const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);
309
310 if (field_offset >= vtable_offset + vtable_size) {
311 // This field_offset is too large for this vtable, so it must come from a
312 // newer schema than the binary was create with or the binary writer did
313 // not write it. For either case, it is safe to ignore.
314
315 // TODO(dbaileychess): We could show which fields are not set an their
316 // default values if we want. We just need a way to make it obvious that
317 // it isn't part of the buffer.
318 return;
319 }
320
321 BinaryRegionComment field_comment;
322 field_comment.type = BinaryRegionCommentType::VTableFieldOffset;
323 field_comment.name = std::string(field->name()->c_str()) +
324 "` (id: " + std::to_string(field->id()) + ")";
325
326 const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
327
328 if (!offset_from_table.has_value()) {
329 const uint64_t remaining = RemainingBytes(field_offset);
330
331 SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
332 regions.push_back(MakeBinaryRegion(field_offset, remaining,
333 BinaryRegionType::Unknown, remaining,
334 0, field_comment));
335
336 return;
337 }
338
339 if (!IsValidOffset(offset_of_referring_table + offset_from_table.value() -
340 1)) {
341 SetError(field_comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
342 regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
343 BinaryRegionType::VOffset, 0, 0,
344 field_comment));
345 return;
346 }
347
348 VTable::Entry entry;
349 entry.field = field;
350 entry.offset_from_table = offset_from_table.value();
351 fields.insert(std::make_pair(field->id(), entry));
352
353 std::string default_label;
354 if (offset_from_table.value() == 0) {
355 // Not present, so could be default or be optional.
356 if (field->required()) {
357 SetError(field_comment,
358 BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT);
359 // If this is a required field, make it known this is an error.
360 regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
361 BinaryRegionType::VOffset, 0, 0,
362 field_comment));
363 return;
364 } else {
365 // Its an optional field, so get the default value and interpret and
366 // provided an annotation for it.
367 if (IsScalar(field->type()->base_type())) {
368 default_label += "<defaults to ";
369 default_label += IsFloat(field->type()->base_type())
370 ? std::to_string(field->default_real())
371 : std::to_string(field->default_integer());
372 default_label += "> (";
373 } else {
374 default_label += "<null> (";
375 }
376 default_label +=
377 reflection::EnumNameBaseType(field->type()->base_type());
378 default_label += ")";
379 }
380 }
381 field_comment.default_value = default_label;
382
383 regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
384 BinaryRegionType::VOffset, 0, 0,
385 field_comment));
386
387 fields_processed++;
388 });
389
390 // Check if we covered all the expectant fields. If not, we need to add them
391 // as unknown fields.
392 uint16_t expectant_vtable_fields =
393 (vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);
394
395 // Prevent a bad binary from declaring a really large vtable_size, that we can
396 // not indpendently verify.
397 expectant_vtable_fields = std::min(
398 static_cast<uint16_t>(fields_processed * 3), expectant_vtable_fields);
399
400 for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
401 const uint64_t field_offset = offset_start + id * sizeof(uint16_t);
402
403 const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
404
405 BinaryRegionComment field_comment;
406 field_comment.type = BinaryRegionCommentType::VTableUnknownFieldOffset;
407 field_comment.index = id;
408
409 if (!offset_from_table.has_value()) {
410 const uint64_t remaining = RemainingBytes(field_offset);
411 SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
412 regions.push_back(MakeBinaryRegion(field_offset, remaining,
413 BinaryRegionType::Unknown, remaining,
414 0, field_comment));
415 continue;
416 }
417
418 VTable::Entry entry;
419 entry.field = nullptr; // No field to reference.
420 entry.offset_from_table = offset_from_table.value();
421 fields.insert(std::make_pair(id, entry));
422
423 regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
424 BinaryRegionType::VOffset, 0, 0,
425 field_comment));
426 }
427
428 sections_[vtable_offset] = MakeBinarySection(
429 table->name()->str(), BinarySectionType::VTable, std::move(regions));
430
431 VTable vtable;
432 vtable.fields = std::move(fields);
433 vtable.table_size = table_size;
434 vtable.vtable_size = vtable_size;
435
436 vtables_[vtable_offset] = vtable;
437}
438
439void BinaryAnnotator::BuildTable(const uint64_t table_offset,
440 const BinarySectionType type,
441 const reflection::Object *const table) {
442 if (ContainsSection(table_offset)) { return; }
443
444 BinaryRegionComment vtable_offset_comment;
445 vtable_offset_comment.type = BinaryRegionCommentType::TableVTableOffset;
446
447 const auto vtable_soffset = ReadScalar<int32_t>(table_offset);
448
449 if (!vtable_soffset.has_value()) {
450 const uint64_t remaining = RemainingBytes(table_offset);
451 SetError(vtable_offset_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
452 "4");
453
454 AddSection(
455 table_offset,
456 MakeSingleRegionBinarySection(
457 table->name()->str(), type,
458 MakeBinaryRegion(table_offset, remaining, BinaryRegionType::Unknown,
459 remaining, 0, vtable_offset_comment)));
460
461 // If there aren't enough bytes left to read the vtable offset, there is
462 // nothing we can do.
463 return;
464 }
465
466 // Tables start with the vtable
467 const uint64_t vtable_offset = table_offset - vtable_soffset.value();
468
469 if (!IsValidOffset(vtable_offset)) {
470 SetError(vtable_offset_comment,
471 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
472
473 AddSection(table_offset,
474 MakeSingleRegionBinarySection(
475 table->name()->str(), type,
476 MakeBinaryRegion(table_offset, sizeof(int32_t),
477 BinaryRegionType::SOffset, 0, vtable_offset,
478 vtable_offset_comment)));
479
480 // There isn't much to do with an invalid vtable offset, as we won't be able
481 // to intepret the rest of the table fields.
482 return;
483 }
484
485 std::vector<BinaryRegion> regions;
486 regions.push_back(MakeBinaryRegion(table_offset, sizeof(int32_t),
487 BinaryRegionType::SOffset, 0,
488 vtable_offset, vtable_offset_comment));
489
490 // Parse the vtable first so we know what the rest of the fields in the table
491 // are.
492 BuildVTable(vtable_offset, table, table_offset);
493
494 auto vtable_entry = vtables_.find(vtable_offset);
495 if (vtable_entry == vtables_.end()) {
496 // There is no valid vtable for this table, so we cannot process the rest of
497 // the table entries.
498 return;
499 }
500
501 const VTable &vtable = vtable_entry->second;
502
503 // This is the size and length of this table.
504 const uint16_t table_size = vtable.table_size;
505 uint64_t table_end_offset = table_offset + table_size;
506
507 if (!IsValidOffset(table_end_offset - 1)) {
508 // We already validated the table size in BuildVTable, but we have to make
509 // sure we don't use a bad value here.
510 table_end_offset = binary_length_;
511 }
512
513 // We need to iterate over the vtable fields by their offset in the binary,
514 // not by their IDs. So copy them over to another vector that we can sort on
515 // the offset_from_table property.
516 std::vector<VTable::Entry> fields;
517 for (const auto &vtable_field : vtable.fields) {
518 fields.push_back(vtable_field.second);
519 }
520
521 std::stable_sort(fields.begin(), fields.end(),
522 [](const VTable::Entry &a, const VTable::Entry &b) {
523 return a.offset_from_table < b.offset_from_table;
524 });
525
526 // Iterate over all the fields by order of their offset.
527 for (size_t i = 0; i < fields.size(); ++i) {
528 const reflection::Field *field = fields[i].field;
529 const uint16_t offset_from_table = fields[i].offset_from_table;
530
531 if (offset_from_table == 0) {
532 // Skip non-present fields.
533 continue;
534 }
535
536 // The field offsets are relative to the start of the table.
537 const uint64_t field_offset = table_offset + offset_from_table;
538
539 if (!IsValidOffset(field_offset)) {
540 // The field offset is larger than the binary, nothing we can do.
541 continue;
542 }
543
544 // We have a vtable entry for a non-existant field, that means its a binary
545 // generated by a newer schema than we are currently processing.
546 if (field == nullptr) {
547 // Calculate the length of this unknown field.
548 const uint64_t unknown_field_length =
549 // Check if there is another unknown field after this one.
550 ((i + 1 < fields.size())
551 ? table_offset + fields[i + 1].offset_from_table
552 // Otherwise use the known end of the table.
553 : table_end_offset) -
554 field_offset;
555
556 if (unknown_field_length == 0) { continue; }
557
558 std::string hint;
559
560 if (unknown_field_length == 4) {
561 const auto relative_offset = ReadScalar<uint32_t>(field_offset);
562 if (relative_offset.has_value()) {
563 // The field is 4 in length, so it could be an offset? Provide a hint.
564 hint += "<possibly an offset? Check Loc: +0x";
565 hint += ToHex(field_offset + relative_offset.value());
566 hint += ">";
567 }
568 }
569
570 BinaryRegionComment unknown_field_comment;
571 unknown_field_comment.type = BinaryRegionCommentType::TableUnknownField;
572
573 if (!IsValidRead(field_offset, unknown_field_length)) {
574 const uint64_t remaining = RemainingBytes(field_offset);
575
576 SetError(unknown_field_comment,
577 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
578 std::to_string(unknown_field_length));
579
580 regions.push_back(MakeBinaryRegion(field_offset, remaining,
581 BinaryRegionType::Unknown, remaining,
582 0, unknown_field_comment));
583 continue;
584 }
585
586 unknown_field_comment.default_value = hint;
587
588 regions.push_back(MakeBinaryRegion(
589 field_offset, unknown_field_length, BinaryRegionType::Unknown,
590 unknown_field_length, 0, unknown_field_comment));
591 continue;
592 }
593
594 if (IsScalar(field->type()->base_type())) {
595 // These are the raw values store in the table.
596 const uint64_t type_size = GetTypeSize(field->type()->base_type());
597 const BinaryRegionType region_type =
598 GetRegionType(field->type()->base_type());
599
600 BinaryRegionComment scalar_field_comment;
601 scalar_field_comment.type = BinaryRegionCommentType::TableField;
602 scalar_field_comment.name =
603 std::string(field->name()->c_str()) + "` (" +
604 reflection::EnumNameBaseType(field->type()->base_type()) + ")";
605
606 if (!IsValidRead(field_offset, type_size)) {
607 const uint64_t remaining = RemainingBytes(field_offset);
608 SetError(scalar_field_comment,
609 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
610 std::to_string(type_size));
611
612 regions.push_back(MakeBinaryRegion(field_offset, remaining,
613 BinaryRegionType::Unknown, remaining,
614 0, scalar_field_comment));
615 continue;
616 }
617
618 if (IsUnionType(field)) {
619 // This is a type for a union. Validate the value
620 const auto enum_value = ReadScalar<uint8_t>(field_offset);
621
622 // This should always have a value, due to the IsValidRead check above.
623 if (!IsValidUnionValue(field, enum_value.value())) {
624 SetError(scalar_field_comment,
625 BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
626
627 regions.push_back(MakeBinaryRegion(field_offset, type_size,
628 region_type, 0, 0,
629 scalar_field_comment));
630 continue;
631 }
632 }
633
634 regions.push_back(MakeBinaryRegion(field_offset, type_size, region_type,
635 0, 0, scalar_field_comment));
636 continue;
637 }
638
639 // Read the offset
640 const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
641 uint64_t offset_of_next_item = 0;
642 BinaryRegionComment offset_field_comment;
643 offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
644 offset_field_comment.name = field->name()->c_str();
645 const std::string offset_prefix =
646 "offset to field `" + std::string(field->name()->c_str()) + "`";
647
648 // Validate any field that isn't inline (i.e., non-structs).
649 if (!IsInlineField(field)) {
650 if (!offset_from_field.has_value()) {
651 const uint64_t remaining = RemainingBytes(field_offset);
652
653 SetError(offset_field_comment,
654 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
655
656 regions.push_back(MakeBinaryRegion(field_offset, remaining,
657 BinaryRegionType::Unknown, remaining,
658 0, offset_field_comment));
659 continue;
660 }
661
662 offset_of_next_item = field_offset + offset_from_field.value();
663
664 if (!IsValidOffset(offset_of_next_item)) {
665 SetError(offset_field_comment,
666 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
667 regions.push_back(MakeBinaryRegion(
668 field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
669 offset_of_next_item, offset_field_comment));
670 continue;
671 }
672 }
673
674 switch (field->type()->base_type()) {
675 case reflection::BaseType::Obj: {
676 const reflection::Object *next_object =
677 schema_->objects()->Get(field->type()->index());
678
679 if (next_object->is_struct()) {
680 // Structs are stored inline.
681 BuildStruct(field_offset, regions, next_object);
682 } else {
683 offset_field_comment.default_value = "(table)";
684
685 regions.push_back(MakeBinaryRegion(
686 field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
687 offset_of_next_item, offset_field_comment));
688
689 BuildTable(offset_of_next_item, BinarySectionType::Table,
690 next_object);
691 }
692 } break;
693
694 case reflection::BaseType::String: {
695 offset_field_comment.default_value = "(string)";
696 regions.push_back(MakeBinaryRegion(
697 field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
698 offset_of_next_item, offset_field_comment));
699 BuildString(offset_of_next_item, table, field);
700 } break;
701
702 case reflection::BaseType::Vector: {
703 offset_field_comment.default_value = "(vector)";
704 regions.push_back(MakeBinaryRegion(
705 field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
706 offset_of_next_item, offset_field_comment));
707 BuildVector(offset_of_next_item, table, field, table_offset, vtable);
708 } break;
709
710 case reflection::BaseType::Union: {
711 const uint64_t union_offset = offset_of_next_item;
712
713 // The union type field is always one less than the union itself.
714 const uint16_t union_type_id = field->id() - 1;
715
716 auto vtable_field = vtable.fields.find(union_type_id);
717 if (vtable_field == vtable.fields.end()) {
718 // TODO(dbaileychess): need to capture this error condition.
719 break;
720 }
721 offset_field_comment.default_value = "(union)";
722
723 const uint64_t type_offset =
724 table_offset + vtable_field->second.offset_from_table;
725
726 const auto realized_type = ReadScalar<uint8_t>(type_offset);
727 if (!realized_type.has_value()) {
728 const uint64_t remaining = RemainingBytes(type_offset);
729 SetError(offset_field_comment,
730 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
731 regions.push_back(MakeBinaryRegion(
732 type_offset, remaining, BinaryRegionType::Unknown, remaining, 0,
733 offset_field_comment));
734 continue;
735 }
736
737 if (!IsValidUnionValue(field, realized_type.value())) {
738 // We already export an error in the union type field, so just skip
739 // building the union itself and it will default to an unreference
740 // Binary section.
741 continue;
742 }
743
744 const std::string enum_type =
745 BuildUnion(union_offset, realized_type.value(), field);
746
747 offset_field_comment.default_value =
748 "(union of type `" + enum_type + "`)";
749
750 regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint32_t),
751 BinaryRegionType::UOffset, 0,
752 union_offset, offset_field_comment));
753
754 } break;
755
756 default: break;
757 }
758 }
759
760 // Handle the case where there is padding after the last known binary
761 // region. Calculate where we left off towards the expected end of the
762 // table.
763 const uint64_t i = regions.back().offset + regions.back().length + 1;
764
765 if (i < table_end_offset) {
766 const uint64_t pad_bytes = table_end_offset - i + 1;
767
768 BinaryRegionComment padding_comment;
769 padding_comment.type = BinaryRegionCommentType::Padding;
770
771 regions.push_back(MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
772 BinaryRegionType::Uint8, pad_bytes, 0,
773 padding_comment));
774 }
775
776 AddSection(table_offset,
777 MakeBinarySection(table->name()->str(), type, std::move(regions)));
778}
779
780uint64_t BinaryAnnotator::BuildStruct(const uint64_t struct_offset,
781 std::vector<BinaryRegion> &regions,
782 const reflection::Object *const object) {
783 if (!object->is_struct()) { return struct_offset; }
784 uint64_t offset = struct_offset;
785
786 // Loop over all the fields in increasing order
787 ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
788 if (IsScalar(field->type()->base_type())) {
789 // Structure Field value
790 const uint64_t type_size = GetTypeSize(field->type()->base_type());
791 const BinaryRegionType region_type =
792 GetRegionType(field->type()->base_type());
793
794 BinaryRegionComment comment;
795 comment.type = BinaryRegionCommentType::StructField;
796 comment.name =
797 std::string(object->name()->c_str()) + "." + field->name()->c_str();
798 comment.default_value = "(" +
799 std::string(reflection::EnumNameBaseType(
800 field->type()->base_type())) +
801 ")";
802
803 if (!IsValidRead(offset, type_size)) {
804 const uint64_t remaining = RemainingBytes(offset);
805 SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
806 std::to_string(type_size));
807 regions.push_back(MakeBinaryRegion(offset, remaining,
808 BinaryRegionType::Unknown, remaining,
809 0, comment));
810
811 // TODO(dbaileychess): Should I bail out here? This sets offset to the
812 // end of the binary. So all other reads in the loop should fail.
813 offset += remaining;
814 return;
815 }
816
817 regions.push_back(
818 MakeBinaryRegion(offset, type_size, region_type, 0, 0, comment));
819 offset += type_size;
820 } else if (field->type()->base_type() == reflection::BaseType::Obj) {
821 // Structs are stored inline, even when nested.
822 offset = BuildStruct(offset, regions,
823 schema_->objects()->Get(field->type()->index()));
824 } else if (field->type()->base_type() == reflection::BaseType::Array) {
825 const bool is_scalar = IsScalar(field->type()->element());
826 const uint64_t type_size = GetTypeSize(field->type()->element());
827 const BinaryRegionType region_type =
828 GetRegionType(field->type()->element());
829
830 // Arrays are just repeated structures.
831 for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
832 if (is_scalar) {
833 BinaryRegionComment array_comment;
834 array_comment.type = BinaryRegionCommentType::ArrayField;
835 array_comment.name = std::string(object->name()->c_str()) + "." +
836 field->name()->c_str();
837 array_comment.index = i;
838 array_comment.default_value =
839 "(" +
840 std::string(
841 reflection::EnumNameBaseType(field->type()->element())) +
842 ")";
843
844 if (!IsValidRead(offset, type_size)) {
845 const uint64_t remaining = RemainingBytes(offset);
846
847 SetError(array_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
848 std::to_string(type_size));
849
850 regions.push_back(MakeBinaryRegion(offset, remaining,
851 BinaryRegionType::Unknown,
852 remaining, 0, array_comment));
853
854 // TODO(dbaileychess): Should I bail out here? This sets offset to
855 // the end of the binary. So all other reads in the loop should
856 // fail.
857 offset += remaining;
858 break;
859 }
860
861 regions.push_back(MakeBinaryRegion(offset, type_size, region_type, 0,
862 0, array_comment));
863
864 offset += type_size;
865 } else {
866 // Array of Structs.
867 //
868 // TODO(dbaileychess): This works, but the comments on the fields lose
869 // some context. Need to figure a way how to plumb the nested arrays
870 // comments together that isn't too confusing.
871 offset = BuildStruct(offset, regions,
872 schema_->objects()->Get(field->type()->index()));
873 }
874 }
875 }
876
877 // Insert any padding after this field.
878 const uint16_t padding = field->padding();
879 if (padding > 0 && IsValidOffset(offset + padding)) {
880 BinaryRegionComment padding_comment;
881 padding_comment.type = BinaryRegionCommentType::Padding;
882
883 regions.push_back(MakeBinaryRegion(offset, padding,
884 BinaryRegionType::Uint8, padding, 0,
885 padding_comment));
886 offset += padding;
887 }
888 });
889
890 return offset;
891}
892
893void BinaryAnnotator::BuildString(const uint64_t string_offset,
894 const reflection::Object *const table,
895 const reflection::Field *const field) {
896 // Check if we have already generated this string section, and this is a
897 // shared string instance.
898 if (ContainsSection(string_offset)) { return; }
899
900 std::vector<BinaryRegion> regions;
901 const auto string_length = ReadScalar<uint32_t>(string_offset);
902
903 BinaryRegionComment string_length_comment;
904 string_length_comment.type = BinaryRegionCommentType::StringLength;
905
906 if (!string_length.has_value()) {
907 const uint64_t remaining = RemainingBytes(string_offset);
908
909 SetError(string_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
910 "4");
911
912 regions.push_back(MakeBinaryRegion(string_offset, remaining,
913 BinaryRegionType::Unknown, remaining, 0,
914 string_length_comment));
915
916 } else {
917 const uint32_t string_size = string_length.value();
918 const uint64_t string_end =
919 string_offset + sizeof(uint32_t) + string_size + sizeof(char);
920
921 if (!IsValidOffset(string_end - 1)) {
922 SetError(string_length_comment,
923 BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
924
925 regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
926 BinaryRegionType::Uint32, 0, 0,
927 string_length_comment));
928 } else {
929 regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
930 BinaryRegionType::Uint32, 0, 0,
931 string_length_comment));
932
933 BinaryRegionComment string_comment;
934 string_comment.type = BinaryRegionCommentType::StringValue;
935
936 regions.push_back(MakeBinaryRegion(string_offset + sizeof(uint32_t),
937 string_size, BinaryRegionType::Char,
938 string_size, 0, string_comment));
939
940 BinaryRegionComment string_terminator_comment;
941 string_terminator_comment.type =
942 BinaryRegionCommentType::StringTerminator;
943
944 regions.push_back(MakeBinaryRegion(
945 string_offset + sizeof(uint32_t) + string_size, sizeof(char),
946 BinaryRegionType::Char, 0, 0, string_terminator_comment));
947 }
948 }
949
950 AddSection(string_offset,
951 MakeBinarySection(std::string(table->name()->c_str()) + "." +
952 field->name()->c_str(),
953 BinarySectionType::String, std::move(regions)));
954}
955
956void BinaryAnnotator::BuildVector(const uint64_t vector_offset,
957 const reflection::Object *const table,
958 const reflection::Field *const field,
959 const uint64_t parent_table_offset,
960 const VTable &vtable) {
961 if (ContainsSection(vector_offset)) { return; }
962
963 BinaryRegionComment vector_length_comment;
964 vector_length_comment.type = BinaryRegionCommentType::VectorLength;
965
966 const auto vector_length = ReadScalar<uint32_t>(vector_offset);
967 if (!vector_length.has_value()) {
968 const uint64_t remaining = RemainingBytes(vector_offset);
969 SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
970 "4");
971
972 AddSection(
973 vector_offset,
974 MakeSingleRegionBinarySection(
975 std::string(table->name()->c_str()) + "." + field->name()->c_str(),
976 BinarySectionType::Vector,
977 MakeBinaryRegion(vector_offset, remaining,
978 BinaryRegionType::Unknown, remaining, 0,
979 vector_length_comment)));
980 return;
981 }
982
983 // Validate there are enough bytes left in the binary to process all the
984 // items.
985 const uint64_t last_item_offset =
986 vector_offset + sizeof(uint32_t) +
987 vector_length.value() * GetElementSize(field);
988
989 if (!IsValidOffset(last_item_offset - 1)) {
990 SetError(vector_length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
991 AddSection(
992 vector_offset,
993 MakeSingleRegionBinarySection(
994 std::string(table->name()->c_str()) + "." + field->name()->c_str(),
995 BinarySectionType::Vector,
996 MakeBinaryRegion(vector_offset, sizeof(uint32_t),
997 BinaryRegionType::Uint32, 0, 0,
998 vector_length_comment)));
999
1000 return;
1001 }
1002
1003 std::vector<BinaryRegion> regions;
1004
1005 regions.push_back(MakeBinaryRegion(vector_offset, sizeof(uint32_t),
1006 BinaryRegionType::Uint32, 0, 0,
1007 vector_length_comment));
1008
1009 uint64_t offset = vector_offset + sizeof(uint32_t);
1010
1011 switch (field->type()->element()) {
1012 case reflection::BaseType::Obj: {
1013 const reflection::Object *object =
1014 schema_->objects()->Get(field->type()->index());
1015
1016 if (object->is_struct()) {
1017 // Vector of structs
1018 for (size_t i = 0; i < vector_length.value(); ++i) {
1019 // Structs are inline to the vector.
1020 const uint64_t next_offset = BuildStruct(offset, regions, object);
1021 if (next_offset == offset) { break; }
1022 offset = next_offset;
1023 }
1024 } else {
1025 // Vector of objects
1026 for (size_t i = 0; i < vector_length.value(); ++i) {
1027 BinaryRegionComment vector_object_comment;
1028 vector_object_comment.type =
1029 BinaryRegionCommentType::VectorTableValue;
1030 vector_object_comment.index = i;
1031
1032 const auto table_relative_offset = ReadScalar<uint32_t>(offset);
1033 if (!table_relative_offset.has_value()) {
1034 const uint64_t remaining = RemainingBytes(offset);
1035 SetError(vector_object_comment,
1036 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1037
1038 regions.push_back(
1039 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1040 remaining, 0, vector_object_comment));
1041 break;
1042 }
1043
1044 // The table offset is relative from the offset location itself.
1045 const uint64_t table_offset = offset + table_relative_offset.value();
1046
1047 if (!IsValidOffset(table_offset)) {
1048 SetError(vector_object_comment,
1049 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1050 regions.push_back(MakeBinaryRegion(
1051 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1052 table_offset, vector_object_comment));
1053
1054 offset += sizeof(uint32_t);
1055 continue;
1056 }
1057
1058 if (table_offset == parent_table_offset) {
1059 SetError(vector_object_comment,
1060 BinaryRegionStatus::ERROR_CYCLE_DETECTED);
1061 // A cycle detected where a table vector field is pointing to
1062 // itself. This should only happen in corrupted files.
1063 regions.push_back(MakeBinaryRegion(
1064 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1065 table_offset, vector_object_comment));
1066
1067 offset += sizeof(uint32_t);
1068 continue;
1069 }
1070
1071 regions.push_back(MakeBinaryRegion(
1072 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1073 table_offset, vector_object_comment));
1074
1075 offset += sizeof(uint32_t);
1076
1077 BuildTable(table_offset, BinarySectionType::Table, object);
1078 }
1079 }
1080 } break;
1081 case reflection::BaseType::String: {
1082 // Vector of strings
1083 for (size_t i = 0; i < vector_length.value(); ++i) {
1084 BinaryRegionComment vector_object_comment;
1085 vector_object_comment.type = BinaryRegionCommentType::VectorStringValue;
1086 vector_object_comment.index = i;
1087
1088 const auto string_relative_offset = ReadScalar<uint32_t>(offset);
1089 if (!string_relative_offset.has_value()) {
1090 const uint64_t remaining = RemainingBytes(offset);
1091
1092 SetError(vector_object_comment,
1093 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1094
1095 regions.push_back(
1096 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1097 remaining, 0, vector_object_comment));
1098 break;
1099 }
1100
1101 // The string offset is relative from the offset location itself.
1102 const uint64_t string_offset = offset + string_relative_offset.value();
1103
1104 if (!IsValidOffset(string_offset)) {
1105 SetError(vector_object_comment,
1106 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1107 regions.push_back(MakeBinaryRegion(
1108 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1109 string_offset, vector_object_comment));
1110
1111 offset += sizeof(uint32_t);
1112 continue;
1113 }
1114
1115 regions.push_back(MakeBinaryRegion(
1116 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1117 string_offset, vector_object_comment));
1118
1119 BuildString(string_offset, table, field);
1120
1121 offset += sizeof(uint32_t);
1122 }
1123 } break;
1124 case reflection::BaseType::Union: {
1125 // Vector of unions
1126 // Unions have both their realized type (uint8_t for now) that are
1127 // stored separately. These are stored in the field->index() - 1
1128 // location.
1129 const uint16_t union_type_vector_id = field->id() - 1;
1130
1131 auto vtable_entry = vtable.fields.find(union_type_vector_id);
1132 if (vtable_entry == vtable.fields.end()) {
1133 // TODO(dbaileychess): need to capture this error condition.
1134 break;
1135 }
1136
1137 const uint64_t union_type_vector_field_offset =
1138 parent_table_offset + vtable_entry->second.offset_from_table;
1139
1140 const auto union_type_vector_field_relative_offset =
1141 ReadScalar<uint16_t>(union_type_vector_field_offset);
1142
1143 if (!union_type_vector_field_relative_offset.has_value()) {
1144 const uint64_t remaining = RemainingBytes(offset);
1145 BinaryRegionComment vector_union_comment;
1146 vector_union_comment.type = BinaryRegionCommentType::VectorUnionValue;
1147 SetError(vector_union_comment,
1148 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
1149
1150 regions.push_back(MakeBinaryRegion(offset, remaining,
1151 BinaryRegionType::Unknown, remaining,
1152 0, vector_union_comment));
1153
1154 break;
1155 }
1156
1157 // Get the offset to the first type (the + sizeof(uint32_t) is to skip
1158 // over the vector length which we already know). Validation happens
1159 // within the loop below.
1160 const uint64_t union_type_vector_data_offset =
1161 union_type_vector_field_offset +
1162 union_type_vector_field_relative_offset.value() + sizeof(uint32_t);
1163
1164 for (size_t i = 0; i < vector_length.value(); ++i) {
1165 BinaryRegionComment comment;
1166 comment.type = BinaryRegionCommentType::VectorUnionValue;
1167 comment.index = i;
1168
1169 const auto union_relative_offset = ReadScalar<uint32_t>(offset);
1170 if (!union_relative_offset.has_value()) {
1171 const uint64_t remaining = RemainingBytes(offset);
1172
1173 SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1174
1175 regions.push_back(MakeBinaryRegion(offset, remaining,
1176 BinaryRegionType::Unknown,
1177 remaining, 0, comment));
1178
1179 break;
1180 }
1181
1182 // The union offset is relative from the offset location itself.
1183 const uint64_t union_offset = offset + union_relative_offset.value();
1184
1185 if (!IsValidOffset(union_offset)) {
1186 SetError(comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1187
1188 regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1189 BinaryRegionType::UOffset, 0,
1190 union_offset, comment));
1191 continue;
1192 }
1193
1194 const auto realized_type =
1195 ReadScalar<uint8_t>(union_type_vector_data_offset + i);
1196
1197 if (!realized_type.has_value()) {
1198 SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
1199 regions.push_back(MakeBinaryRegion(
1200 offset, 0, BinaryRegionType::Unknown, 0, 0, comment));
1201 continue;
1202 }
1203
1204 if (!IsValidUnionValue(vtable_entry->second.field->type()->index(),
1205 realized_type.value())) {
1206 // We already export an error in the union type field, so just skip
1207 // building the union itself and it will default to an unreference
1208 // Binary section.
1209 offset += sizeof(uint32_t);
1210 continue;
1211 }
1212
1213 const std::string enum_type =
1214 BuildUnion(union_offset, realized_type.value(), field);
1215
1216 comment.default_value = "(`" + enum_type + "`)";
1217 regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1218 BinaryRegionType::UOffset, 0,
1219 union_offset, comment));
1220
1221 offset += sizeof(uint32_t);
1222 }
1223 } break;
1224 default: {
1225 if (IsScalar(field->type()->element())) {
1226 const BinaryRegionType binary_region_type =
1227 GetRegionType(field->type()->element());
1228
1229 const uint64_t type_size = GetTypeSize(field->type()->element());
1230
1231 // TODO(dbaileychess): It might be nicer to user the
1232 // BinaryRegion.array_length field to indicate this.
1233 for (size_t i = 0; i < vector_length.value(); ++i) {
1234 BinaryRegionComment vector_scalar_comment;
1235 vector_scalar_comment.type = BinaryRegionCommentType::VectorValue;
1236 vector_scalar_comment.index = i;
1237
1238 if (!IsValidRead(offset, type_size)) {
1239 const uint64_t remaining = RemainingBytes(offset);
1240
1241 SetError(vector_scalar_comment,
1242 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
1243 std::to_string(type_size));
1244
1245 regions.push_back(
1246 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1247 remaining, 0, vector_scalar_comment));
1248 break;
1249 }
1250
1251 if (IsUnionType(field->type()->element())) {
1252 // This is a type for a union. Validate the value
1253 const auto enum_value = ReadScalar<uint8_t>(offset);
1254
1255 // This should always have a value, due to the IsValidRead check
1256 // above.
1257 if (!IsValidUnionValue(field->type()->index(),
1258 enum_value.value())) {
1259 SetError(vector_scalar_comment,
1260 BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
1261 regions.push_back(MakeBinaryRegion(offset, type_size,
1262 binary_region_type, 0, 0,
1263 vector_scalar_comment));
1264 offset += type_size;
1265 continue;
1266 }
1267 }
1268
1269 regions.push_back(MakeBinaryRegion(offset, type_size,
1270 binary_region_type, 0, 0,
1271 vector_scalar_comment));
1272 offset += type_size;
1273 }
1274 }
1275 } break;
1276 }
1277 AddSection(vector_offset,
1278 MakeBinarySection(std::string(table->name()->c_str()) + "." +
1279 field->name()->c_str(),
1280 BinarySectionType::Vector, std::move(regions)));
1281}
1282
1283std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset,
1284 const uint8_t realized_type,
1285 const reflection::Field *const field) {
1286 const reflection::Enum *next_enum =
1287 schema_->enums()->Get(field->type()->index());
1288
1289 const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);
1290
1291 if (ContainsSection(union_offset)) { return enum_val->name()->c_str(); }
1292
1293 const reflection::Type *union_type = enum_val->union_type();
1294
1295 if (union_type->base_type() == reflection::BaseType::Obj) {
1296 const reflection::Object *object =
1297 schema_->objects()->Get(union_type->index());
1298
1299 if (object->is_struct()) {
1300 // Union of vectors point to a new Binary section
1301 std::vector<BinaryRegion> regions;
1302
1303 BuildStruct(union_offset, regions, object);
1304
1305 AddSection(
1306 union_offset,
1307 MakeBinarySection(std::string(object->name()->c_str()) + "." +
1308 field->name()->c_str(),
1309 BinarySectionType::Union, std::move(regions)));
1310 } else {
1311 BuildTable(union_offset, BinarySectionType::Table, object);
1312 }
1313 }
1314 // TODO(dbaileychess): handle the other union types.
1315
1316 return enum_val->name()->c_str();
1317}
1318
1319void BinaryAnnotator::FixMissingRegions() {
1320 std::vector<BinaryRegion> regions_to_insert;
1321 for (auto &current_section : sections_) {
1322 BinarySection &section = current_section.second;
1323 if (section.regions.empty()) {
1324 // TODO(dbaileychess): is this possible?
1325 continue;
1326 }
1327
1328 uint64_t offset = section.regions[0].offset + section.regions[0].length;
1329 for (size_t i = 1; i < section.regions.size(); ++i) {
1330 BinaryRegion &region = section.regions[i];
1331
1332 const uint64_t next_offset = region.offset;
1333 if (!IsValidOffset(next_offset)) {
1334 // TODO(dbaileychess): figure out how we get into this situation.
1335 continue;
1336 }
1337
1338 if (offset < next_offset) {
1339 const uint64_t padding_bytes = next_offset - offset;
1340
1341 BinaryRegionComment comment;
1342 comment.type = BinaryRegionCommentType::Padding;
1343
1344 if (IsNonZeroRegion(offset, padding_bytes, binary_)) {
1345 SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
1346 regions_to_insert.push_back(
1347 MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Unknown,
1348 padding_bytes, 0, comment));
1349 } else {
1350 regions_to_insert.push_back(
1351 MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Uint8,
1352 padding_bytes, 0, comment));
1353 }
1354 }
1355 offset = next_offset + region.length;
1356 }
1357
1358 if (!regions_to_insert.empty()) {
1359 section.regions.insert(section.regions.end(), regions_to_insert.begin(),
1360 regions_to_insert.end());
1361 std::stable_sort(section.regions.begin(), section.regions.end(),
1362 BinaryRegionSort);
1363 regions_to_insert.clear();
1364 }
1365 }
1366}
1367
1368void BinaryAnnotator::FixMissingSections() {
1369 uint64_t offset = 0;
1370
1371 std::vector<BinarySection> sections_to_insert;
1372
1373 for (auto &current_section : sections_) {
1374 BinarySection &section = current_section.second;
1375 const uint64_t section_start_offset = current_section.first;
1376 const uint64_t section_end_offset =
1377 section.regions.back().offset + section.regions.back().length;
1378
1379 if (offset < section_start_offset) {
1380 // We are at an offset that is less then the current section.
1381 const uint64_t pad_bytes = section_start_offset - offset + 1;
1382
1383 sections_to_insert.push_back(
1384 GenerateMissingSection(offset - 1, pad_bytes, binary_));
1385 }
1386 offset = section_end_offset + 1;
1387 }
1388
1389 // Handle the case where there are still bytes left in the binary that are
1390 // unaccounted for.
1391 if (offset < binary_length_) {
1392 const uint64_t pad_bytes = binary_length_ - offset + 1;
1393 sections_to_insert.push_back(
1394 GenerateMissingSection(offset - 1, pad_bytes, binary_));
1395 }
1396
1397 for (const BinarySection &section_to_insert : sections_to_insert) {
1398 AddSection(section_to_insert.regions[0].offset, section_to_insert);
1399 }
1400}
1401
1402bool BinaryAnnotator::ContainsSection(const uint64_t offset) {
1403 auto it = sections_.lower_bound(offset);
1404 // If the section is found, check that it is exactly equal its offset.
1405 if (it != sections_.end() && it->first == offset) { return true; }
1406
1407 // If this was the first section, there are no other previous sections to
1408 // check.
1409 if (it == sections_.begin()) { return false; }
1410
1411 // Go back one section.
1412 --it;
1413
1414 // And check that if the offset is covered by the section.
1415 return offset >= it->first && offset < it->second.regions.back().offset +
1416 it->second.regions.back().length;
1417}
1418
1419} // namespace flatbuffers