blob: f89d0a9aee82788181d6958d3faa190671892d89 [file] [log] [blame]
Austin Schuh2dd86a92022-09-14 21:19:23 -07001/*
2 * Copyright 2021 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
18#define FLATBUFFERS_BINARY_ANNOTATOR_H_
19
20#include <map>
21#include <string>
22#include <vector>
23
24#include "flatbuffers/base.h"
25#include "flatbuffers/reflection.h"
26#include "flatbuffers/stl_emulation.h"
27#include "flatbuffers/util.h"
28
29namespace flatbuffers {
30
// The primitive datatype held by a single annotated region of the binary.
// Every enumerator carries an explicit numeric value.
enum class BinaryRegionType {
  // The type could not be determined.
  Unknown = 0,

  // Offsets (rendered by ToString as UOffset32 / SOffset32 / VOffset16).
  UOffset = 1,
  SOffset = 2,
  VOffset = 3,

  // Boolean, byte and character values.
  Bool = 4,
  Byte = 5,
  Char = 6,

  // Fixed-width integers.
  Uint8 = 7,
  Int8 = 8,
  Uint16 = 9,
  Int16 = 10,
  Uint32 = 11,
  Int32 = 12,
  Uint64 = 13,
  Int64 = 14,

  // Floating point values.
  Float = 15,
  Double = 16,

  // A union type value (rendered by ToString as UType8).
  UType = 17
};
51
// Formats `i` as upper-case hexadecimal, left-padded with '0' to at least
// `width` characters. Values needing more than `width` digits are not
// truncated.
//
// Note that `width` defaults to sizeof(T), i.e. the size of T in bytes and not
// the number of hex digits needed to show every byte of T.
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::stringstream stream;
  // std::setw takes an int; convert explicitly instead of relying on an
  // implicit narrowing conversion from size_t.
  stream << std::hex << std::uppercase << std::setfill('0')
         << std::setw(static_cast<int>(width)) << i;
  return stream.str();
}
59
60// Specialized version for uint8_t that don't work well with std::hex.
61static inline std::string ToHex(uint8_t i) {
62 return ToHex(static_cast<int>(i), 2);
63}
64
// Result of analysing a region. Warnings are numbered from 100 and errors
// from 200; OK is 0.
enum class BinaryRegionStatus {
  OK = 0,

  // Warnings.
  WARN = 100,
  WARN_NO_REFERENCES = 101,
  WARN_CORRUPTED_PADDING = 102,
  WARN_PADDING_LENGTH = 103,

  // Errors.
  ERROR = 200,
  // An offset is pointing outside the binary bounds.
  ERROR_OFFSET_OUT_OF_BINARY = 201,
  // Expecting to read N bytes but not enough remain in the binary.
  ERROR_INCOMPLETE_BINARY = 202,
  // The length of a vtable/vector is longer than possible.
  ERROR_LENGTH_TOO_LONG = 203,
  // The length of a vtable/vector is shorter than possible.
  ERROR_LENGTH_TOO_SHORT = 204,
  // A field marked required is not present in the vtable.
  ERROR_REQUIRED_FIELD_NOT_PRESENT = 205,
  // A realized union type is not within the enum bounds.
  ERROR_INVALID_UNION_TYPE = 206,
  // Occurs when there is a cycle in offsets.
  ERROR_CYCLE_DETECTED = 207
};
87
// Identifies what a region represents, so a comment can be attached to it.
enum class BinaryRegionCommentType {
  Unknown = 0,
  SizePrefix = 1,
  // The offset to the root table.
  RootTableOffset = 2,
  // The optional 4-char file identifier.
  FileIdentifier = 3,
  // Generic 0-filled padding.
  Padding = 4,

  // The size of the vtable.
  VTableSize = 5,
  // The size of the referring table.
  VTableRefferingTableLength = 6,
  // Offsets to vtable fields.
  VTableFieldOffset = 7,
  // Offsets to unknown vtable fields.
  VTableUnknownFieldOffset = 8,

  // The vtable offset of a table.
  TableVTableOffset = 9,
  // An "inline" table field value.
  TableField = 10,
  // A table field that is unknown.
  TableUnknownField = 11,
  // A table field value that points to another section.
  TableOffsetField = 12,

  // A struct field value.
  StructField = 13,
  // An array field value.
  ArrayField = 14,

  // The length of the string.
  StringLength = 15,
  // The string contents.
  StringValue = 16,
  // The explicit string terminator.
  StringTerminator = 17,

  // The length of the vector (# of items).
  VectorLength = 18,
  // An "inline" value of a vector.
  VectorValue = 19,
  // A vector value that points to another section.
  VectorTableValue = 20,
  VectorStringValue = 21,
  VectorUnionValue = 22
};
132
133struct BinaryRegionComment {
134 BinaryRegionStatus status = BinaryRegionStatus::OK;
135
136 // If status is non OK, this may be filled in with additional details.
137 std::string status_message;
138
139 BinaryRegionCommentType type = BinaryRegionCommentType::Unknown;
140
141 std::string name;
142
143 std::string default_value;
144
145 size_t index = 0;
146};
147
148struct BinaryRegion {
149 // Offset into the binary where this region begins.
150 uint64_t offset = 0;
151
152 // The length of this region in bytes.
153 uint64_t length = 0;
154
155 // The underlying datatype of this region
156 BinaryRegionType type = BinaryRegionType::Unknown;
157
158 // If `type` is an array/vector, this is the number of those types this region
159 // encompasses.
160 uint64_t array_length = 0;
161
162 // If the is an offset to some other region, this is what it points to. The
163 // offset is relative to overall binary, not to this region.
164 uint64_t points_to_offset = 0;
165
166 // The comment on the region.
167 BinaryRegionComment comment;
168};
169
// The kind of logical section a group of regions forms.
enum class BinarySectionType {
  Unknown = 0,
  Header = 1,

  // Tables and their vtables.
  Table = 2,
  RootTable = 3,
  VTable = 4,

  // Other section kinds.
  Struct = 5,
  String = 6,
  Vector = 7,
  Union = 8,

  Padding = 9
};
182
183// A section of the binary that is grouped together in some logical manner, and
184// often is pointed too by some other offset BinaryRegion. Sections include
185// `tables`, `vtables`, `strings`, `vectors`, etc..
186struct BinarySection {
187 // User-specified name of the section, if applicable.
188 std::string name;
189
190 // The type of this section.
191 BinarySectionType type = BinarySectionType::Unknown;
192
193 // The binary regions that make up this section, in order of their offsets.
194 std::vector<BinaryRegion> regions;
195};
196
197inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
198 switch (base_type) {
199 case reflection::UType: return BinaryRegionType::UType;
200 case reflection::Bool: return BinaryRegionType::Uint8;
201 case reflection::Byte: return BinaryRegionType::Uint8;
202 case reflection::UByte: return BinaryRegionType::Uint8;
203 case reflection::Short: return BinaryRegionType::Int16;
204 case reflection::UShort: return BinaryRegionType::Uint16;
205 case reflection::Int: return BinaryRegionType::Uint32;
206 case reflection::UInt: return BinaryRegionType::Uint32;
207 case reflection::Long: return BinaryRegionType::Int64;
208 case reflection::ULong: return BinaryRegionType::Uint64;
209 case reflection::Float: return BinaryRegionType::Float;
210 case reflection::Double: return BinaryRegionType::Double;
211 default: return BinaryRegionType::Unknown;
212 }
213}
214
215inline static std::string ToString(const BinaryRegionType type) {
216 switch (type) {
217 case BinaryRegionType::UOffset: return "UOffset32";
218 case BinaryRegionType::SOffset: return "SOffset32";
219 case BinaryRegionType::VOffset: return "VOffset16";
220 case BinaryRegionType::Bool: return "bool";
221 case BinaryRegionType::Char: return "char";
222 case BinaryRegionType::Byte: return "int8_t";
223 case BinaryRegionType::Uint8: return "uint8_t";
224 case BinaryRegionType::Uint16: return "uint16_t";
225 case BinaryRegionType::Uint32: return "uint32_t";
226 case BinaryRegionType::Uint64: return "uint64_t"; ;
227 case BinaryRegionType::Int8: return "int8_t";
228 case BinaryRegionType::Int16: return "int16_t";
229 case BinaryRegionType::Int32: return "int32_t";
230 case BinaryRegionType::Int64: return "int64_t";
231 case BinaryRegionType::Double: return "double";
232 case BinaryRegionType::Float: return "float";
233 case BinaryRegionType::UType: return "UType8";
234 case BinaryRegionType::Unknown: return "?uint8_t";
235 default: return "todo";
236 }
237}
238
239class BinaryAnnotator {
240 public:
241 explicit BinaryAnnotator(const uint8_t *const bfbs,
242 const uint64_t bfbs_length,
243 const uint8_t *const binary,
244 const uint64_t binary_length)
245 : bfbs_(bfbs),
246 bfbs_length_(bfbs_length),
247 schema_(reflection::GetSchema(bfbs)),
248 binary_(binary),
249 binary_length_(binary_length) {}
250
251 std::map<uint64_t, BinarySection> Annotate();
252
253 private:
254 struct VTable {
255 struct Entry {
256 const reflection::Field *field = nullptr;
257 uint16_t offset_from_table = 0;
258 };
259
260 // Field ID -> {field def, offset from table}
261 std::map<uint16_t, Entry> fields;
262
263 uint16_t vtable_size = 0;
264 uint16_t table_size = 0;
265 };
266
267 uint64_t BuildHeader(uint64_t offset);
268
269 void BuildVTable(uint64_t offset, const reflection::Object *table,
270 uint64_t offset_of_referring_table);
271
272 void BuildTable(uint64_t offset, const BinarySectionType type,
273 const reflection::Object *table);
274
275 uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> &regions,
276 const reflection::Object *structure);
277
278 void BuildString(uint64_t offset, const reflection::Object *table,
279 const reflection::Field *field);
280
281 void BuildVector(uint64_t offset, const reflection::Object *table,
282 const reflection::Field *field, uint64_t parent_table_offset,
283 const VTable &vtable);
284
285 std::string BuildUnion(uint64_t offset, uint8_t realized_type,
286 const reflection::Field *field);
287
288 void FixMissingRegions();
289 void FixMissingSections();
290
291 inline bool IsValidOffset(const uint64_t offset) const {
292 return offset < binary_length_;
293 }
294
295 // Determines if performing a GetScalar request for `T` at `offset` would read
296 // passed the end of the binary.
297 template<typename T> inline bool IsValidRead(const uint64_t offset) const {
298 return IsValidRead(offset, sizeof(T));
299 }
300
301 inline bool IsValidRead(const uint64_t offset, const uint64_t length) const {
302 return length < binary_length_ && IsValidOffset(offset + length - 1);
303 }
304
305 // Calculate the number of bytes remaining from the given offset. If offset is
306 // > binary_length, 0 is returned.
307 uint64_t RemainingBytes(const uint64_t offset) const {
308 return IsValidOffset(offset) ? binary_length_ - offset : 0;
309 }
310
311 template<typename T>
312 flatbuffers::Optional<T> ReadScalar(const uint64_t offset) const {
313 if (!IsValidRead<T>(offset)) { return flatbuffers::nullopt; }
314
315 return flatbuffers::ReadScalar<T>(binary_ + offset);
316 }
317
318 // Adds the provided `section` keyed by the `offset` it occurs at. If a
319 // section is already added at that offset, it doesn't replace the exisiting
320 // one.
321 void AddSection(const uint64_t offset, const BinarySection &section) {
322 sections_.insert(std::make_pair(offset, section));
323 }
324
325 bool IsInlineField(const reflection::Field *const field) {
326 if (field->type()->base_type() == reflection::BaseType::Obj) {
327 return schema_->objects()->Get(field->type()->index())->is_struct();
328 }
329 return IsScalar(field->type()->base_type());
330 }
331
332 bool IsUnionType(const reflection::BaseType type) {
333 return (type == reflection::BaseType::UType ||
334 type == reflection::BaseType::Union);
335 }
336
337 bool IsUnionType(const reflection::Field *const field) {
338 return IsUnionType(field->type()->base_type()) &&
339 field->type()->index() >= 0;
340 }
341
342 bool IsValidUnionValue(const reflection::Field *const field,
343 const uint8_t value) {
344 return IsUnionType(field) &&
345 IsValidUnionValue(field->type()->index(), value);
346 }
347
348 bool IsValidUnionValue(const uint32_t enum_id, const uint8_t value) {
349 if (enum_id >= schema_->enums()->size()) { return false; }
350
351 const reflection::Enum *enum_def = schema_->enums()->Get(enum_id);
352
353 if (enum_def == nullptr) { return false; }
354
355 return value < enum_def->values()->size();
356 }
357
358 uint64_t GetElementSize(const reflection::Field *const field) {
359 if (IsScalar(field->type()->element())) {
360 return GetTypeSize(field->type()->element());
361 }
362
363 switch (field->type()->element()) {
364 case reflection::BaseType::Obj: {
365 auto obj = schema_->objects()->Get(field->type()->index());
366 return obj->is_struct() ? obj->bytesize() : sizeof(uint32_t);
367 }
368 default: return sizeof(uint32_t);
369 }
370 }
371
372 bool ContainsSection(const uint64_t offset);
373
374 // The schema for the binary file
375 const uint8_t *bfbs_;
376 const uint64_t bfbs_length_;
377 const reflection::Schema *schema_;
378
379 // The binary data itself.
380 const uint8_t *binary_;
381 const uint64_t binary_length_;
382
383 // Map of binary offset to vtables, to dedupe vtables.
384 std::map<uint64_t, VTable> vtables_;
385
386 // The annotated binary sections, index by their absolute offset.
387 std::map<uint64_t, BinarySection> sections_;
388};
389
390} // namespace flatbuffers
391
392#endif // FLATBUFFERS_BINARY_ANNOTATOR_H_