blob: ec30b1dd69667cdc24bbc657ed283ad05457acad [file] [log] [blame]
Austin Schuh2dd86a92022-09-14 21:19:23 -07001#include "annotated_binary_text_gen.h"
2
3#include <sstream>
4#include <string>
5
6#include "binary_annotator.h"
7#include "flatbuffers/base.h"
8#include "flatbuffers/util.h"
9
10namespace flatbuffers {
11namespace {
12
// Formatting parameters threaded through every text-generation helper below.
struct OutputConfig {
  // Width the type column is left-padded to.
  size_t largest_type_string{ 10 };

  // Width the value column is left-padded to.
  size_t largest_value_string{ 20 };

  // Number of binary bytes rendered on one output line; also the chunk size
  // used when an array value is split across several lines.
  size_t max_bytes_per_line{ 8 };

  // Number of hex characters requested from ToHex when printing offsets.
  size_t offset_max_char{ 4 };

  // Character written between output columns.
  char delimiter{ '|' };
};
24
25static std::string ToString(const BinarySectionType type) {
26 switch (type) {
27 case BinarySectionType::Header: return "header";
28 case BinarySectionType::Table: return "table";
29 case BinarySectionType::RootTable: return "root_table";
30 case BinarySectionType::VTable: return "vtable";
31 case BinarySectionType::Struct: return "struct";
32 case BinarySectionType::String: return "string";
33 case BinarySectionType::Vector: return "vector";
34 case BinarySectionType::Unknown: return "unknown";
35 case BinarySectionType::Union: return "union";
36 case BinarySectionType::Padding: return "padding";
37 default: return "todo";
38 }
39}
40
41static bool IsOffset(const BinaryRegionType type) {
42 return type == BinaryRegionType::UOffset || type == BinaryRegionType::SOffset;
43}
44
// Converts a scalar to its decimal text form. Floating point values go through
// a stringstream (default stream formatting); every other type goes through
// std::to_string.
template<typename T> std::string ToString(T value) {
  if (!std::is_floating_point<T>::value) { return std::to_string(value); }

  std::stringstream stream;
  stream << value;
  return stream.str();
}
54
55template<typename T>
56std::string ToValueString(const BinaryRegion &region, const uint8_t *binary) {
57 std::string s;
58 s += "0x";
59 const T val = ReadScalar<T>(binary + region.offset);
60 const uint64_t start_index = region.offset + region.length - 1;
61 for (uint64_t i = 0; i < region.length; ++i) {
62 s += ToHex(binary[start_index - i]);
63 }
64 s += " (";
65 s += ToString(val);
66 s += ")";
67 return s;
68}
69
70template<>
71std::string ToValueString<std::string>(const BinaryRegion &region,
72 const uint8_t *binary) {
73 return std::string(reinterpret_cast<const char *>(binary + region.offset),
74 static_cast<size_t>(region.array_length));
75}
76
77static std::string ToValueString(const BinaryRegion &region,
78 const uint8_t *binary,
79 const OutputConfig &output_config) {
80 std::string s;
81
82 if (region.array_length) {
83 if (region.type == BinaryRegionType::Uint8 ||
84 region.type == BinaryRegionType::Unknown) {
85 // Interpet each value as a ASCII to aid debugging
86 for (uint64_t i = 0; i < region.array_length; ++i) {
87 const uint8_t c = *(binary + region.offset + i);
88 s += isprint(c) ? static_cast<char>(c & 0x7F) : '.';
89 }
90 return s;
91 } else if (region.type == BinaryRegionType::Char) {
92 // string value
93 return ToValueString<std::string>(region, binary);
94 }
95 }
96
97 switch (region.type) {
98 case BinaryRegionType::Uint32:
99 return ToValueString<uint32_t>(region, binary);
100 case BinaryRegionType::Int32: return ToValueString<int32_t>(region, binary);
101 case BinaryRegionType::Uint16:
102 return ToValueString<uint16_t>(region, binary);
103 case BinaryRegionType::Int16: return ToValueString<int16_t>(region, binary);
104 case BinaryRegionType::Bool: return ToValueString<bool>(region, binary);
105 case BinaryRegionType::Uint8: return ToValueString<uint8_t>(region, binary);
106 case BinaryRegionType::Char: return ToValueString<char>(region, binary);
107 case BinaryRegionType::Byte:
108 case BinaryRegionType::Int8: return ToValueString<int8_t>(region, binary);
109 case BinaryRegionType::Int64: return ToValueString<int64_t>(region, binary);
110 case BinaryRegionType::Uint64:
111 return ToValueString<uint64_t>(region, binary);
112 case BinaryRegionType::Double: return ToValueString<double>(region, binary);
113 case BinaryRegionType::Float: return ToValueString<float>(region, binary);
114 case BinaryRegionType::UType: return ToValueString<uint8_t>(region, binary);
115
116 // Handle Offsets separately, incase they add additional details.
117 case BinaryRegionType::UOffset:
118 s += ToValueString<uint32_t>(region, binary);
119 break;
120 case BinaryRegionType::SOffset:
121 s += ToValueString<int32_t>(region, binary);
122 break;
123 case BinaryRegionType::VOffset:
124 s += ToValueString<uint16_t>(region, binary);
125 break;
126
127 default: break;
128 }
129 // If this is an offset type, include the calculated offset location in the
130 // value.
131 // TODO(dbaileychess): It might be nicer to put this in the comment field.
132 if (IsOffset(region.type)) {
133 s += " Loc: +0x";
134 s += ToHex(region.points_to_offset, output_config.offset_max_char);
135 }
136 return s;
137}
138
// Carries a multi-line value from one output line to the next.
struct DocContinuation {
  // Column at which the value text began on its first line; zero means no
  // continuation has been started.
  size_t value_start_column{ 0 };

  // The part of the value that still has to be printed.
  std::string value;
};
146
147static std::string GenerateTypeString(const BinaryRegion &region) {
148 return ToString(region.type) +
149 ((region.array_length)
150 ? "[" + std::to_string(region.array_length) + "]"
151 : "");
152}
153
154static std::string GenerateComment(const BinaryRegionComment &comment,
155 const BinarySection &) {
156 std::string s;
157 switch (comment.type) {
158 case BinaryRegionCommentType::Unknown: s = "unknown"; break;
159 case BinaryRegionCommentType::SizePrefix: s = "size prefix"; break;
160 case BinaryRegionCommentType::RootTableOffset:
161 s = "offset to root table `" + comment.name + "`";
162 break;
163 // TODO(dbaileychess): make this lowercase to follow the convention.
164 case BinaryRegionCommentType::FileIdentifier: s = "File Identifier"; break;
165 case BinaryRegionCommentType::Padding: s = "padding"; break;
166 case BinaryRegionCommentType::VTableSize: s = "size of this vtable"; break;
167 case BinaryRegionCommentType::VTableRefferingTableLength:
168 s = "size of referring table";
169 break;
170 case BinaryRegionCommentType::VTableFieldOffset:
171 s = "offset to field `" + comment.name;
172 break;
173 case BinaryRegionCommentType::VTableUnknownFieldOffset:
174 s = "offset to unknown field (id: " + std::to_string(comment.index) + ")";
175 break;
176
177 case BinaryRegionCommentType::TableVTableOffset:
178 s = "offset to vtable";
179 break;
180 case BinaryRegionCommentType::TableField:
181 s = "table field `" + comment.name;
182 break;
183 case BinaryRegionCommentType::TableUnknownField: s = "unknown field"; break;
184 case BinaryRegionCommentType::TableOffsetField:
185 s = "offset to field `" + comment.name + "`";
186 break;
187 case BinaryRegionCommentType::StructField:
188 s = "struct field `" + comment.name + "`";
189 break;
190 case BinaryRegionCommentType::ArrayField:
191 s = "array field `" + comment.name + "`[" +
192 std::to_string(comment.index) + "]";
193 break;
194 case BinaryRegionCommentType::StringLength: s = "length of string"; break;
195 case BinaryRegionCommentType::StringValue: s = "string literal"; break;
196 case BinaryRegionCommentType::StringTerminator:
197 s = "string terminator";
198 break;
199 case BinaryRegionCommentType::VectorLength:
200 s = "length of vector (# items)";
201 break;
202 case BinaryRegionCommentType::VectorValue:
203 s = "value[" + std::to_string(comment.index) + "]";
204 break;
205 case BinaryRegionCommentType::VectorTableValue:
206 s = "offset to table[" + std::to_string(comment.index) + "]";
207 break;
208 case BinaryRegionCommentType::VectorStringValue:
209 s = "offset to string[" + std::to_string(comment.index) + "]";
210 break;
211 case BinaryRegionCommentType::VectorUnionValue:
212 s = "offset to union[" + std::to_string(comment.index) + "]";
213 break;
214
215 default: break;
216 }
217 if (!comment.default_value.empty()) { s += " " + comment.default_value; }
218
219 switch (comment.status) {
220 case BinaryRegionStatus::OK: break; // no-op
221 case BinaryRegionStatus::WARN: s = "WARN: " + s; break;
222 case BinaryRegionStatus::WARN_NO_REFERENCES:
223 s = "WARN: nothing refers to this section.";
224 break;
225 case BinaryRegionStatus::WARN_CORRUPTED_PADDING:
226 s = "WARN: could be corrupted padding region.";
227 break;
228 case BinaryRegionStatus::WARN_PADDING_LENGTH:
229 s = "WARN: padding is longer than expected.";
230 break;
231 case BinaryRegionStatus::ERROR: s = "ERROR: " + s; break;
232 case BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY:
233 s = "ERROR: " + s + ". Invalid offset, points outside the binary.";
234 break;
235 case BinaryRegionStatus::ERROR_INCOMPLETE_BINARY:
236 s = "ERROR: " + s + ". Incomplete binary, expected to read " +
237 comment.status_message + " bytes.";
238 break;
239 case BinaryRegionStatus::ERROR_LENGTH_TOO_LONG:
240 s = "ERROR: " + s + ". Longer than the binary.";
241 break;
242 case BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT:
243 s = "ERROR: " + s + ". Shorter than the minimum length: ";
244 break;
245 case BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT:
246 s = "ERROR: " + s + ". Required field is not present.";
247 break;
248 case BinaryRegionStatus::ERROR_INVALID_UNION_TYPE:
249 s = "ERROR: " + s + ". Invalid union type value.";
250 break;
251 case BinaryRegionStatus::ERROR_CYCLE_DETECTED:
252 s = "ERROR: " + s + ". Invalid offset, cycle detected.";
253 break;
254 }
255
256 return s;
257}
258
259static std::string GenerateDocumentation(const BinaryRegion &region,
260 const BinarySection &section,
261 const uint8_t *binary,
262 DocContinuation &continuation,
263 const OutputConfig &output_config) {
264 std::string s;
265
266 // Check if there is a doc continuation that should be prioritized.
267 if (continuation.value_start_column) {
268 s += std::string(continuation.value_start_column - 2, ' ');
269 s += output_config.delimiter;
270 s += " ";
271
272 s += continuation.value.substr(0, output_config.max_bytes_per_line);
273 continuation.value = continuation.value.substr(
274 std::min(output_config.max_bytes_per_line, continuation.value.size()));
275 return s;
276 }
277
278 {
279 std::stringstream ss;
280 ss << std::setw(output_config.largest_type_string) << std::left;
281 ss << GenerateTypeString(region);
282 s += ss.str();
283 }
284 s += " ";
285 s += output_config.delimiter;
286 s += " ";
287 if (region.array_length) {
288 // Record where the value is first being outputted.
289 continuation.value_start_column = s.size();
290
291 // Get the full-length value, which we will chunk below.
292 const std::string value = ToValueString(region, binary, output_config);
293
294 std::stringstream ss;
295 ss << std::setw(output_config.largest_value_string) << std::left;
296 ss << value.substr(0, output_config.max_bytes_per_line);
297 s += ss.str();
298
299 continuation.value =
300 value.substr(std::min(output_config.max_bytes_per_line, value.size()));
301 } else {
302 std::stringstream ss;
303 ss << std::setw(output_config.largest_value_string) << std::left;
304 ss << ToValueString(region, binary, output_config);
305 s += ss.str();
306 }
307
308 s += " ";
309 s += output_config.delimiter;
310 s += " ";
311 s += GenerateComment(region.comment, section);
312
313 return s;
314}
315
316static std::string GenerateRegion(const BinaryRegion &region,
317 const BinarySection &section,
318 const uint8_t *binary,
319 const OutputConfig &output_config) {
320 std::string s;
321 bool doc_generated = false;
322 DocContinuation doc_continuation;
323 for (uint64_t i = 0; i < region.length; ++i) {
324 if ((i % output_config.max_bytes_per_line) == 0) {
325 // Start a new line of output
326 s += '\n';
327 s += " ";
328 s += "+0x";
329 s += ToHex(region.offset + i, output_config.offset_max_char);
330 s += " ";
331 s += output_config.delimiter;
332 }
333
334 // Add each byte
335 s += " ";
336 s += ToHex(binary[region.offset + i]);
337
338 // Check for end of line or end of region conditions.
339 if (((i + 1) % output_config.max_bytes_per_line == 0) ||
340 i + 1 == region.length) {
341 if (i + 1 == region.length) {
342 // We are out of bytes but haven't the kMaxBytesPerLine, so we need to
343 // zero those out to align everything globally.
344 for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0;
345 ++j) {
346 s += " ";
347 }
348 }
349 s += " ";
350 s += output_config.delimiter;
351 // This is the end of the first line or its the last byte of the region,
352 // generate the end-of-line documentation.
353 if (!doc_generated) {
354 s += " ";
355 s += GenerateDocumentation(region, section, binary, doc_continuation,
356 output_config);
357
358 // If we have a value in the doc continuation, that means the doc is
359 // being printed on multiple lines.
360 doc_generated = doc_continuation.value.empty();
361 }
362 }
363 }
364
365 return s;
366}
367
368static std::string GenerateSection(const BinarySection &section,
369 const uint8_t *binary,
370 const OutputConfig &output_config) {
371 std::string s;
372 s += "\n";
373 s += ToString(section.type);
374 if (!section.name.empty()) { s += " (" + section.name + ")"; }
375 s += ":";
376 for (const BinaryRegion &region : section.regions) {
377 s += GenerateRegion(region, section, binary, output_config);
378 }
379 return s;
380}
381} // namespace
382
383bool AnnotatedBinaryTextGenerator::Generate(
384 const std::string &filename, const std::string &schema_filename) {
385 OutputConfig output_config;
386 output_config.max_bytes_per_line = options_.max_bytes_per_line;
387
388 // Given the length of the binary, we can calculate the maximum number of
389 // characters to display in the offset hex: (i.e. 2 would lead to 0XFF being
390 // the max output).
391 output_config.offset_max_char =
392 binary_length_ > 0xFFFFFF
393 ? 8
394 : (binary_length_ > 0xFFFF ? 6 : (binary_length_ > 0xFF ? 4 : 2));
395
396 // Find the largest type string of all the regions in this file, so we can
397 // align the output nicely.
398 output_config.largest_type_string = 0;
399 for (const auto &section : annotations_) {
400 for (const auto &region : section.second.regions) {
401 std::string s = GenerateTypeString(region);
402 if (s.size() > output_config.largest_type_string) {
403 output_config.largest_type_string = s.size();
404 }
405
406 // Don't consider array regions, as they will be split to multiple lines.
407 if (!region.array_length) {
408 s = ToValueString(region, binary_, output_config);
409 if (s.size() > output_config.largest_value_string) {
410 output_config.largest_value_string = s.size();
411 }
412 }
413 }
414 }
415
416 // Generate each of the binary sections
417 std::string s;
418
419 s += "// Annotated Flatbuffer Binary\n";
420 s += "//\n";
421 s += "// Schema file: " + schema_filename + "\n";
422 s += "// Binary file: " + filename + "\n";
423
424 for (const auto &section : annotations_) {
425 s += GenerateSection(section.second, binary_, output_config);
426 s += "\n";
427 }
428
429 // Modify the output filename.
430 std::string output_filename = StripExtension(filename);
431 output_filename += options_.output_postfix;
432 output_filename +=
433 "." + (options_.output_extension.empty() ? GetExtension(filename)
434 : options_.output_extension);
435
436 return SaveFile(output_filename.c_str(), s, false);
437}
438
439} // namespace flatbuffers