/*
 * Copyright 2017 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef FLATBUFFERS_FLEXBUFFERS_H_
18#define FLATBUFFERS_FLEXBUFFERS_H_
19
James Kuszmaul3b15b0c2022-11-08 14:03:16 -080020#include <algorithm>
Austin Schuhe89fa2d2019-08-14 20:24:23 -070021#include <map>
22// Used to select STL variant.
23#include "flatbuffers/base.h"
24// We use the basic binary writing functions from the regular FlatBuffers.
25#include "flatbuffers/util.h"
26
27#ifdef _MSC_VER
28# include <intrin.h>
29#endif
30
31#if defined(_MSC_VER)
32# pragma warning(push)
33# pragma warning(disable : 4127) // C4127: conditional expression is constant
34#endif
35
36namespace flexbuffers {
37
38class Reference;
39class Map;
40
// Element width tag, stored in the lower 2 bits of a packed type byte. It
// determines the size of the elements (and/or the size field) of the item
// pointed to (e.g. a vector). The width in bytes is `1 << value`.
enum BitWidth {
  BIT_WIDTH_8 = 0,   // 1 byte.
  BIT_WIDTH_16 = 1,  // 2 bytes.
  BIT_WIDTH_32 = 2,  // 4 bytes.
  BIT_WIDTH_64 = 3,  // 8 bytes.
};
49
// Value type tag, stored in the upper 6 bits of a packed type byte.
// The numeric values are relied upon by the arithmetic in ToTypedVector() and
// friends, so they must not be renumbered.
enum Type {
  FBT_NULL = 0,
  FBT_INT = 1,
  FBT_UINT = 2,
  FBT_FLOAT = 3,
  // Types above stored inline, types below (except FBT_BOOL) store an offset.
  FBT_KEY = 4,
  FBT_STRING = 5,
  FBT_INDIRECT_INT = 6,
  FBT_INDIRECT_UINT = 7,
  FBT_INDIRECT_FLOAT = 8,
  FBT_MAP = 9,
  FBT_VECTOR = 10,        // Untyped.
  FBT_VECTOR_INT = 11,    // Typed any size (stores no type table).
  FBT_VECTOR_UINT = 12,
  FBT_VECTOR_FLOAT = 13,
  FBT_VECTOR_KEY = 14,
  // DEPRECATED, use FBT_VECTOR or FBT_VECTOR_KEY instead.
  // Read test.cpp/FlexBuffersDeprecatedTest() for details on why.
  FBT_VECTOR_STRING_DEPRECATED = 15,
  FBT_VECTOR_INT2 = 16,   // Typed tuple (no type table, no size field).
  FBT_VECTOR_UINT2 = 17,
  FBT_VECTOR_FLOAT2 = 18,
  FBT_VECTOR_INT3 = 19,   // Typed triple (no type table, no size field).
  FBT_VECTOR_UINT3 = 20,
  FBT_VECTOR_FLOAT3 = 21,
  FBT_VECTOR_INT4 = 22,   // Typed quad (no type table, no size field).
  FBT_VECTOR_UINT4 = 23,
  FBT_VECTOR_FLOAT4 = 24,
  FBT_BLOB = 25,
  FBT_BOOL = 26,
  // Chosen so that FBT_BOOL - FBT_INT + FBT_VECTOR_INT == FBT_VECTOR_BOOL,
  // i.e. the same element-type -> vector-type conversion works for bools.
  FBT_VECTOR_BOOL = 36,

  FBT_MAX_TYPE = 37
};
88
89inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; }
90
91inline bool IsTypedVectorElementType(Type t) {
92 return (t >= FBT_INT && t <= FBT_STRING) || t == FBT_BOOL;
93}
94
95inline bool IsTypedVector(Type t) {
Austin Schuh272c6132020-11-14 16:37:52 -080096 return (t >= FBT_VECTOR_INT && t <= FBT_VECTOR_STRING_DEPRECATED) ||
Austin Schuhe89fa2d2019-08-14 20:24:23 -070097 t == FBT_VECTOR_BOOL;
98}
99
100inline bool IsFixedTypedVector(Type t) {
101 return t >= FBT_VECTOR_INT2 && t <= FBT_VECTOR_FLOAT4;
102}
103
104inline Type ToTypedVector(Type t, size_t fixed_len = 0) {
105 FLATBUFFERS_ASSERT(IsTypedVectorElementType(t));
106 switch (fixed_len) {
107 case 0: return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT);
108 case 2: return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT2);
109 case 3: return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT3);
110 case 4: return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT4);
111 default: FLATBUFFERS_ASSERT(0); return FBT_NULL;
112 }
113}
114
115inline Type ToTypedVectorElementType(Type t) {
116 FLATBUFFERS_ASSERT(IsTypedVector(t));
117 return static_cast<Type>(t - FBT_VECTOR_INT + FBT_INT);
118}
119
120inline Type ToFixedTypedVectorElementType(Type t, uint8_t *len) {
121 FLATBUFFERS_ASSERT(IsFixedTypedVector(t));
122 auto fixed_type = t - FBT_VECTOR_INT2;
123 *len = static_cast<uint8_t>(fixed_type / 3 +
124 2); // 3 types each, starting from length 2.
125 return static_cast<Type>(fixed_type % 3 + FBT_INT);
126}
127
// Placeholder storage types for 16-bit and 8-bit floats: they are plain
// integers of the matching size, not real floating point types.
// TODO: implement proper support for 8/16bit floats, or decide not to
// support them.
using half = int16_t;
using quarter = int8_t;
132
133// TODO: can we do this without conditionals using intrinsics or inline asm
134// on some platforms? Given branch prediction the method below should be
135// decently quick, but it is the most frequently executed function.
136// We could do an (unaligned) 64-bit read if we ifdef out the platforms for
137// which that doesn't work (or where we'd read into un-owned memory).
138template<typename R, typename T1, typename T2, typename T4, typename T8>
139R ReadSizedScalar(const uint8_t *data, uint8_t byte_width) {
140 return byte_width < 4
141 ? (byte_width < 2
142 ? static_cast<R>(flatbuffers::ReadScalar<T1>(data))
143 : static_cast<R>(flatbuffers::ReadScalar<T2>(data)))
144 : (byte_width < 8
145 ? static_cast<R>(flatbuffers::ReadScalar<T4>(data))
146 : static_cast<R>(flatbuffers::ReadScalar<T8>(data)));
147}
148
149inline int64_t ReadInt64(const uint8_t *data, uint8_t byte_width) {
150 return ReadSizedScalar<int64_t, int8_t, int16_t, int32_t, int64_t>(
151 data, byte_width);
152}
153
154inline uint64_t ReadUInt64(const uint8_t *data, uint8_t byte_width) {
155 // This is the "hottest" function (all offset lookups use this), so worth
156 // optimizing if possible.
157 // TODO: GCC apparently replaces memcpy by a rep movsb, but only if count is a
158 // constant, which here it isn't. Test if memcpy is still faster than
159 // the conditionals in ReadSizedScalar. Can also use inline asm.
Austin Schuh2dd86a92022-09-14 21:19:23 -0700160
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700161 // clang-format off
James Kuszmaul8e62b022022-03-22 09:33:25 -0700162 #if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
163 // This is 64-bit Windows only, __movsb does not work on 32-bit Windows.
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700164 uint64_t u = 0;
165 __movsb(reinterpret_cast<uint8_t *>(&u),
166 reinterpret_cast<const uint8_t *>(data), byte_width);
167 return flatbuffers::EndianScalar(u);
168 #else
169 return ReadSizedScalar<uint64_t, uint8_t, uint16_t, uint32_t, uint64_t>(
170 data, byte_width);
171 #endif
172 // clang-format on
173}
174
175inline double ReadDouble(const uint8_t *data, uint8_t byte_width) {
176 return ReadSizedScalar<double, quarter, half, float, double>(data,
177 byte_width);
178}
179
180inline const uint8_t *Indirect(const uint8_t *offset, uint8_t byte_width) {
181 return offset - ReadUInt64(offset, byte_width);
182}
183
184template<typename T> const uint8_t *Indirect(const uint8_t *offset) {
185 return offset - flatbuffers::ReadScalar<T>(offset);
186}
187
188inline BitWidth WidthU(uint64_t u) {
189#define FLATBUFFERS_GET_FIELD_BIT_WIDTH(value, width) \
190 { \
191 if (!((u) & ~((1ULL << (width)) - 1ULL))) return BIT_WIDTH_##width; \
192 }
193 FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 8);
194 FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 16);
195 FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 32);
196#undef FLATBUFFERS_GET_FIELD_BIT_WIDTH
197 return BIT_WIDTH_64;
198}
199
200inline BitWidth WidthI(int64_t i) {
201 auto u = static_cast<uint64_t>(i) << 1;
202 return WidthU(i >= 0 ? u : ~u);
203}
204
205inline BitWidth WidthF(double f) {
206 return static_cast<double>(static_cast<float>(f)) == f ? BIT_WIDTH_32
207 : BIT_WIDTH_64;
208}
209
// Common base of the typed accessors below.
// Holds a pointer into the data buffer together with the byte width used by
// the item it points at, giving access to one value.
class Object {
 public:
  // `data` points at the item; `byte_width` is the width of its elements.
  Object(const uint8_t *data, uint8_t byte_width)
      : data_(data), byte_width_(byte_width) {}

 protected:
  const uint8_t *data_;
  uint8_t byte_width_;
};
221
Austin Schuh272c6132020-11-14 16:37:52 -0800222// Object that has a size, obtained either from size prefix, or elsewhere.
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700223class Sized : public Object {
224 public:
Austin Schuh272c6132020-11-14 16:37:52 -0800225 // Size prefix.
226 Sized(const uint8_t *data, uint8_t byte_width)
227 : Object(data, byte_width), size_(read_size()) {}
228 // Manual size.
229 Sized(const uint8_t *data, uint8_t byte_width, size_t sz)
230 : Object(data, byte_width), size_(sz) {}
231 size_t size() const { return size_; }
232 // Access size stored in `byte_width_` bytes before data_ pointer.
233 size_t read_size() const {
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700234 return static_cast<size_t>(ReadUInt64(data_ - byte_width_, byte_width_));
235 }
Austin Schuh272c6132020-11-14 16:37:52 -0800236
237 protected:
238 size_t size_;
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700239};
240
241class String : public Sized {
242 public:
Austin Schuh272c6132020-11-14 16:37:52 -0800243 // Size prefix.
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700244 String(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {}
Austin Schuh272c6132020-11-14 16:37:52 -0800245 // Manual size.
246 String(const uint8_t *data, uint8_t byte_width, size_t sz)
247 : Sized(data, byte_width, sz) {}
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700248
249 size_t length() const { return size(); }
250 const char *c_str() const { return reinterpret_cast<const char *>(data_); }
Austin Schuh272c6132020-11-14 16:37:52 -0800251 std::string str() const { return std::string(c_str(), size()); }
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700252
253 static String EmptyString() {
Austin Schuh272c6132020-11-14 16:37:52 -0800254 static const char *empty_string = "";
255 return String(reinterpret_cast<const uint8_t *>(empty_string), 1, 0);
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700256 }
257 bool IsTheEmptyString() const { return data_ == EmptyString().data_; }
258};
259
260class Blob : public Sized {
261 public:
262 Blob(const uint8_t *data_buf, uint8_t byte_width)
263 : Sized(data_buf, byte_width) {}
264
265 static Blob EmptyBlob() {
266 static const uint8_t empty_blob[] = { 0 /*len*/ };
267 return Blob(empty_blob + 1, 1);
268 }
269 bool IsTheEmptyBlob() const { return data_ == EmptyBlob().data_; }
270 const uint8_t *data() const { return data_; }
271};
272
273class Vector : public Sized {
274 public:
275 Vector(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {}
276
277 Reference operator[](size_t i) const;
278
279 static Vector EmptyVector() {
280 static const uint8_t empty_vector[] = { 0 /*len*/ };
281 return Vector(empty_vector + 1, 1);
282 }
283 bool IsTheEmptyVector() const { return data_ == EmptyVector().data_; }
284};
285
286class TypedVector : public Sized {
287 public:
288 TypedVector(const uint8_t *data, uint8_t byte_width, Type element_type)
289 : Sized(data, byte_width), type_(element_type) {}
290
291 Reference operator[](size_t i) const;
292
293 static TypedVector EmptyTypedVector() {
294 static const uint8_t empty_typed_vector[] = { 0 /*len*/ };
295 return TypedVector(empty_typed_vector + 1, 1, FBT_INT);
296 }
297 bool IsTheEmptyVector() const {
298 return data_ == TypedVector::EmptyTypedVector().data_;
299 }
300
301 Type ElementType() { return type_; }
302
Austin Schuh272c6132020-11-14 16:37:52 -0800303 friend Reference;
304
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700305 private:
306 Type type_;
307
308 friend Map;
309};
310
311class FixedTypedVector : public Object {
312 public:
313 FixedTypedVector(const uint8_t *data, uint8_t byte_width, Type element_type,
314 uint8_t len)
315 : Object(data, byte_width), type_(element_type), len_(len) {}
316
317 Reference operator[](size_t i) const;
318
319 static FixedTypedVector EmptyFixedTypedVector() {
320 static const uint8_t fixed_empty_vector[] = { 0 /* unused */ };
321 return FixedTypedVector(fixed_empty_vector, 1, FBT_INT, 0);
322 }
323 bool IsTheEmptyFixedTypedVector() const {
324 return data_ == FixedTypedVector::EmptyFixedTypedVector().data_;
325 }
326
James Kuszmaul8e62b022022-03-22 09:33:25 -0700327 Type ElementType() const { return type_; }
328 uint8_t size() const { return len_; }
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700329
330 private:
331 Type type_;
332 uint8_t len_;
333};
334
335class Map : public Vector {
336 public:
337 Map(const uint8_t *data, uint8_t byte_width) : Vector(data, byte_width) {}
338
339 Reference operator[](const char *key) const;
340 Reference operator[](const std::string &key) const;
341
342 Vector Values() const { return Vector(data_, byte_width_); }
343
344 TypedVector Keys() const {
345 const size_t num_prefixed_fields = 3;
346 auto keys_offset = data_ - byte_width_ * num_prefixed_fields;
347 return TypedVector(Indirect(keys_offset, byte_width_),
348 static_cast<uint8_t>(
349 ReadUInt64(keys_offset + byte_width_, byte_width_)),
350 FBT_KEY);
351 }
352
353 static Map EmptyMap() {
354 static const uint8_t empty_map[] = {
355 0 /*keys_len*/, 0 /*keys_offset*/, 1 /*keys_width*/, 0 /*len*/
356 };
357 return Map(empty_map + 4, 1);
358 }
359
360 bool IsTheEmptyMap() const { return data_ == EmptyMap().data_; }
361};
362
// Appends the elements of the vector-like `v` to `s` as "[ a, b, c ]".
// Each element is rendered via its ToString() with string quoting enabled;
// `keys_quoted` is forwarded unchanged.
template<typename T>
void AppendToString(std::string &s, T &&v, bool keys_quoted) {
  s += "[ ";
  size_t index = 0;
  while (index < v.size()) {
    if (index != 0) s += ", ";
    v[index].ToString(true, keys_quoted, s);
    ++index;
  }
  s += " ]";
}
372
373class Reference {
374 public:
Austin Schuh272c6132020-11-14 16:37:52 -0800375 Reference()
James Kuszmaul8e62b022022-03-22 09:33:25 -0700376 : data_(nullptr), parent_width_(0), byte_width_(0), type_(FBT_NULL) {}
Austin Schuh272c6132020-11-14 16:37:52 -0800377
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700378 Reference(const uint8_t *data, uint8_t parent_width, uint8_t byte_width,
379 Type type)
380 : data_(data),
381 parent_width_(parent_width),
382 byte_width_(byte_width),
383 type_(type) {}
384
385 Reference(const uint8_t *data, uint8_t parent_width, uint8_t packed_type)
386 : data_(data), parent_width_(parent_width) {
387 byte_width_ = 1U << static_cast<BitWidth>(packed_type & 3);
388 type_ = static_cast<Type>(packed_type >> 2);
389 }
390
391 Type GetType() const { return type_; }
392
393 bool IsNull() const { return type_ == FBT_NULL; }
394 bool IsBool() const { return type_ == FBT_BOOL; }
395 bool IsInt() const { return type_ == FBT_INT || type_ == FBT_INDIRECT_INT; }
396 bool IsUInt() const {
397 return type_ == FBT_UINT || type_ == FBT_INDIRECT_UINT;
398 }
399 bool IsIntOrUint() const { return IsInt() || IsUInt(); }
400 bool IsFloat() const {
401 return type_ == FBT_FLOAT || type_ == FBT_INDIRECT_FLOAT;
402 }
403 bool IsNumeric() const { return IsIntOrUint() || IsFloat(); }
404 bool IsString() const { return type_ == FBT_STRING; }
405 bool IsKey() const { return type_ == FBT_KEY; }
406 bool IsVector() const { return type_ == FBT_VECTOR || type_ == FBT_MAP; }
Austin Schuh272c6132020-11-14 16:37:52 -0800407 bool IsUntypedVector() const { return type_ == FBT_VECTOR; }
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700408 bool IsTypedVector() const { return flexbuffers::IsTypedVector(type_); }
Austin Schuh272c6132020-11-14 16:37:52 -0800409 bool IsFixedTypedVector() const {
410 return flexbuffers::IsFixedTypedVector(type_);
411 }
412 bool IsAnyVector() const {
413 return (IsTypedVector() || IsFixedTypedVector() || IsVector());
414 }
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700415 bool IsMap() const { return type_ == FBT_MAP; }
416 bool IsBlob() const { return type_ == FBT_BLOB; }
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700417 bool AsBool() const {
418 return (type_ == FBT_BOOL ? ReadUInt64(data_, parent_width_)
Austin Schuh272c6132020-11-14 16:37:52 -0800419 : AsUInt64()) != 0;
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700420 }
421
422 // Reads any type as a int64_t. Never fails, does most sensible conversion.
423 // Truncates floats, strings are attempted to be parsed for a number,
424 // vectors/maps return their size. Returns 0 if all else fails.
425 int64_t AsInt64() const {
426 if (type_ == FBT_INT) {
427 // A fast path for the common case.
428 return ReadInt64(data_, parent_width_);
429 } else
430 switch (type_) {
431 case FBT_INDIRECT_INT: return ReadInt64(Indirect(), byte_width_);
432 case FBT_UINT: return ReadUInt64(data_, parent_width_);
433 case FBT_INDIRECT_UINT: return ReadUInt64(Indirect(), byte_width_);
434 case FBT_FLOAT:
435 return static_cast<int64_t>(ReadDouble(data_, parent_width_));
436 case FBT_INDIRECT_FLOAT:
437 return static_cast<int64_t>(ReadDouble(Indirect(), byte_width_));
438 case FBT_NULL: return 0;
439 case FBT_STRING: return flatbuffers::StringToInt(AsString().c_str());
440 case FBT_VECTOR: return static_cast<int64_t>(AsVector().size());
441 case FBT_BOOL: return ReadInt64(data_, parent_width_);
442 default:
443 // Convert other things to int.
444 return 0;
445 }
446 }
447
448 // TODO: could specialize these to not use AsInt64() if that saves
449 // extension ops in generated code, and use a faster op than ReadInt64.
450 int32_t AsInt32() const { return static_cast<int32_t>(AsInt64()); }
451 int16_t AsInt16() const { return static_cast<int16_t>(AsInt64()); }
452 int8_t AsInt8() const { return static_cast<int8_t>(AsInt64()); }
453
454 uint64_t AsUInt64() const {
455 if (type_ == FBT_UINT) {
456 // A fast path for the common case.
457 return ReadUInt64(data_, parent_width_);
458 } else
459 switch (type_) {
460 case FBT_INDIRECT_UINT: return ReadUInt64(Indirect(), byte_width_);
461 case FBT_INT: return ReadInt64(data_, parent_width_);
462 case FBT_INDIRECT_INT: return ReadInt64(Indirect(), byte_width_);
463 case FBT_FLOAT:
464 return static_cast<uint64_t>(ReadDouble(data_, parent_width_));
465 case FBT_INDIRECT_FLOAT:
466 return static_cast<uint64_t>(ReadDouble(Indirect(), byte_width_));
467 case FBT_NULL: return 0;
468 case FBT_STRING: return flatbuffers::StringToUInt(AsString().c_str());
469 case FBT_VECTOR: return static_cast<uint64_t>(AsVector().size());
470 case FBT_BOOL: return ReadUInt64(data_, parent_width_);
471 default:
472 // Convert other things to uint.
473 return 0;
474 }
475 }
476
477 uint32_t AsUInt32() const { return static_cast<uint32_t>(AsUInt64()); }
478 uint16_t AsUInt16() const { return static_cast<uint16_t>(AsUInt64()); }
479 uint8_t AsUInt8() const { return static_cast<uint8_t>(AsUInt64()); }
480
481 double AsDouble() const {
482 if (type_ == FBT_FLOAT) {
483 // A fast path for the common case.
484 return ReadDouble(data_, parent_width_);
485 } else
486 switch (type_) {
487 case FBT_INDIRECT_FLOAT: return ReadDouble(Indirect(), byte_width_);
488 case FBT_INT:
489 return static_cast<double>(ReadInt64(data_, parent_width_));
490 case FBT_UINT:
491 return static_cast<double>(ReadUInt64(data_, parent_width_));
492 case FBT_INDIRECT_INT:
493 return static_cast<double>(ReadInt64(Indirect(), byte_width_));
494 case FBT_INDIRECT_UINT:
495 return static_cast<double>(ReadUInt64(Indirect(), byte_width_));
496 case FBT_NULL: return 0.0;
Austin Schuh272c6132020-11-14 16:37:52 -0800497 case FBT_STRING: {
498 double d;
499 flatbuffers::StringToNumber(AsString().c_str(), &d);
500 return d;
501 }
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700502 case FBT_VECTOR: return static_cast<double>(AsVector().size());
503 case FBT_BOOL:
504 return static_cast<double>(ReadUInt64(data_, parent_width_));
505 default:
506 // Convert strings and other things to float.
507 return 0;
508 }
509 }
510
511 float AsFloat() const { return static_cast<float>(AsDouble()); }
512
513 const char *AsKey() const {
Austin Schuh272c6132020-11-14 16:37:52 -0800514 if (type_ == FBT_KEY || type_ == FBT_STRING) {
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700515 return reinterpret_cast<const char *>(Indirect());
516 } else {
517 return "";
518 }
519 }
520
Austin Schuh272c6132020-11-14 16:37:52 -0800521 // This function returns the empty string if you try to read something that
522 // is not a string or key.
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700523 String AsString() const {
524 if (type_ == FBT_STRING) {
525 return String(Indirect(), byte_width_);
Austin Schuh272c6132020-11-14 16:37:52 -0800526 } else if (type_ == FBT_KEY) {
527 auto key = Indirect();
528 return String(key, byte_width_,
529 strlen(reinterpret_cast<const char *>(key)));
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700530 } else {
531 return String::EmptyString();
532 }
533 }
534
535 // Unlike AsString(), this will convert any type to a std::string.
536 std::string ToString() const {
537 std::string s;
538 ToString(false, false, s);
539 return s;
540 }
541
542 // Convert any type to a JSON-like string. strings_quoted determines if
543 // string values at the top level receive "" quotes (inside other values
544 // they always do). keys_quoted determines if keys are quoted, at any level.
545 // TODO(wvo): add further options to have indentation/newlines.
546 void ToString(bool strings_quoted, bool keys_quoted, std::string &s) const {
547 if (type_ == FBT_STRING) {
548 String str(Indirect(), byte_width_);
549 if (strings_quoted) {
550 flatbuffers::EscapeString(str.c_str(), str.length(), &s, true, false);
551 } else {
552 s.append(str.c_str(), str.length());
553 }
554 } else if (IsKey()) {
555 auto str = AsKey();
556 if (keys_quoted) {
557 flatbuffers::EscapeString(str, strlen(str), &s, true, false);
558 } else {
559 s += str;
560 }
561 } else if (IsInt()) {
562 s += flatbuffers::NumToString(AsInt64());
563 } else if (IsUInt()) {
564 s += flatbuffers::NumToString(AsUInt64());
565 } else if (IsFloat()) {
566 s += flatbuffers::NumToString(AsDouble());
567 } else if (IsNull()) {
568 s += "null";
569 } else if (IsBool()) {
570 s += AsBool() ? "true" : "false";
571 } else if (IsMap()) {
572 s += "{ ";
573 auto m = AsMap();
574 auto keys = m.Keys();
575 auto vals = m.Values();
576 for (size_t i = 0; i < keys.size(); i++) {
James Kuszmaul8e62b022022-03-22 09:33:25 -0700577 bool kq = keys_quoted;
578 if (!kq) {
579 // FlexBuffers keys may contain arbitrary characters, only allow
580 // unquoted if it looks like an "identifier":
581 const char *p = keys[i].AsKey();
582 if (!flatbuffers::is_alpha(*p) && *p != '_') {
Austin Schuh2dd86a92022-09-14 21:19:23 -0700583 kq = true;
James Kuszmaul8e62b022022-03-22 09:33:25 -0700584 } else {
585 while (*++p) {
586 if (!flatbuffers::is_alnum(*p) && *p != '_') {
587 kq = true;
588 break;
589 }
590 }
591 }
592 }
593 keys[i].ToString(true, kq, s);
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700594 s += ": ";
595 vals[i].ToString(true, keys_quoted, s);
596 if (i < keys.size() - 1) s += ", ";
597 }
598 s += " }";
599 } else if (IsVector()) {
600 AppendToString<Vector>(s, AsVector(), keys_quoted);
601 } else if (IsTypedVector()) {
602 AppendToString<TypedVector>(s, AsTypedVector(), keys_quoted);
603 } else if (IsFixedTypedVector()) {
604 AppendToString<FixedTypedVector>(s, AsFixedTypedVector(), keys_quoted);
605 } else if (IsBlob()) {
606 auto blob = AsBlob();
Austin Schuh272c6132020-11-14 16:37:52 -0800607 flatbuffers::EscapeString(reinterpret_cast<const char *>(blob.data()),
608 blob.size(), &s, true, false);
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700609 } else {
610 s += "(?)";
611 }
612 }
613
614 // This function returns the empty blob if you try to read a not-blob.
615 // Strings can be viewed as blobs too.
616 Blob AsBlob() const {
617 if (type_ == FBT_BLOB || type_ == FBT_STRING) {
618 return Blob(Indirect(), byte_width_);
619 } else {
620 return Blob::EmptyBlob();
621 }
622 }
623
624 // This function returns the empty vector if you try to read a not-vector.
625 // Maps can be viewed as vectors too.
626 Vector AsVector() const {
627 if (type_ == FBT_VECTOR || type_ == FBT_MAP) {
628 return Vector(Indirect(), byte_width_);
629 } else {
630 return Vector::EmptyVector();
631 }
632 }
633
634 TypedVector AsTypedVector() const {
635 if (IsTypedVector()) {
Austin Schuh272c6132020-11-14 16:37:52 -0800636 auto tv =
637 TypedVector(Indirect(), byte_width_, ToTypedVectorElementType(type_));
638 if (tv.type_ == FBT_STRING) {
639 // These can't be accessed as strings, since we don't know the bit-width
640 // of the size field, see the declaration of
641 // FBT_VECTOR_STRING_DEPRECATED above for details.
642 // We change the type here to be keys, which are a subtype of strings,
643 // and will ignore the size field. This will truncate strings with
644 // embedded nulls.
645 tv.type_ = FBT_KEY;
646 }
647 return tv;
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700648 } else {
649 return TypedVector::EmptyTypedVector();
650 }
651 }
652
653 FixedTypedVector AsFixedTypedVector() const {
654 if (IsFixedTypedVector()) {
655 uint8_t len = 0;
656 auto vtype = ToFixedTypedVectorElementType(type_, &len);
657 return FixedTypedVector(Indirect(), byte_width_, vtype, len);
658 } else {
659 return FixedTypedVector::EmptyFixedTypedVector();
660 }
661 }
662
663 Map AsMap() const {
664 if (type_ == FBT_MAP) {
665 return Map(Indirect(), byte_width_);
666 } else {
667 return Map::EmptyMap();
668 }
669 }
670
671 template<typename T> T As() const;
672
673 // Experimental: Mutation functions.
674 // These allow scalars in an already created buffer to be updated in-place.
675 // Since by default scalars are stored in the smallest possible space,
676 // the new value may not fit, in which case these functions return false.
677 // To avoid this, you can construct the values you intend to mutate using
678 // Builder::ForceMinimumBitWidth.
679 bool MutateInt(int64_t i) {
680 if (type_ == FBT_INT) {
681 return Mutate(data_, i, parent_width_, WidthI(i));
682 } else if (type_ == FBT_INDIRECT_INT) {
683 return Mutate(Indirect(), i, byte_width_, WidthI(i));
684 } else if (type_ == FBT_UINT) {
685 auto u = static_cast<uint64_t>(i);
686 return Mutate(data_, u, parent_width_, WidthU(u));
687 } else if (type_ == FBT_INDIRECT_UINT) {
688 auto u = static_cast<uint64_t>(i);
689 return Mutate(Indirect(), u, byte_width_, WidthU(u));
690 } else {
691 return false;
692 }
693 }
694
695 bool MutateBool(bool b) {
696 return type_ == FBT_BOOL && Mutate(data_, b, parent_width_, BIT_WIDTH_8);
697 }
698
699 bool MutateUInt(uint64_t u) {
700 if (type_ == FBT_UINT) {
701 return Mutate(data_, u, parent_width_, WidthU(u));
702 } else if (type_ == FBT_INDIRECT_UINT) {
703 return Mutate(Indirect(), u, byte_width_, WidthU(u));
704 } else if (type_ == FBT_INT) {
705 auto i = static_cast<int64_t>(u);
706 return Mutate(data_, i, parent_width_, WidthI(i));
707 } else if (type_ == FBT_INDIRECT_INT) {
708 auto i = static_cast<int64_t>(u);
709 return Mutate(Indirect(), i, byte_width_, WidthI(i));
710 } else {
711 return false;
712 }
713 }
714
715 bool MutateFloat(float f) {
716 if (type_ == FBT_FLOAT) {
717 return MutateF(data_, f, parent_width_, BIT_WIDTH_32);
718 } else if (type_ == FBT_INDIRECT_FLOAT) {
719 return MutateF(Indirect(), f, byte_width_, BIT_WIDTH_32);
720 } else {
721 return false;
722 }
723 }
724
725 bool MutateFloat(double d) {
726 if (type_ == FBT_FLOAT) {
727 return MutateF(data_, d, parent_width_, WidthF(d));
728 } else if (type_ == FBT_INDIRECT_FLOAT) {
729 return MutateF(Indirect(), d, byte_width_, WidthF(d));
730 } else {
731 return false;
732 }
733 }
734
735 bool MutateString(const char *str, size_t len) {
736 auto s = AsString();
737 if (s.IsTheEmptyString()) return false;
738 // This is very strict, could allow shorter strings, but that creates
739 // garbage.
740 if (s.length() != len) return false;
741 memcpy(const_cast<char *>(s.c_str()), str, len);
742 return true;
743 }
744 bool MutateString(const char *str) { return MutateString(str, strlen(str)); }
745 bool MutateString(const std::string &str) {
746 return MutateString(str.data(), str.length());
747 }
748
749 private:
750 const uint8_t *Indirect() const {
751 return flexbuffers::Indirect(data_, parent_width_);
752 }
753
754 template<typename T>
755 bool Mutate(const uint8_t *dest, T t, size_t byte_width,
756 BitWidth value_width) {
757 auto fits = static_cast<size_t>(static_cast<size_t>(1U) << value_width) <=
758 byte_width;
759 if (fits) {
760 t = flatbuffers::EndianScalar(t);
761 memcpy(const_cast<uint8_t *>(dest), &t, byte_width);
762 }
763 return fits;
764 }
765
766 template<typename T>
767 bool MutateF(const uint8_t *dest, T t, size_t byte_width,
768 BitWidth value_width) {
769 if (byte_width == sizeof(double))
770 return Mutate(dest, static_cast<double>(t), byte_width, value_width);
771 if (byte_width == sizeof(float))
772 return Mutate(dest, static_cast<float>(t), byte_width, value_width);
773 FLATBUFFERS_ASSERT(false);
774 return false;
775 }
776
James Kuszmaul8e62b022022-03-22 09:33:25 -0700777 friend class Verifier;
778
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700779 const uint8_t *data_;
780 uint8_t parent_width_;
781 uint8_t byte_width_;
782 Type type_;
783};
784
785// Template specialization for As().
786template<> inline bool Reference::As<bool>() const { return AsBool(); }
787
788template<> inline int8_t Reference::As<int8_t>() const { return AsInt8(); }
789template<> inline int16_t Reference::As<int16_t>() const { return AsInt16(); }
790template<> inline int32_t Reference::As<int32_t>() const { return AsInt32(); }
791template<> inline int64_t Reference::As<int64_t>() const { return AsInt64(); }
792
793template<> inline uint8_t Reference::As<uint8_t>() const { return AsUInt8(); }
Austin Schuh272c6132020-11-14 16:37:52 -0800794template<> inline uint16_t Reference::As<uint16_t>() const {
795 return AsUInt16();
796}
797template<> inline uint32_t Reference::As<uint32_t>() const {
798 return AsUInt32();
799}
800template<> inline uint64_t Reference::As<uint64_t>() const {
801 return AsUInt64();
802}
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700803
804template<> inline double Reference::As<double>() const { return AsDouble(); }
805template<> inline float Reference::As<float>() const { return AsFloat(); }
806
807template<> inline String Reference::As<String>() const { return AsString(); }
808template<> inline std::string Reference::As<std::string>() const {
809 return AsString().str();
810}
811
812template<> inline Blob Reference::As<Blob>() const { return AsBlob(); }
813template<> inline Vector Reference::As<Vector>() const { return AsVector(); }
814template<> inline TypedVector Reference::As<TypedVector>() const {
815 return AsTypedVector();
816}
817template<> inline FixedTypedVector Reference::As<FixedTypedVector>() const {
818 return AsFixedTypedVector();
819}
820template<> inline Map Reference::As<Map>() const { return AsMap(); }
821
822inline uint8_t PackedType(BitWidth bit_width, Type type) {
823 return static_cast<uint8_t>(bit_width | (type << 2));
824}
825
826inline uint8_t NullPackedType() { return PackedType(BIT_WIDTH_8, FBT_NULL); }
827
828// Vector accessors.
829// Note: if you try to access outside of bounds, you get a Null value back
830// instead. Normally this would be an assert, but since this is "dynamically
831// typed" data, you may not want that (someone sends you a 2d vector and you
832// wanted 3d).
833// The Null converts seamlessly into a default value for any other type.
834// TODO(wvo): Could introduce an #ifdef that makes this into an assert?
835inline Reference Vector::operator[](size_t i) const {
836 auto len = size();
837 if (i >= len) return Reference(nullptr, 1, NullPackedType());
838 auto packed_type = (data_ + len * byte_width_)[i];
839 auto elem = data_ + i * byte_width_;
840 return Reference(elem, byte_width_, packed_type);
841}
842
843inline Reference TypedVector::operator[](size_t i) const {
844 auto len = size();
845 if (i >= len) return Reference(nullptr, 1, NullPackedType());
846 auto elem = data_ + i * byte_width_;
847 return Reference(elem, byte_width_, 1, type_);
848}
849
850inline Reference FixedTypedVector::operator[](size_t i) const {
851 if (i >= len_) return Reference(nullptr, 1, NullPackedType());
852 auto elem = data_ + i * byte_width_;
853 return Reference(elem, byte_width_, 1, type_);
854}
855
856template<typename T> int KeyCompare(const void *key, const void *elem) {
857 auto str_elem = reinterpret_cast<const char *>(
858 Indirect<T>(reinterpret_cast<const uint8_t *>(elem)));
859 auto skey = reinterpret_cast<const char *>(key);
860 return strcmp(skey, str_elem);
861}
862
863inline Reference Map::operator[](const char *key) const {
864 auto keys = Keys();
865 // We can't pass keys.byte_width_ to the comparison function, so we have
866 // to pick the right one ahead of time.
867 int (*comp)(const void *, const void *) = nullptr;
868 switch (keys.byte_width_) {
869 case 1: comp = KeyCompare<uint8_t>; break;
870 case 2: comp = KeyCompare<uint16_t>; break;
871 case 4: comp = KeyCompare<uint32_t>; break;
872 case 8: comp = KeyCompare<uint64_t>; break;
James Kuszmaul8e62b022022-03-22 09:33:25 -0700873 default: FLATBUFFERS_ASSERT(false); return Reference();
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700874 }
875 auto res = std::bsearch(key, keys.data_, keys.size(), keys.byte_width_, comp);
876 if (!res) return Reference(nullptr, 1, NullPackedType());
877 auto i = (reinterpret_cast<uint8_t *>(res) - keys.data_) / keys.byte_width_;
878 return (*static_cast<const Vector *>(this))[i];
879}
880
881inline Reference Map::operator[](const std::string &key) const {
882 return (*this)[key.c_str()];
883}
884
885inline Reference GetRoot(const uint8_t *buffer, size_t size) {
886 // See Finish() below for the serialization counterpart of this.
887 // The root starts at the end of the buffer, so we parse backwards from there.
888 auto end = buffer + size;
889 auto byte_width = *--end;
890 auto packed_type = *--end;
891 end -= byte_width; // The root data item.
892 return Reference(end, byte_width, packed_type);
893}
894
895inline Reference GetRoot(const std::vector<uint8_t> &buffer) {
James Kuszmaul8e62b022022-03-22 09:33:25 -0700896 return GetRoot(buffer.data(), buffer.size());
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700897}
898
// Flags that configure how the Builder behaves.
// Each "Share" flag makes the Builder automatically pool values of that
// kind. Pooling can shrink the serialized data when there are multiple maps
// of the same kind, at the expense of slightly slower serialization (the
// cost of lookups) and more memory use (std::set).
// The Builder's default is to share keys but not strings.
// Turn key sharing off if you have e.g. only one map.
// Turn string sharing on if you expect many non-unique string values.
// Sharing key vectors can additionally save space if you have maps with
// identical field populations.
enum BuilderFlag {
  BUILDER_FLAG_NONE = 0,
  BUILDER_FLAG_SHARE_KEYS = 1,
  BUILDER_FLAG_SHARE_STRINGS = 2,
  BUILDER_FLAG_SHARE_KEYS_AND_STRINGS =
      BUILDER_FLAG_SHARE_KEYS | BUILDER_FLAG_SHARE_STRINGS,  // == 3
  BUILDER_FLAG_SHARE_KEY_VECTORS = 4,
  BUILDER_FLAG_SHARE_ALL = BUILDER_FLAG_SHARE_KEYS_AND_STRINGS |
                           BUILDER_FLAG_SHARE_KEY_VECTORS,  // == 7
};
917
918class Builder FLATBUFFERS_FINAL_CLASS {
919 public:
920 Builder(size_t initial_size = 256,
921 BuilderFlag flags = BUILDER_FLAG_SHARE_KEYS)
922 : buf_(initial_size),
923 finished_(false),
Austin Schuh58b9b472020-11-25 19:12:44 -0800924 has_duplicate_keys_(false),
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700925 flags_(flags),
926 force_min_bit_width_(BIT_WIDTH_8),
927 key_pool(KeyOffsetCompare(buf_)),
928 string_pool(StringOffsetCompare(buf_)) {
929 buf_.clear();
930 }
931
#ifdef FLATBUFFERS_DEFAULT_DECLARATION
// Move support.
// These are deliberately not `= default`: the comparators of key_pool and
// string_pool store a pointer to the owning Builder's buf_. A member-wise
// move would carry that pointer along, so the new Builder's pools would
// compare keys through the moved-from (emptied, possibly already destroyed)
// buffer. Both pools are therefore rebuilt around this Builder's own buf_.
Builder(Builder &&other)
    : buf_(std::move(other.buf_)),
      stack_(std::move(other.stack_)),
      finished_(other.finished_),
      has_duplicate_keys_(other.has_duplicate_keys_),
      flags_(other.flags_),
      force_min_bit_width_(other.force_min_bit_width_),
      // buf_ is declared before the pools, so it already holds the moved
      // bytes when the comparators are used during these range insertions.
      key_pool(other.key_pool.begin(), other.key_pool.end(),
               KeyOffsetCompare(buf_)),
      string_pool(other.string_pool.begin(), other.string_pool.end(),
                  StringOffsetCompare(buf_)) {
  // The offsets still held by `other`'s pools no longer refer to its buffer.
  other.key_pool.clear();
  other.string_pool.clear();
}

Builder &operator=(Builder &&other) {
  if (this == &other) return *this;
  buf_ = std::move(other.buf_);
  stack_ = std::move(other.stack_);
  finished_ = other.finished_;
  has_duplicate_keys_ = other.has_duplicate_keys_;
  flags_ = other.flags_;
  force_min_bit_width_ = other.force_min_bit_width_;
  // Re-create both pools with comparators that point at this->buf_.
  key_pool = KeyOffsetMap(other.key_pool.begin(), other.key_pool.end(),
                          KeyOffsetCompare(buf_));
  string_pool =
      StringOffsetMap(other.string_pool.begin(), other.string_pool.end(),
                      StringOffsetCompare(buf_));
  other.key_pool.clear();
  other.string_pool.clear();
  return *this;
}
#endif
936
Austin Schuhe89fa2d2019-08-14 20:24:23 -0700937 /// @brief Get the serialized buffer (after you call `Finish()`).
938 /// @return Returns a vector owned by this class.
939 const std::vector<uint8_t> &GetBuffer() const {
940 Finished();
941 return buf_;
942 }
943
944 // Size of the buffer. Does not include unfinished values.
945 size_t GetSize() const { return buf_.size(); }
946
947 // Reset all state so we can re-use the buffer.
948 void Clear() {
949 buf_.clear();
950 stack_.clear();
951 finished_ = false;
952 // flags_ remains as-is;
953 force_min_bit_width_ = BIT_WIDTH_8;
954 key_pool.clear();
955 string_pool.clear();
956 }
957
958 // All value constructing functions below have two versions: one that
959 // takes a key (for placement inside a map) and one that doesn't (for inside
960 // vectors and elsewhere).
961
962 void Null() { stack_.push_back(Value()); }
963 void Null(const char *key) {
964 Key(key);
965 Null();
966 }
967
968 void Int(int64_t i) { stack_.push_back(Value(i, FBT_INT, WidthI(i))); }
969 void Int(const char *key, int64_t i) {
970 Key(key);
971 Int(i);
972 }
973
974 void UInt(uint64_t u) { stack_.push_back(Value(u, FBT_UINT, WidthU(u))); }
975 void UInt(const char *key, uint64_t u) {
976 Key(key);
977 UInt(u);
978 }
979
980 void Float(float f) { stack_.push_back(Value(f)); }
981 void Float(const char *key, float f) {
982 Key(key);
983 Float(f);
984 }
985
986 void Double(double f) { stack_.push_back(Value(f)); }
987 void Double(const char *key, double d) {
988 Key(key);
989 Double(d);
990 }
991
992 void Bool(bool b) { stack_.push_back(Value(b)); }
993 void Bool(const char *key, bool b) {
994 Key(key);
995 Bool(b);
996 }
997
998 void IndirectInt(int64_t i) { PushIndirect(i, FBT_INDIRECT_INT, WidthI(i)); }
999 void IndirectInt(const char *key, int64_t i) {
1000 Key(key);
1001 IndirectInt(i);
1002 }
1003
1004 void IndirectUInt(uint64_t u) {
1005 PushIndirect(u, FBT_INDIRECT_UINT, WidthU(u));
1006 }
1007 void IndirectUInt(const char *key, uint64_t u) {
1008 Key(key);
1009 IndirectUInt(u);
1010 }
1011
1012 void IndirectFloat(float f) {
1013 PushIndirect(f, FBT_INDIRECT_FLOAT, BIT_WIDTH_32);
1014 }
1015 void IndirectFloat(const char *key, float f) {
1016 Key(key);
1017 IndirectFloat(f);
1018 }
1019
1020 void IndirectDouble(double f) {
1021 PushIndirect(f, FBT_INDIRECT_FLOAT, WidthF(f));
1022 }
1023 void IndirectDouble(const char *key, double d) {
1024 Key(key);
1025 IndirectDouble(d);
1026 }
1027
1028 size_t Key(const char *str, size_t len) {
1029 auto sloc = buf_.size();
1030 WriteBytes(str, len + 1);
1031 if (flags_ & BUILDER_FLAG_SHARE_KEYS) {
1032 auto it = key_pool.find(sloc);
1033 if (it != key_pool.end()) {
1034 // Already in the buffer. Remove key we just serialized, and use
1035 // existing offset instead.
1036 buf_.resize(sloc);
1037 sloc = *it;
1038 } else {
1039 key_pool.insert(sloc);
1040 }
1041 }
1042 stack_.push_back(Value(static_cast<uint64_t>(sloc), FBT_KEY, BIT_WIDTH_8));
1043 return sloc;
1044 }
1045
1046 size_t Key(const char *str) { return Key(str, strlen(str)); }
1047 size_t Key(const std::string &str) { return Key(str.c_str(), str.size()); }
1048
1049 size_t String(const char *str, size_t len) {
1050 auto reset_to = buf_.size();
1051 auto sloc = CreateBlob(str, len, 1, FBT_STRING);
1052 if (flags_ & BUILDER_FLAG_SHARE_STRINGS) {
1053 StringOffset so(sloc, len);
1054 auto it = string_pool.find(so);
1055 if (it != string_pool.end()) {
1056 // Already in the buffer. Remove string we just serialized, and use
1057 // existing offset instead.
1058 buf_.resize(reset_to);
1059 sloc = it->first;
1060 stack_.back().u_ = sloc;
1061 } else {
1062 string_pool.insert(so);
1063 }
1064 }
1065 return sloc;
1066 }
1067 size_t String(const char *str) { return String(str, strlen(str)); }
1068 size_t String(const std::string &str) {
1069 return String(str.c_str(), str.size());
1070 }
1071 void String(const flexbuffers::String &str) {
1072 String(str.c_str(), str.length());
1073 }
1074
1075 void String(const char *key, const char *str) {
1076 Key(key);
1077 String(str);
1078 }
1079 void String(const char *key, const std::string &str) {
1080 Key(key);
1081 String(str);
1082 }
1083 void String(const char *key, const flexbuffers::String &str) {
1084 Key(key);
1085 String(str);
1086 }
1087
1088 size_t Blob(const void *data, size_t len) {
1089 return CreateBlob(data, len, 0, FBT_BLOB);
1090 }
1091 size_t Blob(const std::vector<uint8_t> &v) {
James Kuszmaul8e62b022022-03-22 09:33:25 -07001092 return CreateBlob(v.data(), v.size(), 0, FBT_BLOB);
1093 }
1094
1095 void Blob(const char *key, const void *data, size_t len) {
1096 Key(key);
1097 Blob(data, len);
1098 }
1099 void Blob(const char *key, const std::vector<uint8_t> &v) {
1100 Key(key);
1101 Blob(v);
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001102 }
1103
1104 // TODO(wvo): support all the FlexBuffer types (like flexbuffers::String),
1105 // e.g. Vector etc. Also in overloaded versions.
1106 // Also some FlatBuffers types?
1107
1108 size_t StartVector() { return stack_.size(); }
1109 size_t StartVector(const char *key) {
1110 Key(key);
1111 return stack_.size();
1112 }
1113 size_t StartMap() { return stack_.size(); }
1114 size_t StartMap(const char *key) {
1115 Key(key);
1116 return stack_.size();
1117 }
1118
James Kuszmaul8e62b022022-03-22 09:33:25 -07001119 // TODO(wvo): allow this to specify an alignment greater than the natural
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001120 // alignment.
1121 size_t EndVector(size_t start, bool typed, bool fixed) {
1122 auto vec = CreateVector(start, stack_.size() - start, 1, typed, fixed);
1123 // Remove temp elements and return vector.
1124 stack_.resize(start);
1125 stack_.push_back(vec);
1126 return static_cast<size_t>(vec.u_);
1127 }
1128
1129 size_t EndMap(size_t start) {
1130 // We should have interleaved keys and values on the stack.
1131 // Make sure it is an even number:
1132 auto len = stack_.size() - start;
1133 FLATBUFFERS_ASSERT(!(len & 1));
1134 len /= 2;
1135 // Make sure keys are all strings:
1136 for (auto key = start; key < stack_.size(); key += 2) {
1137 FLATBUFFERS_ASSERT(stack_[key].type_ == FBT_KEY);
1138 }
Austin Schuh272c6132020-11-14 16:37:52 -08001139 // Now sort values, so later we can do a binary search lookup.
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001140 // We want to sort 2 array elements at a time.
1141 struct TwoValue {
1142 Value key;
1143 Value val;
1144 };
1145 // TODO(wvo): strict aliasing?
1146 // TODO(wvo): allow the caller to indicate the data is already sorted
1147 // for maximum efficiency? With an assert to check sortedness to make sure
1148 // we're not breaking binary search.
1149 // Or, we can track if the map is sorted as keys are added which would be
1150 // be quite cheap (cheaper than checking it here), so we can skip this
1151 // step automatically when appliccable, and encourage people to write in
1152 // sorted fashion.
1153 // std::sort is typically already a lot faster on sorted data though.
James Kuszmaul8e62b022022-03-22 09:33:25 -07001154 auto dict = reinterpret_cast<TwoValue *>(stack_.data() + start);
1155 std::sort(
1156 dict, dict + len, [&](const TwoValue &a, const TwoValue &b) -> bool {
1157 auto as = reinterpret_cast<const char *>(buf_.data() + a.key.u_);
1158 auto bs = reinterpret_cast<const char *>(buf_.data() + b.key.u_);
1159 auto comp = strcmp(as, bs);
1160 // We want to disallow duplicate keys, since this results in a
1161 // map where values cannot be found.
1162 // But we can't assert here (since we don't want to fail on
1163 // random JSON input) or have an error mechanism.
1164 // Instead, we set has_duplicate_keys_ in the builder to
1165 // signal this.
1166 // TODO: Have to check for pointer equality, as some sort
1167 // implementation apparently call this function with the same
1168 // element?? Why?
1169 if (!comp && &a != &b) has_duplicate_keys_ = true;
1170 return comp < 0;
1171 });
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001172 // First create a vector out of all keys.
1173 // TODO(wvo): if kBuilderFlagShareKeyVectors is true, see if we can share
1174 // the first vector.
1175 auto keys = CreateVector(start, len, 2, true, false);
1176 auto vec = CreateVector(start + 1, len, 2, false, false, &keys);
1177 // Remove temp elements and return map.
1178 stack_.resize(start);
1179 stack_.push_back(vec);
1180 return static_cast<size_t>(vec.u_);
1181 }
1182
Austin Schuh58b9b472020-11-25 19:12:44 -08001183 // Call this after EndMap to see if the map had any duplicate keys.
1184 // Any map with such keys won't be able to retrieve all values.
1185 bool HasDuplicateKeys() const { return has_duplicate_keys_; }
1186
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001187 template<typename F> size_t Vector(F f) {
1188 auto start = StartVector();
1189 f();
1190 return EndVector(start, false, false);
1191 }
1192 template<typename F, typename T> size_t Vector(F f, T &state) {
1193 auto start = StartVector();
1194 f(state);
1195 return EndVector(start, false, false);
1196 }
1197 template<typename F> size_t Vector(const char *key, F f) {
1198 auto start = StartVector(key);
1199 f();
1200 return EndVector(start, false, false);
1201 }
1202 template<typename F, typename T>
1203 size_t Vector(const char *key, F f, T &state) {
1204 auto start = StartVector(key);
1205 f(state);
1206 return EndVector(start, false, false);
1207 }
1208
1209 template<typename T> void Vector(const T *elems, size_t len) {
1210 if (flatbuffers::is_scalar<T>::value) {
1211 // This path should be a lot quicker and use less space.
1212 ScalarVector(elems, len, false);
1213 } else {
1214 auto start = StartVector();
1215 for (size_t i = 0; i < len; i++) Add(elems[i]);
1216 EndVector(start, false, false);
1217 }
1218 }
1219 template<typename T>
1220 void Vector(const char *key, const T *elems, size_t len) {
1221 Key(key);
1222 Vector(elems, len);
1223 }
1224 template<typename T> void Vector(const std::vector<T> &vec) {
James Kuszmaul8e62b022022-03-22 09:33:25 -07001225 Vector(vec.data(), vec.size());
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001226 }
1227
1228 template<typename F> size_t TypedVector(F f) {
1229 auto start = StartVector();
1230 f();
1231 return EndVector(start, true, false);
1232 }
1233 template<typename F, typename T> size_t TypedVector(F f, T &state) {
1234 auto start = StartVector();
1235 f(state);
1236 return EndVector(start, true, false);
1237 }
1238 template<typename F> size_t TypedVector(const char *key, F f) {
1239 auto start = StartVector(key);
1240 f();
1241 return EndVector(start, true, false);
1242 }
1243 template<typename F, typename T>
1244 size_t TypedVector(const char *key, F f, T &state) {
1245 auto start = StartVector(key);
1246 f(state);
1247 return EndVector(start, true, false);
1248 }
1249
1250 template<typename T> size_t FixedTypedVector(const T *elems, size_t len) {
1251 // We only support a few fixed vector lengths. Anything bigger use a
1252 // regular typed vector.
1253 FLATBUFFERS_ASSERT(len >= 2 && len <= 4);
1254 // And only scalar values.
1255 static_assert(flatbuffers::is_scalar<T>::value, "Unrelated types");
1256 return ScalarVector(elems, len, true);
1257 }
1258
1259 template<typename T>
1260 size_t FixedTypedVector(const char *key, const T *elems, size_t len) {
1261 Key(key);
1262 return FixedTypedVector(elems, len);
1263 }
1264
1265 template<typename F> size_t Map(F f) {
1266 auto start = StartMap();
1267 f();
1268 return EndMap(start);
1269 }
1270 template<typename F, typename T> size_t Map(F f, T &state) {
1271 auto start = StartMap();
1272 f(state);
1273 return EndMap(start);
1274 }
1275 template<typename F> size_t Map(const char *key, F f) {
1276 auto start = StartMap(key);
1277 f();
1278 return EndMap(start);
1279 }
1280 template<typename F, typename T> size_t Map(const char *key, F f, T &state) {
1281 auto start = StartMap(key);
1282 f(state);
1283 return EndMap(start);
1284 }
1285 template<typename T> void Map(const std::map<std::string, T> &map) {
1286 auto start = StartMap();
1287 for (auto it = map.begin(); it != map.end(); ++it)
1288 Add(it->first.c_str(), it->second);
1289 EndMap(start);
1290 }
1291
Austin Schuh272c6132020-11-14 16:37:52 -08001292 // If you wish to share a value explicitly (a value not shared automatically
1293 // through one of the BUILDER_FLAG_SHARE_* flags) you can do so with these
1294 // functions. Or if you wish to turn those flags off for performance reasons
1295 // and still do some explicit sharing. For example:
1296 // builder.IndirectDouble(M_PI);
1297 // auto id = builder.LastValue(); // Remember where we stored it.
1298 // .. more code goes here ..
1299 // builder.ReuseValue(id); // Refers to same double by offset.
1300 // LastValue works regardless of whether the value has a key or not.
1301 // Works on any data type.
1302 struct Value;
1303 Value LastValue() { return stack_.back(); }
1304 void ReuseValue(Value v) { stack_.push_back(v); }
1305 void ReuseValue(const char *key, Value v) {
1306 Key(key);
1307 ReuseValue(v);
1308 }
1309
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001310 // Overloaded Add that tries to call the correct function above.
1311 void Add(int8_t i) { Int(i); }
1312 void Add(int16_t i) { Int(i); }
1313 void Add(int32_t i) { Int(i); }
1314 void Add(int64_t i) { Int(i); }
1315 void Add(uint8_t u) { UInt(u); }
1316 void Add(uint16_t u) { UInt(u); }
1317 void Add(uint32_t u) { UInt(u); }
1318 void Add(uint64_t u) { UInt(u); }
1319 void Add(float f) { Float(f); }
1320 void Add(double d) { Double(d); }
1321 void Add(bool b) { Bool(b); }
1322 void Add(const char *str) { String(str); }
1323 void Add(const std::string &str) { String(str); }
1324 void Add(const flexbuffers::String &str) { String(str); }
1325
1326 template<typename T> void Add(const std::vector<T> &vec) { Vector(vec); }
1327
1328 template<typename T> void Add(const char *key, const T &t) {
1329 Key(key);
1330 Add(t);
1331 }
1332
1333 template<typename T> void Add(const std::map<std::string, T> &map) {
1334 Map(map);
1335 }
1336
1337 template<typename T> void operator+=(const T &t) { Add(t); }
1338
1339 // This function is useful in combination with the Mutate* functions above.
1340 // It forces elements of vectors and maps to have a minimum size, such that
1341 // they can later be updated without failing.
1342 // Call with no arguments to reset.
1343 void ForceMinimumBitWidth(BitWidth bw = BIT_WIDTH_8) {
1344 force_min_bit_width_ = bw;
1345 }
1346
1347 void Finish() {
1348 // If you hit this assert, you likely have objects that were never included
1349 // in a parent. You need to have exactly one root to finish a buffer.
1350 // Check your Start/End calls are matched, and all objects are inside
1351 // some other object.
1352 FLATBUFFERS_ASSERT(stack_.size() == 1);
1353
1354 // Write root value.
1355 auto byte_width = Align(stack_[0].ElemWidth(buf_.size(), 0));
1356 WriteAny(stack_[0], byte_width);
1357 // Write root type.
1358 Write(stack_[0].StoredPackedType(), 1);
1359 // Write root size. Normally determined by parent, but root has no parent :)
1360 Write(byte_width, 1);
1361
1362 finished_ = true;
1363 }
1364
1365 private:
1366 void Finished() const {
1367 // If you get this assert, you're attempting to get access a buffer
1368 // which hasn't been finished yet. Be sure to call
1369 // Builder::Finish with your root object.
1370 FLATBUFFERS_ASSERT(finished_);
1371 }
1372
1373 // Align to prepare for writing a scalar with a certain size.
1374 uint8_t Align(BitWidth alignment) {
1375 auto byte_width = 1U << alignment;
1376 buf_.insert(buf_.end(), flatbuffers::PaddingBytes(buf_.size(), byte_width),
1377 0);
1378 return static_cast<uint8_t>(byte_width);
1379 }
1380
1381 void WriteBytes(const void *val, size_t size) {
1382 buf_.insert(buf_.end(), reinterpret_cast<const uint8_t *>(val),
1383 reinterpret_cast<const uint8_t *>(val) + size);
1384 }
1385
1386 template<typename T> void Write(T val, size_t byte_width) {
1387 FLATBUFFERS_ASSERT(sizeof(T) >= byte_width);
1388 val = flatbuffers::EndianScalar(val);
1389 WriteBytes(&val, byte_width);
1390 }
1391
1392 void WriteDouble(double f, uint8_t byte_width) {
1393 switch (byte_width) {
1394 case 8: Write(f, byte_width); break;
1395 case 4: Write(static_cast<float>(f), byte_width); break;
1396 // case 2: Write(static_cast<half>(f), byte_width); break;
1397 // case 1: Write(static_cast<quarter>(f), byte_width); break;
1398 default: FLATBUFFERS_ASSERT(0);
1399 }
1400 }
1401
1402 void WriteOffset(uint64_t o, uint8_t byte_width) {
1403 auto reloff = buf_.size() - o;
1404 FLATBUFFERS_ASSERT(byte_width == 8 || reloff < 1ULL << (byte_width * 8));
1405 Write(reloff, byte_width);
1406 }
1407
1408 template<typename T> void PushIndirect(T val, Type type, BitWidth bit_width) {
1409 auto byte_width = Align(bit_width);
1410 auto iloc = buf_.size();
1411 Write(val, byte_width);
1412 stack_.push_back(Value(static_cast<uint64_t>(iloc), type, bit_width));
1413 }
1414
1415 static BitWidth WidthB(size_t byte_width) {
1416 switch (byte_width) {
1417 case 1: return BIT_WIDTH_8;
1418 case 2: return BIT_WIDTH_16;
1419 case 4: return BIT_WIDTH_32;
1420 case 8: return BIT_WIDTH_64;
1421 default: FLATBUFFERS_ASSERT(false); return BIT_WIDTH_64;
1422 }
1423 }
1424
1425 template<typename T> static Type GetScalarType() {
1426 static_assert(flatbuffers::is_scalar<T>::value, "Unrelated types");
Austin Schuh2dd86a92022-09-14 21:19:23 -07001427 return flatbuffers::is_floating_point<T>::value
1428 ? FBT_FLOAT
1429 : flatbuffers::is_same<T, bool>::value
1430 ? FBT_BOOL
1431 : (flatbuffers::is_unsigned<T>::value ? FBT_UINT
1432 : FBT_INT);
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001433 }
1434
Austin Schuh272c6132020-11-14 16:37:52 -08001435 public:
1436 // This was really intended to be private, except for LastValue/ReuseValue.
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001437 struct Value {
1438 union {
1439 int64_t i_;
1440 uint64_t u_;
1441 double f_;
1442 };
1443
1444 Type type_;
1445
1446 // For scalars: of itself, for vector: of its elements, for string: length.
1447 BitWidth min_bit_width_;
1448
1449 Value() : i_(0), type_(FBT_NULL), min_bit_width_(BIT_WIDTH_8) {}
1450
1451 Value(bool b)
1452 : u_(static_cast<uint64_t>(b)),
1453 type_(FBT_BOOL),
1454 min_bit_width_(BIT_WIDTH_8) {}
1455
1456 Value(int64_t i, Type t, BitWidth bw)
1457 : i_(i), type_(t), min_bit_width_(bw) {}
1458 Value(uint64_t u, Type t, BitWidth bw)
1459 : u_(u), type_(t), min_bit_width_(bw) {}
1460
Austin Schuh272c6132020-11-14 16:37:52 -08001461 Value(float f)
1462 : f_(static_cast<double>(f)),
1463 type_(FBT_FLOAT),
1464 min_bit_width_(BIT_WIDTH_32) {}
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001465 Value(double f) : f_(f), type_(FBT_FLOAT), min_bit_width_(WidthF(f)) {}
1466
1467 uint8_t StoredPackedType(BitWidth parent_bit_width_ = BIT_WIDTH_8) const {
1468 return PackedType(StoredWidth(parent_bit_width_), type_);
1469 }
1470
1471 BitWidth ElemWidth(size_t buf_size, size_t elem_index) const {
1472 if (IsInline(type_)) {
1473 return min_bit_width_;
1474 } else {
1475 // We have an absolute offset, but want to store a relative offset
1476 // elem_index elements beyond the current buffer end. Since whether
1477 // the relative offset fits in a certain byte_width depends on
1478 // the size of the elements before it (and their alignment), we have
1479 // to test for each size in turn.
1480 for (size_t byte_width = 1;
1481 byte_width <= sizeof(flatbuffers::largest_scalar_t);
1482 byte_width *= 2) {
1483 // Where are we going to write this offset?
1484 auto offset_loc = buf_size +
1485 flatbuffers::PaddingBytes(buf_size, byte_width) +
1486 elem_index * byte_width;
1487 // Compute relative offset.
1488 auto offset = offset_loc - u_;
1489 // Does it fit?
1490 auto bit_width = WidthU(offset);
1491 if (static_cast<size_t>(static_cast<size_t>(1U) << bit_width) ==
1492 byte_width)
1493 return bit_width;
1494 }
1495 FLATBUFFERS_ASSERT(false); // Must match one of the sizes above.
1496 return BIT_WIDTH_64;
1497 }
1498 }
1499
1500 BitWidth StoredWidth(BitWidth parent_bit_width_ = BIT_WIDTH_8) const {
1501 if (IsInline(type_)) {
1502 return (std::max)(min_bit_width_, parent_bit_width_);
1503 } else {
1504 return min_bit_width_;
1505 }
1506 }
1507 };
1508
Austin Schuh272c6132020-11-14 16:37:52 -08001509 private:
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001510 void WriteAny(const Value &val, uint8_t byte_width) {
1511 switch (val.type_) {
1512 case FBT_NULL:
1513 case FBT_INT: Write(val.i_, byte_width); break;
1514 case FBT_BOOL:
1515 case FBT_UINT: Write(val.u_, byte_width); break;
1516 case FBT_FLOAT: WriteDouble(val.f_, byte_width); break;
1517 default: WriteOffset(val.u_, byte_width); break;
1518 }
1519 }
1520
1521 size_t CreateBlob(const void *data, size_t len, size_t trailing, Type type) {
1522 auto bit_width = WidthU(len);
1523 auto byte_width = Align(bit_width);
1524 Write<uint64_t>(len, byte_width);
1525 auto sloc = buf_.size();
1526 WriteBytes(data, len + trailing);
1527 stack_.push_back(Value(static_cast<uint64_t>(sloc), type, bit_width));
1528 return sloc;
1529 }
1530
1531 template<typename T>
1532 size_t ScalarVector(const T *elems, size_t len, bool fixed) {
1533 auto vector_type = GetScalarType<T>();
1534 auto byte_width = sizeof(T);
1535 auto bit_width = WidthB(byte_width);
1536 // If you get this assert, you're trying to write a vector with a size
1537 // field that is bigger than the scalars you're trying to write (e.g. a
1538 // byte vector > 255 elements). For such types, write a "blob" instead.
1539 // TODO: instead of asserting, could write vector with larger elements
1540 // instead, though that would be wasteful.
1541 FLATBUFFERS_ASSERT(WidthU(len) <= bit_width);
Austin Schuh272c6132020-11-14 16:37:52 -08001542 Align(bit_width);
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001543 if (!fixed) Write<uint64_t>(len, byte_width);
1544 auto vloc = buf_.size();
1545 for (size_t i = 0; i < len; i++) Write(elems[i], byte_width);
1546 stack_.push_back(Value(static_cast<uint64_t>(vloc),
1547 ToTypedVector(vector_type, fixed ? len : 0),
1548 bit_width));
1549 return vloc;
1550 }
1551
1552 Value CreateVector(size_t start, size_t vec_len, size_t step, bool typed,
1553 bool fixed, const Value *keys = nullptr) {
Austin Schuh272c6132020-11-14 16:37:52 -08001554 FLATBUFFERS_ASSERT(
1555 !fixed ||
1556 typed); // typed=false, fixed=true combination is not supported.
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001557 // Figure out smallest bit width we can store this vector with.
1558 auto bit_width = (std::max)(force_min_bit_width_, WidthU(vec_len));
1559 auto prefix_elems = 1;
1560 if (keys) {
1561 // If this vector is part of a map, we will pre-fix an offset to the keys
1562 // to this vector.
1563 bit_width = (std::max)(bit_width, keys->ElemWidth(buf_.size(), 0));
1564 prefix_elems += 2;
1565 }
1566 Type vector_type = FBT_KEY;
1567 // Check bit widths and types for all elements.
1568 for (size_t i = start; i < stack_.size(); i += step) {
Austin Schuh272c6132020-11-14 16:37:52 -08001569 auto elem_width =
1570 stack_[i].ElemWidth(buf_.size(), i - start + prefix_elems);
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001571 bit_width = (std::max)(bit_width, elem_width);
1572 if (typed) {
1573 if (i == start) {
1574 vector_type = stack_[i].type_;
1575 } else {
1576 // If you get this assert, you are writing a typed vector with
1577 // elements that are not all the same type.
1578 FLATBUFFERS_ASSERT(vector_type == stack_[i].type_);
1579 }
1580 }
1581 }
James Kuszmaul8e62b022022-03-22 09:33:25 -07001582 // If you get this assert, your typed types are not one of:
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001583 // Int / UInt / Float / Key.
James Kuszmaul8e62b022022-03-22 09:33:25 -07001584 FLATBUFFERS_ASSERT(!typed || IsTypedVectorElementType(vector_type));
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001585 auto byte_width = Align(bit_width);
1586 // Write vector. First the keys width/offset if available, and size.
1587 if (keys) {
1588 WriteOffset(keys->u_, byte_width);
1589 Write<uint64_t>(1ULL << keys->min_bit_width_, byte_width);
1590 }
1591 if (!fixed) Write<uint64_t>(vec_len, byte_width);
1592 // Then the actual data.
1593 auto vloc = buf_.size();
1594 for (size_t i = start; i < stack_.size(); i += step) {
1595 WriteAny(stack_[i], byte_width);
1596 }
1597 // Then the types.
1598 if (!typed) {
1599 for (size_t i = start; i < stack_.size(); i += step) {
1600 buf_.push_back(stack_[i].StoredPackedType(bit_width));
1601 }
1602 }
1603 return Value(static_cast<uint64_t>(vloc),
1604 keys ? FBT_MAP
1605 : (typed ? ToTypedVector(vector_type, fixed ? vec_len : 0)
1606 : FBT_VECTOR),
1607 bit_width);
1608 }
1609
1610 // You shouldn't really be copying instances of this class.
1611 Builder(const Builder &);
1612 Builder &operator=(const Builder &);
1613
1614 std::vector<uint8_t> buf_;
1615 std::vector<Value> stack_;
1616
1617 bool finished_;
Austin Schuh58b9b472020-11-25 19:12:44 -08001618 bool has_duplicate_keys_;
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001619
1620 BuilderFlag flags_;
1621
1622 BitWidth force_min_bit_width_;
1623
// Orders key offsets by the NUL-terminated strings they refer to inside
// the buffer. Only a pointer to the buffer is stored.
struct KeyOffsetCompare {
  explicit KeyOffsetCompare(const std::vector<uint8_t> &buf) : buf_(&buf) {}

  bool operator()(size_t a, size_t b) const {
    const uint8_t *base = buf_->data();
    const auto lhs = reinterpret_cast<const char *>(base + a);
    const auto rhs = reinterpret_cast<const char *>(base + b);
    return strcmp(lhs, rhs) < 0;
  }

  const std::vector<uint8_t> *buf_;
};
1633
// A string in the buffer: (offset of its first byte, length).
typedef std::pair<size_t, size_t> StringOffset;
// Orders strings by comparing their bytes (one byte past the shorter
// length is included); if those are equal, the shorter string comes first.
struct StringOffsetCompare {
  explicit StringOffsetCompare(const std::vector<uint8_t> &buf)
      : buf_(&buf) {}

  bool operator()(const StringOffset &a, const StringOffset &b) const {
    const uint8_t *base = buf_->data();
    const size_t compared_len = (std::min)(a.second, b.second) + 1;
    const auto order = memcmp(base + a.first, base + b.first, compared_len);
    if (order != 0) return order < 0;
    return a.second < b.second;
  }

  const std::vector<uint8_t> *buf_;
};
1646
1647 typedef std::set<size_t, KeyOffsetCompare> KeyOffsetMap;
1648 typedef std::set<StringOffset, StringOffsetCompare> StringOffsetMap;
1649
1650 KeyOffsetMap key_pool;
1651 StringOffsetMap string_pool;
James Kuszmaul8e62b022022-03-22 09:33:25 -07001652
1653 friend class Verifier;
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001654};
1655
James Kuszmaul8e62b022022-03-22 09:33:25 -07001656// Helper class to verify the integrity of a FlexBuffer
1657class Verifier FLATBUFFERS_FINAL_CLASS {
1658 public:
1659 Verifier(const uint8_t *buf, size_t buf_len,
1660 // Supplying this vector likely results in faster verification
1661 // of larger buffers with many shared keys/strings, but
1662 // comes at the cost of using additional memory the same size of
1663 // the buffer being verified, so it is by default off.
1664 std::vector<uint8_t> *reuse_tracker = nullptr,
1665 bool _check_alignment = true, size_t max_depth = 64)
1666 : buf_(buf),
1667 size_(buf_len),
1668 depth_(0),
1669 max_depth_(max_depth),
1670 num_vectors_(0),
1671 max_vectors_(buf_len),
1672 check_alignment_(_check_alignment),
1673 reuse_tracker_(reuse_tracker) {
1674 FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
1675 if (reuse_tracker_) {
1676 reuse_tracker_->clear();
1677 reuse_tracker_->resize(size_, PackedType(BIT_WIDTH_8, FBT_NULL));
1678 }
1679 }
1680
1681 private:
1682 // Central location where any verification failures register.
1683 bool Check(bool ok) const {
1684 // clang-format off
1685 #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE
1686 FLATBUFFERS_ASSERT(ok);
1687 #endif
1688 // clang-format on
1689 return ok;
1690 }
1691
1692 // Verify any range within the buffer.
1693 bool VerifyFrom(size_t elem, size_t elem_len) const {
1694 return Check(elem_len < size_ && elem <= size_ - elem_len);
1695 }
1696 bool VerifyBefore(size_t elem, size_t elem_len) const {
1697 return Check(elem_len <= elem);
1698 }
1699
1700 bool VerifyFromPointer(const uint8_t *p, size_t len) {
1701 auto o = static_cast<size_t>(p - buf_);
1702 return VerifyFrom(o, len);
1703 }
1704 bool VerifyBeforePointer(const uint8_t *p, size_t len) {
1705 auto o = static_cast<size_t>(p - buf_);
1706 return VerifyBefore(o, len);
1707 }
1708
1709 bool VerifyByteWidth(size_t width) {
1710 return Check(width == 1 || width == 2 || width == 4 || width == 8);
1711 }
1712
1713 bool VerifyType(int type) { return Check(type >= 0 && type < FBT_MAX_TYPE); }
1714
1715 bool VerifyOffset(uint64_t off, const uint8_t *p) {
1716 return Check(off <= static_cast<uint64_t>(size_)) &&
1717 off <= static_cast<uint64_t>(p - buf_);
1718 }
1719
1720 bool VerifyAlignment(const uint8_t *p, size_t size) const {
1721 auto o = static_cast<size_t>(p - buf_);
1722 return Check((o & (size - 1)) == 0 || !check_alignment_);
1723 }
1724
1725// Macro, since we want to escape from parent function & use lazy args.
1726#define FLEX_CHECK_VERIFIED(P, PACKED_TYPE) \
1727 if (reuse_tracker_) { \
1728 auto packed_type = PACKED_TYPE; \
1729 auto existing = (*reuse_tracker_)[P - buf_]; \
1730 if (existing == packed_type) return true; \
1731 /* Fail verification if already set with different type! */ \
1732 if (!Check(existing == 0)) return false; \
1733 (*reuse_tracker_)[P - buf_] = packed_type; \
1734 }
1735
1736 bool VerifyVector(Reference r, const uint8_t *p, Type elem_type) {
1737 // Any kind of nesting goes thru this function, so guard against that
1738 // here, both with simple nesting checks, and the reuse tracker if on.
1739 depth_++;
1740 num_vectors_++;
1741 if (!Check(depth_ <= max_depth_ && num_vectors_ <= max_vectors_))
1742 return false;
1743 auto size_byte_width = r.byte_width_;
James Kuszmaul8e62b022022-03-22 09:33:25 -07001744 if (!VerifyBeforePointer(p, size_byte_width)) return false;
Austin Schuh2dd86a92022-09-14 21:19:23 -07001745 FLEX_CHECK_VERIFIED(p - size_byte_width,
1746 PackedType(Builder::WidthB(size_byte_width), r.type_));
James Kuszmaul8e62b022022-03-22 09:33:25 -07001747 auto sized = Sized(p, size_byte_width);
1748 auto num_elems = sized.size();
1749 auto elem_byte_width = r.type_ == FBT_STRING || r.type_ == FBT_BLOB
1750 ? uint8_t(1)
1751 : r.byte_width_;
1752 auto max_elems = SIZE_MAX / elem_byte_width;
1753 if (!Check(num_elems < max_elems))
1754 return false; // Protect against byte_size overflowing.
1755 auto byte_size = num_elems * elem_byte_width;
1756 if (!VerifyFromPointer(p, byte_size)) return false;
1757 if (elem_type == FBT_NULL) {
1758 // Verify type bytes after the vector.
1759 if (!VerifyFromPointer(p + byte_size, num_elems)) return false;
1760 auto v = Vector(p, size_byte_width);
1761 for (size_t i = 0; i < num_elems; i++)
1762 if (!VerifyRef(v[i])) return false;
1763 } else if (elem_type == FBT_KEY) {
1764 auto v = TypedVector(p, elem_byte_width, FBT_KEY);
1765 for (size_t i = 0; i < num_elems; i++)
1766 if (!VerifyRef(v[i])) return false;
1767 } else {
1768 FLATBUFFERS_ASSERT(IsInline(elem_type));
1769 }
1770 depth_--;
1771 return true;
1772 }
1773
1774 bool VerifyKeys(const uint8_t *p, uint8_t byte_width) {
1775 // The vector part of the map has already been verified.
1776 const size_t num_prefixed_fields = 3;
1777 if (!VerifyBeforePointer(p, byte_width * num_prefixed_fields)) return false;
1778 p -= byte_width * num_prefixed_fields;
1779 auto off = ReadUInt64(p, byte_width);
1780 if (!VerifyOffset(off, p)) return false;
1781 auto key_byte_with =
1782 static_cast<uint8_t>(ReadUInt64(p + byte_width, byte_width));
1783 if (!VerifyByteWidth(key_byte_with)) return false;
1784 return VerifyVector(Reference(p, byte_width, key_byte_with, FBT_VECTOR_KEY),
1785 p - off, FBT_KEY);
1786 }
1787
1788 bool VerifyKey(const uint8_t *p) {
1789 FLEX_CHECK_VERIFIED(p, PackedType(BIT_WIDTH_8, FBT_KEY));
1790 while (p < buf_ + size_)
1791 if (*p++) return true;
1792 return false;
1793 }
1794
1795#undef FLEX_CHECK_VERIFIED
1796
1797 bool VerifyTerminator(const String &s) {
1798 return VerifyFromPointer(reinterpret_cast<const uint8_t *>(s.c_str()),
1799 s.size() + 1);
1800 }
1801
1802 bool VerifyRef(Reference r) {
1803 // r.parent_width_ and r.data_ already verified.
1804 if (!VerifyByteWidth(r.byte_width_) || !VerifyType(r.type_)) {
1805 return false;
1806 }
1807 if (IsInline(r.type_)) {
1808 // Inline scalars, don't require further verification.
1809 return true;
1810 }
1811 // All remaining types are an offset.
1812 auto off = ReadUInt64(r.data_, r.parent_width_);
1813 if (!VerifyOffset(off, r.data_)) return false;
1814 auto p = r.Indirect();
1815 if (!VerifyAlignment(p, r.byte_width_)) return false;
1816 switch (r.type_) {
1817 case FBT_INDIRECT_INT:
1818 case FBT_INDIRECT_UINT:
1819 case FBT_INDIRECT_FLOAT: return VerifyFromPointer(p, r.byte_width_);
1820 case FBT_KEY: return VerifyKey(p);
1821 case FBT_MAP:
1822 return VerifyVector(r, p, FBT_NULL) && VerifyKeys(p, r.byte_width_);
1823 case FBT_VECTOR: return VerifyVector(r, p, FBT_NULL);
1824 case FBT_VECTOR_INT: return VerifyVector(r, p, FBT_INT);
1825 case FBT_VECTOR_BOOL:
1826 case FBT_VECTOR_UINT: return VerifyVector(r, p, FBT_UINT);
1827 case FBT_VECTOR_FLOAT: return VerifyVector(r, p, FBT_FLOAT);
1828 case FBT_VECTOR_KEY: return VerifyVector(r, p, FBT_KEY);
1829 case FBT_VECTOR_STRING_DEPRECATED:
1830 // Use of FBT_KEY here intentional, see elsewhere.
1831 return VerifyVector(r, p, FBT_KEY);
1832 case FBT_BLOB: return VerifyVector(r, p, FBT_UINT);
1833 case FBT_STRING:
1834 return VerifyVector(r, p, FBT_UINT) &&
1835 VerifyTerminator(String(p, r.byte_width_));
1836 case FBT_VECTOR_INT2:
1837 case FBT_VECTOR_UINT2:
1838 case FBT_VECTOR_FLOAT2:
1839 case FBT_VECTOR_INT3:
1840 case FBT_VECTOR_UINT3:
1841 case FBT_VECTOR_FLOAT3:
1842 case FBT_VECTOR_INT4:
1843 case FBT_VECTOR_UINT4:
1844 case FBT_VECTOR_FLOAT4: {
1845 uint8_t len = 0;
1846 auto vtype = ToFixedTypedVectorElementType(r.type_, &len);
1847 if (!VerifyType(vtype)) return false;
1848 return VerifyFromPointer(p, r.byte_width_ * len);
1849 }
1850 default: return false;
1851 }
1852 }
1853
1854 public:
1855 bool VerifyBuffer() {
1856 if (!Check(size_ >= 3)) return false;
1857 auto end = buf_ + size_;
1858 auto byte_width = *--end;
1859 auto packed_type = *--end;
1860 return VerifyByteWidth(byte_width) && Check(end - buf_ >= byte_width) &&
1861 VerifyRef(Reference(end - byte_width, byte_width, packed_type));
1862 }
1863
1864 private:
1865 const uint8_t *buf_;
1866 size_t size_;
1867 size_t depth_;
1868 const size_t max_depth_;
1869 size_t num_vectors_;
1870 const size_t max_vectors_;
1871 bool check_alignment_;
1872 std::vector<uint8_t> *reuse_tracker_;
1873};
1874
Austin Schuh2dd86a92022-09-14 21:19:23 -07001875// Utility function that constructs the Verifier for you, see above for
James Kuszmaul8e62b022022-03-22 09:33:25 -07001876// parameters.
1877inline bool VerifyBuffer(const uint8_t *buf, size_t buf_len,
1878 std::vector<uint8_t> *reuse_tracker = nullptr) {
1879 Verifier verifier(buf, buf_len, reuse_tracker);
1880 return verifier.VerifyBuffer();
1881}
1882
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001883} // namespace flexbuffers
1884
Austin Schuh272c6132020-11-14 16:37:52 -08001885#if defined(_MSC_VER)
1886# pragma warning(pop)
1887#endif
Austin Schuhe89fa2d2019-08-14 20:24:23 -07001888
1889#endif // FLATBUFFERS_FLEXBUFFERS_H_