// Amalgamated source file
2#include "upb.h"
3
4
Austin Schuh40c16522018-10-28 20:27:54 -07005/* Maps descriptor type -> upb field type. */
6static const uint8_t upb_desctype_to_fieldtype[] = {
7 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
8 UPB_TYPE_DOUBLE, /* DOUBLE */
9 UPB_TYPE_FLOAT, /* FLOAT */
10 UPB_TYPE_INT64, /* INT64 */
11 UPB_TYPE_UINT64, /* UINT64 */
12 UPB_TYPE_INT32, /* INT32 */
13 UPB_TYPE_UINT64, /* FIXED64 */
14 UPB_TYPE_UINT32, /* FIXED32 */
15 UPB_TYPE_BOOL, /* BOOL */
16 UPB_TYPE_STRING, /* STRING */
17 UPB_TYPE_MESSAGE, /* GROUP */
18 UPB_TYPE_MESSAGE, /* MESSAGE */
19 UPB_TYPE_BYTES, /* BYTES */
20 UPB_TYPE_UINT32, /* UINT32 */
21 UPB_TYPE_ENUM, /* ENUM */
22 UPB_TYPE_INT32, /* SFIXED32 */
23 UPB_TYPE_INT64, /* SFIXED64 */
24 UPB_TYPE_INT32, /* SINT32 */
25 UPB_TYPE_INT64, /* SINT64 */
26};
27
28/* Data pertaining to the parse. */
29typedef struct {
30 upb_env *env;
31 /* Current decoding pointer. Points to the beginning of a field until we
32 * have finished decoding the whole field. */
33 const char *ptr;
34} upb_decstate;
35
36/* Data pertaining to a single message frame. */
37typedef struct {
38 const char *limit;
39 int32_t group_number; /* 0 if we are not parsing a group. */
40
41 /* These members are unset for an unknown group frame. */
42 char *msg;
43 const upb_msglayout_msginit_v1 *m;
44} upb_decframe;
45
46#define CHK(x) if (!(x)) { return false; }
47
48static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
49 const char *limit);
50static bool upb_decode_message(upb_decstate *d, const char *limit,
51 int group_number, char *msg,
52 const upb_msglayout_msginit_v1 *l);
53
/* Decodes one base-128 varint beginning at *ptr.
 *
 * Each byte supplies seven payload bits, least-significant group first; a set
 * top bit means another byte follows.  On success *val holds the value, *ptr
 * is moved past the last byte consumed and true is returned.  Returns false,
 * leaving *ptr untouched, if `limit` is reached before the terminating byte or
 * if the encoding runs longer than ten bytes. */
static bool upb_decode_varint(const char **ptr, const char *limit,
                              uint64_t *val) {
  const char *cur = *ptr;
  int shift = 0;
  uint8_t b;

  *val = 0;

  for (;;) {
    if (!(shift < 70 && cur < limit)) {
      return false;
    }
    b = *cur;
    cur++;
    *val |= (uint64_t)(b & 0x7F) << shift;
    shift += 7;
    if (!(b & 0x80)) {
      break;
    }
  }

  *ptr = cur;
  return true;
}
72
/* Decodes a varint that must fit in 32 bits.  Returns false when the varint is
 * malformed or its value exceeds UINT32_MAX; *val is written only on
 * success. */
static bool upb_decode_varint32(const char **ptr, const char *limit,
                                uint32_t *val) {
  uint64_t wide;

  if (!upb_decode_varint(ptr, limit, &wide)) {
    return false;
  }
  if (wide > UINT32_MAX) {
    return false;
  }

  *val = wide;
  return true;
}
80
/* Copies the next eight bytes of input into *val, byte for byte, and advances
 * *ptr past them.  Returns false without consuming anything when fewer than
 * eight bytes remain before `limit`. */
static bool upb_decode_64bit(const char **ptr, const char *limit,
                             uint64_t *val) {
  const char *src = *ptr;

  if (limit - src < 8) {
    return false;
  }

  memcpy(val, src, 8);
  *ptr = src + 8;
  return true;
}
88
/* Copies the next four bytes of input into *val, byte for byte, and advances
 * *ptr past them.  Returns false without consuming anything when fewer than
 * four bytes remain before `limit`. */
static bool upb_decode_32bit(const char **ptr, const char *limit,
                             uint32_t *val) {
  const char *src = *ptr;

  if (limit - src < 4) {
    return false;
  }

  memcpy(val, src, 4);
  *ptr = src + 4;
  return true;
}
96
/* Reads a field tag and splits it: the low three bits become *wire_type and
 * the remaining bits become *field_number.  Returns false if the tag is not a
 * valid 32-bit varint. */
static bool upb_decode_tag(const char **ptr, const char *limit,
                           int *field_number, int *wire_type) {
  uint32_t raw_tag = 0;

  if (!upb_decode_varint32(ptr, limit, &raw_tag)) {
    return false;
  }

  *field_number = raw_tag >> 3;
  *wire_type = raw_tag & 7;
  return true;
}
105
/* Undoes zig-zag encoding for a 32-bit value: the low bit selects the sign and
 * the remaining bits carry the magnitude (0 -> 0, 1 -> -1, 2 -> 1, ...). */
static int32_t upb_zzdecode_32(uint32_t n) {
  /* All ones when the low bit is set, all zeros otherwise. */
  const uint32_t sign_mask = 0u - (n & 1u);
  const uint32_t magnitude = n >> 1;
  return magnitude ^ sign_mask;
}
109
/* Undoes zig-zag encoding for a 64-bit value: the low bit selects the sign and
 * the remaining bits carry the magnitude (0 -> 0, 1 -> -1, 2 -> 1, ...). */
static int64_t upb_zzdecode_64(uint64_t n) {
  /* All ones when the low bit is set, all zeros otherwise. */
  const uint64_t sign_mask = (uint64_t)0 - (n & 1u);
  const uint64_t magnitude = n >> 1;
  return magnitude ^ sign_mask;
}
113
114static bool upb_decode_string(const char **ptr, const char *limit,
115 upb_stringview *val) {
116 uint32_t len;
117
118 CHK(upb_decode_varint32(ptr, limit, &len) &&
119 len < INT32_MAX &&
120 limit - *ptr >= (int32_t)len);
121
122 *val = upb_stringview_make(*ptr, len);
123 *ptr += len;
124 return true;
125}
126
/* Stores `val` at byte offset `ofs` inside `msg`.  The copy goes through
 * memcpy, so the destination does not need to be aligned. */
static void upb_set32(void *msg, size_t ofs, uint32_t val) {
  char *dest = (char*)msg;
  dest += ofs;
  memcpy(dest, &val, sizeof(val));
}
130
131static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame,
132 const char *start) {
133 UPB_UNUSED(d);
134 UPB_UNUSED(frame);
135 UPB_UNUSED(start);
136 return true;
137}
138
139static bool upb_skip_unknownfielddata(upb_decstate *d, upb_decframe *frame,
140 int field_number, int wire_type) {
141 switch (wire_type) {
142 case UPB_WIRE_TYPE_VARINT: {
143 uint64_t val;
144 return upb_decode_varint(&d->ptr, frame->limit, &val);
145 }
146 case UPB_WIRE_TYPE_32BIT: {
147 uint32_t val;
148 return upb_decode_32bit(&d->ptr, frame->limit, &val);
149 }
150 case UPB_WIRE_TYPE_64BIT: {
151 uint64_t val;
152 return upb_decode_64bit(&d->ptr, frame->limit, &val);
153 }
154 case UPB_WIRE_TYPE_DELIMITED: {
155 upb_stringview val;
156 return upb_decode_string(&d->ptr, frame->limit, &val);
157 }
158 case UPB_WIRE_TYPE_START_GROUP:
159 return upb_skip_unknowngroup(d, field_number, frame->limit);
160 case UPB_WIRE_TYPE_END_GROUP:
161 CHK(field_number == frame->group_number);
162 frame->limit = d->ptr;
163 return true;
164 }
165 return false;
166}
167
168static bool upb_array_grow(upb_array *arr, size_t elements) {
169 size_t needed = arr->len + elements;
170 size_t new_size = UPB_MAX(arr->size, 8);
171 size_t new_bytes;
172 size_t old_bytes;
173 void *new_data;
174
175 while (new_size < needed) {
176 new_size *= 2;
177 }
178
179 old_bytes = arr->len * arr->element_size;
180 new_bytes = new_size * arr->element_size;
181 new_data = upb_realloc(arr->alloc, arr->data, old_bytes, new_bytes);
182 CHK(new_data);
183
184 arr->data = new_data;
185 arr->size = new_size;
186 return true;
187}
188
189static void *upb_array_reserve(upb_array *arr, size_t elements) {
190 if (arr->size - arr->len < elements) {
191 CHK(upb_array_grow(arr, elements));
192 }
193 return (char*)arr->data + (arr->len * arr->element_size);
194}
195
196static void *upb_array_add(upb_array *arr, size_t elements) {
197 void *ret = upb_array_reserve(arr, elements);
198 arr->len += elements;
199 return ret;
200}
201
202static upb_array *upb_getarr(upb_decframe *frame,
203 const upb_msglayout_fieldinit_v1 *field) {
204 UPB_ASSERT(field->label == UPB_LABEL_REPEATED);
205 return *(upb_array**)&frame->msg[field->offset];
206}
207
208static upb_array *upb_getorcreatearr(upb_decstate *d,
209 upb_decframe *frame,
210 const upb_msglayout_fieldinit_v1 *field) {
211 upb_array *arr = upb_getarr(frame, field);
212
213 if (!arr) {
214 arr = upb_env_malloc(d->env, sizeof(*arr));
215 if (!arr) {
216 return NULL;
217 }
218 upb_array_init(arr, upb_desctype_to_fieldtype[field->type],
219 upb_arena_alloc(upb_env_arena(d->env)));
220 *(upb_array**)&frame->msg[field->offset] = arr;
221 }
222
223 return arr;
224}
225
226static void upb_sethasbit(upb_decframe *frame,
227 const upb_msglayout_fieldinit_v1 *field) {
228 UPB_ASSERT(field->hasbit != UPB_NO_HASBIT);
229 frame->msg[field->hasbit / 8] |= (1 << (field->hasbit % 8));
230}
231
232static void upb_setoneofcase(upb_decframe *frame,
233 const upb_msglayout_fieldinit_v1 *field) {
234 UPB_ASSERT(field->oneof_index != UPB_NOT_IN_ONEOF);
235 upb_set32(frame->msg, frame->m->oneofs[field->oneof_index].case_offset,
236 field->number);
237}
238
239static char *upb_decode_prepareslot(upb_decstate *d,
240 upb_decframe *frame,
241 const upb_msglayout_fieldinit_v1 *field) {
242 char *field_mem = frame->msg + field->offset;
243 upb_array *arr;
244
245 if (field->label == UPB_LABEL_REPEATED) {
246 arr = upb_getorcreatearr(d, frame, field);
247 field_mem = upb_array_reserve(arr, 1);
248 }
249
250 return field_mem;
251}
252
253static void upb_decode_setpresent(upb_decframe *frame,
254 const upb_msglayout_fieldinit_v1 *field) {
255 if (field->label == UPB_LABEL_REPEATED) {
256 upb_array *arr = upb_getarr(frame, field);
257 UPB_ASSERT(arr->len < arr->size);
258 arr->len++;
259 } else if (field->oneof_index != UPB_NOT_IN_ONEOF) {
260 upb_setoneofcase(frame, field);
261 } else if (field->hasbit != UPB_NO_HASBIT) {
262 upb_sethasbit(frame, field);
263 }
264}
265
266static bool upb_decode_submsg(upb_decstate *d,
267 upb_decframe *frame,
268 const char *limit,
269 const upb_msglayout_fieldinit_v1 *field,
270 int group_number) {
271 char *submsg = *(void**)&frame->msg[field->offset];
272 const upb_msglayout_msginit_v1 *subm;
273
274 UPB_ASSERT(field->submsg_index != UPB_NO_SUBMSG);
275 subm = frame->m->submsgs[field->submsg_index];
276 UPB_ASSERT(subm);
277
278 if (!submsg) {
279 submsg = upb_env_malloc(d->env, upb_msg_sizeof((upb_msglayout *)subm));
280 CHK(submsg);
281 submsg = upb_msg_init(
282 submsg, (upb_msglayout*)subm, upb_arena_alloc(upb_env_arena(d->env)));
283 *(void**)&frame->msg[field->offset] = submsg;
284 }
285
286 upb_decode_message(d, limit, group_number, submsg, subm);
287
288 return true;
289}
290
291static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame,
292 const char *field_start,
293 const upb_msglayout_fieldinit_v1 *field) {
294 uint64_t val;
295 void *field_mem;
296
297 field_mem = upb_decode_prepareslot(d, frame, field);
298 CHK(field_mem);
299 CHK(upb_decode_varint(&d->ptr, frame->limit, &val));
300
301 switch ((upb_descriptortype_t)field->type) {
302 case UPB_DESCRIPTOR_TYPE_INT64:
303 case UPB_DESCRIPTOR_TYPE_UINT64:
304 memcpy(field_mem, &val, sizeof(val));
305 break;
306 case UPB_DESCRIPTOR_TYPE_INT32:
307 case UPB_DESCRIPTOR_TYPE_UINT32:
308 case UPB_DESCRIPTOR_TYPE_ENUM: {
309 uint32_t val32 = val;
310 memcpy(field_mem, &val32, sizeof(val32));
311 break;
312 }
313 case UPB_DESCRIPTOR_TYPE_BOOL: {
314 bool valbool = val != 0;
315 memcpy(field_mem, &valbool, sizeof(valbool));
316 break;
317 }
318 case UPB_DESCRIPTOR_TYPE_SINT32: {
319 int32_t decoded = upb_zzdecode_32(val);
320 memcpy(field_mem, &decoded, sizeof(decoded));
321 break;
322 }
323 case UPB_DESCRIPTOR_TYPE_SINT64: {
324 int64_t decoded = upb_zzdecode_64(val);
325 memcpy(field_mem, &decoded, sizeof(decoded));
326 break;
327 }
328 default:
329 return upb_append_unknown(d, frame, field_start);
330 }
331
332 upb_decode_setpresent(frame, field);
333 return true;
334}
335
336static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame,
337 const char *field_start,
338 const upb_msglayout_fieldinit_v1 *field) {
339 void *field_mem;
340 uint64_t val;
341
342 field_mem = upb_decode_prepareslot(d, frame, field);
343 CHK(field_mem);
344 CHK(upb_decode_64bit(&d->ptr, frame->limit, &val));
345
346 switch ((upb_descriptortype_t)field->type) {
347 case UPB_DESCRIPTOR_TYPE_DOUBLE:
348 case UPB_DESCRIPTOR_TYPE_FIXED64:
349 case UPB_DESCRIPTOR_TYPE_SFIXED64:
350 memcpy(field_mem, &val, sizeof(val));
351 break;
352 default:
353 return upb_append_unknown(d, frame, field_start);
354 }
355
356 upb_decode_setpresent(frame, field);
357 return true;
358}
359
360static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame,
361 const char *field_start,
362 const upb_msglayout_fieldinit_v1 *field) {
363 void *field_mem;
364 uint32_t val;
365
366 field_mem = upb_decode_prepareslot(d, frame, field);
367 CHK(field_mem);
368 CHK(upb_decode_32bit(&d->ptr, frame->limit, &val));
369
370 switch ((upb_descriptortype_t)field->type) {
371 case UPB_DESCRIPTOR_TYPE_FLOAT:
372 case UPB_DESCRIPTOR_TYPE_FIXED32:
373 case UPB_DESCRIPTOR_TYPE_SFIXED32:
374 memcpy(field_mem, &val, sizeof(val));
375 break;
376 default:
377 return upb_append_unknown(d, frame, field_start);
378 }
379
380 upb_decode_setpresent(frame, field);
381 return true;
382}
383
384static bool upb_decode_fixedpacked(upb_array *arr, upb_stringview data,
385 int elem_size) {
386 int elements = data.size / elem_size;
387 void *field_mem;
388
389 CHK((size_t)(elements * elem_size) == data.size);
390 field_mem = upb_array_add(arr, elements);
391 CHK(field_mem);
392 memcpy(field_mem, data.data, data.size);
393 return true;
394}
395
396static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame,
397 const char *field_start,
398 const upb_msglayout_fieldinit_v1 *field,
399 upb_stringview val) {
400 upb_array *arr = upb_getorcreatearr(d, frame, field);
401
402#define VARINT_CASE(ctype, decode) { \
403 const char *ptr = val.data; \
404 const char *limit = ptr + val.size; \
405 while (ptr < limit) { \
406 uint64_t val; \
407 void *field_mem; \
408 ctype decoded; \
409 CHK(upb_decode_varint(&ptr, limit, &val)); \
410 decoded = (decode)(val); \
411 field_mem = upb_array_add(arr, 1); \
412 CHK(field_mem); \
413 memcpy(field_mem, &decoded, sizeof(ctype)); \
414 } \
415 return true; \
416}
417
418 switch ((upb_descriptortype_t)field->type) {
419 case UPB_DESCRIPTOR_TYPE_STRING:
420 case UPB_DESCRIPTOR_TYPE_BYTES: {
421 void *field_mem = upb_array_add(arr, 1);
422 CHK(field_mem);
423 memcpy(field_mem, &val, sizeof(val));
424 return true;
425 }
426 case UPB_DESCRIPTOR_TYPE_FLOAT:
427 case UPB_DESCRIPTOR_TYPE_FIXED32:
428 case UPB_DESCRIPTOR_TYPE_SFIXED32:
429 return upb_decode_fixedpacked(arr, val, sizeof(int32_t));
430 case UPB_DESCRIPTOR_TYPE_DOUBLE:
431 case UPB_DESCRIPTOR_TYPE_FIXED64:
432 case UPB_DESCRIPTOR_TYPE_SFIXED64:
433 return upb_decode_fixedpacked(arr, val, sizeof(int64_t));
434 case UPB_DESCRIPTOR_TYPE_INT32:
435 case UPB_DESCRIPTOR_TYPE_UINT32:
436 case UPB_DESCRIPTOR_TYPE_ENUM:
437 /* TODO: proto2 enum field that isn't in the enum. */
438 VARINT_CASE(uint32_t, uint32_t);
439 case UPB_DESCRIPTOR_TYPE_INT64:
440 case UPB_DESCRIPTOR_TYPE_UINT64:
441 VARINT_CASE(uint64_t, uint64_t);
442 case UPB_DESCRIPTOR_TYPE_BOOL:
443 VARINT_CASE(bool, bool);
444 case UPB_DESCRIPTOR_TYPE_SINT32:
445 VARINT_CASE(int32_t, upb_zzdecode_32);
446 case UPB_DESCRIPTOR_TYPE_SINT64:
447 VARINT_CASE(int64_t, upb_zzdecode_64);
448 case UPB_DESCRIPTOR_TYPE_MESSAGE:
449 CHK(val.size <= (size_t)(frame->limit - val.data));
450 return upb_decode_submsg(d, frame, val.data + val.size, field, 0);
451 case UPB_DESCRIPTOR_TYPE_GROUP:
452 return upb_append_unknown(d, frame, field_start);
453 }
454#undef VARINT_CASE
455 UPB_UNREACHABLE();
456}
457
458static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame,
459 const char *field_start,
460 const upb_msglayout_fieldinit_v1 *field) {
461 upb_stringview val;
462
463 CHK(upb_decode_string(&d->ptr, frame->limit, &val));
464
465 if (field->label == UPB_LABEL_REPEATED) {
466 return upb_decode_toarray(d, frame, field_start, field, val);
467 } else {
468 switch ((upb_descriptortype_t)field->type) {
469 case UPB_DESCRIPTOR_TYPE_STRING:
470 case UPB_DESCRIPTOR_TYPE_BYTES: {
471 void *field_mem = upb_decode_prepareslot(d, frame, field);
472 CHK(field_mem);
473 memcpy(field_mem, &val, sizeof(val));
474 break;
475 }
476 case UPB_DESCRIPTOR_TYPE_MESSAGE:
477 CHK(val.size <= (size_t)(frame->limit - val.data));
478 CHK(upb_decode_submsg(d, frame, val.data + val.size, field, 0));
479 break;
480 default:
481 /* TODO(haberman): should we accept the last element of a packed? */
482 return upb_append_unknown(d, frame, field_start);
483 }
484 upb_decode_setpresent(frame, field);
485 return true;
486 }
487}
488
489static const upb_msglayout_fieldinit_v1 *upb_find_field(
490 const upb_msglayout_msginit_v1 *l, uint32_t field_number) {
491 /* Lots of optimization opportunities here. */
492 int i;
493 for (i = 0; i < l->field_count; i++) {
494 if (l->fields[i].number == field_number) {
495 return &l->fields[i];
496 }
497 }
498
499 return NULL; /* Unknown field. */
500}
501
502static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) {
503 int field_number;
504 int wire_type;
505 const char *field_start = d->ptr;
506 const upb_msglayout_fieldinit_v1 *field;
507
508 CHK(upb_decode_tag(&d->ptr, frame->limit, &field_number, &wire_type));
509 field = upb_find_field(frame->m, field_number);
510
511 if (field) {
512 switch (wire_type) {
513 case UPB_WIRE_TYPE_VARINT:
514 return upb_decode_varintfield(d, frame, field_start, field);
515 case UPB_WIRE_TYPE_32BIT:
516 return upb_decode_32bitfield(d, frame, field_start, field);
517 case UPB_WIRE_TYPE_64BIT:
518 return upb_decode_64bitfield(d, frame, field_start, field);
519 case UPB_WIRE_TYPE_DELIMITED:
520 return upb_decode_delimitedfield(d, frame, field_start, field);
521 case UPB_WIRE_TYPE_START_GROUP:
522 CHK(field->type == UPB_DESCRIPTOR_TYPE_GROUP);
523 return upb_decode_submsg(d, frame, frame->limit, field, field_number);
524 case UPB_WIRE_TYPE_END_GROUP:
525 CHK(frame->group_number == field_number)
526 frame->limit = d->ptr;
527 return true;
528 default:
529 return false;
530 }
531 } else {
532 CHK(field_number != 0);
533 return upb_skip_unknownfielddata(d, frame, field_number, wire_type);
534 }
535}
536
537static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
538 const char *limit) {
539 upb_decframe frame;
540 frame.msg = NULL;
541 frame.m = NULL;
542 frame.group_number = field_number;
543 frame.limit = limit;
544
545 while (d->ptr < frame.limit) {
546 int wire_type;
547 int field_number;
548
549 CHK(upb_decode_tag(&d->ptr, frame.limit, &field_number, &wire_type));
550 CHK(upb_skip_unknownfielddata(d, &frame, field_number, wire_type));
551 }
552
553 return true;
554}
555
556static bool upb_decode_message(upb_decstate *d, const char *limit,
557 int group_number, char *msg,
558 const upb_msglayout_msginit_v1 *l) {
559 upb_decframe frame;
560 frame.group_number = group_number;
561 frame.limit = limit;
562 frame.msg = msg;
563 frame.m = l;
564
565 while (d->ptr < frame.limit) {
566 CHK(upb_decode_field(d, &frame));
567 }
568
569 return true;
570}
571
572bool upb_decode(upb_stringview buf, void *msg,
573 const upb_msglayout_msginit_v1 *l, upb_env *env) {
574 upb_decstate state;
575 state.ptr = buf.data;
576 state.env = env;
577
578 return upb_decode_message(&state, buf.data + buf.size, 0, msg, l);
579}
580
581#undef CHK
582
583
584#include <ctype.h>
Brian Silverman9c614bc2016-02-15 20:20:02 -0500585#include <stdlib.h>
586#include <string.h>
587
/* Length-prefixed string.  Allocations are over-sized (see newstr()) so that
 * the character data extends past the single declared byte. */
typedef struct {
  size_t len;   /* Number of characters, not counting the terminator. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
592
593static str_t *newstr(const char *data, size_t len) {
Austin Schuh40c16522018-10-28 20:27:54 -0700594 str_t *ret = upb_gmalloc(sizeof(*ret) + len);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500595 if (!ret) return NULL;
596 ret->len = len;
597 memcpy(ret->str, data, len);
598 ret->str[len] = '\0';
599 return ret;
600}
601
Austin Schuh40c16522018-10-28 20:27:54 -0700602static void freestr(str_t *s) { upb_gfree(s); }
Brian Silverman9c614bc2016-02-15 20:20:02 -0500603
/* The <ctype.h> classifiers such as isalpha() depend on the locale, which is
 * not wanted here, so character classes are built from plain range checks.
 * Returns true when low <= c <= high. */
static bool upb_isbetween(char c, char low, char high) {
  return !(c < low || c > high);
}
608
/* True for ASCII letters of either case and for the underscore. */
static bool upb_isletter(char c) {
  if (upb_isbetween(c, 'A', 'Z')) {
    return true;
  }
  if (upb_isbetween(c, 'a', 'z')) {
    return true;
  }
  return c == '_';
}
612
/* True for anything upb_isletter() accepts and for the ASCII digits. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
616
617static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
618 bool start = true;
619 size_t i;
620 for (i = 0; i < len; i++) {
621 char c = str[i];
622 if (c == '.') {
623 if (start || !full) {
624 upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
625 return false;
626 }
627 start = true;
628 } else if (start) {
629 if (!upb_isletter(c)) {
630 upb_status_seterrf(
631 s, "invalid name: path components must start with a letter (%s)",
632 str);
633 return false;
634 }
635 start = false;
636 } else {
637 if (!upb_isalphanum(c)) {
638 upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
639 str);
640 return false;
641 }
642 }
643 }
644 return !start;
645}
646
Austin Schuh40c16522018-10-28 20:27:54 -0700647static bool upb_isoneof(const upb_refcounted *def) {
648 return def->vtbl == &upb_oneofdef_vtbl;
649}
650
651static bool upb_isfield(const upb_refcounted *def) {
652 return def->vtbl == &upb_fielddef_vtbl;
653}
654
655static const upb_oneofdef *upb_trygetoneof(const upb_refcounted *def) {
656 return upb_isoneof(def) ? (const upb_oneofdef*)def : NULL;
657}
658
659static const upb_fielddef *upb_trygetfield(const upb_refcounted *def) {
660 return upb_isfield(def) ? (const upb_fielddef*)def : NULL;
661}
662
Brian Silverman9c614bc2016-02-15 20:20:02 -0500663
664/* upb_def ********************************************************************/
665
666upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
667
668const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
669
Austin Schuh40c16522018-10-28 20:27:54 -0700670const char *upb_def_name(const upb_def *d) {
671 const char *p;
672
673 if (d->fullname == NULL) {
674 return NULL;
675 } else if ((p = strrchr(d->fullname, '.')) == NULL) {
676 /* No '.' in the name, return the full string. */
677 return d->fullname;
678 } else {
679 /* Return one past the last '.'. */
680 return p + 1;
681 }
682}
683
Brian Silverman9c614bc2016-02-15 20:20:02 -0500684bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
Austin Schuh40c16522018-10-28 20:27:54 -0700685 UPB_ASSERT(!upb_def_isfrozen(def));
686 if (!upb_isident(fullname, strlen(fullname), true, s)) {
687 return false;
688 }
689
690 fullname = upb_gstrdup(fullname);
691 if (!fullname) {
692 upb_upberr_setoom(s);
693 return false;
694 }
695
696 upb_gfree((void*)def->fullname);
697 def->fullname = fullname;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500698 return true;
699}
700
Austin Schuh40c16522018-10-28 20:27:54 -0700701const upb_filedef *upb_def_file(const upb_def *d) { return d->file; }
Brian Silverman9c614bc2016-02-15 20:20:02 -0500702
703static bool upb_def_init(upb_def *def, upb_deftype_t type,
704 const struct upb_refcounted_vtbl *vtbl,
705 const void *owner) {
706 if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return false;
707 def->type = type;
708 def->fullname = NULL;
709 def->came_from_user = false;
Austin Schuh40c16522018-10-28 20:27:54 -0700710 def->file = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500711 return true;
712}
713
714static void upb_def_uninit(upb_def *def) {
Austin Schuh40c16522018-10-28 20:27:54 -0700715 upb_gfree((void*)def->fullname);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500716}
717
718static const char *msgdef_name(const upb_msgdef *m) {
719 const char *name = upb_def_fullname(upb_msgdef_upcast(m));
720 return name ? name : "(anonymous)";
721}
722
723static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
724 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
725 upb_status_seterrmsg(s, "fielddef must have name and number set");
726 return false;
727 }
728
729 if (!f->type_is_set_) {
730 upb_status_seterrmsg(s, "fielddef type was not initialized");
731 return false;
732 }
733
734 if (upb_fielddef_lazy(f) &&
735 upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
736 upb_status_seterrmsg(s,
737 "only length-delimited submessage fields may be lazy");
738 return false;
739 }
740
741 if (upb_fielddef_hassubdef(f)) {
742 const upb_def *subdef;
743
744 if (f->subdef_is_symbolic) {
745 upb_status_seterrf(s, "field '%s.%s' has not been resolved",
746 msgdef_name(f->msg.def), upb_fielddef_name(f));
747 return false;
748 }
749
750 subdef = upb_fielddef_subdef(f);
751 if (subdef == NULL) {
752 upb_status_seterrf(s, "field %s.%s is missing required subdef",
753 msgdef_name(f->msg.def), upb_fielddef_name(f));
754 return false;
755 }
756
757 if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
758 upb_status_seterrf(s,
759 "subdef of field %s.%s is not frozen or being frozen",
760 msgdef_name(f->msg.def), upb_fielddef_name(f));
761 return false;
762 }
763 }
764
765 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
766 bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
767 bool has_default_number = upb_fielddef_enumhasdefaultint32(f);
768
769 /* Previously verified by upb_validate_enumdef(). */
Austin Schuh40c16522018-10-28 20:27:54 -0700770 UPB_ASSERT(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500771
772 /* We've already validated that we have an associated enumdef and that it
773 * has at least one member, so at least one of these should be true.
774 * Because if the user didn't set anything, we'll pick up the enum's
775 * default, but if the user *did* set something we should at least pick up
776 * the one they set (int32 or string). */
Austin Schuh40c16522018-10-28 20:27:54 -0700777 UPB_ASSERT(has_default_name || has_default_number);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500778
779 if (!has_default_name) {
780 upb_status_seterrf(s,
781 "enum default for field %s.%s (%d) is not in the enum",
782 msgdef_name(f->msg.def), upb_fielddef_name(f),
783 upb_fielddef_defaultint32(f));
784 return false;
785 }
786
787 if (!has_default_number) {
788 upb_status_seterrf(s,
789 "enum default for field %s.%s (%s) is not in the enum",
790 msgdef_name(f->msg.def), upb_fielddef_name(f),
791 upb_fielddef_defaultstr(f, NULL));
792 return false;
793 }
794
795 /* Lift the effective numeric default into the field's default slot, in case
796 * we were only getting it "by reference" from the enumdef. */
797 upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
798 }
799
800 /* Ensure that MapEntry submessages only appear as repeated fields, not
801 * optional/required (singular) fields. */
802 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
803 upb_fielddef_msgsubdef(f) != NULL) {
804 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
805 if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
806 upb_status_seterrf(s,
807 "Field %s refers to mapentry message but is not "
808 "a repeated field",
809 upb_fielddef_name(f) ? upb_fielddef_name(f) :
810 "(unnamed)");
811 return false;
812 }
813 }
814
815 return true;
816}
817
818static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
819 if (upb_enumdef_numvals(e) == 0) {
820 upb_status_seterrf(s, "enum %s has no members (must have at least one)",
821 upb_enumdef_fullname(e));
822 return false;
823 }
824
825 return true;
826}
827
828/* All submessage fields are lower than all other fields.
829 * Secondly, fields are increasing in order. */
830uint32_t field_rank(const upb_fielddef *f) {
831 uint32_t ret = upb_fielddef_number(f);
832 const uint32_t high_bit = 1 << 30;
Austin Schuh40c16522018-10-28 20:27:54 -0700833 UPB_ASSERT(ret < high_bit);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500834 if (!upb_fielddef_issubmsg(f))
835 ret |= high_bit;
836 return ret;
837}
838
839int cmp_fields(const void *p1, const void *p2) {
840 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
841 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
842 return field_rank(f1) - field_rank(f2);
843}
844
845static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
846 /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the
847 * lowest indexes, but we do not publicly guarantee this. */
848 upb_msg_field_iter j;
Austin Schuh40c16522018-10-28 20:27:54 -0700849 upb_msg_oneof_iter k;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500850 int i;
851 uint32_t selector;
852 int n = upb_msgdef_numfields(m);
Austin Schuh40c16522018-10-28 20:27:54 -0700853 upb_fielddef **fields;
854
855 if (n == 0) {
856 m->selector_count = UPB_STATIC_SELECTOR_COUNT;
857 m->submsg_field_count = 0;
858 return true;
859 }
860
861 fields = upb_gmalloc(n * sizeof(*fields));
862 if (!fields) {
863 upb_upberr_setoom(s);
864 return false;
865 }
Brian Silverman9c614bc2016-02-15 20:20:02 -0500866
867 m->submsg_field_count = 0;
868 for(i = 0, upb_msg_field_begin(&j, m);
869 !upb_msg_field_done(&j);
870 upb_msg_field_next(&j), i++) {
871 upb_fielddef *f = upb_msg_iter_field(&j);
Austin Schuh40c16522018-10-28 20:27:54 -0700872 UPB_ASSERT(f->msg.def == m);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500873 if (!upb_validate_field(f, s)) {
Austin Schuh40c16522018-10-28 20:27:54 -0700874 upb_gfree(fields);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500875 return false;
876 }
877 if (upb_fielddef_issubmsg(f)) {
878 m->submsg_field_count++;
879 }
880 fields[i] = f;
881 }
882
883 qsort(fields, n, sizeof(*fields), cmp_fields);
884
885 selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
886 for (i = 0; i < n; i++) {
887 upb_fielddef *f = fields[i];
888 f->index_ = i;
889 f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
890 selector += upb_handlers_selectorcount(f);
891 }
892 m->selector_count = selector;
893
894#ifndef NDEBUG
895 {
896 /* Verify that all selectors for the message are distinct. */
897#define TRY(type) \
898 if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
899
900 upb_inttable t;
901 upb_value v;
902 upb_selector_t sel;
903
904 upb_inttable_init(&t, UPB_CTYPE_BOOL);
905 v = upb_value_bool(true);
906 upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
907 upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
Austin Schuh40c16522018-10-28 20:27:54 -0700908 upb_inttable_insert(&t, UPB_UNKNOWN_SELECTOR, v);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500909 for(upb_msg_field_begin(&j, m);
910 !upb_msg_field_done(&j);
911 upb_msg_field_next(&j)) {
912 upb_fielddef *f = upb_msg_iter_field(&j);
913 /* These calls will assert-fail in upb_table if the value already
914 * exists. */
915 TRY(UPB_HANDLER_INT32);
916 TRY(UPB_HANDLER_INT64)
917 TRY(UPB_HANDLER_UINT32)
918 TRY(UPB_HANDLER_UINT64)
919 TRY(UPB_HANDLER_FLOAT)
920 TRY(UPB_HANDLER_DOUBLE)
921 TRY(UPB_HANDLER_BOOL)
922 TRY(UPB_HANDLER_STARTSTR)
923 TRY(UPB_HANDLER_STRING)
924 TRY(UPB_HANDLER_ENDSTR)
925 TRY(UPB_HANDLER_STARTSUBMSG)
926 TRY(UPB_HANDLER_ENDSUBMSG)
927 TRY(UPB_HANDLER_STARTSEQ)
928 TRY(UPB_HANDLER_ENDSEQ)
929 }
930 upb_inttable_uninit(&t);
931 }
932#undef TRY
933#endif
934
Austin Schuh40c16522018-10-28 20:27:54 -0700935 for(upb_msg_oneof_begin(&k, m), i = 0;
936 !upb_msg_oneof_done(&k);
937 upb_msg_oneof_next(&k), i++) {
938 upb_oneofdef *o = upb_msg_iter_oneof(&k);
939 o->index = i;
940 }
941
942 upb_gfree(fields);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500943 return true;
944}
945
Austin Schuh40c16522018-10-28 20:27:54 -0700946bool _upb_def_validate(upb_def *const*defs, size_t n, upb_status *s) {
947 size_t i;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500948
949 /* First perform validation, in two passes so we can check that we have a
950 * transitive closure without needing to search. */
951 for (i = 0; i < n; i++) {
952 upb_def *def = defs[i];
953 if (upb_def_isfrozen(def)) {
954 /* Could relax this requirement if it's annoying. */
955 upb_status_seterrmsg(s, "def is already frozen");
956 goto err;
957 } else if (def->type == UPB_DEF_FIELD) {
958 upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
959 goto err;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500960 } else {
961 /* Set now to detect transitive closure in the second pass. */
962 def->came_from_user = true;
Austin Schuh40c16522018-10-28 20:27:54 -0700963
964 if (def->type == UPB_DEF_ENUM &&
965 !upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
966 goto err;
967 }
Brian Silverman9c614bc2016-02-15 20:20:02 -0500968 }
969 }
970
971 /* Second pass of validation. Also assign selector bases and indexes, and
972 * compact tables. */
973 for (i = 0; i < n; i++) {
Austin Schuh40c16522018-10-28 20:27:54 -0700974 upb_def *def = defs[i];
975 upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
976 upb_enumdef *e = upb_dyncast_enumdef_mutable(def);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500977 if (m) {
978 upb_inttable_compact(&m->itof);
979 if (!assign_msg_indices(m, s)) {
980 goto err;
981 }
982 } else if (e) {
983 upb_inttable_compact(&e->iton);
984 }
985 }
986
Austin Schuh40c16522018-10-28 20:27:54 -0700987 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500988
989err:
990 for (i = 0; i < n; i++) {
Austin Schuh40c16522018-10-28 20:27:54 -0700991 upb_def *def = defs[i];
992 def->came_from_user = false;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500993 }
Austin Schuh40c16522018-10-28 20:27:54 -0700994 UPB_ASSERT(!(s && upb_ok(s)));
Brian Silverman9c614bc2016-02-15 20:20:02 -0500995 return false;
996}
997
Austin Schuh40c16522018-10-28 20:27:54 -0700998bool upb_def_freeze(upb_def *const* defs, size_t n, upb_status *s) {
999 /* Def graph contains FieldDefs between each MessageDef, so double the
1000 * limit. */
1001 const size_t maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
1002
1003 if (!_upb_def_validate(defs, n, s)) {
1004 return false;
1005 }
1006
1007
1008 /* Validation all passed; freeze the objects. */
1009 return upb_refcounted_freeze((upb_refcounted *const*)defs, n, s, maxdepth);
1010}
1011
Brian Silverman9c614bc2016-02-15 20:20:02 -05001012
1013/* upb_enumdef ****************************************************************/
1014
Austin Schuh40c16522018-10-28 20:27:54 -07001015static void visitenum(const upb_refcounted *r, upb_refcounted_visit *visit,
1016 void *closure) {
1017 const upb_enumdef *e = (const upb_enumdef*)r;
1018 const upb_def *def = upb_enumdef_upcast(e);
1019 if (upb_def_file(def)) {
1020 visit(r, upb_filedef_upcast(upb_def_file(def)), closure);
1021 }
1022}
1023
1024static void freeenum(upb_refcounted *r) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05001025 upb_enumdef *e = (upb_enumdef*)r;
1026 upb_inttable_iter i;
1027 upb_inttable_begin(&i, &e->iton);
1028 for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
Austin Schuh40c16522018-10-28 20:27:54 -07001029 /* To clean up the upb_gstrdup() from upb_enumdef_addval(). */
1030 upb_gfree(upb_value_getcstr(upb_inttable_iter_value(&i)));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001031 }
1032 upb_strtable_uninit(&e->ntoi);
1033 upb_inttable_uninit(&e->iton);
1034 upb_def_uninit(upb_enumdef_upcast_mutable(e));
Austin Schuh40c16522018-10-28 20:27:54 -07001035 upb_gfree(e);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001036}
1037
Austin Schuh40c16522018-10-28 20:27:54 -07001038const struct upb_refcounted_vtbl upb_enumdef_vtbl = {&visitenum, &freeenum};
1039
Brian Silverman9c614bc2016-02-15 20:20:02 -05001040upb_enumdef *upb_enumdef_new(const void *owner) {
Austin Schuh40c16522018-10-28 20:27:54 -07001041 upb_enumdef *e = upb_gmalloc(sizeof(*e));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001042 if (!e) return NULL;
Austin Schuh40c16522018-10-28 20:27:54 -07001043
1044 if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM,
1045 &upb_enumdef_vtbl, owner)) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05001046 goto err2;
Austin Schuh40c16522018-10-28 20:27:54 -07001047 }
1048
Brian Silverman9c614bc2016-02-15 20:20:02 -05001049 if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
1050 if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
1051 return e;
1052
1053err1:
1054 upb_strtable_uninit(&e->ntoi);
1055err2:
Austin Schuh40c16522018-10-28 20:27:54 -07001056 upb_gfree(e);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001057 return NULL;
1058}
1059
Brian Silverman9c614bc2016-02-15 20:20:02 -05001060bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
1061 upb_def *d = upb_enumdef_upcast_mutable(e);
1062 return upb_def_freeze(&d, 1, status);
1063}
1064
1065const char *upb_enumdef_fullname(const upb_enumdef *e) {
1066 return upb_def_fullname(upb_enumdef_upcast(e));
1067}
1068
Austin Schuh40c16522018-10-28 20:27:54 -07001069const char *upb_enumdef_name(const upb_enumdef *e) {
1070 return upb_def_name(upb_enumdef_upcast(e));
1071}
1072
Brian Silverman9c614bc2016-02-15 20:20:02 -05001073bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
1074 upb_status *s) {
1075 return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
1076}
1077
1078bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
1079 upb_status *status) {
Austin Schuh40c16522018-10-28 20:27:54 -07001080 char *name2;
1081
Brian Silverman9c614bc2016-02-15 20:20:02 -05001082 if (!upb_isident(name, strlen(name), false, status)) {
1083 return false;
1084 }
Austin Schuh40c16522018-10-28 20:27:54 -07001085
Brian Silverman9c614bc2016-02-15 20:20:02 -05001086 if (upb_enumdef_ntoiz(e, name, NULL)) {
1087 upb_status_seterrf(status, "name '%s' is already defined", name);
1088 return false;
1089 }
Austin Schuh40c16522018-10-28 20:27:54 -07001090
Brian Silverman9c614bc2016-02-15 20:20:02 -05001091 if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
1092 upb_status_seterrmsg(status, "out of memory");
1093 return false;
1094 }
Austin Schuh40c16522018-10-28 20:27:54 -07001095
1096 if (!upb_inttable_lookup(&e->iton, num, NULL)) {
1097 name2 = upb_gstrdup(name);
1098 if (!name2 || !upb_inttable_insert(&e->iton, num, upb_value_cstr(name2))) {
1099 upb_status_seterrmsg(status, "out of memory");
1100 upb_strtable_remove(&e->ntoi, name, NULL);
1101 return false;
1102 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05001103 }
Austin Schuh40c16522018-10-28 20:27:54 -07001104
Brian Silverman9c614bc2016-02-15 20:20:02 -05001105 if (upb_enumdef_numvals(e) == 1) {
1106 bool ok = upb_enumdef_setdefault(e, num, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -07001107 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001108 }
Austin Schuh40c16522018-10-28 20:27:54 -07001109
Brian Silverman9c614bc2016-02-15 20:20:02 -05001110 return true;
1111}
1112
1113int32_t upb_enumdef_default(const upb_enumdef *e) {
Austin Schuh40c16522018-10-28 20:27:54 -07001114 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001115 return e->defaultval;
1116}
1117
1118bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
Austin Schuh40c16522018-10-28 20:27:54 -07001119 UPB_ASSERT(!upb_enumdef_isfrozen(e));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001120 if (!upb_enumdef_iton(e, val)) {
1121 upb_status_seterrf(s, "number '%d' is not in the enum.", val);
1122 return false;
1123 }
1124 e->defaultval = val;
1125 return true;
1126}
1127
1128int upb_enumdef_numvals(const upb_enumdef *e) {
1129 return upb_strtable_count(&e->ntoi);
1130}
1131
1132void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
1133 /* We iterate over the ntoi table, to account for duplicate numbers. */
1134 upb_strtable_begin(i, &e->ntoi);
1135}
1136
1137void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
1138bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
1139
1140bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
1141 size_t len, int32_t *num) {
1142 upb_value v;
1143 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
1144 return false;
1145 }
1146 if (num) *num = upb_value_getint32(v);
1147 return true;
1148}
1149
1150const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
1151 upb_value v;
1152 return upb_inttable_lookup32(&def->iton, num, &v) ?
1153 upb_value_getcstr(v) : NULL;
1154}
1155
1156const char *upb_enum_iter_name(upb_enum_iter *iter) {
1157 return upb_strtable_iter_key(iter);
1158}
1159
1160int32_t upb_enum_iter_number(upb_enum_iter *iter) {
1161 return upb_value_getint32(upb_strtable_iter_value(iter));
1162}
1163
1164
1165/* upb_fielddef ***************************************************************/
1166
1167static void upb_fielddef_init_default(upb_fielddef *f);
1168
1169static void upb_fielddef_uninit_default(upb_fielddef *f) {
1170 if (f->type_is_set_ && f->default_is_string && f->defaultval.bytes)
1171 freestr(f->defaultval.bytes);
1172}
1173
Austin Schuh40c16522018-10-28 20:27:54 -07001174const char *upb_fielddef_fullname(const upb_fielddef *e) {
1175 return upb_def_fullname(upb_fielddef_upcast(e));
1176}
1177
Brian Silverman9c614bc2016-02-15 20:20:02 -05001178static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
1179 void *closure) {
1180 const upb_fielddef *f = (const upb_fielddef*)r;
Austin Schuh40c16522018-10-28 20:27:54 -07001181 const upb_def *def = upb_fielddef_upcast(f);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001182 if (upb_fielddef_containingtype(f)) {
1183 visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
1184 }
1185 if (upb_fielddef_containingoneof(f)) {
Austin Schuh40c16522018-10-28 20:27:54 -07001186 visit(r, upb_oneofdef_upcast(upb_fielddef_containingoneof(f)), closure);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001187 }
1188 if (upb_fielddef_subdef(f)) {
1189 visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
1190 }
Austin Schuh40c16522018-10-28 20:27:54 -07001191 if (upb_def_file(def)) {
1192 visit(r, upb_filedef_upcast(upb_def_file(def)), closure);
1193 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05001194}
1195
1196static void freefield(upb_refcounted *r) {
1197 upb_fielddef *f = (upb_fielddef*)r;
1198 upb_fielddef_uninit_default(f);
1199 if (f->subdef_is_symbolic)
Austin Schuh40c16522018-10-28 20:27:54 -07001200 upb_gfree(f->sub.name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001201 upb_def_uninit(upb_fielddef_upcast_mutable(f));
Austin Schuh40c16522018-10-28 20:27:54 -07001202 upb_gfree(f);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001203}
1204
1205static const char *enumdefaultstr(const upb_fielddef *f) {
1206 const upb_enumdef *e;
Austin Schuh40c16522018-10-28 20:27:54 -07001207 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001208 e = upb_fielddef_enumsubdef(f);
1209 if (f->default_is_string && f->defaultval.bytes) {
1210 /* Default was explicitly set as a string. */
1211 str_t *s = f->defaultval.bytes;
1212 return s->str;
1213 } else if (e) {
1214 if (!f->default_is_string) {
1215 /* Default was explicitly set as an integer; look it up in enumdef. */
1216 const char *name = upb_enumdef_iton(e, f->defaultval.sint);
1217 if (name) {
1218 return name;
1219 }
1220 } else {
1221 /* Default is completely unset; pull enumdef default. */
1222 if (upb_enumdef_numvals(e) > 0) {
1223 const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
Austin Schuh40c16522018-10-28 20:27:54 -07001224 UPB_ASSERT(name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001225 return name;
1226 }
1227 }
1228 }
1229 return NULL;
1230}
1231
1232static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
1233 const upb_enumdef *e;
Austin Schuh40c16522018-10-28 20:27:54 -07001234 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001235 e = upb_fielddef_enumsubdef(f);
1236 if (!f->default_is_string) {
1237 /* Default was explicitly set as an integer. */
1238 *val = f->defaultval.sint;
1239 return true;
1240 } else if (e) {
1241 if (f->defaultval.bytes) {
1242 /* Default was explicitly set as a str; try to lookup corresponding int. */
1243 str_t *s = f->defaultval.bytes;
1244 if (upb_enumdef_ntoiz(e, s->str, val)) {
1245 return true;
1246 }
1247 } else {
1248 /* Default is unset; try to pull in enumdef default. */
1249 if (upb_enumdef_numvals(e) > 0) {
1250 *val = upb_enumdef_default(e);
1251 return true;
1252 }
1253 }
1254 }
1255 return false;
1256}
1257
Austin Schuh40c16522018-10-28 20:27:54 -07001258const struct upb_refcounted_vtbl upb_fielddef_vtbl = {visitfield, freefield};
1259
Brian Silverman9c614bc2016-02-15 20:20:02 -05001260upb_fielddef *upb_fielddef_new(const void *o) {
Austin Schuh40c16522018-10-28 20:27:54 -07001261 upb_fielddef *f = upb_gmalloc(sizeof(*f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001262 if (!f) return NULL;
Austin Schuh40c16522018-10-28 20:27:54 -07001263 if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD,
1264 &upb_fielddef_vtbl, o)) {
1265 upb_gfree(f);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001266 return NULL;
1267 }
1268 f->msg.def = NULL;
1269 f->sub.def = NULL;
1270 f->oneof = NULL;
1271 f->subdef_is_symbolic = false;
1272 f->msg_is_symbolic = false;
1273 f->label_ = UPB_LABEL_OPTIONAL;
1274 f->type_ = UPB_TYPE_INT32;
1275 f->number_ = 0;
1276 f->type_is_set_ = false;
1277 f->tagdelim = false;
1278 f->is_extension_ = false;
1279 f->lazy_ = false;
1280 f->packed_ = true;
1281
1282 /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
1283 * with all integer types and is in some since more "default" since the most
1284 * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
1285 *
1286 * Other options to consider:
1287 * - there is no default; users must set this manually (like type).
1288 * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
1289 * be an optimal default for signed integers. */
1290 f->intfmt = UPB_INTFMT_VARIABLE;
1291 return f;
1292}
1293
Brian Silverman9c614bc2016-02-15 20:20:02 -05001294bool upb_fielddef_typeisset(const upb_fielddef *f) {
1295 return f->type_is_set_;
1296}
1297
1298upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
Austin Schuh40c16522018-10-28 20:27:54 -07001299 UPB_ASSERT(f->type_is_set_);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001300 return f->type_;
1301}
1302
1303uint32_t upb_fielddef_index(const upb_fielddef *f) {
1304 return f->index_;
1305}
1306
1307upb_label_t upb_fielddef_label(const upb_fielddef *f) {
1308 return f->label_;
1309}
1310
1311upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
1312 return f->intfmt;
1313}
1314
1315bool upb_fielddef_istagdelim(const upb_fielddef *f) {
1316 return f->tagdelim;
1317}
1318
1319uint32_t upb_fielddef_number(const upb_fielddef *f) {
1320 return f->number_;
1321}
1322
1323bool upb_fielddef_isextension(const upb_fielddef *f) {
1324 return f->is_extension_;
1325}
1326
1327bool upb_fielddef_lazy(const upb_fielddef *f) {
1328 return f->lazy_;
1329}
1330
1331bool upb_fielddef_packed(const upb_fielddef *f) {
1332 return f->packed_;
1333}
1334
1335const char *upb_fielddef_name(const upb_fielddef *f) {
1336 return upb_def_fullname(upb_fielddef_upcast(f));
1337}
1338
Austin Schuh40c16522018-10-28 20:27:54 -07001339size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
1340 const char *name = upb_fielddef_name(f);
1341 size_t src, dst = 0;
1342 bool ucase_next = false;
1343
1344#define WRITE(byte) \
1345 ++dst; \
1346 if (dst < len) buf[dst - 1] = byte; \
1347 else if (dst == len) buf[dst - 1] = '\0'
1348
1349 if (!name) {
1350 WRITE('\0');
1351 return 0;
1352 }
1353
1354 /* Implement the transformation as described in the spec:
1355 * 1. upper case all letters after an underscore.
1356 * 2. remove all underscores.
1357 */
1358 for (src = 0; name[src]; src++) {
1359 if (name[src] == '_') {
1360 ucase_next = true;
1361 continue;
1362 }
1363
1364 if (ucase_next) {
1365 WRITE(toupper(name[src]));
1366 ucase_next = false;
1367 } else {
1368 WRITE(name[src]);
1369 }
1370 }
1371
1372 WRITE('\0');
1373 return dst;
1374
1375#undef WRITE
1376}
1377
Brian Silverman9c614bc2016-02-15 20:20:02 -05001378const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
1379 return f->msg_is_symbolic ? NULL : f->msg.def;
1380}
1381
1382const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
1383 return f->oneof;
1384}
1385
1386upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
1387 return (upb_msgdef*)upb_fielddef_containingtype(f);
1388}
1389
1390const char *upb_fielddef_containingtypename(upb_fielddef *f) {
1391 return f->msg_is_symbolic ? f->msg.name : NULL;
1392}
1393
1394static void release_containingtype(upb_fielddef *f) {
Austin Schuh40c16522018-10-28 20:27:54 -07001395 if (f->msg_is_symbolic) upb_gfree(f->msg.name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001396}
1397
1398bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
1399 upb_status *s) {
Austin Schuh40c16522018-10-28 20:27:54 -07001400 char *name_copy;
1401 UPB_ASSERT(!upb_fielddef_isfrozen(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001402 if (upb_fielddef_containingtype(f)) {
1403 upb_status_seterrmsg(s, "field has already been added to a message.");
1404 return false;
1405 }
1406 /* TODO: validate name (upb_isident() doesn't quite work atm because this name
1407 * may have a leading "."). */
Austin Schuh40c16522018-10-28 20:27:54 -07001408
1409 name_copy = upb_gstrdup(name);
1410 if (!name_copy) {
1411 upb_upberr_setoom(s);
1412 return false;
1413 }
1414
Brian Silverman9c614bc2016-02-15 20:20:02 -05001415 release_containingtype(f);
Austin Schuh40c16522018-10-28 20:27:54 -07001416 f->msg.name = name_copy;
Brian Silverman9c614bc2016-02-15 20:20:02 -05001417 f->msg_is_symbolic = true;
1418 return true;
1419}
1420
1421bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
1422 if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) {
1423 upb_status_seterrmsg(s, "Already added to message or oneof");
1424 return false;
1425 }
1426 return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
1427}
1428
1429static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
1430 UPB_UNUSED(f);
1431 UPB_UNUSED(type);
Austin Schuh40c16522018-10-28 20:27:54 -07001432 UPB_ASSERT(f->type_is_set_ && upb_fielddef_type(f) == type);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001433}
1434
1435int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
1436 chkdefaulttype(f, UPB_TYPE_INT64);
1437 return f->defaultval.sint;
1438}
1439
1440int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
1441 if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
1442 int32_t val;
1443 bool ok = enumdefaultint32(f, &val);
Austin Schuh40c16522018-10-28 20:27:54 -07001444 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001445 return val;
1446 } else {
1447 chkdefaulttype(f, UPB_TYPE_INT32);
1448 return f->defaultval.sint;
1449 }
1450}
1451
1452uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
1453 chkdefaulttype(f, UPB_TYPE_UINT64);
1454 return f->defaultval.uint;
1455}
1456
1457uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
1458 chkdefaulttype(f, UPB_TYPE_UINT32);
1459 return f->defaultval.uint;
1460}
1461
1462bool upb_fielddef_defaultbool(const upb_fielddef *f) {
1463 chkdefaulttype(f, UPB_TYPE_BOOL);
1464 return f->defaultval.uint;
1465}
1466
1467float upb_fielddef_defaultfloat(const upb_fielddef *f) {
1468 chkdefaulttype(f, UPB_TYPE_FLOAT);
1469 return f->defaultval.flt;
1470}
1471
1472double upb_fielddef_defaultdouble(const upb_fielddef *f) {
1473 chkdefaulttype(f, UPB_TYPE_DOUBLE);
1474 return f->defaultval.dbl;
1475}
1476
1477const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
Austin Schuh40c16522018-10-28 20:27:54 -07001478 UPB_ASSERT(f->type_is_set_);
1479 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
Brian Silverman9c614bc2016-02-15 20:20:02 -05001480 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
1481 upb_fielddef_type(f) == UPB_TYPE_ENUM);
1482
1483 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
1484 const char *ret = enumdefaultstr(f);
Austin Schuh40c16522018-10-28 20:27:54 -07001485 UPB_ASSERT(ret);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001486 /* Enum defaults can't have embedded NULLs. */
1487 if (len) *len = strlen(ret);
1488 return ret;
1489 }
1490
1491 if (f->default_is_string) {
1492 str_t *str = f->defaultval.bytes;
1493 if (len) *len = str->len;
1494 return str->str;
1495 }
1496
1497 return NULL;
1498}
1499
1500static void upb_fielddef_init_default(upb_fielddef *f) {
1501 f->default_is_string = false;
1502 switch (upb_fielddef_type(f)) {
1503 case UPB_TYPE_DOUBLE: f->defaultval.dbl = 0; break;
1504 case UPB_TYPE_FLOAT: f->defaultval.flt = 0; break;
1505 case UPB_TYPE_INT32:
1506 case UPB_TYPE_INT64: f->defaultval.sint = 0; break;
1507 case UPB_TYPE_UINT64:
1508 case UPB_TYPE_UINT32:
1509 case UPB_TYPE_BOOL: f->defaultval.uint = 0; break;
1510 case UPB_TYPE_STRING:
1511 case UPB_TYPE_BYTES:
1512 f->defaultval.bytes = newstr("", 0);
1513 f->default_is_string = true;
1514 break;
1515 case UPB_TYPE_MESSAGE: break;
1516 case UPB_TYPE_ENUM:
1517 /* This is our special sentinel that indicates "not set" for an enum. */
1518 f->default_is_string = true;
1519 f->defaultval.bytes = NULL;
1520 break;
1521 }
1522}
1523
1524const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
1525 return f->subdef_is_symbolic ? NULL : f->sub.def;
1526}
1527
1528const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
1529 const upb_def *def = upb_fielddef_subdef(f);
1530 return def ? upb_dyncast_msgdef(def) : NULL;
1531}
1532
1533const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
1534 const upb_def *def = upb_fielddef_subdef(f);
1535 return def ? upb_dyncast_enumdef(def) : NULL;
1536}
1537
1538upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
1539 return (upb_def*)upb_fielddef_subdef(f);
1540}
1541
1542const char *upb_fielddef_subdefname(const upb_fielddef *f) {
1543 if (f->subdef_is_symbolic) {
1544 return f->sub.name;
1545 } else if (f->sub.def) {
1546 return upb_def_fullname(f->sub.def);
1547 } else {
1548 return NULL;
1549 }
1550}
1551
1552bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
1553 if (upb_fielddef_containingtype(f)) {
1554 upb_status_seterrmsg(
1555 s, "cannot change field number after adding to a message");
1556 return false;
1557 }
1558 if (number == 0 || number > UPB_MAX_FIELDNUMBER) {
1559 upb_status_seterrf(s, "invalid field number (%u)", number);
1560 return false;
1561 }
1562 f->number_ = number;
1563 return true;
1564}
1565
1566void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
Austin Schuh40c16522018-10-28 20:27:54 -07001567 UPB_ASSERT(!upb_fielddef_isfrozen(f));
1568 UPB_ASSERT(upb_fielddef_checktype(type));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001569 upb_fielddef_uninit_default(f);
1570 f->type_ = type;
1571 f->type_is_set_ = true;
1572 upb_fielddef_init_default(f);
1573}
1574
1575void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
Austin Schuh40c16522018-10-28 20:27:54 -07001576 UPB_ASSERT(!upb_fielddef_isfrozen(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001577 switch (type) {
1578 case UPB_DESCRIPTOR_TYPE_DOUBLE:
1579 upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
1580 break;
1581 case UPB_DESCRIPTOR_TYPE_FLOAT:
1582 upb_fielddef_settype(f, UPB_TYPE_FLOAT);
1583 break;
1584 case UPB_DESCRIPTOR_TYPE_INT64:
1585 case UPB_DESCRIPTOR_TYPE_SFIXED64:
1586 case UPB_DESCRIPTOR_TYPE_SINT64:
1587 upb_fielddef_settype(f, UPB_TYPE_INT64);
1588 break;
1589 case UPB_DESCRIPTOR_TYPE_UINT64:
1590 case UPB_DESCRIPTOR_TYPE_FIXED64:
1591 upb_fielddef_settype(f, UPB_TYPE_UINT64);
1592 break;
1593 case UPB_DESCRIPTOR_TYPE_INT32:
1594 case UPB_DESCRIPTOR_TYPE_SFIXED32:
1595 case UPB_DESCRIPTOR_TYPE_SINT32:
1596 upb_fielddef_settype(f, UPB_TYPE_INT32);
1597 break;
1598 case UPB_DESCRIPTOR_TYPE_UINT32:
1599 case UPB_DESCRIPTOR_TYPE_FIXED32:
1600 upb_fielddef_settype(f, UPB_TYPE_UINT32);
1601 break;
1602 case UPB_DESCRIPTOR_TYPE_BOOL:
1603 upb_fielddef_settype(f, UPB_TYPE_BOOL);
1604 break;
1605 case UPB_DESCRIPTOR_TYPE_STRING:
1606 upb_fielddef_settype(f, UPB_TYPE_STRING);
1607 break;
1608 case UPB_DESCRIPTOR_TYPE_BYTES:
1609 upb_fielddef_settype(f, UPB_TYPE_BYTES);
1610 break;
1611 case UPB_DESCRIPTOR_TYPE_GROUP:
1612 case UPB_DESCRIPTOR_TYPE_MESSAGE:
1613 upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
1614 break;
1615 case UPB_DESCRIPTOR_TYPE_ENUM:
1616 upb_fielddef_settype(f, UPB_TYPE_ENUM);
1617 break;
Austin Schuh40c16522018-10-28 20:27:54 -07001618 default: UPB_ASSERT(false);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001619 }
1620
1621 if (type == UPB_DESCRIPTOR_TYPE_FIXED64 ||
1622 type == UPB_DESCRIPTOR_TYPE_FIXED32 ||
1623 type == UPB_DESCRIPTOR_TYPE_SFIXED64 ||
1624 type == UPB_DESCRIPTOR_TYPE_SFIXED32) {
1625 upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED);
1626 } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 ||
1627 type == UPB_DESCRIPTOR_TYPE_SINT32) {
1628 upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG);
1629 } else {
1630 upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE);
1631 }
1632
1633 upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
1634}
1635
1636upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
1637 switch (upb_fielddef_type(f)) {
1638 case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT;
1639 case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
1640 case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL;
1641 case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
1642 case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES;
1643 case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM;
1644 case UPB_TYPE_INT32:
1645 switch (upb_fielddef_intfmt(f)) {
1646 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
1647 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32;
1648 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32;
1649 }
1650 case UPB_TYPE_INT64:
1651 switch (upb_fielddef_intfmt(f)) {
1652 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
1653 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64;
1654 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64;
1655 }
1656 case UPB_TYPE_UINT32:
1657 switch (upb_fielddef_intfmt(f)) {
1658 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
1659 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32;
1660 case UPB_INTFMT_ZIGZAG: return -1;
1661 }
1662 case UPB_TYPE_UINT64:
1663 switch (upb_fielddef_intfmt(f)) {
1664 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
1665 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64;
1666 case UPB_INTFMT_ZIGZAG: return -1;
1667 }
1668 case UPB_TYPE_MESSAGE:
1669 return upb_fielddef_istagdelim(f) ?
1670 UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
1671 }
1672 return 0;
1673}
1674
1675void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
Austin Schuh40c16522018-10-28 20:27:54 -07001676 UPB_ASSERT(!upb_fielddef_isfrozen(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001677 f->is_extension_ = is_extension;
1678}
1679
1680void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
Austin Schuh40c16522018-10-28 20:27:54 -07001681 UPB_ASSERT(!upb_fielddef_isfrozen(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001682 f->lazy_ = lazy;
1683}
1684
1685void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
Austin Schuh40c16522018-10-28 20:27:54 -07001686 UPB_ASSERT(!upb_fielddef_isfrozen(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001687 f->packed_ = packed;
1688}
1689
1690void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
Austin Schuh40c16522018-10-28 20:27:54 -07001691 UPB_ASSERT(!upb_fielddef_isfrozen(f));
1692 UPB_ASSERT(upb_fielddef_checklabel(label));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001693 f->label_ = label;
1694}
1695
1696void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
Austin Schuh40c16522018-10-28 20:27:54 -07001697 UPB_ASSERT(!upb_fielddef_isfrozen(f));
1698 UPB_ASSERT(upb_fielddef_checkintfmt(fmt));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001699 f->intfmt = fmt;
1700}
1701
1702void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
Austin Schuh40c16522018-10-28 20:27:54 -07001703 UPB_ASSERT(!upb_fielddef_isfrozen(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001704 f->tagdelim = tag_delim;
1705 f->tagdelim = tag_delim;
1706}
1707
1708static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
1709 if (!f->type_is_set_ || upb_fielddef_isfrozen(f) ||
1710 upb_fielddef_type(f) != type) {
Austin Schuh40c16522018-10-28 20:27:54 -07001711 UPB_ASSERT(false);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001712 return false;
1713 }
1714 if (f->default_is_string) {
1715 str_t *s = f->defaultval.bytes;
Austin Schuh40c16522018-10-28 20:27:54 -07001716 UPB_ASSERT(s || type == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001717 if (s) freestr(s);
1718 }
1719 f->default_is_string = false;
1720 return true;
1721}
1722
1723void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
1724 if (checksetdefault(f, UPB_TYPE_INT64))
1725 f->defaultval.sint = value;
1726}
1727
1728void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
1729 if ((upb_fielddef_type(f) == UPB_TYPE_ENUM &&
1730 checksetdefault(f, UPB_TYPE_ENUM)) ||
1731 checksetdefault(f, UPB_TYPE_INT32)) {
1732 f->defaultval.sint = value;
1733 }
1734}
1735
1736void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
1737 if (checksetdefault(f, UPB_TYPE_UINT64))
1738 f->defaultval.uint = value;
1739}
1740
1741void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
1742 if (checksetdefault(f, UPB_TYPE_UINT32))
1743 f->defaultval.uint = value;
1744}
1745
1746void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
1747 if (checksetdefault(f, UPB_TYPE_BOOL))
1748 f->defaultval.uint = value;
1749}
1750
1751void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
1752 if (checksetdefault(f, UPB_TYPE_FLOAT))
1753 f->defaultval.flt = value;
1754}
1755
1756void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
1757 if (checksetdefault(f, UPB_TYPE_DOUBLE))
1758 f->defaultval.dbl = value;
1759}
1760
1761bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
1762 upb_status *s) {
1763 str_t *str2;
Austin Schuh40c16522018-10-28 20:27:54 -07001764 UPB_ASSERT(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001765 if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
1766 return false;
1767
1768 if (f->default_is_string) {
1769 str_t *s = f->defaultval.bytes;
Austin Schuh40c16522018-10-28 20:27:54 -07001770 UPB_ASSERT(s || f->type_ == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001771 if (s) freestr(s);
1772 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07001773 UPB_ASSERT(f->type_ == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001774 }
1775
1776 str2 = newstr(str, len);
1777 f->defaultval.bytes = str2;
1778 f->default_is_string = true;
1779 return true;
1780}
1781
1782void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
1783 upb_status *s) {
Austin Schuh40c16522018-10-28 20:27:54 -07001784 UPB_ASSERT(f->type_is_set_);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001785 upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0, s);
1786}
1787
1788bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
1789 int32_t val;
Austin Schuh40c16522018-10-28 20:27:54 -07001790 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001791 return enumdefaultint32(f, &val);
1792}
1793
1794bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
Austin Schuh40c16522018-10-28 20:27:54 -07001795 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001796 return enumdefaultstr(f) != NULL;
1797}
1798
1799static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
1800 upb_status *s) {
1801 if (f->type_ == UPB_TYPE_MESSAGE) {
1802 if (upb_dyncast_msgdef(subdef)) return true;
1803 upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
1804 return false;
1805 } else if (f->type_ == UPB_TYPE_ENUM) {
1806 if (upb_dyncast_enumdef(subdef)) return true;
1807 upb_status_seterrmsg(s, "invalid subdef type for this enum field");
1808 return false;
1809 } else {
1810 upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
1811 return false;
1812 }
1813}
1814
1815static void release_subdef(upb_fielddef *f) {
1816 if (f->subdef_is_symbolic) {
Austin Schuh40c16522018-10-28 20:27:54 -07001817 upb_gfree(f->sub.name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001818 } else if (f->sub.def) {
1819 upb_unref2(f->sub.def, f);
1820 }
1821}
1822
1823bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
1824 upb_status *s) {
Austin Schuh40c16522018-10-28 20:27:54 -07001825 UPB_ASSERT(!upb_fielddef_isfrozen(f));
1826 UPB_ASSERT(upb_fielddef_hassubdef(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001827 if (subdef && !upb_subdef_typecheck(f, subdef, s)) return false;
1828 release_subdef(f);
1829 f->sub.def = subdef;
1830 f->subdef_is_symbolic = false;
1831 if (f->sub.def) upb_ref2(f->sub.def, f);
1832 return true;
1833}
1834
1835bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
1836 upb_status *s) {
1837 return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
1838}
1839
1840bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
1841 upb_status *s) {
1842 return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
1843}
1844
1845bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
1846 upb_status *s) {
Austin Schuh40c16522018-10-28 20:27:54 -07001847 char *name_copy;
1848 UPB_ASSERT(!upb_fielddef_isfrozen(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001849 if (!upb_fielddef_hassubdef(f)) {
1850 upb_status_seterrmsg(s, "field type does not accept a subdef");
1851 return false;
1852 }
Austin Schuh40c16522018-10-28 20:27:54 -07001853
1854 name_copy = upb_gstrdup(name);
1855 if (!name_copy) {
1856 upb_upberr_setoom(s);
1857 return false;
1858 }
1859
Brian Silverman9c614bc2016-02-15 20:20:02 -05001860 /* TODO: validate name (upb_isident() doesn't quite work atm because this name
1861 * may have a leading "."). */
1862 release_subdef(f);
Austin Schuh40c16522018-10-28 20:27:54 -07001863 f->sub.name = name_copy;
Brian Silverman9c614bc2016-02-15 20:20:02 -05001864 f->subdef_is_symbolic = true;
1865 return true;
1866}
1867
1868bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1869 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1870}
1871
1872bool upb_fielddef_isstring(const upb_fielddef *f) {
1873 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1874 upb_fielddef_type(f) == UPB_TYPE_BYTES;
1875}
1876
1877bool upb_fielddef_isseq(const upb_fielddef *f) {
1878 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1879}
1880
1881bool upb_fielddef_isprimitive(const upb_fielddef *f) {
1882 return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
1883}
1884
1885bool upb_fielddef_ismap(const upb_fielddef *f) {
1886 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1887 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1888}
1889
Austin Schuh40c16522018-10-28 20:27:54 -07001890bool upb_fielddef_haspresence(const upb_fielddef *f) {
1891 if (upb_fielddef_isseq(f)) return false;
1892 if (upb_fielddef_issubmsg(f)) return true;
1893
1894 /* Primitive field: return true unless there is a message that specifies
1895 * presence should not exist. */
1896 if (f->msg_is_symbolic || !f->msg.def) return true;
1897 return f->msg.def->syntax == UPB_SYNTAX_PROTO2;
1898}
1899
Brian Silverman9c614bc2016-02-15 20:20:02 -05001900bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1901 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1902}
1903
/* Returns true iff low <= x <= high (both bounds inclusive). */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
1907
/* Range validators for raw integer values.  Each returns true iff the value
 * lies within the inclusive range shown. */

/* Labels: 1..3. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}

/* Field types: 1..11. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}

/* Integer formats: 1..3. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
1911
/* Returns true iff |type| lies in the inclusive range 1..18 accepted for
 * descriptor types. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
1915
1916/* upb_msgdef *****************************************************************/
1917
1918static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
1919 void *closure) {
1920 upb_msg_oneof_iter o;
1921 const upb_msgdef *m = (const upb_msgdef*)r;
Austin Schuh40c16522018-10-28 20:27:54 -07001922 const upb_def *def = upb_msgdef_upcast(m);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001923 upb_msg_field_iter i;
1924 for(upb_msg_field_begin(&i, m);
1925 !upb_msg_field_done(&i);
1926 upb_msg_field_next(&i)) {
1927 upb_fielddef *f = upb_msg_iter_field(&i);
1928 visit(r, upb_fielddef_upcast2(f), closure);
1929 }
1930 for(upb_msg_oneof_begin(&o, m);
1931 !upb_msg_oneof_done(&o);
1932 upb_msg_oneof_next(&o)) {
1933 upb_oneofdef *f = upb_msg_iter_oneof(&o);
Austin Schuh40c16522018-10-28 20:27:54 -07001934 visit(r, upb_oneofdef_upcast(f), closure);
1935 }
1936 if (upb_def_file(def)) {
1937 visit(r, upb_filedef_upcast(upb_def_file(def)), closure);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001938 }
1939}
1940
1941static void freemsg(upb_refcounted *r) {
1942 upb_msgdef *m = (upb_msgdef*)r;
Brian Silverman9c614bc2016-02-15 20:20:02 -05001943 upb_strtable_uninit(&m->ntof);
1944 upb_inttable_uninit(&m->itof);
1945 upb_def_uninit(upb_msgdef_upcast_mutable(m));
Austin Schuh40c16522018-10-28 20:27:54 -07001946 upb_gfree(m);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001947}
1948
Austin Schuh40c16522018-10-28 20:27:54 -07001949const struct upb_refcounted_vtbl upb_msgdef_vtbl = {visitmsg, freemsg};
1950
Brian Silverman9c614bc2016-02-15 20:20:02 -05001951upb_msgdef *upb_msgdef_new(const void *owner) {
Austin Schuh40c16522018-10-28 20:27:54 -07001952 upb_msgdef *m = upb_gmalloc(sizeof(*m));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001953 if (!m) return NULL;
Austin Schuh40c16522018-10-28 20:27:54 -07001954
1955 if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &upb_msgdef_vtbl,
1956 owner)) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05001957 goto err2;
Austin Schuh40c16522018-10-28 20:27:54 -07001958 }
1959
1960 if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
1961 if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
Brian Silverman9c614bc2016-02-15 20:20:02 -05001962 m->map_entry = false;
Austin Schuh40c16522018-10-28 20:27:54 -07001963 m->syntax = UPB_SYNTAX_PROTO2;
Brian Silverman9c614bc2016-02-15 20:20:02 -05001964 return m;
1965
1966err1:
Brian Silverman9c614bc2016-02-15 20:20:02 -05001967 upb_inttable_uninit(&m->itof);
Austin Schuh40c16522018-10-28 20:27:54 -07001968err2:
1969 upb_gfree(m);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001970 return NULL;
1971}
1972
Brian Silverman9c614bc2016-02-15 20:20:02 -05001973bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
1974 upb_def *d = upb_msgdef_upcast_mutable(m);
1975 return upb_def_freeze(&d, 1, status);
1976}
1977
1978const char *upb_msgdef_fullname(const upb_msgdef *m) {
1979 return upb_def_fullname(upb_msgdef_upcast(m));
1980}
1981
Austin Schuh40c16522018-10-28 20:27:54 -07001982const char *upb_msgdef_name(const upb_msgdef *m) {
1983 return upb_def_name(upb_msgdef_upcast(m));
1984}
1985
Brian Silverman9c614bc2016-02-15 20:20:02 -05001986bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
1987 upb_status *s) {
1988 return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
1989}
1990
Austin Schuh40c16522018-10-28 20:27:54 -07001991bool upb_msgdef_setsyntax(upb_msgdef *m, upb_syntax_t syntax) {
1992 if (syntax != UPB_SYNTAX_PROTO2 && syntax != UPB_SYNTAX_PROTO3) {
1993 return false;
1994 }
1995
1996 m->syntax = syntax;
1997 return true;
1998}
1999
2000upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
2001 return m->syntax;
2002}
2003
Brian Silverman9c614bc2016-02-15 20:20:02 -05002004/* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
2005 * on status |s| and return false if not. */
2006static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
2007 upb_status *s) {
2008 if (upb_fielddef_containingtype(f) != NULL) {
2009 upb_status_seterrmsg(s, "fielddef already belongs to a message");
2010 return false;
2011 } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
2012 upb_status_seterrmsg(s, "field name or number were not set");
2013 return false;
Austin Schuh40c16522018-10-28 20:27:54 -07002014 } else if (upb_msgdef_itof(m, upb_fielddef_number(f))) {
2015 upb_status_seterrmsg(s, "duplicate field number");
2016 return false;
2017 } else if (upb_strtable_lookup(&m->ntof, upb_fielddef_name(f), NULL)) {
2018 upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
Brian Silverman9c614bc2016-02-15 20:20:02 -05002019 return false;
2020 }
2021 return true;
2022}
2023
2024static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
2025 release_containingtype(f);
2026 f->msg.def = m;
2027 f->msg_is_symbolic = false;
2028 upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
2029 upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
2030 upb_ref2(f, m);
2031 upb_ref2(m, f);
2032 if (ref_donor) upb_fielddef_unref(f, ref_donor);
2033}
2034
2035bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
2036 upb_status *s) {
2037 /* TODO: extensions need to have a separate namespace, because proto2 allows a
2038 * top-level extension (ie. one not in any package) to have the same name as a
2039 * field from the message.
2040 *
2041 * This also implies that there needs to be a separate lookup-by-name method
2042 * for extensions. It seems desirable for iteration to return both extensions
2043 * and non-extensions though.
2044 *
2045 * We also need to validate that the field number is in an extension range iff
2046 * it is an extension.
2047 *
2048 * This method is idempotent. Check if |f| is already part of this msgdef and
2049 * return immediately if so. */
2050 if (upb_fielddef_containingtype(f) == m) {
Austin Schuh40c16522018-10-28 20:27:54 -07002051 if (ref_donor) upb_fielddef_unref(f, ref_donor);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002052 return true;
2053 }
2054
2055 /* Check constraints for all fields before performing any action. */
2056 if (!check_field_add(m, f, s)) {
2057 return false;
2058 } else if (upb_fielddef_containingoneof(f) != NULL) {
2059 /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
2060 upb_status_seterrmsg(s, "fielddef is part of a oneof");
2061 return false;
2062 }
2063
2064 /* Constraint checks ok, perform the action. */
2065 add_field(m, f, ref_donor);
2066 return true;
2067}
2068
2069bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
2070 upb_status *s) {
2071 upb_oneof_iter it;
2072
2073 /* Check various conditions that would prevent this oneof from being added. */
2074 if (upb_oneofdef_containingtype(o)) {
2075 upb_status_seterrmsg(s, "oneofdef already belongs to a message");
2076 return false;
2077 } else if (upb_oneofdef_name(o) == NULL) {
2078 upb_status_seterrmsg(s, "oneofdef name was not set");
2079 return false;
Austin Schuh40c16522018-10-28 20:27:54 -07002080 } else if (upb_strtable_lookup(&m->ntof, upb_oneofdef_name(o), NULL)) {
2081 upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
Brian Silverman9c614bc2016-02-15 20:20:02 -05002082 return false;
2083 }
2084
2085 /* Check that all of the oneof's fields do not conflict with names or numbers
2086 * of fields already in the message. */
2087 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
2088 const upb_fielddef *f = upb_oneof_iter_field(&it);
2089 if (!check_field_add(m, f, s)) {
2090 return false;
2091 }
2092 }
2093
2094 /* Everything checks out -- commit now. */
2095
2096 /* Add oneof itself first. */
2097 o->parent = m;
Austin Schuh40c16522018-10-28 20:27:54 -07002098 upb_strtable_insert(&m->ntof, upb_oneofdef_name(o), upb_value_ptr(o));
Brian Silverman9c614bc2016-02-15 20:20:02 -05002099 upb_ref2(o, m);
2100 upb_ref2(m, o);
2101
2102 /* Add each field of the oneof directly to the msgdef. */
2103 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
2104 upb_fielddef *f = upb_oneof_iter_field(&it);
2105 add_field(m, f, NULL);
2106 }
2107
2108 if (ref_donor) upb_oneofdef_unref(o, ref_donor);
2109
2110 return true;
2111}
2112
2113const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
2114 upb_value val;
2115 return upb_inttable_lookup32(&m->itof, i, &val) ?
2116 upb_value_getptr(val) : NULL;
2117}
2118
2119const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
2120 size_t len) {
2121 upb_value val;
Austin Schuh40c16522018-10-28 20:27:54 -07002122
2123 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2124 return NULL;
2125 }
2126
2127 return upb_trygetfield(upb_value_getptr(val));
Brian Silverman9c614bc2016-02-15 20:20:02 -05002128}
2129
2130const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
2131 size_t len) {
2132 upb_value val;
Austin Schuh40c16522018-10-28 20:27:54 -07002133
2134 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2135 return NULL;
2136 }
2137
2138 return upb_trygetoneof(upb_value_getptr(val));
2139}
2140
2141bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
2142 const upb_fielddef **f, const upb_oneofdef **o) {
2143 upb_value val;
2144
2145 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2146 return false;
2147 }
2148
2149 *o = upb_trygetoneof(upb_value_getptr(val));
2150 *f = upb_trygetfield(upb_value_getptr(val));
2151 UPB_ASSERT((*o != NULL) ^ (*f != NULL)); /* Exactly one of the two should be set. */
2152 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002153}
2154
2155int upb_msgdef_numfields(const upb_msgdef *m) {
Austin Schuh40c16522018-10-28 20:27:54 -07002156 /* The number table contains only fields. */
2157 return upb_inttable_count(&m->itof);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002158}
2159
2160int upb_msgdef_numoneofs(const upb_msgdef *m) {
Austin Schuh40c16522018-10-28 20:27:54 -07002161 /* The name table includes oneofs, and the number table does not. */
2162 return upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002163}
2164
2165void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
Austin Schuh40c16522018-10-28 20:27:54 -07002166 UPB_ASSERT(!upb_msgdef_isfrozen(m));
Brian Silverman9c614bc2016-02-15 20:20:02 -05002167 m->map_entry = map_entry;
2168}
2169
2170bool upb_msgdef_mapentry(const upb_msgdef *m) {
2171 return m->map_entry;
2172}
2173
2174void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
2175 upb_inttable_begin(iter, &m->itof);
2176}
2177
2178void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
2179
2180bool upb_msg_field_done(const upb_msg_field_iter *iter) {
2181 return upb_inttable_done(iter);
2182}
2183
2184upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
2185 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
2186}
2187
2188void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
2189 upb_inttable_iter_setdone(iter);
2190}
2191
2192void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
Austin Schuh40c16522018-10-28 20:27:54 -07002193 upb_strtable_begin(iter, &m->ntof);
2194 /* We need to skip past any initial fields. */
2195 while (!upb_strtable_done(iter) &&
2196 !upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)))) {
2197 upb_strtable_next(iter);
2198 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05002199}
2200
Austin Schuh40c16522018-10-28 20:27:54 -07002201void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
2202 /* We need to skip past fields to return only oneofs. */
2203 do {
2204 upb_strtable_next(iter);
2205 } while (!upb_strtable_done(iter) &&
2206 !upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter))));
2207}
Brian Silverman9c614bc2016-02-15 20:20:02 -05002208
2209bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
2210 return upb_strtable_done(iter);
2211}
2212
2213upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
2214 return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
2215}
2216
2217void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
2218 upb_strtable_iter_setdone(iter);
2219}
2220
2221/* upb_oneofdef ***************************************************************/
2222
2223static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
2224 void *closure) {
2225 const upb_oneofdef *o = (const upb_oneofdef*)r;
2226 upb_oneof_iter i;
2227 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
2228 const upb_fielddef *f = upb_oneof_iter_field(&i);
2229 visit(r, upb_fielddef_upcast2(f), closure);
2230 }
2231 if (o->parent) {
2232 visit(r, upb_msgdef_upcast2(o->parent), closure);
2233 }
2234}
2235
2236static void freeoneof(upb_refcounted *r) {
2237 upb_oneofdef *o = (upb_oneofdef*)r;
2238 upb_strtable_uninit(&o->ntof);
2239 upb_inttable_uninit(&o->itof);
Austin Schuh40c16522018-10-28 20:27:54 -07002240 upb_gfree((void*)o->name);
2241 upb_gfree(o);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002242}
2243
Austin Schuh40c16522018-10-28 20:27:54 -07002244const struct upb_refcounted_vtbl upb_oneofdef_vtbl = {visitoneof, freeoneof};
2245
Brian Silverman9c614bc2016-02-15 20:20:02 -05002246upb_oneofdef *upb_oneofdef_new(const void *owner) {
Austin Schuh40c16522018-10-28 20:27:54 -07002247 upb_oneofdef *o = upb_gmalloc(sizeof(*o));
2248
2249 if (!o) {
2250 return NULL;
2251 }
2252
Brian Silverman9c614bc2016-02-15 20:20:02 -05002253 o->parent = NULL;
Austin Schuh40c16522018-10-28 20:27:54 -07002254 o->name = NULL;
2255
2256 if (!upb_refcounted_init(upb_oneofdef_upcast_mutable(o), &upb_oneofdef_vtbl,
2257 owner)) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05002258 goto err2;
Austin Schuh40c16522018-10-28 20:27:54 -07002259 }
2260
Brian Silverman9c614bc2016-02-15 20:20:02 -05002261 if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
2262 if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
Austin Schuh40c16522018-10-28 20:27:54 -07002263
Brian Silverman9c614bc2016-02-15 20:20:02 -05002264 return o;
2265
2266err1:
2267 upb_inttable_uninit(&o->itof);
2268err2:
Austin Schuh40c16522018-10-28 20:27:54 -07002269 upb_gfree(o);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002270 return NULL;
2271}
2272
Austin Schuh40c16522018-10-28 20:27:54 -07002273const char *upb_oneofdef_name(const upb_oneofdef *o) { return o->name; }
Brian Silverman9c614bc2016-02-15 20:20:02 -05002274
Austin Schuh40c16522018-10-28 20:27:54 -07002275bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s) {
2276 UPB_ASSERT(!upb_oneofdef_isfrozen(o));
Brian Silverman9c614bc2016-02-15 20:20:02 -05002277 if (upb_oneofdef_containingtype(o)) {
2278 upb_status_seterrmsg(s, "oneof already added to a message");
2279 return false;
2280 }
Austin Schuh40c16522018-10-28 20:27:54 -07002281
2282 if (!upb_isident(name, strlen(name), true, s)) {
2283 return false;
2284 }
2285
2286 name = upb_gstrdup(name);
2287 if (!name) {
2288 upb_status_seterrmsg(s, "One of memory");
2289 return false;
2290 }
2291
2292 upb_gfree((void*)o->name);
2293 o->name = name;
2294 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002295}
2296
2297const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
2298 return o->parent;
2299}
2300
2301int upb_oneofdef_numfields(const upb_oneofdef *o) {
2302 return upb_strtable_count(&o->ntof);
2303}
2304
Austin Schuh40c16522018-10-28 20:27:54 -07002305uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
2306 return o->index;
2307}
2308
Brian Silverman9c614bc2016-02-15 20:20:02 -05002309bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
2310 const void *ref_donor,
2311 upb_status *s) {
Austin Schuh40c16522018-10-28 20:27:54 -07002312 UPB_ASSERT(!upb_oneofdef_isfrozen(o));
2313 UPB_ASSERT(!o->parent || !upb_msgdef_isfrozen(o->parent));
Brian Silverman9c614bc2016-02-15 20:20:02 -05002314
2315 /* This method is idempotent. Check if |f| is already part of this oneofdef
2316 * and return immediately if so. */
2317 if (upb_fielddef_containingoneof(f) == o) {
2318 return true;
2319 }
2320
2321 /* The field must have an OPTIONAL label. */
2322 if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
2323 upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
2324 return false;
2325 }
2326
2327 /* Check that no field with this name or number exists already in the oneof.
2328 * Also check that the field is not already part of a oneof. */
2329 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
2330 upb_status_seterrmsg(s, "field name or number were not set");
2331 return false;
2332 } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
2333 upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
2334 upb_status_seterrmsg(s, "duplicate field name or number");
2335 return false;
2336 } else if (upb_fielddef_containingoneof(f) != NULL) {
2337 upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
2338 return false;
2339 }
2340
2341 /* We allow adding a field to the oneof either if the field is not part of a
2342 * msgdef, or if it is and we are also part of the same msgdef. */
2343 if (o->parent == NULL) {
2344 /* If we're not in a msgdef, the field cannot be either. Otherwise we would
2345 * need to magically add this oneof to a msgdef to remain consistent, which
2346 * is surprising behavior. */
2347 if (upb_fielddef_containingtype(f) != NULL) {
2348 upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
2349 "oneof does not");
2350 return false;
2351 }
2352 } else {
2353 /* If we're in a msgdef, the user can add fields that either aren't in any
2354 * msgdef (in which case they're added to our msgdef) or already a part of
2355 * our msgdef. */
2356 if (upb_fielddef_containingtype(f) != NULL &&
2357 upb_fielddef_containingtype(f) != o->parent) {
2358 upb_status_seterrmsg(s, "fielddef belongs to a different message "
2359 "than oneof");
2360 return false;
2361 }
2362 }
2363
2364 /* Commit phase. First add the field to our parent msgdef, if any, because
2365 * that may fail; then add the field to our own tables. */
2366
2367 if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
2368 if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
2369 return false;
2370 }
2371 }
2372
2373 release_containingtype(f);
2374 f->oneof = o;
2375 upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
2376 upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
2377 upb_ref2(f, o);
2378 upb_ref2(o, f);
2379 if (ref_donor) upb_fielddef_unref(f, ref_donor);
2380
2381 return true;
2382}
2383
2384const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
2385 const char *name, size_t length) {
2386 upb_value val;
2387 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
2388 upb_value_getptr(val) : NULL;
2389}
2390
2391const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
2392 upb_value val;
2393 return upb_inttable_lookup32(&o->itof, num, &val) ?
2394 upb_value_getptr(val) : NULL;
2395}
2396
2397void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
2398 upb_inttable_begin(iter, &o->itof);
2399}
2400
2401void upb_oneof_next(upb_oneof_iter *iter) {
2402 upb_inttable_next(iter);
2403}
2404
2405bool upb_oneof_done(upb_oneof_iter *iter) {
2406 return upb_inttable_done(iter);
2407}
2408
2409upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
2410 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
2411}
2412
2413void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
2414 upb_inttable_iter_setdone(iter);
2415}
2416
Austin Schuh40c16522018-10-28 20:27:54 -07002417/* upb_filedef ****************************************************************/
Brian Silverman9c614bc2016-02-15 20:20:02 -05002418
Austin Schuh40c16522018-10-28 20:27:54 -07002419static void visitfiledef(const upb_refcounted *r, upb_refcounted_visit *visit,
2420 void *closure) {
2421 const upb_filedef *f = (const upb_filedef*)r;
2422 size_t i;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002423
Austin Schuh40c16522018-10-28 20:27:54 -07002424 for(i = 0; i < upb_filedef_defcount(f); i++) {
2425 visit(r, upb_def_upcast(upb_filedef_def(f, i)), closure);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002426 }
Austin Schuh40c16522018-10-28 20:27:54 -07002427}
Brian Silverman9c614bc2016-02-15 20:20:02 -05002428
Austin Schuh40c16522018-10-28 20:27:54 -07002429static void freefiledef(upb_refcounted *r) {
2430 upb_filedef *f = (upb_filedef*)r;
2431 size_t i;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002432
Austin Schuh40c16522018-10-28 20:27:54 -07002433 for(i = 0; i < upb_filedef_depcount(f); i++) {
2434 upb_filedef_unref(upb_filedef_dep(f, i), f);
2435 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05002436
Austin Schuh40c16522018-10-28 20:27:54 -07002437 upb_inttable_uninit(&f->defs);
2438 upb_inttable_uninit(&f->deps);
2439 upb_gfree((void*)f->name);
2440 upb_gfree((void*)f->package);
2441 upb_gfree((void*)f->phpprefix);
2442 upb_gfree((void*)f->phpnamespace);
2443 upb_gfree(f);
2444}
2445
2446const struct upb_refcounted_vtbl upb_filedef_vtbl = {visitfiledef, freefiledef};
2447
2448upb_filedef *upb_filedef_new(const void *owner) {
2449 upb_filedef *f = upb_gmalloc(sizeof(*f));
2450
2451 if (!f) {
2452 return NULL;
2453 }
2454
2455 f->package = NULL;
2456 f->name = NULL;
2457 f->phpprefix = NULL;
2458 f->phpnamespace = NULL;
2459 f->syntax = UPB_SYNTAX_PROTO2;
2460
2461 if (!upb_refcounted_init(upb_filedef_upcast_mutable(f), &upb_filedef_vtbl,
2462 owner)) {
2463 goto err;
2464 }
2465
2466 if (!upb_inttable_init(&f->defs, UPB_CTYPE_CONSTPTR)) {
2467 goto err;
2468 }
2469
2470 if (!upb_inttable_init(&f->deps, UPB_CTYPE_CONSTPTR)) {
2471 goto err2;
2472 }
2473
2474 return f;
2475
2476
2477err2:
2478 upb_inttable_uninit(&f->defs);
2479
2480err:
2481 upb_gfree(f);
2482 return NULL;
2483}
2484
2485const char *upb_filedef_name(const upb_filedef *f) {
2486 return f->name;
2487}
2488
2489const char *upb_filedef_package(const upb_filedef *f) {
2490 return f->package;
2491}
2492
2493const char *upb_filedef_phpprefix(const upb_filedef *f) {
2494 return f->phpprefix;
2495}
2496
2497const char *upb_filedef_phpnamespace(const upb_filedef *f) {
2498 return f->phpnamespace;
2499}
2500
2501upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
2502 return f->syntax;
2503}
2504
2505size_t upb_filedef_defcount(const upb_filedef *f) {
2506 return upb_inttable_count(&f->defs);
2507}
2508
2509size_t upb_filedef_depcount(const upb_filedef *f) {
2510 return upb_inttable_count(&f->deps);
2511}
2512
2513const upb_def *upb_filedef_def(const upb_filedef *f, size_t i) {
2514 upb_value v;
2515
2516 if (upb_inttable_lookup32(&f->defs, i, &v)) {
2517 return upb_value_getconstptr(v);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002518 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07002519 return NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002520 }
2521}
2522
Austin Schuh40c16522018-10-28 20:27:54 -07002523const upb_filedef *upb_filedef_dep(const upb_filedef *f, size_t i) {
2524 upb_value v;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002525
Austin Schuh40c16522018-10-28 20:27:54 -07002526 if (upb_inttable_lookup32(&f->deps, i, &v)) {
2527 return upb_value_getconstptr(v);
2528 } else {
2529 return NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002530 }
2531}
2532
Austin Schuh40c16522018-10-28 20:27:54 -07002533bool upb_filedef_setname(upb_filedef *f, const char *name, upb_status *s) {
2534 name = upb_gstrdup(name);
2535 if (!name) {
2536 upb_upberr_setoom(s);
2537 return false;
2538 }
2539 upb_gfree((void*)f->name);
2540 f->name = name;
2541 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002542}
2543
Austin Schuh40c16522018-10-28 20:27:54 -07002544bool upb_filedef_setpackage(upb_filedef *f, const char *package,
2545 upb_status *s) {
2546 if (!upb_isident(package, strlen(package), true, s)) return false;
2547 package = upb_gstrdup(package);
2548 if (!package) {
2549 upb_upberr_setoom(s);
2550 return false;
2551 }
2552 upb_gfree((void*)f->package);
2553 f->package = package;
2554 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002555}
2556
Austin Schuh40c16522018-10-28 20:27:54 -07002557bool upb_filedef_setphpprefix(upb_filedef *f, const char *phpprefix,
2558 upb_status *s) {
2559 phpprefix = upb_gstrdup(phpprefix);
2560 if (!phpprefix) {
2561 upb_upberr_setoom(s);
2562 return false;
2563 }
2564 upb_gfree((void*)f->phpprefix);
2565 f->phpprefix = phpprefix;
2566 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002567}
2568
Austin Schuh40c16522018-10-28 20:27:54 -07002569bool upb_filedef_setphpnamespace(upb_filedef *f, const char *phpnamespace,
2570 upb_status *s) {
2571 phpnamespace = upb_gstrdup(phpnamespace);
2572 if (!phpnamespace) {
2573 upb_upberr_setoom(s);
2574 return false;
2575 }
2576 upb_gfree((void*)f->phpnamespace);
2577 f->phpnamespace = phpnamespace;
2578 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002579}
2580
Austin Schuh40c16522018-10-28 20:27:54 -07002581bool upb_filedef_setsyntax(upb_filedef *f, upb_syntax_t syntax,
2582 upb_status *s) {
2583 UPB_UNUSED(s);
2584 if (syntax != UPB_SYNTAX_PROTO2 &&
2585 syntax != UPB_SYNTAX_PROTO3) {
2586 upb_status_seterrmsg(s, "Unknown syntax value.");
2587 return false;
2588 }
2589 f->syntax = syntax;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002590
Austin Schuh40c16522018-10-28 20:27:54 -07002591 {
2592 /* Set all messages in this file to match. */
2593 size_t i;
2594 for (i = 0; i < upb_filedef_defcount(f); i++) {
2595 /* Casting const away is safe since all defs in mutable filedef must
2596 * also be mutable. */
2597 upb_def *def = (upb_def*)upb_filedef_def(f, i);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002598
Austin Schuh40c16522018-10-28 20:27:54 -07002599 upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
2600 if (m) {
2601 m->syntax = syntax;
2602 }
2603 }
2604 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05002605
2606 return true;
2607}
2608
Austin Schuh40c16522018-10-28 20:27:54 -07002609bool upb_filedef_adddef(upb_filedef *f, upb_def *def, const void *ref_donor,
2610 upb_status *s) {
2611 if (def->file) {
2612 upb_status_seterrmsg(s, "Def is already part of another filedef.");
2613 return false;
2614 }
2615
2616 if (upb_inttable_push(&f->defs, upb_value_constptr(def))) {
2617 def->file = f;
2618 upb_ref2(def, f);
2619 upb_ref2(f, def);
2620 if (ref_donor) upb_def_unref(def, ref_donor);
2621 if (def->type == UPB_DEF_MSG) {
2622 upb_downcast_msgdef_mutable(def)->syntax = f->syntax;
2623 }
2624 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002625 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07002626 upb_upberr_setoom(s);
2627 return false;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002628 }
2629}
2630
Austin Schuh40c16522018-10-28 20:27:54 -07002631bool upb_filedef_adddep(upb_filedef *f, const upb_filedef *dep) {
2632 if (upb_inttable_push(&f->deps, upb_value_constptr(dep))) {
2633 /* Regular ref instead of ref2 because files can't form cycles. */
2634 upb_filedef_ref(dep, f);
2635 return true;
2636 } else {
2637 return false;
2638 }
2639}
Brian Silverman9c614bc2016-02-15 20:20:02 -05002640
Austin Schuh40c16522018-10-28 20:27:54 -07002641void upb_symtab_free(upb_symtab *s) {
2642 upb_strtable_iter i;
2643 upb_strtable_begin(&i, &s->symtab);
2644 for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
2645 const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
2646 upb_def_unref(def, s);
2647 }
2648 upb_strtable_uninit(&s->symtab);
2649 upb_gfree(s);
2650}
Brian Silverman9c614bc2016-02-15 20:20:02 -05002651
Austin Schuh40c16522018-10-28 20:27:54 -07002652upb_symtab *upb_symtab_new() {
2653 upb_symtab *s = upb_gmalloc(sizeof(*s));
2654 if (!s) {
2655 return NULL;
2656 }
2657
2658 upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
2659 return s;
2660}
2661
2662const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
2663 upb_value v;
2664 upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
2665 upb_value_getptr(v) : NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002666 return ret;
2667}
2668
Austin Schuh40c16522018-10-28 20:27:54 -07002669const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
2670 upb_value v;
2671 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
2672 upb_value_getptr(v) : NULL;
2673 return def ? upb_dyncast_msgdef(def) : NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002674}
2675
Austin Schuh40c16522018-10-28 20:27:54 -07002676const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
2677 upb_value v;
2678 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
2679 upb_value_getptr(v) : NULL;
2680 return def ? upb_dyncast_enumdef(def) : NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002681}
2682
Austin Schuh40c16522018-10-28 20:27:54 -07002683/* Given a symbol and the base symbol inside which it is defined, find the
2684 * symbol's definition in t. */
2685static upb_def *upb_resolvename(const upb_strtable *t,
2686 const char *base, const char *sym) {
2687 if(strlen(sym) == 0) return NULL;
2688 if(sym[0] == '.') {
2689 /* Symbols starting with '.' are absolute, so we do a single lookup.
2690 * Slice to omit the leading '.' */
2691 upb_value v;
2692 return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002693 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07002694 /* Remove components from base until we find an entry or run out.
2695 * TODO: This branch is totally broken, but currently not used. */
2696 (void)base;
2697 UPB_ASSERT(false);
2698 return NULL;
2699 }
2700}
2701
2702const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
2703 const char *sym) {
2704 upb_def *ret = upb_resolvename(&s->symtab, base, sym);
2705 return ret;
2706}
2707
/* Adds the "n" defs in "defs" to symtab "s" as a single batch.
 *
 * Steps, in order:
 *   1. Collect the defs into a temporary table ("addtab") keyed by full name,
 *      rejecting frozen defs, anonymous defs, name conflicts and fielddefs.
 *   2. For every message in addtab, resolve each field's symbolic subdef name,
 *      first against addtab and then against the existing contents of "s".
 *   3. Validate the defs, then freeze them (together with "freeze_also" when
 *      it is non-NULL).
 *   4. Insert every def into s->symtab.
 *
 * The ref on each accepted def is donated from "ref_donor" to "s". On failure
 * those refs are donated back, "status" holds the error and false is returned.
 * Returns true on success, including when there is nothing to add or freeze. */
2708/* TODO(haberman): we need a lot more testing of error conditions. */
2709static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
2710 void *ref_donor, upb_refcounted *freeze_also,
2711 upb_status *status) {
2712 size_t i;
2713 size_t add_n;
2714 size_t freeze_n;
2715 upb_strtable_iter iter;
2716 upb_refcounted **add_objs = NULL;
2717 upb_def **add_defs = NULL;
2718 size_t add_objs_size;
2719 upb_strtable addtab;
2720
/* Nothing to add and nothing extra to freeze. */
2721 if (n == 0 && !freeze_also) {
2722 return true;
2723 }
2724
2725 if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
2726 upb_status_seterrmsg(status, "out of memory");
2727 return false;
2728 }
2729
2730 /* Add new defs to our "add" set. */
2731 for (i = 0; i < n; i++) {
2732 upb_def *def = defs[i];
2733 const char *fullname;
2734 upb_fielddef *f;
2735
2736 if (upb_def_isfrozen(def)) {
2737 upb_status_seterrmsg(status, "added defs must be mutable");
2738 goto err;
2739 }
/* Redundant with the check just above. */
2740 UPB_ASSERT(!upb_def_isfrozen(def));
2741 fullname = upb_def_fullname(def);
2742 if (!fullname) {
2743 upb_status_seterrmsg(
2744 status, "Anonymous defs cannot be added to a symtab");
2745 goto err;
2746 }
2747
2748 f = upb_dyncast_fielddef_mutable(def);
2749
2750 if (f) {
2751 if (!upb_fielddef_containingtypename(f)) {
2752 upb_status_seterrmsg(status,
2753 "Standalone fielddefs must have a containing type "
2754 "(extendee) name set");
2755 goto err;
2756 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05002757 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07002758 if (upb_strtable_lookup(&addtab, fullname, NULL)) {
2759 upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
2760 goto err;
2761 }
2762 if (upb_strtable_lookup(&s->symtab, fullname, NULL)) {
2763 upb_status_seterrf(status, "Symtab already has a def named '%s'",
2764 fullname);
2765 goto err;
2766 }
2767 if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
2768 goto oom_err;
/* The def is now tracked in addtab, so move its ref from ref_donor to s. The
 * err path below walks addtab to donate these refs back. */
2769 upb_def_donateref(def, ref_donor, s);
2770 }
2771
/* Every fielddef is rejected here, including one that passed the
 * containing-type check above. */
2772 if (upb_dyncast_fielddef_mutable(def)) {
2773 /* TODO(haberman): allow adding extensions attached to files. */
2774 upb_status_seterrf(status, "Can't add extensions to symtab.\n");
2775 goto err;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002776 }
2777 }
Austin Schuh40c16522018-10-28 20:27:54 -07002778
2779 /* Now using the table, resolve symbolic references for subdefs. */
2780 upb_strtable_begin(&iter, &addtab);
2781 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
2782 const char *base;
2783 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
2784 upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
2785 upb_msg_field_iter j;
2786
/* Only message defs have fields whose subdefs need resolving. */
2787 if (!m) continue;
2788 /* Type names are resolved relative to the message in which they appear. */
2789 base = upb_msgdef_fullname(m);
2790
2791 for(upb_msg_field_begin(&j, m);
2792 !upb_msg_field_done(&j);
2793 upb_msg_field_next(&j)) {
2794 upb_fielddef *f = upb_msg_iter_field(&j);
2795 const char *name = upb_fielddef_subdefname(f);
/* Skip fields that have no symbolic name or already have a subdef set. */
2796 if (name && !upb_fielddef_subdef(f)) {
2797 /* Try the lookup in the current set of to-be-added defs first. If not
2798 * there, try existing defs. */
2799 upb_def *subdef = upb_resolvename(&addtab, base, name);
2800 if (subdef == NULL) {
2801 subdef = upb_resolvename(&s->symtab, base, name);
2802 }
2803 if (subdef == NULL) {
2804 upb_status_seterrf(
2805 status, "couldn't resolve name '%s' in message '%s'", name, base);
2806 goto err;
2807 } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
2808 goto err;
2809 }
2810 }
2811 }
2812 }
2813
2814 /* We need an array of the defs in addtab, for passing to
2815 * upb_refcounted_freeze(). */
2816 add_objs_size = upb_strtable_count(&addtab);
/* Reserve one extra slot so freeze_also can be appended further down. */
2817 if (freeze_also) {
2818 add_objs_size++;
2819 }
2820
2821 add_defs = upb_gmalloc(sizeof(void*) * add_objs_size);
2822 if (add_defs == NULL) goto oom_err;
2823 upb_strtable_begin(&iter, &addtab);
2824 for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
2825 add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
2826 }
2827
2828 /* Validate defs. */
2829 if (!_upb_def_validate(add_defs, add_n, status)) {
2830 goto err;
2831 }
2832
2833 /* Cheat a little and give the array a new type.
2834 * This is probably undefined behavior, but this code will be deleted soon. */
2835 add_objs = (upb_refcounted**)add_defs;
2836
/* freeze_n counts the defs plus freeze_also; add_n keeps counting defs only,
 * so the insert loop below does not put freeze_also into the symtab. */
2837 freeze_n = add_n;
2838 if (freeze_also) {
2839 add_objs[freeze_n++] = freeze_also;
2840 }
2841
2842 if (!upb_refcounted_freeze(add_objs, freeze_n, status,
2843 UPB_MAX_MESSAGE_DEPTH * 2)) {
2844 goto err;
2845 }
2846
2847 /* This must be delayed until all errors have been detected, since error
2848 * recovery code uses this table to cleanup defs. */
2849 upb_strtable_uninit(&addtab);
2850
2851 /* TODO(haberman) we don't properly handle errors after this point (like
2852 * OOM in upb_strtable_insert() below). */
2853 for (i = 0; i < add_n; i++) {
2854 upb_def *def = (upb_def*)add_objs[i];
2855 const char *name = upb_def_fullname(def);
2856 bool success;
2857 success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
2858 UPB_ASSERT(success);
2859 }
2860 upb_gfree(add_defs);
2861 return true;
2862
/* oom_err only sets the message and then falls through into err. */
2863oom_err:
2864 upb_status_seterrmsg(status, "out of memory");
2865err: {
2866 /* We need to donate the refs back. */
2867 upb_strtable_begin(&iter, &addtab);
2868 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
2869 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
2870 upb_def_donateref(def, s, ref_donor);
2871 }
2872 }
2873 upb_strtable_uninit(&addtab);
/* add_defs is still NULL if the failure happened before it was allocated. */
2874 upb_gfree(add_defs);
2875 UPB_ASSERT(!upb_ok(status));
2876 return false;
Brian Silverman9c614bc2016-02-15 20:20:02 -05002877}
2878
Austin Schuh40c16522018-10-28 20:27:54 -07002879bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
2880 void *ref_donor, upb_status *status) {
2881 return symtab_add(s, defs, n, ref_donor, NULL, status);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002882}
2883
Austin Schuh40c16522018-10-28 20:27:54 -07002884bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) {
2885 size_t n;
2886 size_t i;
2887 upb_def **defs;
2888 bool ret;
2889
2890 n = upb_filedef_defcount(file);
2891 if (n == 0) {
2892 return true;
2893 }
2894 defs = upb_gmalloc(sizeof(*defs) * n);
2895
2896 if (defs == NULL) {
2897 upb_status_seterrmsg(status, "Out of memory");
2898 return false;
2899 }
2900
2901 for (i = 0; i < n; i++) {
2902 defs[i] = upb_filedef_mutabledef(file, i);
2903 }
2904
2905 ret = symtab_add(s, defs, n, NULL, upb_filedef_upcast_mutable(file), status);
2906
2907 upb_gfree(defs);
2908 return ret;
2909}
2910
2911/* Iteration. */
2912
2913static void advance_to_matching(upb_symtab_iter *iter) {
2914 if (iter->type == UPB_DEF_ANY)
2915 return;
2916
2917 while (!upb_strtable_done(&iter->iter) &&
2918 iter->type != upb_symtab_iter_def(iter)->type) {
2919 upb_strtable_next(&iter->iter);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002920 }
2921}
2922
Austin Schuh40c16522018-10-28 20:27:54 -07002923void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
2924 upb_deftype_t type) {
2925 upb_strtable_begin(&iter->iter, &s->symtab);
2926 iter->type = type;
2927 advance_to_matching(iter);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002928}
2929
Austin Schuh40c16522018-10-28 20:27:54 -07002930void upb_symtab_next(upb_symtab_iter *iter) {
2931 upb_strtable_next(&iter->iter);
2932 advance_to_matching(iter);
Brian Silverman9c614bc2016-02-15 20:20:02 -05002933}
Austin Schuh40c16522018-10-28 20:27:54 -07002934
2935bool upb_symtab_done(const upb_symtab_iter *iter) {
2936 return upb_strtable_done(&iter->iter);
2937}
2938
2939const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
2940 return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
2941}
2942/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
2943
2944
/* Bytes reserved for one varint. upb_encode_varint() emits 7 payload bits per
 * byte, so a 64-bit value needs at most 10 bytes. */
2945#define UPB_PB_VARINT_MAX_LEN 10
/* Returns false from the enclosing function when "x" evaluates to false.
 * Wrapped in do/while(0) so it acts as a single statement. */
2946#define CHK(x) do { if (!(x)) { return false; } } while(0)
2947
2948/* Maps descriptor type -> upb field type. */
/* Indexed by a field's descriptor type (upb_encode_array() indexes it with
 * f->type). Several descriptor types share one upb field type, e.g. INT32,
 * SFIXED32 and SINT32 all map to UPB_TYPE_INT32.
 * NOTE(review): slot 0 holds a wire-type constant rather than a UPB_TYPE_*
 * value; presumably a placeholder because no descriptor type is 0 -- confirm. */
2949static const uint8_t upb_desctype_to_fieldtype2[] = {
2950 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
2951 UPB_TYPE_DOUBLE, /* DOUBLE */
2952 UPB_TYPE_FLOAT, /* FLOAT */
2953 UPB_TYPE_INT64, /* INT64 */
2954 UPB_TYPE_UINT64, /* UINT64 */
2955 UPB_TYPE_INT32, /* INT32 */
2956 UPB_TYPE_UINT64, /* FIXED64 */
2957 UPB_TYPE_UINT32, /* FIXED32 */
2958 UPB_TYPE_BOOL, /* BOOL */
2959 UPB_TYPE_STRING, /* STRING */
2960 UPB_TYPE_MESSAGE, /* GROUP */
2961 UPB_TYPE_MESSAGE, /* MESSAGE */
2962 UPB_TYPE_BYTES, /* BYTES */
2963 UPB_TYPE_UINT32, /* UINT32 */
2964 UPB_TYPE_ENUM, /* ENUM */
2965 UPB_TYPE_INT32, /* SFIXED32 */
2966 UPB_TYPE_INT64, /* SFIXED64 */
2967 UPB_TYPE_INT32, /* SINT32 */
2968 UPB_TYPE_INT64, /* SINT64 */
2969};
2970
/* Writes "val" to "buf" as a base-128 varint, least-significant group first,
 * with the high bit set on every byte except the last.
 * Returns the number of bytes written (at least 1). */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t len = 0;

  /* Fast path: the value fits in a single byte. */
  if (val < 128) {
    buf[0] = val;
    return 1;
  }

  do {
    uint8_t group = val & 0x7fU;
    val >>= 7;
    if (val != 0) {
      group |= 0x80U; /* more groups follow */
    }
    buf[len++] = group;
  } while (val != 0);

  return len;
}
2983
/* ZigZag-encodes a signed 32-bit value: 0, -1, 1, -2, ... map to 0, 1, 2, 3, ...
 * The left shift is done on the unsigned representation because shifting a
 * negative signed value left is undefined behaviour in C. */
static uint32_t upb_zzencode_32(int32_t n) {
  return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
}
/* ZigZag-encodes a signed 64-bit value: 0, -1, 1, -2, ... map to 0, 1, 2, 3, ...
 * The left shift is done on the unsigned representation because shifting a
 * negative signed value left is undefined behaviour in C. */
static uint64_t upb_zzencode_64(int64_t n) {
  return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
}
2986
/* State of one encode operation. Output is produced back-to-front:
 * upb_encode_reserve() moves "ptr" down towards "buf", so the bytes written so
 * far occupy [ptr, limit). */
2987typedef struct {
/* Passed to upb_env_realloc() when the buffer has to grow. */
2988 upb_env *env;
/* buf: start of the allocation; ptr: first byte written so far; limit: one
 * past the end of the allocation. All three start out NULL in upb_encode(). */
2989 char *buf, *ptr, *limit;
2990} upb_encstate;
2991
/* Returns the smallest value of the form 128 * 2^k that is >= "bytes"; the
 * result is therefore never below 128. */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t capacity;

  for (capacity = 128; capacity < bytes; capacity <<= 1) {
    /* keep doubling */
  }
  return capacity;
}
2999
3000static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
3001 size_t old_size = e->limit - e->buf;
3002 size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
3003 char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
3004 CHK(new_buf);
3005
3006 /* We want previous data at the end, realloc() put it at the beginning. */
3007 memmove(e->limit - old_size, e->buf, old_size);
3008
3009 e->ptr = new_buf + new_size - (e->limit - e->ptr);
3010 e->limit = new_buf + new_size;
3011 e->buf = new_buf;
3012 return true;
3013}
3014
3015/* Call to ensure that at least "bytes" bytes are available for writing at
3016 * e->ptr. Returns false if the bytes could not be allocated. */
3017static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
3018 CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
3019 upb_encode_growbuffer(e, bytes));
3020
3021 e->ptr -= bytes;
3022 return true;
3023}
3024
3025/* Writes the given bytes to the buffer, handling reserve/advance. */
3026static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
3027 CHK(upb_encode_reserve(e, len));
3028 memcpy(e->ptr, data, len);
3029 return true;
3030}
3031
3032static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
3033 /* TODO(haberman): byte-swap for big endian. */
3034 return upb_put_bytes(e, &val, sizeof(uint64_t));
3035}
3036
3037static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
3038 /* TODO(haberman): byte-swap for big endian. */
3039 return upb_put_bytes(e, &val, sizeof(uint32_t));
3040}
3041
3042static bool upb_put_varint(upb_encstate *e, uint64_t val) {
3043 size_t len;
3044 char *start;
3045 CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN));
3046 len = upb_encode_varint(val, e->ptr);
3047 start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
3048 memmove(start, e->ptr, len);
3049 e->ptr = start;
3050 return true;
3051}
3052
3053static bool upb_put_double(upb_encstate *e, double d) {
3054 uint64_t u64;
3055 UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
3056 memcpy(&u64, &d, sizeof(uint64_t));
3057 return upb_put_fixed64(e, u64);
3058}
3059
3060static bool upb_put_float(upb_encstate *e, float d) {
3061 uint32_t u32;
3062 UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
3063 memcpy(&u32, &d, sizeof(uint32_t));
3064 return upb_put_fixed32(e, u32);
3065}
3066
3067static uint32_t upb_readcase(const char *msg, const upb_msglayout_msginit_v1 *m,
3068 int oneof_index) {
3069 uint32_t ret;
3070 memcpy(&ret, msg + m->oneofs[oneof_index].case_offset, sizeof(ret));
3071 return ret;
3072}
3073
3074static bool upb_readhasbit(const char *msg,
3075 const upb_msglayout_fieldinit_v1 *f) {
3076 UPB_ASSERT(f->hasbit != UPB_NO_HASBIT);
3077 return msg[f->hasbit / 8] & (1 << (f->hasbit % 8));
3078}
3079
3080static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
3081 return upb_put_varint(e, (field_number << 3) | wire_type);
3082}
3083
3084static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
3085 size_t size) {
3086 size_t bytes = arr->len * size;
3087 return upb_put_bytes(e, arr->data, bytes) && upb_put_varint(e, bytes);
3088}
3089
/* Forward declaration: the array and scalar field encoders below call this for
 * submessages, and it calls them back for each field. */
3090bool upb_encode_message(upb_encstate *e, const char *msg,
3091 const upb_msglayout_msginit_v1 *m,
3092 size_t *size);
3093
/* Encodes the repeated field "f" of a message with layout "m"; "field_mem"
 * points at the slot holding the upb_array pointer.
 *
 * Output is built back-to-front, so elements are visited last-to-first and
 * each length/tag is written after the payload it precedes on the wire.
 * A NULL or empty array emits nothing.
 *
 * Primitive arrays break out of the switch after writing payload + length, and
 * the shared code at the bottom adds the DELIMITED tag (packed encoding).
 * String, bytes, group and message arrays write a tag per element and return
 * directly from inside the switch.
 *
 * Returns false as soon as any write fails. */
3094static bool upb_encode_array(upb_encstate *e, const char *field_mem,
3095 const upb_msglayout_msginit_v1 *m,
3096 const upb_msglayout_fieldinit_v1 *f) {
3097 const upb_array *arr = *(const upb_array**)field_mem;
3098
/* The do/while loops below assume at least one element. */
3099 if (arr == NULL || arr->len == 0) {
3100 return true;
3101 }
3102
3103 UPB_ASSERT(arr->type == upb_desctype_to_fieldtype2[f->type]);
3104
/* VARINT_CASE(ctype, encode): writes every element (read through "ptr" as
 * "ctype") as a varint, last to first, then writes the byte length of what was
 * just emitted, measured as the growth of (limit - ptr). Expands to a block
 * followed by "break"; the trailing empty do/while only absorbs the semicolon
 * at the use site. */
3105#define VARINT_CASE(ctype, encode) { \
3106 ctype *start = arr->data; \
3107 ctype *ptr = start + arr->len; \
3108 size_t pre_len = e->limit - e->ptr; \
3109 do { \
3110 ptr--; \
3111 CHK(upb_put_varint(e, encode)); \
3112 } while (ptr != start); \
3113 CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \
3114} \
3115break; \
3116do { ; } while(0)
3117
3118 switch (f->type) {
3119 case UPB_DESCRIPTOR_TYPE_DOUBLE:
3120 CHK(upb_put_fixedarray(e, arr, sizeof(double)));
3121 break;
3122 case UPB_DESCRIPTOR_TYPE_FLOAT:
3123 CHK(upb_put_fixedarray(e, arr, sizeof(float)));
3124 break;
3125 case UPB_DESCRIPTOR_TYPE_SFIXED64:
3126 case UPB_DESCRIPTOR_TYPE_FIXED64:
3127 CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t)));
3128 break;
3129 case UPB_DESCRIPTOR_TYPE_FIXED32:
3130 case UPB_DESCRIPTOR_TYPE_SFIXED32:
3131 CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t)));
3132 break;
/* Each VARINT_CASE ends in "break", so these cases do not fall through. */
3133 case UPB_DESCRIPTOR_TYPE_INT64:
3134 case UPB_DESCRIPTOR_TYPE_UINT64:
3135 VARINT_CASE(uint64_t, *ptr);
3136 case UPB_DESCRIPTOR_TYPE_UINT32:
3137 case UPB_DESCRIPTOR_TYPE_INT32:
3138 case UPB_DESCRIPTOR_TYPE_ENUM:
3139 VARINT_CASE(uint32_t, *ptr);
3140 case UPB_DESCRIPTOR_TYPE_BOOL:
3141 VARINT_CASE(bool, *ptr);
3142 case UPB_DESCRIPTOR_TYPE_SINT32:
3143 VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
3144 case UPB_DESCRIPTOR_TYPE_SINT64:
3145 VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
3146 case UPB_DESCRIPTOR_TYPE_STRING:
3147 case UPB_DESCRIPTOR_TYPE_BYTES: {
3148 upb_stringview *start = arr->data;
3149 upb_stringview *ptr = start + arr->len;
3150 do {
3151 ptr--;
/* Written in reverse: bytes, then length, then tag. */
3152 CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
3153 upb_put_varint(e, ptr->size) &&
3154 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3155 } while (ptr != start);
3156 return true;
3157 }
3158 case UPB_DESCRIPTOR_TYPE_GROUP: {
3159 void **start = arr->data;
3160 void **ptr = start + arr->len;
3161 const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index];
3162 do {
3163 size_t size;
3164 ptr--;
/* END_GROUP is written first so it lands after the body on the wire; "size"
 * is required by the callee but not used for groups. */
3165 CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
3166 upb_encode_message(e, *ptr, subm, &size) &&
3167 upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
3168 } while (ptr != start);
3169 return true;
3170 }
3171 case UPB_DESCRIPTOR_TYPE_MESSAGE: {
3172 void **start = arr->data;
3173 void **ptr = start + arr->len;
3174 const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index];
3175 do {
3176 size_t size;
3177 ptr--;
/* Body first, then its length as reported by upb_encode_message(), then tag. */
3178 CHK(upb_encode_message(e, *ptr, subm, &size) &&
3179 upb_put_varint(e, size) &&
3180 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3181 } while (ptr != start);
3182 return true;
3183 }
3184 }
3185#undef VARINT_CASE
3186
3187 /* We encode all primitive arrays as packed, regardless of what was specified
3188 * in the .proto file. Could special case 1-sized arrays. */
3189 CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3190 return true;
3191}
3192
/* Encodes the non-repeated field "f" whose value is stored at "field_mem".
 *
 * Output is built back-to-front, so the value is written first and the tag
 * after it. When "is_proto3" is set and the field is not part of a oneof, a
 * zero value, an empty string/bytes view or a NULL submessage is skipped
 * entirely. Every case returns; false means a write failed. */
3193static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
3194 const upb_msglayout_msginit_v1 *m,
3195 const upb_msglayout_fieldinit_v1 *f,
3196 bool is_proto3) {
3197 bool skip_zero_value = is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF;
3198
/* CASE(ctype, type, wire_type, encodeval): loads the field as "ctype" into
 * "val", applies the zero-value skip, and otherwise returns the result of
 * writing "encodeval" with upb_put_<type>() followed by the tag. */
3199#define CASE(ctype, type, wire_type, encodeval) do { \
3200 ctype val = *(ctype*)field_mem; \
3201 if (skip_zero_value && val == 0) { \
3202 return true; \
3203 } \
3204 return upb_put_ ## type(e, encodeval) && \
3205 upb_put_tag(e, f->number, wire_type); \
3206} while(0)
3207
3208 switch (f->type) {
3209 case UPB_DESCRIPTOR_TYPE_DOUBLE:
3210 CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
3211 case UPB_DESCRIPTOR_TYPE_FLOAT:
3212 CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
3213 case UPB_DESCRIPTOR_TYPE_INT64:
3214 case UPB_DESCRIPTOR_TYPE_UINT64:
3215 CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
3216 case UPB_DESCRIPTOR_TYPE_UINT32:
3217 case UPB_DESCRIPTOR_TYPE_INT32:
3218 case UPB_DESCRIPTOR_TYPE_ENUM:
3219 CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
3220 case UPB_DESCRIPTOR_TYPE_SFIXED64:
3221 case UPB_DESCRIPTOR_TYPE_FIXED64:
3222 CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
3223 case UPB_DESCRIPTOR_TYPE_FIXED32:
3224 case UPB_DESCRIPTOR_TYPE_SFIXED32:
3225 CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
3226 case UPB_DESCRIPTOR_TYPE_BOOL:
3227 CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
3228 case UPB_DESCRIPTOR_TYPE_SINT32:
3229 CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
3230 case UPB_DESCRIPTOR_TYPE_SINT64:
3231 CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
3232 case UPB_DESCRIPTOR_TYPE_STRING:
3233 case UPB_DESCRIPTOR_TYPE_BYTES: {
3234 upb_stringview view = *(upb_stringview*)field_mem;
3235 if (skip_zero_value && view.size == 0) {
3236 return true;
3237 }
3238 return upb_put_bytes(e, view.data, view.size) &&
3239 upb_put_varint(e, view.size) &&
3240 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
3241 }
3242 case UPB_DESCRIPTOR_TYPE_GROUP: {
3243 size_t size;
3244 void *submsg = *(void**)field_mem;
3245 const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index];
3246 if (skip_zero_value && submsg == NULL) {
3247 return true;
3248 }
/* END_GROUP is written first so it lands after the body on the wire. */
3249 return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
3250 upb_encode_message(e, submsg, subm, &size) &&
3251 upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
3252 }
3253 case UPB_DESCRIPTOR_TYPE_MESSAGE: {
3254 size_t size;
3255 void *submsg = *(void**)field_mem;
3256 const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index];
3257 if (skip_zero_value && submsg == NULL) {
3258 return true;
3259 }
/* NOTE(review): when skip_zero_value is false a NULL submsg still reaches
 * upb_encode_message(), and "size" is then read by upb_put_varint() -- confirm
 * the callee always assigns *size on that path. */
3260 return upb_encode_message(e, submsg, subm, &size) &&
3261 upb_put_varint(e, size) &&
3262 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
3263 }
3264 }
3265#undef CASE
/* Reached only if f->type matches none of the cases above. */
3266 UPB_UNREACHABLE();
3267}
3268
3269bool upb_encode_hasscalarfield(const char *msg,
3270 const upb_msglayout_msginit_v1 *m,
3271 const upb_msglayout_fieldinit_v1 *f) {
3272 if (f->oneof_index != UPB_NOT_IN_ONEOF) {
3273 return upb_readcase(msg, m, f->oneof_index) == f->number;
3274 } else if (m->is_proto2) {
3275 return upb_readhasbit(msg, f);
3276 } else {
3277 /* For proto3, we'll test for the field being empty later. */
3278 return true;
3279 }
3280}
3281
3282bool upb_encode_message(upb_encstate* e, const char *msg,
3283 const upb_msglayout_msginit_v1 *m,
3284 size_t *size) {
3285 int i;
3286 char *buf_end = e->ptr;
3287
3288 if (msg == NULL) {
3289 return true;
3290 }
3291
3292 for (i = m->field_count - 1; i >= 0; i--) {
3293 const upb_msglayout_fieldinit_v1 *f = &m->fields[i];
3294
3295 if (f->label == UPB_LABEL_REPEATED) {
3296 CHK(upb_encode_array(e, msg + f->offset, m, f));
3297 } else {
3298 if (upb_encode_hasscalarfield(msg, m, f)) {
3299 CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, !m->is_proto2));
3300 }
3301 }
3302 }
3303
3304 *size = buf_end - e->ptr;
3305 return true;
3306}
3307
3308char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m,
3309 upb_env *env, size_t *size) {
3310 upb_encstate e;
3311 e.env = env;
3312 e.buf = NULL;
3313 e.limit = NULL;
3314 e.ptr = NULL;
3315
3316 if (!upb_encode_message(&e, msg, m, size)) {
3317 *size = 0;
3318 return NULL;
3319 }
3320
3321 *size = e.limit - e.ptr;
3322
3323 if (*size == 0) {
3324 static char ch;
3325 return &ch;
3326 } else {
3327 UPB_ASSERT(e.ptr);
3328 return e.ptr;
3329 }
3330}
3331
3332#undef CHK
Brian Silverman9c614bc2016-02-15 20:20:02 -05003333/*
3334** TODO(haberman): it's unclear whether a lot of the consistency checks should
Austin Schuh40c16522018-10-28 20:27:54 -07003335** UPB_ASSERT() or return false.
Brian Silverman9c614bc2016-02-15 20:20:02 -05003336*/
3337
3338
Brian Silverman9c614bc2016-02-15 20:20:02 -05003339#include <string.h>
3340
3341
/* Allocates "size" bytes with upb_gmalloc() and zero-fills them.
 * Returns NULL if the allocation fails. */
static void *upb_calloc(size_t size) {
  void *block = upb_gmalloc(size);
  return block ? memset(block, 0, size) : NULL;
}
Brian Silverman9c614bc2016-02-15 20:20:02 -05003349
3350/* Defined for the sole purpose of having a unique pointer value for
3351 * UPB_NO_CLOSURE. */
/* Has external linkage (not static), so the same object can be named from
 * other translation units. */
3352char _upb_noclosure;
3353
3354static void freehandlers(upb_refcounted *r) {
3355 upb_handlers *h = (upb_handlers*)r;
3356
3357 upb_inttable_iter i;
3358 upb_inttable_begin(&i, &h->cleanup_);
3359 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
3360 void *val = (void*)upb_inttable_iter_key(&i);
3361 upb_value func_val = upb_inttable_iter_value(&i);
3362 upb_handlerfree *func = upb_value_getfptr(func_val);
3363 func(val);
3364 }
3365
3366 upb_inttable_uninit(&h->cleanup_);
3367 upb_msgdef_unref(h->msg, h);
Austin Schuh40c16522018-10-28 20:27:54 -07003368 upb_gfree(h->sub);
3369 upb_gfree(h);
Brian Silverman9c614bc2016-02-15 20:20:02 -05003370}
3371
3372static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
3373 void *closure) {
3374 const upb_handlers *h = (const upb_handlers*)r;
3375 upb_msg_field_iter i;
3376 for(upb_msg_field_begin(&i, h->msg);
3377 !upb_msg_field_done(&i);
3378 upb_msg_field_next(&i)) {
3379 upb_fielddef *f = upb_msg_iter_field(&i);
3380 const upb_handlers *sub;
3381 if (!upb_fielddef_issubmsg(f)) continue;
3382 sub = upb_handlers_getsubhandlers(h, f);
3383 if (sub) visit(r, upb_handlers_upcast(sub), closure);
3384 }
3385}
3386
/* Refcounted vtable for upb_handlers objects; passed to upb_refcounted_init()
 * in upb_handlers_new(). */
3387static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
3388
/* State shared by the recursive newformsg() calls made on behalf of one
 * upb_handlers_newfrozen() call. */
3389typedef struct {
3390 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
/* Invoked once for each handlers object newformsg() creates. */
3391 upb_handlers_callback *callback;
/* Passed through unchanged as the first argument of "callback". */
3392 const void *closure;
3393} dfs_state;
3394
/* Creates a handlers object for message "m" with owner "owner", records it in
 * s->tab and invokes the user callback on it. Then, for every submessage
 * field, it links the handlers for the field's message type as subhandlers,
 * reusing an object already in s->tab or creating one recursively.
 *
 * Recording "h" in the table before recursing is what lets a message type that
 * refers back to itself find the existing object instead of recursing forever.
 *
 * Returns the new handlers, or NULL after an allocation failure (the ref held
 * by "owner" is released in that case). */
3395/* TODO(haberman): discard upb_handlers* objects that do not actually have any
3396 * handlers set and cannot reach any upb_handlers* object that does. This is
3397 * slightly tricky to do correctly. */
3398static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
3399 dfs_state *s) {
3400 upb_msg_field_iter i;
3401 upb_handlers *h = upb_handlers_new(m, owner);
3402 if (!h) return NULL;
3403 if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
3404
3405 s->callback(s->closure, h);
3406
3407 /* For each submessage field, get or create a handlers object and set it as
3408 * the subhandlers. */
3409 for(upb_msg_field_begin(&i, m);
3410 !upb_msg_field_done(&i);
3411 upb_msg_field_next(&i)) {
3412 upb_fielddef *f = upb_msg_iter_field(&i);
3413 const upb_msgdef *subdef;
3414 upb_value subm_ent;
3415
3416 if (!upb_fielddef_issubmsg(f)) continue;
3417
3418 subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
3419 if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
3420 upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
3421 } else {
/* The address of the local variable serves as a temporary owner token for the
 * new object; that ref is released right after the object is attached to h.
 * NOTE(review): presumably setsubhandlers takes its own ref -- confirm. */
3422 upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
3423 if (!sub_mh) goto oom;
3424 upb_handlers_setsubhandlers(h, f, sub_mh);
3425 upb_handlers_unref(sub_mh, &sub_mh);
3426 }
3427 }
3428 return h;
3429
3430oom:
3431 upb_handlers_unref(h, owner);
3432 return NULL;
3433}
3434
3435/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
3436 * subhandlers for this submessage field. */
/* Indexes h->sub, the array upb_handlers_new() allocates with one slot per
 * submessage field of the message. Expands to an lvalue. */
3437#define SUBH(h, selector) (h->sub[selector])
3438
3439/* The selector for a submessage field is the field index. */
3440#define SUBH_F(h, f) SUBH(h, f->index_)
3441
3442static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
3443 upb_handlertype_t type) {
3444 upb_selector_t sel;
Austin Schuh40c16522018-10-28 20:27:54 -07003445 UPB_ASSERT(!upb_handlers_isfrozen(h));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003446 if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
3447 upb_status_seterrf(
3448 &h->status_, "type mismatch: field %s does not belong to message %s",
3449 upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
3450 return -1;
3451 }
3452 if (!upb_handlers_getselector(f, type, &sel)) {
3453 upb_status_seterrf(
3454 &h->status_,
3455 "type mismatch: cannot register handler type %d for field %s",
3456 type, upb_fielddef_name(f));
3457 return -1;
3458 }
3459 return sel;
3460}
3461
3462static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
3463 upb_handlertype_t type) {
3464 int32_t sel = trygetsel(h, f, type);
Austin Schuh40c16522018-10-28 20:27:54 -07003465 UPB_ASSERT(sel >= 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -05003466 return sel;
3467}
3468
3469static const void **returntype(upb_handlers *h, const upb_fielddef *f,
3470 upb_handlertype_t type) {
3471 return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_;
3472}
3473
/* Registers "func" as the handler for selector "sel" in the mutable handlers
 * "h", with attributes taken from "attr" (defaults are used when it is NULL).
 *
 * Fails, recording the reason in h->status_, when:
 *   - "sel" is negative (the caller's selector lookup failed),
 *   - a handler is already set for "sel", or
 *   - the closure type or return closure type in "attr" conflicts with one
 *     already established.
 * On success the closure type named by "attr", if any, is recorded for the
 * context this handler runs in. Returns true on success. */
3474static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
3475 upb_handlertype_t type, upb_func *func,
3476 upb_handlerattr *attr) {
3477 upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
3478 const void *closure_type;
3479 const void **context_closure_type;
3480
Austin Schuh40c16522018-10-28 20:27:54 -07003481 UPB_ASSERT(!upb_handlers_isfrozen(h));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003482
3483 if (sel < 0) {
3484 upb_status_seterrmsg(&h->status_,
3485 "incorrect handler type for this field.");
3486 return false;
3487 }
3488
3489 if (h->table[sel].func) {
3490 upb_status_seterrmsg(&h->status_,
3491 "cannot change handler once it has been set.");
3492 return false;
3493 }
3494
/* Work on a copy so the caller's attr is never modified. */
3495 if (attr) {
3496 set_attr = *attr;
3497 }
3498
3499 /* Check that the given closure type matches the closure type that has been
3500 * established for this context (if any). */
3501 closure_type = upb_handlerattr_closuretype(&set_attr);
3502
/* Pick the slot that records the closure type this handler receives: the
 * STARTSTR return type for string handlers, the STARTSEQ return type for other
 * handlers on a sequence field, otherwise the message-level closure type.
 * NOTE(review): the STRING branch passes "f" on without the NULL check the
 * next branch has -- presumably string handlers always have a field; confirm. */
3503 if (type == UPB_HANDLER_STRING) {
3504 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
3505 } else if (f && upb_fielddef_isseq(f) &&
3506 type != UPB_HANDLER_STARTSEQ &&
3507 type != UPB_HANDLER_ENDSEQ) {
3508 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
3509 } else {
3510 context_closure_type = &h->top_closure_type;
3511 }
3512
3513 if (closure_type && *context_closure_type &&
3514 closure_type != *context_closure_type) {
3515 /* TODO(haberman): better message for debugging. */
3516 if (f) {
3517 upb_status_seterrf(&h->status_,
3518 "closure type does not match for field %s",
3519 upb_fielddef_name(f));
3520 } else {
3521 upb_status_seterrmsg(
3522 &h->status_, "closure type does not match for message-level handler");
3523 }
3524 return false;
3525 }
3526
/* Establish the closure type for this context; at this point it is either
 * unset or already equal to closure_type. */
3527 if (closure_type)
3528 *context_closure_type = closure_type;
3529
3530 /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
3531 * matches any pre-existing expectations about what type is expected. */
3532 if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
3533 const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
3534 const void *table_return_type =
3535 upb_handlerattr_returnclosuretype(&h->table[sel].attr);
3536 if (return_type && table_return_type && return_type != table_return_type) {
3537 upb_status_seterrmsg(&h->status_, "closure return type does not match");
3538 return false;
3539 }
3540
/* Keep an expectation recorded earlier (by an inner handler registered before
 * this one) so that overwriting the table entry below does not lose it. */
3541 if (table_return_type && !return_type)
3542 upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type);
3543 }
3544
3545 h->table[sel].func = (upb_func*)func;
3546 h->table[sel].attr = set_attr;
3547 return true;
3548}
3549
3550/* Returns the effective closure type for this handler (which will propagate
3551 * from outer frames if this frame has no START* handler). Not implemented for
3552 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is
3553 * the effective closure type is unspecified (either no handler was registered
3554 * to specify it or the handler that was registered did not specify the closure
3555 * type). */
3556const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
3557 upb_handlertype_t type) {
3558 const void *ret;
3559 upb_selector_t sel;
3560
Austin Schuh40c16522018-10-28 20:27:54 -07003561 UPB_ASSERT(type != UPB_HANDLER_STRING);
Brian Silverman9c614bc2016-02-15 20:20:02 -05003562 ret = h->top_closure_type;
3563
3564 if (upb_fielddef_isseq(f) &&
3565 type != UPB_HANDLER_STARTSEQ &&
3566 type != UPB_HANDLER_ENDSEQ &&
3567 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
3568 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
3569 }
3570
3571 if (type == UPB_HANDLER_STRING &&
3572 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
3573 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
3574 }
3575
3576 /* The effective type of the submessage; not used yet.
3577 * if (type == SUBMESSAGE &&
3578 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
3579 * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
3580 * } */
3581
3582 return ret;
3583}
3584
3585/* Checks whether the START* handler specified by f & type is missing even
3586 * though it is required to convert the established type of an outer frame
3587 * ("closure_type") into the established type of an inner frame (represented in
3588 * the return closure type of this handler's attr. */
3589bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
3590 upb_status *status) {
3591 const void *closure_type;
3592 const upb_handlerattr *attr;
3593 const void *return_closure_type;
3594
3595 upb_selector_t sel = handlers_getsel(h, f, type);
3596 if (h->table[sel].func) return true;
3597 closure_type = effective_closure_type(h, f, type);
3598 attr = &h->table[sel].attr;
3599 return_closure_type = upb_handlerattr_returnclosuretype(attr);
3600 if (closure_type && return_closure_type &&
3601 closure_type != return_closure_type) {
3602 upb_status_seterrf(status,
3603 "expected start handler to return sub type for field %f",
3604 upb_fielddef_name(f));
3605 return false;
3606 }
3607 return true;
3608}
3609
3610/* Public interface ***********************************************************/
3611
/* Creates a new, mutable handlers object for the frozen message def "md",
 * with "owner" holding the initial ref.
 *
 * The object is zero-filled on allocation, so every handler slot starts out
 * NULL. It takes a ref on "md" and allocates one subhandlers slot per
 * submessage field. Returns NULL if any allocation or initialization fails. */
3612upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
3613 int extra;
3614 upb_handlers *h;
3615
Austin Schuh40c16522018-10-28 20:27:54 -07003616 UPB_ASSERT(upb_msgdef_isfrozen(md));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003617
/* Space for the handler table that trails the struct.
 * NOTE(review): the "- 1" presumably accounts for one table entry already
 * being part of upb_handlers -- confirm against the struct declaration. */
3618 extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
Austin Schuh40c16522018-10-28 20:27:54 -07003619 h = upb_calloc(sizeof(*h) + extra);
Brian Silverman9c614bc2016-02-15 20:20:02 -05003620 if (!h) return NULL;
3621
3622 h->msg = md;
3623 upb_msgdef_ref(h->msg, h);
3624 upb_status_clear(&h->status_);
Austin Schuh40c16522018-10-28 20:27:54 -07003625
3626 if (md->submsg_field_count > 0) {
3627 h->sub = upb_calloc(md->submsg_field_count * sizeof(*h->sub));
3628 if (!h->sub) goto oom;
3629 } else {
3630 h->sub = 0;
3631 }
3632
Brian Silverman9c614bc2016-02-15 20:20:02 -05003633 if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
3634 goto oom;
3635 if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
3636
3637 /* calloc() above initialized all handlers to NULL. */
3638 return h;
3639
/* NOTE(review): freehandlers() iterates and uninits h->cleanup_, which on
 * every path that reaches this label has only been zero-filled, never
 * initialized -- confirm the inttable functions tolerate that. */
3640oom:
3641 freehandlers(upb_handlers_upcast_mutable(h));
3642 return NULL;
3643}
3644
3645const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
3646 const void *owner,
3647 upb_handlers_callback *callback,
3648 const void *closure) {
3649 dfs_state state;
3650 upb_handlers *ret;
3651 bool ok;
3652 upb_refcounted *r;
3653
3654 state.callback = callback;
3655 state.closure = closure;
3656 if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
3657
3658 ret = newformsg(m, owner, &state);
3659
3660 upb_inttable_uninit(&state.tab);
3661 if (!ret) return NULL;
3662
3663 r = upb_handlers_upcast_mutable(ret);
3664 ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
Austin Schuh40c16522018-10-28 20:27:54 -07003665 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05003666
3667 return ret;
3668}
3669
3670const upb_status *upb_handlers_status(upb_handlers *h) {
Austin Schuh40c16522018-10-28 20:27:54 -07003671 UPB_ASSERT(!upb_handlers_isfrozen(h));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003672 return &h->status_;
3673}
3674
3675void upb_handlers_clearerr(upb_handlers *h) {
Austin Schuh40c16522018-10-28 20:27:54 -07003676 UPB_ASSERT(!upb_handlers_isfrozen(h));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003677 upb_status_clear(&h->status_);
3678}
3679
3680#define SETTER(name, handlerctype, handlertype) \
3681 bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
3682 handlerctype func, upb_handlerattr *attr) { \
3683 int32_t sel = trygetsel(h, f, handlertype); \
3684 return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
3685 }
3686
3687SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
3688SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
3689SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
3690SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
3691SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
3692SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
3693SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
3694SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
3695SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
3696SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
3697SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
3698SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
3699SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
3700SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
3701
3702#undef SETTER
3703
Austin Schuh40c16522018-10-28 20:27:54 -07003704bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
3705 upb_handlerattr *attr) {
3706 return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32,
3707 (upb_func *)func, attr);
3708}
3709
Brian Silverman9c614bc2016-02-15 20:20:02 -05003710bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
3711 upb_handlerattr *attr) {
3712 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
3713 (upb_func *)func, attr);
3714}
3715
3716bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
3717 upb_handlerattr *attr) {
Austin Schuh40c16522018-10-28 20:27:54 -07003718 UPB_ASSERT(!upb_handlers_isfrozen(h));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003719 return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
3720 (upb_func *)func, attr);
3721}
3722
3723bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
3724 const upb_handlers *sub) {
Austin Schuh40c16522018-10-28 20:27:54 -07003725 UPB_ASSERT(sub);
3726 UPB_ASSERT(!upb_handlers_isfrozen(h));
3727 UPB_ASSERT(upb_fielddef_issubmsg(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003728 if (SUBH_F(h, f)) return false; /* Can't reset. */
3729 if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
3730 return false;
3731 }
3732 SUBH_F(h, f) = sub;
3733 upb_ref2(sub, h);
3734 return true;
3735}
3736
3737const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
3738 const upb_fielddef *f) {
Austin Schuh40c16522018-10-28 20:27:54 -07003739 UPB_ASSERT(upb_fielddef_issubmsg(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05003740 return SUBH_F(h, f);
3741}
3742
3743bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
3744 upb_handlerattr *attr) {
3745 if (!upb_handlers_gethandler(h, sel))
3746 return false;
3747 *attr = h->table[sel].attr;
3748 return true;
3749}
3750
3751const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
3752 upb_selector_t sel) {
3753 /* STARTSUBMSG selector in sel is the field's selector base. */
3754 return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
3755}
3756
3757const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
3758
3759bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
3760 bool ok;
3761 if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
3762 return false;
3763 }
3764 ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
Austin Schuh40c16522018-10-28 20:27:54 -07003765 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05003766 return true;
3767}
3768
3769
3770/* "Static" methods ***********************************************************/
3771
3772bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
3773 /* TODO: verify we have a transitive closure. */
3774 int i;
3775 for (i = 0; i < n; i++) {
3776 upb_msg_field_iter j;
3777 upb_handlers *h = handlers[i];
3778
3779 if (!upb_ok(&h->status_)) {
3780 upb_status_seterrf(s, "handlers for message %s had error status: %s",
3781 upb_msgdef_fullname(upb_handlers_msgdef(h)),
3782 upb_status_errmsg(&h->status_));
3783 return false;
3784 }
3785
3786 /* Check that there are no closure mismatches due to missing Start* handlers
3787 * or subhandlers with different type-level types. */
3788 for(upb_msg_field_begin(&j, h->msg);
3789 !upb_msg_field_done(&j);
3790 upb_msg_field_next(&j)) {
3791
3792 const upb_fielddef *f = upb_msg_iter_field(&j);
3793 if (upb_fielddef_isseq(f)) {
3794 if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
3795 return false;
3796 }
3797
3798 if (upb_fielddef_isstring(f)) {
3799 if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
3800 return false;
3801 }
3802
3803 if (upb_fielddef_issubmsg(f)) {
3804 bool hashandler = false;
3805 if (upb_handlers_gethandler(
3806 h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
3807 upb_handlers_gethandler(
3808 h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
3809 hashandler = true;
3810 }
3811
3812 if (upb_fielddef_isseq(f) &&
3813 (upb_handlers_gethandler(
3814 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
3815 upb_handlers_gethandler(
3816 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
3817 hashandler = true;
3818 }
3819
3820 if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
3821 /* For now we add an empty subhandlers in this case. It makes the
3822 * decoder code generator simpler, because it only has to handle two
3823 * cases (submessage has handlers or not) as opposed to three
3824 * (submessage has handlers in enclosing message but no subhandlers).
3825 *
3826 * This makes parsing less efficient in the case that we want to
3827 * notice a submessage but skip its contents (like if we're testing
3828 * for submessage presence or counting the number of repeated
3829 * submessages). In this case we will end up parsing the submessage
3830 * field by field and throwing away the results for each, instead of
3831 * skipping the whole delimited thing at once. If this is an issue we
3832 * can revisit it, but do remember that this only arises when you have
3833 * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
3834 * submessage but no subhandlers. The uses cases for this are
3835 * limited. */
3836 upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
3837 upb_handlers_setsubhandlers(h, f, sub);
3838 upb_handlers_unref(sub, &sub);
3839 }
3840
3841 /* TODO(haberman): check type of submessage.
3842 * This is slightly tricky; also consider whether we should check that
3843 * they match at setsubhandlers time. */
3844 }
3845 }
3846 }
3847
3848 if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
3849 UPB_MAX_HANDLER_DEPTH)) {
3850 return false;
3851 }
3852
3853 return true;
3854}
3855
3856upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
3857 switch (upb_fielddef_type(f)) {
3858 case UPB_TYPE_INT32:
3859 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
3860 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
3861 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
3862 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
3863 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
3864 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
3865 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
Austin Schuh40c16522018-10-28 20:27:54 -07003866 default: UPB_ASSERT(false); return -1; /* Invalid input. */
Brian Silverman9c614bc2016-02-15 20:20:02 -05003867 }
3868}
3869
3870bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
3871 upb_selector_t *s) {
3872 switch (type) {
3873 case UPB_HANDLER_INT32:
3874 case UPB_HANDLER_INT64:
3875 case UPB_HANDLER_UINT32:
3876 case UPB_HANDLER_UINT64:
3877 case UPB_HANDLER_FLOAT:
3878 case UPB_HANDLER_DOUBLE:
3879 case UPB_HANDLER_BOOL:
3880 if (!upb_fielddef_isprimitive(f) ||
3881 upb_handlers_getprimitivehandlertype(f) != type)
3882 return false;
3883 *s = f->selector_base;
3884 break;
3885 case UPB_HANDLER_STRING:
3886 if (upb_fielddef_isstring(f)) {
3887 *s = f->selector_base;
3888 } else if (upb_fielddef_lazy(f)) {
3889 *s = f->selector_base + 3;
3890 } else {
3891 return false;
3892 }
3893 break;
3894 case UPB_HANDLER_STARTSTR:
3895 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
3896 *s = f->selector_base + 1;
3897 } else {
3898 return false;
3899 }
3900 break;
3901 case UPB_HANDLER_ENDSTR:
3902 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
3903 *s = f->selector_base + 2;
3904 } else {
3905 return false;
3906 }
3907 break;
3908 case UPB_HANDLER_STARTSEQ:
3909 if (!upb_fielddef_isseq(f)) return false;
3910 *s = f->selector_base - 2;
3911 break;
3912 case UPB_HANDLER_ENDSEQ:
3913 if (!upb_fielddef_isseq(f)) return false;
3914 *s = f->selector_base - 1;
3915 break;
3916 case UPB_HANDLER_STARTSUBMSG:
3917 if (!upb_fielddef_issubmsg(f)) return false;
3918 /* Selectors for STARTSUBMSG are at the beginning of the table so that the
3919 * selector can also be used as an index into the "sub" array of
3920 * subhandlers. The indexes for the two into these two tables are the
3921 * same, except that in the handler table the static selectors come first. */
3922 *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
3923 break;
3924 case UPB_HANDLER_ENDSUBMSG:
3925 if (!upb_fielddef_issubmsg(f)) return false;
3926 *s = f->selector_base;
3927 break;
3928 }
Austin Schuh40c16522018-10-28 20:27:54 -07003929 UPB_ASSERT((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
Brian Silverman9c614bc2016-02-15 20:20:02 -05003930 return true;
3931}
3932
3933uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
3934 return upb_fielddef_isseq(f) ? 2 : 0;
3935}
3936
3937uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
3938 uint32_t ret = 1;
3939 if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */
3940 if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
3941 if (upb_fielddef_issubmsg(f)) {
3942 /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
3943 ret += 0;
3944 if (upb_fielddef_lazy(f)) {
3945 /* STARTSTR/ENDSTR/STRING (for lazy) */
3946 ret += 3;
3947 }
3948 }
3949 return ret;
3950}
3951
3952
3953/* upb_handlerattr ************************************************************/
3954
3955void upb_handlerattr_init(upb_handlerattr *attr) {
3956 upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER;
3957 memcpy(attr, &from, sizeof(*attr));
3958}
3959
3960void upb_handlerattr_uninit(upb_handlerattr *attr) {
3961 UPB_UNUSED(attr);
3962}
3963
3964bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
3965 attr->handler_data_ = hd;
3966 return true;
3967}
3968
3969bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
3970 attr->closure_type_ = type;
3971 return true;
3972}
3973
3974const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
3975 return attr->closure_type_;
3976}
3977
3978bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
3979 const void *type) {
3980 attr->return_closure_type_ = type;
3981 return true;
3982}
3983
3984const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
3985 return attr->return_closure_type_;
3986}
3987
3988bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
3989 attr->alwaysok_ = alwaysok;
3990 return true;
3991}
3992
3993bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
3994 return attr->alwaysok_;
3995}
3996
3997/* upb_bufhandle **************************************************************/
3998
3999size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
4000 return h->objofs_;
4001}
4002
4003/* upb_byteshandler ***********************************************************/
4004
4005void upb_byteshandler_init(upb_byteshandler* h) {
4006 memset(h, 0, sizeof(*h));
4007}
4008
4009/* For when we support handlerfree callbacks. */
4010void upb_byteshandler_uninit(upb_byteshandler* h) {
4011 UPB_UNUSED(h);
4012}
4013
4014bool upb_byteshandler_setstartstr(upb_byteshandler *h,
4015 upb_startstr_handlerfunc *func, void *d) {
4016 h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
4017 h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
4018 return true;
4019}
4020
4021bool upb_byteshandler_setstring(upb_byteshandler *h,
4022 upb_string_handlerfunc *func, void *d) {
4023 h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
4024 h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
4025 return true;
4026}
4027
4028bool upb_byteshandler_setendstr(upb_byteshandler *h,
4029 upb_endfield_handlerfunc *func, void *d) {
4030 h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
4031 h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
4032 return true;
4033}
Austin Schuh40c16522018-10-28 20:27:54 -07004034
4035
/* Returns true iff "val" is a power of two, i.e. has exactly one bit set.
 * Zero is not a power of two; the bit trick alone would accept it, so it is
 * excluded explicitly. */
static bool is_power_of_two(size_t val) {
  return val != 0 && (val & (val - 1)) == 0;
}
4039
/* Rounds "val" up to the next multiple of "align", which must be a power of
 * two. */
static size_t align_up(size_t val, size_t align) {
  size_t mask;

  UPB_ASSERT(is_power_of_two(align));
  mask = align - 1;
  return (val + mask) & ~mask;
}
4045
/* Returns n / d rounded up to the next integer. "d" must be nonzero.
 *
 * Written as quotient plus a remainder test rather than (n + d - 1) / d so
 * that the intermediate sum cannot wrap around for large "n". */
static size_t div_round_up(size_t n, size_t d) {
  return n / d + (n % d != 0 ? 1 : 0);
}
4049
4050bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) {
4051 return type == UPB_TYPE_BOOL || type == UPB_TYPE_INT32 ||
4052 type == UPB_TYPE_UINT32 || type == UPB_TYPE_INT64 ||
4053 type == UPB_TYPE_UINT64 || type == UPB_TYPE_STRING;
4054}
4055
4056void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size);
4057void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size);
4058
/* PTR_AT yields a "type *" pointing "ofs" bytes past "msg". The arguments
 * and the whole expansion are parenthesized so that argument expressions
 * such as "a ? b : c" and uses such as PTR_AT(...)->member parse as
 * intended. */
#define PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))
#define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void)
/* Maximum nesting depth accepted while visiting a message. */
#define ENCODE_MAX_NESTING 64
/* Returns false from the enclosing function when "x" is false. */
#define CHECK_TRUE(x)  if (!(x)) { return false; }
4063
4064/** upb_msgval ****************************************************************/
4065
4066#define upb_alignof(t) offsetof(struct { char c; t x; }, x)
4067
4068/* These functions will generate real memcpy() calls on ARM sadly, because
4069 * the compiler assumes they might not be aligned. */
4070
4071static upb_msgval upb_msgval_read(const void *p, size_t ofs,
4072 uint8_t size) {
4073 upb_msgval val;
4074 p = (char*)p + ofs;
4075 memcpy(&val, p, size);
4076 return val;
4077}
4078
4079static void upb_msgval_write(void *p, size_t ofs, upb_msgval val,
4080 uint8_t size) {
4081 p = (char*)p + ofs;
4082 memcpy(p, &val, size);
4083}
4084
4085static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
4086 switch (type) {
4087 case UPB_TYPE_DOUBLE:
4088 case UPB_TYPE_INT64:
4089 case UPB_TYPE_UINT64:
4090 return 8;
4091 case UPB_TYPE_ENUM:
4092 case UPB_TYPE_INT32:
4093 case UPB_TYPE_UINT32:
4094 case UPB_TYPE_FLOAT:
4095 return 4;
4096 case UPB_TYPE_BOOL:
4097 return 1;
4098 case UPB_TYPE_BYTES:
4099 case UPB_TYPE_MESSAGE:
4100 return sizeof(void*);
4101 case UPB_TYPE_STRING:
4102 return sizeof(upb_stringview);
4103 }
4104 UPB_UNREACHABLE();
4105}
4106
4107static uint8_t upb_msg_fieldsize(const upb_msglayout_fieldinit_v1 *field) {
4108 if (field->label == UPB_LABEL_REPEATED) {
4109 return sizeof(void*);
4110 } else {
4111 return upb_msgval_sizeof(field->type);
4112 }
4113}
4114
4115static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
4116 if (upb_fielddef_isseq(f)) {
4117 return sizeof(void*);
4118 } else {
4119 return upb_msgval_sizeof(upb_fielddef_type(f));
4120 }
4121}
4122
4123/* TODO(haberman): this is broken right now because upb_msgval can contain
4124 * a char* / size_t pair, which is too big for a upb_value. To fix this
4125 * we'll probably need to dynamically allocate a upb_msgval and store a
4126 * pointer to that in the tables for extensions/maps. */
4127static upb_value upb_toval(upb_msgval val) {
4128 upb_value ret;
4129 UPB_UNUSED(val);
4130 memset(&ret, 0, sizeof(upb_value)); /* XXX */
4131 return ret;
4132}
4133
4134static upb_msgval upb_msgval_fromval(upb_value val) {
4135 upb_msgval ret;
4136 UPB_UNUSED(val);
4137 memset(&ret, 0, sizeof(upb_msgval)); /* XXX */
4138 return ret;
4139}
4140
4141static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) {
4142 switch (type) {
4143 case UPB_TYPE_FLOAT: return UPB_CTYPE_FLOAT;
4144 case UPB_TYPE_DOUBLE: return UPB_CTYPE_DOUBLE;
4145 case UPB_TYPE_BOOL: return UPB_CTYPE_BOOL;
4146 case UPB_TYPE_BYTES:
4147 case UPB_TYPE_MESSAGE:
4148 case UPB_TYPE_STRING: return UPB_CTYPE_CONSTPTR;
4149 case UPB_TYPE_ENUM:
4150 case UPB_TYPE_INT32: return UPB_CTYPE_INT32;
4151 case UPB_TYPE_UINT32: return UPB_CTYPE_UINT32;
4152 case UPB_TYPE_INT64: return UPB_CTYPE_INT64;
4153 case UPB_TYPE_UINT64: return UPB_CTYPE_UINT64;
4154 default: UPB_ASSERT(false); return 0;
4155 }
4156}
4157
4158static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) {
4159 switch (upb_fielddef_type(f)) {
4160 case UPB_TYPE_FLOAT:
4161 return upb_msgval_float(upb_fielddef_defaultfloat(f));
4162 case UPB_TYPE_DOUBLE:
4163 return upb_msgval_double(upb_fielddef_defaultdouble(f));
4164 case UPB_TYPE_BOOL:
4165 return upb_msgval_bool(upb_fielddef_defaultbool(f));
4166 case UPB_TYPE_STRING:
4167 case UPB_TYPE_BYTES: {
4168 size_t len;
4169 const char *ptr = upb_fielddef_defaultstr(f, &len);
4170 return upb_msgval_makestr(ptr, len);
4171 }
4172 case UPB_TYPE_MESSAGE:
4173 return upb_msgval_msg(NULL);
4174 case UPB_TYPE_ENUM:
4175 case UPB_TYPE_INT32:
4176 return upb_msgval_int32(upb_fielddef_defaultint32(f));
4177 case UPB_TYPE_UINT32:
4178 return upb_msgval_uint32(upb_fielddef_defaultuint32(f));
4179 case UPB_TYPE_INT64:
4180 return upb_msgval_int64(upb_fielddef_defaultint64(f));
4181 case UPB_TYPE_UINT64:
4182 return upb_msgval_uint64(upb_fielddef_defaultuint64(f));
4183 default:
4184 UPB_ASSERT(false);
4185 return upb_msgval_msg(NULL);
4186 }
4187}
4188
4189
4190/** upb_msglayout *************************************************************/
4191
4192struct upb_msglayout {
4193 struct upb_msglayout_msginit_v1 data;
4194};
4195
4196static void upb_msglayout_free(upb_msglayout *l) {
4197 upb_gfree(l->data.default_msg);
4198 upb_gfree(l);
4199}
4200
4201static size_t upb_msglayout_place(upb_msglayout *l, size_t size) {
4202 size_t ret;
4203
4204 l->data.size = align_up(l->data.size, size);
4205 ret = l->data.size;
4206 l->data.size += size;
4207 return ret;
4208}
4209
4210static uint32_t upb_msglayout_offset(const upb_msglayout *l,
4211 const upb_fielddef *f) {
4212 return l->data.fields[upb_fielddef_index(f)].offset;
4213}
4214
4215static uint32_t upb_msglayout_hasbit(const upb_msglayout *l,
4216 const upb_fielddef *f) {
4217 return l->data.fields[upb_fielddef_index(f)].hasbit;
4218}
4219
4220static bool upb_msglayout_initdefault(upb_msglayout *l, const upb_msgdef *m) {
4221 upb_msg_field_iter it;
4222
4223 if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->data.size) {
4224 /* Allocate default message and set default values in it. */
4225 l->data.default_msg = upb_gmalloc(l->data.size);
4226 if (!l->data.default_msg) {
4227 return false;
4228 }
4229
4230 memset(l->data.default_msg, 0, l->data.size);
4231
4232 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
4233 upb_msg_field_next(&it)) {
4234 const upb_fielddef* f = upb_msg_iter_field(&it);
4235
4236 if (upb_fielddef_containingoneof(f)) {
4237 continue;
4238 }
4239
4240 /* TODO(haberman): handle strings. */
4241 if (!upb_fielddef_isstring(f) &&
4242 !upb_fielddef_issubmsg(f) &&
4243 !upb_fielddef_isseq(f)) {
4244 upb_msg_set(l->data.default_msg,
4245 upb_fielddef_index(f),
4246 upb_msgval_fromdefault(f),
4247 l);
4248 }
4249 }
4250 }
4251
4252 return true;
4253}
4254
4255static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) {
4256 upb_msg_field_iter it;
4257 upb_msg_oneof_iter oit;
4258 upb_msglayout *l;
4259 size_t hasbit;
4260 size_t submsg_count = 0;
4261 const upb_msglayout_msginit_v1 **submsgs;
4262 upb_msglayout_fieldinit_v1 *fields;
4263 upb_msglayout_oneofinit_v1 *oneofs;
4264
4265 for (upb_msg_field_begin(&it, m);
4266 !upb_msg_field_done(&it);
4267 upb_msg_field_next(&it)) {
4268 const upb_fielddef* f = upb_msg_iter_field(&it);
4269 if (upb_fielddef_issubmsg(f)) {
4270 submsg_count++;
4271 }
4272 }
4273
4274 l = upb_gmalloc(sizeof(*l));
4275 if (!l) return NULL;
4276
4277 memset(l, 0, sizeof(*l));
4278
4279 fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields));
4280 submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs));
4281 oneofs = upb_gmalloc(upb_msgdef_numoneofs(m) * sizeof(*oneofs));
4282
4283 if ((!fields && upb_msgdef_numfields(m)) ||
4284 (!submsgs && submsg_count) ||
4285 (!oneofs && upb_msgdef_numoneofs(m))) {
4286 /* OOM. */
4287 upb_gfree(l);
4288 upb_gfree(fields);
4289 upb_gfree(submsgs);
4290 upb_gfree(oneofs);
4291 return NULL;
4292 }
4293
4294 l->data.field_count = upb_msgdef_numfields(m);
4295 l->data.oneof_count = upb_msgdef_numoneofs(m);
4296 l->data.fields = fields;
4297 l->data.submsgs = submsgs;
4298 l->data.oneofs = oneofs;
4299 l->data.is_proto2 = (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2);
4300
4301 /* Allocate data offsets in three stages:
4302 *
4303 * 1. hasbits.
4304 * 2. regular fields.
4305 * 3. oneof fields.
4306 *
4307 * OPT: There is a lot of room for optimization here to minimize the size.
4308 */
4309
4310 /* Allocate hasbits and set basic field attributes. */
4311 for (upb_msg_field_begin(&it, m), hasbit = 0;
4312 !upb_msg_field_done(&it);
4313 upb_msg_field_next(&it)) {
4314 const upb_fielddef* f = upb_msg_iter_field(&it);
4315 upb_msglayout_fieldinit_v1 *field = &fields[upb_fielddef_index(f)];
4316
4317 field->number = upb_fielddef_number(f);
4318 field->type = upb_fielddef_type(f);
4319 field->label = upb_fielddef_label(f);
4320
4321 if (upb_fielddef_containingoneof(f)) {
4322 field->oneof_index = upb_oneofdef_index(upb_fielddef_containingoneof(f));
4323 } else {
4324 field->oneof_index = UPB_NOT_IN_ONEOF;
4325 }
4326
4327 if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) {
4328 field->hasbit = hasbit++;
4329 }
4330 }
4331
4332 /* Account for space used by hasbits. */
4333 l->data.size = div_round_up(hasbit, 8);
4334
4335 /* Allocate non-oneof fields. */
4336 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
4337 upb_msg_field_next(&it)) {
4338 const upb_fielddef* f = upb_msg_iter_field(&it);
4339 size_t field_size = upb_msg_fielddefsize(f);
4340 size_t index = upb_fielddef_index(f);
4341
4342 if (upb_fielddef_containingoneof(f)) {
4343 /* Oneofs are handled separately below. */
4344 continue;
4345 }
4346
4347 fields[index].offset = upb_msglayout_place(l, field_size);
4348 }
4349
4350 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
4351 * and space for the actual data. */
4352 for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
4353 upb_msg_oneof_next(&oit)) {
4354 const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
4355 upb_oneof_iter fit;
4356
4357 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
4358 upb_msglayout_oneofinit_v1 *oneof = &oneofs[upb_oneofdef_index(o)];
4359 size_t field_size = 0;
4360
4361 /* Calculate field size: the max of all field sizes. */
4362 for (upb_oneof_begin(&fit, o);
4363 !upb_oneof_done(&fit);
4364 upb_oneof_next(&fit)) {
4365 const upb_fielddef* f = upb_oneof_iter_field(&fit);
4366 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
4367 }
4368
4369 /* Align and allocate case offset. */
4370 oneof->case_offset = upb_msglayout_place(l, case_size);
4371 oneof->data_offset = upb_msglayout_place(l, field_size);
4372 }
4373
4374 /* Size of the entire structure should be a multiple of its greatest
4375 * alignment. TODO: track overall alignment for real? */
4376 l->data.size = align_up(l->data.size, 8);
4377
4378 if (upb_msglayout_initdefault(l, m)) {
4379 return l;
4380 } else {
4381 upb_msglayout_free(l);
4382 return NULL;
4383 }
4384}
4385
4386
4387/** upb_msgfactory ************************************************************/
4388
4389struct upb_msgfactory {
4390 const upb_symtab *symtab; /* We own a ref. */
4391 upb_inttable layouts;
4392 upb_inttable mergehandlers;
4393};
4394
4395upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab) {
4396 upb_msgfactory *ret = upb_gmalloc(sizeof(*ret));
4397
4398 ret->symtab = symtab;
4399 upb_inttable_init(&ret->layouts, UPB_CTYPE_PTR);
4400 upb_inttable_init(&ret->mergehandlers, UPB_CTYPE_CONSTPTR);
4401
4402 return ret;
4403}
4404
4405void upb_msgfactory_free(upb_msgfactory *f) {
4406 upb_inttable_iter i;
4407 upb_inttable_begin(&i, &f->layouts);
4408 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4409 upb_msglayout *l = upb_value_getptr(upb_inttable_iter_value(&i));
4410 upb_msglayout_free(l);
4411 }
4412
4413 upb_inttable_begin(&i, &f->mergehandlers);
4414 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4415 const upb_handlers *h = upb_value_getconstptr(upb_inttable_iter_value(&i));
4416 upb_handlers_unref(h, f);
4417 }
4418
4419 upb_inttable_uninit(&f->layouts);
4420 upb_inttable_uninit(&f->mergehandlers);
4421 upb_gfree(f);
4422}
4423
4424const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f) {
4425 return f->symtab;
4426}
4427
4428const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
4429 const upb_msgdef *m) {
4430 upb_value v;
4431 UPB_ASSERT(upb_symtab_lookupmsg(f->symtab, upb_msgdef_fullname(m)) == m);
4432 UPB_ASSERT(!upb_msgdef_mapentry(m));
4433
4434 if (upb_inttable_lookupptr(&f->layouts, m, &v)) {
4435 UPB_ASSERT(upb_value_getptr(v));
4436 return upb_value_getptr(v);
4437 } else {
4438 upb_msgfactory *mutable_f = (void*)f;
4439 upb_msglayout *l = upb_msglayout_new(m);
4440 upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l));
4441 UPB_ASSERT(l);
4442 return l;
4443 }
4444}
4445
4446/* Our handlers that we don't expose externally. */
4447
4448void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) {
4449 uint32_t ofs = (uintptr_t)hd;
4450 upb_alloc *alloc = upb_msg_alloc(msg);
4451 upb_msgval val;
4452 UPB_UNUSED(size_hint);
4453
4454 val = upb_msgval_read(msg, ofs, upb_msgval_sizeof(UPB_TYPE_STRING));
4455
4456 upb_free(alloc, (void*)val.str.data);
4457 val.str.data = NULL;
4458 val.str.size = 0;
4459
4460 upb_msgval_write(msg, ofs, val, upb_msgval_sizeof(UPB_TYPE_STRING));
4461 return msg;
4462}
4463
4464size_t upb_msg_str(void *msg, const void *hd, const char *ptr, size_t size,
4465 const upb_bufhandle *handle) {
4466 uint32_t ofs = (uintptr_t)hd;
4467 upb_alloc *alloc = upb_msg_alloc(msg);
4468 upb_msgval val;
4469 size_t newsize;
4470 UPB_UNUSED(handle);
4471
4472 val = upb_msgval_read(msg, ofs, upb_msgval_sizeof(UPB_TYPE_STRING));
4473
4474 newsize = val.str.size + size;
4475 val.str.data = upb_realloc(alloc, (void*)val.str.data, val.str.size, newsize);
4476
4477 if (!val.str.data) {
4478 return false;
4479 }
4480
4481 memcpy((char*)val.str.data + val.str.size, ptr, size);
4482 val.str.size = newsize;
4483 upb_msgval_write(msg, ofs, val, upb_msgval_sizeof(UPB_TYPE_STRING));
4484 return size;
4485}
4486
4487static void callback(const void *closure, upb_handlers *h) {
4488 upb_msgfactory *factory = (upb_msgfactory*)closure;
4489 const upb_msgdef *md = upb_handlers_msgdef(h);
4490 const upb_msglayout* layout = upb_msgfactory_getlayout(factory, md);
4491 upb_msg_field_iter i;
4492 UPB_UNUSED(factory);
4493
4494 for(upb_msg_field_begin(&i, md);
4495 !upb_msg_field_done(&i);
4496 upb_msg_field_next(&i)) {
4497 const upb_fielddef *f = upb_msg_iter_field(&i);
4498 size_t offset = upb_msglayout_offset(layout, f);
4499 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
4500 upb_handlerattr_sethandlerdata(&attr, (void*)offset);
4501
4502 if (upb_fielddef_isseq(f)) {
4503 } else if (upb_fielddef_isstring(f)) {
4504 upb_handlers_setstartstr(h, f, upb_msg_startstr, &attr);
4505 upb_handlers_setstring(h, f, upb_msg_str, &attr);
4506 } else {
4507 upb_msg_setscalarhandler(
4508 h, f, offset, upb_msglayout_hasbit(layout, f));
4509 }
4510 }
4511}
4512
4513const upb_handlers *upb_msgfactory_getmergehandlers(upb_msgfactory *f,
4514 const upb_msgdef *m) {
4515 upb_msgfactory *mutable_f = (void*)f;
4516
4517 /* TODO(haberman): properly cache these. */
4518 const upb_handlers *ret = upb_handlers_newfrozen(m, f, callback, f);
4519 upb_inttable_push(&mutable_f->mergehandlers, upb_value_constptr(ret));
4520
4521 return ret;
4522}
4523
4524const upb_visitorplan *upb_msgfactory_getvisitorplan(upb_msgfactory *f,
4525 const upb_handlers *h) {
4526 const upb_msgdef *md = upb_handlers_msgdef(h);
4527 return (const upb_visitorplan*)upb_msgfactory_getlayout(f, md);
4528}
4529
4530
4531/** upb_visitor ***************************************************************/
4532
4533struct upb_visitor {
4534 const upb_msglayout *layout;
4535 upb_sink *sink;
4536};
4537
4538static upb_selector_t getsel2(const upb_fielddef *f, upb_handlertype_t type) {
4539 upb_selector_t ret;
4540 bool ok = upb_handlers_getselector(f, type, &ret);
4541 UPB_ASSERT(ok);
4542 return ret;
4543}
4544
4545static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f,
4546 const upb_msglayout *layout) {
4547 int field_index = upb_fielddef_index(f);
4548 if (upb_fielddef_isseq(f)) {
4549 return upb_msgval_getarr(upb_msg_get(msg, field_index, layout)) != NULL;
4550 } else if (upb_msgdef_syntax(upb_fielddef_containingtype(f)) ==
4551 UPB_SYNTAX_PROTO2) {
4552 return upb_msg_has(msg, field_index, layout);
4553 } else {
4554 upb_msgval val = upb_msg_get(msg, field_index, layout);
4555 switch (upb_fielddef_type(f)) {
4556 case UPB_TYPE_FLOAT:
4557 return upb_msgval_getfloat(val) != 0;
4558 case UPB_TYPE_DOUBLE:
4559 return upb_msgval_getdouble(val) != 0;
4560 case UPB_TYPE_BOOL:
4561 return upb_msgval_getbool(val);
4562 case UPB_TYPE_ENUM:
4563 case UPB_TYPE_INT32:
4564 return upb_msgval_getint32(val) != 0;
4565 case UPB_TYPE_UINT32:
4566 return upb_msgval_getuint32(val) != 0;
4567 case UPB_TYPE_INT64:
4568 return upb_msgval_getint64(val) != 0;
4569 case UPB_TYPE_UINT64:
4570 return upb_msgval_getuint64(val) != 0;
4571 case UPB_TYPE_STRING:
4572 case UPB_TYPE_BYTES:
4573 return upb_msgval_getstr(val).size > 0;
4574 case UPB_TYPE_MESSAGE:
4575 return upb_msgval_getmsg(val) != NULL;
4576 }
4577 UPB_UNREACHABLE();
4578 }
4579}
4580
4581static bool upb_visitor_visitmsg2(const upb_msg *msg,
4582 const upb_msglayout *layout, upb_sink *sink,
4583 int depth) {
4584 const upb_msgdef *md = upb_handlers_msgdef(sink->handlers);
4585 upb_msg_field_iter i;
4586 upb_status status;
4587
4588 upb_sink_startmsg(sink);
4589
4590 /* Protect against cycles (possible because users may freely reassign message
4591 * and repeated fields) by imposing a maximum recursion depth. */
4592 if (depth > ENCODE_MAX_NESTING) {
4593 return false;
4594 }
4595
4596 for (upb_msg_field_begin(&i, md);
4597 !upb_msg_field_done(&i);
4598 upb_msg_field_next(&i)) {
4599 upb_fielddef *f = upb_msg_iter_field(&i);
4600 upb_msgval val;
4601
4602 if (!upb_visitor_hasfield(msg, f, layout)) {
4603 continue;
4604 }
4605
4606 val = upb_msg_get(msg, upb_fielddef_index(f), layout);
4607
4608 if (upb_fielddef_isseq(f)) {
4609 const upb_array *arr = upb_msgval_getarr(val);
4610 UPB_ASSERT(arr);
4611 /* TODO: putary(ary, f, sink, depth);*/
4612 } else if (upb_fielddef_issubmsg(f)) {
4613 const upb_map *map = upb_msgval_getmap(val);
4614 UPB_ASSERT(map);
4615 /* TODO: putmap(map, f, sink, depth);*/
4616 } else if (upb_fielddef_isstring(f)) {
4617 /* TODO putstr(); */
4618 } else {
4619 upb_selector_t sel = getsel2(f, upb_handlers_getprimitivehandlertype(f));
4620 UPB_ASSERT(upb_fielddef_isprimitive(f));
4621
4622 switch (upb_fielddef_type(f)) {
4623 case UPB_TYPE_FLOAT:
4624 CHECK_TRUE(upb_sink_putfloat(sink, sel, upb_msgval_getfloat(val)));
4625 break;
4626 case UPB_TYPE_DOUBLE:
4627 CHECK_TRUE(upb_sink_putdouble(sink, sel, upb_msgval_getdouble(val)));
4628 break;
4629 case UPB_TYPE_BOOL:
4630 CHECK_TRUE(upb_sink_putbool(sink, sel, upb_msgval_getbool(val)));
4631 break;
4632 case UPB_TYPE_ENUM:
4633 case UPB_TYPE_INT32:
4634 CHECK_TRUE(upb_sink_putint32(sink, sel, upb_msgval_getint32(val)));
4635 break;
4636 case UPB_TYPE_UINT32:
4637 CHECK_TRUE(upb_sink_putuint32(sink, sel, upb_msgval_getuint32(val)));
4638 break;
4639 case UPB_TYPE_INT64:
4640 CHECK_TRUE(upb_sink_putint64(sink, sel, upb_msgval_getint64(val)));
4641 break;
4642 case UPB_TYPE_UINT64:
4643 CHECK_TRUE(upb_sink_putuint64(sink, sel, upb_msgval_getuint64(val)));
4644 break;
4645 case UPB_TYPE_STRING:
4646 case UPB_TYPE_BYTES:
4647 case UPB_TYPE_MESSAGE:
4648 UPB_UNREACHABLE();
4649 }
4650 }
4651 }
4652
4653 upb_sink_endmsg(sink, &status);
4654 return true;
4655}
4656
4657upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp,
4658 upb_sink *output) {
4659 upb_visitor *visitor = upb_env_malloc(e, sizeof(*visitor));
4660 visitor->layout = (const upb_msglayout*)vp;
4661 visitor->sink = output;
4662 return visitor;
4663}
4664
4665bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) {
4666 return upb_visitor_visitmsg2(msg, visitor->layout, visitor->sink, 0);
4667}
4668
4669
4670/** upb_msg *******************************************************************/
4671
/* If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 *
 * The expansion is parenthesized so that operators applied to DEREF(...) bind
 * to the dereferenced object rather than to the pointer expression. */
#define DEREF(msg, ofs, type) (*PTR_AT(msg, ofs, type))
4676
4677/* Internal members of a upb_msg. We can change this without breaking binary
4678 * compatibility. We put these before the user's data. The user's upb_msg*
4679 * points after the upb_msg_internal. */
4680
4681/* Used when a message is not extendable. */
4682typedef struct {
4683 /* TODO(haberman): add unknown fields. */
4684 upb_alloc *alloc;
4685} upb_msg_internal;
4686
4687/* Used when a message is extendable. */
4688typedef struct {
4689 upb_inttable *extdict;
4690 upb_msg_internal base;
4691} upb_msg_internal_withext;
4692
4693static int upb_msg_internalsize(const upb_msglayout *l) {
4694 return sizeof(upb_msg_internal) - l->data.extendable * sizeof(void*);
4695}
4696
4697static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
4698 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
4699}
4700
4701static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
4702 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
4703}
4704
4705static upb_msg_internal_withext *upb_msg_getinternalwithext(
4706 upb_msg *msg, const upb_msglayout *l) {
4707 UPB_ASSERT(l->data.extendable);
4708 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext));
4709}
4710
4711static const upb_msglayout_fieldinit_v1 *upb_msg_checkfield(
4712 int field_index, const upb_msglayout *l) {
4713 UPB_ASSERT(field_index >= 0 && field_index < l->data.field_count);
4714 return &l->data.fields[field_index];
4715}
4716
4717static bool upb_msg_inoneof(const upb_msglayout_fieldinit_v1 *field) {
4718 return field->oneof_index != UPB_NOT_IN_ONEOF;
4719}
4720
4721static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index,
4722 const upb_msglayout *l) {
4723 const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l);
4724 UPB_ASSERT(upb_msg_inoneof(field));
4725 return PTR_AT(msg, l->data.oneofs[field->oneof_index].case_offset, uint32_t);
4726}
4727
4728size_t upb_msg_sizeof(const upb_msglayout *l) {
4729 return l->data.size + upb_msg_internalsize(l);
4730}
4731
4732upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) {
4733 upb_msg *msg = VOIDPTR_AT(mem, upb_msg_internalsize(l));
4734
4735 /* Initialize normal members. */
4736 if (l->data.default_msg) {
4737 memcpy(msg, l->data.default_msg, l->data.size);
4738 } else {
4739 memset(msg, 0, l->data.size);
4740 }
4741
4742 /* Initialize internal members. */
4743 upb_msg_getinternal(msg)->alloc = a;
4744
4745 if (l->data.extendable) {
4746 upb_msg_getinternalwithext(msg, l)->extdict = NULL;
4747 }
4748
4749 return msg;
4750}
4751
4752void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) {
4753 if (l->data.extendable) {
4754 upb_inttable *ext_dict = upb_msg_getinternalwithext(msg, l)->extdict;
4755 if (ext_dict) {
4756 upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg));
4757 upb_free(upb_msg_alloc(msg), ext_dict);
4758 }
4759 }
4760
4761 return VOIDPTR_AT(msg, -upb_msg_internalsize(l));
4762}
4763
4764upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) {
4765 void *mem = upb_malloc(a, upb_msg_sizeof(l));
4766 return mem ? upb_msg_init(mem, l, a) : NULL;
4767}
4768
4769void upb_msg_free(upb_msg *msg, const upb_msglayout *l) {
4770 upb_free(upb_msg_alloc(msg), upb_msg_uninit(msg, l));
4771}
4772
4773upb_alloc *upb_msg_alloc(const upb_msg *msg) {
4774 return upb_msg_getinternal_const(msg)->alloc;
4775}
4776
4777bool upb_msg_has(const upb_msg *msg,
4778 int field_index,
4779 const upb_msglayout *l) {
4780 const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l);
4781
4782 UPB_ASSERT(l->data.is_proto2);
4783
4784 if (upb_msg_inoneof(field)) {
4785 /* Oneofs are set when the oneof number is set to this field. */
4786 return *upb_msg_oneofcase(msg, field_index, l) == field->number;
4787 } else {
4788 /* Other fields are set when their hasbit is set. */
4789 uint32_t hasbit = l->data.fields[field_index].hasbit;
4790 return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8));
4791 }
4792}
4793
4794upb_msgval upb_msg_get(const upb_msg *msg, int field_index,
4795 const upb_msglayout *l) {
4796 const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l);
4797 int size = upb_msg_fieldsize(field);
4798
4799 if (upb_msg_inoneof(field)) {
4800 if (*upb_msg_oneofcase(msg, field_index, l) == field->number) {
4801 size_t ofs = l->data.oneofs[field->oneof_index].data_offset;
4802 return upb_msgval_read(msg, ofs, size);
4803 } else {
4804 /* Return default. */
4805 return upb_msgval_read(l->data.default_msg, field->offset, size);
4806 }
4807 } else {
4808 return upb_msgval_read(msg, field->offset, size);
4809 }
4810}
4811
4812void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
4813 const upb_msglayout *l) {
4814 const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l);
4815 int size = upb_msg_fieldsize(field);
4816
4817 if (upb_msg_inoneof(field)) {
4818 size_t ofs = l->data.oneofs[field->oneof_index].data_offset;
4819 *upb_msg_oneofcase(msg, field_index, l) = field->number;
4820 upb_msgval_write(msg, ofs, val, size);
4821 } else {
4822 upb_msgval_write(msg, field->offset, val, size);
4823 }
4824}
4825
4826
4827/** upb_array *****************************************************************/
4828
/* Element |i| of |arr|'s storage viewed as an array of |type|.  Arguments and
 * the whole expansion are parenthesized so that arbitrary expressions can be
 * passed safely. */
#define DEREF_ARR(arr, i, type) (((type*)(arr)->data)[(i)])
4830
4831size_t upb_array_sizeof(upb_fieldtype_t type) {
4832 UPB_UNUSED(type);
4833 return sizeof(upb_array);
4834}
4835
4836void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *alloc) {
4837 arr->type = type;
4838 arr->data = NULL;
4839 arr->len = 0;
4840 arr->size = 0;
4841 arr->element_size = upb_msgval_sizeof(type);
4842 arr->alloc = alloc;
4843}
4844
4845void upb_array_uninit(upb_array *arr) {
4846 upb_free(arr->alloc, arr->data);
4847}
4848
4849upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a) {
4850 upb_array *ret = upb_malloc(a, upb_array_sizeof(type));
4851
4852 if (ret) {
4853 upb_array_init(ret, type, a);
4854 }
4855
4856 return ret;
4857}
4858
4859void upb_array_free(upb_array *arr) {
4860 upb_array_uninit(arr);
4861 upb_free(arr->alloc, arr);
4862}
4863
4864size_t upb_array_size(const upb_array *arr) {
4865 return arr->len;
4866}
4867
4868upb_fieldtype_t upb_array_type(const upb_array *arr) {
4869 return arr->type;
4870}
4871
4872upb_msgval upb_array_get(const upb_array *arr, size_t i) {
4873 UPB_ASSERT(i < arr->len);
4874 return upb_msgval_read(arr->data, i * arr->element_size, arr->element_size);
4875}
4876
4877bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
4878 UPB_ASSERT(i <= arr->len);
4879
4880 if (i == arr->len) {
4881 /* Extending the array. */
4882
4883 if (i == arr->size) {
4884 /* Need to reallocate. */
4885 size_t new_size = UPB_MAX(arr->size * 2, 8);
4886 size_t new_bytes = new_size * arr->element_size;
4887 size_t old_bytes = arr->size * arr->element_size;
4888 upb_msgval *new_data =
4889 upb_realloc(arr->alloc, arr->data, old_bytes, new_bytes);
4890
4891 if (!new_data) {
4892 return false;
4893 }
4894
4895 arr->data = new_data;
4896 arr->size = new_size;
4897 }
4898
4899 arr->len = i + 1;
4900 }
4901
4902 upb_msgval_write(arr->data, i * arr->element_size, val, arr->element_size);
4903 return true;
4904}
4905
4906
4907/** upb_map *******************************************************************/
4908
4909struct upb_map {
4910 upb_fieldtype_t key_type;
4911 upb_fieldtype_t val_type;
4912 /* We may want to optimize this to use inttable where possible, for greater
4913 * efficiency and lower memory footprint. */
4914 upb_strtable strtab;
4915 upb_alloc *alloc;
4916};
4917
4918static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key,
4919 const char **out_key, size_t *out_len) {
4920 switch (type) {
4921 case UPB_TYPE_STRING:
4922 /* Point to string data of the input key. */
4923 *out_key = key->str.data;
4924 *out_len = key->str.size;
4925 return;
4926 case UPB_TYPE_BOOL:
4927 case UPB_TYPE_INT32:
4928 case UPB_TYPE_UINT32:
4929 case UPB_TYPE_INT64:
4930 case UPB_TYPE_UINT64:
4931 /* Point to the key itself. XXX: big-endian. */
4932 *out_key = (const char*)key;
4933 *out_len = upb_msgval_sizeof(type);
4934 return;
4935 case UPB_TYPE_BYTES:
4936 case UPB_TYPE_DOUBLE:
4937 case UPB_TYPE_ENUM:
4938 case UPB_TYPE_FLOAT:
4939 case UPB_TYPE_MESSAGE:
4940 break; /* Cannot be a map key. */
4941 }
4942 UPB_UNREACHABLE();
4943}
4944
4945static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key,
4946 size_t len) {
4947 switch (type) {
4948 case UPB_TYPE_STRING:
4949 return upb_msgval_makestr(key, len);
4950 case UPB_TYPE_BOOL:
4951 case UPB_TYPE_INT32:
4952 case UPB_TYPE_UINT32:
4953 case UPB_TYPE_INT64:
4954 case UPB_TYPE_UINT64:
4955 return upb_msgval_read(key, 0, upb_msgval_sizeof(type));
4956 case UPB_TYPE_BYTES:
4957 case UPB_TYPE_DOUBLE:
4958 case UPB_TYPE_ENUM:
4959 case UPB_TYPE_FLOAT:
4960 case UPB_TYPE_MESSAGE:
4961 break; /* Cannot be a map key. */
4962 }
4963 UPB_UNREACHABLE();
4964}
4965
4966size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype) {
4967 /* Size does not currently depend on key/value type. */
4968 UPB_UNUSED(ktype);
4969 UPB_UNUSED(vtype);
4970 return sizeof(upb_map);
4971}
4972
4973bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype,
4974 upb_alloc *a) {
4975 upb_ctype_t vtabtype = upb_fieldtotabtype(vtype);
4976 UPB_ASSERT(upb_fieldtype_mapkeyok(ktype));
4977 map->key_type = ktype;
4978 map->val_type = vtype;
4979 map->alloc = a;
4980
4981 if (!upb_strtable_init2(&map->strtab, vtabtype, a)) {
4982 return false;
4983 }
4984
4985 return true;
4986}
4987
4988void upb_map_uninit(upb_map *map) {
4989 upb_strtable_uninit2(&map->strtab, map->alloc);
4990}
4991
4992upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype,
4993 upb_alloc *a) {
4994 upb_map *map = upb_malloc(a, upb_map_sizeof(ktype, vtype));
4995
4996 if (!map) {
4997 return NULL;
4998 }
4999
5000 if (!upb_map_init(map, ktype, vtype, a)) {
5001 return NULL;
5002 }
5003
5004 return map;
5005}
5006
5007void upb_map_free(upb_map *map) {
5008 upb_map_uninit(map);
5009 upb_free(map->alloc, map);
5010}
5011
5012size_t upb_map_size(const upb_map *map) {
5013 return upb_strtable_count(&map->strtab);
5014}
5015
5016upb_fieldtype_t upb_map_keytype(const upb_map *map) {
5017 return map->key_type;
5018}
5019
5020upb_fieldtype_t upb_map_valuetype(const upb_map *map) {
5021 return map->val_type;
5022}
5023
5024bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
5025 upb_value tabval;
5026 const char *key_str;
5027 size_t key_len;
5028 bool ret;
5029
5030 upb_map_tokey(map->key_type, &key, &key_str, &key_len);
5031 ret = upb_strtable_lookup2(&map->strtab, key_str, key_len, &tabval);
5032 if (ret) {
5033 memcpy(val, &tabval, sizeof(tabval));
5034 }
5035
5036 return ret;
5037}
5038
5039bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
5040 upb_msgval *removed) {
5041 const char *key_str;
5042 size_t key_len;
5043 upb_value tabval = upb_toval(val);
5044 upb_value removedtabval;
5045 upb_alloc *a = map->alloc;
5046
5047 upb_map_tokey(map->key_type, &key, &key_str, &key_len);
5048
5049 /* TODO(haberman): add overwrite operation to minimize number of lookups. */
5050 if (upb_strtable_lookup2(&map->strtab, key_str, key_len, NULL)) {
5051 upb_strtable_remove3(&map->strtab, key_str, key_len, &removedtabval, a);
5052 memcpy(&removed, &removedtabval, sizeof(removed));
5053 }
5054
5055 return upb_strtable_insert3(&map->strtab, key_str, key_len, tabval, a);
5056}
5057
5058bool upb_map_del(upb_map *map, upb_msgval key) {
5059 const char *key_str;
5060 size_t key_len;
5061 upb_alloc *a = map->alloc;
5062
5063 upb_map_tokey(map->key_type, &key, &key_str, &key_len);
5064 return upb_strtable_remove3(&map->strtab, key_str, key_len, NULL, a);
5065}
5066
5067
5068/** upb_mapiter ***************************************************************/
5069
5070struct upb_mapiter {
5071 upb_strtable_iter iter;
5072 upb_fieldtype_t key_type;
5073};
5074
5075size_t upb_mapiter_sizeof() {
5076 return sizeof(upb_mapiter);
5077}
5078
5079void upb_mapiter_begin(upb_mapiter *i, const upb_map *map) {
5080 upb_strtable_begin(&i->iter, &map->strtab);
5081 i->key_type = map->key_type;
5082}
5083
5084upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a) {
5085 upb_mapiter *ret = upb_malloc(a, upb_mapiter_sizeof());
5086
5087 if (!ret) {
5088 return NULL;
5089 }
5090
5091 upb_mapiter_begin(ret, t);
5092 return ret;
5093}
5094
5095void upb_mapiter_free(upb_mapiter *i, upb_alloc *a) {
5096 upb_free(a, i);
5097}
5098
5099void upb_mapiter_next(upb_mapiter *i) {
5100 upb_strtable_next(&i->iter);
5101}
5102
5103bool upb_mapiter_done(const upb_mapiter *i) {
5104 return upb_strtable_done(&i->iter);
5105}
5106
5107upb_msgval upb_mapiter_key(const upb_mapiter *i) {
5108 return upb_map_fromkey(i->key_type, upb_strtable_iter_key(&i->iter),
5109 upb_strtable_iter_keylength(&i->iter));
5110}
5111
5112upb_msgval upb_mapiter_value(const upb_mapiter *i) {
5113 return upb_msgval_fromval(upb_strtable_iter_value(&i->iter));
5114}
5115
5116void upb_mapiter_setdone(upb_mapiter *i) {
5117 upb_strtable_iter_setdone(&i->iter);
5118}
5119
5120bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) {
5121 return upb_strtable_iter_isequal(&i1->iter, &i2->iter);
5122}
5123
5124
5125/** Handlers for upb_msg ******************************************************/
5126
/* Handler data bound to each scalar setter below. */
typedef struct {
  size_t offset;   /* Byte offset of the field's storage in the message. */
  int32_t hasbit;  /* Hasbit index; the setters only set it when it is > 0. */
} upb_msg_handlerdata;

/* Fallback implementation if the handler is not specialized by the producer.
 *
 * Each generated setter treats the closure |c| as the message bytes, sets the
 * field's hasbit when the handler data carries a positive hasbit index, stores
 * |val| at the configured offset and returns true.
 * NOTE(review): a hasbit index of 0 is skipped by the "> 0" test -- confirm
 * that 0 is never a valid hasbit. */
#define MSG_WRITER(type, ctype)                                               \
  bool upb_msg_set ## type (void *c, const void *hd, ctype val) {             \
    const upb_msg_handlerdata *data = hd;                                     \
    uint8_t *bytes = c;                                                       \
    if (data->hasbit > 0) {                                                   \
      bytes[data->hasbit / 8] |= 1 << (data->hasbit % 8);                     \
    }                                                                         \
    *(ctype*)&bytes[data->offset] = val;                                      \
    return true;                                                              \
  }

MSG_WRITER(double, double)
MSG_WRITER(float,  float)
MSG_WRITER(int32,  int32_t)
MSG_WRITER(int64,  int64_t)
MSG_WRITER(uint32, uint32_t)
MSG_WRITER(uint64, uint64_t)
MSG_WRITER(bool,   bool)
5150
5151bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
5152 size_t offset, int32_t hasbit) {
5153 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
5154 bool ok;
5155
5156 upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
5157 if (!d) return false;
5158 d->offset = offset;
5159 d->hasbit = hasbit;
5160
5161 upb_handlerattr_sethandlerdata(&attr, d);
5162 upb_handlerattr_setalwaysok(&attr, true);
5163 upb_handlers_addcleanup(h, d, upb_gfree);
5164
5165#define TYPE(u, l) \
5166 case UPB_TYPE_##u: \
5167 ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
5168
5169 ok = false;
5170
5171 switch (upb_fielddef_type(f)) {
5172 TYPE(INT64, int64);
5173 TYPE(INT32, int32);
5174 TYPE(ENUM, int32);
5175 TYPE(UINT64, uint64);
5176 TYPE(UINT32, uint32);
5177 TYPE(DOUBLE, double);
5178 TYPE(FLOAT, float);
5179 TYPE(BOOL, bool);
5180 default: UPB_ASSERT(false); break;
5181 }
5182#undef TYPE
5183
5184 upb_handlerattr_uninit(&attr);
5185 return ok;
5186}
5187
5188bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
5189 upb_selector_t s,
5190 upb_fieldtype_t *type,
5191 size_t *offset,
5192 int32_t *hasbit) {
5193 const upb_msg_handlerdata *d;
5194 upb_func *f = upb_handlers_gethandler(h, s);
5195
5196 if ((upb_int64_handlerfunc*)f == upb_msg_setint64) {
5197 *type = UPB_TYPE_INT64;
5198 } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) {
5199 *type = UPB_TYPE_INT32;
5200 } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) {
5201 *type = UPB_TYPE_UINT64;
5202 } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) {
5203 *type = UPB_TYPE_UINT32;
5204 } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) {
5205 *type = UPB_TYPE_DOUBLE;
5206 } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) {
5207 *type = UPB_TYPE_FLOAT;
5208 } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) {
5209 *type = UPB_TYPE_BOOL;
5210 } else {
5211 return false;
5212 }
5213
5214 d = upb_handlers_gethandlerdata(h, s);
5215 *offset = d->offset;
5216 *hasbit = d->hasbit;
5217 return true;
5218}
Brian Silverman9c614bc2016-02-15 20:20:02 -05005219/*
5220** upb::RefCounted Implementation
5221**
5222** Our key invariants are:
5223** 1. reference cycles never span groups
5224** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
5225**
5226** The previous two are how we avoid leaking cycles. Other important
5227** invariants are:
5228** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
5229** this implies group(from) == group(to). (In practice, what we implement
5230** is even stronger; "from" and "to" will share a group if there has *ever*
5231** been a ref2(to, from), but all that is necessary for correctness is the
5232** weaker one).
5233** 4. mutable and immutable objects are never in the same group.
5234*/
5235
5236
5237#include <setjmp.h>
Brian Silverman9c614bc2016-02-15 20:20:02 -05005238
5239static void freeobj(upb_refcounted *o);
5240
/* Sentinel owner.  Only the address of untracked_val matters: the debug-mode
 * track()/untrack() return immediately for an owner equal to
 * UPB_UNTRACKED_REF. */
const char untracked_val = 0;
const void *UPB_UNTRACKED_REF = &untracked_val;
5243
5244/* arch-specific atomic primitives *******************************************/
5245
/* atomic_inc() increments the counter; atomic_dec() decrements it and returns
 * true when the counter reaches zero.  Every variant takes a uint32_t*, the
 * type refgroup()/unrefgroup() pass in. */

#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

static void atomic_inc(uint32_t *a) { (*a)++; }
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(WIN32) /*-------------------------------------------------------*/

#include <Windows.h>

/* The Interlocked* functions operate on a 32-bit LONG. */
static void atomic_inc(uint32_t *a) {
  InterlockedIncrement((volatile LONG*)a);
}
static bool atomic_dec(uint32_t *a) {
  return InterlockedDecrement((volatile LONG*)a) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU.  \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
5269
/* Shared refcount that every static object points to.  refgroup() and
 * unrefgroup() recognise it by address and never modify it. */
uint32_t static_refcount = (uint32_t)-1;
5273
5274/* We can avoid atomic ops for statically-declared objects.
5275 * This is a minor optimization but nice since we can avoid degrading under
5276 * contention in this case. */
5277
5278static void refgroup(uint32_t *group) {
5279 if (group != &static_refcount)
5280 atomic_inc(group);
5281}
5282
5283static bool unrefgroup(uint32_t *group) {
5284 if (group == &static_refcount) {
5285 return false;
5286 } else {
5287 return atomic_dec(group);
5288 }
5289}
5290
5291
5292/* Reference tracking (debug only) ********************************************/
5293
5294#ifdef UPB_DEBUG_REFS
5295
#ifdef UPB_THREAD_UNSAFE

/* Single-threaded build: locking is a no-op. */
static void upb_lock(void) {}
static void upb_unlock(void) {}

#else

/* User must define functions that lock/unlock a global mutex and link this
 * file against them.  Declared with (void) so that they are real prototypes
 * rather than old-style declarations with unspecified parameters. */
void upb_lock(void);
void upb_unlock(void);

#endif
5309
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we use an allocator that
 * immediately aborts on failure (avoiding the global allocator, which might
 * inject failures). */
5316
5317#include <stdlib.h>
5318
5319static void *upb_debugrefs_allocfunc(upb_alloc *alloc, void *ptr,
5320 size_t oldsize, size_t size) {
5321 UPB_UNUSED(alloc);
5322 UPB_UNUSED(oldsize);
5323 if (size == 0) {
5324 free(ptr);
5325 return NULL;
5326 } else {
5327 void *ret = realloc(ptr, size);
5328
5329 if (!ret) {
5330 abort();
5331 }
5332
5333 return ret;
5334 }
5335}
5336
5337upb_alloc upb_alloc_debugrefs = {&upb_debugrefs_allocfunc};
Brian Silverman9c614bc2016-02-15 20:20:02 -05005338
5339typedef struct {
5340 int count; /* How many refs there are (duplicates only allowed for ref2). */
5341 bool is_ref2;
5342} trackedref;
5343
5344static trackedref *trackedref_new(bool is_ref2) {
Austin Schuh40c16522018-10-28 20:27:54 -07005345 trackedref *ret = upb_malloc(&upb_alloc_debugrefs, sizeof(*ret));
Brian Silverman9c614bc2016-02-15 20:20:02 -05005346 ret->count = 1;
5347 ret->is_ref2 = is_ref2;
5348 return ret;
5349}
5350
5351static void track(const upb_refcounted *r, const void *owner, bool ref2) {
5352 upb_value v;
5353
Austin Schuh40c16522018-10-28 20:27:54 -07005354 UPB_ASSERT(owner);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005355 if (owner == UPB_UNTRACKED_REF) return;
5356
5357 upb_lock();
5358 if (upb_inttable_lookupptr(r->refs, owner, &v)) {
5359 trackedref *ref = upb_value_getptr(v);
5360 /* Since we allow multiple ref2's for the same to/from pair without
5361 * allocating separate memory for each one, we lose the fine-grained
5362 * tracking behavior we get with regular refs. Since ref2s only happen
5363 * inside upb, we'll accept this limitation until/unless there is a really
5364 * difficult upb-internal bug that can't be figured out without it. */
Austin Schuh40c16522018-10-28 20:27:54 -07005365 UPB_ASSERT(ref2);
5366 UPB_ASSERT(ref->is_ref2);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005367 ref->count++;
5368 } else {
5369 trackedref *ref = trackedref_new(ref2);
Austin Schuh40c16522018-10-28 20:27:54 -07005370 upb_inttable_insertptr2(r->refs, owner, upb_value_ptr(ref),
5371 &upb_alloc_debugrefs);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005372 if (ref2) {
5373 /* We know this cast is safe when it is a ref2, because it's coming from
5374 * another refcounted object. */
5375 const upb_refcounted *from = owner;
Austin Schuh40c16522018-10-28 20:27:54 -07005376 UPB_ASSERT(!upb_inttable_lookupptr(from->ref2s, r, NULL));
5377 upb_inttable_insertptr2(from->ref2s, r, upb_value_ptr(NULL),
5378 &upb_alloc_debugrefs);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005379 }
5380 }
5381 upb_unlock();
5382}
5383
5384static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
5385 upb_value v;
5386 bool found;
5387 trackedref *ref;
5388
Austin Schuh40c16522018-10-28 20:27:54 -07005389 UPB_ASSERT(owner);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005390 if (owner == UPB_UNTRACKED_REF) return;
5391
5392 upb_lock();
5393 found = upb_inttable_lookupptr(r->refs, owner, &v);
5394 /* This assert will fail if an owner attempts to release a ref it didn't have. */
Austin Schuh40c16522018-10-28 20:27:54 -07005395 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005396 ref = upb_value_getptr(v);
Austin Schuh40c16522018-10-28 20:27:54 -07005397 UPB_ASSERT(ref->is_ref2 == ref2);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005398 if (--ref->count == 0) {
5399 free(ref);
5400 upb_inttable_removeptr(r->refs, owner, NULL);
5401 if (ref2) {
5402 /* We know this cast is safe when it is a ref2, because it's coming from
5403 * another refcounted object. */
5404 const upb_refcounted *from = owner;
5405 bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -07005406 UPB_ASSERT(removed);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005407 }
5408 }
5409 upb_unlock();
5410}
5411
5412static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
5413 upb_value v;
5414 bool found;
5415 trackedref *ref;
5416
5417 upb_lock();
5418 found = upb_inttable_lookupptr(r->refs, owner, &v);
Austin Schuh40c16522018-10-28 20:27:54 -07005419 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005420 ref = upb_value_getptr(v);
Austin Schuh40c16522018-10-28 20:27:54 -07005421 UPB_ASSERT(ref->is_ref2 == ref2);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005422 upb_unlock();
5423}
5424
5425/* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
5426 * originate from the given owner. */
5427static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
5428 upb_inttable_iter i;
5429
5430 upb_lock();
5431 upb_inttable_begin(&i, owner->ref2s);
5432 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
5433 upb_value v;
5434 upb_value count;
5435 trackedref *ref;
Brian Silverman9c614bc2016-02-15 20:20:02 -05005436 bool found;
5437
5438 upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
5439
5440 /* To get the count we need to look in the target's table. */
5441 found = upb_inttable_lookupptr(to->refs, owner, &v);
Austin Schuh40c16522018-10-28 20:27:54 -07005442 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005443 ref = upb_value_getptr(v);
5444 count = upb_value_int32(ref->count);
5445
Austin Schuh40c16522018-10-28 20:27:54 -07005446 upb_inttable_insertptr2(tab, to, count, &upb_alloc_debugrefs);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005447 }
5448 upb_unlock();
5449}
5450
5451typedef struct {
5452 upb_inttable ref2;
5453 const upb_refcounted *obj;
5454} check_state;
5455
5456static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
5457 void *closure) {
5458 check_state *s = closure;
5459 upb_inttable *ref2 = &s->ref2;
5460 upb_value v;
5461 bool removed;
5462 int32_t newcount;
5463
Austin Schuh40c16522018-10-28 20:27:54 -07005464 UPB_ASSERT(obj == s->obj);
5465 UPB_ASSERT(subobj);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005466 removed = upb_inttable_removeptr(ref2, subobj, &v);
5467 /* The following assertion will fail if the visit() function visits a subobj
5468 * that it did not have a ref2 on, or visits the same subobj too many times. */
Austin Schuh40c16522018-10-28 20:27:54 -07005469 UPB_ASSERT(removed);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005470 newcount = upb_value_getint32(v) - 1;
5471 if (newcount > 0) {
Austin Schuh40c16522018-10-28 20:27:54 -07005472 upb_inttable_insert2(ref2, (uintptr_t)subobj, upb_value_int32(newcount),
5473 &upb_alloc_debugrefs);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005474 }
5475}
5476
5477static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
5478 void *closure) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05005479 /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
5480 * exactly the set of nodes that visit() should visit. So we verify visit()'s
5481 * correctness here. */
5482 check_state state;
5483 state.obj = r;
Austin Schuh40c16522018-10-28 20:27:54 -07005484 upb_inttable_init2(&state.ref2, UPB_CTYPE_INT32, &upb_alloc_debugrefs);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005485 getref2s(r, &state.ref2);
5486
5487 /* This should visit any children in the ref2 table. */
5488 if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
5489
5490 /* This assertion will fail if the visit() function missed any children. */
Austin Schuh40c16522018-10-28 20:27:54 -07005491 UPB_ASSERT(upb_inttable_count(&state.ref2) == 0);
5492 upb_inttable_uninit2(&state.ref2, &upb_alloc_debugrefs);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005493 if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
5494}
5495
Austin Schuh40c16522018-10-28 20:27:54 -07005496static void trackinit(upb_refcounted *r) {
5497 r->refs = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->refs));
5498 r->ref2s = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->ref2s));
5499 upb_inttable_init2(r->refs, UPB_CTYPE_PTR, &upb_alloc_debugrefs);
5500 upb_inttable_init2(r->ref2s, UPB_CTYPE_PTR, &upb_alloc_debugrefs);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005501}
5502
5503static void trackfree(const upb_refcounted *r) {
Austin Schuh40c16522018-10-28 20:27:54 -07005504 upb_inttable_uninit2(r->refs, &upb_alloc_debugrefs);
5505 upb_inttable_uninit2(r->ref2s, &upb_alloc_debugrefs);
5506 upb_free(&upb_alloc_debugrefs, r->refs);
5507 upb_free(&upb_alloc_debugrefs, r->ref2s);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005508}
5509
5510#else
5511
5512static void track(const upb_refcounted *r, const void *owner, bool ref2) {
5513 UPB_UNUSED(r);
5514 UPB_UNUSED(owner);
5515 UPB_UNUSED(ref2);
5516}
5517
5518static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
5519 UPB_UNUSED(r);
5520 UPB_UNUSED(owner);
5521 UPB_UNUSED(ref2);
5522}
5523
5524static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
5525 UPB_UNUSED(r);
5526 UPB_UNUSED(owner);
5527 UPB_UNUSED(ref2);
5528}
5529
Austin Schuh40c16522018-10-28 20:27:54 -07005530static void trackinit(upb_refcounted *r) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05005531 UPB_UNUSED(r);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005532}
5533
5534static void trackfree(const upb_refcounted *r) {
5535 UPB_UNUSED(r);
5536}
5537
5538static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
5539 void *closure) {
5540 if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
5541}
5542
5543#endif /* UPB_DEBUG_REFS */
5544
5545
5546/* freeze() *******************************************************************/
5547
5548/* The freeze() operation is by far the most complicated part of this scheme.
5549 * We compute strongly-connected components and then mutate the graph such that
5550 * we preserve the invariants documented at the top of this file. And we must
5551 * handle out-of-memory errors gracefully (without leaving the graph
5552 * inconsistent), which adds to the fun. */
5553
5554/* The state used by the freeze operation (shared across many functions). */
5555typedef struct {
5556 int depth;
5557 int maxdepth;
5558 uint64_t index;
5559 /* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by
5560 * color. */
5561 upb_inttable objattr;
5562 upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */
5563 upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */
5564 upb_status *status;
5565 jmp_buf err;
5566} tarjan;
5567
5568static void release_ref2(const upb_refcounted *obj,
5569 const upb_refcounted *subobj,
5570 void *closure);
5571
5572/* Node attributes -----------------------------------------------------------*/
5573
/* Node colors.  Once the analysis phase is over every node in the attribute
 * map is either GRAY or WHITE.  The values fit in two bits because color()
 * reads them from the low two bits of the attribute word. */
typedef enum {
  BLACK = 0,  /* Object has not been seen. */
  GRAY = 1,   /* Found via a refgroup, but may not be reachable. */
  GREEN = 2,  /* Reachable and currently on the Tarjan stack. */
  WHITE = 3   /* Reachable and already assigned a group (SCC). */
} color_t;
5582
5583UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
5584UPB_NORETURN static void oom(tarjan *t) {
5585 upb_status_seterrmsg(t->status, "out of memory");
5586 err(t);
5587}
5588
5589static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
5590 upb_value v;
5591 return upb_inttable_lookupptr(&t->objattr, r, &v) ?
5592 upb_value_getuint64(v) : 0;
5593}
5594
5595static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
5596 upb_value v;
5597 bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
Austin Schuh40c16522018-10-28 20:27:54 -07005598 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005599 return upb_value_getuint64(v);
5600}
5601
5602static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
5603 upb_inttable_removeptr(&t->objattr, r, NULL);
5604 upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr));
5605}
5606
5607static color_t color(tarjan *t, const upb_refcounted *r) {
5608 return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */
5609}
5610
5611static void set_gray(tarjan *t, const upb_refcounted *r) {
Austin Schuh40c16522018-10-28 20:27:54 -07005612 UPB_ASSERT(color(t, r) == BLACK);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005613 setattr(t, r, GRAY);
5614}
5615
5616/* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
5617static void push(tarjan *t, const upb_refcounted *r) {
Austin Schuh40c16522018-10-28 20:27:54 -07005618 UPB_ASSERT(color(t, r) == BLACK || color(t, r) == GRAY);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005619 /* This defines the attr layout for the GREEN state. "index" and "lowlink"
5620 * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
5621 setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
5622 if (++t->index == 0x80000000) {
5623 upb_status_seterrmsg(t->status, "too many objects to freeze");
5624 err(t);
5625 }
5626 upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
5627}
5628
5629/* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
5630 * SCC group. */
5631static upb_refcounted *pop(tarjan *t) {
5632 upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
Austin Schuh40c16522018-10-28 20:27:54 -07005633 UPB_ASSERT(color(t, r) == GREEN);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005634 /* This defines the attr layout for nodes in the WHITE state.
5635 * Top of group stack is [group, NULL]; we point at group. */
5636 setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
5637 return r;
5638}
5639
5640static void tarjan_newgroup(tarjan *t) {
Austin Schuh40c16522018-10-28 20:27:54 -07005641 uint32_t *group = upb_gmalloc(sizeof(*group));
Brian Silverman9c614bc2016-02-15 20:20:02 -05005642 if (!group) oom(t);
5643 /* Push group and empty group leader (we'll fill in leader later). */
5644 if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
5645 !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
Austin Schuh40c16522018-10-28 20:27:54 -07005646 upb_gfree(group);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005647 oom(t);
5648 }
5649 *group = 0;
5650}
5651
5652static uint32_t idx(tarjan *t, const upb_refcounted *r) {
Austin Schuh40c16522018-10-28 20:27:54 -07005653 UPB_ASSERT(color(t, r) == GREEN);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005654 return (getattr(t, r) >> 2) & 0x7FFFFFFF;
5655}
5656
5657static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
5658 if (color(t, r) == GREEN) {
5659 return getattr(t, r) >> 33;
5660 } else {
5661 return UINT32_MAX;
5662 }
5663}
5664
5665static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
Austin Schuh40c16522018-10-28 20:27:54 -07005666 UPB_ASSERT(color(t, r) == GREEN);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005667 setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF));
5668}
5669
5670static uint32_t *group(tarjan *t, upb_refcounted *r) {
5671 uint64_t groupnum;
5672 upb_value v;
5673 bool found;
5674
Austin Schuh40c16522018-10-28 20:27:54 -07005675 UPB_ASSERT(color(t, r) == WHITE);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005676 groupnum = getattr(t, r) >> 8;
5677 found = upb_inttable_lookup(&t->groups, groupnum, &v);
Austin Schuh40c16522018-10-28 20:27:54 -07005678 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005679 return upb_value_getptr(v);
5680}
5681
5682/* If the group leader for this object's group has not previously been set,
5683 * the given object is assigned to be its leader. */
5684static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
5685 uint64_t leader_slot;
5686 upb_value v;
5687 bool found;
5688
Austin Schuh40c16522018-10-28 20:27:54 -07005689 UPB_ASSERT(color(t, r) == WHITE);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005690 leader_slot = (getattr(t, r) >> 8) + 1;
5691 found = upb_inttable_lookup(&t->groups, leader_slot, &v);
Austin Schuh40c16522018-10-28 20:27:54 -07005692 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005693 if (upb_value_getptr(v)) {
5694 return upb_value_getptr(v);
5695 } else {
5696 upb_inttable_remove(&t->groups, leader_slot, NULL);
5697 upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
5698 return r;
5699 }
5700}
5701
5702
5703/* Tarjan's algorithm --------------------------------------------------------*/
5704
5705/* See:
5706 * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
5707static void do_tarjan(const upb_refcounted *obj, tarjan *t);
5708
5709static void tarjan_visit(const upb_refcounted *obj,
5710 const upb_refcounted *subobj,
5711 void *closure) {
5712 tarjan *t = closure;
5713 if (++t->depth > t->maxdepth) {
5714 upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
5715 err(t);
5716 } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
5717 /* Do nothing: we don't want to visit or color already-frozen nodes,
5718 * and WHITE nodes have already been assigned a SCC. */
5719 } else if (color(t, subobj) < GREEN) {
5720 /* Subdef has not yet been visited; recurse on it. */
5721 do_tarjan(subobj, t);
5722 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
5723 } else if (color(t, subobj) == GREEN) {
5724 /* Subdef is in the stack and hence in the current SCC. */
5725 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
5726 }
5727 --t->depth;
5728}
5729
5730static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
5731 if (color(t, obj) == BLACK) {
5732 /* We haven't seen this object's group; mark the whole group GRAY. */
5733 const upb_refcounted *o = obj;
5734 do { set_gray(t, o); } while ((o = o->next) != obj);
5735 }
5736
5737 push(t, obj);
5738 visit(obj, tarjan_visit, t);
5739 if (lowlink(t, obj) == idx(t, obj)) {
5740 tarjan_newgroup(t);
5741 while (pop(t) != obj)
5742 ;
5743 }
5744}
5745
5746
5747/* freeze() ------------------------------------------------------------------*/
5748
5749static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
5750 void *_t) {
5751 tarjan *t = _t;
Austin Schuh40c16522018-10-28 20:27:54 -07005752 UPB_ASSERT(color(t, r) > BLACK);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005753 if (color(t, subobj) > BLACK && r->group != subobj->group) {
5754 /* Previously this ref was not reflected in subobj->group because they
5755 * were in the same group; now that they are split a ref must be taken. */
5756 refgroup(subobj->group);
5757 }
5758}
5759
5760static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
5761 int maxdepth) {
5762 volatile bool ret = false;
5763 int i;
5764 upb_inttable_iter iter;
5765
5766 /* We run in two passes so that we can allocate all memory before performing
5767 * any mutation of the input -- this allows us to leave the input unchanged
5768 * in the case of memory allocation failure. */
5769 tarjan t;
5770 t.index = 0;
5771 t.depth = 0;
5772 t.maxdepth = maxdepth;
5773 t.status = s;
5774 if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
5775 if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
5776 if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
5777 if (setjmp(t.err) != 0) goto err4;
5778
5779
5780 for (i = 0; i < n; i++) {
5781 if (color(&t, roots[i]) < GREEN) {
5782 do_tarjan(roots[i], &t);
5783 }
5784 }
5785
5786 /* If we've made it this far, no further errors are possible so it's safe to
5787 * mutate the objects without risk of leaving them in an inconsistent state. */
5788 ret = true;
5789
5790 /* The transformation that follows requires care. The preconditions are:
5791 * - all objects in attr map are WHITE or GRAY, and are in mutable groups
5792 * (groups of all mutable objs)
5793 * - no ref2(to, from) refs have incremented count(to) if both "to" and
5794 * "from" are in our attr map (this follows from invariants (2) and (3)) */
5795
5796 /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
5797 * new groups according to the SCC's we computed. These new groups will
5798 * consist of only frozen objects. None will be immediately collectible,
5799 * because WHITE objects are by definition reachable from one of "roots",
5800 * which the caller must own refs on. */
5801 upb_inttable_begin(&iter, &t.objattr);
5802 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
5803 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
5804 /* Since removal from a singly-linked list requires access to the object's
5805 * predecessor, we consider obj->next instead of obj for moving. With the
5806 * while() loop we guarantee that we will visit every node's predecessor.
5807 * Proof:
5808 * 1. every node's predecessor is in our attr map.
5809 * 2. though the loop body may change a node's predecessor, it will only
5810 * change it to be the node we are currently operating on, so with a
5811 * while() loop we guarantee ourselves the chance to remove each node. */
5812 while (color(&t, obj->next) == WHITE &&
5813 group(&t, obj->next) != obj->next->group) {
5814 upb_refcounted *leader;
5815
5816 /* Remove from old group. */
5817 upb_refcounted *move = obj->next;
5818 if (obj == move) {
5819 /* Removing the last object from a group. */
Austin Schuh40c16522018-10-28 20:27:54 -07005820 UPB_ASSERT(*obj->group == obj->individual_count);
5821 upb_gfree(obj->group);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005822 } else {
5823 obj->next = move->next;
5824 /* This may decrease to zero; we'll collect GRAY objects (if any) that
5825 * remain in the group in the third pass. */
Austin Schuh40c16522018-10-28 20:27:54 -07005826 UPB_ASSERT(*move->group >= move->individual_count);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005827 *move->group -= move->individual_count;
5828 }
5829
5830 /* Add to new group. */
5831 leader = groupleader(&t, move);
5832 if (move == leader) {
5833 /* First object added to new group is its leader. */
5834 move->group = group(&t, move);
5835 move->next = move;
5836 *move->group = move->individual_count;
5837 } else {
5838 /* Group already has at least one object in it. */
Austin Schuh40c16522018-10-28 20:27:54 -07005839 UPB_ASSERT(leader->group == group(&t, move));
Brian Silverman9c614bc2016-02-15 20:20:02 -05005840 move->group = group(&t, move);
5841 move->next = leader->next;
5842 leader->next = move;
5843 *move->group += move->individual_count;
5844 }
5845
5846 move->is_frozen = true;
5847 }
5848 }
5849
5850 /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
5851 * increment count(to) if group(obj) != group(to) (which could now be the
5852 * case if "to" was just frozen). */
5853 upb_inttable_begin(&iter, &t.objattr);
5854 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
5855 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
5856 visit(obj, crossref, &t);
5857 }
5858
5859 /* Pass 3: GRAY objects are collected if their group's refcount dropped to
5860 * zero when we removed its white nodes. This can happen if they had only
5861 * been kept alive by virtue of sharing a group with an object that was just
5862 * frozen.
5863 *
5864 * It is important that we do this last, since the GRAY object's free()
5865 * function could call unref2() on just-frozen objects, which will decrement
5866 * refs that were added in pass 2. */
5867 upb_inttable_begin(&iter, &t.objattr);
5868 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
5869 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
5870 if (obj->group == NULL || *obj->group == 0) {
5871 if (obj->group) {
5872 upb_refcounted *o;
5873
5874 /* We eagerly free() the group's count (since we can't easily determine
5875 * the group's remaining size it's the easiest way to ensure it gets
5876 * done). */
Austin Schuh40c16522018-10-28 20:27:54 -07005877 upb_gfree(obj->group);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005878
5879 /* Visit to release ref2's (done in a separate pass since release_ref2
5880 * depends on o->group being unmodified so it can test merged()). */
5881 o = obj;
5882 do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
5883
5884 /* Mark "group" fields as NULL so we know to free the objects later in
5885 * this loop, but also don't try to delete the group twice. */
5886 o = obj;
5887 do { o->group = NULL; } while ((o = o->next) != obj);
5888 }
5889 freeobj(obj);
5890 }
5891 }
5892
5893err4:
5894 if (!ret) {
5895 upb_inttable_begin(&iter, &t.groups);
5896 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
Austin Schuh40c16522018-10-28 20:27:54 -07005897 upb_gfree(upb_value_getptr(upb_inttable_iter_value(&iter)));
Brian Silverman9c614bc2016-02-15 20:20:02 -05005898 }
5899 upb_inttable_uninit(&t.groups);
5900err3:
5901 upb_inttable_uninit(&t.stack);
5902err2:
5903 upb_inttable_uninit(&t.objattr);
5904err1:
5905 return ret;
5906}
5907
5908
5909/* Misc internal functions ***************************************************/
5910
5911static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
5912 return r->group == r2->group;
5913}
5914
5915static void merge(upb_refcounted *r, upb_refcounted *from) {
5916 upb_refcounted *base;
5917 upb_refcounted *tmp;
5918
5919 if (merged(r, from)) return;
5920 *r->group += *from->group;
Austin Schuh40c16522018-10-28 20:27:54 -07005921 upb_gfree(from->group);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005922 base = from;
5923
5924 /* Set all refcount pointers in the "from" chain to the merged refcount.
5925 *
5926 * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
5927 * if the user continuously extends a group by one object. Prevent this by
5928 * using one of the techniques in this paper:
Austin Schuh40c16522018-10-28 20:27:54 -07005929 * http://bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/Lectures/Union-Find-Tarjan.pdf */
Brian Silverman9c614bc2016-02-15 20:20:02 -05005930 do { from->group = r->group; } while ((from = from->next) != base);
5931
5932 /* Merge the two circularly linked lists by swapping their next pointers. */
5933 tmp = r->next;
5934 r->next = base->next;
5935 base->next = tmp;
5936}
5937
5938static void unref(const upb_refcounted *r);
5939
5940static void release_ref2(const upb_refcounted *obj,
5941 const upb_refcounted *subobj,
5942 void *closure) {
5943 UPB_UNUSED(closure);
5944 untrack(subobj, obj, true);
5945 if (!merged(obj, subobj)) {
Austin Schuh40c16522018-10-28 20:27:54 -07005946 UPB_ASSERT(subobj->is_frozen);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005947 unref(subobj);
5948 }
5949}
5950
5951static void unref(const upb_refcounted *r) {
5952 if (unrefgroup(r->group)) {
5953 const upb_refcounted *o;
5954
Austin Schuh40c16522018-10-28 20:27:54 -07005955 upb_gfree(r->group);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005956
5957 /* In two passes, since release_ref2 needs a guarantee that any subobjs
5958 * are alive. */
5959 o = r;
5960 do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
5961
5962 o = r;
5963 do {
5964 const upb_refcounted *next = o->next;
Austin Schuh40c16522018-10-28 20:27:54 -07005965 UPB_ASSERT(o->is_frozen || o->individual_count == 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005966 freeobj((upb_refcounted*)o);
5967 o = next;
5968 } while(o != r);
5969 }
5970}
5971
5972static void freeobj(upb_refcounted *o) {
5973 trackfree(o);
5974 o->vtbl->free((upb_refcounted*)o);
5975}
5976
5977
5978/* Public interface ***********************************************************/
5979
5980bool upb_refcounted_init(upb_refcounted *r,
5981 const struct upb_refcounted_vtbl *vtbl,
5982 const void *owner) {
5983#ifndef NDEBUG
5984 /* Endianness check. This is unrelated to upb_refcounted, it's just a
5985 * convenient place to put the check that we can be assured will run for
5986 * basically every program using upb. */
5987 const int x = 1;
5988#ifdef UPB_BIG_ENDIAN
Austin Schuh40c16522018-10-28 20:27:54 -07005989 UPB_ASSERT(*(char*)&x != 1);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005990#else
Austin Schuh40c16522018-10-28 20:27:54 -07005991 UPB_ASSERT(*(char*)&x == 1);
Brian Silverman9c614bc2016-02-15 20:20:02 -05005992#endif
5993#endif
5994
5995 r->next = r;
5996 r->vtbl = vtbl;
5997 r->individual_count = 0;
5998 r->is_frozen = false;
Austin Schuh40c16522018-10-28 20:27:54 -07005999 r->group = upb_gmalloc(sizeof(*r->group));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006000 if (!r->group) return false;
6001 *r->group = 0;
Austin Schuh40c16522018-10-28 20:27:54 -07006002 trackinit(r);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006003 upb_refcounted_ref(r, owner);
6004 return true;
6005}
6006
6007bool upb_refcounted_isfrozen(const upb_refcounted *r) {
6008 return r->is_frozen;
6009}
6010
6011void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
6012 track(r, owner, false);
6013 if (!r->is_frozen)
6014 ((upb_refcounted*)r)->individual_count++;
6015 refgroup(r->group);
6016}
6017
6018void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
6019 untrack(r, owner, false);
6020 if (!r->is_frozen)
6021 ((upb_refcounted*)r)->individual_count--;
6022 unref(r);
6023}
6024
6025void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
Austin Schuh40c16522018-10-28 20:27:54 -07006026 UPB_ASSERT(!from->is_frozen); /* Non-const pointer implies this. */
Brian Silverman9c614bc2016-02-15 20:20:02 -05006027 track(r, from, true);
6028 if (r->is_frozen) {
6029 refgroup(r->group);
6030 } else {
6031 merge((upb_refcounted*)r, from);
6032 }
6033}
6034
6035void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
Austin Schuh40c16522018-10-28 20:27:54 -07006036 UPB_ASSERT(!from->is_frozen); /* Non-const pointer implies this. */
Brian Silverman9c614bc2016-02-15 20:20:02 -05006037 untrack(r, from, true);
6038 if (r->is_frozen) {
6039 unref(r);
6040 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07006041 UPB_ASSERT(merged(r, from));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006042 }
6043}
6044
6045void upb_refcounted_donateref(
6046 const upb_refcounted *r, const void *from, const void *to) {
Austin Schuh40c16522018-10-28 20:27:54 -07006047 UPB_ASSERT(from != to);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006048 if (to != NULL)
6049 upb_refcounted_ref(r, to);
6050 if (from != NULL)
6051 upb_refcounted_unref(r, from);
6052}
6053
6054void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
6055 checkref(r, owner, false);
6056}
6057
6058bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
6059 int maxdepth) {
6060 int i;
Austin Schuh40c16522018-10-28 20:27:54 -07006061 bool ret;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006062 for (i = 0; i < n; i++) {
Austin Schuh40c16522018-10-28 20:27:54 -07006063 UPB_ASSERT(!roots[i]->is_frozen);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006064 }
Austin Schuh40c16522018-10-28 20:27:54 -07006065 ret = freeze(roots, n, s, maxdepth);
6066 UPB_ASSERT(!s || ret == upb_ok(s));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006067 return ret;
6068}
6069
Brian Silverman9c614bc2016-02-15 20:20:02 -05006070
Austin Schuh40c16522018-10-28 20:27:54 -07006071bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink *sink) {
6072 void *subc;
6073 bool ret;
6074 upb_bufhandle handle;
6075 upb_bufhandle_init(&handle);
6076 upb_bufhandle_setbuf(&handle, buf, 0);
6077 ret = upb_bytessink_start(sink, len, &subc);
6078 if (ret && len != 0) {
6079 ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006080 }
Austin Schuh40c16522018-10-28 20:27:54 -07006081 if (ret) {
6082 ret = upb_bytessink_end(sink);
6083 }
6084 upb_bufhandle_uninit(&handle);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006085 return ret;
6086}
6087
Austin Schuh40c16522018-10-28 20:27:54 -07006088struct upb_bufsink {
6089 upb_byteshandler handler;
6090 upb_bytessink sink;
6091 upb_env *env;
6092 char *ptr;
6093 size_t len, size;
6094};
Brian Silverman9c614bc2016-02-15 20:20:02 -05006095
Austin Schuh40c16522018-10-28 20:27:54 -07006096static void *upb_bufsink_start(void *_sink, const void *hd, size_t size_hint) {
6097 upb_bufsink *sink = _sink;
6098 UPB_UNUSED(hd);
6099 UPB_UNUSED(size_hint);
6100 sink->len = 0;
6101 return sink;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006102}
6103
Austin Schuh40c16522018-10-28 20:27:54 -07006104static size_t upb_bufsink_string(void *_sink, const void *hd, const char *ptr,
6105 size_t len, const upb_bufhandle *handle) {
6106 upb_bufsink *sink = _sink;
6107 size_t new_size = sink->size;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006108
Austin Schuh40c16522018-10-28 20:27:54 -07006109 UPB_ASSERT(new_size > 0);
6110 UPB_UNUSED(hd);
6111 UPB_UNUSED(handle);
6112
6113 while (sink->len + len > new_size) {
6114 new_size *= 2;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006115 }
6116
Austin Schuh40c16522018-10-28 20:27:54 -07006117 if (new_size != sink->size) {
6118 sink->ptr = upb_env_realloc(sink->env, sink->ptr, sink->size, new_size);
6119 sink->size = new_size;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006120 }
6121
Austin Schuh40c16522018-10-28 20:27:54 -07006122 memcpy(sink->ptr + sink->len, ptr, len);
6123 sink->len += len;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006124
Austin Schuh40c16522018-10-28 20:27:54 -07006125 return len;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006126}
6127
Austin Schuh40c16522018-10-28 20:27:54 -07006128upb_bufsink *upb_bufsink_new(upb_env *env) {
6129 upb_bufsink *sink = upb_env_malloc(env, sizeof(upb_bufsink));
6130 upb_byteshandler_init(&sink->handler);
6131 upb_byteshandler_setstartstr(&sink->handler, upb_bufsink_start, NULL);
6132 upb_byteshandler_setstring(&sink->handler, upb_bufsink_string, NULL);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006133
Austin Schuh40c16522018-10-28 20:27:54 -07006134 upb_bytessink_reset(&sink->sink, &sink->handler, sink);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006135
Austin Schuh40c16522018-10-28 20:27:54 -07006136 sink->env = env;
6137 sink->size = 32;
6138 sink->ptr = upb_env_malloc(env, sink->size);
6139 sink->len = 0;
6140
6141 return sink;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006142}
6143
Austin Schuh40c16522018-10-28 20:27:54 -07006144void upb_bufsink_free(upb_bufsink *sink) {
6145 upb_env_free(sink->env, sink->ptr);
6146 upb_env_free(sink->env, sink);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006147}
6148
Austin Schuh40c16522018-10-28 20:27:54 -07006149upb_bytessink *upb_bufsink_sink(upb_bufsink *sink) {
6150 return &sink->sink;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006151}
6152
Austin Schuh40c16522018-10-28 20:27:54 -07006153const char *upb_bufsink_getdata(const upb_bufsink *sink, size_t *len) {
6154 *len = sink->len;
6155 return sink->ptr;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006156}
6157/*
6158** upb_table Implementation
6159**
6160** Implementation is heavily inspired by Lua's ltable.c.
6161*/
6162
6163
Brian Silverman9c614bc2016-02-15 20:20:02 -05006164#include <string.h>
6165
/* Cap applied to the result of log2ceil().  It is a log2 value, so 16
 * corresponds to 2^16 = 64k. */
#define UPB_MAXARRSIZE 16
6167
/* Number of elements in array "x" (from Chromium).  The second divisor is 1
 * when sizeof(x) is an exact multiple of the element size and 0 otherwise, so
 * an argument whose size does not divide evenly becomes a division by zero
 * instead of a silently wrong count. */
#define ARRAY_SIZE(x)           \
  ((sizeof(x) / sizeof(0[x])) / \
   ((size_t)(!(sizeof(x) % sizeof(0[x])))))
6171
Austin Schuh40c16522018-10-28 20:27:54 -07006172static void upb_check_alloc(upb_table *t, upb_alloc *a) {
6173 UPB_UNUSED(t);
6174 UPB_UNUSED(a);
6175 UPB_ASSERT_DEBUGVAR(t->alloc == a);
6176}
6177
/* Load-factor ceiling for the hash part: isfull() reports a table as full
 * once (count + 1) / size would exceed this value. */
static const double MAX_LOAD = 0.85;

/* Minimum utilization of the array part of a mixed hash/array table.  This is
 * a speed/memory-usage tradeoff (though not a straightforward one, because of
 * cache effects): the lower the value, the more memory is used. */
static const double MIN_DENSITY = 0.1;
6184
/* Returns true when at most one bit of "v" is set.  Note that this makes 0
 * count as a power of two here. */
bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
6186
6187int log2ceil(uint64_t v) {
6188 int ret = 0;
6189 bool pow2 = is_pow2(v);
6190 while (v >>= 1) ret++;
6191 ret = pow2 ? ret : ret + 1; /* Ceiling. */
6192 return UPB_MIN(UPB_MAXARRSIZE, ret);
6193}
6194
Austin Schuh40c16522018-10-28 20:27:54 -07006195char *upb_strdup(const char *s, upb_alloc *a) {
6196 return upb_strdup2(s, strlen(s), a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006197}
6198
Austin Schuh40c16522018-10-28 20:27:54 -07006199char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006200 size_t n;
6201 char *p;
6202
6203 /* Prevent overflow errors. */
6204 if (len == SIZE_MAX) return NULL;
6205 /* Always null-terminate, even if binary data; but don't rely on the input to
6206 * have a null-terminating byte since it may be a raw binary buffer. */
6207 n = len + 1;
Austin Schuh40c16522018-10-28 20:27:54 -07006208 p = upb_malloc(a, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006209 if (p) {
6210 memcpy(p, s, len);
6211 p[len] = 0;
6212 }
6213 return p;
6214}
6215
/* Lookup key for either kind of table: an integer for an inttable, or a
 * pointer + length pair for a strtable.  Only one member is meaningful at a
 * time; see intkey() and strkey2(). */
typedef union {
  uintptr_t num;      /* Integer key. */
  struct {
    const char *str;  /* First byte of the string key. */
    size_t len;       /* Length of the string key in bytes. */
  } str;
} lookupkey_t;
6224
6225static lookupkey_t strkey2(const char *str, size_t len) {
6226 lookupkey_t k;
6227 k.str.str = str;
6228 k.str.len = len;
6229 return k;
6230}
6231
6232static lookupkey_t intkey(uintptr_t key) {
6233 lookupkey_t k;
6234 k.num = key;
6235 return k;
6236}
6237
6238typedef uint32_t hashfunc_t(upb_tabkey key);
6239typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
6240
6241/* Base table (shared code) ***************************************************/
6242
6243/* For when we need to cast away const. */
6244static upb_tabent *mutable_entries(upb_table *t) {
6245 return (upb_tabent*)t->entries;
6246}
6247
6248static bool isfull(upb_table *t) {
Austin Schuh40c16522018-10-28 20:27:54 -07006249 if (upb_table_size(t) == 0) {
6250 return true;
6251 } else {
6252 return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
6253 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05006254}
6255
Austin Schuh40c16522018-10-28 20:27:54 -07006256static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2,
6257 upb_alloc *a) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006258 size_t bytes;
6259
6260 t->count = 0;
6261 t->ctype = ctype;
6262 t->size_lg2 = size_lg2;
6263 t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
Austin Schuh40c16522018-10-28 20:27:54 -07006264#ifndef NDEBUG
6265 t->alloc = a;
6266#endif
Brian Silverman9c614bc2016-02-15 20:20:02 -05006267 bytes = upb_table_size(t) * sizeof(upb_tabent);
6268 if (bytes > 0) {
Austin Schuh40c16522018-10-28 20:27:54 -07006269 t->entries = upb_malloc(a, bytes);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006270 if (!t->entries) return false;
6271 memset(mutable_entries(t), 0, bytes);
6272 } else {
6273 t->entries = NULL;
6274 }
6275 return true;
6276}
6277
Austin Schuh40c16522018-10-28 20:27:54 -07006278static void uninit(upb_table *t, upb_alloc *a) {
6279 upb_check_alloc(t, a);
6280 upb_free(a, mutable_entries(t));
6281}
Brian Silverman9c614bc2016-02-15 20:20:02 -05006282
6283static upb_tabent *emptyent(upb_table *t) {
6284 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
Austin Schuh40c16522018-10-28 20:27:54 -07006285 while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
Brian Silverman9c614bc2016-02-15 20:20:02 -05006286}
6287
6288static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
6289 return (upb_tabent*)upb_getentry(t, hash);
6290}
6291
6292static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
6293 uint32_t hash, eqlfunc_t *eql) {
6294 const upb_tabent *e;
6295
6296 if (t->size_lg2 == 0) return NULL;
6297 e = upb_getentry(t, hash);
6298 if (upb_tabent_isempty(e)) return NULL;
6299 while (1) {
6300 if (eql(e->key, key)) return e;
6301 if ((e = e->next) == NULL) return NULL;
6302 }
6303}
6304
6305static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
6306 uint32_t hash, eqlfunc_t *eql) {
6307 return (upb_tabent*)findentry(t, key, hash, eql);
6308}
6309
6310static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
6311 uint32_t hash, eqlfunc_t *eql) {
6312 const upb_tabent *e = findentry(t, key, hash, eql);
6313 if (e) {
6314 if (v) {
6315 _upb_value_setval(v, e->val.val, t->ctype);
6316 }
6317 return true;
6318 } else {
6319 return false;
6320 }
6321}
6322
6323/* The given key must not already exist in the table. */
6324static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
6325 upb_value val, uint32_t hash,
6326 hashfunc_t *hashfunc, eqlfunc_t *eql) {
6327 upb_tabent *mainpos_e;
6328 upb_tabent *our_e;
6329
Austin Schuh40c16522018-10-28 20:27:54 -07006330 UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
6331 UPB_ASSERT_DEBUGVAR(val.ctype == t->ctype);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006332
6333 t->count++;
6334 mainpos_e = getentry_mutable(t, hash);
6335 our_e = mainpos_e;
6336
6337 if (upb_tabent_isempty(mainpos_e)) {
6338 /* Our main position is empty; use it. */
6339 our_e->next = NULL;
6340 } else {
6341 /* Collision. */
6342 upb_tabent *new_e = emptyent(t);
6343 /* Head of collider's chain. */
6344 upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
6345 if (chain == mainpos_e) {
6346 /* Existing ent is in its main posisiton (it has the same hash as us, and
6347 * is the head of our chain). Insert to new ent and append to this chain. */
6348 new_e->next = mainpos_e->next;
6349 mainpos_e->next = new_e;
6350 our_e = new_e;
6351 } else {
6352 /* Existing ent is not in its main position (it is a node in some other
6353 * chain). This implies that no existing ent in the table has our hash.
6354 * Evict it (updating its chain) and use its ent for head of our chain. */
6355 *new_e = *mainpos_e; /* copies next. */
6356 while (chain->next != mainpos_e) {
6357 chain = (upb_tabent*)chain->next;
Austin Schuh40c16522018-10-28 20:27:54 -07006358 UPB_ASSERT(chain);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006359 }
6360 chain->next = new_e;
6361 our_e = mainpos_e;
6362 our_e->next = NULL;
6363 }
6364 }
6365 our_e->key = tabkey;
6366 our_e->val.val = val.val;
Austin Schuh40c16522018-10-28 20:27:54 -07006367 UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006368}
6369
6370static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
6371 upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
6372 upb_tabent *chain = getentry_mutable(t, hash);
6373 if (upb_tabent_isempty(chain)) return false;
6374 if (eql(chain->key, key)) {
6375 /* Element to remove is at the head of its chain. */
6376 t->count--;
Austin Schuh40c16522018-10-28 20:27:54 -07006377 if (val) _upb_value_setval(val, chain->val.val, t->ctype);
6378 if (removed) *removed = chain->key;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006379 if (chain->next) {
6380 upb_tabent *move = (upb_tabent*)chain->next;
6381 *chain = *move;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006382 move->key = 0; /* Make the slot empty. */
6383 } else {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006384 chain->key = 0; /* Make the slot empty. */
6385 }
6386 return true;
6387 } else {
6388 /* Element to remove is either in a non-head position or not in the
6389 * table. */
Austin Schuh40c16522018-10-28 20:27:54 -07006390 while (chain->next && !eql(chain->next->key, key)) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006391 chain = (upb_tabent*)chain->next;
Austin Schuh40c16522018-10-28 20:27:54 -07006392 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05006393 if (chain->next) {
6394 /* Found element to remove. */
Austin Schuh40c16522018-10-28 20:27:54 -07006395 upb_tabent *rm = (upb_tabent*)chain->next;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006396 t->count--;
Austin Schuh40c16522018-10-28 20:27:54 -07006397 if (val) _upb_value_setval(val, chain->next->val.val, t->ctype);
6398 if (removed) *removed = rm->key;
6399 rm->key = 0; /* Make the slot empty. */
6400 chain->next = rm->next;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006401 return true;
6402 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07006403 /* Element to remove is not in the table. */
Brian Silverman9c614bc2016-02-15 20:20:02 -05006404 return false;
6405 }
6406 }
6407}
6408
6409static size_t next(const upb_table *t, size_t i) {
6410 do {
6411 if (++i >= upb_table_size(t))
6412 return SIZE_MAX;
6413 } while(upb_tabent_isempty(&t->entries[i]));
6414
6415 return i;
6416}
6417
6418static size_t begin(const upb_table *t) {
6419 return next(t, -1);
6420}
6421
6422
6423/* upb_strtable ***************************************************************/
6424
6425/* A simple "subclass" of upb_table that only adds a hash function for strings. */
6426
Austin Schuh40c16522018-10-28 20:27:54 -07006427static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
6428 char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006429 if (str == NULL) return 0;
6430 memcpy(str, &k2.str.len, sizeof(uint32_t));
6431 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
6432 return (uintptr_t)str;
6433}
6434
6435static uint32_t strhash(upb_tabkey key) {
6436 uint32_t len;
6437 char *str = upb_tabstr(key, &len);
6438 return MurmurHash2(str, len, 0);
6439}
6440
6441static bool streql(upb_tabkey k1, lookupkey_t k2) {
6442 uint32_t len;
6443 char *str = upb_tabstr(k1, &len);
6444 return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
6445}
6446
Austin Schuh40c16522018-10-28 20:27:54 -07006447bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
6448 return init(&t->t, ctype, 2, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006449}
6450
Austin Schuh40c16522018-10-28 20:27:54 -07006451void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006452 size_t i;
6453 for (i = 0; i < upb_table_size(&t->t); i++)
Austin Schuh40c16522018-10-28 20:27:54 -07006454 upb_free(a, (void*)t->t.entries[i].key);
6455 uninit(&t->t, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006456}
6457
Austin Schuh40c16522018-10-28 20:27:54 -07006458bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006459 upb_strtable new_table;
6460 upb_strtable_iter i;
6461
Austin Schuh40c16522018-10-28 20:27:54 -07006462 upb_check_alloc(&t->t, a);
6463
6464 if (!init(&new_table.t, t->t.ctype, size_lg2, a))
Brian Silverman9c614bc2016-02-15 20:20:02 -05006465 return false;
6466 upb_strtable_begin(&i, t);
6467 for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
Austin Schuh40c16522018-10-28 20:27:54 -07006468 upb_strtable_insert3(
Brian Silverman9c614bc2016-02-15 20:20:02 -05006469 &new_table,
6470 upb_strtable_iter_key(&i),
6471 upb_strtable_iter_keylength(&i),
Austin Schuh40c16522018-10-28 20:27:54 -07006472 upb_strtable_iter_value(&i),
6473 a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006474 }
Austin Schuh40c16522018-10-28 20:27:54 -07006475 upb_strtable_uninit2(t, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006476 *t = new_table;
6477 return true;
6478}
6479
Austin Schuh40c16522018-10-28 20:27:54 -07006480bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
6481 upb_value v, upb_alloc *a) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006482 lookupkey_t key;
6483 upb_tabkey tabkey;
6484 uint32_t hash;
6485
Austin Schuh40c16522018-10-28 20:27:54 -07006486 upb_check_alloc(&t->t, a);
6487
Brian Silverman9c614bc2016-02-15 20:20:02 -05006488 if (isfull(&t->t)) {
6489 /* Need to resize. New table of double the size, add old elements to it. */
Austin Schuh40c16522018-10-28 20:27:54 -07006490 if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006491 return false;
6492 }
6493 }
6494
6495 key = strkey2(k, len);
Austin Schuh40c16522018-10-28 20:27:54 -07006496 tabkey = strcopy(key, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006497 if (tabkey == 0) return false;
6498
6499 hash = MurmurHash2(key.str.str, key.str.len, 0);
6500 insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
6501 return true;
6502}
6503
6504bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
6505 upb_value *v) {
6506 uint32_t hash = MurmurHash2(key, len, 0);
6507 return lookup(&t->t, strkey2(key, len), v, hash, &streql);
6508}
6509
Austin Schuh40c16522018-10-28 20:27:54 -07006510bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
6511 upb_value *val, upb_alloc *alloc) {
6512 uint32_t hash = MurmurHash2(key, len, 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006513 upb_tabkey tabkey;
6514 if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
Austin Schuh40c16522018-10-28 20:27:54 -07006515 upb_free(alloc, (void*)tabkey);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006516 return true;
6517 } else {
6518 return false;
6519 }
6520}
6521
6522/* Iteration */
6523
6524static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
6525 return &i->t->t.entries[i->index];
6526}
6527
6528void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
6529 i->t = t;
6530 i->index = begin(&t->t);
6531}
6532
6533void upb_strtable_next(upb_strtable_iter *i) {
6534 i->index = next(&i->t->t, i->index);
6535}
6536
6537bool upb_strtable_done(const upb_strtable_iter *i) {
6538 return i->index >= upb_table_size(&i->t->t) ||
6539 upb_tabent_isempty(str_tabent(i));
6540}
6541
Austin Schuh40c16522018-10-28 20:27:54 -07006542const char *upb_strtable_iter_key(const upb_strtable_iter *i) {
6543 UPB_ASSERT(!upb_strtable_done(i));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006544 return upb_tabstr(str_tabent(i)->key, NULL);
6545}
6546
Austin Schuh40c16522018-10-28 20:27:54 -07006547size_t upb_strtable_iter_keylength(const upb_strtable_iter *i) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006548 uint32_t len;
Austin Schuh40c16522018-10-28 20:27:54 -07006549 UPB_ASSERT(!upb_strtable_done(i));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006550 upb_tabstr(str_tabent(i)->key, &len);
6551 return len;
6552}
6553
6554upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
Austin Schuh40c16522018-10-28 20:27:54 -07006555 UPB_ASSERT(!upb_strtable_done(i));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006556 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
6557}
6558
6559void upb_strtable_iter_setdone(upb_strtable_iter *i) {
6560 i->index = SIZE_MAX;
6561}
6562
6563bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
6564 const upb_strtable_iter *i2) {
6565 if (upb_strtable_done(i1) && upb_strtable_done(i2))
6566 return true;
6567 return i1->t == i2->t && i1->index == i2->index;
6568}
6569
6570
6571/* upb_inttable ***************************************************************/
6572
6573/* For inttables we use a hybrid structure where small keys are kept in an
6574 * array and large keys are put in the hash table. */
6575
6576static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
6577
6578static bool inteql(upb_tabkey k1, lookupkey_t k2) {
6579 return k1 == k2.num;
6580}
6581
6582static upb_tabval *mutable_array(upb_inttable *t) {
6583 return (upb_tabval*)t->array;
6584}
6585
6586static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
6587 if (key < t->array_size) {
6588 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
6589 } else {
6590 upb_tabent *e =
6591 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
6592 return e ? &e->val : NULL;
6593 }
6594}
6595
6596static const upb_tabval *inttable_val_const(const upb_inttable *t,
6597 uintptr_t key) {
6598 return inttable_val((upb_inttable*)t, key);
6599}
6600
6601size_t upb_inttable_count(const upb_inttable *t) {
6602 return t->t.count + t->array_count;
6603}
6604
6605static void check(upb_inttable *t) {
6606 UPB_UNUSED(t);
6607#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
6608 {
6609 /* This check is very expensive (makes inserts/deletes O(N)). */
6610 size_t count = 0;
6611 upb_inttable_iter i;
6612 upb_inttable_begin(&i, t);
6613 for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
Austin Schuh40c16522018-10-28 20:27:54 -07006614 UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006615 }
Austin Schuh40c16522018-10-28 20:27:54 -07006616 UPB_ASSERT(count == upb_inttable_count(t));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006617 }
6618#endif
6619}
6620
6621bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
Austin Schuh40c16522018-10-28 20:27:54 -07006622 size_t asize, int hsize_lg2, upb_alloc *a) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006623 size_t array_bytes;
6624
Austin Schuh40c16522018-10-28 20:27:54 -07006625 if (!init(&t->t, ctype, hsize_lg2, a)) return false;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006626 /* Always make the array part at least 1 long, so that we know key 0
6627 * won't be in the hash part, which simplifies things. */
6628 t->array_size = UPB_MAX(1, asize);
6629 t->array_count = 0;
6630 array_bytes = t->array_size * sizeof(upb_value);
Austin Schuh40c16522018-10-28 20:27:54 -07006631 t->array = upb_malloc(a, array_bytes);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006632 if (!t->array) {
Austin Schuh40c16522018-10-28 20:27:54 -07006633 uninit(&t->t, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006634 return false;
6635 }
6636 memset(mutable_array(t), 0xff, array_bytes);
6637 check(t);
6638 return true;
6639}
6640
Austin Schuh40c16522018-10-28 20:27:54 -07006641bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
6642 return upb_inttable_sizedinit(t, ctype, 0, 4, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006643}
6644
Austin Schuh40c16522018-10-28 20:27:54 -07006645void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
6646 uninit(&t->t, a);
6647 upb_free(a, mutable_array(t));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006648}
6649
Austin Schuh40c16522018-10-28 20:27:54 -07006650bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
6651 upb_alloc *a) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006652 upb_tabval tabval;
6653 tabval.val = val.val;
Austin Schuh40c16522018-10-28 20:27:54 -07006654 UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */
6655
6656 upb_check_alloc(&t->t, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006657
6658 if (key < t->array_size) {
Austin Schuh40c16522018-10-28 20:27:54 -07006659 UPB_ASSERT(!upb_arrhas(t->array[key]));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006660 t->array_count++;
6661 mutable_array(t)[key].val = val.val;
6662 } else {
6663 if (isfull(&t->t)) {
6664 /* Need to resize the hash part, but we re-use the array part. */
6665 size_t i;
6666 upb_table new_table;
Austin Schuh40c16522018-10-28 20:27:54 -07006667
6668 if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1, a)) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05006669 return false;
Austin Schuh40c16522018-10-28 20:27:54 -07006670 }
6671
Brian Silverman9c614bc2016-02-15 20:20:02 -05006672 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
6673 const upb_tabent *e = &t->t.entries[i];
6674 uint32_t hash;
6675 upb_value v;
6676
6677 _upb_value_setval(&v, e->val.val, t->t.ctype);
6678 hash = upb_inthash(e->key);
6679 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
6680 }
6681
Austin Schuh40c16522018-10-28 20:27:54 -07006682 UPB_ASSERT(t->t.count == new_table.count);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006683
Austin Schuh40c16522018-10-28 20:27:54 -07006684 uninit(&t->t, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006685 t->t = new_table;
6686 }
6687 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
6688 }
6689 check(t);
6690 return true;
6691}
6692
6693bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
6694 const upb_tabval *table_v = inttable_val_const(t, key);
6695 if (!table_v) return false;
6696 if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
6697 return true;
6698}
6699
6700bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
6701 upb_tabval *table_v = inttable_val(t, key);
6702 if (!table_v) return false;
6703 table_v->val = val.val;
6704 return true;
6705}
6706
6707bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
6708 bool success;
6709 if (key < t->array_size) {
6710 if (upb_arrhas(t->array[key])) {
6711 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
6712 t->array_count--;
6713 if (val) {
6714 _upb_value_setval(val, t->array[key].val, t->t.ctype);
6715 }
6716 mutable_array(t)[key] = empty;
6717 success = true;
6718 } else {
6719 success = false;
6720 }
6721 } else {
Austin Schuh40c16522018-10-28 20:27:54 -07006722 success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006723 }
6724 check(t);
6725 return success;
6726}
6727
Austin Schuh40c16522018-10-28 20:27:54 -07006728bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
6729 upb_check_alloc(&t->t, a);
6730 return upb_inttable_insert2(t, upb_inttable_count(t), val, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006731}
6732
6733upb_value upb_inttable_pop(upb_inttable *t) {
6734 upb_value val;
6735 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
Austin Schuh40c16522018-10-28 20:27:54 -07006736 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006737 return val;
6738}
6739
Austin Schuh40c16522018-10-28 20:27:54 -07006740bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
6741 upb_alloc *a) {
6742 upb_check_alloc(&t->t, a);
6743 return upb_inttable_insert2(t, (uintptr_t)key, val, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006744}
6745
6746bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
6747 upb_value *v) {
6748 return upb_inttable_lookup(t, (uintptr_t)key, v);
6749}
6750
6751bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
6752 return upb_inttable_remove(t, (uintptr_t)key, val);
6753}
6754
Austin Schuh40c16522018-10-28 20:27:54 -07006755void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
6756 /* A power-of-two histogram of the table keys. */
6757 size_t counts[UPB_MAXARRSIZE + 1] = {0};
6758
6759 /* The max key in each bucket. */
6760 uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
6761
Brian Silverman9c614bc2016-02-15 20:20:02 -05006762 upb_inttable_iter i;
Austin Schuh40c16522018-10-28 20:27:54 -07006763 size_t arr_count;
6764 int size_lg2;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006765 upb_inttable new_t;
6766
Austin Schuh40c16522018-10-28 20:27:54 -07006767 upb_check_alloc(&t->t, a);
6768
Brian Silverman9c614bc2016-02-15 20:20:02 -05006769 upb_inttable_begin(&i, t);
6770 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6771 uintptr_t key = upb_inttable_iter_key(&i);
Austin Schuh40c16522018-10-28 20:27:54 -07006772 int bucket = log2ceil(key);
6773 max[bucket] = UPB_MAX(max[bucket], key);
6774 counts[bucket]++;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006775 }
6776
Austin Schuh40c16522018-10-28 20:27:54 -07006777 /* Find the largest power of two that satisfies the MIN_DENSITY
6778 * definition (while actually having some keys). */
Brian Silverman9c614bc2016-02-15 20:20:02 -05006779 arr_count = upb_inttable_count(t);
6780
Austin Schuh40c16522018-10-28 20:27:54 -07006781 for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
6782 if (counts[size_lg2] == 0) {
6783 /* We can halve again without losing any entries. */
6784 continue;
6785 } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
6786 break;
Brian Silverman9c614bc2016-02-15 20:20:02 -05006787 }
Austin Schuh40c16522018-10-28 20:27:54 -07006788
6789 arr_count -= counts[size_lg2];
Brian Silverman9c614bc2016-02-15 20:20:02 -05006790 }
6791
Austin Schuh40c16522018-10-28 20:27:54 -07006792 UPB_ASSERT(arr_count <= upb_inttable_count(t));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006793
6794 {
6795 /* Insert all elements into new, perfectly-sized table. */
Austin Schuh40c16522018-10-28 20:27:54 -07006796 size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
6797 size_t hash_count = upb_inttable_count(t) - arr_count;
6798 size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
6799 size_t hashsize_lg2 = log2ceil(hash_size);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006800
Austin Schuh40c16522018-10-28 20:27:54 -07006801 upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006802 upb_inttable_begin(&i, t);
6803 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6804 uintptr_t k = upb_inttable_iter_key(&i);
Austin Schuh40c16522018-10-28 20:27:54 -07006805 upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006806 }
Austin Schuh40c16522018-10-28 20:27:54 -07006807 UPB_ASSERT(new_t.array_size == arr_size);
6808 UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006809 }
Austin Schuh40c16522018-10-28 20:27:54 -07006810 upb_inttable_uninit2(t, a);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006811 *t = new_t;
6812}
6813
6814/* Iteration. */
6815
6816static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
Austin Schuh40c16522018-10-28 20:27:54 -07006817 UPB_ASSERT(!i->array_part);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006818 return &i->t->t.entries[i->index];
6819}
6820
6821static upb_tabval int_arrent(const upb_inttable_iter *i) {
Austin Schuh40c16522018-10-28 20:27:54 -07006822 UPB_ASSERT(i->array_part);
Brian Silverman9c614bc2016-02-15 20:20:02 -05006823 return i->t->array[i->index];
6824}
6825
6826void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
6827 i->t = t;
6828 i->index = -1;
6829 i->array_part = true;
6830 upb_inttable_next(i);
6831}
6832
6833void upb_inttable_next(upb_inttable_iter *iter) {
6834 const upb_inttable *t = iter->t;
6835 if (iter->array_part) {
6836 while (++iter->index < t->array_size) {
6837 if (upb_arrhas(int_arrent(iter))) {
6838 return;
6839 }
6840 }
6841 iter->array_part = false;
6842 iter->index = begin(&t->t);
6843 } else {
6844 iter->index = next(&t->t, iter->index);
6845 }
6846}
6847
6848bool upb_inttable_done(const upb_inttable_iter *i) {
6849 if (i->array_part) {
6850 return i->index >= i->t->array_size ||
6851 !upb_arrhas(int_arrent(i));
6852 } else {
6853 return i->index >= upb_table_size(&i->t->t) ||
6854 upb_tabent_isempty(int_tabent(i));
6855 }
6856}
6857
6858uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
Austin Schuh40c16522018-10-28 20:27:54 -07006859 UPB_ASSERT(!upb_inttable_done(i));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006860 return i->array_part ? i->index : int_tabent(i)->key;
6861}
6862
6863upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
Austin Schuh40c16522018-10-28 20:27:54 -07006864 UPB_ASSERT(!upb_inttable_done(i));
Brian Silverman9c614bc2016-02-15 20:20:02 -05006865 return _upb_value_val(
6866 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
6867 i->t->t.ctype);
6868}
6869
6870void upb_inttable_iter_setdone(upb_inttable_iter *i) {
6871 i->index = SIZE_MAX;
6872 i->array_part = false;
6873}
6874
6875bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
6876 const upb_inttable_iter *i2) {
6877 if (upb_inttable_done(i1) && upb_inttable_done(i2))
6878 return true;
6879 return i1->t == i2->t && i1->index == i2->index &&
6880 i1->array_part == i2->array_part;
6881}
6882
6883#ifdef UPB_UNALIGNED_READS_OK
/* -----------------------------------------------------------------------------
 * MurmurHash2, by Austin Appleby (released as public domain).
 * Reformatted and C99-ified by Joshua Haberman.
 * Note - This code makes a few assumptions about how your machine behaves -
 *   1. We can read a 4-byte value from any address without crashing
 *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
 * And it has a few limitations -
 *   1. It will not work incrementally.
 *   2. It will not produce the same results on little-endian and big-endian
 *      machines.
 *
 * Hashes the |len| bytes at |key| with the given |seed| and returns the
 * 32-bit hash.  Words are loaded with memcpy() rather than through a cast
 * pointer, which gives the same value as a native-endian 4-byte read without
 * violating strict aliasing or dropping the const qualifier. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value */
  uint32_t h = seed ^ (uint32_t)len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t * data = (const uint8_t *)key;
  while(len >= 4) {
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array */
  switch(len) {
    case 3: h ^= (uint32_t)data[2] << 16;  /* fallthrough */
    case 2: h ^= (uint32_t)data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
6934
6935#else /* !UPB_UNALIGNED_READS_OK */
6936
6937/* -----------------------------------------------------------------------------
6938 * MurmurHashAligned2, by Austin Appleby
6939 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
6940 * on certain platforms.
6941 * Performance will be lower than MurmurHash2 */
6942
/* One MurmurHash2 mixing round.  Uses 'r' from the enclosing scope and
 * modifies both k and h.  Wrapped in do/while(0) so it is a single
 * statement. */
#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)

/* Hashes the |len| bytes at |key| with the given |seed|.  Every 4-byte load
 * is made from a 4-byte-aligned address; when |key| itself is misaligned the
 * words are reassembled from two neighbouring aligned loads with shifts.
 * Loads go through memcpy() so that no uint32_t lvalue is formed over byte
 * storage (strict aliasing) and const is not cast away. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = seed ^ (uint32_t)len;
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Gather the (4 - align) bytes that precede the first aligned word. */
    switch(align) {
      case 1: t |= (uint32_t)data[2] << 16;  /* fallthrough */
      case 2: t |= (uint32_t)data[1] << 8;   /* fallthrough */
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      memcpy(&d, data, sizeof(d));  /* |data| is 4-byte aligned here. */
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      /* Enough bytes remain to complete one more full word. */
      uint32_t k;

      switch(align) {
        case 3: d |= (uint32_t)data[2] << 16;  /* fallthrough */
        case 2: d |= (uint32_t)data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      switch(len) {
        case 3: h ^= (uint32_t)data[2] << 16;  /* fallthrough */
        case 2: h ^= (uint32_t)data[1] << 8;   /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      }
    } else {
      /* Fewer than a word in total: fold the leftover register bytes and the
       * remaining input bytes in as the tail. */
      switch(len) {
        case 3: d |= (uint32_t)data[2] << 16;  /* fallthrough */
        case 2: d |= (uint32_t)data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];                  /* fallthrough */
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* Already aligned, or too short to need the realignment path. */
    while(len >= 4) {
      uint32_t k;
      memcpy(&k, data, sizeof(k));

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    switch(len) {
      case 3: h ^= (uint32_t)data[2] << 16;  /* fallthrough */
      case 2: h ^= (uint32_t)data[1] << 8;   /* fallthrough */
      case 1: h ^= data[0]; h *= m;
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
7058
7059#endif /* UPB_UNALIGNED_READS_OK */
7060
7061#include <errno.h>
7062#include <stdarg.h>
7063#include <stddef.h>
7064#include <stdint.h>
7065#include <stdio.h>
7066#include <stdlib.h>
7067#include <string.h>
7068
7069bool upb_dumptostderr(void *closure, const upb_status* status) {
7070 UPB_UNUSED(closure);
7071 fprintf(stderr, "%s\n", upb_status_errmsg(status));
7072 return false;
7073}
7074
7075/* Guarantee null-termination and provide ellipsis truncation.
7076 * It may be tempting to "optimize" this by initializing these final
7077 * four bytes up-front and then being careful never to overwrite them,
7078 * this is safer and simpler. */
7079static void nullz(upb_status *status) {
7080 const char *ellipsis = "...";
7081 size_t len = strlen(ellipsis);
Austin Schuh40c16522018-10-28 20:27:54 -07007082 UPB_ASSERT(sizeof(status->msg) > len);
Brian Silverman9c614bc2016-02-15 20:20:02 -05007083 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
7084}
7085
Austin Schuh40c16522018-10-28 20:27:54 -07007086
7087/* upb_upberr *****************************************************************/
7088
7089upb_errorspace upb_upberr = {"upb error"};
7090
7091void upb_upberr_setoom(upb_status *status) {
7092 status->error_space_ = &upb_upberr;
7093 upb_status_seterrmsg(status, "Out of memory");
7094}
7095
7096
7097/* upb_status *****************************************************************/
7098
Brian Silverman9c614bc2016-02-15 20:20:02 -05007099void upb_status_clear(upb_status *status) {
7100 if (!status) return;
7101 status->ok_ = true;
7102 status->code_ = 0;
7103 status->msg[0] = '\0';
7104}
7105
7106bool upb_ok(const upb_status *status) { return status->ok_; }
7107
7108upb_errorspace *upb_status_errspace(const upb_status *status) {
7109 return status->error_space_;
7110}
7111
7112int upb_status_errcode(const upb_status *status) { return status->code_; }
7113
7114const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
7115
7116void upb_status_seterrmsg(upb_status *status, const char *msg) {
7117 if (!status) return;
7118 status->ok_ = false;
7119 strncpy(status->msg, msg, sizeof(status->msg));
7120 nullz(status);
7121}
7122
7123void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
7124 va_list args;
7125 va_start(args, fmt);
7126 upb_status_vseterrf(status, fmt, args);
7127 va_end(args);
7128}
7129
7130void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
7131 if (!status) return;
7132 status->ok_ = false;
7133 _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
7134 nullz(status);
7135}
7136
Brian Silverman9c614bc2016-02-15 20:20:02 -05007137void upb_status_copy(upb_status *to, const upb_status *from) {
7138 if (!to) return;
7139 *to = *from;
7140}
Austin Schuh40c16522018-10-28 20:27:54 -07007141
7142
7143/* upb_alloc ******************************************************************/
7144
7145static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
7146 size_t size) {
7147 UPB_UNUSED(alloc);
7148 UPB_UNUSED(oldsize);
7149 if (size == 0) {
7150 free(ptr);
7151 return NULL;
7152 } else {
7153 return realloc(ptr, size);
7154 }
7155}
7156
7157upb_alloc upb_alloc_global = {&upb_global_allocfunc};
7158
7159
7160/* upb_arena ******************************************************************/
7161
/* Alignment applied to every arena allocation.  Be conservative and choose
 * 16 in case anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds |size| up to the next multiple of maxalign (0 stays 0). */
static size_t align_up_max(size_t size) {
  const size_t units = (size + maxalign - 1) / maxalign;
  return units * maxalign;
}
7168
/* Header placed at the start of every arena block; the block's payload bytes
 * follow it in the same allocation. */
typedef struct mem_block {
  struct mem_block *next;  /* Next block in the arena's list. */
  size_t size;             /* Total bytes in the block. */
  size_t used;             /* Bytes consumed so far, counted from the start. */
  bool owned;              /* If set, upb_arena_uninit() frees the block. */
  /* Data follows. */
} mem_block;
7176
7177typedef struct cleanup_ent {
7178 struct cleanup_ent *next;
7179 upb_cleanup_func *cleanup;
7180 void *ud;
7181} cleanup_ent;
7182
7183static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
7184 bool owned) {
7185 mem_block *block = ptr;
7186
7187 block->next = a->block_head;
7188 block->size = size;
7189 block->used = align_up_max(sizeof(mem_block));
7190 block->owned = owned;
7191
7192 a->block_head = block;
7193
7194 /* TODO(haberman): ASAN poison. */
7195}
7196
7197
7198static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
7199 size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
7200 mem_block *block = upb_malloc(a->block_alloc, block_size);
7201
7202 if (!block) {
7203 return NULL;
7204 }
7205
7206 upb_arena_addblock(a, block, block_size, true);
7207 a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
7208
7209 return block;
7210}
7211
7212static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
7213 size_t size) {
7214 upb_arena *a = (upb_arena*)alloc; /* upb_alloc is initial member. */
7215 mem_block *block = a->block_head;
7216 void *ret;
7217
7218 if (size == 0) {
7219 return NULL; /* We are an arena, don't need individual frees. */
7220 }
7221
7222 size = align_up_max(size);
7223
7224 /* TODO(haberman): special-case if this is a realloc of the last alloc? */
7225
7226 if (!block || block->size - block->used < size) {
7227 /* Slow path: have to allocate a new block. */
7228 block = upb_arena_allocblock(a, size);
7229
7230 if (!block) {
7231 return NULL; /* Out of memory. */
7232 }
7233 }
7234
7235 ret = (char*)block + block->used;
7236 block->used += size;
7237
7238 if (oldsize > 0) {
7239 memcpy(ret, ptr, oldsize); /* Preserve existing data. */
7240 }
7241
7242 /* TODO(haberman): ASAN unpoison. */
7243
7244 a->bytes_allocated += size;
7245 return ret;
7246}
7247
7248/* Public Arena API ***********************************************************/
7249
7250void upb_arena_init(upb_arena *a) {
7251 a->alloc.func = &upb_arena_doalloc;
7252 a->block_alloc = &upb_alloc_global;
7253 a->bytes_allocated = 0;
7254 a->next_block_size = 256;
7255 a->max_block_size = 16384;
7256 a->cleanup_head = NULL;
7257 a->block_head = NULL;
7258}
7259
7260void upb_arena_init2(upb_arena *a, void *mem, size_t size, upb_alloc *alloc) {
7261 upb_arena_init(a);
7262
7263 if (size > sizeof(mem_block)) {
7264 upb_arena_addblock(a, mem, size, false);
7265 }
7266
7267 if (alloc) {
7268 a->block_alloc = alloc;
7269 }
7270}
7271
7272void upb_arena_uninit(upb_arena *a) {
7273 cleanup_ent *ent = a->cleanup_head;
7274 mem_block *block = a->block_head;
7275
7276 while (ent) {
7277 ent->cleanup(ent->ud);
7278 ent = ent->next;
7279 }
7280
7281 /* Must do this after running cleanup functions, because this will delete
7282 * the memory we store our cleanup entries in! */
7283 while (block) {
7284 mem_block *next = block->next;
7285
7286 if (block->owned) {
7287 upb_free(a->block_alloc, block);
7288 }
7289
7290 block = next;
7291 }
7292
7293 /* Protect against multiple-uninit. */
7294 a->cleanup_head = NULL;
7295 a->block_head = NULL;
7296}
7297
7298bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud) {
7299 cleanup_ent *ent = upb_malloc(&a->alloc, sizeof(cleanup_ent));
7300 if (!ent) {
7301 return false; /* Out of memory. */
7302 }
7303
7304 ent->cleanup = func;
7305 ent->ud = ud;
7306 ent->next = a->cleanup_head;
7307 a->cleanup_head = ent;
7308
7309 return true;
7310}
7311
7312size_t upb_arena_bytesallocated(const upb_arena *a) {
7313 return a->bytes_allocated;
7314}
7315
7316
7317/* Standard error functions ***************************************************/
7318
7319static bool default_err(void *ud, const upb_status *status) {
7320 UPB_UNUSED(ud);
7321 UPB_UNUSED(status);
7322 return false;
7323}
7324
7325static bool write_err_to(void *ud, const upb_status *status) {
7326 upb_status *copy_to = ud;
7327 upb_status_copy(copy_to, status);
7328 return false;
7329}
7330
7331
7332/* upb_env ********************************************************************/
7333
7334void upb_env_initonly(upb_env *e) {
7335 e->ok_ = true;
7336 e->error_func_ = &default_err;
7337 e->error_ud_ = NULL;
7338}
7339
7340void upb_env_init(upb_env *e) {
7341 upb_arena_init(&e->arena_);
7342 upb_env_initonly(e);
7343}
7344
7345void upb_env_init2(upb_env *e, void *mem, size_t n, upb_alloc *alloc) {
7346 upb_arena_init2(&e->arena_, mem, n, alloc);
7347 upb_env_initonly(e);
7348}
7349
7350void upb_env_uninit(upb_env *e) {
7351 upb_arena_uninit(&e->arena_);
7352}
7353
7354void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud) {
7355 e->error_func_ = func;
7356 e->error_ud_ = ud;
7357}
7358
7359void upb_env_reporterrorsto(upb_env *e, upb_status *s) {
7360 e->error_func_ = &write_err_to;
7361 e->error_ud_ = s;
7362}
7363
7364bool upb_env_reporterror(upb_env *e, const upb_status *status) {
7365 e->ok_ = false;
7366 return e->error_func_(e->error_ud_, status);
7367}
7368
7369void *upb_env_malloc(upb_env *e, size_t size) {
7370 return upb_malloc(&e->arena_.alloc, size);
7371}
7372
7373void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
7374 return upb_realloc(&e->arena_.alloc, ptr, oldsize, size);
7375}
7376
7377void upb_env_free(upb_env *e, void *ptr) {
7378 upb_free(&e->arena_.alloc, ptr);
7379}
7380
7381bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
7382 return upb_arena_addcleanup(&e->arena_, func, ud);
7383}
7384
7385size_t upb_env_bytesallocated(const upb_env *e) {
7386 return upb_arena_bytesallocated(&e->arena_);
7387}
7388/* This file was generated by upbc (the upb compiler) from the input
7389 * file:
7390 *
7391 * upb/descriptor/descriptor.proto
7392 *
Brian Silverman9c614bc2016-02-15 20:20:02 -05007393 * Do not edit -- your changes will be discarded when the file is
7394 * regenerated. */
7395
7396
/* Tentative declarations of the generated static tables.  They are declared
 * up front because the table initializers take the addresses of elements of
 * each other (for example, msgs[] entries point into arrays[] and
 * strentries[], and fields[] entries point into msgs[] and enums[]). */
static const upb_msgdef msgs[22];
static const upb_fielddef fields[107];
static const upb_enumdef enums[5];
static const upb_tabent strentries[236];
static const upb_tabent intentries[18];
static const upb_tabval arrays[187];

/* reftables[] only exists when UPB_DEBUG_REFS is defined. */
#ifdef UPB_DEBUG_REFS
static upb_inttable reftables[268];
#endif
7407
/* Generated table with one upb_msgdef initializer per message type, listed in
 * ascending order of full name.  Each entry points at its slices of arrays[],
 * intentries[] and strentries[] and names a pair of reftables[] slots.  Other
 * tables refer to these entries by index (e.g. &msgs[20]), so the order must
 * not be changed by hand.  The meaning of the positional arguments is defined
 * by UPB_MSGDEF_INIT, UPB_INTTABLE_INIT and UPB_STRTABLE_INIT. */
static const upb_msgdef msgs[22] = {
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 41, 8, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[0]), false, UPB_SYNTAX_PROTO2, &reftables[0], &reftables[1]),
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]), false, UPB_SYNTAX_PROTO2, &reftables[2], &reftables[3]),
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ReservedRange", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[14], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[20]), false, UPB_SYNTAX_PROTO2, &reftables[4], &reftables[5]),
  UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 12, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[17], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[24]), false, UPB_SYNTAX_PROTO2, &reftables[6], &reftables[7]),
  UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 9, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[21], 4, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]), false, UPB_SYNTAX_PROTO2, &reftables[8], &reftables[9]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 9, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[25], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[32]), false, UPB_SYNTAX_PROTO2, &reftables[10], &reftables[11]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[29], 2, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[36]), false, UPB_SYNTAX_PROTO2, &reftables[12], &reftables[13]),
  UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 24, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[40]), false, UPB_SYNTAX_PROTO2, &reftables[14], &reftables[15]),
  UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 13, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[42], 11, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[56]), false, UPB_SYNTAX_PROTO2, &reftables[16], &reftables[17]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 43, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[53], 13, 12), UPB_STRTABLE_INIT(12, 15, UPB_CTYPE_PTR, 4, &strentries[72]), false, UPB_SYNTAX_PROTO2, &reftables[18], &reftables[19]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 7, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[66], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[88]), false, UPB_SYNTAX_PROTO2, &reftables[20], &reftables[21]),
  UPB_MSGDEF_INIT("google.protobuf.FileOptions", 38, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[68], 42, 17), UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_PTR, 5, &strentries[92]), false, UPB_SYNTAX_PROTO2, &reftables[22], &reftables[23]),
  UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 11, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[110], 8, 4), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[124]), false, UPB_SYNTAX_PROTO2, &reftables[24], &reftables[25]),
  UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 16, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[118], 7, 6), UPB_STRTABLE_INIT(6, 7, UPB_CTYPE_PTR, 3, &strentries[132]), false, UPB_SYNTAX_PROTO2, &reftables[26], &reftables[27]),
  UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 8, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[10], &arrays[125], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[140]), false, UPB_SYNTAX_PROTO2, &reftables[28], &reftables[29]),
  UPB_MSGDEF_INIT("google.protobuf.OneofDescriptorProto", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[126], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[144]), false, UPB_SYNTAX_PROTO2, &reftables[30], &reftables[31]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 12, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[128], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[148]), false, UPB_SYNTAX_PROTO2, &reftables[32], &reftables[33]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 8, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[14], &arrays[132], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[152]), false, UPB_SYNTAX_PROTO2, &reftables[34], &reftables[35]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 7, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[133], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[156]), false, UPB_SYNTAX_PROTO2, &reftables[36], &reftables[37]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 20, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[135], 7, 5), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[160]), false, UPB_SYNTAX_PROTO2, &reftables[38], &reftables[39]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[142], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[168]), false, UPB_SYNTAX_PROTO2, &reftables[40], &reftables[41]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 7, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[151], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[184]), false, UPB_SYNTAX_PROTO2, &reftables[42], &reftables[43]),
};
7432
/* Generated table with one upb_fielddef initializer per field, listed in
 * ascending order of field name (fields that share a name appear once per
 * message).  Every entry refers to an element of msgs[], and message- and
 * enum-typed entries additionally refer to an element of msgs[] or enums[]
 * through a (const upb_def*) cast.  strentries[] refers to these entries by
 * index (e.g. &fields[22]), so the order must not be changed by hand.  The
 * meaning of the positional arguments is defined by UPB_FIELDDEF_INIT. */
static const upb_fielddef fields[107] = {
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[20], NULL, 16, 6, {0},&reftables[44], &reftables[45]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[4], NULL, 7, 1, {0},&reftables[46], &reftables[47]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_enable_arenas", 31, &msgs[11], NULL, 24, 12, {0},&reftables[48], &reftables[49]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[11], NULL, 18, 6, {0},&reftables[50], &reftables[51]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "client_streaming", 5, &msgs[13], NULL, 14, 4, {0},&reftables[52], &reftables[53]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "csharp_namespace", 37, &msgs[11], NULL, 28, 14, {0},&reftables[54], &reftables[55]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[8], (const upb_def*)(&enums[2]), 7, 1, {0},&reftables[56], &reftables[57]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[7], NULL, 17, 7, {0},&reftables[58], &reftables[59]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[9], NULL, 31, 8, {0},&reftables[60], &reftables[61]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[8], NULL, 9, 3, {0},&reftables[62], &reftables[63]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[14], NULL, 7, 1, {0},&reftables[64], &reftables[65]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[12], NULL, 9, 3, {0},&reftables[66], &reftables[67]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 23, &msgs[11], NULL, 22, 10, {0},&reftables[68], &reftables[69]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 1, &msgs[6], NULL, 7, 1, {0},&reftables[70], &reftables[71]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[4], NULL, 8, 2, {0},&reftables[72], &reftables[73]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[17], NULL, 7, 1, {0},&reftables[74], &reftables[75]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[20], NULL, 12, 4, {0},&reftables[76], &reftables[77]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[2], NULL, 4, 1, {0},&reftables[78], &reftables[79]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 4, 1, {0},&reftables[80], &reftables[81]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[9], (const upb_def*)(&msgs[3]), 14, 1, {0},&reftables[82], &reftables[83]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[3]), 19, 2, {0},&reftables[84], &reftables[85]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[7], NULL, 8, 2, {0},&reftables[86], &reftables[87]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[7]), 25, 4, {0},&reftables[88], &reftables[89]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[9], (const upb_def*)(&msgs[7]), 20, 3, {0},&reftables[90], &reftables[91]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 22, 3, {0},&reftables[92], &reftables[93]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[7]), 13, 0, {0},&reftables[94], &reftables[95]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[10], (const upb_def*)(&msgs[9]), 6, 0, {0},&reftables[96], &reftables[97]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[11], NULL, 15, 5, {0},&reftables[98], &reftables[99]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[20], NULL, 7, 1, {0},&reftables[100], &reftables[101]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[13], NULL, 8, 2, {0},&reftables[102], &reftables[103]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[21], NULL, 6, 1, {0},&reftables[104], &reftables[105]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[11], NULL, 21, 9, {0},&reftables[106], &reftables[107]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[11], NULL, 19, 7, {0},&reftables[108], &reftables[109]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[11], NULL, 14, 4, {0},&reftables[110], &reftables[111]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[11], NULL, 10, 2, {0},&reftables[112], &reftables[113]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[11], NULL, 7, 1, {0},&reftables[114], &reftables[115]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_string_check_utf8", 27, &msgs[11], NULL, 23, 11, {0},&reftables[116], &reftables[117]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "javanano_use_deprecated_package", 38, &msgs[11], NULL, 31, 15, {0},&reftables[118], &reftables[119]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "json_name", 10, &msgs[7], NULL, 21, 9, {0},&reftables[120], &reftables[121]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "jstype", 6, &msgs[8], (const upb_def*)(&enums[3]), 11, 5, {0},&reftables[122], &reftables[123]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[7], (const upb_def*)(&enums[0]), 12, 4, {0},&reftables[124], &reftables[125]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[8], NULL, 10, 4, {0},&reftables[126], &reftables[127]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[19], NULL, 9, 2, {0},&reftables[128], &reftables[129]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "leading_detached_comments", 6, &msgs[19], NULL, 17, 4, {0},&reftables[130], &reftables[131]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[18], (const upb_def*)(&msgs[19]), 6, 0, {0},&reftables[132], &reftables[133]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "map_entry", 7, &msgs[12], NULL, 10, 4, {0},&reftables[134], &reftables[135]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[12], NULL, 7, 1, {0},&reftables[136], &reftables[137]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[9], (const upb_def*)(&msgs[0]), 11, 0, {0},&reftables[138], &reftables[139]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[16], (const upb_def*)(&msgs[13]), 7, 0, {0},&reftables[140], &reftables[141]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[20], (const upb_def*)(&msgs[21]), 6, 0, {0},&reftables[142], &reftables[143]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[5], NULL, 5, 1, {0},&reftables[144], &reftables[145]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[9], NULL, 23, 6, {0},&reftables[146], &reftables[147]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[3], NULL, 9, 2, {0},&reftables[148], &reftables[149]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[16], NULL, 9, 2, {0},&reftables[150], &reftables[151]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[15], NULL, 3, 0, {0},&reftables[152], &reftables[153]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[13], NULL, 5, 1, {0},&reftables[154], &reftables[155]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[7], NULL, 5, 1, {0},&reftables[156], &reftables[157]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 33, 8, {0},&reftables[158], &reftables[159]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[21], NULL, 3, 0, {0},&reftables[160], &reftables[161]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[20], NULL, 11, 3, {0},&reftables[162], &reftables[163]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 16, 1, {0},&reftables[164], &reftables[165]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[12], NULL, 8, 2, {0},&reftables[166], &reftables[167]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[7], NULL, 11, 3, {0},&reftables[168], &reftables[169]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[5], NULL, 8, 2, {0},&reftables[170], &reftables[171]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "objc_class_prefix", 36, &msgs[11], NULL, 25, 13, {0},&reftables[172], &reftables[173]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "oneof_decl", 8, &msgs[0], (const upb_def*)(&msgs[15]), 29, 6, {0},&reftables[174], &reftables[175]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "oneof_index", 9, &msgs[7], NULL, 20, 8, {0},&reftables[176], &reftables[177]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[11], (const upb_def*)(&enums[4]), 13, 3, {0},&reftables[178], &reftables[179]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[12]), 26, 5, {0},&reftables[180], &reftables[181]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[9], (const upb_def*)(&msgs[11]), 21, 4, {0},&reftables[182], &reftables[183]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[7], (const upb_def*)(&msgs[8]), 4, 0, {0},&reftables[184], &reftables[185]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[13], (const upb_def*)(&msgs[14]), 4, 0, {0},&reftables[186], &reftables[187]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[16], (const upb_def*)(&msgs[17]), 8, 1, {0},&reftables[188], &reftables[189]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[3], (const upb_def*)(&msgs[4]), 8, 1, {0},&reftables[190], &reftables[191]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[5], (const upb_def*)(&msgs[6]), 4, 0, {0},&reftables[192], &reftables[193]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[13], NULL, 11, 3, {0},&reftables[194], &reftables[195]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[9], NULL, 26, 7, {0},&reftables[196], &reftables[197]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[8], NULL, 8, 2, {0},&reftables[198], &reftables[199]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[19], NULL, 5, 0, {0},&reftables[200], &reftables[201]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "php_class_prefix", 40, &msgs[11], NULL, 32, 16, {0},&reftables[202], &reftables[203]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "php_namespace", 41, &msgs[11], NULL, 35, 17, {0},&reftables[204], &reftables[205]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[20], NULL, 10, 2, {0},&reftables[206], &reftables[207]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[9], NULL, 36, 9, {0},&reftables[208], &reftables[209]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[11], NULL, 20, 8, {0},&reftables[210], &reftables[211]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "reserved_name", 10, &msgs[0], NULL, 38, 9, {0},&reftables[212], &reftables[213]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "reserved_range", 9, &msgs[0], (const upb_def*)(&msgs[2]), 32, 7, {0},&reftables[214], &reftables[215]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "server_streaming", 6, &msgs[13], NULL, 15, 5, {0},&reftables[216], &reftables[217]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[9], (const upb_def*)(&msgs[16]), 17, 2, {0},&reftables[218], &reftables[219]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[9], (const upb_def*)(&msgs[18]), 22, 5, {0},&reftables[220], &reftables[221]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[19], NULL, 8, 1, {0},&reftables[222], &reftables[223]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[2], NULL, 3, 0, {0},&reftables[224], &reftables[225]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 3, 0, {0},&reftables[226], &reftables[227]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[20], NULL, 13, 5, {0},&reftables[228], &reftables[229]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "syntax", 12, &msgs[9], NULL, 40, 11, {0},&reftables[230], &reftables[231]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[19], NULL, 12, 3, {0},&reftables[232], &reftables[233]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[7], (const upb_def*)(&enums[1]), 13, 5, {0},&reftables[234], &reftables[235]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[7], NULL, 14, 6, {0},&reftables[236], &reftables[237]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[12], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[238], &reftables[239]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[17], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[240], &reftables[241]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[242], &reftables[243]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[14], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[244], &reftables[245]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[8], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[246], &reftables[247]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[6], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[248], &reftables[249]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[4], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[250], &reftables[251]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[3], (const upb_def*)(&msgs[5]), 7, 0, {0},&reftables[252], &reftables[253]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[8], NULL, 12, 6, {0},&reftables[254], &reftables[255]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[9], NULL, 39, 10, {0},&reftables[256], &reftables[257]),
};
7542
/* Generated table with one upb_enumdef initializer per enum type, listed in
 * ascending order of full name.  Each entry points at its slice of
 * strentries[] and of arrays[].  fields[] refers to these entries by index
 * (e.g. &enums[2]), so the order must not be changed by hand.  The meaning of
 * the positional arguments is defined by UPB_ENUMDEF_INIT. */
static const upb_enumdef enums[5] = {
  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[188]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[154], 4, 3), 0, &reftables[258], &reftables[259]),
  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[192]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[158], 19, 18), 0, &reftables[260], &reftables[261]),
  UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[224]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[177], 3, 3), 0, &reftables[262], &reftables[263]),
  UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.JSType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[228]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[180], 3, 3), 0, &reftables[264], &reftables[265]),
  UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[232]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[183], 4, 3), 0, &reftables[266], &reftables[267]),
};
7550
7551static const upb_tabent strentries[236] = {
Austin Schuh40c16522018-10-28 20:27:54 -07007552 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007553 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007554 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "reserved_name"), UPB_TABVALUE_PTR_INIT(&fields[84]), NULL},
7555 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007556 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7557 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7558 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007559 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[25]), &strentries[12]},
7560 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[24]), &strentries[14]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007561 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007562 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
7563 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7564 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "reserved_range"), UPB_TABVALUE_PTR_INIT(&fields[85]), NULL},
7565 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
7566 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "oneof_decl"), UPB_TABVALUE_PTR_INIT(&fields[65]), NULL},
7567 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), &strentries[13]},
7568 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[91]), NULL},
7569 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
7570 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7571 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7572 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[90]), NULL},
7573 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007574 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7575 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7576 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007577 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[104]), NULL},
7578 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
7579 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[52]), &strentries[26]},
7580 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
7581 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007582 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
7583 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007584 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007585 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007586 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
7587 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[50]), &strentries[34]},
7588 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
7589 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
7590 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7591 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7592 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "oneof_index"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
7593 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[40]), NULL},
7594 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7595 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007596 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7597 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7598 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7599 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007600 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[62]), &strentries[53]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007601 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007602 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
7603 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
7604 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "json_name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
7605 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[95]), &strentries[50]},
7606 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
7607 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
7608 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
7609 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7610 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[105]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007611 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7612 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7613 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7614 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007615 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
7616 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
7617 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7618 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
7619 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7620 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "jstype"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
7621 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[9]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007622 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7623 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007624 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[23]), NULL},
7625 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[106]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007626 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007627 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
7628 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[87]), NULL},
7629 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7630 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[88]), NULL},
7631 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7632 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7633 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "syntax"), UPB_TABVALUE_PTR_INIT(&fields[93]), NULL},
7634 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
7635 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
7636 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
7637 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[86]},
7638 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
7639 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[82]), &strentries[85]},
7640 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7641 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007642 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7643 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7644 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7645 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007646 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
7647 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "csharp_namespace"), UPB_TABVALUE_PTR_INIT(&fields[5]), &strentries[116]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007648 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7649 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7650 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7651 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7652 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007653 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7654 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7655 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
7656 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[120]},
7657 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7658 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7659 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
7660 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "php_namespace"), UPB_TABVALUE_PTR_INIT(&fields[80]), &strentries[113]},
7661 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7662 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7663 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7664 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[33]), &strentries[117]},
7665 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
7666 {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[32]), &strentries[118]},
7667 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[31]), NULL},
7668 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "php_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
7669 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "javanano_use_deprecated_package"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[123]},
7670 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[83]), NULL},
7671 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[67]), NULL},
7672 {UPB_TABKEY_STR("\026", "\000", "\000", "\000", "java_string_check_utf8"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
7673 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[12]), &strentries[119]},
7674 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "objc_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
7675 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "cc_enable_arenas"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
7676 {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[128]},
7677 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7678 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7679 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7680 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
7681 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
7682 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "map_entry"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
7683 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[61]), NULL},
7684 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7685 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "client_streaming"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
7686 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "server_streaming"), UPB_TABVALUE_PTR_INIT(&fields[86]), NULL},
7687 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
7688 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[29]), NULL},
7689 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7690 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
7691 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
7692 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
7693 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007694 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7695 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7696 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7697 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007698 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007699 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[54]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007700 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007701 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[72]), &strentries[150]},
7702 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
7703 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[149]},
7704 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
7705 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007706 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7707 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7708 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007709 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7710 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
7711 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7712 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7713 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7714 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7715 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[89]), &strentries[167]},
7716 {UPB_TABKEY_STR("\031", "\000", "\000", "\000", "leading_detached_comments"), UPB_TABVALUE_PTR_INIT(&fields[43]), &strentries[165]},
7717 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[94]), NULL},
7718 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[42]), &strentries[164]},
7719 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
7720 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
7721 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7722 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7723 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
7724 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7725 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7726 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7727 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007728 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
7729 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7730 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7731 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7732 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007733 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[81]), NULL},
7734 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
7735 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[92]), &strentries[182]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007736 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7737 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007738 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
7739 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
7740 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[190]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007741 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7742 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
7743 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
7744 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
7745 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7746 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7747 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7748 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7749 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007750 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[221]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007751 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
7752 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7753 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
7754 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
7755 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
7756 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007757 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[222]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007758 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7759 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007760 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[219]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007761 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7762 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7763 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7764 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7765 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
7766 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
7767 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007768 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[218]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007769 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7770 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
7771 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
7772 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
7773 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
7774 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
7775 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
7776 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7777 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007778 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[225]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007779 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007780 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7781 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NORMAL"), UPB_TABVALUE_INT_INIT(0), NULL},
7782 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NUMBER"), UPB_TABVALUE_INT_INIT(2), NULL},
7783 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_STRING"), UPB_TABVALUE_INT_INIT(1), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007784 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007785 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[235]},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007786 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7787 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007788};
7789
Austin Schuh40c16522018-10-28 20:27:54 -07007790static const upb_tabent intentries[18] = {
Brian Silverman9c614bc2016-02-15 20:20:02 -05007791 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007792 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007793 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007794 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007795 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007796 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007797 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007798 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007799 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007800 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007801 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007802 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007803 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Austin Schuh40c16522018-10-28 20:27:54 -07007804 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
7805 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7806 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
7807 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7808 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
Brian Silverman9c614bc2016-02-15 20:20:02 -05007809};
7810
Austin Schuh40c16522018-10-28 20:27:54 -07007811static const upb_tabval arrays[187] = {
Brian Silverman9c614bc2016-02-15 20:20:02 -05007812 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007813 UPB_TABVALUE_PTR_INIT(&fields[57]),
7814 UPB_TABVALUE_PTR_INIT(&fields[25]),
7815 UPB_TABVALUE_PTR_INIT(&fields[60]),
7816 UPB_TABVALUE_PTR_INIT(&fields[20]),
7817 UPB_TABVALUE_PTR_INIT(&fields[24]),
7818 UPB_TABVALUE_PTR_INIT(&fields[22]),
7819 UPB_TABVALUE_PTR_INIT(&fields[68]),
7820 UPB_TABVALUE_PTR_INIT(&fields[65]),
7821 UPB_TABVALUE_PTR_INIT(&fields[85]),
7822 UPB_TABVALUE_PTR_INIT(&fields[84]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007823 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007824 UPB_TABVALUE_PTR_INIT(&fields[91]),
7825 UPB_TABVALUE_PTR_INIT(&fields[18]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007826 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007827 UPB_TABVALUE_PTR_INIT(&fields[90]),
7828 UPB_TABVALUE_PTR_INIT(&fields[17]),
7829 UPB_TABVALUE_EMPTY_INIT,
7830 UPB_TABVALUE_PTR_INIT(&fields[52]),
7831 UPB_TABVALUE_PTR_INIT(&fields[104]),
7832 UPB_TABVALUE_PTR_INIT(&fields[73]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007833 UPB_TABVALUE_EMPTY_INIT,
7834 UPB_TABVALUE_EMPTY_INIT,
7835 UPB_TABVALUE_PTR_INIT(&fields[1]),
Austin Schuh40c16522018-10-28 20:27:54 -07007836 UPB_TABVALUE_PTR_INIT(&fields[14]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007837 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007838 UPB_TABVALUE_PTR_INIT(&fields[50]),
7839 UPB_TABVALUE_PTR_INIT(&fields[63]),
7840 UPB_TABVALUE_PTR_INIT(&fields[74]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007841 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007842 UPB_TABVALUE_PTR_INIT(&fields[13]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007843 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007844 UPB_TABVALUE_PTR_INIT(&fields[56]),
7845 UPB_TABVALUE_PTR_INIT(&fields[21]),
7846 UPB_TABVALUE_PTR_INIT(&fields[62]),
7847 UPB_TABVALUE_PTR_INIT(&fields[40]),
7848 UPB_TABVALUE_PTR_INIT(&fields[95]),
7849 UPB_TABVALUE_PTR_INIT(&fields[96]),
7850 UPB_TABVALUE_PTR_INIT(&fields[7]),
7851 UPB_TABVALUE_PTR_INIT(&fields[70]),
7852 UPB_TABVALUE_PTR_INIT(&fields[66]),
7853 UPB_TABVALUE_PTR_INIT(&fields[38]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007854 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007855 UPB_TABVALUE_PTR_INIT(&fields[6]),
7856 UPB_TABVALUE_PTR_INIT(&fields[77]),
7857 UPB_TABVALUE_PTR_INIT(&fields[9]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007858 UPB_TABVALUE_EMPTY_INIT,
7859 UPB_TABVALUE_PTR_INIT(&fields[41]),
Austin Schuh40c16522018-10-28 20:27:54 -07007860 UPB_TABVALUE_PTR_INIT(&fields[39]),
7861 UPB_TABVALUE_EMPTY_INIT,
7862 UPB_TABVALUE_EMPTY_INIT,
7863 UPB_TABVALUE_EMPTY_INIT,
7864 UPB_TABVALUE_PTR_INIT(&fields[105]),
7865 UPB_TABVALUE_EMPTY_INIT,
Brian Silverman9c614bc2016-02-15 20:20:02 -05007866 UPB_TABVALUE_PTR_INIT(&fields[51]),
Austin Schuh40c16522018-10-28 20:27:54 -07007867 UPB_TABVALUE_PTR_INIT(&fields[76]),
7868 UPB_TABVALUE_PTR_INIT(&fields[8]),
7869 UPB_TABVALUE_PTR_INIT(&fields[47]),
7870 UPB_TABVALUE_PTR_INIT(&fields[19]),
7871 UPB_TABVALUE_PTR_INIT(&fields[87]),
7872 UPB_TABVALUE_PTR_INIT(&fields[23]),
7873 UPB_TABVALUE_PTR_INIT(&fields[69]),
7874 UPB_TABVALUE_PTR_INIT(&fields[88]),
7875 UPB_TABVALUE_PTR_INIT(&fields[82]),
7876 UPB_TABVALUE_PTR_INIT(&fields[106]),
7877 UPB_TABVALUE_PTR_INIT(&fields[93]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007878 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007879 UPB_TABVALUE_PTR_INIT(&fields[26]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007880 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007881 UPB_TABVALUE_PTR_INIT(&fields[35]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007882 UPB_TABVALUE_EMPTY_INIT,
7883 UPB_TABVALUE_EMPTY_INIT,
7884 UPB_TABVALUE_EMPTY_INIT,
7885 UPB_TABVALUE_EMPTY_INIT,
7886 UPB_TABVALUE_EMPTY_INIT,
7887 UPB_TABVALUE_EMPTY_INIT,
7888 UPB_TABVALUE_PTR_INIT(&fields[34]),
Austin Schuh40c16522018-10-28 20:27:54 -07007889 UPB_TABVALUE_PTR_INIT(&fields[67]),
7890 UPB_TABVALUE_PTR_INIT(&fields[33]),
7891 UPB_TABVALUE_PTR_INIT(&fields[27]),
7892 UPB_TABVALUE_EMPTY_INIT,
7893 UPB_TABVALUE_EMPTY_INIT,
7894 UPB_TABVALUE_EMPTY_INIT,
7895 UPB_TABVALUE_EMPTY_INIT,
7896 UPB_TABVALUE_PTR_INIT(&fields[3]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007897 UPB_TABVALUE_PTR_INIT(&fields[32]),
Austin Schuh40c16522018-10-28 20:27:54 -07007898 UPB_TABVALUE_PTR_INIT(&fields[83]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007899 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007900 UPB_TABVALUE_PTR_INIT(&fields[31]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007901 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007902 UPB_TABVALUE_EMPTY_INIT,
7903 UPB_TABVALUE_PTR_INIT(&fields[12]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007904 UPB_TABVALUE_EMPTY_INIT,
7905 UPB_TABVALUE_EMPTY_INIT,
7906 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007907 UPB_TABVALUE_PTR_INIT(&fields[36]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007908 UPB_TABVALUE_EMPTY_INIT,
7909 UPB_TABVALUE_EMPTY_INIT,
7910 UPB_TABVALUE_EMPTY_INIT,
7911 UPB_TABVALUE_PTR_INIT(&fields[2]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007912 UPB_TABVALUE_EMPTY_INIT,
7913 UPB_TABVALUE_EMPTY_INIT,
7914 UPB_TABVALUE_EMPTY_INIT,
7915 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007916 UPB_TABVALUE_PTR_INIT(&fields[64]),
7917 UPB_TABVALUE_PTR_INIT(&fields[5]),
7918 UPB_TABVALUE_PTR_INIT(&fields[37]),
7919 UPB_TABVALUE_EMPTY_INIT,
7920 UPB_TABVALUE_PTR_INIT(&fields[79]),
7921 UPB_TABVALUE_PTR_INIT(&fields[80]),
7922 UPB_TABVALUE_EMPTY_INIT,
7923 UPB_TABVALUE_PTR_INIT(&fields[46]),
7924 UPB_TABVALUE_PTR_INIT(&fields[61]),
7925 UPB_TABVALUE_PTR_INIT(&fields[11]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007926 UPB_TABVALUE_EMPTY_INIT,
7927 UPB_TABVALUE_EMPTY_INIT,
7928 UPB_TABVALUE_EMPTY_INIT,
Brian Silverman9c614bc2016-02-15 20:20:02 -05007929 UPB_TABVALUE_PTR_INIT(&fields[45]),
7930 UPB_TABVALUE_EMPTY_INIT,
Brian Silverman9c614bc2016-02-15 20:20:02 -05007931 UPB_TABVALUE_PTR_INIT(&fields[55]),
Austin Schuh40c16522018-10-28 20:27:54 -07007932 UPB_TABVALUE_PTR_INIT(&fields[29]),
7933 UPB_TABVALUE_PTR_INIT(&fields[75]),
7934 UPB_TABVALUE_PTR_INIT(&fields[71]),
7935 UPB_TABVALUE_PTR_INIT(&fields[4]),
7936 UPB_TABVALUE_PTR_INIT(&fields[86]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007937 UPB_TABVALUE_EMPTY_INIT,
7938 UPB_TABVALUE_EMPTY_INIT,
Brian Silverman9c614bc2016-02-15 20:20:02 -05007939 UPB_TABVALUE_PTR_INIT(&fields[54]),
7940 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007941 UPB_TABVALUE_PTR_INIT(&fields[53]),
7942 UPB_TABVALUE_PTR_INIT(&fields[48]),
7943 UPB_TABVALUE_PTR_INIT(&fields[72]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007944 UPB_TABVALUE_EMPTY_INIT,
7945 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007946 UPB_TABVALUE_PTR_INIT(&fields[44]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007947 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007948 UPB_TABVALUE_PTR_INIT(&fields[78]),
7949 UPB_TABVALUE_PTR_INIT(&fields[89]),
7950 UPB_TABVALUE_PTR_INIT(&fields[42]),
7951 UPB_TABVALUE_PTR_INIT(&fields[94]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007952 UPB_TABVALUE_EMPTY_INIT,
Brian Silverman9c614bc2016-02-15 20:20:02 -05007953 UPB_TABVALUE_PTR_INIT(&fields[43]),
Austin Schuh40c16522018-10-28 20:27:54 -07007954 UPB_TABVALUE_EMPTY_INIT,
7955 UPB_TABVALUE_EMPTY_INIT,
7956 UPB_TABVALUE_PTR_INIT(&fields[49]),
7957 UPB_TABVALUE_PTR_INIT(&fields[28]),
7958 UPB_TABVALUE_PTR_INIT(&fields[81]),
7959 UPB_TABVALUE_PTR_INIT(&fields[59]),
7960 UPB_TABVALUE_PTR_INIT(&fields[16]),
7961 UPB_TABVALUE_PTR_INIT(&fields[92]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007962 UPB_TABVALUE_PTR_INIT(&fields[0]),
7963 UPB_TABVALUE_EMPTY_INIT,
Austin Schuh40c16522018-10-28 20:27:54 -07007964 UPB_TABVALUE_PTR_INIT(&fields[58]),
7965 UPB_TABVALUE_PTR_INIT(&fields[30]),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007966 UPB_TABVALUE_EMPTY_INIT,
7967 UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
7968 UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
7969 UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
7970 UPB_TABVALUE_EMPTY_INIT,
7971 UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
7972 UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
7973 UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
7974 UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
7975 UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
7976 UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
7977 UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
7978 UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
7979 UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
7980 UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
7981 UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
7982 UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
7983 UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
7984 UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
7985 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
7986 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
7987 UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
7988 UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
7989 UPB_TABVALUE_PTR_INIT("STRING"),
7990 UPB_TABVALUE_PTR_INIT("CORD"),
7991 UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
Austin Schuh40c16522018-10-28 20:27:54 -07007992 UPB_TABVALUE_PTR_INIT("JS_NORMAL"),
7993 UPB_TABVALUE_PTR_INIT("JS_STRING"),
7994 UPB_TABVALUE_PTR_INIT("JS_NUMBER"),
Brian Silverman9c614bc2016-02-15 20:20:02 -05007995 UPB_TABVALUE_EMPTY_INIT,
7996 UPB_TABVALUE_PTR_INIT("SPEED"),
7997 UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
7998 UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
7999};
8000
#ifdef UPB_DEBUG_REFS
/* Table storage that is only compiled in when UPB_DEBUG_REFS is defined.  All
 * 268 slots start out as empty, pointer-valued inttables.  The helper macros
 * exist only to spell out the 268 identical initializers compactly
 * (268 = 4 * 64 + 3 * 4) and are undefined again right after the array. */
#define UPB_REFTABLE_X1 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR)
#define UPB_REFTABLE_X4 \
  UPB_REFTABLE_X1, UPB_REFTABLE_X1, UPB_REFTABLE_X1, UPB_REFTABLE_X1
#define UPB_REFTABLE_X16 \
  UPB_REFTABLE_X4, UPB_REFTABLE_X4, UPB_REFTABLE_X4, UPB_REFTABLE_X4
#define UPB_REFTABLE_X64 \
  UPB_REFTABLE_X16, UPB_REFTABLE_X16, UPB_REFTABLE_X16, UPB_REFTABLE_X16

static upb_inttable reftables[268] = {
  UPB_REFTABLE_X64, UPB_REFTABLE_X64, UPB_REFTABLE_X64, UPB_REFTABLE_X64,
  UPB_REFTABLE_X4, UPB_REFTABLE_X4, UPB_REFTABLE_X4,
};

#undef UPB_REFTABLE_X64
#undef UPB_REFTABLE_X16
#undef UPB_REFTABLE_X4
#undef UPB_REFTABLE_X1
#endif
8273
Austin Schuh40c16522018-10-28 20:27:54 -07008274static const upb_msgdef *refm(const upb_msgdef *m, const void *owner) {
8275 upb_msgdef_ref(m, owner);
8276 return m;
8277}
8278
8279static const upb_enumdef *refe(const upb_enumdef *e, const void *owner) {
8280 upb_enumdef_ref(e, owner);
8281 return e;
8282}
8283
8284/* Public API. */
8285const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_get(const void *owner) { return refm(&msgs[0], owner); }
8286const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void *owner) { return refm(&msgs[1], owner); }
8287const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void *owner) { return refm(&msgs[2], owner); }
8288const upb_msgdef *upbdefs_google_protobuf_EnumDescriptorProto_get(const void *owner) { return refm(&msgs[3], owner); }
8289const upb_msgdef *upbdefs_google_protobuf_EnumOptions_get(const void *owner) { return refm(&msgs[4], owner); }
8290const upb_msgdef *upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void *owner) { return refm(&msgs[5], owner); }
8291const upb_msgdef *upbdefs_google_protobuf_EnumValueOptions_get(const void *owner) { return refm(&msgs[6], owner); }
8292const upb_msgdef *upbdefs_google_protobuf_FieldDescriptorProto_get(const void *owner) { return refm(&msgs[7], owner); }
8293const upb_msgdef *upbdefs_google_protobuf_FieldOptions_get(const void *owner) { return refm(&msgs[8], owner); }
8294const upb_msgdef *upbdefs_google_protobuf_FileDescriptorProto_get(const void *owner) { return refm(&msgs[9], owner); }
8295const upb_msgdef *upbdefs_google_protobuf_FileDescriptorSet_get(const void *owner) { return refm(&msgs[10], owner); }
8296const upb_msgdef *upbdefs_google_protobuf_FileOptions_get(const void *owner) { return refm(&msgs[11], owner); }
8297const upb_msgdef *upbdefs_google_protobuf_MessageOptions_get(const void *owner) { return refm(&msgs[12], owner); }
8298const upb_msgdef *upbdefs_google_protobuf_MethodDescriptorProto_get(const void *owner) { return refm(&msgs[13], owner); }
8299const upb_msgdef *upbdefs_google_protobuf_MethodOptions_get(const void *owner) { return refm(&msgs[14], owner); }
8300const upb_msgdef *upbdefs_google_protobuf_OneofDescriptorProto_get(const void *owner) { return refm(&msgs[15], owner); }
8301const upb_msgdef *upbdefs_google_protobuf_ServiceDescriptorProto_get(const void *owner) { return refm(&msgs[16], owner); }
8302const upb_msgdef *upbdefs_google_protobuf_ServiceOptions_get(const void *owner) { return refm(&msgs[17], owner); }
8303const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_get(const void *owner) { return refm(&msgs[18], owner); }
8304const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void *owner) { return refm(&msgs[19], owner); }
8305const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_get(const void *owner) { return refm(&msgs[20], owner); }
8306const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void *owner) { return refm(&msgs[21], owner); }
8307
8308const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void *owner) { return refe(&enums[0], owner); }
8309const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void *owner) { return refe(&enums[1], owner); }
8310const upb_enumdef *upbdefs_google_protobuf_FieldOptions_CType_get(const void *owner) { return refe(&enums[2], owner); }
8311const upb_enumdef *upbdefs_google_protobuf_FieldOptions_JSType_get(const void *owner) { return refe(&enums[3], owner); }
8312const upb_enumdef *upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void *owner) { return refe(&enums[4], owner); }
Brian Silverman9c614bc2016-02-15 20:20:02 -05008313/*
8314** XXX: The routines in this file that consume a string do not currently
8315** support having the string span buffers. In the future, as upb_sink and
8316** its buffering/sharing functionality evolve there should be an easy and
8317** idiomatic way of correctly handling this case. For now, we accept this
8318** limitation since we currently only parse descriptors from single strings.
8319*/
8320
8321
8322#include <errno.h>
8323#include <stdlib.h>
8324#include <string.h>
8325
/* Returns true iff the NUL-terminated string "str" has exactly "n" bytes and
 * those bytes equal the first "n" bytes of "buf" ("buf" itself does not need
 * to be NUL-terminated). */
static bool upb_streq(const char *str, const char *buf, size_t n) {
  if (strlen(str) != n) {
    return false;
  }
  return memcmp(str, buf, n) == 0;
}
Brian Silverman9c614bc2016-02-15 20:20:02 -05008330
/* One entry of the reader's scope stack.  The stack holds every message scope
 * we are currently inside of plus the top-level file scope; this is what lets
 * us correctly qualify the definitions nested inside each scope. */
typedef struct {
  /* Bare name of the message or package (not qualified by any enclosing
   * scope).  NULL until a name has been set for the scope. */
  char *name;
  /* Index of the first def that is under this scope.  For msgdefs, the
   * msgdef itself is at start-1. */
  int start;
  /* Number of entries in the reader's oneof table when the scope was
   * opened. */
  uint32_t oneof_start;
  /* Reset to 0 when the scope is opened. */
  uint32_t oneof_index;
} upb_descreader_frame;
8343
/* The maximum number of nested declarations that are allowed, i.e.
 *   message Foo {
 *     message Bar {
 *       message Baz {
 *       }
 *     }
 *   }
 *
 * This is a resource limit that affects how big our runtime stack can grow.
 * TODO: make this a runtime-settable property of the Reader instance. */
8354#define UPB_MAX_MESSAGE_NESTING 64
8355
8356struct upb_descreader {
8357 upb_sink sink;
Austin Schuh40c16522018-10-28 20:27:54 -07008358 upb_inttable files;
8359 upb_strtable files_by_name;
8360 upb_filedef *file; /* The last file in files. */
Brian Silverman9c614bc2016-02-15 20:20:02 -05008361 upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
8362 int stack_len;
Austin Schuh40c16522018-10-28 20:27:54 -07008363 upb_inttable oneofs;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008364
8365 uint32_t number;
8366 char *name;
8367 bool saw_number;
8368 bool saw_name;
8369
8370 char *default_string;
8371
8372 upb_fielddef *f;
8373};
8374
/* Returns a newly upb_gmalloc()ed, NUL-terminated copy of the first "n" bytes
 * of "buf", or NULL if the allocation fails. */
static char *upb_gstrndup(const char *buf, size_t n) {
  char *copy = upb_gmalloc(n + 1);
  if (copy != NULL) {
    memcpy(copy, buf, n);
    copy[n] = '\0';
  }
  return copy;
}
8382
/* Builds "<base>.<name>" in a newly allocated string, or just a copy of
 * "name" when "base" is NULL or empty:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz")        -> "Baz"
 * Returns NULL if allocation fails.  The caller owns the returned string. */
static char *upb_join(const char *base, const char *name) {
  size_t base_len;
  size_t name_len;
  char *joined;

  if (base == NULL || base[0] == '\0') {
    return upb_gstrdup(name);
  }

  base_len = strlen(base);
  name_len = strlen(name);

  /* +2: one byte for the '.' separator, one for the terminating NUL. */
  joined = upb_gmalloc(base_len + name_len + 2);
  if (joined == NULL) {
    return NULL;
  }

  memcpy(joined, base, base_len);
  joined[base_len] = '.';
  /* name_len + 1 so that the terminating NUL of "name" is copied as well. */
  memcpy(joined + base_len + 1, name, name_len + 1);
  return joined;
}
8403
Brian Silverman9c614bc2016-02-15 20:20:02 -05008404/* Qualify the defname for all defs starting with offset "start" with "str". */
Austin Schuh40c16522018-10-28 20:27:54 -07008405static bool upb_descreader_qualify(upb_filedef *f, char *str, int32_t start) {
8406 size_t i;
8407 for (i = start; i < upb_filedef_defcount(f); i++) {
8408 upb_def *def = upb_filedef_mutabledef(f, i);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008409 char *name = upb_join(str, upb_def_fullname(def));
Austin Schuh40c16522018-10-28 20:27:54 -07008410 if (!name) {
8411 /* Need better logic here; at this point we've qualified some names but
8412 * not others. */
8413 return false;
8414 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05008415 upb_def_setfullname(def, name, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -07008416 upb_gfree(name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008417 }
Austin Schuh40c16522018-10-28 20:27:54 -07008418 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008419}
8420
8421
8422/* upb_descreader ************************************************************/
8423
8424static upb_msgdef *upb_descreader_top(upb_descreader *r) {
8425 int index;
Austin Schuh40c16522018-10-28 20:27:54 -07008426 UPB_ASSERT(r->stack_len > 1);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008427 index = r->stack[r->stack_len-1].start - 1;
Austin Schuh40c16522018-10-28 20:27:54 -07008428 UPB_ASSERT(index >= 0);
8429 return upb_downcast_msgdef_mutable(upb_filedef_mutabledef(r->file, index));
Brian Silverman9c614bc2016-02-15 20:20:02 -05008430}
8431
8432static upb_def *upb_descreader_last(upb_descreader *r) {
Austin Schuh40c16522018-10-28 20:27:54 -07008433 return upb_filedef_mutabledef(r->file, upb_filedef_defcount(r->file) - 1);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008434}
8435
8436/* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
8437 * entities that have names and can contain sub-definitions. */
8438void upb_descreader_startcontainer(upb_descreader *r) {
8439 upb_descreader_frame *f = &r->stack[r->stack_len++];
Austin Schuh40c16522018-10-28 20:27:54 -07008440 f->start = upb_filedef_defcount(r->file);
8441 f->oneof_start = upb_inttable_count(&r->oneofs);
8442 f->oneof_index = 0;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008443 f->name = NULL;
8444}
8445
Austin Schuh40c16522018-10-28 20:27:54 -07008446bool upb_descreader_endcontainer(upb_descreader *r) {
8447 upb_descreader_frame *f = &r->stack[r->stack_len - 1];
8448
8449 while (upb_inttable_count(&r->oneofs) > f->oneof_start) {
8450 upb_oneofdef *o = upb_value_getptr(upb_inttable_pop(&r->oneofs));
8451 bool ok = upb_msgdef_addoneof(upb_descreader_top(r), o, &r->oneofs, NULL);
8452 UPB_ASSERT(ok);
8453 }
8454
8455 if (!upb_descreader_qualify(r->file, f->name, f->start)) {
8456 return false;
8457 }
8458 upb_gfree(f->name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008459 f->name = NULL;
Austin Schuh40c16522018-10-28 20:27:54 -07008460
8461 r->stack_len--;
8462 return true;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008463}
8464
8465void upb_descreader_setscopename(upb_descreader *r, char *str) {
8466 upb_descreader_frame *f = &r->stack[r->stack_len-1];
Austin Schuh40c16522018-10-28 20:27:54 -07008467 upb_gfree(f->name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008468 f->name = str;
8469}
8470
Austin Schuh40c16522018-10-28 20:27:54 -07008471static upb_oneofdef *upb_descreader_getoneof(upb_descreader *r,
8472 uint32_t index) {
8473 bool found;
8474 upb_value val;
8475 upb_descreader_frame *f = &r->stack[r->stack_len-1];
8476
8477 /* DescriptorProto messages can be nested, so we will see the nested messages
8478 * between when we see the FieldDescriptorProto and the OneofDescriptorProto.
8479 * We need to preserve the oneofs in between these two things. */
8480 index += f->oneof_start;
8481
8482 while (upb_inttable_count(&r->oneofs) <= index) {
8483 upb_inttable_push(&r->oneofs, upb_value_ptr(upb_oneofdef_new(&r->oneofs)));
8484 }
8485
8486 found = upb_inttable_lookup(&r->oneofs, index, &val);
8487 UPB_ASSERT(found);
8488 return upb_value_getptr(val);
8489}
8490
8491/** Handlers for google.protobuf.FileDescriptorSet. ***************************/
8492
8493static void *fileset_startfile(void *closure, const void *hd) {
8494 upb_descreader *r = closure;
8495 UPB_UNUSED(hd);
8496 r->file = upb_filedef_new(&r->files);
8497 upb_inttable_push(&r->files, upb_value_ptr(r->file));
8498 return r;
8499}
8500
8501/** Handlers for google.protobuf.FileDescriptorProto. *************************/
8502
8503static bool file_start(void *closure, const void *hd) {
8504 upb_descreader *r = closure;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008505 UPB_UNUSED(hd);
8506 upb_descreader_startcontainer(r);
8507 return true;
8508}
8509
Austin Schuh40c16522018-10-28 20:27:54 -07008510static bool file_end(void *closure, const void *hd, upb_status *status) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05008511 upb_descreader *r = closure;
8512 UPB_UNUSED(hd);
8513 UPB_UNUSED(status);
Austin Schuh40c16522018-10-28 20:27:54 -07008514 return upb_descreader_endcontainer(r);
8515}
8516
8517static size_t file_onname(void *closure, const void *hd, const char *buf,
8518 size_t n, const upb_bufhandle *handle) {
8519 upb_descreader *r = closure;
8520 char *name;
8521 bool ok;
8522 UPB_UNUSED(hd);
8523 UPB_UNUSED(handle);
8524
8525 name = upb_gstrndup(buf, n);
8526 upb_strtable_insert(&r->files_by_name, name, upb_value_ptr(r->file));
8527 /* XXX: see comment at the top of the file. */
8528 ok = upb_filedef_setname(r->file, name, NULL);
8529 upb_gfree(name);
8530 UPB_ASSERT(ok);
8531 return n;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008532}
8533
8534static size_t file_onpackage(void *closure, const void *hd, const char *buf,
8535 size_t n, const upb_bufhandle *handle) {
8536 upb_descreader *r = closure;
Austin Schuh40c16522018-10-28 20:27:54 -07008537 char *package;
8538 bool ok;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008539 UPB_UNUSED(hd);
8540 UPB_UNUSED(handle);
Austin Schuh40c16522018-10-28 20:27:54 -07008541
8542 package = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008543 /* XXX: see comment at the top of the file. */
Austin Schuh40c16522018-10-28 20:27:54 -07008544 upb_descreader_setscopename(r, package);
8545 ok = upb_filedef_setpackage(r->file, package, NULL);
8546 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008547 return n;
8548}
8549
Austin Schuh40c16522018-10-28 20:27:54 -07008550static void *file_startphpnamespace(void *closure, const void *hd,
8551 size_t size_hint) {
8552 upb_descreader *r = closure;
8553 bool ok;
8554 UPB_UNUSED(hd);
8555 UPB_UNUSED(size_hint);
8556
8557 ok = upb_filedef_setphpnamespace(r->file, "", NULL);
8558 UPB_ASSERT(ok);
8559 return closure;
8560}
8561
8562static size_t file_onphpnamespace(void *closure, const void *hd,
8563 const char *buf, size_t n,
8564 const upb_bufhandle *handle) {
8565 upb_descreader *r = closure;
8566 char *php_namespace;
8567 bool ok;
8568 UPB_UNUSED(hd);
8569 UPB_UNUSED(handle);
8570
8571 php_namespace = upb_gstrndup(buf, n);
8572 ok = upb_filedef_setphpnamespace(r->file, php_namespace, NULL);
8573 upb_gfree(php_namespace);
8574 UPB_ASSERT(ok);
8575 return n;
8576}
8577
8578static size_t file_onphpprefix(void *closure, const void *hd, const char *buf,
8579 size_t n, const upb_bufhandle *handle) {
8580 upb_descreader *r = closure;
8581 char *prefix;
8582 bool ok;
8583 UPB_UNUSED(hd);
8584 UPB_UNUSED(handle);
8585
8586 prefix = upb_gstrndup(buf, n);
8587 ok = upb_filedef_setphpprefix(r->file, prefix, NULL);
8588 upb_gfree(prefix);
8589 UPB_ASSERT(ok);
8590 return n;
8591}
8592
8593static size_t file_onsyntax(void *closure, const void *hd, const char *buf,
8594 size_t n, const upb_bufhandle *handle) {
8595 upb_descreader *r = closure;
8596 bool ok;
8597 UPB_UNUSED(hd);
8598 UPB_UNUSED(handle);
8599 /* XXX: see comment at the top of the file. */
8600 if (upb_streq("proto2", buf, n)) {
8601 ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO2, NULL);
8602 } else if (upb_streq("proto3", buf, n)) {
8603 ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO3, NULL);
8604 } else {
8605 ok = false;
8606 }
8607
8608 UPB_ASSERT(ok);
8609 return n;
8610}
8611
8612static void *file_startmsg(void *closure, const void *hd) {
8613 upb_descreader *r = closure;
8614 upb_msgdef *m = upb_msgdef_new(&m);
8615 bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
8616 UPB_UNUSED(hd);
8617 UPB_ASSERT(ok);
8618 return r;
8619}
8620
8621static void *file_startenum(void *closure, const void *hd) {
8622 upb_descreader *r = closure;
8623 upb_enumdef *e = upb_enumdef_new(&e);
8624 bool ok = upb_filedef_addenum(r->file, e, &e, NULL);
8625 UPB_UNUSED(hd);
8626 UPB_ASSERT(ok);
8627 return r;
8628}
8629
8630static void *file_startext(void *closure, const void *hd) {
8631 upb_descreader *r = closure;
8632 bool ok;
8633 r->f = upb_fielddef_new(r);
8634 ok = upb_filedef_addext(r->file, r->f, r, NULL);
8635 UPB_UNUSED(hd);
8636 UPB_ASSERT(ok);
8637 return r;
8638}
8639
8640static size_t file_ondep(void *closure, const void *hd, const char *buf,
8641 size_t n, const upb_bufhandle *handle) {
8642 upb_descreader *r = closure;
8643 upb_value val;
8644 if (upb_strtable_lookup2(&r->files_by_name, buf, n, &val)) {
8645 upb_filedef_adddep(r->file, upb_value_getptr(val));
8646 }
8647 UPB_UNUSED(hd);
8648 UPB_UNUSED(handle);
8649 return n;
8650}
8651
8652/** Handlers for google.protobuf.EnumValueDescriptorProto. *********************/
8653
Brian Silverman9c614bc2016-02-15 20:20:02 -05008654static bool enumval_startmsg(void *closure, const void *hd) {
8655 upb_descreader *r = closure;
8656 UPB_UNUSED(hd);
8657 r->saw_number = false;
8658 r->saw_name = false;
8659 return true;
8660}
8661
8662static size_t enumval_onname(void *closure, const void *hd, const char *buf,
8663 size_t n, const upb_bufhandle *handle) {
8664 upb_descreader *r = closure;
8665 UPB_UNUSED(hd);
8666 UPB_UNUSED(handle);
8667 /* XXX: see comment at the top of the file. */
Austin Schuh40c16522018-10-28 20:27:54 -07008668 upb_gfree(r->name);
8669 r->name = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008670 r->saw_name = true;
8671 return n;
8672}
8673
8674static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
8675 upb_descreader *r = closure;
8676 UPB_UNUSED(hd);
8677 r->number = val;
8678 r->saw_number = true;
8679 return true;
8680}
8681
8682static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
8683 upb_descreader *r = closure;
8684 upb_enumdef *e;
8685 UPB_UNUSED(hd);
8686
8687 if(!r->saw_number || !r->saw_name) {
8688 upb_status_seterrmsg(status, "Enum value missing name or number.");
8689 return false;
8690 }
8691 e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
8692 upb_enumdef_addval(e, r->name, r->number, status);
Austin Schuh40c16522018-10-28 20:27:54 -07008693 upb_gfree(r->name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008694 r->name = NULL;
8695 return true;
8696}
8697
Austin Schuh40c16522018-10-28 20:27:54 -07008698/** Handlers for google.protobuf.EnumDescriptorProto. *************************/
Brian Silverman9c614bc2016-02-15 20:20:02 -05008699
8700static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
8701 upb_descreader *r = closure;
8702 upb_enumdef *e;
8703 UPB_UNUSED(hd);
8704
8705 e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
8706 if (upb_def_fullname(upb_descreader_last(r)) == NULL) {
8707 upb_status_seterrmsg(status, "Enum had no name.");
8708 return false;
8709 }
8710 if (upb_enumdef_numvals(e) == 0) {
8711 upb_status_seterrmsg(status, "Enum had no values.");
8712 return false;
8713 }
8714 return true;
8715}
8716
8717static size_t enum_onname(void *closure, const void *hd, const char *buf,
8718 size_t n, const upb_bufhandle *handle) {
8719 upb_descreader *r = closure;
Austin Schuh40c16522018-10-28 20:27:54 -07008720 char *fullname = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008721 UPB_UNUSED(hd);
8722 UPB_UNUSED(handle);
8723 /* XXX: see comment at the top of the file. */
8724 upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -07008725 upb_gfree(fullname);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008726 return n;
8727}
8728
Austin Schuh40c16522018-10-28 20:27:54 -07008729/** Handlers for google.protobuf.FieldDescriptorProto *************************/
8730
Brian Silverman9c614bc2016-02-15 20:20:02 -05008731static bool field_startmsg(void *closure, const void *hd) {
8732 upb_descreader *r = closure;
8733 UPB_UNUSED(hd);
Austin Schuh40c16522018-10-28 20:27:54 -07008734 UPB_ASSERT(r->f);
8735 upb_gfree(r->default_string);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008736 r->default_string = NULL;
8737
8738 /* fielddefs default to packed, but descriptors default to non-packed. */
8739 upb_fielddef_setpacked(r->f, false);
8740 return true;
8741}
8742
8743/* Converts the default value in string "str" into "d". Passes a ref on str.
8744 * Returns true on success. */
8745static bool parse_default(char *str, upb_fielddef *f) {
8746 bool success = true;
8747 char *end;
8748 switch (upb_fielddef_type(f)) {
8749 case UPB_TYPE_INT32: {
8750 long val = strtol(str, &end, 0);
8751 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
8752 success = false;
8753 else
8754 upb_fielddef_setdefaultint32(f, val);
8755 break;
8756 }
8757 case UPB_TYPE_INT64: {
8758 /* XXX: Need to write our own strtoll, since it's not available in c89. */
8759 long long val = strtol(str, &end, 0);
8760 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
8761 success = false;
8762 else
8763 upb_fielddef_setdefaultint64(f, val);
8764 break;
8765 }
8766 case UPB_TYPE_UINT32: {
8767 unsigned long val = strtoul(str, &end, 0);
8768 if (val > UINT32_MAX || errno == ERANGE || *end)
8769 success = false;
8770 else
8771 upb_fielddef_setdefaultuint32(f, val);
8772 break;
8773 }
8774 case UPB_TYPE_UINT64: {
8775 /* XXX: Need to write our own strtoull, since it's not available in c89. */
8776 unsigned long long val = strtoul(str, &end, 0);
8777 if (val > UINT64_MAX || errno == ERANGE || *end)
8778 success = false;
8779 else
8780 upb_fielddef_setdefaultuint64(f, val);
8781 break;
8782 }
8783 case UPB_TYPE_DOUBLE: {
8784 double val = strtod(str, &end);
8785 if (errno == ERANGE || *end)
8786 success = false;
8787 else
8788 upb_fielddef_setdefaultdouble(f, val);
8789 break;
8790 }
8791 case UPB_TYPE_FLOAT: {
8792 /* XXX: Need to write our own strtof, since it's not available in c89. */
8793 float val = strtod(str, &end);
8794 if (errno == ERANGE || *end)
8795 success = false;
8796 else
8797 upb_fielddef_setdefaultfloat(f, val);
8798 break;
8799 }
8800 case UPB_TYPE_BOOL: {
8801 if (strcmp(str, "false") == 0)
8802 upb_fielddef_setdefaultbool(f, false);
8803 else if (strcmp(str, "true") == 0)
8804 upb_fielddef_setdefaultbool(f, true);
8805 else
8806 success = false;
8807 break;
8808 }
8809 default: abort();
8810 }
8811 return success;
8812}
8813
8814static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
8815 upb_descreader *r = closure;
8816 upb_fielddef *f = r->f;
8817 UPB_UNUSED(hd);
8818
8819 /* TODO: verify that all required fields were present. */
Austin Schuh40c16522018-10-28 20:27:54 -07008820 UPB_ASSERT(upb_fielddef_number(f) != 0);
8821 UPB_ASSERT(upb_fielddef_name(f) != NULL);
8822 UPB_ASSERT((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));
Brian Silverman9c614bc2016-02-15 20:20:02 -05008823
8824 if (r->default_string) {
8825 if (upb_fielddef_issubmsg(f)) {
8826 upb_status_seterrmsg(status, "Submessages cannot have defaults.");
8827 return false;
8828 }
8829 if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) {
8830 upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
8831 } else {
8832 if (r->default_string && !parse_default(r->default_string, f)) {
8833 /* We don't worry too much about giving a great error message since the
8834 * compiler should have ensured this was correct. */
8835 upb_status_seterrmsg(status, "Error converting default value.");
8836 return false;
8837 }
8838 }
8839 }
8840 return true;
8841}
8842
8843static bool field_onlazy(void *closure, const void *hd, bool val) {
8844 upb_descreader *r = closure;
8845 UPB_UNUSED(hd);
8846
8847 upb_fielddef_setlazy(r->f, val);
8848 return true;
8849}
8850
8851static bool field_onpacked(void *closure, const void *hd, bool val) {
8852 upb_descreader *r = closure;
8853 UPB_UNUSED(hd);
8854
8855 upb_fielddef_setpacked(r->f, val);
8856 return true;
8857}
8858
8859static bool field_ontype(void *closure, const void *hd, int32_t val) {
8860 upb_descreader *r = closure;
8861 UPB_UNUSED(hd);
8862
8863 upb_fielddef_setdescriptortype(r->f, val);
8864 return true;
8865}
8866
8867static bool field_onlabel(void *closure, const void *hd, int32_t val) {
8868 upb_descreader *r = closure;
8869 UPB_UNUSED(hd);
8870
8871 upb_fielddef_setlabel(r->f, val);
8872 return true;
8873}
8874
8875static bool field_onnumber(void *closure, const void *hd, int32_t val) {
8876 upb_descreader *r = closure;
Austin Schuh40c16522018-10-28 20:27:54 -07008877 bool ok;
Brian Silverman9c614bc2016-02-15 20:20:02 -05008878 UPB_UNUSED(hd);
8879
Austin Schuh40c16522018-10-28 20:27:54 -07008880 ok = upb_fielddef_setnumber(r->f, val, NULL);
8881 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008882 return true;
8883}
8884
8885static size_t field_onname(void *closure, const void *hd, const char *buf,
8886 size_t n, const upb_bufhandle *handle) {
8887 upb_descreader *r = closure;
Austin Schuh40c16522018-10-28 20:27:54 -07008888 char *name = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008889 UPB_UNUSED(hd);
8890 UPB_UNUSED(handle);
8891
8892 /* XXX: see comment at the top of the file. */
8893 upb_fielddef_setname(r->f, name, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -07008894 upb_gfree(name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008895 return n;
8896}
8897
8898static size_t field_ontypename(void *closure, const void *hd, const char *buf,
8899 size_t n, const upb_bufhandle *handle) {
8900 upb_descreader *r = closure;
Austin Schuh40c16522018-10-28 20:27:54 -07008901 char *name = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008902 UPB_UNUSED(hd);
8903 UPB_UNUSED(handle);
8904
8905 /* XXX: see comment at the top of the file. */
8906 upb_fielddef_setsubdefname(r->f, name, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -07008907 upb_gfree(name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008908 return n;
8909}
8910
8911static size_t field_onextendee(void *closure, const void *hd, const char *buf,
8912 size_t n, const upb_bufhandle *handle) {
8913 upb_descreader *r = closure;
Austin Schuh40c16522018-10-28 20:27:54 -07008914 char *name = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008915 UPB_UNUSED(hd);
8916 UPB_UNUSED(handle);
8917
8918 /* XXX: see comment at the top of the file. */
8919 upb_fielddef_setcontainingtypename(r->f, name, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -07008920 upb_gfree(name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008921 return n;
8922}
8923
8924static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
8925 size_t n, const upb_bufhandle *handle) {
8926 upb_descreader *r = closure;
8927 UPB_UNUSED(hd);
8928 UPB_UNUSED(handle);
8929
8930 /* Have to convert from string to the correct type, but we might not know the
8931 * type yet, so we save it as a string until the end of the field.
8932 * XXX: see comment at the top of the file. */
Austin Schuh40c16522018-10-28 20:27:54 -07008933 upb_gfree(r->default_string);
8934 r->default_string = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008935 return n;
8936}
8937
Austin Schuh40c16522018-10-28 20:27:54 -07008938static bool field_ononeofindex(void *closure, const void *hd, int32_t index) {
8939 upb_descreader *r = closure;
8940 upb_oneofdef *o = upb_descreader_getoneof(r, index);
8941 bool ok = upb_oneofdef_addfield(o, r->f, &r->f, NULL);
8942 UPB_UNUSED(hd);
8943
8944 UPB_ASSERT(ok);
8945 return true;
8946}
8947
8948/** Handlers for google.protobuf.OneofDescriptorProto. ************************/
8949
8950static size_t oneof_name(void *closure, const void *hd, const char *buf,
8951 size_t n, const upb_bufhandle *handle) {
8952 upb_descreader *r = closure;
8953 upb_descreader_frame *f = &r->stack[r->stack_len-1];
8954 upb_oneofdef *o = upb_descreader_getoneof(r, f->oneof_index++);
8955 char *name_null_terminated = upb_gstrndup(buf, n);
8956 bool ok = upb_oneofdef_setname(o, name_null_terminated, NULL);
8957 UPB_UNUSED(hd);
8958 UPB_UNUSED(handle);
8959
8960 UPB_ASSERT(ok);
8961 free(name_null_terminated);
8962 return n;
8963}
8964
8965/** Handlers for google.protobuf.DescriptorProto ******************************/
8966
8967static bool msg_start(void *closure, const void *hd) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05008968 upb_descreader *r = closure;
8969 UPB_UNUSED(hd);
8970
Brian Silverman9c614bc2016-02-15 20:20:02 -05008971 upb_descreader_startcontainer(r);
8972 return true;
8973}
8974
Austin Schuh40c16522018-10-28 20:27:54 -07008975static bool msg_end(void *closure, const void *hd, upb_status *status) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05008976 upb_descreader *r = closure;
8977 upb_msgdef *m = upb_descreader_top(r);
8978 UPB_UNUSED(hd);
8979
8980 if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
8981 upb_status_seterrmsg(status, "Encountered message with no name.");
8982 return false;
8983 }
Austin Schuh40c16522018-10-28 20:27:54 -07008984 return upb_descreader_endcontainer(r);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008985}
8986
Austin Schuh40c16522018-10-28 20:27:54 -07008987static size_t msg_name(void *closure, const void *hd, const char *buf,
8988 size_t n, const upb_bufhandle *handle) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05008989 upb_descreader *r = closure;
8990 upb_msgdef *m = upb_descreader_top(r);
8991 /* XXX: see comment at the top of the file. */
Austin Schuh40c16522018-10-28 20:27:54 -07008992 char *name = upb_gstrndup(buf, n);
Brian Silverman9c614bc2016-02-15 20:20:02 -05008993 UPB_UNUSED(hd);
8994 UPB_UNUSED(handle);
8995
8996 upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
8997 upb_descreader_setscopename(r, name); /* Passes ownership of name. */
8998 return n;
8999}
9000
Austin Schuh40c16522018-10-28 20:27:54 -07009001static void *msg_startmsg(void *closure, const void *hd) {
9002 upb_descreader *r = closure;
9003 upb_msgdef *m = upb_msgdef_new(&m);
9004 bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
9005 UPB_UNUSED(hd);
9006 UPB_ASSERT(ok);
9007 return r;
9008}
9009
9010static void *msg_startext(void *closure, const void *hd) {
9011 upb_descreader *r = closure;
9012 upb_fielddef *f = upb_fielddef_new(&f);
9013 bool ok = upb_filedef_addext(r->file, f, &f, NULL);
9014 UPB_UNUSED(hd);
9015 UPB_ASSERT(ok);
9016 return r;
9017}
9018
9019static void *msg_startfield(void *closure, const void *hd) {
9020 upb_descreader *r = closure;
9021 r->f = upb_fielddef_new(&r->f);
9022 /* We can't add the new field to the message until its name/number are
9023 * filled in. */
9024 UPB_UNUSED(hd);
9025 return r;
9026}
9027
9028static bool msg_endfield(void *closure, const void *hd) {
9029 upb_descreader *r = closure;
9030 upb_msgdef *m = upb_descreader_top(r);
9031 bool ok;
9032 UPB_UNUSED(hd);
9033
9034 /* Oneof fields are added to the msgdef through their oneof, so don't need to
9035 * be added here. */
9036 if (upb_fielddef_containingoneof(r->f) == NULL) {
9037 ok = upb_msgdef_addfield(m, r->f, &r->f, NULL);
9038 UPB_ASSERT(ok);
9039 }
9040 r->f = NULL;
9041 return true;
9042}
9043
9044static bool msg_onmapentry(void *closure, const void *hd, bool mapentry) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05009045 upb_descreader *r = closure;
9046 upb_msgdef *m = upb_descreader_top(r);
9047 UPB_UNUSED(hd);
9048
Austin Schuh40c16522018-10-28 20:27:54 -07009049 upb_msgdef_setmapentry(m, mapentry);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009050 r->f = NULL;
9051 return true;
9052}
9053
Brian Silverman9c614bc2016-02-15 20:20:02 -05009054
Brian Silverman9c614bc2016-02-15 20:20:02 -05009055
Austin Schuh40c16522018-10-28 20:27:54 -07009056/** Code to register handlers *************************************************/
9057
/* F(Msg, field) expands to a call of the generated accessor
 * upbdefs_google_protobuf_<Msg>_f_<field>(m), where `m` is the msgdef local
 * of the enclosing function.  Only valid inside reghandlers(). */
#define F(msg, field) upbdefs_google_protobuf_ ## msg ## _f_ ## field(m)

/* Handler-registration callback passed to upb_handlers_newfrozen().
 *
 * Invoked with a upb_handlers object `h`; looks at which descriptor.proto
 * message type `h` is for and installs the descriptor-reader callbacks for
 * the fields of that type.  Message types not listed below get no handlers.
 * `closure` is unused.  Asserts at the end that no registration failed. */
static void reghandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *m = upb_handlers_msgdef(h);
  UPB_UNUSED(closure);

  if (upbdefs_google_protobuf_FileDescriptorSet_is(m)) {
    upb_handlers_setstartsubmsg(h, F(FileDescriptorSet, file),
                                &fileset_startfile, NULL);
  } else if (upbdefs_google_protobuf_DescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &msg_start, NULL);
    upb_handlers_setendmsg(h, &msg_end, NULL);
    upb_handlers_setstring(h, F(DescriptorProto, name), &msg_name, NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, extension), &msg_startext,
                                NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, nested_type),
                                &msg_startmsg, NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, field),
                                &msg_startfield, NULL);
    upb_handlers_setendsubmsg(h, F(DescriptorProto, field),
                              &msg_endfield, NULL);
    /* Nested enums reuse the file-level enum start handler. */
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, enum_type),
                                &file_startenum, NULL);
  } else if (upbdefs_google_protobuf_FileDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &file_start, NULL);
    upb_handlers_setendmsg(h, &file_end, NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, name), &file_onname,
                           NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, package), &file_onpackage,
                           NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, syntax), &file_onsyntax,
                           NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, message_type),
                                &file_startmsg, NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, enum_type),
                                &file_startenum, NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, extension),
                                &file_startext, NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, dependency),
                           &file_ondep, NULL);
  } else if (upbdefs_google_protobuf_EnumValueDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
    upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
    upb_handlers_setstring(h, F(EnumValueDescriptorProto, name), &enumval_onname, NULL);
    upb_handlers_setint32(h, F(EnumValueDescriptorProto, number), &enumval_onnumber,
                          NULL);
  } else if (upbdefs_google_protobuf_EnumDescriptorProto_is(m)) {
    upb_handlers_setendmsg(h, &enum_endmsg, NULL);
    upb_handlers_setstring(h, F(EnumDescriptorProto, name), &enum_onname, NULL);
  } else if (upbdefs_google_protobuf_FieldDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &field_startmsg, NULL);
    upb_handlers_setendmsg(h, &field_endmsg, NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, type), &field_ontype,
                          NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, label), &field_onlabel,
                          NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, number), &field_onnumber,
                          NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, name), &field_onname,
                           NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, type_name),
                           &field_ontypename, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, extendee),
                           &field_onextendee, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, default_value),
                           &field_ondefaultval, NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, oneof_index),
                          &field_ononeofindex, NULL);
  } else if (upbdefs_google_protobuf_OneofDescriptorProto_is(m)) {
    upb_handlers_setstring(h, F(OneofDescriptorProto, name), &oneof_name, NULL);
  } else if (upbdefs_google_protobuf_FieldOptions_is(m)) {
    upb_handlers_setbool(h, F(FieldOptions, lazy), &field_onlazy, NULL);
    upb_handlers_setbool(h, F(FieldOptions, packed), &field_onpacked, NULL);
  } else if (upbdefs_google_protobuf_MessageOptions_is(m)) {
    upb_handlers_setbool(h, F(MessageOptions, map_entry), &msg_onmapentry, NULL);
  } else if (upbdefs_google_protobuf_FileOptions_is(m)) {
    /* php_namespace gets both a start-string and a string handler. */
    upb_handlers_setstring(h, F(FileOptions, php_class_prefix),
                           &file_onphpprefix, NULL);
    upb_handlers_setstartstr(h, F(FileOptions, php_namespace),
                             &file_startphpnamespace, NULL);
    upb_handlers_setstring(h, F(FileOptions, php_namespace),
                           &file_onphpnamespace, NULL);
  }

  UPB_ASSERT(upb_ok(upb_handlers_status(h)));
}

#undef F
Brian Silverman9c614bc2016-02-15 20:20:02 -05009146
9147void descreader_cleanup(void *_r) {
9148 upb_descreader *r = _r;
Austin Schuh40c16522018-10-28 20:27:54 -07009149 size_t i;
9150
9151 for (i = 0; i < upb_descreader_filecount(r); i++) {
9152 upb_filedef_unref(upb_descreader_file(r, i), &r->files);
9153 }
9154
9155 upb_gfree(r->name);
9156 upb_inttable_uninit(&r->files);
9157 upb_strtable_uninit(&r->files_by_name);
9158 upb_inttable_uninit(&r->oneofs);
9159 upb_gfree(r->default_string);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009160 while (r->stack_len > 0) {
9161 upb_descreader_frame *f = &r->stack[--r->stack_len];
Austin Schuh40c16522018-10-28 20:27:54 -07009162 upb_gfree(f->name);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009163 }
9164}
9165
9166
9167/* Public API ****************************************************************/
9168
9169upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
9170 upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
9171 if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
9172 return NULL;
9173 }
9174
Austin Schuh40c16522018-10-28 20:27:54 -07009175 upb_inttable_init(&r->files, UPB_CTYPE_PTR);
9176 upb_strtable_init(&r->files_by_name, UPB_CTYPE_PTR);
9177 upb_inttable_init(&r->oneofs, UPB_CTYPE_PTR);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009178 upb_sink_reset(upb_descreader_input(r), h, r);
9179 r->stack_len = 0;
9180 r->name = NULL;
9181 r->default_string = NULL;
9182
9183 return r;
9184}
9185
Austin Schuh40c16522018-10-28 20:27:54 -07009186size_t upb_descreader_filecount(const upb_descreader *r) {
9187 return upb_inttable_count(&r->files);
9188}
9189
9190upb_filedef *upb_descreader_file(const upb_descreader *r, size_t i) {
9191 upb_value v;
9192 if (upb_inttable_lookup(&r->files, i, &v)) {
9193 return upb_value_getptr(v);
9194 } else {
9195 return NULL;
9196 }
Brian Silverman9c614bc2016-02-15 20:20:02 -05009197}
9198
9199upb_sink *upb_descreader_input(upb_descreader *r) {
9200 return &r->sink;
9201}
9202
9203const upb_handlers *upb_descreader_newhandlers(const void *owner) {
Austin Schuh40c16522018-10-28 20:27:54 -07009204 const upb_msgdef *m = upbdefs_google_protobuf_FileDescriptorSet_get(&m);
9205 const upb_handlers *h = upb_handlers_newfrozen(m, owner, reghandlers, NULL);
9206 upb_msgdef_unref(m, &m);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009207 return h;
9208}
9209/*
9210** protobuf decoder bytecode compiler
9211**
9212** Code to compile a upb::Handlers into bytecode for decoding a protobuf
9213** according to that specific schema and destination handlers.
9214**
9215** Compiling to bytecode is always the first step. If we are using the
9216** interpreted decoder we leave it as bytecode and interpret that. If we are
9217** using a JIT decoder we use a code generator to turn the bytecode into native
9218** code, LLVM IR, etc.
9219**
9220** Bytecode definition is in decoder.int.h.
9221*/
9222
9223#include <stdarg.h>
9224
9225#ifdef UPB_DUMP_BYTECODE
9226#include <stdio.h>
9227#endif
9228
9229#define MAXLABEL 5
9230#define EMPTYLABEL -1
9231
9232/* mgroup *********************************************************************/
9233
9234static void freegroup(upb_refcounted *r) {
9235 mgroup *g = (mgroup*)r;
9236 upb_inttable_uninit(&g->methods);
9237#ifdef UPB_USE_JIT_X64
9238 upb_pbdecoder_freejit(g);
9239#endif
Austin Schuh40c16522018-10-28 20:27:54 -07009240 upb_gfree(g->bytecode);
9241 upb_gfree(g);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009242}
9243
9244static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
9245 void *closure) {
9246 const mgroup *g = (const mgroup*)r;
9247 upb_inttable_iter i;
9248 upb_inttable_begin(&i, &g->methods);
9249 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
9250 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
9251 visit(r, upb_pbdecodermethod_upcast(method), closure);
9252 }
9253}
9254
9255mgroup *newgroup(const void *owner) {
Austin Schuh40c16522018-10-28 20:27:54 -07009256 mgroup *g = upb_gmalloc(sizeof(*g));
Brian Silverman9c614bc2016-02-15 20:20:02 -05009257 static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
9258 upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
9259 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
9260 g->bytecode = NULL;
9261 g->bytecode_end = NULL;
9262 return g;
9263}
9264
9265
9266/* upb_pbdecodermethod ********************************************************/
9267
9268static void freemethod(upb_refcounted *r) {
9269 upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
9270
9271 if (method->dest_handlers_) {
9272 upb_handlers_unref(method->dest_handlers_, method);
9273 }
9274
9275 upb_inttable_uninit(&method->dispatch);
Austin Schuh40c16522018-10-28 20:27:54 -07009276 upb_gfree(method);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009277}
9278
9279static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
9280 void *closure) {
9281 const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r;
9282 visit(r, m->group, closure);
9283}
9284
/* Creates a decoder method for `dest_handlers` and registers it in `group`.
 *
 * The method is inserted into group->methods keyed by the dest_handlers
 * pointer.  If dest_handlers is non-NULL the method takes a ref on it
 * (released in freemethod()).  The allocation result is not checked.
 *
 * The statement order below is refcount-sensitive; do not reorder. */
static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
                                      mgroup *group) {
  static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
  upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
  /* &ret is only a temporary owner; that ref is dropped again below. */
  upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
  upb_byteshandler_init(&ret->input_handler_);

  /* The method references the group and vice-versa, in a circular reference. */
  upb_ref2(ret, group);
  upb_ref2(group, ret);
  upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
  /* Release the temporary &ret ownership taken at init (presumably the
   * method stays alive through the circular ref with the group -- confirm
   * against the refcounting implementation). */
  upb_pbdecodermethod_unref(ret, &ret);

  ret->group = mgroup_upcast_mutable(group);
  ret->dest_handlers_ = dest_handlers;
  ret->is_native_ = false;  /* If we JIT, it will update this later. */
  upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);

  if (ret->dest_handlers_) {
    upb_handlers_ref(ret->dest_handlers_, ret);
  }
  return ret;
}
9308
9309const upb_handlers *upb_pbdecodermethod_desthandlers(
9310 const upb_pbdecodermethod *m) {
9311 return m->dest_handlers_;
9312}
9313
9314const upb_byteshandler *upb_pbdecodermethod_inputhandler(
9315 const upb_pbdecodermethod *m) {
9316 return &m->input_handler_;
9317}
9318
9319bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
9320 return m->is_native_;
9321}
9322
9323const upb_pbdecodermethod *upb_pbdecodermethod_new(
9324 const upb_pbdecodermethodopts *opts, const void *owner) {
9325 const upb_pbdecodermethod *ret;
9326 upb_pbcodecache cache;
9327
9328 upb_pbcodecache_init(&cache);
9329 ret = upb_pbcodecache_getdecodermethod(&cache, opts);
9330 upb_pbdecodermethod_ref(ret, owner);
9331 upb_pbcodecache_uninit(&cache);
9332 return ret;
9333}
9334
9335
9336/* bytecode compiler **********************************************************/
9337
/* Data used only at compilation time. */
typedef struct {
  /* Method group whose bytecode buffer is being written. */
  mgroup *group;

  /* Next write position inside group->bytecode (see put32()). */
  uint32_t *pc;
  /* Per label: bytecode offset of the most recent unresolved forward
   * reference (head of a list threaded through the instructions), or
   * EMPTYLABEL if there is none. */
  int fwd_labels[MAXLABEL];
  /* Per label: bytecode offset of its most recent definition, or EMPTYLABEL
   * if it has not been defined yet. */
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
9349
9350static compiler *newcompiler(mgroup *group, bool lazy) {
Austin Schuh40c16522018-10-28 20:27:54 -07009351 compiler *ret = upb_gmalloc(sizeof(*ret));
Brian Silverman9c614bc2016-02-15 20:20:02 -05009352 int i;
9353
9354 ret->group = group;
9355 ret->lazy = lazy;
9356 for (i = 0; i < MAXLABEL; i++) {
9357 ret->fwd_labels[i] = EMPTYLABEL;
9358 ret->back_labels[i] = EMPTYLABEL;
9359 }
9360 return ret;
9361}
9362
/* Frees a compiler allocated by newcompiler().  Only the compiler struct
 * itself is released; the group it points to is left alone. */
static void freecompiler(compiler *c) {
  upb_gfree(c);
}
9366
/* Number of 32-bit bytecode words needed to hold one pointer; used for the
 * length of OP_SETDISPATCH in instruction_len(). */
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
9368
9369/* How many words an instruction is. */
9370static int instruction_len(uint32_t instr) {
9371 switch (getop(instr)) {
9372 case OP_SETDISPATCH: return 1 + ptr_words;
9373 case OP_TAGN: return 3;
9374 case OP_SETBIGGROUPNUM: return 2;
9375 default: return 1;
9376 }
9377}
9378
9379bool op_has_longofs(int32_t instruction) {
9380 switch (getop(instruction)) {
9381 case OP_CALL:
9382 case OP_BRANCH:
9383 case OP_CHECKDELIM:
9384 return true;
9385 /* The "tag" instructions only have 8 bytes available for the jump target,
9386 * but that is ok because these opcodes only require short jumps. */
9387 case OP_TAG1:
9388 case OP_TAG2:
9389 case OP_TAGN:
9390 return false;
9391 default:
Austin Schuh40c16522018-10-28 20:27:54 -07009392 UPB_ASSERT(false);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009393 return false;
9394 }
9395}
9396
/* Extracts the signed jump offset stored in `instruction`: the upper 24 bits
 * for long-offset opcodes, otherwise the sign-extended second byte. */
static int32_t getofs(uint32_t instruction) {
  return op_has_longofs(instruction) ? (int32_t)instruction >> 8
                                     : (int8_t)(instruction >> 8);
}
9404
/* Stores jump offset `ofs` into *instruction, keeping its opcode.
 *
 * Long-offset opcodes get the offset in the bits above the opcode byte;
 * short-offset opcodes get it in the second byte, with the remaining bits
 * untouched.  Asserts that the offset reads back unchanged, which fails if it
 * does not fit the available width. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    /* Shift as unsigned: ofs is negative for backward jumps, and left-shifting
     * a negative signed value is undefined behavior in C. */
    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
  }
  UPB_ASSERT(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
9413
9414static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
9415
9416/* Defines a local label at the current PC location. All previous forward
9417 * references are updated to point to this location. The location is noted
9418 * for any future backward references. */
9419static void label(compiler *c, unsigned int label) {
9420 int val;
9421 uint32_t *codep;
9422
Austin Schuh40c16522018-10-28 20:27:54 -07009423 UPB_ASSERT(label < MAXLABEL);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009424 val = c->fwd_labels[label];
9425 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
9426 while (codep) {
9427 int ofs = getofs(*codep);
9428 setofs(codep, c->pc - codep - instruction_len(*codep));
9429 codep = ofs ? codep + ofs : NULL;
9430 }
9431 c->fwd_labels[label] = EMPTYLABEL;
9432 c->back_labels[label] = pcofs(c);
9433}
9434
9435/* Creates a reference to a numbered label; either a forward reference
9436 * (positive arg) or backward reference (negative arg). For forward references
9437 * the value returned now is actually a "next" pointer into a linked list of all
9438 * instructions that use this label and will be patched later when the label is
9439 * defined with label().
9440 *
9441 * The returned value is the offset that should be written into the instruction.
9442 */
9443static int32_t labelref(compiler *c, int label) {
Austin Schuh40c16522018-10-28 20:27:54 -07009444 UPB_ASSERT(label < MAXLABEL);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009445 if (label == LABEL_DISPATCH) {
9446 /* No resolving required. */
9447 return 0;
9448 } else if (label < 0) {
9449 /* Backward local label. Relative to the next instruction. */
9450 uint32_t from = (c->pc + 1) - c->group->bytecode;
9451 return c->back_labels[-label] - from;
9452 } else {
9453 /* Forward local label: prepend to (possibly-empty) linked list. */
9454 int *lptr = &c->fwd_labels[label];
9455 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
9456 *lptr = pcofs(c);
9457 return ret;
9458 }
9459}
9460
9461static void put32(compiler *c, uint32_t v) {
9462 mgroup *g = c->group;
9463 if (c->pc == g->bytecode_end) {
9464 int ofs = pcofs(c);
9465 size_t oldsize = g->bytecode_end - g->bytecode;
9466 size_t newsize = UPB_MAX(oldsize * 2, 64);
9467 /* TODO(haberman): handle OOM. */
Austin Schuh40c16522018-10-28 20:27:54 -07009468 g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t),
9469 newsize * sizeof(uint32_t));
Brian Silverman9c614bc2016-02-15 20:20:02 -05009470 g->bytecode_end = g->bytecode + newsize;
9471 c->pc = g->bytecode + ofs;
9472 }
9473 *c->pc++ = v;
9474}
9475
Austin Schuh40c16522018-10-28 20:27:54 -07009476static void putop(compiler *c, int op, ...) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05009477 va_list ap;
9478 va_start(ap, op);
9479
9480 switch (op) {
9481 case OP_SETDISPATCH: {
9482 uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
9483 put32(c, OP_SETDISPATCH);
9484 put32(c, ptr);
9485 if (sizeof(uintptr_t) > sizeof(uint32_t))
9486 put32(c, (uint64_t)ptr >> 32);
9487 break;
9488 }
9489 case OP_STARTMSG:
9490 case OP_ENDMSG:
9491 case OP_PUSHLENDELIM:
9492 case OP_POP:
9493 case OP_SETDELIM:
9494 case OP_HALT:
9495 case OP_RET:
9496 case OP_DISPATCH:
9497 put32(c, op);
9498 break;
9499 case OP_PARSE_DOUBLE:
9500 case OP_PARSE_FLOAT:
9501 case OP_PARSE_INT64:
9502 case OP_PARSE_UINT64:
9503 case OP_PARSE_INT32:
9504 case OP_PARSE_FIXED64:
9505 case OP_PARSE_FIXED32:
9506 case OP_PARSE_BOOL:
9507 case OP_PARSE_UINT32:
9508 case OP_PARSE_SFIXED32:
9509 case OP_PARSE_SFIXED64:
9510 case OP_PARSE_SINT32:
9511 case OP_PARSE_SINT64:
9512 case OP_STARTSEQ:
9513 case OP_ENDSEQ:
9514 case OP_STARTSUBMSG:
9515 case OP_ENDSUBMSG:
9516 case OP_STARTSTR:
9517 case OP_STRING:
9518 case OP_ENDSTR:
9519 case OP_PUSHTAGDELIM:
9520 put32(c, op | va_arg(ap, upb_selector_t) << 8);
9521 break;
9522 case OP_SETBIGGROUPNUM:
9523 put32(c, op);
9524 put32(c, va_arg(ap, int));
9525 break;
9526 case OP_CALL: {
9527 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
9528 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
9529 break;
9530 }
9531 case OP_CHECKDELIM:
9532 case OP_BRANCH: {
9533 uint32_t instruction = op;
9534 int label = va_arg(ap, int);
9535 setofs(&instruction, labelref(c, label));
9536 put32(c, instruction);
9537 break;
9538 }
9539 case OP_TAG1:
9540 case OP_TAG2: {
9541 int label = va_arg(ap, int);
9542 uint64_t tag = va_arg(ap, uint64_t);
9543 uint32_t instruction = op | (tag << 16);
Austin Schuh40c16522018-10-28 20:27:54 -07009544 UPB_ASSERT(tag <= 0xffff);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009545 setofs(&instruction, labelref(c, label));
9546 put32(c, instruction);
9547 break;
9548 }
9549 case OP_TAGN: {
9550 int label = va_arg(ap, int);
9551 uint64_t tag = va_arg(ap, uint64_t);
9552 uint32_t instruction = op | (upb_value_size(tag) << 16);
9553 setofs(&instruction, labelref(c, label));
9554 put32(c, instruction);
9555 put32(c, tag);
9556 put32(c, tag >> 32);
9557 break;
9558 }
9559 }
9560
9561 va_end(ap);
9562}
9563
9564#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
9565
9566const char *upb_pbdecoder_getopname(unsigned int op) {
9567#define QUOTE(x) #x
9568#define EXPAND_AND_QUOTE(x) QUOTE(x)
9569#define OPNAME(x) OP_##x
9570#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
9571#define T(x) OP(PARSE_##x)
9572 /* Keep in sync with list in decoder.int.h. */
9573 switch ((opcode)op) {
9574 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
9575 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
9576 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
9577 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
9578 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
9579 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
9580 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
9581 }
9582 return "<unknown op>";
9583#undef OP
9584#undef T
9585}
9586
9587#endif
9588
9589#ifdef UPB_DUMP_BYTECODE
9590
9591static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
9592
9593 uint32_t *begin = p;
9594
9595 while (p < end) {
9596 fprintf(f, "%p %8tx", p, p - begin);
9597 uint32_t instr = *p++;
9598 uint8_t op = getop(instr);
9599 fprintf(f, " %s", upb_pbdecoder_getopname(op));
9600 switch ((opcode)op) {
9601 case OP_SETDISPATCH: {
9602 const upb_inttable *dispatch;
9603 memcpy(&dispatch, p, sizeof(void*));
9604 p += ptr_words;
9605 const upb_pbdecodermethod *method =
9606 (void *)((char *)dispatch -
9607 offsetof(upb_pbdecodermethod, dispatch));
9608 fprintf(f, " %s", upb_msgdef_fullname(
9609 upb_handlers_msgdef(method->dest_handlers_)));
9610 break;
9611 }
9612 case OP_DISPATCH:
9613 case OP_STARTMSG:
9614 case OP_ENDMSG:
9615 case OP_PUSHLENDELIM:
9616 case OP_POP:
9617 case OP_SETDELIM:
9618 case OP_HALT:
9619 case OP_RET:
9620 break;
9621 case OP_PARSE_DOUBLE:
9622 case OP_PARSE_FLOAT:
9623 case OP_PARSE_INT64:
9624 case OP_PARSE_UINT64:
9625 case OP_PARSE_INT32:
9626 case OP_PARSE_FIXED64:
9627 case OP_PARSE_FIXED32:
9628 case OP_PARSE_BOOL:
9629 case OP_PARSE_UINT32:
9630 case OP_PARSE_SFIXED32:
9631 case OP_PARSE_SFIXED64:
9632 case OP_PARSE_SINT32:
9633 case OP_PARSE_SINT64:
9634 case OP_STARTSEQ:
9635 case OP_ENDSEQ:
9636 case OP_STARTSUBMSG:
9637 case OP_ENDSUBMSG:
9638 case OP_STARTSTR:
9639 case OP_STRING:
9640 case OP_ENDSTR:
9641 case OP_PUSHTAGDELIM:
9642 fprintf(f, " %d", instr >> 8);
9643 break;
9644 case OP_SETBIGGROUPNUM:
9645 fprintf(f, " %d", *p++);
9646 break;
9647 case OP_CHECKDELIM:
9648 case OP_CALL:
9649 case OP_BRANCH:
9650 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
9651 break;
9652 case OP_TAG1:
9653 case OP_TAG2: {
9654 fprintf(f, " tag:0x%x", instr >> 16);
9655 if (getofs(instr)) {
9656 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
9657 }
9658 break;
9659 }
9660 case OP_TAGN: {
9661 uint64_t tag = *p++;
9662 tag |= (uint64_t)*p++ << 32;
9663 fprintf(f, " tag:0x%llx", (long long)tag);
9664 fprintf(f, " n:%d", instr >> 16);
9665 if (getofs(instr)) {
9666 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
9667 }
9668 break;
9669 }
9670 }
9671 fputs("\n", f);
9672 }
9673}
9674
9675#endif
9676
9677static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
9678 uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
9679 uint64_t encoded_tag = upb_vencode32(tag);
9680 /* No tag should be greater than 5 bytes. */
Austin Schuh40c16522018-10-28 20:27:54 -07009681 UPB_ASSERT(encoded_tag <= 0xffffffffff);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009682 return encoded_tag;
9683}
9684
9685static void putchecktag(compiler *c, const upb_fielddef *f,
9686 int wire_type, int dest) {
9687 uint64_t tag = get_encoded_tag(f, wire_type);
9688 switch (upb_value_size(tag)) {
9689 case 1:
9690 putop(c, OP_TAG1, dest, tag);
9691 break;
9692 case 2:
9693 putop(c, OP_TAG2, dest, tag);
9694 break;
9695 default:
9696 putop(c, OP_TAGN, dest, tag);
9697 break;
9698 }
9699}
9700
9701static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
9702 upb_selector_t selector;
9703 bool ok = upb_handlers_getselector(f, type, &selector);
Austin Schuh40c16522018-10-28 20:27:54 -07009704 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009705 return selector;
9706}
9707
9708/* Takes an existing, primary dispatch table entry and repacks it with a
9709 * different alternate wire type. Called when we are inserting a secondary
9710 * dispatch table entry for an alternate wire type. */
9711static uint64_t repack(uint64_t dispatch, int new_wt2) {
9712 uint64_t ofs;
9713 uint8_t wt1;
9714 uint8_t old_wt2;
9715 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
Austin Schuh40c16522018-10-28 20:27:54 -07009716 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
Brian Silverman9c614bc2016-02-15 20:20:02 -05009717 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
9718}
9719
9720/* Marks the current bytecode position as the dispatch target for this message,
9721 * field, and wire type. */
9722static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
9723 const upb_fielddef *f, int wire_type) {
9724 /* Offset is relative to msg base. */
9725 uint64_t ofs = pcofs(c) - method->code_base.ofs;
9726 uint32_t fn = upb_fielddef_number(f);
9727 upb_inttable *d = &method->dispatch;
9728 upb_value v;
9729 if (upb_inttable_remove(d, fn, &v)) {
9730 /* TODO: prioritize based on packed setting in .proto file. */
9731 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
9732 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
9733 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
9734 } else {
9735 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
9736 upb_inttable_insert(d, fn, upb_value_uint64(val));
9737 }
9738}
9739
9740static void putpush(compiler *c, const upb_fielddef *f) {
9741 if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
9742 putop(c, OP_PUSHLENDELIM);
9743 } else {
9744 uint32_t fn = upb_fielddef_number(f);
9745 if (fn >= 1 << 24) {
9746 putop(c, OP_PUSHTAGDELIM, 0);
9747 putop(c, OP_SETBIGGROUPNUM, fn);
9748 } else {
9749 putop(c, OP_PUSHTAGDELIM, fn);
9750 }
9751 }
9752}
9753
9754static upb_pbdecodermethod *find_submethod(const compiler *c,
9755 const upb_pbdecodermethod *method,
9756 const upb_fielddef *f) {
9757 const upb_handlers *sub =
9758 upb_handlers_getsubhandlers(method->dest_handlers_, f);
9759 upb_value v;
9760 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
9761 ? upb_value_getptr(v)
9762 : NULL;
9763}
9764
9765static void putsel(compiler *c, opcode op, upb_selector_t sel,
9766 const upb_handlers *h) {
9767 if (upb_handlers_gethandler(h, sel)) {
9768 putop(c, op, sel);
9769 }
9770}
9771
9772/* Puts an opcode to call a callback, but only if a callback actually exists for
9773 * this field and handler type. */
9774static void maybeput(compiler *c, opcode op, const upb_handlers *h,
9775 const upb_fielddef *f, upb_handlertype_t type) {
9776 putsel(c, op, getsel(f, type), h);
9777}
9778
9779static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
9780 if (!upb_fielddef_lazy(f))
9781 return false;
9782
9783 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
9784 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
9785 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
9786}
9787
9788
9789/* bytecode compiler code generation ******************************************/
9790
9791/* Symbolic names for our local labels. */
9792#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
9793#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
9794#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
9795#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
9796
9797/* Generates bytecode to parse a single non-lazy message field. */
9798static void generate_msgfield(compiler *c, const upb_fielddef *f,
9799 upb_pbdecodermethod *method) {
9800 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
9801 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
9802 int wire_type;
9803
9804 if (!sub_m) {
9805 /* Don't emit any code for this field at all; it will be parsed as an
Austin Schuh40c16522018-10-28 20:27:54 -07009806 * unknown field.
9807 *
9808 * TODO(haberman): we should change this to parse it as a string field
9809 * instead. It will probably be faster, but more importantly, once we
9810 * start vending unknown fields, a field shouldn't be treated as unknown
9811 * just because it doesn't have subhandlers registered. */
Brian Silverman9c614bc2016-02-15 20:20:02 -05009812 return;
9813 }
9814
9815 label(c, LABEL_FIELD);
9816
9817 wire_type =
9818 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
9819 ? UPB_WIRE_TYPE_DELIMITED
9820 : UPB_WIRE_TYPE_START_GROUP;
9821
9822 if (upb_fielddef_isseq(f)) {
9823 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
9824 putchecktag(c, f, wire_type, LABEL_DISPATCH);
9825 dispatchtarget(c, method, f, wire_type);
9826 putop(c, OP_PUSHTAGDELIM, 0);
9827 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
9828 label(c, LABEL_LOOPSTART);
9829 putpush(c, f);
9830 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
9831 putop(c, OP_CALL, sub_m);
9832 putop(c, OP_POP);
9833 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
9834 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
9835 putop(c, OP_SETDELIM);
9836 }
9837 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
9838 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
9839 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
9840 label(c, LABEL_LOOPBREAK);
9841 putop(c, OP_POP);
9842 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
9843 } else {
9844 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
9845 putchecktag(c, f, wire_type, LABEL_DISPATCH);
9846 dispatchtarget(c, method, f, wire_type);
9847 putpush(c, f);
9848 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
9849 putop(c, OP_CALL, sub_m);
9850 putop(c, OP_POP);
9851 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
9852 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
9853 putop(c, OP_SETDELIM);
9854 }
9855 }
9856}
9857
9858/* Generates bytecode to parse a single string or lazy submessage field. */
9859static void generate_delimfield(compiler *c, const upb_fielddef *f,
9860 upb_pbdecodermethod *method) {
9861 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
9862
9863 label(c, LABEL_FIELD);
9864 if (upb_fielddef_isseq(f)) {
9865 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
9866 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
9867 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
9868 putop(c, OP_PUSHTAGDELIM, 0);
9869 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
9870 label(c, LABEL_LOOPSTART);
9871 putop(c, OP_PUSHLENDELIM);
9872 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
9873 /* Need to emit even if no handler to skip past the string. */
9874 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
9875 putop(c, OP_POP);
9876 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
9877 putop(c, OP_SETDELIM);
9878 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
9879 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
9880 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
9881 label(c, LABEL_LOOPBREAK);
9882 putop(c, OP_POP);
9883 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
9884 } else {
9885 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
9886 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
9887 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
9888 putop(c, OP_PUSHLENDELIM);
9889 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
9890 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
9891 putop(c, OP_POP);
9892 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
9893 putop(c, OP_SETDELIM);
9894 }
9895}
9896
9897/* Generates bytecode to parse a single primitive field. */
9898static void generate_primitivefield(compiler *c, const upb_fielddef *f,
9899 upb_pbdecodermethod *method) {
9900 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
9901 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
9902 opcode parse_type;
9903 upb_selector_t sel;
9904 int wire_type;
9905
9906 label(c, LABEL_FIELD);
9907
9908 /* From a decoding perspective, ENUM is the same as INT32. */
9909 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
9910 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
9911
9912 parse_type = (opcode)descriptor_type;
9913
9914 /* TODO(haberman): generate packed or non-packed first depending on "packed"
9915 * setting in the fielddef. This will favor (in speed) whichever was
9916 * specified. */
9917
Austin Schuh40c16522018-10-28 20:27:54 -07009918 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009919 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
9920 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
9921 if (upb_fielddef_isseq(f)) {
9922 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
9923 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
9924 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
9925 putop(c, OP_PUSHLENDELIM);
9926 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
9927 label(c, LABEL_LOOPSTART);
9928 putop(c, parse_type, sel);
9929 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
9930 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
9931 dispatchtarget(c, method, f, wire_type);
9932 putop(c, OP_PUSHTAGDELIM, 0);
9933 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
9934 label(c, LABEL_LOOPSTART);
9935 putop(c, parse_type, sel);
9936 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
9937 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
9938 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
9939 label(c, LABEL_LOOPBREAK);
9940 putop(c, OP_POP); /* Packed and non-packed join. */
9941 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
9942 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
9943 } else {
9944 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
9945 putchecktag(c, f, wire_type, LABEL_DISPATCH);
9946 dispatchtarget(c, method, f, wire_type);
9947 putop(c, parse_type, sel);
9948 }
9949}
9950
9951/* Adds bytecode for parsing the given message to the given decoderplan,
9952 * while adding all dispatch targets to this message's dispatch table. */
9953static void compile_method(compiler *c, upb_pbdecodermethod *method) {
9954 const upb_handlers *h;
9955 const upb_msgdef *md;
9956 uint32_t* start_pc;
9957 upb_msg_field_iter i;
9958 upb_value val;
9959
Austin Schuh40c16522018-10-28 20:27:54 -07009960 UPB_ASSERT(method);
Brian Silverman9c614bc2016-02-15 20:20:02 -05009961
9962 /* Clear all entries in the dispatch table. */
9963 upb_inttable_uninit(&method->dispatch);
9964 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
9965
9966 h = upb_pbdecodermethod_desthandlers(method);
9967 md = upb_handlers_msgdef(h);
9968
9969 method->code_base.ofs = pcofs(c);
9970 putop(c, OP_SETDISPATCH, &method->dispatch);
9971 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
9972 label(c, LABEL_FIELD);
9973 start_pc = c->pc;
9974 for(upb_msg_field_begin(&i, md);
9975 !upb_msg_field_done(&i);
9976 upb_msg_field_next(&i)) {
9977 const upb_fielddef *f = upb_msg_iter_field(&i);
9978 upb_fieldtype_t type = upb_fielddef_type(f);
9979
9980 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
9981 generate_msgfield(c, f, method);
9982 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
9983 type == UPB_TYPE_MESSAGE) {
9984 generate_delimfield(c, f, method);
9985 } else {
9986 generate_primitivefield(c, f, method);
9987 }
9988 }
9989
9990 /* If there were no fields, or if no handlers were defined, we need to
9991 * generate a non-empty loop body so that we can at least dispatch for unknown
9992 * fields and check for the end of the message. */
9993 if (c->pc == start_pc) {
9994 /* Check for end-of-message. */
9995 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
9996 /* Unconditionally dispatch. */
9997 putop(c, OP_DISPATCH, 0);
9998 }
9999
10000 /* For now we just loop back to the last field of the message (or if none,
10001 * the DISPATCH opcode for the message). */
10002 putop(c, OP_BRANCH, -LABEL_FIELD);
10003
10004 /* Insert both a label and a dispatch table entry for this end-of-msg. */
10005 label(c, LABEL_ENDMSG);
10006 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
10007 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
10008
10009 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
10010 putop(c, OP_RET);
10011
10012 upb_inttable_compact(&method->dispatch);
10013}
10014
10015/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
10016 * Returns the method for these handlers.
10017 *
10018 * Generates a new method for every destination handlers reachable from "h". */
10019static void find_methods(compiler *c, const upb_handlers *h) {
10020 upb_value v;
10021 upb_msg_field_iter i;
10022 const upb_msgdef *md;
10023
10024 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
10025 return;
10026 newmethod(h, c->group);
10027
10028 /* Find submethods. */
10029 md = upb_handlers_msgdef(h);
10030 for(upb_msg_field_begin(&i, md);
10031 !upb_msg_field_done(&i);
10032 upb_msg_field_next(&i)) {
10033 const upb_fielddef *f = upb_msg_iter_field(&i);
10034 const upb_handlers *sub_h;
10035 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
10036 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
10037 /* We only generate a decoder method for submessages with handlers.
10038 * Others will be parsed as unknown fields. */
10039 find_methods(c, sub_h);
10040 }
10041 }
10042}
10043
10044/* (Re-)compile bytecode for all messages in "msgs."
10045 * Overwrites any existing bytecode in "c". */
10046static void compile_methods(compiler *c) {
10047 upb_inttable_iter i;
10048
10049 /* Start over at the beginning of the bytecode. */
10050 c->pc = c->group->bytecode;
10051
10052 upb_inttable_begin(&i, &c->group->methods);
10053 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
10054 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
10055 compile_method(c, method);
10056 }
10057}
10058
10059static void set_bytecode_handlers(mgroup *g) {
10060 upb_inttable_iter i;
10061 upb_inttable_begin(&i, &g->methods);
10062 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
10063 upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
10064 upb_byteshandler *h = &m->input_handler_;
10065
10066 m->code_base.ptr = g->bytecode + m->code_base.ofs;
10067
10068 upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
10069 upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
10070 upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
10071 }
10072}
10073
10074
10075/* JIT setup. *****************************************************************/
10076
10077#ifdef UPB_USE_JIT_X64
10078
10079static void sethandlers(mgroup *g, bool allowjit) {
10080 g->jit_code = NULL;
10081 if (allowjit) {
10082 /* Compile byte-code into machine code, create handlers. */
10083 upb_pbdecoder_jit(g);
10084 } else {
10085 set_bytecode_handlers(g);
10086 }
10087}
10088
10089#else /* UPB_USE_JIT_X64 */
10090
10091static void sethandlers(mgroup *g, bool allowjit) {
10092 /* No JIT compiled in; use bytecode handlers unconditionally. */
10093 UPB_UNUSED(allowjit);
10094 set_bytecode_handlers(g);
10095}
10096
10097#endif /* UPB_USE_JIT_X64 */
10098
10099
10100/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
10101 * handlers and other mgroups (but verify we have a transitive closure). */
10102const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
10103 const void *owner) {
10104 mgroup *g;
10105 compiler *c;
10106
10107 UPB_UNUSED(allowjit);
Austin Schuh40c16522018-10-28 20:27:54 -070010108 UPB_ASSERT(upb_handlers_isfrozen(dest));
Brian Silverman9c614bc2016-02-15 20:20:02 -050010109
10110 g = newgroup(owner);
10111 c = newcompiler(g, lazy);
10112 find_methods(c, dest);
10113
10114 /* We compile in two passes:
10115 * 1. all messages are assigned relative offsets from the beginning of the
10116 * bytecode (saved in method->code_base).
10117 * 2. forwards OP_CALL instructions can be correctly linked since message
10118 * offsets have been previously assigned.
10119 *
10120 * Could avoid the second pass by linking OP_CALL instructions somehow. */
10121 compile_methods(c);
10122 compile_methods(c);
10123 g->bytecode_end = c->pc;
10124 freecompiler(c);
10125
10126#ifdef UPB_DUMP_BYTECODE
10127 {
Austin Schuh40c16522018-10-28 20:27:54 -070010128 FILE *f = fopen("/tmp/upb-bytecode", "w");
10129 UPB_ASSERT(f);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010130 dumpbc(g->bytecode, g->bytecode_end, stderr);
10131 dumpbc(g->bytecode, g->bytecode_end, f);
10132 fclose(f);
Austin Schuh40c16522018-10-28 20:27:54 -070010133
10134 f = fopen("/tmp/upb-bytecode.bin", "wb");
10135 UPB_ASSERT(f);
10136 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
10137 fclose(f);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010138 }
10139#endif
10140
10141 sethandlers(g, allowjit);
10142 return g;
10143}
10144
10145
10146/* upb_pbcodecache ************************************************************/
10147
10148void upb_pbcodecache_init(upb_pbcodecache *c) {
10149 upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
10150 c->allow_jit_ = true;
10151}
10152
10153void upb_pbcodecache_uninit(upb_pbcodecache *c) {
10154 upb_inttable_iter i;
10155 upb_inttable_begin(&i, &c->groups);
10156 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
10157 const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
10158 mgroup_unref(group, c);
10159 }
10160 upb_inttable_uninit(&c->groups);
10161}
10162
10163bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
10164 return c->allow_jit_;
10165}
10166
10167bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
10168 if (upb_inttable_count(&c->groups) > 0)
10169 return false;
10170 c->allow_jit_ = allow;
10171 return true;
10172}
10173
10174const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
10175 upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
10176 upb_value v;
10177 bool ok;
10178
10179 /* Right now we build a new DecoderMethod every time.
10180 * TODO(haberman): properly cache methods by their true key. */
10181 const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
10182 upb_inttable_push(&c->groups, upb_value_constptr(g));
10183
10184 ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
Austin Schuh40c16522018-10-28 20:27:54 -070010185 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010186 return upb_value_getptr(v);
10187}
10188
10189
10190/* upb_pbdecodermethodopts ****************************************************/
10191
10192void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
10193 const upb_handlers *h) {
10194 opts->handlers = h;
10195 opts->lazy = false;
10196}
10197
10198void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
10199 opts->lazy = lazy;
10200}
10201/*
10202** upb::Decoder (Bytecode Decoder VM)
10203**
10204** Bytecode must previously have been generated using the bytecode compiler in
10205** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
10206** parse the input.
10207**
10208** Decoding is fully resumable; we just keep a pointer to the current bytecode
10209** instruction and resume from there. A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be
** capable of suspending/resuming from any byte in the stream.  This
10212** sometimes requires keeping a few trailing bytes from the last buffer around
10213** in the "residual" buffer.
10214*/
10215
10216#include <inttypes.h>
10217#include <stddef.h>
10218
10219#ifdef UPB_DUMP_BYTECODE
10220#include <stdio.h>
10221#endif
10222
10223#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
10224
10225/* Error messages that are shared between the bytecode and JIT decoders. */
10226const char *kPbDecoderStackOverflow = "Nesting too deep.";
10227const char *kPbDecoderSubmessageTooLong =
10228 "Submessage end extends past enclosing submessage.";
10229
10230/* Error messages shared within this file. */
10231static const char *kUnterminatedVarint = "Unterminated varint.";
10232
10233/* upb_pbdecoder **************************************************************/
10234
10235static opcode halt = OP_HALT;
10236
Austin Schuh40c16522018-10-28 20:27:54 -070010237/* A dummy character we can point to when the user passes us a NULL buffer.
10238 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
10239 * behavior, which would invalidate functions like curbufleft(). */
10240static const char dummy_char;
10241
Brian Silverman9c614bc2016-02-15 20:20:02 -050010242/* Whether an op consumes any of the input buffer. */
10243static bool consumes_input(opcode op) {
10244 switch (op) {
10245 case OP_SETDISPATCH:
10246 case OP_STARTMSG:
10247 case OP_ENDMSG:
10248 case OP_STARTSEQ:
10249 case OP_ENDSEQ:
10250 case OP_STARTSUBMSG:
10251 case OP_ENDSUBMSG:
10252 case OP_STARTSTR:
10253 case OP_ENDSTR:
10254 case OP_PUSHTAGDELIM:
10255 case OP_POP:
10256 case OP_SETDELIM:
10257 case OP_SETBIGGROUPNUM:
10258 case OP_CHECKDELIM:
10259 case OP_CALL:
10260 case OP_RET:
10261 case OP_BRANCH:
10262 return false;
10263 default:
10264 return true;
10265 }
10266}
10267
10268static size_t stacksize(upb_pbdecoder *d, size_t entries) {
10269 UPB_UNUSED(d);
10270 return entries * sizeof(upb_pbdecoder_frame);
10271}
10272
10273static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
10274 UPB_UNUSED(d);
10275
10276#ifdef UPB_USE_JIT_X64
10277 if (d->method_->is_native_) {
10278 /* Each native stack frame needs two pointers, plus we need a few frames for
10279 * the enter/exit trampolines. */
10280 size_t ret = entries * sizeof(void*) * 2;
10281 ret += sizeof(void*) * 10;
10282 return ret;
10283 }
10284#endif
10285
10286 return entries * sizeof(uint32_t*);
10287}
10288
10289
10290static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
10291
10292/* It's unfortunate that we have to micro-manage the compiler with
10293 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
10294 * specific to one hardware configuration. But empirically on a Core i7,
10295 * performance increases 30-50% with these annotations. Every instance where
10296 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
10297 * benchmarks. */
10298
10299static void seterr(upb_pbdecoder *d, const char *msg) {
10300 upb_status status = UPB_STATUS_INIT;
10301 upb_status_seterrmsg(&status, msg);
10302 upb_env_reporterror(d->env, &status);
10303}
10304
10305void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
10306 seterr(d, msg);
10307}
10308
10309
10310/* Buffering ******************************************************************/
10311
10312/* We operate on one buffer at a time, which is either the user's buffer passed
10313 * to our "decode" callback or some residual bytes from the previous buffer. */
10314
10315/* How many bytes can be safely read from d->ptr without reading past end-of-buf
10316 * or past the current delimited end. */
10317static size_t curbufleft(const upb_pbdecoder *d) {
Austin Schuh40c16522018-10-28 20:27:54 -070010318 UPB_ASSERT(d->data_end >= d->ptr);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010319 return d->data_end - d->ptr;
10320}
10321
10322/* How many bytes are available before end-of-buffer. */
10323static size_t bufleft(const upb_pbdecoder *d) {
10324 return d->end - d->ptr;
10325}
10326
10327/* Overall stream offset of d->ptr. */
10328uint64_t offset(const upb_pbdecoder *d) {
10329 return d->bufstart_ofs + (d->ptr - d->buf);
10330}
10331
10332/* How many bytes are available before the end of this delimited region. */
10333size_t delim_remaining(const upb_pbdecoder *d) {
10334 return d->top->end_ofs - offset(d);
10335}
10336
10337/* Advances d->ptr. */
10338static void advance(upb_pbdecoder *d, size_t len) {
Austin Schuh40c16522018-10-28 20:27:54 -070010339 UPB_ASSERT(curbufleft(d) >= len);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010340 d->ptr += len;
10341}
10342
/* Returns true when "p" lies within [buf, end], both ends inclusive. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return p <= end;
}
10346
10347static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
10348 return in_buf(p, d->residual, d->residual_end);
10349}
10350
10351/* Calculates the delim_end value, which is affected by both the current buffer
10352 * and the parsing stack, so must be called whenever either is updated. */
10353static void set_delim_end(upb_pbdecoder *d) {
10354 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
10355 if (delim_ofs <= (size_t)(d->end - d->buf)) {
10356 d->delim_end = d->buf + delim_ofs;
10357 d->data_end = d->delim_end;
10358 } else {
10359 d->data_end = d->end;
10360 d->delim_end = NULL;
10361 }
10362}
10363
10364static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
10365 d->ptr = buf;
10366 d->buf = buf;
10367 d->end = end;
10368 set_delim_end(d);
10369}
10370
10371static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
Austin Schuh40c16522018-10-28 20:27:54 -070010372 UPB_ASSERT(curbufleft(d) == 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010373 d->bufstart_ofs += (d->end - d->buf);
10374 switchtobuf(d, buf, buf + len);
10375}
10376
10377static void checkpoint(upb_pbdecoder *d) {
10378 /* The assertion here is in the interests of efficiency, not correctness.
10379 * We are trying to ensure that we don't checkpoint() more often than
10380 * necessary. */
Austin Schuh40c16522018-10-28 20:27:54 -070010381 UPB_ASSERT(d->checkpoint != d->ptr);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010382 d->checkpoint = d->ptr;
10383}
10384
10385/* Skips "bytes" bytes in the stream, which may be more than available. If we
10386 * skip more bytes than are available, we return a long read count to the caller
10387 * indicating how many bytes can be skipped over before passing actual data
10388 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
10389 * won't actually be read.
10390 */
10391static int32_t skip(upb_pbdecoder *d, size_t bytes) {
Austin Schuh40c16522018-10-28 20:27:54 -070010392 UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
10393 UPB_ASSERT(d->skip == 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010394 if (bytes > delim_remaining(d)) {
10395 seterr(d, "Skipped value extended beyond enclosing submessage.");
10396 return upb_pbdecoder_suspend(d);
Austin Schuh40c16522018-10-28 20:27:54 -070010397 } else if (bufleft(d) >= bytes) {
Brian Silverman9c614bc2016-02-15 20:20:02 -050010398 /* Skipped data is all in current buffer, and more is still available. */
10399 advance(d, bytes);
10400 d->skip = 0;
10401 return DECODE_OK;
10402 } else {
10403 /* Skipped data extends beyond currently available buffers. */
10404 d->pc = d->last;
10405 d->skip = bytes - curbufleft(d);
10406 d->bufstart_ofs += (d->end - d->buf);
10407 d->residual_end = d->residual;
10408 switchtobuf(d, d->residual, d->residual_end);
10409 return d->size_param + d->skip;
10410 }
10411}
10412
10413
10414/* Resumes the decoder from an initial state or from a previous suspend. */
10415int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
10416 size_t size, const upb_bufhandle *handle) {
10417 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
10418
Austin Schuh40c16522018-10-28 20:27:54 -070010419 /* d->skip and d->residual_end could probably elegantly be represented
10420 * as a single variable, to more easily represent this invariant. */
10421 UPB_ASSERT(!(d->skip && d->residual_end > d->residual));
10422
10423 /* We need to remember the original size_param, so that the value we return
10424 * is relative to it, even if we do some skipping first. */
Brian Silverman9c614bc2016-02-15 20:20:02 -050010425 d->size_param = size;
10426 d->handle = handle;
10427
Austin Schuh40c16522018-10-28 20:27:54 -070010428 /* Have to handle this case specially (ie. not with skip()) because the user
10429 * is allowed to pass a NULL buffer here, which won't allow us to safely
10430 * calculate a d->end or use our normal functions like curbufleft(). */
10431 if (d->skip && d->skip >= size) {
10432 d->skip -= size;
10433 d->bufstart_ofs += size;
10434 buf = &dummy_char;
10435 size = 0;
10436
10437 /* We can't just return now, because we might need to execute some ops
10438 * like CHECKDELIM, which could call some callbacks and pop the stack. */
Brian Silverman9c614bc2016-02-15 20:20:02 -050010439 }
10440
Austin Schuh40c16522018-10-28 20:27:54 -070010441 /* We need to pretend that this was the actual buffer param, since some of the
10442 * calculations assume that d->ptr/d->buf is relative to this. */
10443 d->buf_param = buf;
Brian Silverman9c614bc2016-02-15 20:20:02 -050010444
10445 if (!buf) {
10446 /* NULL buf is ok if its entire span is covered by the "skip" above, but
10447 * by this point we know that "skip" doesn't cover the buffer. */
10448 seterr(d, "Passed NULL buffer over non-skippable region.");
10449 return upb_pbdecoder_suspend(d);
10450 }
10451
Austin Schuh40c16522018-10-28 20:27:54 -070010452 if (d->residual_end > d->residual) {
10453 /* We have residual bytes from the last buffer. */
10454 UPB_ASSERT(d->ptr == d->residual);
10455 } else {
10456 switchtobuf(d, buf, buf + size);
10457 }
10458
10459 d->checkpoint = d->ptr;
10460
10461 /* Handle skips that don't cover the whole buffer (as above). */
10462 if (d->skip) {
10463 size_t skip_bytes = d->skip;
10464 d->skip = 0;
10465 CHECK_RETURN(skip(d, skip_bytes));
10466 checkpoint(d);
10467 }
10468
10469 /* If we're inside an unknown group, continue to parse unknown values. */
Brian Silverman9c614bc2016-02-15 20:20:02 -050010470 if (d->top->groupnum < 0) {
10471 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
Austin Schuh40c16522018-10-28 20:27:54 -070010472 checkpoint(d);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010473 }
10474
10475 return DECODE_OK;
10476}
10477
10478/* Suspends the decoder at the last checkpoint, without saving any residual
10479 * bytes. If there are any unconsumed bytes, returns a short byte count. */
10480size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
10481 d->pc = d->last;
10482 if (d->checkpoint == d->residual) {
10483 /* Checkpoint was in residual buf; no user bytes were consumed. */
10484 d->ptr = d->residual;
10485 return 0;
10486 } else {
Austin Schuh40c16522018-10-28 20:27:54 -070010487 size_t ret = d->size_param - (d->end - d->checkpoint);
10488 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
10489 UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010490
Austin Schuh40c16522018-10-28 20:27:54 -070010491 d->bufstart_ofs += (d->checkpoint - d->buf);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010492 d->residual_end = d->residual;
10493 switchtobuf(d, d->residual, d->residual_end);
Austin Schuh40c16522018-10-28 20:27:54 -070010494 return ret;
Brian Silverman9c614bc2016-02-15 20:20:02 -050010495 }
10496}
10497
10498/* Suspends the decoder at the last checkpoint, and saves any unconsumed
10499 * bytes in our residual buffer. This is necessary if we need more user
10500 * bytes to form a complete value, which might not be contiguous in the
10501 * user's buffers. Always consumes all user bytes. */
10502static size_t suspend_save(upb_pbdecoder *d) {
10503 /* We hit end-of-buffer before we could parse a full value.
10504 * Save any unconsumed bytes (if any) to the residual buffer. */
10505 d->pc = d->last;
10506
10507 if (d->checkpoint == d->residual) {
10508 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
Austin Schuh40c16522018-10-28 20:27:54 -070010509 UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
Brian Silverman9c614bc2016-02-15 20:20:02 -050010510 sizeof(d->residual));
10511 if (!in_residual_buf(d, d->ptr)) {
10512 d->bufstart_ofs -= (d->residual_end - d->residual);
10513 }
10514 memcpy(d->residual_end, d->buf_param, d->size_param);
10515 d->residual_end += d->size_param;
10516 } else {
10517 /* Checkpoint was in user buf; old residual bytes not needed. */
10518 size_t save;
Austin Schuh40c16522018-10-28 20:27:54 -070010519 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
Brian Silverman9c614bc2016-02-15 20:20:02 -050010520
10521 d->ptr = d->checkpoint;
10522 save = curbufleft(d);
Austin Schuh40c16522018-10-28 20:27:54 -070010523 UPB_ASSERT(save <= sizeof(d->residual));
Brian Silverman9c614bc2016-02-15 20:20:02 -050010524 memcpy(d->residual, d->ptr, save);
10525 d->residual_end = d->residual + save;
10526 d->bufstart_ofs = offset(d);
10527 }
10528
10529 switchtobuf(d, d->residual, d->residual_end);
10530 return d->size_param;
10531}
10532
10533/* Copies the next "bytes" bytes into "buf" and advances the stream.
10534 * Requires that this many bytes are available in the current buffer. */
10535UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
10536 size_t bytes) {
Austin Schuh40c16522018-10-28 20:27:54 -070010537 UPB_ASSERT(bytes <= curbufleft(d));
Brian Silverman9c614bc2016-02-15 20:20:02 -050010538 memcpy(buf, d->ptr, bytes);
10539 advance(d, bytes);
10540}
10541
10542/* Slow path for getting the next "bytes" bytes, regardless of whether they are
10543 * available in the current buffer or not. Returns a status code as described
10544 * in decoder.int.h. */
10545UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
10546 size_t bytes) {
10547 const size_t avail = curbufleft(d);
10548 consumebytes(d, buf, avail);
10549 bytes -= avail;
Austin Schuh40c16522018-10-28 20:27:54 -070010550 UPB_ASSERT(bytes > 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010551 if (in_residual_buf(d, d->ptr)) {
10552 advancetobuf(d, d->buf_param, d->size_param);
10553 }
10554 if (curbufleft(d) >= bytes) {
10555 consumebytes(d, (char *)buf + avail, bytes);
10556 return DECODE_OK;
10557 } else if (d->data_end == d->delim_end) {
10558 seterr(d, "Submessage ended in the middle of a value or group");
10559 return upb_pbdecoder_suspend(d);
10560 } else {
10561 return suspend_save(d);
10562 }
10563}
10564
10565/* Gets the next "bytes" bytes, regardless of whether they are available in the
10566 * current buffer or not. Returns a status code as described in decoder.int.h.
10567 */
10568UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
10569 size_t bytes) {
10570 if (curbufleft(d) >= bytes) {
10571 /* Buffer has enough data to satisfy. */
10572 consumebytes(d, buf, bytes);
10573 return DECODE_OK;
10574 } else {
10575 return getbytes_slow(d, buf, bytes);
10576 }
10577}
10578
10579UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
10580 size_t bytes) {
10581 size_t ret = curbufleft(d);
10582 memcpy(buf, d->ptr, ret);
10583 if (in_residual_buf(d, d->ptr)) {
10584 size_t copy = UPB_MIN(bytes - ret, d->size_param);
10585 memcpy((char *)buf + ret, d->buf_param, copy);
10586 ret += copy;
10587 }
10588 return ret;
10589}
10590
10591UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
10592 size_t bytes) {
10593 if (curbufleft(d) >= bytes) {
10594 memcpy(buf, d->ptr, bytes);
10595 return bytes;
10596 } else {
10597 return peekbytes_slow(d, buf, bytes);
10598 }
10599}
10600
10601
10602/* Decoding of wire types *****************************************************/
10603
10604/* Slow path for decoding a varint from the current buffer position.
10605 * Returns a status code as described in decoder.int.h. */
10606UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
10607 uint64_t *u64) {
10608 uint8_t byte = 0x80;
10609 int bitpos;
10610 *u64 = 0;
10611 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
Austin Schuh40c16522018-10-28 20:27:54 -070010612 CHECK_RETURN(getbytes(d, &byte, 1));
Brian Silverman9c614bc2016-02-15 20:20:02 -050010613 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
10614 }
10615 if(bitpos == 70 && (byte & 0x80)) {
10616 seterr(d, kUnterminatedVarint);
10617 return upb_pbdecoder_suspend(d);
10618 }
10619 return DECODE_OK;
10620}
10621
10622/* Decodes a varint from the current buffer position.
10623 * Returns a status code as described in decoder.int.h. */
10624UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
10625 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
10626 *u64 = *d->ptr;
10627 advance(d, 1);
10628 return DECODE_OK;
10629 } else if (curbufleft(d) >= 10) {
10630 /* Fast case. */
10631 upb_decoderet r = upb_vdecode_fast(d->ptr);
10632 if (r.p == NULL) {
10633 seterr(d, kUnterminatedVarint);
10634 return upb_pbdecoder_suspend(d);
10635 }
10636 advance(d, r.p - d->ptr);
10637 *u64 = r.val;
10638 return DECODE_OK;
10639 } else {
10640 /* Slow case -- varint spans buffer seam. */
10641 return upb_pbdecoder_decode_varint_slow(d, u64);
10642 }
10643}
10644
10645/* Decodes a 32-bit varint from the current buffer position.
10646 * Returns a status code as described in decoder.int.h. */
10647UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
10648 uint64_t u64;
10649 int32_t ret = decode_varint(d, &u64);
10650 if (ret >= 0) return ret;
10651 if (u64 > UINT32_MAX) {
10652 seterr(d, "Unterminated 32-bit varint");
10653 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
10654 * so we know this path will always be treated as error by our caller.
10655 * Right now the size_t -> int32_t can overflow and produce negative values.
10656 */
10657 *u32 = 0;
10658 return upb_pbdecoder_suspend(d);
10659 }
10660 *u32 = u64;
10661 return DECODE_OK;
10662}
10663
10664/* Decodes a fixed32 from the current buffer position.
10665 * Returns a status code as described in decoder.int.h.
10666 * TODO: proper byte swapping for big-endian machines. */
10667UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
10668 return getbytes(d, u32, 4);
10669}
10670
10671/* Decodes a fixed64 from the current buffer position.
10672 * Returns a status code as described in decoder.int.h.
10673 * TODO: proper byte swapping for big-endian machines. */
10674UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
10675 return getbytes(d, u64, 8);
10676}
10677
10678/* Non-static versions of the above functions.
10679 * These are called by the JIT for fallback paths. */
10680int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
10681 return decode_fixed32(d, u32);
10682}
10683
10684int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
10685 return decode_fixed64(d, u64);
10686}
10687
/* Reinterprets the 8 bytes of "n" as a double (a bit copy, not a numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, 8);
  return result;
}
/* Reinterprets the 4 bytes of "n" as a float (a bit copy, not a numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, 4);
  return result;
}
10690
10691/* Pushes a frame onto the decoder stack. */
10692static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
10693 upb_pbdecoder_frame *fr = d->top;
10694
10695 if (end > fr->end_ofs) {
10696 seterr(d, kPbDecoderSubmessageTooLong);
10697 return false;
10698 } else if (fr == d->limit) {
10699 seterr(d, kPbDecoderStackOverflow);
10700 return false;
10701 }
10702
10703 fr++;
10704 fr->end_ofs = end;
10705 fr->dispatch = NULL;
10706 fr->groupnum = 0;
10707 d->top = fr;
10708 return true;
10709}
10710
10711static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
10712 /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
10713 * field number) prior to hitting any enclosing submessage end, pushing our
10714 * existing delim end prevents us from continuing to parse values from a
10715 * corrupt proto that doesn't give us an END tag in time. */
10716 if (!decoder_push(d, d->top->end_ofs))
10717 return false;
10718 d->top->groupnum = arg;
10719 return true;
10720}
10721
10722/* Pops a frame from the decoder stack. */
10723static void decoder_pop(upb_pbdecoder *d) { d->top--; }
10724
10725UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
10726 uint64_t expected) {
10727 uint64_t data = 0;
10728 size_t bytes = upb_value_size(expected);
10729 size_t read = peekbytes(d, &data, bytes);
10730 if (read == bytes && data == expected) {
10731 /* Advance past matched bytes. */
10732 int32_t ok = getbytes(d, &data, read);
Austin Schuh40c16522018-10-28 20:27:54 -070010733 UPB_ASSERT(ok < 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010734 return DECODE_OK;
10735 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
10736 return suspend_save(d);
10737 } else {
10738 return DECODE_MISMATCH;
10739 }
10740}
10741
10742int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
10743 uint8_t wire_type) {
10744 if (fieldnum >= 0)
10745 goto have_tag;
10746
10747 while (true) {
10748 uint32_t tag;
10749 CHECK_RETURN(decode_v32(d, &tag));
10750 wire_type = tag & 0x7;
10751 fieldnum = tag >> 3;
10752
10753have_tag:
10754 if (fieldnum == 0) {
10755 seterr(d, "Saw invalid field number (0)");
10756 return upb_pbdecoder_suspend(d);
10757 }
10758
Brian Silverman9c614bc2016-02-15 20:20:02 -050010759 switch (wire_type) {
10760 case UPB_WIRE_TYPE_32BIT:
10761 CHECK_RETURN(skip(d, 4));
10762 break;
10763 case UPB_WIRE_TYPE_64BIT:
10764 CHECK_RETURN(skip(d, 8));
10765 break;
10766 case UPB_WIRE_TYPE_VARINT: {
10767 uint64_t u64;
10768 CHECK_RETURN(decode_varint(d, &u64));
10769 break;
10770 }
10771 case UPB_WIRE_TYPE_DELIMITED: {
10772 uint32_t len;
10773 CHECK_RETURN(decode_v32(d, &len));
10774 CHECK_RETURN(skip(d, len));
10775 break;
10776 }
10777 case UPB_WIRE_TYPE_START_GROUP:
10778 CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
10779 break;
10780 case UPB_WIRE_TYPE_END_GROUP:
10781 if (fieldnum == -d->top->groupnum) {
10782 decoder_pop(d);
10783 } else if (fieldnum == d->top->groupnum) {
10784 return DECODE_ENDGROUP;
10785 } else {
10786 seterr(d, "Unmatched ENDGROUP tag.");
10787 return upb_pbdecoder_suspend(d);
10788 }
10789 break;
10790 default:
10791 seterr(d, "Invalid wire type");
10792 return upb_pbdecoder_suspend(d);
10793 }
10794
10795 if (d->top->groupnum >= 0) {
Austin Schuh40c16522018-10-28 20:27:54 -070010796 /* TODO: More code needed for handling unknown groups. */
10797 upb_sink_putunknown(&d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010798 return DECODE_OK;
10799 }
10800
10801 /* Unknown group -- continue looping over unknown fields. */
10802 checkpoint(d);
10803 }
10804}
10805
10806static void goto_endmsg(upb_pbdecoder *d) {
10807 upb_value v;
10808 bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
Austin Schuh40c16522018-10-28 20:27:54 -070010809 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010810 d->pc = d->top->base + upb_value_getuint64(v);
10811}
10812
10813/* Parses a tag and jumps to the corresponding bytecode instruction for this
10814 * field.
10815 *
10816 * If the tag is unknown (or the wire type doesn't match), parses the field as
10817 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
10818 * instruction for the end of message. */
10819static int32_t dispatch(upb_pbdecoder *d) {
10820 upb_inttable *dispatch = d->top->dispatch;
10821 uint32_t tag;
10822 uint8_t wire_type;
10823 uint32_t fieldnum;
10824 upb_value val;
10825 int32_t retval;
10826
10827 /* Decode tag. */
10828 CHECK_RETURN(decode_v32(d, &tag));
10829 wire_type = tag & 0x7;
10830 fieldnum = tag >> 3;
10831
10832 /* Lookup tag. Because of packed/non-packed compatibility, we have to
10833 * check the wire type against two possibilities. */
10834 if (fieldnum != DISPATCH_ENDMSG &&
10835 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
10836 uint64_t v = upb_value_getuint64(val);
10837 if (wire_type == (v & 0xff)) {
10838 d->pc = d->top->base + (v >> 16);
10839 return DECODE_OK;
10840 } else if (wire_type == ((v >> 8) & 0xff)) {
10841 bool found =
10842 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
Austin Schuh40c16522018-10-28 20:27:54 -070010843 UPB_ASSERT(found);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010844 d->pc = d->top->base + upb_value_getuint64(val);
10845 return DECODE_OK;
10846 }
10847 }
10848
10849 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
10850 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
10851 * we need to back up to, so that when we're done skipping unknown data we
10852 * can re-check the delimited end. */
10853 d->last--; /* Necessary if we get suspended */
10854 d->pc = d->last;
Austin Schuh40c16522018-10-28 20:27:54 -070010855 UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010856
10857 /* Unknown field or ENDGROUP. */
10858 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
10859
10860 CHECK_RETURN(retval);
10861
10862 if (retval == DECODE_ENDGROUP) {
10863 goto_endmsg(d);
10864 return DECODE_OK;
10865 }
10866
10867 return DECODE_OK;
10868}
10869
10870/* Callers know that the stack is more than one deep because the opcodes that
10871 * call this only occur after PUSH operations. */
10872upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
Austin Schuh40c16522018-10-28 20:27:54 -070010873 UPB_ASSERT(d->top != d->stack);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010874 return d->top - 1;
10875}
10876
10877
10878/* The main decoding loop *****************************************************/
10879
10880/* The main decoder VM function. Uses traditional bytecode dispatch loop with a
10881 * switch() statement. */
10882size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
10883 const upb_bufhandle* handle) {
10884
10885#define VMCASE(op, code) \
10886 case op: { code; if (consumes_input(op)) checkpoint(d); break; }
10887#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
10888 VMCASE(OP_PARSE_ ## type, { \
10889 ctype val; \
10890 CHECK_RETURN(decode_ ## wt(d, &val)); \
10891 upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
10892 })
10893
10894 while(1) {
10895 int32_t instruction;
10896 opcode op;
10897 uint32_t arg;
10898 int32_t longofs;
10899
10900 d->last = d->pc;
10901 instruction = *d->pc++;
10902 op = getop(instruction);
10903 arg = instruction >> 8;
10904 longofs = arg;
Austin Schuh40c16522018-10-28 20:27:54 -070010905 UPB_ASSERT(d->ptr != d->residual_end);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010906 UPB_UNUSED(group);
10907#ifdef UPB_DUMP_BYTECODE
10908 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
10909 "%x %s (%d)\n",
10910 (int)offset(d),
10911 (int)(d->ptr - d->buf),
10912 (int)(d->data_end - d->ptr),
10913 (int)(d->end - d->ptr),
10914 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
10915 (int)(d->pc - 1 - group->bytecode),
10916 upb_pbdecoder_getopname(op),
10917 arg);
10918#endif
10919 switch (op) {
10920 /* Technically, we are losing data if we see a 32-bit varint that is not
10921 * properly sign-extended. We could detect this and error about the data
10922 * loss, but proto2 does not do this, so we pass. */
10923 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
10924 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
10925 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
10926 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
10927 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
10928 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
10929 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
10930 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
10931 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
10932 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
10933 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
10934 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
10935 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
10936
10937 VMCASE(OP_SETDISPATCH,
10938 d->top->base = d->pc - 1;
10939 memcpy(&d->top->dispatch, d->pc, sizeof(void*));
10940 d->pc += sizeof(void*) / sizeof(uint32_t);
10941 )
10942 VMCASE(OP_STARTMSG,
10943 CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
10944 )
10945 VMCASE(OP_ENDMSG,
10946 CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
10947 )
10948 VMCASE(OP_STARTSEQ,
10949 upb_pbdecoder_frame *outer = outer_frame(d);
10950 CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
10951 )
10952 VMCASE(OP_ENDSEQ,
10953 CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
10954 )
10955 VMCASE(OP_STARTSUBMSG,
10956 upb_pbdecoder_frame *outer = outer_frame(d);
10957 CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
10958 )
10959 VMCASE(OP_ENDSUBMSG,
10960 CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
10961 )
10962 VMCASE(OP_STARTSTR,
10963 uint32_t len = delim_remaining(d);
10964 upb_pbdecoder_frame *outer = outer_frame(d);
10965 CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
10966 if (len == 0) {
10967 d->pc++; /* Skip OP_STRING. */
10968 }
10969 )
10970 VMCASE(OP_STRING,
10971 uint32_t len = curbufleft(d);
10972 size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
10973 if (n > len) {
10974 if (n > delim_remaining(d)) {
10975 seterr(d, "Tried to skip past end of string.");
10976 return upb_pbdecoder_suspend(d);
10977 } else {
10978 int32_t ret = skip(d, n);
10979 /* This shouldn't return DECODE_OK, because n > len. */
Austin Schuh40c16522018-10-28 20:27:54 -070010980 UPB_ASSERT(ret >= 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -050010981 return ret;
10982 }
10983 }
10984 advance(d, n);
10985 if (n < len || d->delim_end == NULL) {
10986 /* We aren't finished with this string yet. */
10987 d->pc--; /* Repeat OP_STRING. */
10988 if (n > 0) checkpoint(d);
10989 return upb_pbdecoder_suspend(d);
10990 }
10991 )
10992 VMCASE(OP_ENDSTR,
10993 CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
10994 )
10995 VMCASE(OP_PUSHTAGDELIM,
10996 CHECK_SUSPEND(pushtagdelim(d, arg));
10997 )
10998 VMCASE(OP_SETBIGGROUPNUM,
10999 d->top->groupnum = *d->pc++;
11000 )
11001 VMCASE(OP_POP,
Austin Schuh40c16522018-10-28 20:27:54 -070011002 UPB_ASSERT(d->top > d->stack);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011003 decoder_pop(d);
11004 )
11005 VMCASE(OP_PUSHLENDELIM,
11006 uint32_t len;
11007 CHECK_RETURN(decode_v32(d, &len));
11008 CHECK_SUSPEND(decoder_push(d, offset(d) + len));
11009 set_delim_end(d);
11010 )
11011 VMCASE(OP_SETDELIM,
11012 set_delim_end(d);
11013 )
11014 VMCASE(OP_CHECKDELIM,
11015 /* We are guaranteed of this assert because we never allow ourselves to
11016 * consume bytes beyond data_end, which covers delim_end when non-NULL.
11017 */
Austin Schuh40c16522018-10-28 20:27:54 -070011018 UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
Brian Silverman9c614bc2016-02-15 20:20:02 -050011019 if (d->ptr == d->delim_end)
11020 d->pc += longofs;
11021 )
11022 VMCASE(OP_CALL,
11023 d->callstack[d->call_len++] = d->pc;
11024 d->pc += longofs;
11025 )
11026 VMCASE(OP_RET,
Austin Schuh40c16522018-10-28 20:27:54 -070011027 UPB_ASSERT(d->call_len > 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011028 d->pc = d->callstack[--d->call_len];
11029 )
11030 VMCASE(OP_BRANCH,
11031 d->pc += longofs;
11032 )
11033 VMCASE(OP_TAG1,
11034 uint8_t expected;
11035 CHECK_SUSPEND(curbufleft(d) > 0);
11036 expected = (arg >> 8) & 0xff;
11037 if (*d->ptr == expected) {
11038 advance(d, 1);
11039 } else {
11040 int8_t shortofs;
11041 badtag:
11042 shortofs = arg;
11043 if (shortofs == LABEL_DISPATCH) {
11044 CHECK_RETURN(dispatch(d));
11045 } else {
11046 d->pc += shortofs;
11047 break; /* Avoid checkpoint(). */
11048 }
11049 }
11050 )
11051 VMCASE(OP_TAG2,
11052 uint16_t expected;
11053 CHECK_SUSPEND(curbufleft(d) > 0);
11054 expected = (arg >> 8) & 0xffff;
11055 if (curbufleft(d) >= 2) {
11056 uint16_t actual;
11057 memcpy(&actual, d->ptr, 2);
11058 if (expected == actual) {
11059 advance(d, 2);
11060 } else {
11061 goto badtag;
11062 }
11063 } else {
11064 int32_t result = upb_pbdecoder_checktag_slow(d, expected);
11065 if (result == DECODE_MISMATCH) goto badtag;
11066 if (result >= 0) return result;
11067 }
11068 )
11069 VMCASE(OP_TAGN, {
11070 uint64_t expected;
11071 int32_t result;
11072 memcpy(&expected, d->pc, 8);
11073 d->pc += 2;
11074 result = upb_pbdecoder_checktag_slow(d, expected);
11075 if (result == DECODE_MISMATCH) goto badtag;
11076 if (result >= 0) return result;
11077 })
11078 VMCASE(OP_DISPATCH, {
11079 CHECK_RETURN(dispatch(d));
11080 })
11081 VMCASE(OP_HALT, {
11082 return d->size_param;
11083 })
11084 }
11085 }
11086}
11087
11088
11089/* BytesHandler handlers ******************************************************/
11090
11091void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
11092 upb_pbdecoder *d = closure;
11093 UPB_UNUSED(size_hint);
11094 d->top->end_ofs = UINT64_MAX;
11095 d->bufstart_ofs = 0;
11096 d->call_len = 1;
11097 d->callstack[0] = &halt;
11098 d->pc = pc;
11099 d->skip = 0;
11100 return d;
11101}
11102
11103void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
11104 upb_pbdecoder *d = closure;
11105 UPB_UNUSED(hd);
11106 UPB_UNUSED(size_hint);
11107 d->top->end_ofs = UINT64_MAX;
11108 d->bufstart_ofs = 0;
11109 d->call_len = 0;
11110 d->skip = 0;
11111 return d;
11112}
11113
11114bool upb_pbdecoder_end(void *closure, const void *handler_data) {
11115 upb_pbdecoder *d = closure;
11116 const upb_pbdecodermethod *method = handler_data;
11117 uint64_t end;
11118 char dummy;
11119
11120 if (d->residual_end > d->residual) {
11121 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
11122 return false;
11123 }
11124
11125 if (d->skip) {
11126 seterr(d, "Unexpected EOF inside skipped data");
11127 return false;
11128 }
11129
11130 if (d->top->end_ofs != UINT64_MAX) {
11131 seterr(d, "Unexpected EOF inside delimited string");
11132 return false;
11133 }
11134
11135 /* The user's end() call indicates that the message ends here. */
11136 end = offset(d);
11137 d->top->end_ofs = end;
11138
11139#ifdef UPB_USE_JIT_X64
11140 if (method->is_native_) {
11141 const mgroup *group = (const mgroup*)method->group;
11142 if (d->top != d->stack)
11143 d->stack->end_ofs = 0;
11144 group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
11145 } else
11146#endif
11147 {
11148 const uint32_t *p = d->pc;
11149 d->stack->end_ofs = end;
11150 /* Check the previous bytecode, but guard against beginning. */
11151 if (p != method->code_base.ptr) p--;
11152 if (getop(*p) == OP_CHECKDELIM) {
11153 /* Rewind from OP_TAG* to OP_CHECKDELIM. */
Austin Schuh40c16522018-10-28 20:27:54 -070011154 UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
Brian Silverman9c614bc2016-02-15 20:20:02 -050011155 getop(*d->pc) == OP_TAG2 ||
11156 getop(*d->pc) == OP_TAGN ||
11157 getop(*d->pc) == OP_DISPATCH);
11158 d->pc = p;
11159 }
11160 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
11161 }
11162
11163 if (d->call_len != 0) {
11164 seterr(d, "Unexpected EOF inside submessage or group");
11165 return false;
11166 }
11167
11168 return true;
11169}
11170
11171size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
11172 size_t size, const upb_bufhandle *handle) {
11173 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
11174
11175 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
11176 CHECK_RETURN(result);
11177
11178 return run_decoder_vm(decoder, group, handle);
11179}
11180
11181
11182/* Public API *****************************************************************/
11183
11184void upb_pbdecoder_reset(upb_pbdecoder *d) {
11185 d->top = d->stack;
11186 d->top->groupnum = 0;
11187 d->ptr = d->residual;
11188 d->buf = d->residual;
11189 d->end = d->residual;
11190 d->residual_end = d->residual;
11191}
11192
11193upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
11194 upb_sink *sink) {
11195 const size_t default_max_nesting = 64;
11196#ifndef NDEBUG
11197 size_t size_before = upb_env_bytesallocated(e);
11198#endif
11199
11200 upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
11201 if (!d) return NULL;
11202
11203 d->method_ = m;
11204 d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
11205 d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
11206 if (!d->stack || !d->callstack) {
11207 return NULL;
11208 }
11209
11210 d->env = e;
11211 d->limit = d->stack + default_max_nesting - 1;
11212 d->stack_size = default_max_nesting;
Austin Schuh40c16522018-10-28 20:27:54 -070011213 d->status = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -050011214
11215 upb_pbdecoder_reset(d);
11216 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
11217
Austin Schuh40c16522018-10-28 20:27:54 -070011218 UPB_ASSERT(sink);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011219 if (d->method_->dest_handlers_) {
11220 if (sink->handlers != d->method_->dest_handlers_)
11221 return NULL;
11222 }
11223 upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
11224
11225 /* If this fails, increase the value in decoder.h. */
Austin Schuh40c16522018-10-28 20:27:54 -070011226 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(e) - size_before <=
11227 UPB_PB_DECODER_SIZE);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011228 return d;
11229}
11230
11231uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
11232 return offset(d);
11233}
11234
11235const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
11236 return d->method_;
11237}
11238
11239upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
11240 return &d->input_;
11241}
11242
11243size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
11244 return d->stack_size;
11245}
11246
11247bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
Austin Schuh40c16522018-10-28 20:27:54 -070011248 UPB_ASSERT(d->top >= d->stack);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011249
11250 if (max < (size_t)(d->top - d->stack)) {
11251 /* Can't set a limit smaller than what we are currently at. */
11252 return false;
11253 }
11254
11255 if (max > d->stack_size) {
11256 /* Need to reallocate stack and callstack to accommodate. */
11257 size_t old_size = stacksize(d, d->stack_size);
11258 size_t new_size = stacksize(d, max);
11259 void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
11260 if (!p) {
11261 return false;
11262 }
11263 d->stack = p;
11264
11265 old_size = callstacksize(d, d->stack_size);
11266 new_size = callstacksize(d, max);
11267 p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
11268 if (!p) {
11269 return false;
11270 }
11271 d->callstack = p;
11272
11273 d->stack_size = max;
11274 }
11275
11276 d->limit = d->stack + max - 1;
11277 return true;
11278}
11279/*
11280** upb::Encoder
11281**
11282** Since we are implementing pure handlers (ie. without any out-of-band access
11283** to pre-computed lengths), we have to buffer all submessages before we can
11284** emit even their first byte.
11285**
11286** Not knowing the size of submessages also means we can't write a perfect
11287** zero-copy implementation, even with buffering. Lengths are stored as
11288** varints, which means that we don't know how many bytes to reserve for the
11289** length until we know what the length is.
11290**
11291** This leaves us with three main choices:
11292**
11293** 1. buffer all submessage data in a temporary buffer, then copy it exactly
11294** once into the output buffer.
11295**
11296** 2. attempt to buffer data directly into the output buffer, estimating how
11297** many bytes each length will take. When our guesses are wrong, use
11298** memmove() to grow or shrink the allotted space.
11299**
11300** 3. buffer directly into the output buffer, allocating a max length
11301** ahead-of-time for each submessage length. If we overallocated, we waste
11302** space, but no memcpy() or memmove() is required. This approach requires
11303** defining a maximum size for submessages and rejecting submessages that
11304** exceed that size.
11305**
11306** (2) and (3) have the potential to have better performance, but they are more
11307** complicated and subtle to implement:
11308**
11309** (3) requires making an arbitrary choice of the maximum message size; it
11310** wastes space when submessages are shorter than this and fails
11311** completely when they are longer. This makes it more finicky and
11312** requires configuration based on the input. It also makes it impossible
11313** to perfectly match the output of reference encoders that always use the
11314** optimal amount of space for each length.
11315**
** (2) requires guessing the size upfront, and if multiple lengths are
Brian Silverman9c614bc2016-02-15 20:20:02 -050011317** guessed wrong the minimum required number of memmove() operations may
11318** be complicated to compute correctly. Implemented properly, it may have
11319** a useful amortized or average cost, but more investigation is required
11320** to determine this and what the optimal algorithm is to achieve it.
11321**
11322** (1) makes you always pay for exactly one copy, but its implementation is
11323** the simplest and its performance is predictable.
11324**
11325** So for now, we implement (1) only. If we wish to optimize later, we should
11326** be able to do it without affecting users.
11327**
11328** The strategy is to buffer the segments of data that do *not* depend on
11329** unknown lengths in one buffer, and keep a separate buffer of segment pointers
11330** and lengths. When the top-level submessage ends, we can go beginning to end,
11331** alternating the writing of lengths with memcpy() of the rest of the data.
11332** At the top level though, no buffering is required.
11333*/
11334
11335
Brian Silverman9c614bc2016-02-15 20:20:02 -050011336
/* The output buffer is divided into segments; a segment is a string of data
 * that is "ready to go" -- it does not need any varint lengths inserted into
 * the middle.  The seams between segments are where varints will be inserted
 * once they are known.
 *
 * We also use the concept of a "run", which is a range of encoded bytes that
 * occur at a single submessage level.  Every segment contains one or more runs.
 *
 * A segment can span messages.  Consider:
 *
 *                   .--Submessage lengths---------.
 *                   |       |                     |
 *                   |       V                     V
 *                   V      | |---------------    | |-----------------
 * Submessages:     | |-----------------------------------------------
 * Top-level msg: ------------------------------------------------------------
 *
 * Segments:           -----   -------------------   -----------------
 * Runs:               *----   *--------------*---   *----------------
 *                     (* marks the start)
 *
 * Note that the top-level message is not in any segment because it does not
 * have any length preceding it.
 *
 * A segment is only interrupted when another length needs to be inserted.  So
 * observe how the second segment spans both the inner submessage and part of
 * the next enclosing message. */
typedef struct {
  /* Length of the delimited message that begins at this segment; it is
   * varint-encoded and emitted immediately before the segment's bytes. */
  uint32_t msglen;
  /* Number of buffered bytes that belong to this segment. */
  uint32_t seglen;
} upb_pb_encoder_segment;
11368
11369struct upb_pb_encoder {
11370 upb_env *env;
11371
11372 /* Our input and output. */
11373 upb_sink input_;
11374 upb_bytessink *output_;
11375
11376 /* The "subclosure" -- used as the inner closure as part of the bytessink
11377 * protocol. */
11378 void *subc;
11379
11380 /* The output buffer and limit, and our current write position. "buf"
11381 * initially points to "initbuf", but is dynamically allocated if we need to
11382 * grow beyond the initial size. */
11383 char *buf, *ptr, *limit;
11384
11385 /* The beginning of the current run, or undefined if we are at the top
11386 * level. */
11387 char *runbegin;
11388
11389 /* The list of segments we are accumulating. */
11390 upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
11391
11392 /* The stack of enclosing submessages. Each entry in the stack points to the
11393 * segment where this submessage's length is being accumulated. */
11394 int *stack, *top, *stacklimit;
11395
11396 /* Depth of startmsg/endmsg calls. */
11397 int depth;
11398};
11399
11400/* low-level buffering ********************************************************/
11401
11402/* Low-level functions for interacting with the output buffer. */
11403
11404/* TODO(haberman): handle pushback */
11405static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
11406 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -070011407 UPB_ASSERT(n == len);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011408}
11409
11410static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
11411 return &e->segbuf[*e->top];
11412}
11413
11414/* Call to ensure that at least "bytes" bytes are available for writing at
11415 * e->ptr. Returns false if the bytes could not be allocated. */
11416static bool reserve(upb_pb_encoder *e, size_t bytes) {
11417 if ((size_t)(e->limit - e->ptr) < bytes) {
11418 /* Grow buffer. */
11419 char *new_buf;
11420 size_t needed = bytes + (e->ptr - e->buf);
11421 size_t old_size = e->limit - e->buf;
11422
11423 size_t new_size = old_size;
11424
11425 while (new_size < needed) {
11426 new_size *= 2;
11427 }
11428
11429 new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
11430
11431 if (new_buf == NULL) {
11432 return false;
11433 }
11434
11435 e->ptr = new_buf + (e->ptr - e->buf);
11436 e->runbegin = new_buf + (e->runbegin - e->buf);
11437 e->limit = new_buf + new_size;
11438 e->buf = new_buf;
11439 }
11440
11441 return true;
11442}
11443
11444/* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
11445 * previously called reserve() with at least this many bytes. */
11446static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
Austin Schuh40c16522018-10-28 20:27:54 -070011447 UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011448 e->ptr += bytes;
11449}
11450
11451/* Call when all of the bytes for a handler have been written. Flushes the
11452 * bytes if possible and necessary, returning false if this failed. */
11453static bool commit(upb_pb_encoder *e) {
11454 if (!e->top) {
11455 /* We aren't inside a delimited region. Flush our accumulated bytes to
11456 * the output.
11457 *
11458 * TODO(haberman): in the future we may want to delay flushing for
11459 * efficiency reasons. */
11460 putbuf(e, e->buf, e->ptr - e->buf);
11461 e->ptr = e->buf;
11462 }
11463
11464 return true;
11465}
11466
11467/* Writes the given bytes to the buffer, handling reserve/advance. */
11468static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
11469 if (!reserve(e, len)) {
11470 return false;
11471 }
11472
11473 memcpy(e->ptr, data, len);
11474 encoder_advance(e, len);
11475 return true;
11476}
11477
11478/* Finish the current run by adding the run totals to the segment and message
11479 * length. */
11480static void accumulate(upb_pb_encoder *e) {
11481 size_t run_len;
Austin Schuh40c16522018-10-28 20:27:54 -070011482 UPB_ASSERT(e->ptr >= e->runbegin);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011483 run_len = e->ptr - e->runbegin;
11484 e->segptr->seglen += run_len;
11485 top(e)->msglen += run_len;
11486 e->runbegin = e->ptr;
11487}
11488
11489/* Call to indicate the start of delimited region for which the full length is
11490 * not yet known. All data will be buffered until the length is known.
11491 * Delimited regions may be nested; their lengths will all be tracked properly. */
11492static bool start_delim(upb_pb_encoder *e) {
11493 if (e->top) {
11494 /* We are already buffering, advance to the next segment and push it on the
11495 * stack. */
11496 accumulate(e);
11497
11498 if (++e->top == e->stacklimit) {
11499 /* TODO(haberman): grow stack? */
11500 return false;
11501 }
11502
11503 if (++e->segptr == e->seglimit) {
11504 /* Grow segment buffer. */
11505 size_t old_size =
11506 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
11507 size_t new_size = old_size * 2;
11508 upb_pb_encoder_segment *new_buf =
11509 upb_env_realloc(e->env, e->segbuf, old_size, new_size);
11510
11511 if (new_buf == NULL) {
11512 return false;
11513 }
11514
11515 e->segptr = new_buf + (e->segptr - e->segbuf);
11516 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
11517 e->segbuf = new_buf;
11518 }
11519 } else {
11520 /* We were previously at the top level, start buffering. */
11521 e->segptr = e->segbuf;
11522 e->top = e->stack;
11523 e->runbegin = e->ptr;
11524 }
11525
11526 *e->top = e->segptr - e->segbuf;
11527 e->segptr->seglen = 0;
11528 e->segptr->msglen = 0;
11529
11530 return true;
11531}
11532
11533/* Call to indicate the end of a delimited region. We now know the length of
11534 * the delimited region. If we are not nested inside any other delimited
11535 * regions, we can now emit all of the buffered data we accumulated. */
11536static bool end_delim(upb_pb_encoder *e) {
11537 size_t msglen;
11538 accumulate(e);
11539 msglen = top(e)->msglen;
11540
11541 if (e->top == e->stack) {
11542 /* All lengths are now available, emit all buffered data. */
11543 char buf[UPB_PB_VARINT_MAX_LEN];
11544 upb_pb_encoder_segment *s;
11545 const char *ptr = e->buf;
11546 for (s = e->segbuf; s <= e->segptr; s++) {
11547 size_t lenbytes = upb_vencode64(s->msglen, buf);
11548 putbuf(e, buf, lenbytes);
11549 putbuf(e, ptr, s->seglen);
11550 ptr += s->seglen;
11551 }
11552
11553 e->ptr = e->buf;
11554 e->top = NULL;
11555 } else {
11556 /* Need to keep buffering; propagate length info into enclosing
11557 * submessages. */
11558 --e->top;
11559 top(e)->msglen += msglen + upb_varint_size(msglen);
11560 }
11561
11562 return true;
11563}
11564
11565
11566/* tag_t **********************************************************************/
11567
/* A precomputed (pre-encoded) tag: the varint encoding of a field's
 * (number << 3 | wire type), stored with its length so it can be copied to
 * the output verbatim. */

typedef struct {
  /* Number of valid bytes in "tag". */
  uint8_t bytes;
  /* The encoded tag bytes. */
  char tag[7];
} tag_t;
11574
11575/* Allocates a new tag for this field, and sets it in these handlerattr. */
11576static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
11577 upb_handlerattr *attr) {
11578 uint32_t n = upb_fielddef_number(f);
11579
Austin Schuh40c16522018-10-28 20:27:54 -070011580 tag_t *tag = upb_gmalloc(sizeof(tag_t));
Brian Silverman9c614bc2016-02-15 20:20:02 -050011581 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
11582
11583 upb_handlerattr_init(attr);
11584 upb_handlerattr_sethandlerdata(attr, tag);
Austin Schuh40c16522018-10-28 20:27:54 -070011585 upb_handlers_addcleanup(h, tag, upb_gfree);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011586}
11587
11588static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
11589 return encode_bytes(e, tag->tag, tag->bytes);
11590}
11591
11592
11593/* encoding of wire types *****************************************************/
11594
11595static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
11596 /* TODO(haberman): byte-swap for big endian. */
11597 return encode_bytes(e, &val, sizeof(uint64_t));
11598}
11599
11600static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
11601 /* TODO(haberman): byte-swap for big endian. */
11602 return encode_bytes(e, &val, sizeof(uint32_t));
11603}
11604
11605static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
11606 if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
11607 return false;
11608 }
11609
11610 encoder_advance(e, upb_vencode64(val, e->ptr));
11611 return true;
11612}
11613
/* Returns the object representation of "d" reinterpreted as a uint64_t.
 * memcpy is used so that no aliasing rules are violated. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
11619
/* Returns the object representation of "d" reinterpreted as a uint32_t.
 * memcpy is used so that no aliasing rules are violated. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
11625
11626
11627/* encoding of proto types ****************************************************/
11628
11629static bool startmsg(void *c, const void *hd) {
11630 upb_pb_encoder *e = c;
11631 UPB_UNUSED(hd);
11632 if (e->depth++ == 0) {
11633 upb_bytessink_start(e->output_, 0, &e->subc);
11634 }
11635 return true;
11636}
11637
11638static bool endmsg(void *c, const void *hd, upb_status *status) {
11639 upb_pb_encoder *e = c;
11640 UPB_UNUSED(hd);
11641 UPB_UNUSED(status);
11642 if (--e->depth == 0) {
11643 upb_bytessink_end(e->output_);
11644 }
11645 return true;
11646}
11647
11648static void *encode_startdelimfield(void *c, const void *hd) {
11649 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
11650 return ok ? c : UPB_BREAK;
11651}
11652
/* Unknown-field handler: copies the "len" bytes at "buf" to the output
 * unchanged, then commits.  Returns false if either step fails. */
static bool encode_unknown(void *c, const void *hd, const char *buf,
                           size_t len) {
  UPB_UNUSED(hd);
  if (!encode_bytes(c, buf, len)) {
    return false;
  }
  return commit(c);
}
11658
Brian Silverman9c614bc2016-02-15 20:20:02 -050011659static bool encode_enddelimfield(void *c, const void *hd) {
11660 UPB_UNUSED(hd);
11661 return end_delim(c);
11662}
11663
11664static void *encode_startgroup(void *c, const void *hd) {
11665 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
11666}
11667
/* End handler for a group field: writes the tag passed as handler data and
 * commits it.  Returns false if either step fails. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
11671
/* Start handler for a string/bytes field.  The size hint is ignored; the work
 * is the same as for any other length-delimited field. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure = encode_startdelimfield(c, hd);
  UPB_UNUSED(size_hint);
  return subclosure;
}
11676
11677static size_t encode_strbuf(void *c, const void *hd, const char *buf,
11678 size_t len, const upb_bufhandle *h) {
11679 UPB_UNUSED(hd);
11680 UPB_UNUSED(h);
11681 return encode_bytes(c, buf, len) ? len : 0;
11682}
11683
11684#define T(type, ctype, convert, encode) \
11685 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
11686 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
11687 } \
11688 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
11689 UPB_UNUSED(hd); \
11690 return encode(e, (convert)(val)); \
11691 }
11692
11693T(double, double, dbl2uint64, encode_fixed64)
11694T(float, float, flt2uint32, encode_fixed32)
11695T(int64, int64_t, uint64_t, encode_varint)
Austin Schuh40c16522018-10-28 20:27:54 -070011696T(int32, int32_t, int64_t, encode_varint)
Brian Silverman9c614bc2016-02-15 20:20:02 -050011697T(fixed64, uint64_t, uint64_t, encode_fixed64)
11698T(fixed32, uint32_t, uint32_t, encode_fixed32)
11699T(bool, bool, bool, encode_varint)
11700T(uint32, uint32_t, uint32_t, encode_varint)
11701T(uint64, uint64_t, uint64_t, encode_varint)
11702T(enum, int32_t, uint32_t, encode_varint)
11703T(sfixed32, int32_t, uint32_t, encode_fixed32)
11704T(sfixed64, int64_t, uint64_t, encode_fixed64)
11705T(sint32, int32_t, upb_zzenc_32, encode_varint)
11706T(sint64, int64_t, upb_zzenc_64, encode_varint)
11707
11708#undef T
11709
11710
11711/* code to build the handlers *************************************************/
11712
11713static void newhandlers_callback(const void *closure, upb_handlers *h) {
11714 const upb_msgdef *m;
11715 upb_msg_field_iter i;
11716
11717 UPB_UNUSED(closure);
11718
11719 upb_handlers_setstartmsg(h, startmsg, NULL);
11720 upb_handlers_setendmsg(h, endmsg, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -070011721 upb_handlers_setunknown(h, encode_unknown, NULL);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011722
11723 m = upb_handlers_msgdef(h);
11724 for(upb_msg_field_begin(&i, m);
11725 !upb_msg_field_done(&i);
11726 upb_msg_field_next(&i)) {
11727 const upb_fielddef *f = upb_msg_iter_field(&i);
11728 bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
11729 upb_fielddef_packed(f);
11730 upb_handlerattr attr;
11731 upb_wiretype_t wt =
11732 packed ? UPB_WIRE_TYPE_DELIMITED
11733 : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
11734
11735 /* Pre-encode the tag for this field. */
11736 new_tag(h, f, wt, &attr);
11737
11738 if (packed) {
11739 upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
11740 upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
11741 }
11742
11743#define T(upper, lower, upbtype) \
11744 case UPB_DESCRIPTOR_TYPE_##upper: \
11745 if (packed) { \
11746 upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
11747 } else { \
11748 upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
11749 } \
11750 break;
11751
11752 switch (upb_fielddef_descriptortype(f)) {
11753 T(DOUBLE, double, double);
11754 T(FLOAT, float, float);
11755 T(INT64, int64, int64);
11756 T(INT32, int32, int32);
11757 T(FIXED64, fixed64, uint64);
11758 T(FIXED32, fixed32, uint32);
11759 T(BOOL, bool, bool);
11760 T(UINT32, uint32, uint32);
11761 T(UINT64, uint64, uint64);
11762 T(ENUM, enum, int32);
11763 T(SFIXED32, sfixed32, int32);
11764 T(SFIXED64, sfixed64, int64);
11765 T(SINT32, sint32, int32);
11766 T(SINT64, sint64, int64);
11767 case UPB_DESCRIPTOR_TYPE_STRING:
11768 case UPB_DESCRIPTOR_TYPE_BYTES:
11769 upb_handlers_setstartstr(h, f, encode_startstr, &attr);
11770 upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
11771 upb_handlers_setstring(h, f, encode_strbuf, &attr);
11772 break;
11773 case UPB_DESCRIPTOR_TYPE_MESSAGE:
11774 upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
11775 upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
11776 break;
11777 case UPB_DESCRIPTOR_TYPE_GROUP: {
11778 /* Endgroup takes a different tag (wire_type = END_GROUP). */
11779 upb_handlerattr attr2;
11780 new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
11781
11782 upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
11783 upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
11784
11785 upb_handlerattr_uninit(&attr2);
11786 break;
11787 }
11788 }
11789
11790#undef T
11791
11792 upb_handlerattr_uninit(&attr);
11793 }
11794}
11795
11796void upb_pb_encoder_reset(upb_pb_encoder *e) {
11797 e->segptr = NULL;
11798 e->top = NULL;
11799 e->depth = 0;
11800}
11801
11802
11803/* public API *****************************************************************/
11804
11805const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
11806 const void *owner) {
11807 return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
11808}
11809
11810upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
11811 upb_bytessink *output) {
11812 const size_t initial_bufsize = 256;
11813 const size_t initial_segbufsize = 16;
11814 /* TODO(haberman): make this configurable. */
11815 const size_t stack_size = 64;
11816#ifndef NDEBUG
11817 const size_t size_before = upb_env_bytesallocated(env);
11818#endif
11819
11820 upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
11821 if (!e) return NULL;
11822
11823 e->buf = upb_env_malloc(env, initial_bufsize);
11824 e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
11825 e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
11826
11827 if (!e->buf || !e->segbuf || !e->stack) {
11828 return NULL;
11829 }
11830
11831 e->limit = e->buf + initial_bufsize;
11832 e->seglimit = e->segbuf + initial_segbufsize;
11833 e->stacklimit = e->stack + stack_size;
11834
11835 upb_pb_encoder_reset(e);
11836 upb_sink_reset(&e->input_, h, e);
11837
11838 e->env = env;
11839 e->output_ = output;
11840 e->subc = output->closure;
11841 e->ptr = e->buf;
11842
11843 /* If this fails, increase the value in encoder.h. */
Austin Schuh40c16522018-10-28 20:27:54 -070011844 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=
11845 UPB_PB_ENCODER_SIZE);
Brian Silverman9c614bc2016-02-15 20:20:02 -050011846 return e;
11847}
11848
11849upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
11850
11851
Brian Silverman9c614bc2016-02-15 20:20:02 -050011852
Austin Schuh40c16522018-10-28 20:27:54 -070011853upb_filedef **upb_loaddescriptor(const char *buf, size_t n, const void *owner,
11854 upb_status *status) {
Brian Silverman9c614bc2016-02-15 20:20:02 -050011855 /* Create handlers. */
11856 const upb_pbdecodermethod *decoder_m;
11857 const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
11858 upb_env env;
11859 upb_pbdecodermethodopts opts;
11860 upb_pbdecoder *decoder;
11861 upb_descreader *reader;
11862 bool ok;
Austin Schuh40c16522018-10-28 20:27:54 -070011863 size_t i;
11864 upb_filedef **ret = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -050011865
11866 upb_pbdecodermethodopts_init(&opts, reader_h);
11867 decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
11868
11869 upb_env_init(&env);
11870 upb_env_reporterrorsto(&env, status);
11871
11872 reader = upb_descreader_create(&env, reader_h);
11873 decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
11874
11875 /* Push input data. */
Austin Schuh40c16522018-10-28 20:27:54 -070011876 ok = upb_bufsrc_putbuf(buf, n, upb_pbdecoder_input(decoder));
Brian Silverman9c614bc2016-02-15 20:20:02 -050011877
Austin Schuh40c16522018-10-28 20:27:54 -070011878 if (!ok) {
11879 goto cleanup;
11880 }
11881
11882 ret = upb_gmalloc(sizeof (*ret) * (upb_descreader_filecount(reader) + 1));
11883
11884 if (!ret) {
11885 goto cleanup;
11886 }
11887
11888 for (i = 0; i < upb_descreader_filecount(reader); i++) {
11889 ret[i] = upb_descreader_file(reader, i);
11890 upb_filedef_ref(ret[i], owner);
11891 }
11892
11893 ret[i] = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -050011894
11895cleanup:
11896 upb_env_uninit(&env);
11897 upb_handlers_unref(reader_h, &reader_h);
11898 upb_pbdecodermethod_unref(decoder_m, &decoder_m);
11899 return ret;
11900}
Brian Silverman9c614bc2016-02-15 20:20:02 -050011901/*
11902 * upb::pb::TextPrinter
11903 *
11904 * OPT: This is not optimized at all. It uses printf() which parses the format
11905 * string every time, and it allocates memory for every put.
11906 */
11907
11908
11909#include <ctype.h>
11910#include <float.h>
11911#include <inttypes.h>
11912#include <stdarg.h>
11913#include <stdio.h>
Brian Silverman9c614bc2016-02-15 20:20:02 -050011914#include <string.h>
11915
11916
11917struct upb_textprinter {
11918 upb_sink input_;
11919 upb_bytessink *output_;
11920 int indent_depth_;
11921 bool single_line_;
11922 void *subc;
11923};
11924
/* Jumps to the enclosing function's "err" label when "x" evaluates to a
 * negative value.  Expands to a bare if-statement, so do not use it as the
 * body of an unbraced if/else. */
#define CHECK(x) if ((x) < 0) goto err;
11926
/* Returns the part of "longname" that follows its last '.', or "longname"
 * itself when it contains no '.'.  The result points into "longname". */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
11931
11932static int indent(upb_textprinter *p) {
11933 int i;
11934 if (!p->single_line_)
11935 for (i = 0; i < p->indent_depth_; i++)
11936 upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL);
11937 return 0;
11938}
11939
11940static int endfield(upb_textprinter *p) {
11941 const char ch = (p->single_line_ ? ' ' : '\n');
11942 upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
11943 return 0;
11944}
11945
11946static int putescaped(upb_textprinter *p, const char *buf, size_t len,
11947 bool preserve_utf8) {
11948 /* Based on CEscapeInternal() from Google's protobuf release. */
11949 char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
11950 const char *end = buf + len;
11951
11952 /* I think hex is prettier and more useful, but proto2 uses octal; should
11953 * investigate whether it can parse hex also. */
11954 const bool use_hex = false;
11955 bool last_hex_escape = false; /* true if last output char was \xNN */
11956
11957 for (; buf < end; buf++) {
11958 bool is_hex_escape;
11959
11960 if (dstend - dst < 4) {
11961 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
11962 dst = dstbuf;
11963 }
11964
11965 is_hex_escape = false;
11966 switch (*buf) {
11967 case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break;
11968 case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break;
11969 case '\t': *(dst++) = '\\'; *(dst++) = 't'; break;
11970 case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
11971 case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
11972 case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
11973 default:
11974 /* Note that if we emit \xNN and the buf character after that is a hex
11975 * digit then that digit must be escaped too to prevent it being
11976 * interpreted as part of the character code by C. */
11977 if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
11978 (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
11979 sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
11980 is_hex_escape = use_hex;
11981 dst += 4;
11982 } else {
11983 *(dst++) = *buf; break;
11984 }
11985 }
11986 last_hex_escape = is_hex_escape;
11987 }
11988 /* Flush remaining data. */
11989 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
11990 return 0;
11991}
11992
11993bool putf(upb_textprinter *p, const char *fmt, ...) {
11994 va_list args;
11995 va_list args_copy;
11996 char *str;
11997 int written;
11998 int len;
11999 bool ok;
12000
12001 va_start(args, fmt);
12002
12003 /* Run once to get the length of the string. */
12004 _upb_va_copy(args_copy, args);
12005 len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
12006 va_end(args_copy);
12007
12008 /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
Austin Schuh40c16522018-10-28 20:27:54 -070012009 str = upb_gmalloc(len + 1);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012010 if (!str) return false;
12011 written = vsprintf(str, fmt, args);
12012 va_end(args);
Austin Schuh40c16522018-10-28 20:27:54 -070012013 UPB_ASSERT(written == len);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012014
12015 ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -070012016 upb_gfree(str);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012017 return ok;
12018}
12019
12020
12021/* handlers *******************************************************************/
12022
12023static bool textprinter_startmsg(void *c, const void *hd) {
12024 upb_textprinter *p = c;
12025 UPB_UNUSED(hd);
12026 if (p->indent_depth_ == 0) {
12027 upb_bytessink_start(p->output_, 0, &p->subc);
12028 }
12029 return true;
12030}
12031
12032static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
12033 upb_textprinter *p = c;
12034 UPB_UNUSED(hd);
12035 UPB_UNUSED(s);
12036 if (p->indent_depth_ == 0) {
12037 upb_bytessink_end(p->output_);
12038 }
12039 return true;
12040}
12041
/* Generates textprinter_put<name>(): a value handler that prints
 * "<field name>: <value>" using printf format "fmt", preceded by the current
 * indentation and followed by the field separator.  The handler data is the
 * field's upb_fielddef.  Returns false if indent() or endfield() reports a
 * negative result. */
#define TYPE(name, ctype, fmt)                                                 \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    if (indent(p) < 0) {                                                       \
      return false;                                                            \
    }                                                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
    if (endfield(p) < 0) {                                                     \
      return false;                                                            \
    }                                                                          \
    return true;                                                               \
  }
12054
12055static bool textprinter_putbool(void *closure, const void *handler_data,
12056 bool val) {
12057 upb_textprinter *p = closure;
12058 const upb_fielddef *f = handler_data;
12059 CHECK(indent(p));
12060 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
12061 CHECK(endfield(p));
12062 return true;
12063err:
12064 return false;
12065}
12066
12067#define STRINGIFY_HELPER(x) #x
12068#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
12069
12070TYPE(int32, int32_t, "%" PRId32)
12071TYPE(int64, int64_t, "%" PRId64)
12072TYPE(uint32, uint32_t, "%" PRIu32)
12073TYPE(uint64, uint64_t, "%" PRIu64)
12074TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
12075TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
12076
12077#undef TYPE
12078
12079/* Output a symbolic value from the enum if found, else just print as int32. */
12080static bool textprinter_putenum(void *closure, const void *handler_data,
12081 int32_t val) {
12082 upb_textprinter *p = closure;
12083 const upb_fielddef *f = handler_data;
12084 const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
12085 const char *label = upb_enumdef_iton(enum_def, val);
12086 if (label) {
12087 indent(p);
12088 putf(p, "%s: %s", upb_fielddef_name(f), label);
12089 endfield(p);
12090 } else {
12091 if (!textprinter_putint32(closure, handler_data, val))
12092 return false;
12093 }
12094 return true;
12095}
12096
12097static void *textprinter_startstr(void *closure, const void *handler_data,
12098 size_t size_hint) {
12099 upb_textprinter *p = closure;
12100 const upb_fielddef *f = handler_data;
12101 UPB_UNUSED(size_hint);
12102 indent(p);
12103 putf(p, "%s: \"", upb_fielddef_name(f));
12104 return p;
12105}
12106
12107static bool textprinter_endstr(void *closure, const void *handler_data) {
12108 upb_textprinter *p = closure;
12109 UPB_UNUSED(handler_data);
12110 putf(p, "\"");
12111 endfield(p);
12112 return true;
12113}
12114
12115static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
12116 size_t len, const upb_bufhandle *handle) {
12117 upb_textprinter *p = closure;
12118 const upb_fielddef *f = hd;
12119 UPB_UNUSED(handle);
12120 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
12121 return len;
12122err:
12123 return 0;
12124}
12125
12126static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
12127 upb_textprinter *p = closure;
12128 const char *name = handler_data;
12129 CHECK(indent(p));
12130 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
12131 p->indent_depth_++;
12132 return p;
12133err:
12134 return UPB_BREAK;
12135}
12136
12137static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
12138 upb_textprinter *p = closure;
12139 UPB_UNUSED(handler_data);
12140 p->indent_depth_--;
12141 CHECK(indent(p));
12142 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
12143 CHECK(endfield(p));
12144 return true;
12145err:
12146 return false;
12147}
12148
12149static void onmreg(const void *c, upb_handlers *h) {
12150 const upb_msgdef *m = upb_handlers_msgdef(h);
12151 upb_msg_field_iter i;
12152 UPB_UNUSED(c);
12153
12154 upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
12155 upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
12156
12157 for(upb_msg_field_begin(&i, m);
12158 !upb_msg_field_done(&i);
12159 upb_msg_field_next(&i)) {
12160 upb_fielddef *f = upb_msg_iter_field(&i);
12161 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
12162 upb_handlerattr_sethandlerdata(&attr, f);
12163 switch (upb_fielddef_type(f)) {
12164 case UPB_TYPE_INT32:
12165 upb_handlers_setint32(h, f, textprinter_putint32, &attr);
12166 break;
12167 case UPB_TYPE_INT64:
12168 upb_handlers_setint64(h, f, textprinter_putint64, &attr);
12169 break;
12170 case UPB_TYPE_UINT32:
12171 upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
12172 break;
12173 case UPB_TYPE_UINT64:
12174 upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
12175 break;
12176 case UPB_TYPE_FLOAT:
12177 upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
12178 break;
12179 case UPB_TYPE_DOUBLE:
12180 upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
12181 break;
12182 case UPB_TYPE_BOOL:
12183 upb_handlers_setbool(h, f, textprinter_putbool, &attr);
12184 break;
12185 case UPB_TYPE_STRING:
12186 case UPB_TYPE_BYTES:
12187 upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
12188 upb_handlers_setstring(h, f, textprinter_putstr, &attr);
12189 upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
12190 break;
12191 case UPB_TYPE_MESSAGE: {
12192 const char *name =
12193 upb_fielddef_istagdelim(f)
12194 ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
12195 : upb_fielddef_name(f);
12196 upb_handlerattr_sethandlerdata(&attr, name);
12197 upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
12198 upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
12199 break;
12200 }
12201 case UPB_TYPE_ENUM:
12202 upb_handlers_setint32(h, f, textprinter_putenum, &attr);
12203 break;
12204 }
12205 }
12206}
12207
12208static void textprinter_reset(upb_textprinter *p, bool single_line) {
12209 p->single_line_ = single_line;
12210 p->indent_depth_ = 0;
12211}
12212
12213
12214/* Public API *****************************************************************/
12215
12216upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
12217 upb_bytessink *output) {
12218 upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
12219 if (!p) return NULL;
12220
12221 p->output_ = output;
12222 upb_sink_reset(&p->input_, h, p);
12223 textprinter_reset(p, false);
12224
12225 return p;
12226}
12227
12228const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
12229 const void *owner) {
12230 return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
12231}
12232
12233upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
12234
12235void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
12236 p->single_line_ = single_line;
12237}
12238
12239
12240/* Index is descriptor type. */
12241const uint8_t upb_pb_native_wire_types[] = {
12242 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
12243 UPB_WIRE_TYPE_64BIT, /* DOUBLE */
12244 UPB_WIRE_TYPE_32BIT, /* FLOAT */
12245 UPB_WIRE_TYPE_VARINT, /* INT64 */
12246 UPB_WIRE_TYPE_VARINT, /* UINT64 */
12247 UPB_WIRE_TYPE_VARINT, /* INT32 */
12248 UPB_WIRE_TYPE_64BIT, /* FIXED64 */
12249 UPB_WIRE_TYPE_32BIT, /* FIXED32 */
12250 UPB_WIRE_TYPE_VARINT, /* BOOL */
12251 UPB_WIRE_TYPE_DELIMITED, /* STRING */
12252 UPB_WIRE_TYPE_START_GROUP, /* GROUP */
12253 UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
12254 UPB_WIRE_TYPE_DELIMITED, /* BYTES */
12255 UPB_WIRE_TYPE_VARINT, /* UINT32 */
12256 UPB_WIRE_TYPE_VARINT, /* ENUM */
12257 UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
12258 UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
12259 UPB_WIRE_TYPE_VARINT, /* SINT32 */
12260 UPB_WIRE_TYPE_VARINT, /* SINT64 */
12261};
12262
12263/* A basic branch-based decoder, uses 32-bit values to get good performance
12264 * on 32-bit architectures (but performs well on 64-bits also).
12265 * This scheme comes from the original Google Protobuf implementation
12266 * (proto2). */
12267upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
12268 upb_decoderet err = {NULL, 0};
12269 const char *p = r.p;
12270 uint32_t low = (uint32_t)r.val;
12271 uint32_t high = 0;
12272 uint32_t b;
12273 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
12274 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
12275 b = *(p++); low |= (b & 0x7fU) << 28;
12276 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done;
12277 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done;
12278 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
12279 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
12280 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
12281 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
12282 return err;
12283
12284done:
12285 r.val = ((uint64_t)high << 32) | low;
12286 r.p = p;
12287 return r;
12288}
12289
12290/* Like the previous, but uses 64-bit values. */
12291upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
12292 const char *p = r.p;
12293 uint64_t val = r.val;
12294 uint64_t b;
12295 upb_decoderet err = {NULL, 0};
12296 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
12297 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
12298 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
12299 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
12300 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
12301 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
12302 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
12303 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
12304 return err;
12305
12306done:
12307 r.val = val;
12308 r.p = p;
12309 return r;
12310}
12311
Brian Silverman9c614bc2016-02-15 20:20:02 -050012312#line 1 "upb/json/parser.rl"
12313/*
12314** upb::json::Parser (upb_json_parser)
12315**
12316** A parser that uses the Ragel State Machine Compiler to generate
12317** the finite automata.
12318**
12319** Ragel only natively handles regular languages, but we can manually
12320** program it a bit to handle context-free languages like JSON, by using
12321** the "fcall" and "fret" constructs.
12322**
12323** This parser can handle the basics, but needs several things to be fleshed
12324** out:
12325**
12326** - handling of unicode escape sequences (including high surrogate pairs).
12327** - properly check and report errors for unknown fields, stack overflow,
12328** improper array nesting (or lack of nesting).
12329** - handling of base64 sequences with padding characters.
12330** - handling of push-back (non-success returns from sink functions).
12331** - handling of keys/escape-sequences/etc that span input buffers.
12332*/
12333
Brian Silverman9c614bc2016-02-15 20:20:02 -050012334#include <errno.h>
Austin Schuh40c16522018-10-28 20:27:54 -070012335#include <float.h>
12336#include <math.h>
12337#include <stdint.h>
12338#include <stdlib.h>
12339#include <string.h>
Brian Silverman9c614bc2016-02-15 20:20:02 -050012340
12341
12342#define UPB_JSON_MAX_DEPTH 64
12343
12344typedef struct {
12345 upb_sink sink;
12346
12347 /* The current message in which we're parsing, and the field whose value we're
12348 * expecting next. */
12349 const upb_msgdef *m;
12350 const upb_fielddef *f;
12351
Austin Schuh40c16522018-10-28 20:27:54 -070012352 /* The table mapping json name to fielddef for this message. */
12353 upb_strtable *name_table;
12354
Brian Silverman9c614bc2016-02-15 20:20:02 -050012355 /* We are in a repeated-field context, ready to emit mapentries as
12356 * submessages. This flag alters the start-of-object (open-brace) behavior to
12357 * begin a sequence of mapentry messages rather than a single submessage. */
12358 bool is_map;
12359
12360 /* We are in a map-entry message context. This flag is set when parsing the
12361 * value field of a single map entry and indicates to all value-field parsers
12362 * (subobjects, strings, numbers, and bools) that the map-entry submessage
12363 * should end as soon as the value is parsed. */
12364 bool is_mapentry;
12365
12366 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
12367 * message's map field that we're currently parsing. This differs from |f|
12368 * because |f| is the field in the *current* message (i.e., the map-entry
12369 * message itself), not the parent's field that leads to this map. */
12370 const upb_fielddef *mapfield;
12371} upb_jsonparser_frame;
12372
12373struct upb_json_parser {
12374 upb_env *env;
Austin Schuh40c16522018-10-28 20:27:54 -070012375 const upb_json_parsermethod *method;
Brian Silverman9c614bc2016-02-15 20:20:02 -050012376 upb_bytessink input_;
12377
12378 /* Stack to track the JSON scopes we are in. */
12379 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
12380 upb_jsonparser_frame *top;
12381 upb_jsonparser_frame *limit;
12382
12383 upb_status status;
12384
12385 /* Ragel's internal parsing stack for the parsing state machine. */
12386 int current_state;
12387 int parser_stack[UPB_JSON_MAX_DEPTH];
12388 int parser_top;
12389
12390 /* The handle for the current buffer. */
12391 const upb_bufhandle *handle;
12392
12393 /* Accumulate buffer. See details in parser.rl. */
12394 const char *accumulated;
12395 size_t accumulated_len;
12396 char *accumulate_buf;
12397 size_t accumulate_buf_size;
12398
12399 /* Multi-part text data. See details in parser.rl. */
12400 int multipart_state;
12401 upb_selector_t string_selector;
12402
12403 /* Input capture. See details in parser.rl. */
12404 const char *capture;
12405
12406 /* Intermediate result of parsing a unicode escape sequence. */
12407 uint32_t digit;
12408};
12409
Austin Schuh40c16522018-10-28 20:27:54 -070012410struct upb_json_parsermethod {
12411 upb_refcounted base;
12412
12413 upb_byteshandler input_handler_;
12414
12415 /* Mainly for the purposes of refcounting, so all the fielddefs we point
12416 * to stay alive. */
12417 const upb_msgdef *msg;
12418
12419 /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */
12420 upb_inttable name_tables;
12421};
12422
/* Returns false from the enclosing function when |x| evaluates to false. */
#define PARSER_CHECK_RETURN(x) if (!(x)) return false

/* Sentinel object: p->capture is pointed at it to signal that a capture has
 * been suspended.  Only its address is used. */
static char suspend_capture;
12427
12428static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
12429 upb_handlertype_t type) {
12430 upb_selector_t sel;
12431 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
Austin Schuh40c16522018-10-28 20:27:54 -070012432 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012433 return sel;
12434}
12435
12436static upb_selector_t parser_getsel(upb_json_parser *p) {
12437 return getsel_for_handlertype(
12438 p, upb_handlers_getprimitivehandlertype(p->top->f));
12439}
12440
12441static bool check_stack(upb_json_parser *p) {
12442 if ((p->top + 1) == p->limit) {
12443 upb_status_seterrmsg(&p->status, "Nesting too deep");
12444 upb_env_reporterror(p->env, &p->status);
12445 return false;
12446 }
12447
12448 return true;
12449}
12450
Austin Schuh40c16522018-10-28 20:27:54 -070012451static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
12452 upb_value v;
12453 bool ok = upb_inttable_lookupptr(&p->method->name_tables, frame->m, &v);
12454 UPB_ASSERT(ok);
12455 frame->name_table = upb_value_getptr(v);
12456}
12457
/* GCC and Clang offer built-ins for overflow checking; we could switch to
 * them if that ever showed a performance benefit. */

/* Stores a + b in *c and returns true, unless the sum would exceed SIZE_MAX,
 * in which case *c is left untouched and false is returned. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
12466
/* Returns a * b, or SIZE_MAX if the product does not fit in a size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  size_t product;

  if (b == 0) {
    return 0;
  }

  /* Unsigned arithmetic wraps, so this is well defined even on overflow;
   * dividing back detects whether wrapping happened. */
  product = a * b;
  return (product / b == a) ? product : SIZE_MAX;
}
12475
12476
12477/* Base64 decoding ************************************************************/
12478
12479/* TODO(haberman): make this streaming. */
12480
/* Base64 decode table indexed by input byte: the 6-bit value of a base64
 * alphabet character, or -1 for every other byte (including '='). */
static const signed char b64table[] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x28 */ -1, -1, -1, 62 /* + */, -1, -1, -1, 63 /* / */,
  /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59,   /* 0-7 */
  /* 0x38 */ 60, 61, -1, -1, -1, -1, -1, -1,   /* 8-9 */
  /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,   /* A-G */
  /* 0x48 */  7,  8,  9, 10, 11, 12, 13, 14,   /* H-O */
  /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22,   /* P-W */
  /* 0x58 */ 23, 24, 25, -1, -1, -1, -1, -1,   /* X-Z */
  /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32,   /* a-g */
  /* 0x68 */ 33, 34, 35, 36, 37, 38, 39, 40,   /* h-o */
  /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48,   /* p-w */
  /* 0x78 */ 49, 50, 51, -1, -1, -1, -1, -1,   /* x-z */
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x88 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x98 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf8 */ -1, -1, -1, -1, -1, -1, -1, -1
};

/* Returns the table entry for |ch| sign-extended to 32 bits.  Because
 * unrecognized characters yield -1 (all upper bits set), callers can detect
 * them by testing the top bit of a combined value. */
int32_t b64lookup(unsigned char ch) {
  return b64table[ch];
}

/* Returns true if |ch| is neither a base64 alphabet character nor the
 * padding character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
12524
12525static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
12526 size_t len) {
12527 const char *limit = ptr + len;
12528 for (; ptr < limit; ptr += 4) {
12529 uint32_t val;
12530 char output[3];
12531
12532 if (limit - ptr < 4) {
12533 upb_status_seterrf(&p->status,
12534 "Base64 input for bytes field not a multiple of 4: %s",
12535 upb_fielddef_name(p->top->f));
12536 upb_env_reporterror(p->env, &p->status);
12537 return false;
12538 }
12539
12540 val = b64lookup(ptr[0]) << 18 |
12541 b64lookup(ptr[1]) << 12 |
12542 b64lookup(ptr[2]) << 6 |
12543 b64lookup(ptr[3]);
12544
12545 /* Test the upper bit; returns true if any of the characters returned -1. */
12546 if (val & 0x80000000) {
12547 goto otherchar;
12548 }
12549
12550 output[0] = val >> 16;
12551 output[1] = (val >> 8) & 0xff;
12552 output[2] = val & 0xff;
12553 upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
12554 }
12555 return true;
12556
12557otherchar:
12558 if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
12559 nonbase64(ptr[3]) ) {
12560 upb_status_seterrf(&p->status,
12561 "Non-base64 characters in bytes field: %s",
12562 upb_fielddef_name(p->top->f));
12563 upb_env_reporterror(p->env, &p->status);
12564 return false;
12565 } if (ptr[2] == '=') {
12566 uint32_t val;
12567 char output;
12568
12569 /* Last group contains only two input bytes, one output byte. */
12570 if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
12571 goto badpadding;
12572 }
12573
12574 val = b64lookup(ptr[0]) << 18 |
12575 b64lookup(ptr[1]) << 12;
12576
Austin Schuh40c16522018-10-28 20:27:54 -070012577 UPB_ASSERT(!(val & 0x80000000));
Brian Silverman9c614bc2016-02-15 20:20:02 -050012578 output = val >> 16;
12579 upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
12580 return true;
12581 } else {
12582 uint32_t val;
12583 char output[2];
12584
12585 /* Last group contains only three input bytes, two output bytes. */
12586 if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
12587 goto badpadding;
12588 }
12589
12590 val = b64lookup(ptr[0]) << 18 |
12591 b64lookup(ptr[1]) << 12 |
12592 b64lookup(ptr[2]) << 6;
12593
12594 output[0] = val >> 16;
12595 output[1] = (val >> 8) & 0xff;
12596 upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
12597 return true;
12598 }
12599
12600badpadding:
12601 upb_status_seterrf(&p->status,
12602 "Incorrect base64 padding for field: %s (%.*s)",
12603 upb_fielddef_name(p->top->f),
12604 4, ptr);
12605 upb_env_reporterror(p->env, &p->status);
12606 return false;
12607}
12608
12609
12610/* Accumulate buffer **********************************************************/
12611
12612/* Functionality for accumulating a buffer.
12613 *
12614 * Some parts of the parser need an entire value as a contiguous string. For
12615 * example, to look up a member name in a hash table, or to turn a string into
12616 * a number, the relevant library routines need the input string to be in
12617 * contiguous memory, even if the value spanned two or more buffers in the
12618 * input. These routines handle that.
12619 *
12620 * In the common case we can just point to the input buffer to get this
12621 * contiguous string and avoid any actual copy. So we optimistically begin
12622 * this way. But there are a few cases where we must instead copy into a
12623 * separate buffer:
12624 *
12625 * 1. The string was not contiguous in the input (it spanned buffers).
12626 *
12627 * 2. The string included escape sequences that need to be interpreted to get
12628 * the true value in a contiguous buffer. */
12629
12630static void assert_accumulate_empty(upb_json_parser *p) {
Austin Schuh40c16522018-10-28 20:27:54 -070012631 UPB_ASSERT(p->accumulated == NULL);
12632 UPB_ASSERT(p->accumulated_len == 0);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012633}
12634
12635static void accumulate_clear(upb_json_parser *p) {
12636 p->accumulated = NULL;
12637 p->accumulated_len = 0;
12638}
12639
12640/* Used internally by accumulate_append(). */
12641static bool accumulate_realloc(upb_json_parser *p, size_t need) {
12642 void *mem;
12643 size_t old_size = p->accumulate_buf_size;
12644 size_t new_size = UPB_MAX(old_size, 128);
12645 while (new_size < need) {
12646 new_size = saturating_multiply(new_size, 2);
12647 }
12648
12649 mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
12650 if (!mem) {
12651 upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
12652 upb_env_reporterror(p->env, &p->status);
12653 return false;
12654 }
12655
12656 p->accumulate_buf = mem;
12657 p->accumulate_buf_size = new_size;
12658 return true;
12659}
12660
12661/* Logically appends the given data to the append buffer.
12662 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
12663 * must be valid until the next accumulate_append() call (if any). */
12664static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
12665 bool can_alias) {
12666 size_t need;
12667
12668 if (!p->accumulated && can_alias) {
12669 p->accumulated = buf;
12670 p->accumulated_len = len;
12671 return true;
12672 }
12673
12674 if (!checked_add(p->accumulated_len, len, &need)) {
12675 upb_status_seterrmsg(&p->status, "Integer overflow.");
12676 upb_env_reporterror(p->env, &p->status);
12677 return false;
12678 }
12679
12680 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
12681 return false;
12682 }
12683
12684 if (p->accumulated != p->accumulate_buf) {
12685 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
12686 p->accumulated = p->accumulate_buf;
12687 }
12688
12689 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
12690 p->accumulated_len += len;
12691 return true;
12692}
12693
12694/* Returns a pointer to the data accumulated since the last accumulate_clear()
12695 * call, and writes the length to *len. This with point either to the input
12696 * buffer or a temporary accumulate buffer. */
12697static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
Austin Schuh40c16522018-10-28 20:27:54 -070012698 UPB_ASSERT(p->accumulated);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012699 *len = p->accumulated_len;
12700 return p->accumulated;
12701}
12702
12703
/* Multi-part text data *******************************************************/
12705
12706/* When we have text data in the input, it can often come in multiple segments.
12707 * For example, there may be some raw string data followed by an escape
12708 * sequence. The two segments are processed with different logic. Also buffer
12709 * seams in the input can cause multiple segments.
12710 *
12711 * As we see segments, there are two main cases for how we want to process them:
12712 *
12713 * 1. we want to push the captured input directly to string handlers.
12714 *
12715 * 2. we need to accumulate all the parts into a contiguous buffer for further
12716 * processing (field name lookup, string->number conversion, etc). */
12717
/* Possible values of p->multipart_state. */
enum {
  /* No multipart data is being processed. */
  MULTIPART_INACTIVE = 0,

  /* Parts are gathered into one contiguous buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* Each part is pushed straight to the current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
12731
12732/* Start a multi-part text value where we accumulate the data for processing at
12733 * the end. */
12734static void multipart_startaccum(upb_json_parser *p) {
12735 assert_accumulate_empty(p);
Austin Schuh40c16522018-10-28 20:27:54 -070012736 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012737 p->multipart_state = MULTIPART_ACCUMULATE;
12738}
12739
12740/* Start a multi-part text value where we immediately push text data to a string
12741 * value with the given selector. */
12742static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
12743 assert_accumulate_empty(p);
Austin Schuh40c16522018-10-28 20:27:54 -070012744 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012745 p->multipart_state = MULTIPART_PUSHEAGERLY;
12746 p->string_selector = sel;
12747}
12748
12749static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
12750 bool can_alias) {
12751 switch (p->multipart_state) {
12752 case MULTIPART_INACTIVE:
12753 upb_status_seterrmsg(
12754 &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
12755 upb_env_reporterror(p->env, &p->status);
12756 return false;
12757
12758 case MULTIPART_ACCUMULATE:
12759 if (!accumulate_append(p, buf, len, can_alias)) {
12760 return false;
12761 }
12762 break;
12763
12764 case MULTIPART_PUSHEAGERLY: {
12765 const upb_bufhandle *handle = can_alias ? p->handle : NULL;
12766 upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
12767 break;
12768 }
12769 }
12770
12771 return true;
12772}
12773
12774/* Note: this invalidates the accumulate buffer! Call only after reading its
12775 * contents. */
12776static void multipart_end(upb_json_parser *p) {
Austin Schuh40c16522018-10-28 20:27:54 -070012777 UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012778 p->multipart_state = MULTIPART_INACTIVE;
12779 accumulate_clear(p);
12780}
12781
12782
12783/* Input capture **************************************************************/
12784
12785/* Functionality for capturing a region of the input as text. Gracefully
12786 * handles the case where a buffer seam occurs in the middle of the captured
12787 * region. */
12788
12789static void capture_begin(upb_json_parser *p, const char *ptr) {
Austin Schuh40c16522018-10-28 20:27:54 -070012790 UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
12791 UPB_ASSERT(p->capture == NULL);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012792 p->capture = ptr;
12793}
12794
12795static bool capture_end(upb_json_parser *p, const char *ptr) {
Austin Schuh40c16522018-10-28 20:27:54 -070012796 UPB_ASSERT(p->capture);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012797 if (multipart_text(p, p->capture, ptr - p->capture, true)) {
12798 p->capture = NULL;
12799 return true;
12800 } else {
12801 return false;
12802 }
12803}
12804
12805/* This is called at the end of each input buffer (ie. when we have hit a
12806 * buffer seam). If we are in the middle of capturing the input, this
12807 * processes the unprocessed capture region. */
12808static void capture_suspend(upb_json_parser *p, const char **ptr) {
12809 if (!p->capture) return;
12810
12811 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
12812 /* We use this as a signal that we were in the middle of capturing, and
12813 * that capturing should resume at the beginning of the next buffer.
12814 *
12815 * We can't use *ptr here, because we have no guarantee that this pointer
12816 * will be valid when we resume (if the underlying memory is freed, then
12817 * using the pointer at all, even to compare to NULL, is likely undefined
12818 * behavior). */
12819 p->capture = &suspend_capture;
12820 } else {
12821 /* Need to back up the pointer to the beginning of the capture, since
12822 * we were not able to actually preserve it. */
12823 *ptr = p->capture;
12824 }
12825}
12826
12827static void capture_resume(upb_json_parser *p, const char *ptr) {
12828 if (p->capture) {
Austin Schuh40c16522018-10-28 20:27:54 -070012829 UPB_ASSERT(p->capture == &suspend_capture);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012830 p->capture = ptr;
12831 }
12832}
12833
12834
12835/* Callbacks from the parser **************************************************/
12836
12837/* These are the functions called directly from the parser itself.
12838 * We define these in the same order as their declarations in the parser. */
12839
/* Maps the character that follows a backslash in a JSON escape sequence to
 * the character it denotes.  Any other input trips an assertion; 'x' is
 * returned in that case. */
static char escape_char(char in) {
  switch (in) {
    case '"':  return '"';
    case '\\': return '\\';
    case '/':  return '/';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    default:
      break;
  }

  UPB_ASSERT(0);
  return 'x';
}
12855
12856static bool escape(upb_json_parser *p, const char *ptr) {
12857 char ch = escape_char(*ptr);
12858 return multipart_text(p, &ch, 1, false);
12859}
12860
12861static void start_hex(upb_json_parser *p) {
12862 p->digit = 0;
12863}
12864
12865static void hexdigit(upb_json_parser *p, const char *ptr) {
12866 char ch = *ptr;
12867
12868 p->digit <<= 4;
12869
12870 if (ch >= '0' && ch <= '9') {
12871 p->digit += (ch - '0');
12872 } else if (ch >= 'a' && ch <= 'f') {
12873 p->digit += ((ch - 'a') + 10);
12874 } else {
Austin Schuh40c16522018-10-28 20:27:54 -070012875 UPB_ASSERT(ch >= 'A' && ch <= 'F');
Brian Silverman9c614bc2016-02-15 20:20:02 -050012876 p->digit += ((ch - 'A') + 10);
12877 }
12878}
12879
12880static bool end_hex(upb_json_parser *p) {
12881 uint32_t codepoint = p->digit;
12882
12883 /* emit the codepoint as UTF-8. */
12884 char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
12885 int length = 0;
12886 if (codepoint <= 0x7F) {
12887 utf8[0] = codepoint;
12888 length = 1;
12889 } else if (codepoint <= 0x07FF) {
12890 utf8[1] = (codepoint & 0x3F) | 0x80;
12891 codepoint >>= 6;
12892 utf8[0] = (codepoint & 0x1F) | 0xC0;
12893 length = 2;
12894 } else /* codepoint <= 0xFFFF */ {
12895 utf8[2] = (codepoint & 0x3F) | 0x80;
12896 codepoint >>= 6;
12897 utf8[1] = (codepoint & 0x3F) | 0x80;
12898 codepoint >>= 6;
12899 utf8[0] = (codepoint & 0x0F) | 0xE0;
12900 length = 3;
12901 }
12902 /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
12903 * we have to wait for the next escape to get the full code point). */
12904
12905 return multipart_text(p, utf8, length, false);
12906}
12907
12908static void start_text(upb_json_parser *p, const char *ptr) {
12909 capture_begin(p, ptr);
12910}
12911
12912static bool end_text(upb_json_parser *p, const char *ptr) {
12913 return capture_end(p, ptr);
12914}
12915
12916static void start_number(upb_json_parser *p, const char *ptr) {
12917 multipart_startaccum(p);
12918 capture_begin(p, ptr);
12919}
12920
Austin Schuh40c16522018-10-28 20:27:54 -070012921static bool parse_number(upb_json_parser *p, bool is_quoted);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012922
12923static bool end_number(upb_json_parser *p, const char *ptr) {
12924 if (!capture_end(p, ptr)) {
12925 return false;
12926 }
12927
Austin Schuh40c16522018-10-28 20:27:54 -070012928 return parse_number(p, false);
Brian Silverman9c614bc2016-02-15 20:20:02 -050012929}
12930
Austin Schuh40c16522018-10-28 20:27:54 -070012931/* |buf| is NULL-terminated. |buf| itself will never include quotes;
12932 * |is_quoted| tells us whether this text originally appeared inside quotes. */
12933static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
12934 bool is_quoted) {
12935 size_t len = strlen(buf);
12936 const char *bufend = buf + len;
12937 char *end;
12938 upb_fieldtype_t type = upb_fielddef_type(p->top->f);
12939 double val;
12940 double dummy;
12941 double inf = 1.0 / 0.0; /* C89 does not have an INFINITY macro. */
12942
12943 errno = 0;
12944
12945 if (len == 0 || buf[0] == ' ') {
12946 return false;
12947 }
12948
12949 /* For integer types, first try parsing with integer-specific routines.
12950 * If these succeed, they will be more accurate for int64/uint64 than
12951 * strtod().
12952 */
12953 switch (type) {
12954 case UPB_TYPE_ENUM:
12955 case UPB_TYPE_INT32: {
12956 long val = strtol(buf, &end, 0);
12957 if (errno == ERANGE || end != bufend) {
12958 break;
12959 } else if (val > INT32_MAX || val < INT32_MIN) {
12960 return false;
12961 } else {
12962 upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
12963 return true;
12964 }
12965 }
12966 case UPB_TYPE_UINT32: {
12967 unsigned long val = strtoul(buf, &end, 0);
12968 if (end != bufend) {
12969 break;
12970 } else if (val > UINT32_MAX || errno == ERANGE) {
12971 return false;
12972 } else {
12973 upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
12974 return true;
12975 }
12976 }
12977 /* XXX: We can't handle [u]int64 properly on 32-bit machines because
12978 * strto[u]ll isn't in C89. */
12979 case UPB_TYPE_INT64: {
12980 long val = strtol(buf, &end, 0);
12981 if (errno == ERANGE || end != bufend) {
12982 break;
12983 } else {
12984 upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
12985 return true;
12986 }
12987 }
12988 case UPB_TYPE_UINT64: {
12989 unsigned long val = strtoul(p->accumulated, &end, 0);
12990 if (end != bufend) {
12991 break;
12992 } else if (errno == ERANGE) {
12993 return false;
12994 } else {
12995 upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
12996 return true;
12997 }
12998 }
12999 default:
13000 break;
13001 }
13002
13003 if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
13004 /* Quoted numbers for integer types are not allowed to be in double form. */
13005 return false;
13006 }
13007
13008 if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
13009 /* C89 does not have an INFINITY macro. */
13010 val = inf;
13011 } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
13012 val = -inf;
13013 } else {
13014 val = strtod(buf, &end);
13015 if (errno == ERANGE || end != bufend) {
13016 return false;
13017 }
13018 }
13019
13020 switch (type) {
13021#define CASE(capitaltype, smalltype, ctype, min, max) \
13022 case UPB_TYPE_ ## capitaltype: { \
13023 if (modf(val, &dummy) != 0 || val > max || val < min) { \
13024 return false; \
13025 } else { \
13026 upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), \
13027 (ctype)val); \
13028 return true; \
13029 } \
13030 break; \
13031 }
13032 case UPB_TYPE_ENUM:
13033 CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
13034 CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
13035 CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
13036 CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
13037#undef CASE
13038
13039 case UPB_TYPE_DOUBLE:
13040 upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
13041 return true;
13042 case UPB_TYPE_FLOAT:
13043 if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
13044 return false;
13045 } else {
13046 upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
13047 return true;
13048 }
13049 default:
13050 return false;
13051 }
13052}
13053
13054static bool parse_number(upb_json_parser *p, bool is_quoted) {
Brian Silverman9c614bc2016-02-15 20:20:02 -050013055 size_t len;
13056 const char *buf;
Brian Silverman9c614bc2016-02-15 20:20:02 -050013057
13058 /* strtol() and friends unfortunately do not support specifying the length of
13059 * the input string, so we need to force a copy into a NULL-terminated buffer. */
13060 if (!multipart_text(p, "\0", 1, false)) {
13061 return false;
13062 }
13063
13064 buf = accumulate_getptr(p, &len);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013065
Austin Schuh40c16522018-10-28 20:27:54 -070013066 if (parse_number_from_buffer(p, buf, is_quoted)) {
13067 multipart_end(p);
13068 return true;
13069 } else {
13070 upb_status_seterrf(&p->status, "error parsing number: %s", buf);
13071 upb_env_reporterror(p->env, &p->status);
13072 multipart_end(p);
13073 return false;
Brian Silverman9c614bc2016-02-15 20:20:02 -050013074 }
Brian Silverman9c614bc2016-02-15 20:20:02 -050013075}
13076
13077static bool parser_putbool(upb_json_parser *p, bool val) {
13078 bool ok;
13079
13080 if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
13081 upb_status_seterrf(&p->status,
13082 "Boolean value specified for non-bool field: %s",
13083 upb_fielddef_name(p->top->f));
13084 upb_env_reporterror(p->env, &p->status);
13085 return false;
13086 }
13087
13088 ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
Austin Schuh40c16522018-10-28 20:27:54 -070013089 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013090
13091 return true;
13092}
13093
13094static bool start_stringval(upb_json_parser *p) {
Austin Schuh40c16522018-10-28 20:27:54 -070013095 UPB_ASSERT(p->top->f);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013096
13097 if (upb_fielddef_isstring(p->top->f)) {
13098 upb_jsonparser_frame *inner;
13099 upb_selector_t sel;
13100
13101 if (!check_stack(p)) return false;
13102
13103 /* Start a new parser frame: parser frames correspond one-to-one with
13104 * handler frames, and string events occur in a sub-frame. */
13105 inner = p->top + 1;
13106 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
13107 upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
13108 inner->m = p->top->m;
13109 inner->f = p->top->f;
Austin Schuh40c16522018-10-28 20:27:54 -070013110 inner->name_table = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -050013111 inner->is_map = false;
13112 inner->is_mapentry = false;
13113 p->top = inner;
13114
13115 if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
13116 /* For STRING fields we push data directly to the handlers as it is
13117 * parsed. We don't do this yet for BYTES fields, because our base64
13118 * decoder is not streaming.
13119 *
13120 * TODO(haberman): make base64 decoding streaming also. */
13121 multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
13122 return true;
13123 } else {
13124 multipart_startaccum(p);
13125 return true;
13126 }
Austin Schuh40c16522018-10-28 20:27:54 -070013127 } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL &&
13128 upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) {
13129 /* No need to push a frame -- numeric values in quotes remain in the
13130 * current parser frame. These values must accmulate so we can convert
13131 * them all at once at the end. */
Brian Silverman9c614bc2016-02-15 20:20:02 -050013132 multipart_startaccum(p);
13133 return true;
13134 } else {
13135 upb_status_seterrf(&p->status,
Austin Schuh40c16522018-10-28 20:27:54 -070013136 "String specified for bool or submessage field: %s",
Brian Silverman9c614bc2016-02-15 20:20:02 -050013137 upb_fielddef_name(p->top->f));
13138 upb_env_reporterror(p->env, &p->status);
13139 return false;
13140 }
13141}
13142
13143static bool end_stringval(upb_json_parser *p) {
13144 bool ok = true;
13145
13146 switch (upb_fielddef_type(p->top->f)) {
13147 case UPB_TYPE_BYTES:
13148 if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
13149 p->accumulated, p->accumulated_len)) {
13150 return false;
13151 }
13152 /* Fall through. */
13153
13154 case UPB_TYPE_STRING: {
13155 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013156 p->top--;
Austin Schuh40c16522018-10-28 20:27:54 -070013157 upb_sink_endstr(&p->top->sink, sel);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013158 break;
13159 }
13160
13161 case UPB_TYPE_ENUM: {
13162 /* Resolve enum symbolic name to integer value. */
13163 const upb_enumdef *enumdef =
13164 (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
13165
13166 size_t len;
13167 const char *buf = accumulate_getptr(p, &len);
13168
13169 int32_t int_val = 0;
13170 ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
13171
13172 if (ok) {
13173 upb_selector_t sel = parser_getsel(p);
13174 upb_sink_putint32(&p->top->sink, sel, int_val);
13175 } else {
13176 upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", len, buf);
13177 upb_env_reporterror(p->env, &p->status);
13178 }
13179
13180 break;
13181 }
13182
Austin Schuh40c16522018-10-28 20:27:54 -070013183 case UPB_TYPE_INT32:
13184 case UPB_TYPE_INT64:
13185 case UPB_TYPE_UINT32:
13186 case UPB_TYPE_UINT64:
13187 case UPB_TYPE_DOUBLE:
13188 case UPB_TYPE_FLOAT:
13189 ok = parse_number(p, true);
13190 break;
13191
Brian Silverman9c614bc2016-02-15 20:20:02 -050013192 default:
Austin Schuh40c16522018-10-28 20:27:54 -070013193 UPB_ASSERT(false);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013194 upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
13195 upb_env_reporterror(p->env, &p->status);
13196 ok = false;
13197 break;
13198 }
13199
13200 multipart_end(p);
13201
13202 return ok;
13203}
13204
13205static void start_member(upb_json_parser *p) {
Austin Schuh40c16522018-10-28 20:27:54 -070013206 UPB_ASSERT(!p->top->f);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013207 multipart_startaccum(p);
13208}
13209
13210/* Helper: invoked during parse_mapentry() to emit the mapentry message's key
13211 * field based on the current contents of the accumulate buffer. */
13212static bool parse_mapentry_key(upb_json_parser *p) {
13213
13214 size_t len;
13215 const char *buf = accumulate_getptr(p, &len);
13216
13217 /* Emit the key field. We do a bit of ad-hoc parsing here because the
13218 * parser state machine has already decided that this is a string field
13219 * name, and we are reinterpreting it as some arbitrary key type. In
13220 * particular, integer and bool keys are quoted, so we need to parse the
13221 * quoted string contents here. */
13222
13223 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
13224 if (p->top->f == NULL) {
13225 upb_status_seterrmsg(&p->status, "mapentry message has no key");
13226 upb_env_reporterror(p->env, &p->status);
13227 return false;
13228 }
13229 switch (upb_fielddef_type(p->top->f)) {
13230 case UPB_TYPE_INT32:
13231 case UPB_TYPE_INT64:
13232 case UPB_TYPE_UINT32:
13233 case UPB_TYPE_UINT64:
13234 /* Invoke end_number. The accum buffer has the number's text already. */
Austin Schuh40c16522018-10-28 20:27:54 -070013235 if (!parse_number(p, true)) {
Brian Silverman9c614bc2016-02-15 20:20:02 -050013236 return false;
13237 }
13238 break;
13239 case UPB_TYPE_BOOL:
13240 if (len == 4 && !strncmp(buf, "true", 4)) {
13241 if (!parser_putbool(p, true)) {
13242 return false;
13243 }
13244 } else if (len == 5 && !strncmp(buf, "false", 5)) {
13245 if (!parser_putbool(p, false)) {
13246 return false;
13247 }
13248 } else {
13249 upb_status_seterrmsg(&p->status,
13250 "Map bool key not 'true' or 'false'");
13251 upb_env_reporterror(p->env, &p->status);
13252 return false;
13253 }
13254 multipart_end(p);
13255 break;
13256 case UPB_TYPE_STRING:
13257 case UPB_TYPE_BYTES: {
13258 upb_sink subsink;
13259 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
13260 upb_sink_startstr(&p->top->sink, sel, len, &subsink);
13261 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
13262 upb_sink_putstring(&subsink, sel, buf, len, NULL);
13263 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
Austin Schuh40c16522018-10-28 20:27:54 -070013264 upb_sink_endstr(&p->top->sink, sel);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013265 multipart_end(p);
13266 break;
13267 }
13268 default:
13269 upb_status_seterrmsg(&p->status, "Invalid field type for map key");
13270 upb_env_reporterror(p->env, &p->status);
13271 return false;
13272 }
13273
13274 return true;
13275}
13276
13277/* Helper: emit one map entry (as a submessage in the map field sequence). This
13278 * is invoked from end_membername(), at the end of the map entry's key string,
13279 * with the map key in the accumulate buffer. It parses the key from that
13280 * buffer, emits the handler calls to start the mapentry submessage (setting up
13281 * its subframe in the process), and sets up state in the subframe so that the
13282 * value parser (invoked next) will emit the mapentry's value field and then
13283 * end the mapentry message. */
13284
13285static bool handle_mapentry(upb_json_parser *p) {
13286 const upb_fielddef *mapfield;
13287 const upb_msgdef *mapentrymsg;
13288 upb_jsonparser_frame *inner;
13289 upb_selector_t sel;
13290
13291 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
13292 * for the mapentry itself, and then set |f| in that frame so that the map
13293 * value field is parsed, and also set a flag to end the frame after the
13294 * map-entry value is parsed. */
13295 if (!check_stack(p)) return false;
13296
13297 mapfield = p->top->mapfield;
13298 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
13299
13300 inner = p->top + 1;
13301 p->top->f = mapfield;
13302 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
13303 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
13304 inner->m = mapentrymsg;
Austin Schuh40c16522018-10-28 20:27:54 -070013305 inner->name_table = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -050013306 inner->mapfield = mapfield;
13307 inner->is_map = false;
13308
13309 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
13310 * the key field value to the sink, and these handlers will pop the frame
13311 * if they see is_mapentry (when invoked by the parser state machine, they
13312 * would have just seen the map-entry value, not key). */
13313 inner->is_mapentry = false;
13314 p->top = inner;
13315
13316 /* send STARTMSG in submsg frame. */
13317 upb_sink_startmsg(&p->top->sink);
13318
13319 parse_mapentry_key(p);
13320
13321 /* Set up the value field to receive the map-entry value. */
13322 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
13323 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
13324 p->top->mapfield = mapfield;
13325 if (p->top->f == NULL) {
13326 upb_status_seterrmsg(&p->status, "mapentry message has no value");
13327 upb_env_reporterror(p->env, &p->status);
13328 return false;
13329 }
13330
13331 return true;
13332}
13333
13334static bool end_membername(upb_json_parser *p) {
Austin Schuh40c16522018-10-28 20:27:54 -070013335 UPB_ASSERT(!p->top->f);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013336
13337 if (p->top->is_map) {
13338 return handle_mapentry(p);
13339 } else {
13340 size_t len;
13341 const char *buf = accumulate_getptr(p, &len);
Austin Schuh40c16522018-10-28 20:27:54 -070013342 upb_value v;
Brian Silverman9c614bc2016-02-15 20:20:02 -050013343
Austin Schuh40c16522018-10-28 20:27:54 -070013344 if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
13345 p->top->f = upb_value_getconstptr(v);
13346 multipart_end(p);
13347
13348 return true;
13349 } else {
Brian Silverman9c614bc2016-02-15 20:20:02 -050013350 /* TODO(haberman): Ignore unknown fields if requested/configured to do
13351 * so. */
13352 upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
13353 upb_env_reporterror(p->env, &p->status);
13354 return false;
13355 }
Brian Silverman9c614bc2016-02-15 20:20:02 -050013356 }
13357}
13358
13359static void end_member(upb_json_parser *p) {
13360 /* If we just parsed a map-entry value, end that frame too. */
13361 if (p->top->is_mapentry) {
13362 upb_status s = UPB_STATUS_INIT;
13363 upb_selector_t sel;
13364 bool ok;
13365 const upb_fielddef *mapfield;
13366
Austin Schuh40c16522018-10-28 20:27:54 -070013367 UPB_ASSERT(p->top > p->stack);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013368 /* send ENDMSG on submsg. */
13369 upb_sink_endmsg(&p->top->sink, &s);
13370 mapfield = p->top->mapfield;
13371
13372 /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
13373 p->top--;
13374 ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
Austin Schuh40c16522018-10-28 20:27:54 -070013375 UPB_ASSERT(ok);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013376 upb_sink_endsubmsg(&p->top->sink, sel);
13377 }
13378
13379 p->top->f = NULL;
13380}
13381
13382static bool start_subobject(upb_json_parser *p) {
Austin Schuh40c16522018-10-28 20:27:54 -070013383 UPB_ASSERT(p->top->f);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013384
13385 if (upb_fielddef_ismap(p->top->f)) {
13386 upb_jsonparser_frame *inner;
13387 upb_selector_t sel;
13388
13389 /* Beginning of a map. Start a new parser frame in a repeated-field
13390 * context. */
13391 if (!check_stack(p)) return false;
13392
13393 inner = p->top + 1;
13394 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
13395 upb_sink_startseq(&p->top->sink, sel, &inner->sink);
13396 inner->m = upb_fielddef_msgsubdef(p->top->f);
Austin Schuh40c16522018-10-28 20:27:54 -070013397 inner->name_table = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -050013398 inner->mapfield = p->top->f;
13399 inner->f = NULL;
13400 inner->is_map = true;
13401 inner->is_mapentry = false;
13402 p->top = inner;
13403
13404 return true;
13405 } else if (upb_fielddef_issubmsg(p->top->f)) {
13406 upb_jsonparser_frame *inner;
13407 upb_selector_t sel;
13408
13409 /* Beginning of a subobject. Start a new parser frame in the submsg
13410 * context. */
13411 if (!check_stack(p)) return false;
13412
13413 inner = p->top + 1;
13414
13415 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
13416 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
13417 inner->m = upb_fielddef_msgsubdef(p->top->f);
Austin Schuh40c16522018-10-28 20:27:54 -070013418 set_name_table(p, inner);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013419 inner->f = NULL;
13420 inner->is_map = false;
13421 inner->is_mapentry = false;
13422 p->top = inner;
13423
13424 return true;
13425 } else {
13426 upb_status_seterrf(&p->status,
13427 "Object specified for non-message/group field: %s",
13428 upb_fielddef_name(p->top->f));
13429 upb_env_reporterror(p->env, &p->status);
13430 return false;
13431 }
13432}
13433
13434static void end_subobject(upb_json_parser *p) {
13435 if (p->top->is_map) {
13436 upb_selector_t sel;
13437 p->top--;
13438 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
13439 upb_sink_endseq(&p->top->sink, sel);
13440 } else {
13441 upb_selector_t sel;
13442 p->top--;
13443 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
13444 upb_sink_endsubmsg(&p->top->sink, sel);
13445 }
13446}
13447
13448static bool start_array(upb_json_parser *p) {
13449 upb_jsonparser_frame *inner;
13450 upb_selector_t sel;
13451
Austin Schuh40c16522018-10-28 20:27:54 -070013452 UPB_ASSERT(p->top->f);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013453
13454 if (!upb_fielddef_isseq(p->top->f)) {
13455 upb_status_seterrf(&p->status,
13456 "Array specified for non-repeated field: %s",
13457 upb_fielddef_name(p->top->f));
13458 upb_env_reporterror(p->env, &p->status);
13459 return false;
13460 }
13461
13462 if (!check_stack(p)) return false;
13463
13464 inner = p->top + 1;
13465 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
13466 upb_sink_startseq(&p->top->sink, sel, &inner->sink);
13467 inner->m = p->top->m;
Austin Schuh40c16522018-10-28 20:27:54 -070013468 inner->name_table = NULL;
Brian Silverman9c614bc2016-02-15 20:20:02 -050013469 inner->f = p->top->f;
13470 inner->is_map = false;
13471 inner->is_mapentry = false;
13472 p->top = inner;
13473
13474 return true;
13475}
13476
13477static void end_array(upb_json_parser *p) {
13478 upb_selector_t sel;
13479
Austin Schuh40c16522018-10-28 20:27:54 -070013480 UPB_ASSERT(p->top > p->stack);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013481
13482 p->top--;
13483 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
13484 upb_sink_endseq(&p->top->sink, sel);
13485}
13486
13487static void start_object(upb_json_parser *p) {
13488 if (!p->top->is_map) {
13489 upb_sink_startmsg(&p->top->sink);
13490 }
13491}
13492
13493static void end_object(upb_json_parser *p) {
13494 if (!p->top->is_map) {
13495 upb_status status;
13496 upb_status_clear(&status);
13497 upb_sink_endmsg(&p->top->sink, &status);
13498 if (!upb_ok(&status)) {
13499 upb_env_reporterror(p->env, &status);
13500 }
13501 }
13502}
13503
13504
/* Evaluates |x| and jumps to the enclosing function's `error` label if it is
 * false.  Wrapped in do { } while (0) so that the macro is a single statement
 * and cannot capture an `else` that follows it. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
13506
13507
13508/* The actual parser **********************************************************/
13509
13510/* What follows is the Ragel parser itself. The language is specified in Ragel
13511 * and the actions call our C functions above.
13512 *
13513 * Ragel has an extensive set of functionality, and we use only a small part of
13514 * it. There are many action types but we only use a few:
13515 *
13516 * ">" -- transition into a machine
13517 * "%" -- transition out of a machine
13518 * "@" -- transition into a final state of a machine.
13519 *
13520 * "@" transitions are tricky because a machine can transition into a final
13521 * state repeatedly. But in some cases we know this can't happen, for example
13522 * a string which is delimited by a final '"' can only transition into its
13523 * final state once, when the closing '"' is seen. */
13524
13525
Austin Schuh40c16522018-10-28 20:27:54 -070013526#line 1310 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013527
13528
13529
Austin Schuh40c16522018-10-28 20:27:54 -070013530#line 1222 "upb/json/parser.c"
/* Ragel-generated action lists.  parse() indexes this table with a value from
 * _json_trans_actions: the entry at that offset is the number of actions and
 * the entries after it are the action ids dispatched by the switch in
 * parse().  Do not edit by hand. */
static const char _json_actions[] = {
	0, 1, 0, 1, 2, 1, 3, 1,
	5, 1, 6, 1, 7, 1, 8, 1,
	10, 1, 12, 1, 13, 1, 14, 1,
	15, 1, 16, 1, 17, 1, 21, 1,
	25, 1, 27, 2, 3, 8, 2, 4,
	5, 2, 6, 2, 2, 6, 8, 2,
	11, 9, 2, 13, 15, 2, 14, 15,
	2, 18, 1, 2, 19, 27, 2, 20,
	9, 2, 22, 27, 2, 23, 27, 2,
	24, 27, 2, 26, 27, 3, 14, 11,
	9
};
13544
/* Ragel-generated.  Indexed by the current state: offset into
 * _json_trans_keys at which that state's keys begin.  Do not edit by hand. */
static const unsigned char _json_key_offsets[] = {
	0, 0, 4, 9, 14, 15, 19, 24,
	29, 34, 38, 42, 45, 48, 50, 54,
	58, 60, 62, 67, 69, 71, 80, 86,
	92, 98, 104, 106, 115, 116, 116, 116,
	121, 126, 131, 132, 133, 134, 135, 135,
	136, 137, 138, 138, 139, 140, 141, 141,
	146, 151, 152, 156, 161, 166, 171, 175,
	175, 178, 178, 178
};
13555
/* Ragel-generated.  Character codes that the current input byte is compared
 * against.  For each state, parse() first binary-searches the state's single
 * keys and then its (low, high) range pairs, which follow the single keys.
 * Do not edit by hand. */
static const char _json_trans_keys[] = {
	32, 123, 9, 13, 32, 34, 125, 9,
	13, 32, 34, 125, 9, 13, 34, 32,
	58, 9, 13, 32, 93, 125, 9, 13,
	32, 44, 125, 9, 13, 32, 44, 125,
	9, 13, 32, 34, 9, 13, 45, 48,
	49, 57, 48, 49, 57, 46, 69, 101,
	48, 57, 69, 101, 48, 57, 43, 45,
	48, 57, 48, 57, 48, 57, 46, 69,
	101, 48, 57, 34, 92, 34, 92, 34,
	47, 92, 98, 102, 110, 114, 116, 117,
	48, 57, 65, 70, 97, 102, 48, 57,
	65, 70, 97, 102, 48, 57, 65, 70,
	97, 102, 48, 57, 65, 70, 97, 102,
	34, 92, 34, 45, 91, 102, 110, 116,
	123, 48, 57, 34, 32, 93, 125, 9,
	13, 32, 44, 93, 9, 13, 32, 93,
	125, 9, 13, 97, 108, 115, 101, 117,
	108, 108, 114, 117, 101, 32, 34, 125,
	9, 13, 32, 34, 125, 9, 13, 34,
	32, 58, 9, 13, 32, 93, 125, 9,
	13, 32, 44, 125, 9, 13, 32, 44,
	125, 9, 13, 32, 34, 9, 13, 32,
	9, 13, 0
};
13581
/* Ragel-generated.  Indexed by the current state: number of single-character
 * keys that state has in _json_trans_keys.  Do not edit by hand. */
static const char _json_single_lengths[] = {
	0, 2, 3, 3, 1, 2, 3, 3,
	3, 2, 2, 1, 3, 0, 2, 2,
	0, 0, 3, 2, 2, 9, 0, 0,
	0, 0, 2, 7, 1, 0, 0, 3,
	3, 3, 1, 1, 1, 1, 0, 1,
	1, 1, 0, 1, 1, 1, 0, 3,
	3, 1, 2, 3, 3, 3, 2, 0,
	1, 0, 0, 0
};
13592
/* Ragel-generated.  Indexed by the current state: number of (low, high) range
 * pairs that state has in _json_trans_keys.  Do not edit by hand. */
static const char _json_range_lengths[] = {
	0, 1, 1, 1, 0, 1, 1, 1,
	1, 1, 1, 1, 0, 1, 1, 1,
	1, 1, 1, 0, 0, 0, 3, 3,
	3, 3, 0, 1, 0, 0, 0, 1,
	1, 1, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 1,
	1, 0, 1, 1, 1, 1, 1, 0,
	1, 0, 0, 0
};
13603
/* Ragel-generated.  Indexed by the current state: offset into _json_indicies
 * of that state's first entry.  parse() adds the position of the matching key
 * to this offset.  Do not edit by hand. */
static const short _json_index_offsets[] = {
	0, 0, 4, 9, 14, 16, 20, 25,
	30, 35, 39, 43, 46, 50, 52, 56,
	60, 62, 64, 69, 72, 75, 85, 89,
	93, 97, 101, 104, 113, 115, 116, 117,
	122, 127, 132, 134, 136, 138, 140, 141,
	143, 145, 147, 148, 150, 152, 154, 155,
	160, 165, 167, 171, 176, 181, 186, 190,
	191, 194, 195, 196
};
13614
/* Ragel-generated.  Maps the index computed from _json_index_offsets plus the
 * matched key position to a transition number, which parse() then uses to
 * index _json_trans_targs and _json_trans_actions.  Do not edit by hand. */
static const char _json_indicies[] = {
	0, 2, 0, 1, 3, 4, 5, 3,
	1, 6, 7, 8, 6, 1, 9, 1,
	10, 11, 10, 1, 11, 1, 1, 11,
	12, 13, 14, 15, 13, 1, 16, 17,
	8, 16, 1, 17, 7, 17, 1, 18,
	19, 20, 1, 19, 20, 1, 22, 23,
	23, 21, 24, 1, 23, 23, 24, 21,
	25, 25, 26, 1, 26, 1, 26, 21,
	22, 23, 23, 20, 21, 28, 29, 27,
	31, 32, 30, 33, 33, 33, 33, 33,
	33, 33, 33, 34, 1, 35, 35, 35,
	1, 36, 36, 36, 1, 37, 37, 37,
	1, 38, 38, 38, 1, 40, 41, 39,
	42, 43, 44, 45, 46, 47, 48, 43,
	1, 49, 1, 50, 51, 53, 54, 1,
	53, 52, 55, 56, 54, 55, 1, 56,
	1, 1, 56, 52, 57, 1, 58, 1,
	59, 1, 60, 1, 61, 62, 1, 63,
	1, 64, 1, 65, 66, 1, 67, 1,
	68, 1, 69, 70, 71, 72, 70, 1,
	73, 74, 75, 73, 1, 76, 1, 77,
	78, 77, 1, 78, 1, 1, 78, 79,
	80, 81, 82, 80, 1, 83, 84, 75,
	83, 1, 84, 74, 84, 1, 85, 86,
	86, 1, 1, 1, 1, 0
};
13642
/* Ragel-generated.  Indexed by transition number: the state that parse()
 * moves to.  A target of 0 makes parse() leave its main loop.  Do not edit by
 * hand. */
static const char _json_trans_targs[] = {
	1, 0, 2, 3, 4, 56, 3, 4,
	56, 5, 5, 6, 7, 8, 9, 56,
	8, 9, 11, 12, 18, 57, 13, 15,
	14, 16, 17, 20, 58, 21, 20, 58,
	21, 19, 22, 23, 24, 25, 26, 20,
	58, 21, 28, 30, 31, 34, 39, 43,
	47, 29, 59, 59, 32, 31, 29, 32,
	33, 35, 36, 37, 38, 59, 40, 41,
	42, 59, 44, 45, 46, 59, 48, 49,
	55, 48, 49, 55, 50, 50, 51, 52,
	53, 54, 55, 53, 54, 59, 56
};
13656
/* Ragel-generated.  Indexed by transition number: offset into _json_actions
 * of the action list to run on that transition.  0 means the transition has
 * no actions.  Do not edit by hand. */
static const char _json_trans_actions[] = {
	0, 0, 0, 21, 77, 53, 0, 47,
	23, 17, 0, 0, 15, 19, 19, 50,
	0, 0, 0, 0, 0, 1, 0, 0,
	0, 0, 0, 3, 13, 0, 0, 35,
	5, 11, 0, 38, 7, 7, 7, 41,
	44, 9, 62, 56, 25, 0, 0, 0,
	31, 29, 33, 59, 15, 0, 27, 0,
	0, 0, 0, 0, 0, 68, 0, 0,
	0, 71, 0, 0, 0, 65, 21, 77,
	53, 0, 47, 23, 17, 0, 0, 15,
	19, 19, 50, 0, 0, 74, 0
};
13670
/* Ragel-generated state numbers.  json_start is the start state.  The
 * json_en_* constants are the entry states of the named machines; the action
 * code in parse() jumps to the number, string and value machines using the
 * same numbers (10, 19 and 27) as literals.  Do not edit by hand. */
static const int json_start = 1;

static const int json_en_number_machine = 10;
static const int json_en_string_machine = 19;
static const int json_en_value_machine = 27;
static const int json_en_main = 1;
13677
13678
Austin Schuh40c16522018-10-28 20:27:54 -070013679#line 1313 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013680
13681size_t parse(void *closure, const void *hd, const char *buf, size_t size,
13682 const upb_bufhandle *handle) {
13683 upb_json_parser *parser = closure;
13684
13685 /* Variables used by Ragel's generated code. */
13686 int cs = parser->current_state;
13687 int *stack = parser->parser_stack;
13688 int top = parser->parser_top;
13689
13690 const char *p = buf;
13691 const char *pe = buf + size;
13692
13693 parser->handle = handle;
13694
13695 UPB_UNUSED(hd);
13696 UPB_UNUSED(handle);
13697
13698 capture_resume(parser, buf);
13699
13700
Austin Schuh40c16522018-10-28 20:27:54 -070013701#line 1393 "upb/json/parser.c"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013702 {
13703 int _klen;
13704 unsigned int _trans;
13705 const char *_acts;
13706 unsigned int _nacts;
13707 const char *_keys;
13708
13709 if ( p == pe )
13710 goto _test_eof;
13711 if ( cs == 0 )
13712 goto _out;
13713_resume:
13714 _keys = _json_trans_keys + _json_key_offsets[cs];
13715 _trans = _json_index_offsets[cs];
13716
13717 _klen = _json_single_lengths[cs];
13718 if ( _klen > 0 ) {
13719 const char *_lower = _keys;
13720 const char *_mid;
13721 const char *_upper = _keys + _klen - 1;
13722 while (1) {
13723 if ( _upper < _lower )
13724 break;
13725
13726 _mid = _lower + ((_upper-_lower) >> 1);
13727 if ( (*p) < *_mid )
13728 _upper = _mid - 1;
13729 else if ( (*p) > *_mid )
13730 _lower = _mid + 1;
13731 else {
13732 _trans += (unsigned int)(_mid - _keys);
13733 goto _match;
13734 }
13735 }
13736 _keys += _klen;
13737 _trans += _klen;
13738 }
13739
13740 _klen = _json_range_lengths[cs];
13741 if ( _klen > 0 ) {
13742 const char *_lower = _keys;
13743 const char *_mid;
13744 const char *_upper = _keys + (_klen<<1) - 2;
13745 while (1) {
13746 if ( _upper < _lower )
13747 break;
13748
13749 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
13750 if ( (*p) < _mid[0] )
13751 _upper = _mid - 2;
13752 else if ( (*p) > _mid[1] )
13753 _lower = _mid + 2;
13754 else {
13755 _trans += (unsigned int)((_mid - _keys)>>1);
13756 goto _match;
13757 }
13758 }
13759 _trans += _klen;
13760 }
13761
13762_match:
13763 _trans = _json_indicies[_trans];
13764 cs = _json_trans_targs[_trans];
13765
13766 if ( _json_trans_actions[_trans] == 0 )
13767 goto _again;
13768
13769 _acts = _json_actions + _json_trans_actions[_trans];
13770 _nacts = (unsigned int) *_acts++;
13771 while ( _nacts-- > 0 )
13772 {
13773 switch ( *_acts++ )
13774 {
13775 case 0:
Austin Schuh40c16522018-10-28 20:27:54 -070013776#line 1225 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013777 { p--; {cs = stack[--top]; goto _again;} }
13778 break;
13779 case 1:
Austin Schuh40c16522018-10-28 20:27:54 -070013780#line 1226 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013781 { p--; {stack[top++] = cs; cs = 10; goto _again;} }
13782 break;
13783 case 2:
Austin Schuh40c16522018-10-28 20:27:54 -070013784#line 1230 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013785 { start_text(parser, p); }
13786 break;
13787 case 3:
Austin Schuh40c16522018-10-28 20:27:54 -070013788#line 1231 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013789 { CHECK_RETURN_TOP(end_text(parser, p)); }
13790 break;
13791 case 4:
Austin Schuh40c16522018-10-28 20:27:54 -070013792#line 1237 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013793 { start_hex(parser); }
13794 break;
13795 case 5:
Austin Schuh40c16522018-10-28 20:27:54 -070013796#line 1238 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013797 { hexdigit(parser, p); }
13798 break;
13799 case 6:
Austin Schuh40c16522018-10-28 20:27:54 -070013800#line 1239 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013801 { CHECK_RETURN_TOP(end_hex(parser)); }
13802 break;
13803 case 7:
Austin Schuh40c16522018-10-28 20:27:54 -070013804#line 1245 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013805 { CHECK_RETURN_TOP(escape(parser, p)); }
13806 break;
13807 case 8:
Austin Schuh40c16522018-10-28 20:27:54 -070013808#line 1251 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013809 { p--; {cs = stack[--top]; goto _again;} }
13810 break;
13811 case 9:
Austin Schuh40c16522018-10-28 20:27:54 -070013812#line 1254 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013813 { {stack[top++] = cs; cs = 19; goto _again;} }
13814 break;
13815 case 10:
Austin Schuh40c16522018-10-28 20:27:54 -070013816#line 1256 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013817 { p--; {stack[top++] = cs; cs = 27; goto _again;} }
13818 break;
13819 case 11:
Austin Schuh40c16522018-10-28 20:27:54 -070013820#line 1261 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013821 { start_member(parser); }
13822 break;
13823 case 12:
Austin Schuh40c16522018-10-28 20:27:54 -070013824#line 1262 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013825 { CHECK_RETURN_TOP(end_membername(parser)); }
13826 break;
13827 case 13:
Austin Schuh40c16522018-10-28 20:27:54 -070013828#line 1265 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013829 { end_member(parser); }
13830 break;
13831 case 14:
Austin Schuh40c16522018-10-28 20:27:54 -070013832#line 1271 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013833 { start_object(parser); }
13834 break;
13835 case 15:
Austin Schuh40c16522018-10-28 20:27:54 -070013836#line 1274 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013837 { end_object(parser); }
13838 break;
13839 case 16:
Austin Schuh40c16522018-10-28 20:27:54 -070013840#line 1280 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013841 { CHECK_RETURN_TOP(start_array(parser)); }
13842 break;
13843 case 17:
Austin Schuh40c16522018-10-28 20:27:54 -070013844#line 1284 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013845 { end_array(parser); }
13846 break;
13847 case 18:
Austin Schuh40c16522018-10-28 20:27:54 -070013848#line 1289 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013849 { start_number(parser, p); }
13850 break;
13851 case 19:
Austin Schuh40c16522018-10-28 20:27:54 -070013852#line 1290 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013853 { CHECK_RETURN_TOP(end_number(parser, p)); }
13854 break;
13855 case 20:
Austin Schuh40c16522018-10-28 20:27:54 -070013856#line 1292 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013857 { CHECK_RETURN_TOP(start_stringval(parser)); }
13858 break;
13859 case 21:
Austin Schuh40c16522018-10-28 20:27:54 -070013860#line 1293 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013861 { CHECK_RETURN_TOP(end_stringval(parser)); }
13862 break;
13863 case 22:
Austin Schuh40c16522018-10-28 20:27:54 -070013864#line 1295 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013865 { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
13866 break;
13867 case 23:
Austin Schuh40c16522018-10-28 20:27:54 -070013868#line 1297 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013869 { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
13870 break;
13871 case 24:
Austin Schuh40c16522018-10-28 20:27:54 -070013872#line 1299 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013873 { /* null value */ }
13874 break;
13875 case 25:
Austin Schuh40c16522018-10-28 20:27:54 -070013876#line 1301 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013877 { CHECK_RETURN_TOP(start_subobject(parser)); }
13878 break;
13879 case 26:
Austin Schuh40c16522018-10-28 20:27:54 -070013880#line 1302 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013881 { end_subobject(parser); }
13882 break;
13883 case 27:
Austin Schuh40c16522018-10-28 20:27:54 -070013884#line 1307 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013885 { p--; {cs = stack[--top]; goto _again;} }
13886 break;
Austin Schuh40c16522018-10-28 20:27:54 -070013887#line 1579 "upb/json/parser.c"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013888 }
13889 }
13890
13891_again:
13892 if ( cs == 0 )
13893 goto _out;
13894 if ( ++p != pe )
13895 goto _resume;
13896 _test_eof: {}
13897 _out: {}
13898 }
13899
Austin Schuh40c16522018-10-28 20:27:54 -070013900#line 1334 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013901
13902 if (p != pe) {
Austin Schuh40c16522018-10-28 20:27:54 -070013903 upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p);
Brian Silverman9c614bc2016-02-15 20:20:02 -050013904 upb_env_reporterror(parser->env, &parser->status);
13905 } else {
13906 capture_suspend(parser, &p);
13907 }
13908
13909error:
13910 /* Save parsing state back to parser. */
13911 parser->current_state = cs;
13912 parser->parser_top = top;
13913
13914 return p - buf;
13915}
13916
13917bool end(void *closure, const void *hd) {
13918 UPB_UNUSED(closure);
13919 UPB_UNUSED(hd);
13920
13921 /* Prevent compile warning on unused static constants. */
13922 UPB_UNUSED(json_start);
13923 UPB_UNUSED(json_en_number_machine);
13924 UPB_UNUSED(json_en_string_machine);
13925 UPB_UNUSED(json_en_value_machine);
13926 UPB_UNUSED(json_en_main);
13927 return true;
13928}
13929
13930static void json_parser_reset(upb_json_parser *p) {
13931 int cs;
13932 int top;
13933
13934 p->top = p->stack;
13935 p->top->f = NULL;
13936 p->top->is_map = false;
13937 p->top->is_mapentry = false;
13938
13939 /* Emit Ragel initialization of the parser. */
13940
Austin Schuh40c16522018-10-28 20:27:54 -070013941#line 1633 "upb/json/parser.c"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013942 {
13943 cs = json_start;
13944 top = 0;
13945 }
13946
Austin Schuh40c16522018-10-28 20:27:54 -070013947#line 1374 "upb/json/parser.rl"
Brian Silverman9c614bc2016-02-15 20:20:02 -050013948 p->current_state = cs;
13949 p->parser_top = top;
13950 accumulate_clear(p);
13951 p->multipart_state = MULTIPART_INACTIVE;
13952 p->capture = NULL;
13953 p->accumulated = NULL;
13954 upb_status_clear(&p->status);
13955}
13956
Austin Schuh40c16522018-10-28 20:27:54 -070013957static void visit_json_parsermethod(const upb_refcounted *r,
13958 upb_refcounted_visit *visit,
13959 void *closure) {
13960 const upb_json_parsermethod *method = (upb_json_parsermethod*)r;
13961 visit(r, upb_msgdef_upcast2(method->msg), closure);
13962}
13963
13964static void free_json_parsermethod(upb_refcounted *r) {
13965 upb_json_parsermethod *method = (upb_json_parsermethod*)r;
13966
13967 upb_inttable_iter i;
13968 upb_inttable_begin(&i, &method->name_tables);
13969 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
13970 upb_value val = upb_inttable_iter_value(&i);
13971 upb_strtable *t = upb_value_getptr(val);
13972 upb_strtable_uninit(t);
13973 upb_gfree(t);
13974 }
13975
13976 upb_inttable_uninit(&method->name_tables);
13977
13978 upb_gfree(r);
13979}
13980
13981static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
13982 upb_msg_field_iter i;
13983 upb_strtable *t;
13984
13985 /* It would be nice to stack-allocate this, but protobufs do not limit the
13986 * length of fields to any reasonable limit. */
13987 char *buf = NULL;
13988 size_t len = 0;
13989
13990 if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) {
13991 return;
13992 }
13993
13994 /* TODO(haberman): handle malloc failure. */
13995 t = upb_gmalloc(sizeof(*t));
13996 upb_strtable_init(t, UPB_CTYPE_CONSTPTR);
13997 upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(t));
13998
13999 for(upb_msg_field_begin(&i, md);
14000 !upb_msg_field_done(&i);
14001 upb_msg_field_next(&i)) {
14002 const upb_fielddef *f = upb_msg_iter_field(&i);
14003
14004 /* Add an entry for the JSON name. */
14005 size_t field_len = upb_fielddef_getjsonname(f, buf, len);
14006 if (field_len > len) {
14007 size_t len2;
14008 buf = upb_grealloc(buf, 0, field_len);
14009 len = field_len;
14010 len2 = upb_fielddef_getjsonname(f, buf, len);
14011 UPB_ASSERT(len == len2);
14012 }
14013 upb_strtable_insert(t, buf, upb_value_constptr(f));
14014
14015 if (strcmp(buf, upb_fielddef_name(f)) != 0) {
14016 /* Since the JSON name is different from the regular field name, add an
14017 * entry for the raw name (compliant proto3 JSON parsers must accept
14018 * both). */
14019 upb_strtable_insert(t, upb_fielddef_name(f), upb_value_constptr(f));
14020 }
14021
14022 if (upb_fielddef_issubmsg(f)) {
14023 add_jsonname_table(m, upb_fielddef_msgsubdef(f));
14024 }
14025 }
14026
14027 upb_gfree(buf);
14028}
Brian Silverman9c614bc2016-02-15 20:20:02 -050014029
14030/* Public API *****************************************************************/
14031
Austin Schuh40c16522018-10-28 20:27:54 -070014032upb_json_parser *upb_json_parser_create(upb_env *env,
14033 const upb_json_parsermethod *method,
14034 upb_sink *output) {
Brian Silverman9c614bc2016-02-15 20:20:02 -050014035#ifndef NDEBUG
14036 const size_t size_before = upb_env_bytesallocated(env);
14037#endif
14038 upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
14039 if (!p) return false;
14040
14041 p->env = env;
Austin Schuh40c16522018-10-28 20:27:54 -070014042 p->method = method;
Brian Silverman9c614bc2016-02-15 20:20:02 -050014043 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
14044 p->accumulate_buf = NULL;
14045 p->accumulate_buf_size = 0;
Austin Schuh40c16522018-10-28 20:27:54 -070014046 upb_bytessink_reset(&p->input_, &method->input_handler_, p);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014047
14048 json_parser_reset(p);
14049 upb_sink_reset(&p->top->sink, output->handlers, output->closure);
14050 p->top->m = upb_handlers_msgdef(output->handlers);
Austin Schuh40c16522018-10-28 20:27:54 -070014051 set_name_table(p, p->top);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014052
14053 /* If this fails, uncomment and increase the value in parser.h. */
14054 /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
Austin Schuh40c16522018-10-28 20:27:54 -070014055 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=
14056 UPB_JSON_PARSER_SIZE);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014057 return p;
14058}
14059
14060upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
14061 return &p->input_;
14062}
Austin Schuh40c16522018-10-28 20:27:54 -070014063
14064upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
14065 const void* owner) {
14066 static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
14067 free_json_parsermethod};
14068 upb_json_parsermethod *ret = upb_gmalloc(sizeof(*ret));
14069 upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);
14070
14071 ret->msg = md;
14072 upb_ref2(md, ret);
14073
14074 upb_byteshandler_init(&ret->input_handler_);
14075 upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
14076 upb_byteshandler_setendstr(&ret->input_handler_, end, ret);
14077
14078 upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);
14079
14080 add_jsonname_table(ret, md);
14081
14082 return ret;
14083}
14084
14085const upb_byteshandler *upb_json_parsermethod_inputhandler(
14086 const upb_json_parsermethod *m) {
14087 return &m->input_handler_;
14088}
Brian Silverman9c614bc2016-02-15 20:20:02 -050014089/*
14090** This currently uses snprintf() to format primitives, and could be optimized
14091** further.
14092*/
14093
14094
Brian Silverman9c614bc2016-02-15 20:20:02 -050014095#include <string.h>
14096#include <stdint.h>
14097
/* State for one JSON printer: where output goes, how deeply nested the
 * current position is, and whether a comma is needed before the next element
 * at each nesting level. */
struct upb_json_printer {
  upb_sink input_;
  /* BytesSink closure.  Filled in by upb_bytessink_start() and passed back on
   * every upb_bytessink_putbuf() call. */
  void *subc_;
  /* Sink that receives the generated JSON text. */
  upb_bytessink *output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output.  Also used as the index into first_elem_. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
};
14118
14119/* StringPiece; a pointer plus a length. */
typedef struct {
  char *ptr;   /* Heap-allocated characters; released by freestrpc(). */
  size_t len;  /* Number of characters, not counting any terminating NUL. */
} strpc;
14124
Austin Schuh40c16522018-10-28 20:27:54 -070014125void freestrpc(void *ptr) {
14126 strpc *pc = ptr;
14127 upb_gfree(pc->ptr);
14128 upb_gfree(pc);
14129}
14130
14131/* Convert fielddef name to JSON name and return as a string piece. */
14132strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
14133 bool preserve_fieldnames) {
14134 /* TODO(haberman): handle malloc failure. */
14135 strpc *ret = upb_gmalloc(sizeof(*ret));
14136 if (preserve_fieldnames) {
14137 ret->ptr = upb_gstrdup(upb_fielddef_name(f));
14138 ret->len = strlen(ret->ptr);
14139 } else {
14140 size_t len;
14141 ret->len = upb_fielddef_getjsonname(f, NULL, 0);
14142 ret->ptr = upb_gmalloc(ret->len);
14143 len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
14144 UPB_ASSERT(len == ret->len);
14145 ret->len--; /* NULL */
14146 }
14147
14148 upb_handlers_addcleanup(h, ret, freestrpc);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014149 return ret;
14150}
14151
14152/* ------------ JSON string printing: values, maps, arrays ------------------ */
14153
14154static void print_data(
14155 upb_json_printer *p, const char *buf, unsigned int len) {
14156 /* TODO: Will need to change if we support pushback from the sink. */
14157 size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
Austin Schuh40c16522018-10-28 20:27:54 -070014158 UPB_ASSERT(n == len);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014159}
14160
14161static void print_comma(upb_json_printer *p) {
14162 if (!p->first_elem_[p->depth_]) {
14163 print_data(p, ",", 1);
14164 }
14165 p->first_elem_[p->depth_] = false;
14166}
14167
14168/* Helpers that print properly formatted elements to the JSON output stream. */
14169
14170/* Used for escaping control chars in strings. */
14171static const char kControlCharLimit = 0x20;
14172
14173UPB_INLINE bool is_json_escaped(char c) {
14174 /* See RFC 4627. */
14175 unsigned char uc = (unsigned char)c;
14176 return uc < kControlCharLimit || uc == '"' || uc == '\\';
14177}
14178
Austin Schuh40c16522018-10-28 20:27:54 -070014179UPB_INLINE const char* json_nice_escape(char c) {
Brian Silverman9c614bc2016-02-15 20:20:02 -050014180 switch (c) {
14181 case '"': return "\\\"";
14182 case '\\': return "\\\\";
14183 case '\b': return "\\b";
14184 case '\f': return "\\f";
14185 case '\n': return "\\n";
14186 case '\r': return "\\r";
14187 case '\t': return "\\t";
14188 default: return NULL;
14189 }
14190}
14191
14192/* Write a properly escaped string chunk. The surrounding quotes are *not*
14193 * printed; this is so that the caller has the option of emitting the string
14194 * content in chunks. */
14195static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
14196 const char* unescaped_run = NULL;
14197 unsigned int i;
14198 for (i = 0; i < len; i++) {
14199 char c = buf[i];
14200 /* Handle escaping. */
14201 if (is_json_escaped(c)) {
14202 /* Use a "nice" escape, like \n, if one exists for this character. */
14203 const char* escape = json_nice_escape(c);
14204 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
14205 * escape. */
14206 char escape_buf[8];
14207 if (!escape) {
14208 unsigned char byte = (unsigned char)c;
14209 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
14210 escape = escape_buf;
14211 }
14212
14213 /* N.B. that we assume that the input encoding is equal to the output
14214 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
14215 * can simply pass the bytes through. */
14216
14217 /* If there's a current run of unescaped chars, print that run first. */
14218 if (unescaped_run) {
14219 print_data(p, unescaped_run, &buf[i] - unescaped_run);
14220 unescaped_run = NULL;
14221 }
14222 /* Then print the escape code. */
14223 print_data(p, escape, strlen(escape));
14224 } else {
14225 /* Add to the current unescaped run of characters. */
14226 if (unescaped_run == NULL) {
14227 unescaped_run = &buf[i];
14228 }
14229 }
14230 }
14231
14232 /* If the string ended in a run of unescaped characters, print that last run. */
14233 if (unescaped_run) {
14234 print_data(p, unescaped_run, &buf[len] - unescaped_run);
14235 }
14236}
14237
14238#define CHKLENGTH(x) if (!(x)) return -1;
14239
14240/* Helpers that format floating point values according to our custom formats.
14241 * Right now we use %.8g and %.17g for float/double, respectively, to match
14242 * proto2::util::JsonFormat's defaults. May want to change this later. */
14243
Austin Schuh40c16522018-10-28 20:27:54 -070014244const char neginf[] = "\"-Infinity\"";
14245const char inf[] = "\"Infinity\"";
14246
Brian Silverman9c614bc2016-02-15 20:20:02 -050014247static size_t fmt_double(double val, char* buf, size_t length) {
Austin Schuh40c16522018-10-28 20:27:54 -070014248 if (val == (1.0 / 0.0)) {
14249 CHKLENGTH(length >= strlen(inf));
14250 strcpy(buf, inf);
14251 return strlen(inf);
14252 } else if (val == (-1.0 / 0.0)) {
14253 CHKLENGTH(length >= strlen(neginf));
14254 strcpy(buf, neginf);
14255 return strlen(neginf);
14256 } else {
14257 size_t n = _upb_snprintf(buf, length, "%.17g", val);
14258 CHKLENGTH(n > 0 && n < length);
14259 return n;
14260 }
Brian Silverman9c614bc2016-02-15 20:20:02 -050014261}
14262
/* Formats `val` into `buf` (capacity `length` bytes) using %.8g.
 *
 * Non-finite values are handed to fmt_double() so that floats and doubles are
 * printed the same way instead of as the bare tokens that %g would produce.
 *
 * Returns the number of characters written, or (size_t)-1 if the result does
 * not fit in `buf`. */
static size_t fmt_float(float val, char* buf, size_t length) {
  size_t n;

  if (val != val || val == (1.0 / 0.0) || val == (-1.0 / 0.0)) {
    return fmt_double(val, buf, length);
  }

  n = _upb_snprintf(buf, length, "%.8g", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
14268
/* Writes "true" or "false" into `buf` (capacity `length` bytes).  Returns the
 * number of characters written, or (size_t)-1 if it does not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  size_t written = _upb_snprintf(buf, length, "%s", text);

  CHKLENGTH(written > 0 && written < length);
  return written;
}
14274
/* Writes `val` in decimal into `buf` (capacity `length` bytes).  Returns the
 * number of characters written, or (size_t)-1 if it does not fit.
 *
 * The parameter is `long long` rather than `long` because this formatter is
 * instantiated for int32_t, uint32_t and int64_t.  Where `long` is 32 bits, a
 * `long` parameter would truncate int64_t values and turn large uint32_t
 * values negative. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
14280
/* Writes `val` in decimal into `buf` (capacity `length` bytes).  Returns the
 * number of characters written, or (size_t)-1 if it does not fit. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%llu", val);

  CHKLENGTH(written > 0 && written < length);
  return written;
}
14286
14287/* Print a map key given a field name. Called by scalar field handlers and by
14288 * startseq for repeated fields. */
14289static bool putkey(void *closure, const void *handler_data) {
14290 upb_json_printer *p = closure;
14291 const strpc *key = handler_data;
14292 print_comma(p);
14293 print_data(p, "\"", 1);
14294 putstring(p, key->ptr, key->len);
14295 print_data(p, "\":", 2);
14296 return true;
14297}
14298
14299#define CHKFMT(val) if ((val) == (size_t)-1) return false;
14300#define CHK(val) if (!(val)) return false;
14301
/* Generates three value handlers for the C type `type`, all of which format
 * the value with `fmt_func` into a 64-byte stack buffer:
 *   put<type>       prints just the value;
 *   scalar_<type>   prints the field key (via putkey), then the value;
 *   repeated_<type> prints a separating comma if needed, then the value.
 * Each handler returns false if formatting fails. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, data, length);                                             \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    CHK(putkey(closure, handler_data));                                      \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *p = closure;                                           \
    print_comma(p);                                                          \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }
14325
/* Generates putmapkey_<type>, which prints a value of `type` as a quoted map
 * key followed by a colon: "<value>":.  It relies on put<type> having been
 * generated by TYPE_HANDLERS; the `fmt_func` argument is not referenced in
 * the expansion. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    print_data(p, "\"", 1);                                                  \
    CHK(put##type(closure, handler_data, val));                              \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
14335
14336TYPE_HANDLERS(double, fmt_double)
14337TYPE_HANDLERS(float, fmt_float)
14338TYPE_HANDLERS(bool, fmt_bool)
14339TYPE_HANDLERS(int32_t, fmt_int64)
14340TYPE_HANDLERS(uint32_t, fmt_int64)
14341TYPE_HANDLERS(int64_t, fmt_int64)
14342TYPE_HANDLERS(uint64_t, fmt_uint64)
14343
14344/* double and float are not allowed to be map keys. */
14345TYPE_HANDLERS_MAPKEY(bool, fmt_bool)
14346TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64)
14347TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
14348TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64)
14349TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
14350
14351#undef TYPE_HANDLERS
14352#undef TYPE_HANDLERS_MAPKEY
14353
/* Handler data attached to enum-field handlers by set_enum_hd(). */
typedef struct {
  void *keyname;               /* strpc from newstrpc(); passed to putkey(). */
  const upb_enumdef *enumdef;  /* Used to map a number to its symbolic name. */
} EnumHandlerData;
14358
14359static bool scalar_enum(void *closure, const void *handler_data,
14360 int32_t val) {
14361 const EnumHandlerData *hd = handler_data;
14362 upb_json_printer *p = closure;
14363 const char *symbolic_name;
14364
14365 CHK(putkey(closure, hd->keyname));
14366
14367 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
14368 if (symbolic_name) {
14369 print_data(p, "\"", 1);
14370 putstring(p, symbolic_name, strlen(symbolic_name));
14371 print_data(p, "\"", 1);
14372 } else {
14373 putint32_t(closure, NULL, val);
14374 }
14375
14376 return true;
14377}
14378
14379static void print_enum_symbolic_name(upb_json_printer *p,
14380 const upb_enumdef *def,
14381 int32_t val) {
14382 const char *symbolic_name = upb_enumdef_iton(def, val);
14383 if (symbolic_name) {
14384 print_data(p, "\"", 1);
14385 putstring(p, symbolic_name, strlen(symbolic_name));
14386 print_data(p, "\"", 1);
14387 } else {
14388 putint32_t(p, NULL, val);
14389 }
14390}
14391
14392static bool repeated_enum(void *closure, const void *handler_data,
14393 int32_t val) {
14394 const EnumHandlerData *hd = handler_data;
14395 upb_json_printer *p = closure;
14396 print_comma(p);
14397
14398 print_enum_symbolic_name(p, hd->enumdef, val);
14399
14400 return true;
14401}
14402
14403static bool mapvalue_enum(void *closure, const void *handler_data,
14404 int32_t val) {
14405 const EnumHandlerData *hd = handler_data;
14406 upb_json_printer *p = closure;
14407
14408 print_enum_symbolic_name(p, hd->enumdef, val);
14409
14410 return true;
14411}
14412
14413static void *scalar_startsubmsg(void *closure, const void *handler_data) {
14414 return putkey(closure, handler_data) ? closure : UPB_BREAK;
14415}
14416
14417static void *repeated_startsubmsg(void *closure, const void *handler_data) {
14418 upb_json_printer *p = closure;
14419 UPB_UNUSED(handler_data);
14420 print_comma(p);
14421 return closure;
14422}
14423
14424static void start_frame(upb_json_printer *p) {
14425 p->depth_++;
14426 p->first_elem_[p->depth_] = true;
14427 print_data(p, "{", 1);
14428}
14429
14430static void end_frame(upb_json_printer *p) {
14431 print_data(p, "}", 1);
14432 p->depth_--;
14433}
14434
14435static bool printer_startmsg(void *closure, const void *handler_data) {
14436 upb_json_printer *p = closure;
14437 UPB_UNUSED(handler_data);
14438 if (p->depth_ == 0) {
14439 upb_bytessink_start(p->output_, 0, &p->subc_);
14440 }
14441 start_frame(p);
14442 return true;
14443}
14444
14445static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
14446 upb_json_printer *p = closure;
14447 UPB_UNUSED(handler_data);
14448 UPB_UNUSED(s);
14449 end_frame(p);
14450 if (p->depth_ == 0) {
14451 upb_bytessink_end(p->output_);
14452 }
14453 return true;
14454}
14455
14456static void *startseq(void *closure, const void *handler_data) {
14457 upb_json_printer *p = closure;
14458 CHK(putkey(closure, handler_data));
14459 p->depth_++;
14460 p->first_elem_[p->depth_] = true;
14461 print_data(p, "[", 1);
14462 return closure;
14463}
14464
14465static bool endseq(void *closure, const void *handler_data) {
14466 upb_json_printer *p = closure;
14467 UPB_UNUSED(handler_data);
14468 print_data(p, "]", 1);
14469 p->depth_--;
14470 return true;
14471}
14472
14473static void *startmap(void *closure, const void *handler_data) {
14474 upb_json_printer *p = closure;
14475 CHK(putkey(closure, handler_data));
14476 p->depth_++;
14477 p->first_elem_[p->depth_] = true;
14478 print_data(p, "{", 1);
14479 return closure;
14480}
14481
14482static bool endmap(void *closure, const void *handler_data) {
14483 upb_json_printer *p = closure;
14484 UPB_UNUSED(handler_data);
14485 print_data(p, "}", 1);
14486 p->depth_--;
14487 return true;
14488}
14489
14490static size_t putstr(void *closure, const void *handler_data, const char *str,
14491 size_t len, const upb_bufhandle *handle) {
14492 upb_json_printer *p = closure;
14493 UPB_UNUSED(handler_data);
14494 UPB_UNUSED(handle);
14495 putstring(p, str, len);
14496 return len;
14497}
14498
14499/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
14500static size_t putbytes(void *closure, const void *handler_data, const char *str,
14501 size_t len, const upb_bufhandle *handle) {
14502 upb_json_printer *p = closure;
14503
14504 /* This is the regular base64, not the "web-safe" version. */
14505 static const char base64[] =
14506 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
14507
14508 /* Base64-encode. */
14509 char data[16000];
14510 const char *limit = data + sizeof(data);
14511 const unsigned char *from = (const unsigned char*)str;
14512 char *to = data;
14513 size_t remaining = len;
14514 size_t bytes;
14515
14516 UPB_UNUSED(handler_data);
14517 UPB_UNUSED(handle);
14518
14519 while (remaining > 2) {
14520 /* TODO(haberman): handle encoded lengths > sizeof(data) */
Austin Schuh40c16522018-10-28 20:27:54 -070014521 UPB_ASSERT((limit - to) >= 4);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014522
14523 to[0] = base64[from[0] >> 2];
14524 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
14525 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
14526 to[3] = base64[from[2] & 0x3f];
14527
14528 remaining -= 3;
14529 to += 4;
14530 from += 3;
14531 }
14532
14533 switch (remaining) {
14534 case 2:
14535 to[0] = base64[from[0] >> 2];
14536 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
14537 to[2] = base64[(from[1] & 0xf) << 2];
14538 to[3] = '=';
14539 to += 4;
14540 from += 2;
14541 break;
14542 case 1:
14543 to[0] = base64[from[0] >> 2];
14544 to[1] = base64[((from[0] & 0x3) << 4)];
14545 to[2] = '=';
14546 to[3] = '=';
14547 to += 4;
14548 from += 1;
14549 break;
14550 }
14551
14552 bytes = to - data;
14553 print_data(p, "\"", 1);
14554 putstring(p, data, bytes);
14555 print_data(p, "\"", 1);
14556 return len;
14557}
14558
14559static void *scalar_startstr(void *closure, const void *handler_data,
14560 size_t size_hint) {
14561 upb_json_printer *p = closure;
14562 UPB_UNUSED(handler_data);
14563 UPB_UNUSED(size_hint);
14564 CHK(putkey(closure, handler_data));
14565 print_data(p, "\"", 1);
14566 return p;
14567}
14568
14569static size_t scalar_str(void *closure, const void *handler_data,
14570 const char *str, size_t len,
14571 const upb_bufhandle *handle) {
14572 CHK(putstr(closure, handler_data, str, len, handle));
14573 return len;
14574}
14575
14576static bool scalar_endstr(void *closure, const void *handler_data) {
14577 upb_json_printer *p = closure;
14578 UPB_UNUSED(handler_data);
14579 print_data(p, "\"", 1);
14580 return true;
14581}
14582
14583static void *repeated_startstr(void *closure, const void *handler_data,
14584 size_t size_hint) {
14585 upb_json_printer *p = closure;
14586 UPB_UNUSED(handler_data);
14587 UPB_UNUSED(size_hint);
14588 print_comma(p);
14589 print_data(p, "\"", 1);
14590 return p;
14591}
14592
14593static size_t repeated_str(void *closure, const void *handler_data,
14594 const char *str, size_t len,
14595 const upb_bufhandle *handle) {
14596 CHK(putstr(closure, handler_data, str, len, handle));
14597 return len;
14598}
14599
14600static bool repeated_endstr(void *closure, const void *handler_data) {
14601 upb_json_printer *p = closure;
14602 UPB_UNUSED(handler_data);
14603 print_data(p, "\"", 1);
14604 return true;
14605}
14606
14607static void *mapkeyval_startstr(void *closure, const void *handler_data,
14608 size_t size_hint) {
14609 upb_json_printer *p = closure;
14610 UPB_UNUSED(handler_data);
14611 UPB_UNUSED(size_hint);
14612 print_data(p, "\"", 1);
14613 return p;
14614}
14615
14616static size_t mapkey_str(void *closure, const void *handler_data,
14617 const char *str, size_t len,
14618 const upb_bufhandle *handle) {
14619 CHK(putstr(closure, handler_data, str, len, handle));
14620 return len;
14621}
14622
14623static bool mapkey_endstr(void *closure, const void *handler_data) {
14624 upb_json_printer *p = closure;
14625 UPB_UNUSED(handler_data);
14626 print_data(p, "\":", 2);
14627 return true;
14628}
14629
14630static bool mapvalue_endstr(void *closure, const void *handler_data) {
14631 upb_json_printer *p = closure;
14632 UPB_UNUSED(handler_data);
14633 print_data(p, "\"", 1);
14634 return true;
14635}
14636
14637static size_t scalar_bytes(void *closure, const void *handler_data,
14638 const char *str, size_t len,
14639 const upb_bufhandle *handle) {
14640 CHK(putkey(closure, handler_data));
14641 CHK(putbytes(closure, handler_data, str, len, handle));
14642 return len;
14643}
14644
14645static size_t repeated_bytes(void *closure, const void *handler_data,
14646 const char *str, size_t len,
14647 const upb_bufhandle *handle) {
14648 upb_json_printer *p = closure;
14649 print_comma(p);
14650 CHK(putbytes(closure, handler_data, str, len, handle));
14651 return len;
14652}
14653
14654static size_t mapkey_bytes(void *closure, const void *handler_data,
14655 const char *str, size_t len,
14656 const upb_bufhandle *handle) {
14657 upb_json_printer *p = closure;
14658 CHK(putbytes(closure, handler_data, str, len, handle));
14659 print_data(p, ":", 1);
14660 return len;
14661}
14662
14663static void set_enum_hd(upb_handlers *h,
14664 const upb_fielddef *f,
Austin Schuh40c16522018-10-28 20:27:54 -070014665 bool preserve_fieldnames,
Brian Silverman9c614bc2016-02-15 20:20:02 -050014666 upb_handlerattr *attr) {
Austin Schuh40c16522018-10-28 20:27:54 -070014667 EnumHandlerData *hd = upb_gmalloc(sizeof(EnumHandlerData));
Brian Silverman9c614bc2016-02-15 20:20:02 -050014668 hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
Austin Schuh40c16522018-10-28 20:27:54 -070014669 hd->keyname = newstrpc(h, f, preserve_fieldnames);
14670 upb_handlers_addcleanup(h, hd, upb_gfree);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014671 upb_handlerattr_sethandlerdata(attr, hd);
14672}
14673
14674/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
14675 * in a map).
14676 *
14677 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
14678 * key or value cases properly. The right way to do this is to allocate a
14679 * temporary structure at the start of a mapentry submessage, store key and
14680 * value data in it as key and value handlers are called, and then print the
14681 * key/value pair once at the end of the submessage. If we don't do this, we
14682 * should at least detect the case and throw an error. However, so far all of
14683 * our sources that emit mapentry messages do so canonically (with one key
14684 * field, and then one value field), so this is not a pressing concern at the
14685 * moment. */
Austin Schuh40c16522018-10-28 20:27:54 -070014686void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
14687 upb_handlers *h) {
Brian Silverman9c614bc2016-02-15 20:20:02 -050014688 const upb_msgdef *md = upb_handlers_msgdef(h);
14689
14690 /* A mapentry message is printed simply as '"key": value'. Rather than
14691 * special-case key and value for every type below, we just handle both
14692 * fields explicitly here. */
14693 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
14694 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
14695
14696 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
14697
14698 UPB_UNUSED(closure);
14699
14700 switch (upb_fielddef_type(key_field)) {
14701 case UPB_TYPE_INT32:
14702 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
14703 break;
14704 case UPB_TYPE_INT64:
14705 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
14706 break;
14707 case UPB_TYPE_UINT32:
14708 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
14709 break;
14710 case UPB_TYPE_UINT64:
14711 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
14712 break;
14713 case UPB_TYPE_BOOL:
14714 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
14715 break;
14716 case UPB_TYPE_STRING:
14717 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
14718 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
14719 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
14720 break;
14721 case UPB_TYPE_BYTES:
14722 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
14723 break;
14724 default:
Austin Schuh40c16522018-10-28 20:27:54 -070014725 UPB_ASSERT(false);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014726 break;
14727 }
14728
14729 switch (upb_fielddef_type(value_field)) {
14730 case UPB_TYPE_INT32:
14731 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
14732 break;
14733 case UPB_TYPE_INT64:
14734 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
14735 break;
14736 case UPB_TYPE_UINT32:
14737 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
14738 break;
14739 case UPB_TYPE_UINT64:
14740 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
14741 break;
14742 case UPB_TYPE_BOOL:
14743 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
14744 break;
14745 case UPB_TYPE_FLOAT:
14746 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
14747 break;
14748 case UPB_TYPE_DOUBLE:
14749 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
14750 break;
14751 case UPB_TYPE_STRING:
14752 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
14753 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
14754 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
14755 break;
14756 case UPB_TYPE_BYTES:
14757 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
14758 break;
14759 case UPB_TYPE_ENUM: {
14760 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
Austin Schuh40c16522018-10-28 20:27:54 -070014761 set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014762 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
14763 upb_handlerattr_uninit(&enum_attr);
14764 break;
14765 }
14766 case UPB_TYPE_MESSAGE:
14767 /* No handler necessary -- the submsg handlers will print the message
14768 * as appropriate. */
14769 break;
14770 }
14771
14772 upb_handlerattr_uninit(&empty_attr);
14773}
14774
14775void printer_sethandlers(const void *closure, upb_handlers *h) {
14776 const upb_msgdef *md = upb_handlers_msgdef(h);
14777 bool is_mapentry = upb_msgdef_mapentry(md);
14778 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
14779 upb_msg_field_iter i;
Austin Schuh40c16522018-10-28 20:27:54 -070014780 const bool *preserve_fieldnames_ptr = closure;
14781 const bool preserve_fieldnames = *preserve_fieldnames_ptr;
Brian Silverman9c614bc2016-02-15 20:20:02 -050014782
14783 if (is_mapentry) {
14784 /* mapentry messages are sufficiently different that we handle them
14785 * separately. */
Austin Schuh40c16522018-10-28 20:27:54 -070014786 printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014787 return;
14788 }
14789
14790 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
14791 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
14792
14793#define TYPE(type, name, ctype) \
14794 case type: \
14795 if (upb_fielddef_isseq(f)) { \
14796 upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
14797 } else { \
14798 upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
14799 } \
14800 break;
14801
14802 upb_msg_field_begin(&i, md);
14803 for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
14804 const upb_fielddef *f = upb_msg_iter_field(&i);
14805
14806 upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
Austin Schuh40c16522018-10-28 20:27:54 -070014807 upb_handlerattr_sethandlerdata(&name_attr,
14808 newstrpc(h, f, preserve_fieldnames));
Brian Silverman9c614bc2016-02-15 20:20:02 -050014809
14810 if (upb_fielddef_ismap(f)) {
14811 upb_handlers_setstartseq(h, f, startmap, &name_attr);
14812 upb_handlers_setendseq(h, f, endmap, &name_attr);
14813 } else if (upb_fielddef_isseq(f)) {
14814 upb_handlers_setstartseq(h, f, startseq, &name_attr);
14815 upb_handlers_setendseq(h, f, endseq, &empty_attr);
14816 }
14817
14818 switch (upb_fielddef_type(f)) {
14819 TYPE(UPB_TYPE_FLOAT, float, float);
14820 TYPE(UPB_TYPE_DOUBLE, double, double);
14821 TYPE(UPB_TYPE_BOOL, bool, bool);
14822 TYPE(UPB_TYPE_INT32, int32, int32_t);
14823 TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
14824 TYPE(UPB_TYPE_INT64, int64, int64_t);
14825 TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
14826 case UPB_TYPE_ENUM: {
14827 /* For now, we always emit symbolic names for enums. We may want an
14828 * option later to control this behavior, but we will wait for a real
14829 * need first. */
14830 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
Austin Schuh40c16522018-10-28 20:27:54 -070014831 set_enum_hd(h, f, preserve_fieldnames, &enum_attr);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014832
14833 if (upb_fielddef_isseq(f)) {
14834 upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
14835 } else {
14836 upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
14837 }
14838
14839 upb_handlerattr_uninit(&enum_attr);
14840 break;
14841 }
14842 case UPB_TYPE_STRING:
14843 if (upb_fielddef_isseq(f)) {
14844 upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
14845 upb_handlers_setstring(h, f, repeated_str, &empty_attr);
14846 upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
14847 } else {
14848 upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
14849 upb_handlers_setstring(h, f, scalar_str, &empty_attr);
14850 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
14851 }
14852 break;
14853 case UPB_TYPE_BYTES:
14854 /* XXX: this doesn't support strings that span buffers yet. The base64
14855 * encoder will need to be made resumable for this to work properly. */
14856 if (upb_fielddef_isseq(f)) {
14857 upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
14858 } else {
14859 upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
14860 }
14861 break;
14862 case UPB_TYPE_MESSAGE:
14863 if (upb_fielddef_isseq(f)) {
14864 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
14865 } else {
14866 upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
14867 }
14868 break;
14869 }
14870
14871 upb_handlerattr_uninit(&name_attr);
14872 }
14873
14874 upb_handlerattr_uninit(&empty_attr);
14875#undef TYPE
14876}
14877
14878static void json_printer_reset(upb_json_printer *p) {
14879 p->depth_ = 0;
14880}
14881
14882
14883/* Public API *****************************************************************/
14884
14885upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
14886 upb_bytessink *output) {
14887#ifndef NDEBUG
14888 size_t size_before = upb_env_bytesallocated(e);
14889#endif
14890
14891 upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
14892 if (!p) return NULL;
14893
14894 p->output_ = output;
14895 json_printer_reset(p);
14896 upb_sink_reset(&p->input_, h, p);
14897
14898 /* If this fails, increase the value in printer.h. */
Austin Schuh40c16522018-10-28 20:27:54 -070014899 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(e) - size_before <=
14900 UPB_JSON_PRINTER_SIZE);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014901 return p;
14902}
14903
14904upb_sink *upb_json_printer_input(upb_json_printer *p) {
14905 return &p->input_;
14906}
14907
14908const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
Austin Schuh40c16522018-10-28 20:27:54 -070014909 bool preserve_fieldnames,
Brian Silverman9c614bc2016-02-15 20:20:02 -050014910 const void *owner) {
Austin Schuh40c16522018-10-28 20:27:54 -070014911 return upb_handlers_newfrozen(
14912 md, owner, printer_sethandlers, &preserve_fieldnames);
Brian Silverman9c614bc2016-02-15 20:20:02 -050014913}