blob: de13dfa8909a8aae705723d7d080f3a6d0dce7c0 [file] [log] [blame]
Austin Schuh40c16522018-10-28 20:27:54 -07001// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31#include "protobuf.h"
32#include "utf8.h"
33
34/* stringsink *****************************************************************/
35
36typedef struct {
37 upb_byteshandler handler;
38 upb_bytessink sink;
39 char *ptr;
40 size_t len, size;
41} stringsink;
42
43
44static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
45 stringsink *sink = _sink;
46 sink->len = 0;
47 return sink;
48}
49
50static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
51 size_t len, const upb_bufhandle *handle) {
52 stringsink *sink = _sink;
53 size_t new_size = sink->size;
54
55 UPB_UNUSED(hd);
56 UPB_UNUSED(handle);
57
58 while (sink->len + len > new_size) {
59 new_size *= 2;
60 }
61
62 if (new_size != sink->size) {
63 sink->ptr = realloc(sink->ptr, new_size);
64 sink->size = new_size;
65 }
66
67 memcpy(sink->ptr + sink->len, ptr, len);
68 sink->len += len;
69
70 return len;
71}
72
73void stringsink_init(stringsink *sink) {
74 upb_byteshandler_init(&sink->handler);
75 upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
76 upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
77
78 upb_bytessink_reset(&sink->sink, &sink->handler, sink);
79
80 sink->size = 32;
81 sink->ptr = malloc(sink->size);
82 sink->len = 0;
83}
84
85void stringsink_uninit(stringsink *sink) { free(sink->ptr); }
86
87/* stackenv *****************************************************************/
88
89// Stack-allocated context during an encode/decode operation. Contains the upb
90// environment and its stack-based allocator, an initial buffer for allocations
91// to avoid malloc() when possible, and a template for PHP exception messages
92// if any error occurs.
93#define STACK_ENV_STACKBYTES 4096
94typedef struct {
95 upb_env env;
96 const char *php_error_template;
97 char allocbuf[STACK_ENV_STACKBYTES];
98} stackenv;
99
100
101static void stackenv_init(stackenv* se, const char* errmsg);
102static void stackenv_uninit(stackenv* se);
103
104// Callback invoked by upb if any error occurs during parsing or serialization.
105static bool env_error_func(void* ud, const upb_status* status) {
106 char err_msg[100] = "";
107 stackenv* se = ud;
108 // Free the env -- zend_error will longjmp up the stack past the
109 // encode/decode function so it would not otherwise have been freed.
110 stackenv_uninit(se);
111
112 // TODO(teboring): have a way to verify that this is actually a parse error,
113 // instead of just throwing "parse error" unconditionally.
114 sprintf(err_msg, se->php_error_template, upb_status_errmsg(status));
115 TSRMLS_FETCH();
116 zend_throw_exception(NULL, err_msg, 0 TSRMLS_CC);
117 // Never reached.
118 return false;
119}
120
121static void stackenv_init(stackenv* se, const char* errmsg) {
122 se->php_error_template = errmsg;
123 upb_env_init2(&se->env, se->allocbuf, sizeof(se->allocbuf), NULL);
124 upb_env_seterrorfunc(&se->env, env_error_func, se);
125}
126
127static void stackenv_uninit(stackenv* se) {
128 upb_env_uninit(&se->env);
129}
130
131// -----------------------------------------------------------------------------
132// Parsing.
133// -----------------------------------------------------------------------------
134
// Yields the object of type `type` stored `ofs` bytes past the address `msg`.
// The result is an lvalue, so it can be read or assigned. Every argument and
// the expansion as a whole are parenthesized so that expression arguments
// (e.g. `base + 1`) and surrounding operators bind as the caller expects.
#define DEREF(msg, ofs, type) (*(type*)(((uint8_t *)(msg)) + (ofs)))
136
137// Creates a handlerdata that simply contains the offset for this field.
138static const void* newhandlerdata(upb_handlers* h, uint32_t ofs) {
139 size_t* hd_ofs = (size_t*)malloc(sizeof(size_t));
140 *hd_ofs = ofs;
141 upb_handlers_addcleanup(h, hd_ofs, free);
142 return hd_ofs;
143}
144
145typedef size_t (*encodeunknown_handlerfunc)(void* _sink, const void* hd,
146 const char* ptr, size_t len,
147 const upb_bufhandle* handle);
148
149typedef struct {
150 encodeunknown_handlerfunc handler;
151} unknownfields_handlerdata_t;
152
153// Creates a handlerdata for unknown fields.
154static const void *newunknownfieldshandlerdata(upb_handlers* h) {
155 unknownfields_handlerdata_t* hd =
156 (unknownfields_handlerdata_t*)malloc(sizeof(unknownfields_handlerdata_t));
157 hd->handler = stringsink_string;
158 upb_handlers_addcleanup(h, hd, free);
159 return hd;
160}
161
162typedef struct {
163 size_t ofs;
164 const upb_msgdef *md;
165} submsg_handlerdata_t;
166
167// Creates a handlerdata that contains offset and submessage type information.
168static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs,
169 const upb_fielddef* f) {
170 submsg_handlerdata_t* hd =
171 (submsg_handlerdata_t*)malloc(sizeof(submsg_handlerdata_t));
172 hd->ofs = ofs;
173 hd->md = upb_fielddef_msgsubdef(f);
174 upb_handlers_addcleanup(h, hd, free);
175 return hd;
176}
177
178typedef struct {
179 size_t ofs; // union data slot
180 size_t case_ofs; // oneof_case field
181 int property_ofs; // properties table cache
182 uint32_t oneof_case_num; // oneof-case number to place in oneof_case field
183 const upb_msgdef *md; // msgdef, for oneof submessage handler
184 const upb_msgdef *parent_md; // msgdef, for parent submessage
185} oneof_handlerdata_t;
186
187static const void *newoneofhandlerdata(upb_handlers *h,
188 uint32_t ofs,
189 uint32_t case_ofs,
190 int property_ofs,
191 const upb_msgdef *m,
192 const upb_fielddef *f) {
193 oneof_handlerdata_t* hd =
194 (oneof_handlerdata_t*)malloc(sizeof(oneof_handlerdata_t));
195 hd->ofs = ofs;
196 hd->case_ofs = case_ofs;
197 hd->property_ofs = property_ofs;
198 hd->parent_md = m;
199 // We reuse the field tag number as a oneof union discriminant tag. Note that
200 // we don't expose these numbers to the user, so the only requirement is that
201 // we have some unique ID for each union case/possibility. The field tag
202 // numbers are already present and are easy to use so there's no reason to
203 // create a separate ID space. In addition, using the field tag number here
204 // lets us easily look up the field in the oneof accessor.
205 hd->oneof_case_num = upb_fielddef_number(f);
206 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) {
207 hd->md = upb_fielddef_msgsubdef(f);
208 } else {
209 hd->md = NULL;
210 }
211 upb_handlers_addcleanup(h, hd, free);
212 return hd;
213}
214
215// A handler that starts a repeated field. Gets the Repeated*Field instance for
216// this field (such an instance always exists even in an empty message).
217static void *startseq_handler(void* closure, const void* hd) {
218 MessageHeader* msg = closure;
219 const size_t *ofs = hd;
220 return CACHED_PTR_TO_ZVAL_PTR(DEREF(message_data(msg), *ofs, CACHED_VALUE*));
221}
222
223// Handlers that append primitive values to a repeated field.
224#define DEFINE_APPEND_HANDLER(type, ctype) \
225 static bool append##type##_handler(void* closure, const void* hd, \
226 ctype val) { \
227 zval* array = (zval*)closure; \
228 TSRMLS_FETCH(); \
229 RepeatedField* intern = UNBOX(RepeatedField, array); \
230 repeated_field_push_native(intern, &val); \
231 return true; \
232 }
233
234DEFINE_APPEND_HANDLER(bool, bool)
235DEFINE_APPEND_HANDLER(int32, int32_t)
236DEFINE_APPEND_HANDLER(uint32, uint32_t)
237DEFINE_APPEND_HANDLER(float, float)
238DEFINE_APPEND_HANDLER(int64, int64_t)
239DEFINE_APPEND_HANDLER(uint64, uint64_t)
240DEFINE_APPEND_HANDLER(double, double)
241
242// Appends a string to a repeated field.
243static void* appendstr_handler(void *closure,
244 const void *hd,
245 size_t size_hint) {
246 zval* array = (zval*)closure;
247 TSRMLS_FETCH();
248 RepeatedField* intern = UNBOX(RepeatedField, array);
249
250#if PHP_MAJOR_VERSION < 7
251 zval* str;
252 MAKE_STD_ZVAL(str);
253 PHP_PROTO_ZVAL_STRING(str, "", 1);
254 repeated_field_push_native(intern, &str);
255 return (void*)str;
256#else
257 zend_string* str = zend_string_init("", 0, 1);
258 repeated_field_push_native(intern, &str);
259 return intern;
260#endif
261}
262
// Start-of-string handler for a repeated 'bytes' field.
//
// Appending an empty element is done exactly as for a repeated string field,
// so this forwards to appendstr_handler instead of carrying a second copy of
// the same code (the same arrangement oneofstr_handler/oneofbytes_handler
// use). Arguments and return value are those of appendstr_handler.
static void* appendbytes_handler(void *closure,
                                 const void *hd,
                                 size_t size_hint) {
  return appendstr_handler(closure, hd, size_hint);
}
283
284// Handlers that append primitive values to a repeated field.
285#define DEFINE_SINGULAR_HANDLER(type, ctype) \
286 static bool type##_handler(void* closure, const void* hd, \
287 ctype val) { \
288 MessageHeader* msg = (MessageHeader*)closure; \
289 const size_t *ofs = hd; \
290 DEREF(message_data(msg), *ofs, ctype) = val; \
291 return true; \
292 }
293
294DEFINE_SINGULAR_HANDLER(bool, bool)
295DEFINE_SINGULAR_HANDLER(int32, int32_t)
296DEFINE_SINGULAR_HANDLER(uint32, uint32_t)
297DEFINE_SINGULAR_HANDLER(float, float)
298DEFINE_SINGULAR_HANDLER(int64, int64_t)
299DEFINE_SINGULAR_HANDLER(uint64, uint64_t)
300DEFINE_SINGULAR_HANDLER(double, double)
301
302#undef DEFINE_SINGULAR_HANDLER
303
304#if PHP_MAJOR_VERSION < 7
305static void *empty_php_string(zval** value_ptr) {
306 SEPARATE_ZVAL_IF_NOT_REF(value_ptr);
307 if (Z_TYPE_PP(value_ptr) == IS_STRING &&
308 !IS_INTERNED(Z_STRVAL_PP(value_ptr))) {
309 FREE(Z_STRVAL_PP(value_ptr));
310 }
311 ZVAL_EMPTY_STRING(*value_ptr);
312 return (void*)(*value_ptr);
313}
314#else
315static void *empty_php_string(zval* value_ptr) {
316 if (Z_TYPE_P(value_ptr) == IS_STRING) {
317 zend_string_release(Z_STR_P(value_ptr));
318 }
319 ZVAL_EMPTY_STRING(value_ptr);
320 return value_ptr;
321}
322#endif
323
324// Sets a non-repeated string field in a message.
325static void* str_handler(void *closure,
326 const void *hd,
327 size_t size_hint) {
328 MessageHeader* msg = closure;
329 const size_t *ofs = hd;
330 return empty_php_string(DEREF(message_data(msg), *ofs, CACHED_VALUE*));
331}
332
333// Sets a non-repeated 'bytes' field in a message.
334static void* bytes_handler(void *closure,
335 const void *hd,
336 size_t size_hint) {
337 MessageHeader* msg = closure;
338 const size_t *ofs = hd;
339 return empty_php_string(DEREF(message_data(msg), *ofs, CACHED_VALUE*));
340}
341
342static size_t stringdata_handler(void* closure, const void* hd,
343 const char* str, size_t len,
344 const upb_bufhandle* handle) {
345 zval* php_str = (zval*)closure;
346#if PHP_MAJOR_VERSION < 7
347 // Oneof string/bytes fields may have NULL initial value, which doesn't need
348 // to be freed.
349 if (Z_TYPE_P(php_str) == IS_STRING && !IS_INTERNED(Z_STRVAL_P(php_str))) {
350 FREE(Z_STRVAL_P(php_str));
351 }
352 ZVAL_STRINGL(php_str, str, len, 1);
353#else
354 if (Z_TYPE_P(php_str) == IS_STRING) {
355 zend_string_release(Z_STR_P(php_str));
356 }
357 ZVAL_NEW_STR(php_str, zend_string_init(str, len, 0));
358#endif
359 return len;
360}
361
#if PHP_MAJOR_VERSION >= 7
// String-data handler for repeated string/bytes fields on PHP 7+. The closure
// is the RepeatedField; the last element of its array is overwritten with a
// new zend_string built from the `len` bytes at `str`. Returns `len`.
static size_t zendstringdata_handler(void* closure, const void* hd,
                                     const char* str, size_t len,
                                     const upb_bufhandle* handle) {
  RepeatedField* field = (RepeatedField*)closure;

  // Zeroed native slot whose leading bytes hold the zend_string pointer.
  unsigned char slot[NATIVE_SLOT_MAX_SIZE];
  memset(slot, 0, NATIVE_SLOT_MAX_SIZE);
  *(zend_string**)slot = zend_string_init(str, len, 0);

  HashTable *elements = PHP_PROTO_HASH_OF(field->array);
  int last = zend_hash_num_elements(elements) - 1;
  php_proto_zend_hash_index_update_mem(
      elements, last, slot, sizeof(zend_string*), NULL);

  return len;
}
#endif
380
381// Appends a submessage to a repeated field.
382static void *appendsubmsg_handler(void *closure, const void *hd) {
383 zval* array = (zval*)closure;
384 TSRMLS_FETCH();
385 RepeatedField* intern = UNBOX(RepeatedField, array);
386
387 const submsg_handlerdata_t *submsgdata = hd;
388 Descriptor* subdesc =
389 UNBOX_HASHTABLE_VALUE(Descriptor, get_def_obj((void*)submsgdata->md));
390 zend_class_entry* subklass = subdesc->klass;
391 MessageHeader* submsg;
392
393#if PHP_MAJOR_VERSION < 7
394 zval* val = NULL;
395 MAKE_STD_ZVAL(val);
396 ZVAL_OBJ(val, subklass->create_object(subklass TSRMLS_CC));
397 repeated_field_push_native(intern, &val);
398 submsg = UNBOX(MessageHeader, val);
399#else
400 zend_object* obj = subklass->create_object(subklass TSRMLS_CC);
401 repeated_field_push_native(intern, &obj);
402 submsg = (MessageHeader*)((char*)obj - XtOffsetOf(MessageHeader, std));
403#endif
404 custom_data_init(subklass, submsg PHP_PROTO_TSRMLS_CC);
405
406 return submsg;
407}
408
409// Sets a non-repeated submessage field in a message.
410static void *submsg_handler(void *closure, const void *hd) {
411 MessageHeader* msg = closure;
412 const submsg_handlerdata_t* submsgdata = hd;
413 TSRMLS_FETCH();
414 Descriptor* subdesc =
415 UNBOX_HASHTABLE_VALUE(Descriptor, get_def_obj((void*)submsgdata->md));
416 zend_class_entry* subklass = subdesc->klass;
417 zval* submsg_php;
418 MessageHeader* submsg;
419
420 if (Z_TYPE_P(CACHED_PTR_TO_ZVAL_PTR(DEREF(message_data(msg), submsgdata->ofs,
421 CACHED_VALUE*))) == IS_NULL) {
422#if PHP_MAJOR_VERSION < 7
423 zval* val = NULL;
424 MAKE_STD_ZVAL(val);
425 ZVAL_OBJ(val, subklass->create_object(subklass TSRMLS_CC));
426 MessageHeader* intern = UNBOX(MessageHeader, val);
427 custom_data_init(subklass, intern PHP_PROTO_TSRMLS_CC);
428 php_proto_zval_ptr_dtor(*DEREF(message_data(msg), submsgdata->ofs, zval**));
429 *DEREF(message_data(msg), submsgdata->ofs, zval**) = val;
430#else
431 zend_object* obj = subklass->create_object(subklass TSRMLS_CC);
432 ZVAL_OBJ(DEREF(message_data(msg), submsgdata->ofs, zval*), obj);
433 MessageHeader* intern = UNBOX_HASHTABLE_VALUE(MessageHeader, obj);
434 custom_data_init(subklass, intern PHP_PROTO_TSRMLS_CC);
435#endif
436 }
437
438 submsg_php = CACHED_PTR_TO_ZVAL_PTR(
439 DEREF(message_data(msg), submsgdata->ofs, CACHED_VALUE*));
440
441 submsg = UNBOX(MessageHeader, submsg_php);
442 return submsg;
443}
444
445// Handler data for startmap/endmap handlers.
446typedef struct {
447 size_t ofs;
448 upb_fieldtype_t key_field_type;
449 upb_fieldtype_t value_field_type;
450
451 // We know that we can hold this reference because the handlerdata has the
452 // same lifetime as the upb_handlers struct, and the upb_handlers struct holds
453 // a reference to the upb_msgdef, which in turn has references to its subdefs.
454 const upb_def* value_field_subdef;
455} map_handlerdata_t;
456
457// Temporary frame for map parsing: at the beginning of a map entry message, a
458// submsg handler allocates a frame to hold (i) a reference to the Map object
459// into which this message will be inserted and (ii) storage slots to
460// temporarily hold the key and value for this map entry until the end of the
461// submessage. When the submessage ends, another handler is called to insert the
462// value into the map.
463typedef struct {
464 char key_storage[NATIVE_SLOT_MAX_SIZE];
465 char value_storage[NATIVE_SLOT_MAX_SIZE];
466} map_parse_frame_data_t;
467
468PHP_PROTO_WRAP_OBJECT_START(map_parse_frame_t)
469 map_parse_frame_data_t* data; // Place needs to be consistent with
470 // MessageHeader.
471 zval* map;
472 // In php7, we cannot allocate zval dynamically. So we need to add zval here
473 // to help decoding.
474 zval key_zval;
475 zval value_zval;
476PHP_PROTO_WRAP_OBJECT_END
477typedef struct map_parse_frame_t map_parse_frame_t;
478
479static void map_slot_init(void* memory, upb_fieldtype_t type, zval* cache) {
480 switch (type) {
481 case UPB_TYPE_STRING:
482 case UPB_TYPE_BYTES: {
483#if PHP_MAJOR_VERSION < 7
484 // Store zval** in memory in order to be consistent with the layout of
485 // singular fields.
486 zval** holder = ALLOC(zval*);
487 *(zval***)memory = holder;
488 zval* tmp;
489 MAKE_STD_ZVAL(tmp);
490 PHP_PROTO_ZVAL_STRINGL(tmp, "", 0, 1);
491 *holder = tmp;
492#else
493 *(zval**)memory = cache;
494 PHP_PROTO_ZVAL_STRINGL(*(zval**)memory, "", 0, 1);
495#endif
496 break;
497 }
498 case UPB_TYPE_MESSAGE: {
499#if PHP_MAJOR_VERSION < 7
500 zval** holder = ALLOC(zval*);
501 zval* tmp;
502 MAKE_STD_ZVAL(tmp);
503 ZVAL_NULL(tmp);
504 *holder = tmp;
505 *(zval***)memory = holder;
506#else
507 *(zval**)memory = cache;
508 ZVAL_NULL(*(zval**)memory);
509#endif
510 break;
511 }
512 default:
513 native_slot_init(type, memory, NULL);
514 }
515}
516
517static void map_slot_uninit(void* memory, upb_fieldtype_t type) {
518 switch (type) {
519 case UPB_TYPE_MESSAGE:
520 case UPB_TYPE_STRING:
521 case UPB_TYPE_BYTES: {
522#if PHP_MAJOR_VERSION < 7
523 zval** holder = *(zval***)memory;
524 zval_ptr_dtor(holder);
525 FREE(holder);
526#else
527 php_proto_zval_ptr_dtor(*(zval**)memory);
528#endif
529 break;
530 }
531 default:
532 break;
533 }
534}
535
536static void map_slot_key(upb_fieldtype_t type, const void* from,
537 const char** keyval,
538 size_t* length) {
539 if (type == UPB_TYPE_STRING) {
540#if PHP_MAJOR_VERSION < 7
541 zval* key_php = **(zval***)from;
542#else
543 zval* key_php = *(zval**)from;
544#endif
545 *keyval = Z_STRVAL_P(key_php);
546 *length = Z_STRLEN_P(key_php);
547 } else {
548 *keyval = from;
549 *length = native_slot_size(type);
550 }
551}
552
553static void map_slot_value(upb_fieldtype_t type, const void* from,
554 upb_value* v) {
555 size_t len;
556 void* to = upb_value_memory(v);
557#ifndef NDEBUG
558 v->ctype = UPB_CTYPE_UINT64;
559#endif
560
561 memset(to, 0, native_slot_size(type));
562
563 switch (type) {
564#if PHP_MAJOR_VERSION < 7
565 case UPB_TYPE_STRING:
566 case UPB_TYPE_BYTES:
567 case UPB_TYPE_MESSAGE: {
568 *(zval**)to = **(zval***)from;
569 Z_ADDREF_PP((zval**)to);
570 break;
571 }
572#else
573 case UPB_TYPE_STRING:
574 case UPB_TYPE_BYTES:
575 *(zend_string**)to = Z_STR_P(*(zval**)from);
576 zend_string_addref(*(zend_string**)to);
577 break;
578 case UPB_TYPE_MESSAGE:
579 *(zend_object**)to = Z_OBJ_P(*(zval**)from);
580 GC_ADDREF(*(zend_object**)to);
581 break;
582#endif
583 default:
584 len = native_slot_size(type);
585 memcpy(to, from, len);
586 }
587}
588
589// Handler to begin a map entry: allocates a temporary frame. This is the
590// 'startsubmsg' handler on the msgdef that contains the map field.
591static void *startmapentry_handler(void *closure, const void *hd) {
592 MessageHeader* msg = closure;
593 const map_handlerdata_t* mapdata = hd;
594 zval* map = CACHED_PTR_TO_ZVAL_PTR(
595 DEREF(message_data(msg), mapdata->ofs, CACHED_VALUE*));
596
597 map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
598 frame->data = ALLOC(map_parse_frame_data_t);
599 frame->map = map;
600
601 map_slot_init(&frame->data->key_storage, mapdata->key_field_type,
602 &frame->key_zval);
603 map_slot_init(&frame->data->value_storage, mapdata->value_field_type,
604 &frame->value_zval);
605
606 return frame;
607}
608
609// Handler to end a map entry: inserts the value defined during the message into
610// the map. This is the 'endmsg' handler on the map entry msgdef.
611static bool endmap_handler(void* closure, const void* hd, upb_status* s) {
612 map_parse_frame_t* frame = closure;
613 const map_handlerdata_t* mapdata = hd;
614
615 TSRMLS_FETCH();
616 Map *map = UNBOX(Map, frame->map);
617
618 const char* keyval = NULL;
619 upb_value v;
620 size_t length;
621
622 map_slot_key(map->key_type, &frame->data->key_storage, &keyval, &length);
623 map_slot_value(map->value_type, &frame->data->value_storage, &v);
624
625 map_index_set(map, keyval, length, v);
626
627 map_slot_uninit(&frame->data->key_storage, mapdata->key_field_type);
628 map_slot_uninit(&frame->data->value_storage, mapdata->value_field_type);
629 FREE(frame->data);
630 FREE(frame);
631
632 return true;
633}
634
635// Allocates a new map_handlerdata_t given the map entry message definition. If
636// the offset of the field within the parent message is also given, that is
637// added to the handler data as well. Note that this is called *twice* per map
638// field: once in the parent message handler setup when setting the startsubmsg
639// handler and once in the map entry message handler setup when setting the
640// key/value and endmsg handlers. The reason is that there is no easy way to
641// pass the handlerdata down to the sub-message handler setup.
642static map_handlerdata_t* new_map_handlerdata(
643 size_t ofs,
644 const upb_msgdef* mapentry_def,
645 Descriptor* desc) {
646 const upb_fielddef* key_field;
647 const upb_fielddef* value_field;
648 // TODO(teboring): Use emalloc and efree.
649 map_handlerdata_t* hd =
650 (map_handlerdata_t*)malloc(sizeof(map_handlerdata_t));
651
652 hd->ofs = ofs;
653 key_field = upb_msgdef_itof(mapentry_def, MAP_KEY_FIELD);
654 assert(key_field != NULL);
655 hd->key_field_type = upb_fielddef_type(key_field);
656 value_field = upb_msgdef_itof(mapentry_def, MAP_VALUE_FIELD);
657 assert(value_field != NULL);
658 hd->value_field_type = upb_fielddef_type(value_field);
659 hd->value_field_subdef = upb_fielddef_subdef(value_field);
660
661 return hd;
662}
663
664// Handlers that set primitive values in oneofs.
665#define DEFINE_ONEOF_HANDLER(type, ctype) \
666 static bool oneof##type##_handler(void* closure, const void* hd, \
667 ctype val) { \
668 const oneof_handlerdata_t* oneofdata = hd; \
669 MessageHeader* msg = (MessageHeader*)closure; \
670 DEREF(message_data(closure), oneofdata->case_ofs, uint32_t) = \
671 oneofdata->oneof_case_num; \
672 DEREF(message_data(closure), oneofdata->ofs, ctype) = val; \
673 return true; \
674 }
675
676DEFINE_ONEOF_HANDLER(bool, bool)
677DEFINE_ONEOF_HANDLER(int32, int32_t)
678DEFINE_ONEOF_HANDLER(uint32, uint32_t)
679DEFINE_ONEOF_HANDLER(float, float)
680DEFINE_ONEOF_HANDLER(int64, int64_t)
681DEFINE_ONEOF_HANDLER(uint64, uint64_t)
682DEFINE_ONEOF_HANDLER(double, double)
683
684#undef DEFINE_ONEOF_HANDLER
685
686static void oneof_cleanup(MessageHeader* msg,
687 const oneof_handlerdata_t* oneofdata) {
688 uint32_t old_case_num =
689 DEREF(message_data(msg), oneofdata->case_ofs, uint32_t);
690 if (old_case_num == 0) {
691 return;
692 }
693
694 const upb_fielddef* old_field =
695 upb_msgdef_itof(oneofdata->parent_md, old_case_num);
696 bool need_clean = false;
697
698 switch (upb_fielddef_type(old_field)) {
699 case UPB_TYPE_STRING:
700 case UPB_TYPE_BYTES:
701 need_clean = true;
702 break;
703 case UPB_TYPE_MESSAGE:
704 if (oneofdata->oneof_case_num != old_case_num) {
705 need_clean = true;
706 }
707 break;
708 default:
709 break;
710 }
711
712 if (need_clean) {
713#if PHP_MAJOR_VERSION < 7
714 SEPARATE_ZVAL_IF_NOT_REF(
715 DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*));
716 php_proto_zval_ptr_dtor(
717 *DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*));
718 MAKE_STD_ZVAL(*DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*));
719 ZVAL_NULL(*DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*));
720#endif
721 }
722}
723
724// Handlers for string/bytes in a oneof.
725static void *oneofbytes_handler(void *closure,
726 const void *hd,
727 size_t size_hint) {
728 MessageHeader* msg = closure;
729 const oneof_handlerdata_t *oneofdata = hd;
730
731 oneof_cleanup(msg, oneofdata);
732
733 DEREF(message_data(msg), oneofdata->case_ofs, uint32_t) =
734 oneofdata->oneof_case_num;
735 DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*) =
736 OBJ_PROP(&msg->std, oneofdata->property_ofs);
737
738 return empty_php_string(DEREF(
739 message_data(msg), oneofdata->ofs, CACHED_VALUE*));
740}
741
// Start-of-string handler for a string member of a oneof. Currently identical
// to the bytes variant, to which it forwards.
static void *oneofstr_handler(void *closure,
                              const void *hd,
                              size_t size_hint) {
  // TODO(teboring): Add it back.
  // rb_enc_associate(str, kRubyString8bitEncoding);
  void *result = oneofbytes_handler(closure, hd, size_hint);
  return result;
}
749
750// Handler for a submessage field in a oneof.
751static void* oneofsubmsg_handler(void* closure, const void* hd) {
752 MessageHeader* msg = closure;
753 const oneof_handlerdata_t *oneofdata = hd;
754 uint32_t oldcase = DEREF(message_data(msg), oneofdata->case_ofs, uint32_t);
755 TSRMLS_FETCH();
756 Descriptor* subdesc =
757 UNBOX_HASHTABLE_VALUE(Descriptor, get_def_obj((void*)oneofdata->md));
758 zend_class_entry* subklass = subdesc->klass;
759 zval* submsg_php;
760 MessageHeader* submsg;
761
762 if (oldcase != oneofdata->oneof_case_num) {
763 oneof_cleanup(msg, oneofdata);
764
765 // Create new message.
766 DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*) =
767 OBJ_PROP(&msg->std, oneofdata->property_ofs);
768 ZVAL_OBJ(CACHED_PTR_TO_ZVAL_PTR(
769 DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*)),
770 subklass->create_object(subklass TSRMLS_CC));
771 }
772
773 DEREF(message_data(msg), oneofdata->case_ofs, uint32_t) =
774 oneofdata->oneof_case_num;
775
776 submsg_php = CACHED_PTR_TO_ZVAL_PTR(
777 DEREF(message_data(msg), oneofdata->ofs, CACHED_VALUE*));
778 submsg = UNBOX(MessageHeader, submsg_php);
779 custom_data_init(subklass, submsg PHP_PROTO_TSRMLS_CC);
780 return submsg;
781}
782
783// Set up handlers for a repeated field.
784static void add_handlers_for_repeated_field(upb_handlers *h,
785 const upb_fielddef *f,
786 size_t offset) {
787 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
788 upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
789 upb_handlers_setstartseq(h, f, startseq_handler, &attr);
790 upb_handlerattr_uninit(&attr);
791
792 switch (upb_fielddef_type(f)) {
793
794#define SET_HANDLER(utype, ltype) \
795 case utype: \
796 upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
797 break;
798
799 SET_HANDLER(UPB_TYPE_BOOL, bool);
800 SET_HANDLER(UPB_TYPE_INT32, int32);
801 SET_HANDLER(UPB_TYPE_UINT32, uint32);
802 SET_HANDLER(UPB_TYPE_ENUM, int32);
803 SET_HANDLER(UPB_TYPE_FLOAT, float);
804 SET_HANDLER(UPB_TYPE_INT64, int64);
805 SET_HANDLER(UPB_TYPE_UINT64, uint64);
806 SET_HANDLER(UPB_TYPE_DOUBLE, double);
807
808#undef SET_HANDLER
809
810 case UPB_TYPE_STRING:
811 case UPB_TYPE_BYTES: {
812 bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
813 upb_handlers_setstartstr(h, f, is_bytes ?
814 appendbytes_handler : appendstr_handler,
815 NULL);
816#if PHP_MAJOR_VERSION < 7
817 upb_handlers_setstring(h, f, stringdata_handler, NULL);
818#else
819 upb_handlers_setstring(h, f, zendstringdata_handler, NULL);
820#endif
821 break;
822 }
823 case UPB_TYPE_MESSAGE: {
824 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
825 upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
826 upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
827 upb_handlerattr_uninit(&attr);
828 break;
829 }
830 }
831}
832
833// Set up handlers for a singular field.
834static void add_handlers_for_singular_field(upb_handlers *h,
835 const upb_fielddef *f,
836 size_t offset) {
837 switch (upb_fielddef_type(f)) {
838
839#define SET_HANDLER(utype, ltype) \
840 case utype: { \
841 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; \
842 upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset)); \
843 upb_handlers_set##ltype(h, f, ltype##_handler, &attr); \
844 break; \
845 }
846
847 SET_HANDLER(UPB_TYPE_BOOL, bool);
848 SET_HANDLER(UPB_TYPE_INT32, int32);
849 SET_HANDLER(UPB_TYPE_UINT32, uint32);
850 SET_HANDLER(UPB_TYPE_ENUM, int32);
851 SET_HANDLER(UPB_TYPE_FLOAT, float);
852 SET_HANDLER(UPB_TYPE_INT64, int64);
853 SET_HANDLER(UPB_TYPE_UINT64, uint64);
854 SET_HANDLER(UPB_TYPE_DOUBLE, double);
855
856#undef SET_HANDLER
857
858 case UPB_TYPE_STRING:
859 case UPB_TYPE_BYTES: {
860 bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
861 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
862 upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
863 upb_handlers_setstartstr(h, f,
864 is_bytes ? bytes_handler : str_handler,
865 &attr);
866 upb_handlers_setstring(h, f, stringdata_handler, &attr);
867 upb_handlerattr_uninit(&attr);
868 break;
869 }
870 case UPB_TYPE_MESSAGE: {
871 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
872 upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
873 upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
874 upb_handlerattr_uninit(&attr);
875 break;
876 }
877 }
878}
879
880// Adds handlers to a map field.
881static void add_handlers_for_mapfield(upb_handlers* h,
882 const upb_fielddef* fielddef,
883 size_t offset,
884 Descriptor* desc) {
885 const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
886 map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc);
887 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
888
889 upb_handlers_addcleanup(h, hd, free);
890 upb_handlerattr_sethandlerdata(&attr, hd);
891 upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr);
892 upb_handlerattr_uninit(&attr);
893}
894
895// Adds handlers to a map-entry msgdef.
896static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers* h,
897 Descriptor* desc) {
898 const upb_fielddef* key_field = map_entry_key(msgdef);
899 const upb_fielddef* value_field = map_entry_value(msgdef);
900 map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
901 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
902
903 upb_handlers_addcleanup(h, hd, free);
904 upb_handlerattr_sethandlerdata(&attr, hd);
905 upb_handlers_setendmsg(h, endmap_handler, &attr);
906
907 add_handlers_for_singular_field(h, key_field,
908 offsetof(map_parse_frame_data_t,
909 key_storage));
910 add_handlers_for_singular_field(h, value_field,
911 offsetof(map_parse_frame_data_t,
912 value_storage));
913}
914
915// Set up handlers for a oneof field.
916static void add_handlers_for_oneof_field(upb_handlers *h,
917 const upb_msgdef *m,
918 const upb_fielddef *f,
919 size_t offset,
920 size_t oneof_case_offset,
921 int property_cache_offset) {
922
923 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
924 upb_handlerattr_sethandlerdata(
925 &attr, newoneofhandlerdata(h, offset, oneof_case_offset,
926 property_cache_offset, m, f));
927
928 switch (upb_fielddef_type(f)) {
929
930#define SET_HANDLER(utype, ltype) \
931 case utype: \
932 upb_handlers_set##ltype(h, f, oneof##ltype##_handler, &attr); \
933 break;
934
935 SET_HANDLER(UPB_TYPE_BOOL, bool);
936 SET_HANDLER(UPB_TYPE_INT32, int32);
937 SET_HANDLER(UPB_TYPE_UINT32, uint32);
938 SET_HANDLER(UPB_TYPE_ENUM, int32);
939 SET_HANDLER(UPB_TYPE_FLOAT, float);
940 SET_HANDLER(UPB_TYPE_INT64, int64);
941 SET_HANDLER(UPB_TYPE_UINT64, uint64);
942 SET_HANDLER(UPB_TYPE_DOUBLE, double);
943
944#undef SET_HANDLER
945
946 case UPB_TYPE_STRING:
947 case UPB_TYPE_BYTES: {
948 bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
949 upb_handlers_setstartstr(h, f, is_bytes ?
950 oneofbytes_handler : oneofstr_handler,
951 &attr);
952 upb_handlers_setstring(h, f, stringdata_handler, NULL);
953 break;
954 }
955 case UPB_TYPE_MESSAGE: {
956 upb_handlers_setstartsubmsg(h, f, oneofsubmsg_handler, &attr);
957 break;
958 }
959 }
960
961 upb_handlerattr_uninit(&attr);
962}
963
964static bool add_unknown_handler(void* closure, const void* hd, const char* buf,
965 size_t size) {
966 encodeunknown_handlerfunc handler =
967 ((unknownfields_handlerdata_t*)hd)->handler;
968
969 MessageHeader* msg = (MessageHeader*)closure;
970 stringsink* unknown = DEREF(message_data(msg), 0, stringsink*);
971 if (unknown == NULL) {
972 DEREF(message_data(msg), 0, stringsink*) = ALLOC(stringsink);
973 unknown = DEREF(message_data(msg), 0, stringsink*);
974 stringsink_init(unknown);
975 }
976
977 handler(unknown, NULL, buf, size, NULL);
978
979 return true;
980}
981
982static void add_handlers_for_message(const void* closure,
983 upb_handlers* h) {
984 const upb_msgdef* msgdef = upb_handlers_msgdef(h);
985 TSRMLS_FETCH();
986 Descriptor* desc =
987 UNBOX_HASHTABLE_VALUE(Descriptor, get_def_obj((void*)msgdef));
988 upb_msg_field_iter i;
989
990 // If this is a mapentry message type, set up a special set of handlers and
991 // bail out of the normal (user-defined) message type handling.
992 if (upb_msgdef_mapentry(msgdef)) {
993 add_handlers_for_mapentry(msgdef, h, desc);
994 return;
995 }
996
997 // Ensure layout exists. We may be invoked to create handlers for a given
998 // message if we are included as a submsg of another message type before our
999 // class is actually built, so to work around this, we just create the layout
1000 // (and handlers, in the class-building function) on-demand.
1001 if (desc->layout == NULL) {
1002 desc->layout = create_layout(desc->msgdef);
1003 }
1004
1005 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
1006 upb_handlerattr_sethandlerdata(&attr, newunknownfieldshandlerdata(h));
1007 upb_handlers_setunknown(h, add_unknown_handler, &attr);
1008
1009 for (upb_msg_field_begin(&i, desc->msgdef);
1010 !upb_msg_field_done(&i);
1011 upb_msg_field_next(&i)) {
1012 const upb_fielddef *f = upb_msg_iter_field(&i);
1013 size_t offset = desc->layout->fields[upb_fielddef_index(f)].offset;
1014
1015 if (upb_fielddef_containingoneof(f)) {
1016 size_t oneof_case_offset =
1017 desc->layout->fields[upb_fielddef_index(f)].case_offset;
1018 int property_cache_index =
1019 desc->layout->fields[upb_fielddef_index(f)].cache_index;
1020 add_handlers_for_oneof_field(h, desc->msgdef, f, offset,
1021 oneof_case_offset, property_cache_index);
1022 } else if (is_map_field(f)) {
1023 add_handlers_for_mapfield(h, f, offset, desc);
1024 } else if (upb_fielddef_isseq(f)) {
1025 add_handlers_for_repeated_field(h, f, offset);
1026 } else {
1027 add_handlers_for_singular_field(h, f, offset);
1028 }
1029 }
1030}
1031
1032// Creates upb handlers for populating a message.
1033static const upb_handlers *new_fill_handlers(Descriptor* desc,
1034 const void* owner) {
1035 // TODO(cfallin, haberman): once upb gets a caching/memoization layer for
1036 // handlers, reuse subdef handlers so that e.g. if we already parse
1037 // B-with-field-of-type-C, we don't have to rebuild the whole hierarchy to
1038 // parse A-with-field-of-type-B-with-field-of-type-C.
1039 return upb_handlers_newfrozen(desc->msgdef, owner,
1040 add_handlers_for_message, NULL);
1041}
1042
1043// Constructs the handlers for filling a message's data into an in-memory
1044// object.
1045const upb_handlers* get_fill_handlers(Descriptor* desc) {
1046 if (!desc->fill_handlers) {
1047 desc->fill_handlers =
1048 new_fill_handlers(desc, &desc->fill_handlers);
1049 }
1050 return desc->fill_handlers;
1051}
1052
1053const upb_pbdecodermethod *new_fillmsg_decodermethod(Descriptor* desc,
1054 const void* owner) {
1055 const upb_handlers* handlers = get_fill_handlers(desc);
1056 upb_pbdecodermethodopts opts;
1057 upb_pbdecodermethodopts_init(&opts, handlers);
1058
1059 return upb_pbdecodermethod_new(&opts, owner);
1060}
1061
1062static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) {
1063 if (desc->fill_method == NULL) {
1064 desc->fill_method = new_fillmsg_decodermethod(
1065 desc, &desc->fill_method);
1066 }
1067 return desc->fill_method;
1068}
1069
1070static const upb_json_parsermethod *msgdef_jsonparsermethod(Descriptor* desc) {
1071 if (desc->json_fill_method == NULL) {
1072 desc->json_fill_method =
1073 upb_json_parsermethod_new(desc->msgdef, &desc->json_fill_method);
1074 }
1075 return desc->json_fill_method;
1076}
1077
1078// -----------------------------------------------------------------------------
1079// Serializing.
1080// -----------------------------------------------------------------------------
1081
1082static void putmsg(zval* msg, const Descriptor* desc, upb_sink* sink,
1083 int depth TSRMLS_DC);
1084static void putrawmsg(MessageHeader* msg, const Descriptor* desc,
1085 upb_sink* sink, int depth TSRMLS_DC);
1086
1087static void putstr(zval* str, const upb_fielddef* f, upb_sink* sink);
1088
1089static void putrawstr(const char* str, int len, const upb_fielddef* f,
1090 upb_sink* sink);
1091
1092static void putsubmsg(zval* submsg, const upb_fielddef* f, upb_sink* sink,
1093 int depth TSRMLS_DC);
1094static void putrawsubmsg(MessageHeader* submsg, const upb_fielddef* f,
1095 upb_sink* sink, int depth TSRMLS_DC);
1096
1097static void putarray(zval* array, const upb_fielddef* f, upb_sink* sink,
1098 int depth TSRMLS_DC);
1099static void putmap(zval* map, const upb_fielddef* f, upb_sink* sink,
1100 int depth TSRMLS_DC);
1101
1102static upb_selector_t getsel(const upb_fielddef* f, upb_handlertype_t type) {
1103 upb_selector_t ret;
1104 bool ok = upb_handlers_getselector(f, type, &ret);
1105 UPB_ASSERT(ok);
1106 return ret;
1107}
1108
1109static void put_optional_value(const void* memory, int len, const upb_fielddef* f,
1110 int depth, upb_sink* sink TSRMLS_DC) {
1111 assert(upb_fielddef_label(f) == UPB_LABEL_OPTIONAL);
1112
1113 switch (upb_fielddef_type(f)) {
1114#define T(upbtypeconst, upbtype, ctype, default_value) \
1115 case upbtypeconst: { \
1116 ctype value = DEREF(memory, 0, ctype); \
1117 if (value != default_value) { \
1118 upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); \
1119 upb_sink_put##upbtype(sink, sel, value); \
1120 } \
1121 } break;
1122
1123 T(UPB_TYPE_FLOAT, float, float, 0.0)
1124 T(UPB_TYPE_DOUBLE, double, double, 0.0)
1125 T(UPB_TYPE_BOOL, bool, uint8_t, 0)
1126 T(UPB_TYPE_ENUM, int32, int32_t, 0)
1127 T(UPB_TYPE_INT32, int32, int32_t, 0)
1128 T(UPB_TYPE_UINT32, uint32, uint32_t, 0)
1129 T(UPB_TYPE_INT64, int64, int64_t, 0)
1130 T(UPB_TYPE_UINT64, uint64, uint64_t, 0)
1131
1132#undef T
1133 case UPB_TYPE_STRING:
1134 case UPB_TYPE_BYTES:
1135 putrawstr(memory, len, f, sink);
1136 break;
1137 case UPB_TYPE_MESSAGE: {
1138#if PHP_MAJOR_VERSION < 7
1139 MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory);
1140#else
1141 MessageHeader *submsg =
1142 (MessageHeader*)((char*)(*(zend_object**)memory) -
1143 XtOffsetOf(MessageHeader, std));
1144#endif
1145 putrawsubmsg(submsg, f, sink, depth TSRMLS_CC);
1146 break;
1147 }
1148 default:
1149 assert(false);
1150 }
1151}
1152
1153// Only string/bytes fields are stored as zval.
1154static const char* raw_value(void* memory, const upb_fielddef* f) {
1155 switch (upb_fielddef_type(f)) {
1156 case UPB_TYPE_STRING:
1157 case UPB_TYPE_BYTES:
1158#if PHP_MAJOR_VERSION < 7
1159 return Z_STRVAL_PP((zval**)memory);
1160#else
1161 return ZSTR_VAL(*(zend_string**)memory);
1162#endif
1163 break;
1164 default:
1165 return memory;
1166 }
1167}
1168
1169static int raw_value_len(void* memory, int len, const upb_fielddef* f) {
1170 switch (upb_fielddef_type(f)) {
1171 case UPB_TYPE_STRING:
1172 case UPB_TYPE_BYTES:
1173#if PHP_MAJOR_VERSION < 7
1174 return Z_STRLEN_PP((zval**)memory);
1175#else
1176 return ZSTR_LEN(*(zend_string**)memory);
1177#endif
1178 default:
1179 return len;
1180 }
1181}
1182
1183static void putmap(zval* map, const upb_fielddef* f, upb_sink* sink,
1184 int depth TSRMLS_DC) {
1185 upb_sink subsink;
1186 const upb_fielddef* key_field;
1187 const upb_fielddef* value_field;
1188 MapIter it;
1189 int len, size;
1190
1191 assert(map != NULL);
1192 Map* intern = UNBOX(Map, map);
1193 size = upb_strtable_count(&intern->table);
1194 if (size == 0) return;
1195
1196 upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
1197
1198 assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
1199 key_field = map_field_key(f);
1200 value_field = map_field_value(f);
1201
1202 for (map_begin(map, &it TSRMLS_CC); !map_done(&it); map_next(&it)) {
1203 upb_status status;
1204
1205 upb_sink entry_sink;
1206 upb_sink_startsubmsg(&subsink, getsel(f, UPB_HANDLER_STARTSUBMSG),
1207 &entry_sink);
1208 upb_sink_startmsg(&entry_sink);
1209
1210 // Serialize key.
1211 const char *key = map_iter_key(&it, &len);
1212 put_optional_value(key, len, key_field, depth + 1, &entry_sink TSRMLS_CC);
1213
1214 // Serialize value.
1215 upb_value value = map_iter_value(&it, &len);
1216 put_optional_value(raw_value(upb_value_memory(&value), value_field),
1217 raw_value_len(upb_value_memory(&value), len, value_field),
1218 value_field, depth + 1, &entry_sink TSRMLS_CC);
1219
1220 upb_sink_endmsg(&entry_sink, &status);
1221 upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
1222 }
1223
1224 upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
1225}
1226
1227static void putmsg(zval* msg_php, const Descriptor* desc, upb_sink* sink,
1228 int depth TSRMLS_DC) {
1229 MessageHeader* msg = UNBOX(MessageHeader, msg_php);
1230 putrawmsg(msg, desc, sink, depth TSRMLS_CC);
1231}
1232
1233static void putrawmsg(MessageHeader* msg, const Descriptor* desc,
1234 upb_sink* sink, int depth TSRMLS_DC) {
1235 upb_msg_field_iter i;
1236 upb_status status;
1237
1238 upb_sink_startmsg(sink);
1239
1240 // Protect against cycles (possible because users may freely reassign message
1241 // and repeated fields) by imposing a maximum recursion depth.
1242 if (depth > ENCODE_MAX_NESTING) {
1243 zend_error(E_ERROR,
1244 "Maximum recursion depth exceeded during encoding.");
1245 }
1246
1247 for (upb_msg_field_begin(&i, desc->msgdef); !upb_msg_field_done(&i);
1248 upb_msg_field_next(&i)) {
1249 upb_fielddef* f = upb_msg_iter_field(&i);
1250 uint32_t offset = desc->layout->fields[upb_fielddef_index(f)].offset;
1251 bool containing_oneof = false;
1252
1253 if (upb_fielddef_containingoneof(f)) {
1254 uint32_t oneof_case_offset =
1255 desc->layout->fields[upb_fielddef_index(f)].case_offset;
1256 // For a oneof, check that this field is actually present -- skip all the
1257 // below if not.
1258 if (DEREF(message_data(msg), oneof_case_offset, uint32_t) !=
1259 upb_fielddef_number(f)) {
1260 continue;
1261 }
1262 // Otherwise, fall through to the appropriate singular-field handler
1263 // below.
1264 containing_oneof = true;
1265 }
1266
1267 if (is_map_field(f)) {
1268 zval* map = CACHED_PTR_TO_ZVAL_PTR(
1269 DEREF(message_data(msg), offset, CACHED_VALUE*));
1270 if (map != NULL) {
1271 putmap(map, f, sink, depth TSRMLS_CC);
1272 }
1273 } else if (upb_fielddef_isseq(f)) {
1274 zval* array = CACHED_PTR_TO_ZVAL_PTR(
1275 DEREF(message_data(msg), offset, CACHED_VALUE*));
1276 if (array != NULL) {
1277 putarray(array, f, sink, depth TSRMLS_CC);
1278 }
1279 } else if (upb_fielddef_isstring(f)) {
1280 zval* str = CACHED_PTR_TO_ZVAL_PTR(
1281 DEREF(message_data(msg), offset, CACHED_VALUE*));
1282 if (containing_oneof || Z_STRLEN_P(str) > 0) {
1283 putstr(str, f, sink);
1284 }
1285 } else if (upb_fielddef_issubmsg(f)) {
1286 putsubmsg(CACHED_PTR_TO_ZVAL_PTR(
1287 DEREF(message_data(msg), offset, CACHED_VALUE*)),
1288 f, sink, depth TSRMLS_CC);
1289 } else {
1290 upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
1291
1292#define T(upbtypeconst, upbtype, ctype, default_value) \
1293 case upbtypeconst: { \
1294 ctype value = DEREF(message_data(msg), offset, ctype); \
1295 if (containing_oneof || value != default_value) { \
1296 upb_sink_put##upbtype(sink, sel, value); \
1297 } \
1298 } break;
1299
1300 switch (upb_fielddef_type(f)) {
1301 T(UPB_TYPE_FLOAT, float, float, 0.0)
1302 T(UPB_TYPE_DOUBLE, double, double, 0.0)
1303 T(UPB_TYPE_BOOL, bool, uint8_t, 0)
1304 case UPB_TYPE_ENUM:
1305 T(UPB_TYPE_INT32, int32, int32_t, 0)
1306 T(UPB_TYPE_UINT32, uint32, uint32_t, 0)
1307 T(UPB_TYPE_INT64, int64, int64_t, 0)
1308 T(UPB_TYPE_UINT64, uint64, uint64_t, 0)
1309
1310 case UPB_TYPE_STRING:
1311 case UPB_TYPE_BYTES:
1312 case UPB_TYPE_MESSAGE:
1313 zend_error(E_ERROR, "Internal error.");
1314 }
1315
1316#undef T
1317 }
1318 }
1319
1320 stringsink* unknown = DEREF(message_data(msg), 0, stringsink*);
1321 if (unknown != NULL) {
1322 upb_sink_putunknown(sink, unknown->ptr, unknown->len);
1323 }
1324
1325 upb_sink_endmsg(sink, &status);
1326}
1327
1328static void putstr(zval* str, const upb_fielddef *f, upb_sink *sink) {
1329 upb_sink subsink;
1330
1331 if (ZVAL_IS_NULL(str)) return;
1332
1333 assert(Z_TYPE_P(str) == IS_STRING);
1334
1335 upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), Z_STRLEN_P(str),
1336 &subsink);
1337
1338 // For oneof string field, we may get here with string length is zero.
1339 if (Z_STRLEN_P(str) > 0) {
1340 // Ensure that the string has the correct encoding. We also check at
1341 // field-set time, but the user may have mutated the string object since
1342 // then.
1343 if (upb_fielddef_type(f) == UPB_TYPE_STRING &&
1344 !is_structurally_valid_utf8(Z_STRVAL_P(str), Z_STRLEN_P(str))) {
1345 zend_error(E_USER_ERROR, "Given string is not UTF8 encoded.");
1346 return;
1347 }
1348 upb_sink_putstring(&subsink, getsel(f, UPB_HANDLER_STRING), Z_STRVAL_P(str),
1349 Z_STRLEN_P(str), NULL);
1350 }
1351
1352 upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR));
1353}
1354
1355static void putrawstr(const char* str, int len, const upb_fielddef* f,
1356 upb_sink* sink) {
1357 upb_sink subsink;
1358
1359 if (len == 0) return;
1360
1361 // Ensure that the string has the correct encoding. We also check at field-set
1362 // time, but the user may have mutated the string object since then.
1363 if (upb_fielddef_type(f) == UPB_TYPE_STRING &&
1364 !is_structurally_valid_utf8(str, len)) {
1365 zend_error(E_USER_ERROR, "Given string is not UTF8 encoded.");
1366 return;
1367 }
1368
1369 upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), len, &subsink);
1370 upb_sink_putstring(&subsink, getsel(f, UPB_HANDLER_STRING), str, len, NULL);
1371 upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR));
1372}
1373
1374static void putsubmsg(zval* submsg_php, const upb_fielddef* f, upb_sink* sink,
1375 int depth TSRMLS_DC) {
1376 if (Z_TYPE_P(submsg_php) == IS_NULL) return;
1377
1378 MessageHeader *submsg = UNBOX(MessageHeader, submsg_php);
1379 putrawsubmsg(submsg, f, sink, depth TSRMLS_CC);
1380}
1381
1382static void putrawsubmsg(MessageHeader* submsg, const upb_fielddef* f,
1383 upb_sink* sink, int depth TSRMLS_DC) {
1384 upb_sink subsink;
1385
1386 Descriptor* subdesc =
1387 UNBOX_HASHTABLE_VALUE(Descriptor, get_def_obj(upb_fielddef_msgsubdef(f)));
1388
1389 upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink);
1390 putrawmsg(submsg, subdesc, &subsink, depth + 1 TSRMLS_CC);
1391 upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
1392}
1393
1394static void putarray(zval* array, const upb_fielddef* f, upb_sink* sink,
1395 int depth TSRMLS_DC) {
1396 upb_sink subsink;
1397 upb_fieldtype_t type = upb_fielddef_type(f);
1398 upb_selector_t sel = 0;
1399 int size, i;
1400
1401 assert(array != NULL);
1402 RepeatedField* intern = UNBOX(RepeatedField, array);
1403 HashTable *ht = PHP_PROTO_HASH_OF(intern->array);
1404 size = zend_hash_num_elements(ht);
1405 if (size == 0) return;
1406
1407 upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
1408
1409 if (upb_fielddef_isprimitive(f)) {
1410 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
1411 }
1412
1413 for (i = 0; i < size; i++) {
1414 void* memory = repeated_field_index_native(intern, i TSRMLS_CC);
1415 switch (type) {
1416#define T(upbtypeconst, upbtype, ctype) \
1417 case upbtypeconst: \
1418 upb_sink_put##upbtype(&subsink, sel, *((ctype*)memory)); \
1419 break;
1420
1421 T(UPB_TYPE_FLOAT, float, float)
1422 T(UPB_TYPE_DOUBLE, double, double)
1423 T(UPB_TYPE_BOOL, bool, int8_t)
1424 case UPB_TYPE_ENUM:
1425 T(UPB_TYPE_INT32, int32, int32_t)
1426 T(UPB_TYPE_UINT32, uint32, uint32_t)
1427 T(UPB_TYPE_INT64, int64, int64_t)
1428 T(UPB_TYPE_UINT64, uint64, uint64_t)
1429
1430 case UPB_TYPE_STRING:
1431 case UPB_TYPE_BYTES: {
1432#if PHP_MAJOR_VERSION < 7
1433 const char* rawstr = Z_STRVAL_P(*(zval**)memory);
1434 int len = Z_STRLEN_P(*(zval**)memory);
1435#else
1436 const char* rawstr = ZSTR_VAL(*(zend_string**)memory);
1437 int len = ZSTR_LEN(*(zend_string**)memory);
1438#endif
1439 putrawstr(rawstr, len, f, &subsink);
1440 break;
1441 }
1442 case UPB_TYPE_MESSAGE: {
1443#if PHP_MAJOR_VERSION < 7
1444 MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory);
1445#else
1446 MessageHeader *submsg =
1447 (MessageHeader*)((char*)(Z_OBJ_P((zval*)memory)) -
1448 XtOffsetOf(MessageHeader, std));
1449#endif
1450 putrawsubmsg(submsg, f, &subsink, depth TSRMLS_CC);
1451 break;
1452 }
1453
1454#undef T
1455 }
1456 }
1457 upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
1458}
1459
1460static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
1461 if (desc->pb_serialize_handlers == NULL) {
1462 desc->pb_serialize_handlers =
1463 upb_pb_encoder_newhandlers(desc->msgdef, &desc->pb_serialize_handlers);
1464 }
1465 return desc->pb_serialize_handlers;
1466}
1467
1468static const upb_handlers* msgdef_json_serialize_handlers(
1469 Descriptor* desc, bool preserve_proto_fieldnames) {
1470 if (preserve_proto_fieldnames) {
1471 if (desc->json_serialize_handlers == NULL) {
1472 desc->json_serialize_handlers =
1473 upb_json_printer_newhandlers(
1474 desc->msgdef, true, &desc->json_serialize_handlers);
1475 }
1476 return desc->json_serialize_handlers;
1477 } else {
1478 if (desc->json_serialize_handlers_preserve == NULL) {
1479 desc->json_serialize_handlers_preserve =
1480 upb_json_printer_newhandlers(
1481 desc->msgdef, false, &desc->json_serialize_handlers_preserve);
1482 }
1483 return desc->json_serialize_handlers_preserve;
1484 }
1485}
1486
1487// -----------------------------------------------------------------------------
1488// PHP encode/decode methods
1489// -----------------------------------------------------------------------------
1490
1491void serialize_to_string(zval* val, zval* return_value TSRMLS_DC) {
1492 Descriptor* desc =
1493 UNBOX_HASHTABLE_VALUE(Descriptor, get_ce_obj(Z_OBJCE_P(val)));
1494
1495 stringsink sink;
1496 stringsink_init(&sink);
1497
1498 {
1499 const upb_handlers* serialize_handlers = msgdef_pb_serialize_handlers(desc);
1500
1501 stackenv se;
1502 upb_pb_encoder* encoder;
1503
1504 stackenv_init(&se, "Error occurred during encoding: %s");
1505 encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink);
1506
1507 putmsg(val, desc, upb_pb_encoder_input(encoder), 0 TSRMLS_CC);
1508
1509 PHP_PROTO_RETVAL_STRINGL(sink.ptr, sink.len, 1);
1510
1511 stackenv_uninit(&se);
1512 stringsink_uninit(&sink);
1513 }
1514}
1515
1516PHP_METHOD(Message, serializeToString) {
1517 serialize_to_string(getThis(), return_value TSRMLS_CC);
1518}
1519
1520void merge_from_string(const char* data, int data_len, const Descriptor* desc,
1521 MessageHeader* msg) {
1522 const upb_pbdecodermethod* method = msgdef_decodermethod(desc);
1523 const upb_handlers* h = upb_pbdecodermethod_desthandlers(method);
1524 stackenv se;
1525 upb_sink sink;
1526 upb_pbdecoder* decoder;
1527 stackenv_init(&se, "Error occurred during parsing: %s");
1528
1529 upb_sink_reset(&sink, h, msg);
1530 decoder = upb_pbdecoder_create(&se.env, method, &sink);
1531 upb_bufsrc_putbuf(data, data_len, upb_pbdecoder_input(decoder));
1532
1533 stackenv_uninit(&se);
1534}
1535
1536PHP_METHOD(Message, mergeFromString) {
1537 Descriptor* desc =
1538 UNBOX_HASHTABLE_VALUE(Descriptor, get_ce_obj(Z_OBJCE_P(getThis())));
1539 MessageHeader* msg = UNBOX(MessageHeader, getThis());
1540
1541 char *data = NULL;
1542 PHP_PROTO_SIZE data_len;
1543
1544 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &data, &data_len) ==
1545 FAILURE) {
1546 return;
1547 }
1548
1549 merge_from_string(data, data_len, desc, msg);
1550}
1551
1552PHP_METHOD(Message, serializeToJsonString) {
1553 Descriptor* desc =
1554 UNBOX_HASHTABLE_VALUE(Descriptor, get_ce_obj(Z_OBJCE_P(getThis())));
1555
1556 zend_bool preserve_proto_fieldnames = false;
1557 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|b",
1558 &preserve_proto_fieldnames) == FAILURE) {
1559 return;
1560 }
1561
1562 stringsink sink;
1563 stringsink_init(&sink);
1564
1565 {
1566 const upb_handlers* serialize_handlers =
1567 msgdef_json_serialize_handlers(desc, preserve_proto_fieldnames);
1568 upb_json_printer* printer;
1569 stackenv se;
1570
1571 stackenv_init(&se, "Error occurred during encoding: %s");
1572 printer = upb_json_printer_create(&se.env, serialize_handlers, &sink.sink);
1573
1574 putmsg(getThis(), desc, upb_json_printer_input(printer), 0 TSRMLS_CC);
1575
1576 PHP_PROTO_RETVAL_STRINGL(sink.ptr, sink.len, 1);
1577
1578 stackenv_uninit(&se);
1579 stringsink_uninit(&sink);
1580 }
1581}
1582
1583PHP_METHOD(Message, mergeFromJsonString) {
1584 Descriptor* desc =
1585 UNBOX_HASHTABLE_VALUE(Descriptor, get_ce_obj(Z_OBJCE_P(getThis())));
1586 MessageHeader* msg = UNBOX(MessageHeader, getThis());
1587
1588 char *data = NULL;
1589 PHP_PROTO_SIZE data_len;
1590
1591 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &data, &data_len) ==
1592 FAILURE) {
1593 return;
1594 }
1595
1596 // TODO(teboring): Check and respect string encoding. If not UTF-8, we need to
1597 // convert, because string handlers pass data directly to message string
1598 // fields.
1599
1600 // TODO(teboring): Clear message.
1601
1602 {
1603 const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
1604 stackenv se;
1605 upb_sink sink;
1606 upb_json_parser* parser;
1607 stackenv_init(&se, "Error occurred during parsing: %s");
1608
1609 upb_sink_reset(&sink, get_fill_handlers(desc), msg);
1610 parser = upb_json_parser_create(&se.env, method, &sink);
1611 upb_bufsrc_putbuf(data, data_len, upb_json_parser_input(parser));
1612
1613 stackenv_uninit(&se);
1614 }
1615}
1616
1617// TODO(teboring): refactoring with putrawmsg
1618static void discard_unknown_fields(MessageHeader* msg) {
1619 upb_msg_field_iter it;
1620
1621 stringsink* unknown = DEREF(message_data(msg), 0, stringsink*);
1622 if (unknown != NULL) {
1623 stringsink_uninit(unknown);
1624 FREE(unknown);
1625 DEREF(message_data(msg), 0, stringsink*) = NULL;
1626 }
1627
1628 // Recursively discard unknown fields of submessages.
1629 Descriptor* desc = msg->descriptor;
1630 TSRMLS_FETCH();
1631 for (upb_msg_field_begin(&it, desc->msgdef);
1632 !upb_msg_field_done(&it);
1633 upb_msg_field_next(&it)) {
1634 upb_fielddef* f = upb_msg_iter_field(&it);
1635 uint32_t offset = desc->layout->fields[upb_fielddef_index(f)].offset;
1636 bool containing_oneof = false;
1637
1638 if (upb_fielddef_containingoneof(f)) {
1639 uint32_t oneof_case_offset =
1640 desc->layout->fields[upb_fielddef_index(f)].case_offset;
1641 // For a oneof, check that this field is actually present -- skip all the
1642 // below if not.
1643 if (DEREF(message_data(msg), oneof_case_offset, uint32_t) !=
1644 upb_fielddef_number(f)) {
1645 continue;
1646 }
1647 // Otherwise, fall through to the appropriate singular-field handler
1648 // below.
1649 containing_oneof = true;
1650 }
1651
1652 if (is_map_field(f)) {
1653 MapIter map_it;
1654 int len, size;
1655 const upb_fielddef* value_field;
1656
1657 value_field = map_field_value(f);
1658 if (!upb_fielddef_issubmsg(value_field)) continue;
1659
1660 zval* map_php = CACHED_PTR_TO_ZVAL_PTR(
1661 DEREF(message_data(msg), offset, CACHED_VALUE*));
1662 if (map_php == NULL) continue;
1663
1664 Map* intern = UNBOX(Map, map_php);
1665 for (map_begin(map_php, &map_it TSRMLS_CC);
1666 !map_done(&map_it); map_next(&map_it)) {
1667 upb_value value = map_iter_value(&map_it, &len);
1668 void* memory = raw_value(upb_value_memory(&value), value_field);
1669#if PHP_MAJOR_VERSION < 7
1670 MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory);
1671#else
1672 MessageHeader *submsg =
1673 (MessageHeader*)((char*)(Z_OBJ_P((zval*)memory)) -
1674 XtOffsetOf(MessageHeader, std));
1675#endif
1676 discard_unknown_fields(submsg);
1677 }
1678 } else if (upb_fielddef_isseq(f)) {
1679 if (!upb_fielddef_issubmsg(f)) continue;
1680
1681 zval* array_php = CACHED_PTR_TO_ZVAL_PTR(
1682 DEREF(message_data(msg), offset, CACHED_VALUE*));
1683 if (array_php == NULL) continue;
1684
1685 int size, i;
1686 RepeatedField* intern = UNBOX(RepeatedField, array_php);
1687 HashTable *ht = PHP_PROTO_HASH_OF(intern->array);
1688 size = zend_hash_num_elements(ht);
1689 if (size == 0) continue;
1690
1691 for (i = 0; i < size; i++) {
1692 void* memory = repeated_field_index_native(intern, i TSRMLS_CC);
1693#if PHP_MAJOR_VERSION < 7
1694 MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory);
1695#else
1696 MessageHeader *submsg =
1697 (MessageHeader*)((char*)(Z_OBJ_P((zval*)memory)) -
1698 XtOffsetOf(MessageHeader, std));
1699#endif
1700 discard_unknown_fields(submsg);
1701 }
1702 } else if (upb_fielddef_issubmsg(f)) {
1703 zval* submsg_php = CACHED_PTR_TO_ZVAL_PTR(
1704 DEREF(message_data(msg), offset, CACHED_VALUE*));
1705 if (Z_TYPE_P(submsg_php) == IS_NULL) continue;
1706 MessageHeader* submsg = UNBOX(MessageHeader, submsg_php);
1707 discard_unknown_fields(submsg);
1708 }
1709 }
1710}
1711
1712PHP_METHOD(Message, discardUnknownFields) {
1713 MessageHeader* msg = UNBOX(MessageHeader, getThis());
1714 discard_unknown_fields(msg);
1715}