blob: 863cde0132f2fae4ea9c011bd8d12e80209d7277 [file] [log] [blame]
Brian Silverman9c614bc2016-02-15 20:20:02 -05001// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: anuraag@google.com (Anuraag Agrawal)
32// Author: tibell@google.com (Johan Tibell)
33
34#include <google/protobuf/pyext/message.h>
35
36#include <map>
37#include <memory>
38#ifndef _SHARED_PTR_H
39#include <google/protobuf/stubs/shared_ptr.h>
40#endif
41#include <string>
42#include <vector>
43#include <structmember.h> // A Python header file.
44
45#ifndef PyVarObject_HEAD_INIT
46#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
47#endif
48#ifndef Py_TYPE
49#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
50#endif
51#include <google/protobuf/descriptor.pb.h>
52#include <google/protobuf/stubs/common.h>
53#include <google/protobuf/stubs/logging.h>
54#include <google/protobuf/io/coded_stream.h>
55#include <google/protobuf/util/message_differencer.h>
56#include <google/protobuf/descriptor.h>
57#include <google/protobuf/message.h>
58#include <google/protobuf/text_format.h>
59#include <google/protobuf/unknown_field_set.h>
60#include <google/protobuf/pyext/descriptor.h>
61#include <google/protobuf/pyext/descriptor_pool.h>
62#include <google/protobuf/pyext/extension_dict.h>
63#include <google/protobuf/pyext/repeated_composite_container.h>
64#include <google/protobuf/pyext/repeated_scalar_container.h>
65#include <google/protobuf/pyext/map_container.h>
66#include <google/protobuf/pyext/scoped_pyobject_ptr.h>
67#include <google/protobuf/stubs/strutil.h>
68
// Python 3 compatibility shims: the rest of this file is written against the
// Python 2 PyInt_* / PyString_* names, which are mapped here onto their
// PyLong_* / PyUnicode_* counterparts.
#if PY_MAJOR_VERSION >= 3
  #define PyInt_Check PyLong_Check
  #define PyInt_AsLong PyLong_AsLong
  #define PyInt_FromLong PyLong_FromLong
  #define PyInt_FromSize_t PyLong_FromSize_t
  #define PyString_Check PyUnicode_Check
  #define PyString_FromString PyUnicode_FromString
  #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
  #if PY_VERSION_HEX < 0x03030000
    #error "Python 3.0 - 3.2 are not supported."
  #else
  // Accepts either a unicode object (returned as UTF-8) or a bytes object.
  // Note: the macro argument `ob` is evaluated twice.
  #define PyString_AsString(ob) \
    (PyUnicode_Check(ob)? PyUnicode_AsUTF8(ob): PyBytes_AsString(ob))
  // Same dual unicode/bytes handling as above; evaluates to 0 on success and
  // -1 on failure for the unicode branch, and to the result of
  // PyBytes_AsStringAndSize otherwise.
  #define PyString_AsStringAndSize(ob, charpp, sizep) \
    (PyUnicode_Check(ob)? \
       ((*(charpp) = PyUnicode_AsUTF8AndSize(ob, (sizep))) == NULL? -1: 0): \
       PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
  #endif
#endif
88
89namespace google {
90namespace protobuf {
91namespace python {
92
// Module-level Python objects shared by the functions below.
// NOTE(review): none of these is initialized in this part of the file;
// presumably module initialization fills them in -- confirm.

// Key used by message_meta::New to look up the descriptor in the class dict.
static PyObject* kDESCRIPTOR;
// Attribute names under which AddDescriptors stores a fresh dict on classes
// whose descriptor has extension ranges.
static PyObject* k_extensions_by_name;
static PyObject* k_extensions_by_number;
// Callable invoked by AddDescriptors with an enum descriptor to build the
// wrapper published on the message class.
PyObject* EnumTypeWrapper_class;
// The only base class message_meta::New accepts in `bases`.
static PyObject* PythonMessage_class;
// Object appended to tp_subclasses by InsertEmptyWeakref (Python 2 only).
static PyObject* kEmptyWeakref;
// Lazily loaded by message_meta::New from the WKTBASES attribute of
// google.protobuf.internal.well_known_types; looked up by full message name.
static PyObject* WKT_classes = NULL;
100
101// Defines the Metaclass of all Message classes.
102// It allows us to cache some C++ pointers in the class object itself, they are
103// faster to extract than from the type's dictionary.
104
// Instance layout of the metaclass: a regular heap type followed by the
// C++/Python objects cached for the message class. The cached fields are
// filled in by message_meta::New and released by message_meta::Dealloc.
struct PyMessageMeta {
  // This is how CPython subclasses C structures: the base structure must be
  // the first member of the object.
  PyHeapTypeObject super;

  // C++ descriptor of this message.
  const Descriptor* message_descriptor;

  // Owned reference, used to keep the pointer above alive.
  PyObject* py_message_descriptor;

  // The Python DescriptorPool used to create the class. It is needed to resolve
  // fields descriptors, including extensions fields; its C++ MessageFactory is
  // used to instantiate submessages.
  // This can be different from DESCRIPTOR.file.pool, in the case of a custom
  // DescriptorPool which defines new extensions.
  // We own the reference, because it's important to keep the descriptors and
  // factory alive.
  PyDescriptorPool* py_descriptor_pool;
};
125
126namespace message_meta {
127
128static int InsertEmptyWeakref(PyTypeObject* base);
129
130// Add the number of a field descriptor to the containing message class.
131// Equivalent to:
132// _cls.<field>_FIELD_NUMBER = <number>
133static bool AddFieldNumberToClass(
134 PyObject* cls, const FieldDescriptor* field_descriptor) {
135 string constant_name = field_descriptor->name() + "_FIELD_NUMBER";
136 UpperString(&constant_name);
137 ScopedPyObjectPtr attr_name(PyString_FromStringAndSize(
138 constant_name.c_str(), constant_name.size()));
139 if (attr_name == NULL) {
140 return false;
141 }
142 ScopedPyObjectPtr number(PyInt_FromLong(field_descriptor->number()));
143 if (number == NULL) {
144 return false;
145 }
146 if (PyObject_SetAttr(cls, attr_name.get(), number.get()) == -1) {
147 return false;
148 }
149 return true;
150}
151
152
153// Finalize the creation of the Message class.
154static int AddDescriptors(PyObject* cls, const Descriptor* descriptor) {
155 // If there are extension_ranges, the message is "extendable", and extension
156 // classes will register themselves in this class.
157 if (descriptor->extension_range_count() > 0) {
158 ScopedPyObjectPtr by_name(PyDict_New());
159 if (PyObject_SetAttr(cls, k_extensions_by_name, by_name.get()) < 0) {
160 return -1;
161 }
162 ScopedPyObjectPtr by_number(PyDict_New());
163 if (PyObject_SetAttr(cls, k_extensions_by_number, by_number.get()) < 0) {
164 return -1;
165 }
166 }
167
168 // For each field set: cls.<field>_FIELD_NUMBER = <number>
169 for (int i = 0; i < descriptor->field_count(); ++i) {
170 if (!AddFieldNumberToClass(cls, descriptor->field(i))) {
171 return -1;
172 }
173 }
174
175 // For each enum set cls.<enum name> = EnumTypeWrapper(<enum descriptor>).
176 //
177 // The enum descriptor we get from
178 // <messagedescriptor>.enum_types_by_name[name]
179 // which was built previously.
180 for (int i = 0; i < descriptor->enum_type_count(); ++i) {
181 const EnumDescriptor* enum_descriptor = descriptor->enum_type(i);
182 ScopedPyObjectPtr enum_type(
183 PyEnumDescriptor_FromDescriptor(enum_descriptor));
184 if (enum_type == NULL) {
185 return -1;
186 }
187 // Add wrapped enum type to message class.
188 ScopedPyObjectPtr wrapped(PyObject_CallFunctionObjArgs(
189 EnumTypeWrapper_class, enum_type.get(), NULL));
190 if (wrapped == NULL) {
191 return -1;
192 }
193 if (PyObject_SetAttrString(
194 cls, enum_descriptor->name().c_str(), wrapped.get()) == -1) {
195 return -1;
196 }
197
198 // For each enum value add cls.<name> = <number>
199 for (int j = 0; j < enum_descriptor->value_count(); ++j) {
200 const EnumValueDescriptor* enum_value_descriptor =
201 enum_descriptor->value(j);
202 ScopedPyObjectPtr value_number(PyInt_FromLong(
203 enum_value_descriptor->number()));
204 if (value_number == NULL) {
205 return -1;
206 }
207 if (PyObject_SetAttrString(cls, enum_value_descriptor->name().c_str(),
208 value_number.get()) == -1) {
209 return -1;
210 }
211 }
212 }
213
214 // For each extension set cls.<extension name> = <extension descriptor>.
215 //
216 // Extension descriptors come from
217 // <message descriptor>.extensions_by_name[name]
218 // which was defined previously.
219 for (int i = 0; i < descriptor->extension_count(); ++i) {
220 const google::protobuf::FieldDescriptor* field = descriptor->extension(i);
221 ScopedPyObjectPtr extension_field(PyFieldDescriptor_FromDescriptor(field));
222 if (extension_field == NULL) {
223 return -1;
224 }
225
226 // Add the extension field to the message class.
227 if (PyObject_SetAttrString(
228 cls, field->name().c_str(), extension_field.get()) == -1) {
229 return -1;
230 }
231
232 // For each extension set cls.<extension name>_FIELD_NUMBER = <number>.
233 if (!AddFieldNumberToClass(cls, field)) {
234 return -1;
235 }
236 }
237
238 return 0;
239}
240
241static PyObject* New(PyTypeObject* type,
242 PyObject* args, PyObject* kwargs) {
243 static char *kwlist[] = {"name", "bases", "dict", 0};
244 PyObject *bases, *dict;
245 const char* name;
246
247 // Check arguments: (name, bases, dict)
248 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", kwlist,
249 &name,
250 &PyTuple_Type, &bases,
251 &PyDict_Type, &dict)) {
252 return NULL;
253 }
254
255 // Check bases: only (), or (message.Message,) are allowed
256 if (!(PyTuple_GET_SIZE(bases) == 0 ||
257 (PyTuple_GET_SIZE(bases) == 1 &&
258 PyTuple_GET_ITEM(bases, 0) == PythonMessage_class))) {
259 PyErr_SetString(PyExc_TypeError,
260 "A Message class can only inherit from Message");
261 return NULL;
262 }
263
264 // Check dict['DESCRIPTOR']
265 PyObject* py_descriptor = PyDict_GetItem(dict, kDESCRIPTOR);
266 if (py_descriptor == NULL) {
267 PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
268 return NULL;
269 }
270 if (!PyObject_TypeCheck(py_descriptor, &PyMessageDescriptor_Type)) {
271 PyErr_Format(PyExc_TypeError, "Expected a message Descriptor, got %s",
272 py_descriptor->ob_type->tp_name);
273 return NULL;
274 }
275
276 // Build the arguments to the base metaclass.
277 // We change the __bases__ classes.
278 ScopedPyObjectPtr new_args;
279 const Descriptor* message_descriptor =
280 PyMessageDescriptor_AsDescriptor(py_descriptor);
281 if (message_descriptor == NULL) {
282 return NULL;
283 }
284
285 if (WKT_classes == NULL) {
286 ScopedPyObjectPtr well_known_types(PyImport_ImportModule(
287 "google.protobuf.internal.well_known_types"));
288 GOOGLE_DCHECK(well_known_types != NULL);
289
290 WKT_classes = PyObject_GetAttrString(well_known_types.get(), "WKTBASES");
291 GOOGLE_DCHECK(WKT_classes != NULL);
292 }
293
294 PyObject* well_known_class = PyDict_GetItemString(
295 WKT_classes, message_descriptor->full_name().c_str());
296 if (well_known_class == NULL) {
297 new_args.reset(Py_BuildValue("s(OO)O", name, &CMessage_Type,
298 PythonMessage_class, dict));
299 } else {
300 new_args.reset(Py_BuildValue("s(OOO)O", name, &CMessage_Type,
301 PythonMessage_class, well_known_class, dict));
302 }
303
304 if (new_args == NULL) {
305 return NULL;
306 }
307 // Call the base metaclass.
308 ScopedPyObjectPtr result(PyType_Type.tp_new(type, new_args.get(), NULL));
309 if (result == NULL) {
310 return NULL;
311 }
312 PyMessageMeta* newtype = reinterpret_cast<PyMessageMeta*>(result.get());
313
314 // Insert the empty weakref into the base classes.
315 if (InsertEmptyWeakref(
316 reinterpret_cast<PyTypeObject*>(PythonMessage_class)) < 0 ||
317 InsertEmptyWeakref(&CMessage_Type) < 0) {
318 return NULL;
319 }
320
321 // Cache the descriptor, both as Python object and as C++ pointer.
322 const Descriptor* descriptor =
323 PyMessageDescriptor_AsDescriptor(py_descriptor);
324 if (descriptor == NULL) {
325 return NULL;
326 }
327 Py_INCREF(py_descriptor);
328 newtype->py_message_descriptor = py_descriptor;
329 newtype->message_descriptor = descriptor;
330 // TODO(amauryfa): Don't always use the canonical pool of the descriptor,
331 // use the MessageFactory optionally passed in the class dict.
332 newtype->py_descriptor_pool = GetDescriptorPool_FromPool(
333 descriptor->file()->pool());
334 if (newtype->py_descriptor_pool == NULL) {
335 return NULL;
336 }
337 Py_INCREF(newtype->py_descriptor_pool);
338
339 // Add the message to the DescriptorPool.
340 if (cdescriptor_pool::RegisterMessageClass(newtype->py_descriptor_pool,
341 descriptor, result.get()) < 0) {
342 return NULL;
343 }
344
345 // Continue with type initialization: add other descriptors, enum values...
346 if (AddDescriptors(result.get(), descriptor) < 0) {
347 return NULL;
348 }
349 return result.release();
350}
351
352static void Dealloc(PyMessageMeta *self) {
353 Py_DECREF(self->py_message_descriptor);
354 Py_DECREF(self->py_descriptor_pool);
355 Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
356}
357
358
// This function inserts an empty weakref at the end of the list of
// subclasses for the main protocol buffer Message class.
//
// This eliminates an O(n^2) behaviour in the internal add_subclass
// routine.
364static int InsertEmptyWeakref(PyTypeObject *base_type) {
365#if PY_MAJOR_VERSION >= 3
366 // Python 3.4 has already included the fix for the issue that this
367 // hack addresses. For further background and the fix please see
368 // https://bugs.python.org/issue17936.
369 return 0;
370#else
371 PyObject *subclasses = base_type->tp_subclasses;
372 if (subclasses && PyList_CheckExact(subclasses)) {
373 return PyList_Append(subclasses, kEmptyWeakref);
374 }
375 return 0;
376#endif // PY_MAJOR_VERSION >= 3
377}
378
379} // namespace message_meta
380
// Static type object of the metaclass. Its own type is PyType_Type, and only
// the name, instance size, deallocator, flags, docstring and tp_new slots are
// populated; every other slot (including tp_base) is left as 0 in this
// initializer.
// NOTE(review): tp_base is presumably assigned before PyType_Ready during
// module initialization -- not visible in this part of the file.
PyTypeObject PyMessageMeta_Type = {
  PyVarObject_HEAD_INIT(&PyType_Type, 0)
  FULL_MODULE_NAME ".MessageMeta",     // tp_name
  sizeof(PyMessageMeta),               // tp_basicsize
  0,                                   // tp_itemsize
  (destructor)message_meta::Dealloc,   // tp_dealloc
  0,                                   // tp_print
  0,                                   // tp_getattr
  0,                                   // tp_setattr
  0,                                   // tp_compare
  0,                                   // tp_repr
  0,                                   // tp_as_number
  0,                                   // tp_as_sequence
  0,                                   // tp_as_mapping
  0,                                   // tp_hash
  0,                                   // tp_call
  0,                                   // tp_str
  0,                                   // tp_getattro
  0,                                   // tp_setattro
  0,                                   // tp_as_buffer
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
  "The metaclass of ProtocolMessages",  // tp_doc
  0,                                   // tp_traverse
  0,                                   // tp_clear
  0,                                   // tp_richcompare
  0,                                   // tp_weaklistoffset
  0,                                   // tp_iter
  0,                                   // tp_iternext
  0,                                   // tp_methods
  0,                                   // tp_members
  0,                                   // tp_getset
  0,                                   // tp_base
  0,                                   // tp_dict
  0,                                   // tp_descr_get
  0,                                   // tp_descr_set
  0,                                   // tp_dictoffset
  0,                                   // tp_init
  0,                                   // tp_alloc
  message_meta::New,                   // tp_new
};
421
422static PyMessageMeta* CheckMessageClass(PyTypeObject* cls) {
423 if (!PyObject_TypeCheck(cls, &PyMessageMeta_Type)) {
424 PyErr_Format(PyExc_TypeError, "Class %s is not a Message", cls->tp_name);
425 return NULL;
426 }
427 return reinterpret_cast<PyMessageMeta*>(cls);
428}
429
430static const Descriptor* GetMessageDescriptor(PyTypeObject* cls) {
431 PyMessageMeta* type = CheckMessageClass(cls);
432 if (type == NULL) {
433 return NULL;
434 }
435 return type->message_descriptor;
436}
437
438// Forward declarations
439namespace cmessage {
440int InternalReleaseFieldByDescriptor(
441 CMessage* self,
442 const FieldDescriptor* field_descriptor,
443 PyObject* composite_field);
444} // namespace cmessage
445
446// ---------------------------------------------------------------------
447// Visiting the composite children of a CMessage
448
449struct ChildVisitor {
450 // Returns 0 on success, -1 on failure.
451 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
452 return 0;
453 }
454
455 // Returns 0 on success, -1 on failure.
456 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
457 return 0;
458 }
459
460 // Returns 0 on success, -1 on failure.
461 int VisitCMessage(CMessage* cmessage,
462 const FieldDescriptor* field_descriptor) {
463 return 0;
464 }
465};
466
467// Apply a function to a composite field. Does nothing if child is of
468// non-composite type.
469template<class Visitor>
470static int VisitCompositeField(const FieldDescriptor* descriptor,
471 PyObject* child,
472 Visitor visitor) {
473 if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
474 if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
475 if (descriptor->is_map()) {
476 MapContainer* container = reinterpret_cast<MapContainer*>(child);
477 if (visitor.VisitMapContainer(container) == -1) {
478 return -1;
479 }
480 } else {
481 RepeatedCompositeContainer* container =
482 reinterpret_cast<RepeatedCompositeContainer*>(child);
483 if (visitor.VisitRepeatedCompositeContainer(container) == -1)
484 return -1;
485 }
486 } else {
487 RepeatedScalarContainer* container =
488 reinterpret_cast<RepeatedScalarContainer*>(child);
489 if (visitor.VisitRepeatedScalarContainer(container) == -1)
490 return -1;
491 }
492 } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
493 CMessage* cmsg = reinterpret_cast<CMessage*>(child);
494 if (visitor.VisitCMessage(cmsg, descriptor) == -1)
495 return -1;
496 }
497 // The ExtensionDict might contain non-composite fields, which we
498 // skip here.
499 return 0;
500}
501
502// Visit each composite field and extension field of this CMessage.
503// Returns -1 on error and 0 on success.
504template<class Visitor>
505int ForEachCompositeField(CMessage* self, Visitor visitor) {
506 Py_ssize_t pos = 0;
507 PyObject* key;
508 PyObject* field;
509
510 // Visit normal fields.
511 if (self->composite_fields) {
512 // Never use self->message in this function, it may be already freed.
513 const Descriptor* message_descriptor =
514 GetMessageDescriptor(Py_TYPE(self));
515 while (PyDict_Next(self->composite_fields, &pos, &key, &field)) {
516 Py_ssize_t key_str_size;
517 char *key_str_data;
518 if (PyString_AsStringAndSize(key, &key_str_data, &key_str_size) != 0)
519 return -1;
520 const string key_str(key_str_data, key_str_size);
521 const FieldDescriptor* descriptor =
522 message_descriptor->FindFieldByName(key_str);
523 if (descriptor != NULL) {
524 if (VisitCompositeField(descriptor, field, visitor) == -1)
525 return -1;
526 }
527 }
528 }
529
530 // Visit extension fields.
531 if (self->extensions != NULL) {
532 pos = 0;
533 while (PyDict_Next(self->extensions->values, &pos, &key, &field)) {
534 const FieldDescriptor* descriptor = cmessage::GetExtensionDescriptor(key);
535 if (descriptor == NULL)
536 return -1;
537 if (VisitCompositeField(descriptor, field, visitor) == -1)
538 return -1;
539 }
540 }
541
542 return 0;
543}
544
545// ---------------------------------------------------------------------
546
// Constants used for integer type range checking.
// CheckAndGetInteger receives its bounds as PyObject* min/max and compares
// `min` against kPythonZero to select the unsigned conversion path.
// NOTE(review): these objects are not initialized in this part of the file;
// presumably module initialization creates them -- confirm.
PyObject* kPythonZero;
PyObject* kint32min_py;
PyObject* kint32max_py;
PyObject* kuint32max_py;
PyObject* kint64min_py;
PyObject* kint64max_py;
PyObject* kuint64max_py;

// Python exception classes.
// NOTE(review): not referenced in this part of the file; judging by the
// names they are raised on encode/decode/pickle failures -- confirm.
PyObject* EncodeError_class;
PyObject* DecodeError_class;
PyObject* PickleError_class;

// Constant PyString values used for GetAttr/GetItem.
static PyObject* k_cdescriptor;
static PyObject* kfull_name;
563
564/* Is 64bit */
565void FormatTypeError(PyObject* arg, char* expected_types) {
566 PyObject* repr = PyObject_Repr(arg);
567 if (repr) {
568 PyErr_Format(PyExc_TypeError,
569 "%.100s has type %.100s, but expected one of: %s",
570 PyString_AsString(repr),
571 Py_TYPE(arg)->tp_name,
572 expected_types);
573 Py_DECREF(repr);
574 }
575}
576
577template<class T>
578bool CheckAndGetInteger(
579 PyObject* arg, T* value, PyObject* min, PyObject* max) {
580 bool is_long = PyLong_Check(arg);
581#if PY_MAJOR_VERSION < 3
582 if (!PyInt_Check(arg) && !is_long) {
583 FormatTypeError(arg, "int, long");
584 return false;
585 }
586 if (PyObject_Compare(min, arg) > 0 || PyObject_Compare(max, arg) < 0) {
587#else
588 if (!is_long) {
589 FormatTypeError(arg, "int");
590 return false;
591 }
592 if (PyObject_RichCompareBool(min, arg, Py_LE) != 1 ||
593 PyObject_RichCompareBool(max, arg, Py_GE) != 1) {
594#endif
595 if (!PyErr_Occurred()) {
596 PyObject *s = PyObject_Str(arg);
597 if (s) {
598 PyErr_Format(PyExc_ValueError,
599 "Value out of range: %s",
600 PyString_AsString(s));
601 Py_DECREF(s);
602 }
603 }
604 return false;
605 }
606#if PY_MAJOR_VERSION < 3
607 if (!is_long) {
608 *value = static_cast<T>(PyInt_AsLong(arg));
609 } else // NOLINT
610#endif
611 {
612 if (min == kPythonZero) {
613 *value = static_cast<T>(PyLong_AsUnsignedLongLong(arg));
614 } else {
615 *value = static_cast<T>(PyLong_AsLongLong(arg));
616 }
617 }
618 return true;
619}
620
621// These are referenced by repeated_scalar_container, and must
622// be explicitly instantiated.
623template bool CheckAndGetInteger<int32>(
624 PyObject*, int32*, PyObject*, PyObject*);
625template bool CheckAndGetInteger<int64>(
626 PyObject*, int64*, PyObject*, PyObject*);
627template bool CheckAndGetInteger<uint32>(
628 PyObject*, uint32*, PyObject*, PyObject*);
629template bool CheckAndGetInteger<uint64>(
630 PyObject*, uint64*, PyObject*, PyObject*);
631
632bool CheckAndGetDouble(PyObject* arg, double* value) {
633 if (!PyInt_Check(arg) && !PyLong_Check(arg) &&
634 !PyFloat_Check(arg)) {
635 FormatTypeError(arg, "int, long, float");
636 return false;
637 }
638 *value = PyFloat_AsDouble(arg);
639 return true;
640}
641
642bool CheckAndGetFloat(PyObject* arg, float* value) {
643 double double_value;
644 if (!CheckAndGetDouble(arg, &double_value)) {
645 return false;
646 }
647 *value = static_cast<float>(double_value);
648 return true;
649}
650
651bool CheckAndGetBool(PyObject* arg, bool* value) {
652 if (!PyInt_Check(arg) && !PyBool_Check(arg) && !PyLong_Check(arg)) {
653 FormatTypeError(arg, "int, long, bool");
654 return false;
655 }
656 *value = static_cast<bool>(PyInt_AsLong(arg));
657 return true;
658}
659
660// Checks whether the given object (which must be "bytes" or "unicode") contains
661// valid UTF-8.
662bool IsValidUTF8(PyObject* obj) {
663 if (PyBytes_Check(obj)) {
664 PyObject* unicode = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
665
666 // Clear the error indicator; we report our own error when desired.
667 PyErr_Clear();
668
669 if (unicode) {
670 Py_DECREF(unicode);
671 return true;
672 } else {
673 return false;
674 }
675 } else {
676 // Unicode object, known to be valid UTF-8.
677 return true;
678 }
679}
680
681bool AllowInvalidUTF8(const FieldDescriptor* field) { return false; }
682
683PyObject* CheckString(PyObject* arg, const FieldDescriptor* descriptor) {
684 GOOGLE_DCHECK(descriptor->type() == FieldDescriptor::TYPE_STRING ||
685 descriptor->type() == FieldDescriptor::TYPE_BYTES);
686 if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
687 if (!PyBytes_Check(arg) && !PyUnicode_Check(arg)) {
688 FormatTypeError(arg, "bytes, unicode");
689 return NULL;
690 }
691
692 if (!IsValidUTF8(arg) && !AllowInvalidUTF8(descriptor)) {
693 PyObject* repr = PyObject_Repr(arg);
694 PyErr_Format(PyExc_ValueError,
695 "%s has type str, but isn't valid UTF-8 "
696 "encoding. Non-UTF-8 strings must be converted to "
697 "unicode objects before being added.",
698 PyString_AsString(repr));
699 Py_DECREF(repr);
700 return NULL;
701 }
702 } else if (!PyBytes_Check(arg)) {
703 FormatTypeError(arg, "bytes");
704 return NULL;
705 }
706
707 PyObject* encoded_string = NULL;
708 if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
709 if (PyBytes_Check(arg)) {
710 // The bytes were already validated as correctly encoded UTF-8 above.
711 encoded_string = arg; // Already encoded.
712 Py_INCREF(encoded_string);
713 } else {
714 encoded_string = PyUnicode_AsEncodedObject(arg, "utf-8", NULL);
715 }
716 } else {
717 // In this case field type is "bytes".
718 encoded_string = arg;
719 Py_INCREF(encoded_string);
720 }
721
722 return encoded_string;
723}
724
725bool CheckAndSetString(
726 PyObject* arg, Message* message,
727 const FieldDescriptor* descriptor,
728 const Reflection* reflection,
729 bool append,
730 int index) {
731 ScopedPyObjectPtr encoded_string(CheckString(arg, descriptor));
732
733 if (encoded_string.get() == NULL) {
734 return false;
735 }
736
737 char* value;
738 Py_ssize_t value_len;
739 if (PyBytes_AsStringAndSize(encoded_string.get(), &value, &value_len) < 0) {
740 return false;
741 }
742
743 string value_string(value, value_len);
744 if (append) {
745 reflection->AddString(message, descriptor, value_string);
746 } else if (index < 0) {
747 reflection->SetString(message, descriptor, value_string);
748 } else {
749 reflection->SetRepeatedString(message, descriptor, index, value_string);
750 }
751 return true;
752}
753
754PyObject* ToStringObject(const FieldDescriptor* descriptor, string value) {
755 if (descriptor->type() != FieldDescriptor::TYPE_STRING) {
756 return PyBytes_FromStringAndSize(value.c_str(), value.length());
757 }
758
759 PyObject* result = PyUnicode_DecodeUTF8(value.c_str(), value.length(), NULL);
760 // If the string can't be decoded in UTF-8, just return a string object that
761 // contains the raw bytes. This can't happen if the value was assigned using
762 // the members of the Python message object, but can happen if the values were
763 // parsed from the wire (binary).
764 if (result == NULL) {
765 PyErr_Clear();
766 result = PyBytes_FromStringAndSize(value.c_str(), value.length());
767 }
768 return result;
769}
770
771bool CheckFieldBelongsToMessage(const FieldDescriptor* field_descriptor,
772 const Message* message) {
773 if (message->GetDescriptor() == field_descriptor->containing_type()) {
774 return true;
775 }
776 PyErr_Format(PyExc_KeyError, "Field '%s' does not belong to message '%s'",
777 field_descriptor->full_name().c_str(),
778 message->GetDescriptor()->full_name().c_str());
779 return false;
780}
781
782namespace cmessage {
783
784PyDescriptorPool* GetDescriptorPoolForMessage(CMessage* message) {
785 // No need to check the type: the type of instances of CMessage is always
786 // an instance of PyMessageMeta. Let's prove it with a debug-only check.
787 GOOGLE_DCHECK(PyObject_TypeCheck(message, &CMessage_Type));
788 return reinterpret_cast<PyMessageMeta*>(Py_TYPE(message))->py_descriptor_pool;
789}
790
791MessageFactory* GetFactoryForMessage(CMessage* message) {
792 return GetDescriptorPoolForMessage(message)->message_factory;
793}
794
795static int MaybeReleaseOverlappingOneofField(
796 CMessage* cmessage,
797 const FieldDescriptor* field) {
798#ifdef GOOGLE_PROTOBUF_HAS_ONEOF
799 Message* message = cmessage->message;
800 const Reflection* reflection = message->GetReflection();
801 if (!field->containing_oneof() ||
802 !reflection->HasOneof(*message, field->containing_oneof()) ||
803 reflection->HasField(*message, field)) {
804 // No other field in this oneof, no need to release.
805 return 0;
806 }
807
808 const OneofDescriptor* oneof = field->containing_oneof();
809 const FieldDescriptor* existing_field =
810 reflection->GetOneofFieldDescriptor(*message, oneof);
811 if (existing_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
812 // Non-message fields don't need to be released.
813 return 0;
814 }
815 const char* field_name = existing_field->name().c_str();
816 PyObject* child_message = cmessage->composite_fields ?
817 PyDict_GetItemString(cmessage->composite_fields, field_name) : NULL;
818 if (child_message == NULL) {
819 // No python reference to this field so no need to release.
820 return 0;
821 }
822
823 if (InternalReleaseFieldByDescriptor(
824 cmessage, existing_field, child_message) < 0) {
825 return -1;
826 }
827 return PyDict_DelItemString(cmessage->composite_fields, field_name);
828#else
829 return 0;
830#endif
831}
832
833// ---------------------------------------------------------------------
834// Making a message writable
835
836static Message* GetMutableMessage(
837 CMessage* parent,
838 const FieldDescriptor* parent_field) {
839 Message* parent_message = parent->message;
840 const Reflection* reflection = parent_message->GetReflection();
841 if (MaybeReleaseOverlappingOneofField(parent, parent_field) < 0) {
842 return NULL;
843 }
844 return reflection->MutableMessage(
845 parent_message, parent_field, GetFactoryForMessage(parent));
846}
847
848struct FixupMessageReference : public ChildVisitor {
849 // message must outlive this object.
850 explicit FixupMessageReference(Message* message) :
851 message_(message) {}
852
853 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
854 container->message = message_;
855 return 0;
856 }
857
858 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
859 container->message = message_;
860 return 0;
861 }
862
863 int VisitMapContainer(MapContainer* container) {
864 container->message = message_;
865 return 0;
866 }
867
868 private:
869 Message* message_;
870};
871
872int AssureWritable(CMessage* self) {
873 if (self == NULL || !self->read_only) {
874 return 0;
875 }
876
877 if (self->parent == NULL) {
878 // If parent is NULL but we are trying to modify a read-only message, this
879 // is a reference to a constant default instance that needs to be replaced
880 // with a mutable top-level message.
881 self->message = self->message->New();
882 self->owner.reset(self->message);
883 // Cascade the new owner to eventual children: even if this message is
884 // empty, some submessages or repeated containers might exist already.
885 SetOwner(self, self->owner);
886 } else {
887 // Otherwise, we need a mutable child message.
888 if (AssureWritable(self->parent) == -1)
889 return -1;
890
891 // Make self->message writable.
892 Message* mutable_message = GetMutableMessage(
893 self->parent,
894 self->parent_field_descriptor);
895 if (mutable_message == NULL) {
896 return -1;
897 }
898 self->message = mutable_message;
899 }
900 self->read_only = false;
901
902 // When a CMessage is made writable its Message pointer is updated
903 // to point to a new mutable Message. When that happens we need to
904 // update any references to the old, read-only CMessage. There are
905 // four places such references occur: RepeatedScalarContainer,
906 // RepeatedCompositeContainer, MapContainer, and ExtensionDict.
907 if (self->extensions != NULL)
908 self->extensions->message = self->message;
909 if (ForEachCompositeField(self, FixupMessageReference(self->message)) == -1)
910 return -1;
911
912 return 0;
913}
914
915// --- Globals:
916
917// Retrieve a C++ FieldDescriptor for a message attribute.
918// The C++ message must be valid.
919// TODO(amauryfa): This function should stay internal, because exception
920// handling is not consistent.
921static const FieldDescriptor* GetFieldDescriptor(
922 CMessage* self, PyObject* name) {
923 const Descriptor *message_descriptor = self->message->GetDescriptor();
924 char* field_name;
925 Py_ssize_t size;
926 if (PyString_AsStringAndSize(name, &field_name, &size) < 0) {
927 return NULL;
928 }
929 const FieldDescriptor *field_descriptor =
930 message_descriptor->FindFieldByName(string(field_name, size));
931 if (field_descriptor == NULL) {
932 // Note: No exception is set!
933 return NULL;
934 }
935 return field_descriptor;
936}
937
938// Retrieve a C++ FieldDescriptor for an extension handle.
939const FieldDescriptor* GetExtensionDescriptor(PyObject* extension) {
940 ScopedPyObjectPtr cdescriptor;
941 if (!PyObject_TypeCheck(extension, &PyFieldDescriptor_Type)) {
942 // Most callers consider extensions as a plain dictionary. We should
943 // allow input which is not a field descriptor, and simply pretend it does
944 // not exist.
945 PyErr_SetObject(PyExc_KeyError, extension);
946 return NULL;
947 }
948 return PyFieldDescriptor_AsDescriptor(extension);
949}
950
951// If value is a string, convert it into an enum value based on the labels in
952// descriptor, otherwise simply return value. Always returns a new reference.
953static PyObject* GetIntegerEnumValue(const FieldDescriptor& descriptor,
954 PyObject* value) {
955 if (PyString_Check(value) || PyUnicode_Check(value)) {
956 const EnumDescriptor* enum_descriptor = descriptor.enum_type();
957 if (enum_descriptor == NULL) {
958 PyErr_SetString(PyExc_TypeError, "not an enum field");
959 return NULL;
960 }
961 char* enum_label;
962 Py_ssize_t size;
963 if (PyString_AsStringAndSize(value, &enum_label, &size) < 0) {
964 return NULL;
965 }
966 const EnumValueDescriptor* enum_value_descriptor =
967 enum_descriptor->FindValueByName(string(enum_label, size));
968 if (enum_value_descriptor == NULL) {
969 PyErr_SetString(PyExc_ValueError, "unknown enum label");
970 return NULL;
971 }
972 return PyInt_FromLong(enum_value_descriptor->number());
973 }
974 Py_INCREF(value);
975 return value;
976}
977
978// If cmessage_list is not NULL, this function releases values into the
979// container CMessages instead of just removing. Repeated composite container
980// needs to do this to make sure CMessages stay alive if they're still
981// referenced after deletion. Repeated scalar container doesn't need to worry.
982int InternalDeleteRepeatedField(
983 CMessage* self,
984 const FieldDescriptor* field_descriptor,
985 PyObject* slice,
986 PyObject* cmessage_list) {
987 Message* message = self->message;
988 Py_ssize_t length, from, to, step, slice_length;
989 const Reflection* reflection = message->GetReflection();
990 int min, max;
991 length = reflection->FieldSize(*message, field_descriptor);
992
993 if (PyInt_Check(slice) || PyLong_Check(slice)) {
994 from = to = PyLong_AsLong(slice);
995 if (from < 0) {
996 from = to = length + from;
997 }
998 step = 1;
999 min = max = from;
1000
1001 // Range check.
1002 if (from < 0 || from >= length) {
1003 PyErr_Format(PyExc_IndexError, "list assignment index out of range");
1004 return -1;
1005 }
1006 } else if (PySlice_Check(slice)) {
1007 from = to = step = slice_length = 0;
1008 PySlice_GetIndicesEx(
1009#if PY_MAJOR_VERSION < 3
1010 reinterpret_cast<PySliceObject*>(slice),
1011#else
1012 slice,
1013#endif
1014 length, &from, &to, &step, &slice_length);
1015 if (from < to) {
1016 min = from;
1017 max = to - 1;
1018 } else {
1019 min = to + 1;
1020 max = from;
1021 }
1022 } else {
1023 PyErr_SetString(PyExc_TypeError, "list indices must be integers");
1024 return -1;
1025 }
1026
1027 Py_ssize_t i = from;
1028 std::vector<bool> to_delete(length, false);
1029 while (i >= min && i <= max) {
1030 to_delete[i] = true;
1031 i += step;
1032 }
1033
1034 to = 0;
1035 for (i = 0; i < length; ++i) {
1036 if (!to_delete[i]) {
1037 if (i != to) {
1038 reflection->SwapElements(message, field_descriptor, i, to);
1039 if (cmessage_list != NULL) {
1040 // If a list of cmessages is passed in (i.e. from a repeated
1041 // composite container), swap those as well to correspond to the
1042 // swaps in the underlying message so they're in the right order
1043 // when we start releasing.
1044 PyObject* tmp = PyList_GET_ITEM(cmessage_list, i);
1045 PyList_SET_ITEM(cmessage_list, i,
1046 PyList_GET_ITEM(cmessage_list, to));
1047 PyList_SET_ITEM(cmessage_list, to, tmp);
1048 }
1049 }
1050 ++to;
1051 }
1052 }
1053
1054 while (i > to) {
1055 if (cmessage_list == NULL) {
1056 reflection->RemoveLast(message, field_descriptor);
1057 } else {
1058 CMessage* last_cmessage = reinterpret_cast<CMessage*>(
1059 PyList_GET_ITEM(cmessage_list, PyList_GET_SIZE(cmessage_list) - 1));
1060 repeated_composite_container::ReleaseLastTo(
1061 self, field_descriptor, last_cmessage);
1062 if (PySequence_DelItem(cmessage_list, -1) < 0) {
1063 return -1;
1064 }
1065 }
1066 --i;
1067 }
1068
1069 return 0;
1070}
1071
1072// Initializes fields of a message. Used in constructors.
1073int InitAttributes(CMessage* self, PyObject* kwargs) {
1074 if (kwargs == NULL) {
1075 return 0;
1076 }
1077
1078 Py_ssize_t pos = 0;
1079 PyObject* name;
1080 PyObject* value;
1081 while (PyDict_Next(kwargs, &pos, &name, &value)) {
1082 if (!PyString_Check(name)) {
1083 PyErr_SetString(PyExc_ValueError, "Field name must be a string");
1084 return -1;
1085 }
1086 const FieldDescriptor* descriptor = GetFieldDescriptor(self, name);
1087 if (descriptor == NULL) {
1088 PyErr_Format(PyExc_ValueError, "Protocol message %s has no \"%s\" field.",
1089 self->message->GetDescriptor()->name().c_str(),
1090 PyString_AsString(name));
1091 return -1;
1092 }
1093 if (descriptor->is_map()) {
1094 ScopedPyObjectPtr map(GetAttr(self, name));
1095 const FieldDescriptor* value_descriptor =
1096 descriptor->message_type()->FindFieldByName("value");
1097 if (value_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1098 Py_ssize_t map_pos = 0;
1099 PyObject* map_key;
1100 PyObject* map_value;
1101 while (PyDict_Next(value, &map_pos, &map_key, &map_value)) {
1102 ScopedPyObjectPtr function_return;
1103 function_return.reset(PyObject_GetItem(map.get(), map_key));
1104 if (function_return.get() == NULL) {
1105 return -1;
1106 }
1107 ScopedPyObjectPtr ok(PyObject_CallMethod(
1108 function_return.get(), "MergeFrom", "O", map_value));
1109 if (ok.get() == NULL) {
1110 return -1;
1111 }
1112 }
1113 } else {
1114 ScopedPyObjectPtr function_return;
1115 function_return.reset(
1116 PyObject_CallMethod(map.get(), "update", "O", value));
1117 if (function_return.get() == NULL) {
1118 return -1;
1119 }
1120 }
1121 } else if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1122 ScopedPyObjectPtr container(GetAttr(self, name));
1123 if (container == NULL) {
1124 return -1;
1125 }
1126 if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1127 RepeatedCompositeContainer* rc_container =
1128 reinterpret_cast<RepeatedCompositeContainer*>(container.get());
1129 ScopedPyObjectPtr iter(PyObject_GetIter(value));
1130 if (iter == NULL) {
1131 PyErr_SetString(PyExc_TypeError, "Value must be iterable");
1132 return -1;
1133 }
1134 ScopedPyObjectPtr next;
1135 while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
1136 PyObject* kwargs = (PyDict_Check(next.get()) ? next.get() : NULL);
1137 ScopedPyObjectPtr new_msg(
1138 repeated_composite_container::Add(rc_container, NULL, kwargs));
1139 if (new_msg == NULL) {
1140 return -1;
1141 }
1142 if (kwargs == NULL) {
1143 // next was not a dict, it's a message we need to merge
1144 ScopedPyObjectPtr merged(MergeFrom(
1145 reinterpret_cast<CMessage*>(new_msg.get()), next.get()));
1146 if (merged.get() == NULL) {
1147 return -1;
1148 }
1149 }
1150 }
1151 if (PyErr_Occurred()) {
1152 // Check to see how PyIter_Next() exited.
1153 return -1;
1154 }
1155 } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1156 RepeatedScalarContainer* rs_container =
1157 reinterpret_cast<RepeatedScalarContainer*>(container.get());
1158 ScopedPyObjectPtr iter(PyObject_GetIter(value));
1159 if (iter == NULL) {
1160 PyErr_SetString(PyExc_TypeError, "Value must be iterable");
1161 return -1;
1162 }
1163 ScopedPyObjectPtr next;
1164 while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
1165 ScopedPyObjectPtr enum_value(
1166 GetIntegerEnumValue(*descriptor, next.get()));
1167 if (enum_value == NULL) {
1168 return -1;
1169 }
1170 ScopedPyObjectPtr new_msg(repeated_scalar_container::Append(
1171 rs_container, enum_value.get()));
1172 if (new_msg == NULL) {
1173 return -1;
1174 }
1175 }
1176 if (PyErr_Occurred()) {
1177 // Check to see how PyIter_Next() exited.
1178 return -1;
1179 }
1180 } else {
1181 if (ScopedPyObjectPtr(repeated_scalar_container::Extend(
1182 reinterpret_cast<RepeatedScalarContainer*>(container.get()),
1183 value)) ==
1184 NULL) {
1185 return -1;
1186 }
1187 }
1188 } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1189 ScopedPyObjectPtr message(GetAttr(self, name));
1190 if (message == NULL) {
1191 return -1;
1192 }
1193 CMessage* cmessage = reinterpret_cast<CMessage*>(message.get());
1194 if (PyDict_Check(value)) {
1195 if (InitAttributes(cmessage, value) < 0) {
1196 return -1;
1197 }
1198 } else {
1199 ScopedPyObjectPtr merged(MergeFrom(cmessage, value));
1200 if (merged == NULL) {
1201 return -1;
1202 }
1203 }
1204 } else {
1205 ScopedPyObjectPtr new_val;
1206 if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1207 new_val.reset(GetIntegerEnumValue(*descriptor, value));
1208 if (new_val == NULL) {
1209 return -1;
1210 }
1211 }
1212 if (SetAttr(self, name, (new_val.get() == NULL) ? value : new_val.get()) <
1213 0) {
1214 return -1;
1215 }
1216 }
1217 }
1218 return 0;
1219}
1220
1221// Allocates an incomplete Python Message: the caller must fill self->message,
1222// self->owner and eventually self->parent.
1223CMessage* NewEmptyMessage(PyObject* type, const Descriptor *descriptor) {
1224 CMessage* self = reinterpret_cast<CMessage*>(
1225 PyType_GenericAlloc(reinterpret_cast<PyTypeObject*>(type), 0));
1226 if (self == NULL) {
1227 return NULL;
1228 }
1229
1230 self->message = NULL;
1231 self->parent = NULL;
1232 self->parent_field_descriptor = NULL;
1233 self->read_only = false;
1234 self->extensions = NULL;
1235
1236 self->composite_fields = NULL;
1237
1238 return self;
1239}
1240
1241// The __new__ method of Message classes.
1242// Creates a new C++ message and takes ownership.
1243static PyObject* New(PyTypeObject* cls,
1244 PyObject* unused_args, PyObject* unused_kwargs) {
1245 PyMessageMeta* type = CheckMessageClass(cls);
1246 if (type == NULL) {
1247 return NULL;
1248 }
1249 // Retrieve the message descriptor and the default instance (=prototype).
1250 const Descriptor* message_descriptor = type->message_descriptor;
1251 if (message_descriptor == NULL) {
1252 return NULL;
1253 }
1254 const Message* default_message = type->py_descriptor_pool->message_factory
1255 ->GetPrototype(message_descriptor);
1256 if (default_message == NULL) {
1257 PyErr_SetString(PyExc_TypeError, message_descriptor->full_name().c_str());
1258 return NULL;
1259 }
1260
1261 CMessage* self = NewEmptyMessage(reinterpret_cast<PyObject*>(type),
1262 message_descriptor);
1263 if (self == NULL) {
1264 return NULL;
1265 }
1266 self->message = default_message->New();
1267 self->owner.reset(self->message);
1268 return reinterpret_cast<PyObject*>(self);
1269}
1270
1271// The __init__ method of Message classes.
1272// It initializes fields from keywords passed to the constructor.
1273static int Init(CMessage* self, PyObject* args, PyObject* kwargs) {
1274 if (PyTuple_Size(args) != 0) {
1275 PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
1276 return -1;
1277 }
1278
1279 return InitAttributes(self, kwargs);
1280}
1281
1282// ---------------------------------------------------------------------
1283// Deallocating a CMessage
1284//
1285// Deallocating a CMessage requires that we clear any weak references
1286// from children to the message being deallocated.
1287
1288// Clear the weak reference from the child to the parent.
1289struct ClearWeakReferences : public ChildVisitor {
1290 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1291 container->parent = NULL;
1292 // The elements in the container have the same parent as the
1293 // container itself, so NULL out that pointer as well.
1294 const Py_ssize_t n = PyList_GET_SIZE(container->child_messages);
1295 for (Py_ssize_t i = 0; i < n; ++i) {
1296 CMessage* child_cmessage = reinterpret_cast<CMessage*>(
1297 PyList_GET_ITEM(container->child_messages, i));
1298 child_cmessage->parent = NULL;
1299 }
1300 return 0;
1301 }
1302
1303 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1304 container->parent = NULL;
1305 return 0;
1306 }
1307
1308 int VisitMapContainer(MapContainer* container) {
1309 container->parent = NULL;
1310 return 0;
1311 }
1312
1313 int VisitCMessage(CMessage* cmessage,
1314 const FieldDescriptor* field_descriptor) {
1315 cmessage->parent = NULL;
1316 return 0;
1317 }
1318};
1319
1320static void Dealloc(CMessage* self) {
1321 // Null out all weak references from children to this message.
1322 GOOGLE_CHECK_EQ(0, ForEachCompositeField(self, ClearWeakReferences()));
1323 if (self->extensions) {
1324 self->extensions->parent = NULL;
1325 }
1326
1327 Py_CLEAR(self->extensions);
1328 Py_CLEAR(self->composite_fields);
1329 self->owner.reset();
1330 Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
1331}
1332
1333// ---------------------------------------------------------------------
1334
1335
1336PyObject* IsInitialized(CMessage* self, PyObject* args) {
1337 PyObject* errors = NULL;
1338 if (PyArg_ParseTuple(args, "|O", &errors) < 0) {
1339 return NULL;
1340 }
1341 if (self->message->IsInitialized()) {
1342 Py_RETURN_TRUE;
1343 }
1344 if (errors != NULL) {
1345 ScopedPyObjectPtr initialization_errors(
1346 FindInitializationErrors(self));
1347 if (initialization_errors == NULL) {
1348 return NULL;
1349 }
1350 ScopedPyObjectPtr extend_name(PyString_FromString("extend"));
1351 if (extend_name == NULL) {
1352 return NULL;
1353 }
1354 ScopedPyObjectPtr result(PyObject_CallMethodObjArgs(
1355 errors,
1356 extend_name.get(),
1357 initialization_errors.get(),
1358 NULL));
1359 if (result == NULL) {
1360 return NULL;
1361 }
1362 }
1363 Py_RETURN_FALSE;
1364}
1365
1366PyObject* HasFieldByDescriptor(
1367 CMessage* self, const FieldDescriptor* field_descriptor) {
1368 Message* message = self->message;
1369 if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
1370 return NULL;
1371 }
1372 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1373 PyErr_SetString(PyExc_KeyError,
1374 "Field is repeated. A singular method is required.");
1375 return NULL;
1376 }
1377 bool has_field =
1378 message->GetReflection()->HasField(*message, field_descriptor);
1379 return PyBool_FromLong(has_field ? 1 : 0);
1380}
1381
1382const FieldDescriptor* FindFieldWithOneofs(
1383 const Message* message, const string& field_name, bool* in_oneof) {
1384 *in_oneof = false;
1385 const Descriptor* descriptor = message->GetDescriptor();
1386 const FieldDescriptor* field_descriptor =
1387 descriptor->FindFieldByName(field_name);
1388 if (field_descriptor != NULL) {
1389 return field_descriptor;
1390 }
1391 const OneofDescriptor* oneof_desc =
1392 descriptor->FindOneofByName(field_name);
1393 if (oneof_desc != NULL) {
1394 *in_oneof = true;
1395 return message->GetReflection()->GetOneofFieldDescriptor(*message,
1396 oneof_desc);
1397 }
1398 return NULL;
1399}
1400
1401bool CheckHasPresence(const FieldDescriptor* field_descriptor, bool in_oneof) {
1402 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
1403 PyErr_Format(PyExc_ValueError,
1404 "Protocol message has no singular \"%s\" field.",
1405 field_descriptor->name().c_str());
1406 return false;
1407 }
1408
1409 if (field_descriptor->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
1410 // HasField() for a oneof *itself* isn't supported.
1411 if (in_oneof) {
1412 PyErr_Format(PyExc_ValueError,
1413 "Can't test oneof field \"%s\" for presence in proto3, use "
1414 "WhichOneof instead.",
1415 field_descriptor->containing_oneof()->name().c_str());
1416 return false;
1417 }
1418
1419 // ...but HasField() for fields *in* a oneof is supported.
1420 if (field_descriptor->containing_oneof() != NULL) {
1421 return true;
1422 }
1423
1424 if (field_descriptor->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
1425 PyErr_Format(
1426 PyExc_ValueError,
1427 "Can't test non-submessage field \"%s\" for presence in proto3.",
1428 field_descriptor->name().c_str());
1429 return false;
1430 }
1431 }
1432
1433 return true;
1434}
1435
1436PyObject* HasField(CMessage* self, PyObject* arg) {
1437 char* field_name;
1438 Py_ssize_t size;
1439#if PY_MAJOR_VERSION < 3
1440 if (PyString_AsStringAndSize(arg, &field_name, &size) < 0) {
1441 return NULL;
1442 }
1443#else
1444 field_name = PyUnicode_AsUTF8AndSize(arg, &size);
1445 if (!field_name) {
1446 return NULL;
1447 }
1448#endif
1449
1450 Message* message = self->message;
1451 bool is_in_oneof;
1452 const FieldDescriptor* field_descriptor =
1453 FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
1454 if (field_descriptor == NULL) {
1455 if (!is_in_oneof) {
1456 PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name);
1457 return NULL;
1458 } else {
1459 Py_RETURN_FALSE;
1460 }
1461 }
1462
1463 if (!CheckHasPresence(field_descriptor, is_in_oneof)) {
1464 return NULL;
1465 }
1466
1467 if (message->GetReflection()->HasField(*message, field_descriptor)) {
1468 Py_RETURN_TRUE;
1469 }
1470 if (!message->GetReflection()->SupportsUnknownEnumValues() &&
1471 field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
1472 // Special case: Python HasField() differs in semantics from C++
1473 // slightly: we return HasField('enum_field') == true if there is
1474 // an unknown enum value present. To implement this we have to
1475 // look in the UnknownFieldSet.
1476 const UnknownFieldSet& unknown_field_set =
1477 message->GetReflection()->GetUnknownFields(*message);
1478 for (int i = 0; i < unknown_field_set.field_count(); ++i) {
1479 if (unknown_field_set.field(i).number() == field_descriptor->number()) {
1480 Py_RETURN_TRUE;
1481 }
1482 }
1483 }
1484 Py_RETURN_FALSE;
1485}
1486
1487PyObject* ClearExtension(CMessage* self, PyObject* extension) {
1488 if (self->extensions != NULL) {
1489 return extension_dict::ClearExtension(self->extensions, extension);
1490 } else {
1491 const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
1492 if (descriptor == NULL) {
1493 return NULL;
1494 }
1495 if (ScopedPyObjectPtr(ClearFieldByDescriptor(self, descriptor)) == NULL) {
1496 return NULL;
1497 }
1498 }
1499 Py_RETURN_NONE;
1500}
1501
1502PyObject* HasExtension(CMessage* self, PyObject* extension) {
1503 const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
1504 if (descriptor == NULL) {
1505 return NULL;
1506 }
1507 return HasFieldByDescriptor(self, descriptor);
1508}
1509
1510// ---------------------------------------------------------------------
1511// Releasing messages
1512//
// The Python API's ClearField() and Clear() methods behave
// differently than their C++ counterparts. While the C++ versions
// clear the children, the Python versions detach the children
// without touching their content. This impedance mismatch causes
// some complexity in the implementation, which is captured in this
// section.
1519//
1520// When a CMessage field is cleared we need to:
1521//
1522// * Release the Message used as the backing store for the CMessage
1523// from its parent.
1524//
1525// * Change the owner field of the released CMessage and all of its
1526// children to point to the newly released Message.
1527//
1528// * Clear the weak references from the released CMessage to the
1529// parent.
1530//
1531// When a RepeatedCompositeContainer field is cleared we need to:
1532//
// * Release all the Messages used as the backing store for the
//   CMessages stored in the container.
1535//
1536// * Change the owner field of all the released CMessage and all of
1537// their children to point to the newly released Messages.
1538//
1539// * Clear the weak references from the released container to the
1540// parent.
1541
1542struct SetOwnerVisitor : public ChildVisitor {
1543 // new_owner must outlive this object.
1544 explicit SetOwnerVisitor(const shared_ptr<Message>& new_owner)
1545 : new_owner_(new_owner) {}
1546
1547 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1548 repeated_composite_container::SetOwner(container, new_owner_);
1549 return 0;
1550 }
1551
1552 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1553 repeated_scalar_container::SetOwner(container, new_owner_);
1554 return 0;
1555 }
1556
1557 int VisitMapContainer(MapContainer* container) {
1558 container->SetOwner(new_owner_);
1559 return 0;
1560 }
1561
1562 int VisitCMessage(CMessage* cmessage,
1563 const FieldDescriptor* field_descriptor) {
1564 return SetOwner(cmessage, new_owner_);
1565 }
1566
1567 private:
1568 const shared_ptr<Message>& new_owner_;
1569};
1570
1571// Change the owner of this CMessage and all its children, recursively.
1572int SetOwner(CMessage* self, const shared_ptr<Message>& new_owner) {
1573 self->owner = new_owner;
1574 if (ForEachCompositeField(self, SetOwnerVisitor(new_owner)) == -1)
1575 return -1;
1576 return 0;
1577}
1578
1579// Releases the message specified by 'field' and returns the
1580// pointer. If the field does not exist a new message is created using
1581// 'descriptor'. The caller takes ownership of the returned pointer.
1582Message* ReleaseMessage(CMessage* self,
1583 const Descriptor* descriptor,
1584 const FieldDescriptor* field_descriptor) {
1585 MessageFactory* message_factory = GetFactoryForMessage(self);
1586 Message* released_message = self->message->GetReflection()->ReleaseMessage(
1587 self->message, field_descriptor, message_factory);
1588 // ReleaseMessage will return NULL which differs from
1589 // child_cmessage->message, if the field does not exist. In this case,
1590 // the latter points to the default instance via a const_cast<>, so we
1591 // have to reset it to a new mutable object since we are taking ownership.
1592 if (released_message == NULL) {
1593 const Message* prototype = message_factory->GetPrototype(descriptor);
1594 GOOGLE_DCHECK(prototype != NULL);
1595 released_message = prototype->New();
1596 }
1597
1598 return released_message;
1599}
1600
1601int ReleaseSubMessage(CMessage* self,
1602 const FieldDescriptor* field_descriptor,
1603 CMessage* child_cmessage) {
1604 // Release the Message
1605 shared_ptr<Message> released_message(ReleaseMessage(
1606 self, child_cmessage->message->GetDescriptor(), field_descriptor));
1607 child_cmessage->message = released_message.get();
1608 child_cmessage->owner.swap(released_message);
1609 child_cmessage->parent = NULL;
1610 child_cmessage->parent_field_descriptor = NULL;
1611 child_cmessage->read_only = false;
1612 return ForEachCompositeField(child_cmessage,
1613 SetOwnerVisitor(child_cmessage->owner));
1614}
1615
1616struct ReleaseChild : public ChildVisitor {
1617 // message must outlive this object.
1618 explicit ReleaseChild(CMessage* parent) :
1619 parent_(parent) {}
1620
1621 int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
1622 return repeated_composite_container::Release(
1623 reinterpret_cast<RepeatedCompositeContainer*>(container));
1624 }
1625
1626 int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
1627 return repeated_scalar_container::Release(
1628 reinterpret_cast<RepeatedScalarContainer*>(container));
1629 }
1630
1631 int VisitMapContainer(MapContainer* container) {
1632 return reinterpret_cast<MapContainer*>(container)->Release();
1633 }
1634
1635 int VisitCMessage(CMessage* cmessage,
1636 const FieldDescriptor* field_descriptor) {
1637 return ReleaseSubMessage(parent_, field_descriptor,
1638 reinterpret_cast<CMessage*>(cmessage));
1639 }
1640
1641 CMessage* parent_;
1642};
1643
1644int InternalReleaseFieldByDescriptor(
1645 CMessage* self,
1646 const FieldDescriptor* field_descriptor,
1647 PyObject* composite_field) {
1648 return VisitCompositeField(
1649 field_descriptor,
1650 composite_field,
1651 ReleaseChild(self));
1652}
1653
1654PyObject* ClearFieldByDescriptor(
1655 CMessage* self,
1656 const FieldDescriptor* descriptor) {
1657 if (!CheckFieldBelongsToMessage(descriptor, self->message)) {
1658 return NULL;
1659 }
1660 AssureWritable(self);
1661 self->message->GetReflection()->ClearField(self->message, descriptor);
1662 Py_RETURN_NONE;
1663}
1664
1665PyObject* ClearField(CMessage* self, PyObject* arg) {
1666 if (!PyString_Check(arg)) {
1667 PyErr_SetString(PyExc_TypeError, "field name must be a string");
1668 return NULL;
1669 }
1670#if PY_MAJOR_VERSION < 3
1671 const char* field_name = PyString_AS_STRING(arg);
1672 Py_ssize_t size = PyString_GET_SIZE(arg);
1673#else
1674 Py_ssize_t size;
1675 const char* field_name = PyUnicode_AsUTF8AndSize(arg, &size);
1676#endif
1677 AssureWritable(self);
1678 Message* message = self->message;
1679 ScopedPyObjectPtr arg_in_oneof;
1680 bool is_in_oneof;
1681 const FieldDescriptor* field_descriptor =
1682 FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
1683 if (field_descriptor == NULL) {
1684 if (!is_in_oneof) {
1685 PyErr_Format(PyExc_ValueError,
1686 "Protocol message has no \"%s\" field.", field_name);
1687 return NULL;
1688 } else {
1689 Py_RETURN_NONE;
1690 }
1691 } else if (is_in_oneof) {
1692 const string& name = field_descriptor->name();
1693 arg_in_oneof.reset(PyString_FromStringAndSize(name.c_str(), name.size()));
1694 arg = arg_in_oneof.get();
1695 }
1696
1697 PyObject* composite_field = self->composite_fields ?
1698 PyDict_GetItem(self->composite_fields, arg) : NULL;
1699
1700 // Only release the field if there's a possibility that there are
1701 // references to it.
1702 if (composite_field != NULL) {
1703 if (InternalReleaseFieldByDescriptor(self, field_descriptor,
1704 composite_field) < 0) {
1705 return NULL;
1706 }
1707 PyDict_DelItem(self->composite_fields, arg);
1708 }
1709 message->GetReflection()->ClearField(message, field_descriptor);
1710 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
1711 !message->GetReflection()->SupportsUnknownEnumValues()) {
1712 UnknownFieldSet* unknown_field_set =
1713 message->GetReflection()->MutableUnknownFields(message);
1714 unknown_field_set->DeleteByNumber(field_descriptor->number());
1715 }
1716
1717 Py_RETURN_NONE;
1718}
1719
1720PyObject* Clear(CMessage* self) {
1721 AssureWritable(self);
1722 if (ForEachCompositeField(self, ReleaseChild(self)) == -1)
1723 return NULL;
1724 Py_CLEAR(self->extensions);
1725 if (self->composite_fields) {
1726 PyDict_Clear(self->composite_fields);
1727 }
1728 self->message->Clear();
1729 Py_RETURN_NONE;
1730}
1731
1732// ---------------------------------------------------------------------
1733
1734static string GetMessageName(CMessage* self) {
1735 if (self->parent_field_descriptor != NULL) {
1736 return self->parent_field_descriptor->full_name();
1737 } else {
1738 return self->message->GetDescriptor()->full_name();
1739 }
1740}
1741
1742static PyObject* SerializeToString(CMessage* self, PyObject* args) {
1743 if (!self->message->IsInitialized()) {
1744 ScopedPyObjectPtr errors(FindInitializationErrors(self));
1745 if (errors == NULL) {
1746 return NULL;
1747 }
1748 ScopedPyObjectPtr comma(PyString_FromString(","));
1749 if (comma == NULL) {
1750 return NULL;
1751 }
1752 ScopedPyObjectPtr joined(
1753 PyObject_CallMethod(comma.get(), "join", "O", errors.get()));
1754 if (joined == NULL) {
1755 return NULL;
1756 }
1757
1758 // TODO(haberman): this is a (hopefully temporary) hack. The unit testing
1759 // infrastructure reloads all pure-Python modules for every test, but not
1760 // C++ modules (because that's generally impossible:
1761 // http://bugs.python.org/issue1144263). But if we cache EncodeError, we'll
1762 // return the EncodeError from a previous load of the module, which won't
1763 // match a user's attempt to catch EncodeError. So we have to look it up
1764 // again every time.
1765 ScopedPyObjectPtr message_module(PyImport_ImportModule(
1766 "google.protobuf.message"));
1767 if (message_module.get() == NULL) {
1768 return NULL;
1769 }
1770
1771 ScopedPyObjectPtr encode_error(
1772 PyObject_GetAttrString(message_module.get(), "EncodeError"));
1773 if (encode_error.get() == NULL) {
1774 return NULL;
1775 }
1776 PyErr_Format(encode_error.get(),
1777 "Message %s is missing required fields: %s",
1778 GetMessageName(self).c_str(), PyString_AsString(joined.get()));
1779 return NULL;
1780 }
1781 int size = self->message->ByteSize();
1782 if (size <= 0) {
1783 return PyBytes_FromString("");
1784 }
1785 PyObject* result = PyBytes_FromStringAndSize(NULL, size);
1786 if (result == NULL) {
1787 return NULL;
1788 }
1789 char* buffer = PyBytes_AS_STRING(result);
1790 self->message->SerializeWithCachedSizesToArray(
1791 reinterpret_cast<uint8*>(buffer));
1792 return result;
1793}
1794
1795static PyObject* SerializePartialToString(CMessage* self) {
1796 string contents;
1797 self->message->SerializePartialToString(&contents);
1798 return PyBytes_FromStringAndSize(contents.c_str(), contents.size());
1799}
1800
1801// Formats proto fields for ascii dumps using python formatting functions where
1802// appropriate.
1803class PythonFieldValuePrinter : public TextFormat::FieldValuePrinter {
1804 public:
1805 // Python has some differences from C++ when printing floating point numbers.
1806 //
1807 // 1) Trailing .0 is always printed.
1808 // 2) (Python2) Output is rounded to 12 digits.
1809 // 3) (Python3) The full precision of the double is preserved (and Python uses
1810 // David M. Gay's dtoa(), when the C++ code uses SimpleDtoa. There are some
1811 // differences, but they rarely happen)
1812 //
1813 // We override floating point printing with the C-API function for printing
1814 // Python floats to ensure consistency.
1815 string PrintFloat(float value) const { return PrintDouble(value); }
1816 string PrintDouble(double value) const {
1817 // This implementation is not highly optimized (it allocates two temporary
1818 // Python objects) but it is simple and portable. If this is shown to be a
1819 // performance bottleneck, we can optimize it, but the results will likely
1820 // be more complicated to accommodate the differing behavior of double
1821 // formatting between Python 2 and Python 3.
1822 //
1823 // (Though a valid question is: do we really want to make out output
1824 // dependent on the Python version?)
1825 ScopedPyObjectPtr py_value(PyFloat_FromDouble(value));
1826 if (!py_value.get()) {
1827 return string();
1828 }
1829
1830 ScopedPyObjectPtr py_str(PyObject_Str(py_value.get()));
1831 if (!py_str.get()) {
1832 return string();
1833 }
1834
1835 return string(PyString_AsString(py_str.get()));
1836 }
1837};
1838
1839static PyObject* ToStr(CMessage* self) {
1840 TextFormat::Printer printer;
1841 // Passes ownership
1842 printer.SetDefaultFieldValuePrinter(new PythonFieldValuePrinter());
1843 printer.SetHideUnknownFields(true);
1844 string output;
1845 if (!printer.PrintToString(*self->message, &output)) {
1846 PyErr_SetString(PyExc_ValueError, "Unable to convert message to str");
1847 return NULL;
1848 }
1849 return PyString_FromString(output.c_str());
1850}
1851
1852PyObject* MergeFrom(CMessage* self, PyObject* arg) {
1853 CMessage* other_message;
1854 if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) {
1855 PyErr_SetString(PyExc_TypeError, "Must be a message");
1856 return NULL;
1857 }
1858
1859 other_message = reinterpret_cast<CMessage*>(arg);
1860 if (other_message->message->GetDescriptor() !=
1861 self->message->GetDescriptor()) {
1862 PyErr_Format(PyExc_TypeError,
1863 "Tried to merge from a message with a different type. "
1864 "to: %s, from: %s",
1865 self->message->GetDescriptor()->full_name().c_str(),
1866 other_message->message->GetDescriptor()->full_name().c_str());
1867 return NULL;
1868 }
1869 AssureWritable(self);
1870
1871 // TODO(tibell): Message::MergeFrom might turn some child Messages
1872 // into mutable messages, invalidating the message field in the
1873 // corresponding CMessages. We should run a FixupMessageReferences
1874 // pass here.
1875
1876 self->message->MergeFrom(*other_message->message);
1877 Py_RETURN_NONE;
1878}
1879
1880static PyObject* CopyFrom(CMessage* self, PyObject* arg) {
1881 CMessage* other_message;
1882 if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) {
1883 PyErr_SetString(PyExc_TypeError, "Must be a message");
1884 return NULL;
1885 }
1886
1887 other_message = reinterpret_cast<CMessage*>(arg);
1888
1889 if (self == other_message) {
1890 Py_RETURN_NONE;
1891 }
1892
1893 if (other_message->message->GetDescriptor() !=
1894 self->message->GetDescriptor()) {
1895 PyErr_Format(PyExc_TypeError,
1896 "Tried to copy from a message with a different type. "
1897 "to: %s, from: %s",
1898 self->message->GetDescriptor()->full_name().c_str(),
1899 other_message->message->GetDescriptor()->full_name().c_str());
1900 return NULL;
1901 }
1902
1903 AssureWritable(self);
1904
1905 // CopyFrom on the message will not clean up self->composite_fields,
1906 // which can leave us in an inconsistent state, so clear it out here.
1907 (void)ScopedPyObjectPtr(Clear(self));
1908
1909 self->message->CopyFrom(*other_message->message);
1910
1911 Py_RETURN_NONE;
1912}
1913
1914static PyObject* MergeFromString(CMessage* self, PyObject* arg) {
1915 const void* data;
1916 Py_ssize_t data_length;
1917 if (PyObject_AsReadBuffer(arg, &data, &data_length) < 0) {
1918 return NULL;
1919 }
1920
1921 AssureWritable(self);
1922 io::CodedInputStream input(
1923 reinterpret_cast<const uint8*>(data), data_length);
1924 PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
1925 input.SetExtensionRegistry(pool->pool, pool->message_factory);
1926 bool success = self->message->MergePartialFromCodedStream(&input);
1927 if (success) {
1928 return PyInt_FromLong(input.CurrentPosition());
1929 } else {
1930 PyErr_Format(DecodeError_class, "Error parsing message");
1931 return NULL;
1932 }
1933}
1934
1935static PyObject* ParseFromString(CMessage* self, PyObject* arg) {
1936 if (ScopedPyObjectPtr(Clear(self)) == NULL) {
1937 return NULL;
1938 }
1939 return MergeFromString(self, arg);
1940}
1941
1942static PyObject* ByteSize(CMessage* self, PyObject* args) {
1943 return PyLong_FromLong(self->message->ByteSize());
1944}
1945
1946static PyObject* RegisterExtension(PyObject* cls,
1947 PyObject* extension_handle) {
1948 const FieldDescriptor* descriptor =
1949 GetExtensionDescriptor(extension_handle);
1950 if (descriptor == NULL) {
1951 return NULL;
1952 }
1953
1954 ScopedPyObjectPtr extensions_by_name(
1955 PyObject_GetAttr(cls, k_extensions_by_name));
1956 if (extensions_by_name == NULL) {
1957 PyErr_SetString(PyExc_TypeError, "no extensions_by_name on class");
1958 return NULL;
1959 }
1960 ScopedPyObjectPtr full_name(PyObject_GetAttr(extension_handle, kfull_name));
1961 if (full_name == NULL) {
1962 return NULL;
1963 }
1964
1965 // If the extension was already registered, check that it is the same.
1966 PyObject* existing_extension =
1967 PyDict_GetItem(extensions_by_name.get(), full_name.get());
1968 if (existing_extension != NULL) {
1969 const FieldDescriptor* existing_extension_descriptor =
1970 GetExtensionDescriptor(existing_extension);
1971 if (existing_extension_descriptor != descriptor) {
1972 PyErr_SetString(PyExc_ValueError, "Double registration of Extensions");
1973 return NULL;
1974 }
1975 // Nothing else to do.
1976 Py_RETURN_NONE;
1977 }
1978
1979 if (PyDict_SetItem(extensions_by_name.get(), full_name.get(),
1980 extension_handle) < 0) {
1981 return NULL;
1982 }
1983
1984 // Also store a mapping from extension number to implementing class.
1985 ScopedPyObjectPtr extensions_by_number(
1986 PyObject_GetAttr(cls, k_extensions_by_number));
1987 if (extensions_by_number == NULL) {
1988 PyErr_SetString(PyExc_TypeError, "no extensions_by_number on class");
1989 return NULL;
1990 }
1991 ScopedPyObjectPtr number(PyObject_GetAttrString(extension_handle, "number"));
1992 if (number == NULL) {
1993 return NULL;
1994 }
1995 if (PyDict_SetItem(extensions_by_number.get(), number.get(),
1996 extension_handle) < 0) {
1997 return NULL;
1998 }
1999
2000 // Check if it's a message set
2001 if (descriptor->is_extension() &&
2002 descriptor->containing_type()->options().message_set_wire_format() &&
2003 descriptor->type() == FieldDescriptor::TYPE_MESSAGE &&
2004 descriptor->label() == FieldDescriptor::LABEL_OPTIONAL) {
2005 ScopedPyObjectPtr message_name(PyString_FromStringAndSize(
2006 descriptor->message_type()->full_name().c_str(),
2007 descriptor->message_type()->full_name().size()));
2008 if (message_name == NULL) {
2009 return NULL;
2010 }
2011 PyDict_SetItem(extensions_by_name.get(), message_name.get(),
2012 extension_handle);
2013 }
2014
2015 Py_RETURN_NONE;
2016}
2017
2018static PyObject* SetInParent(CMessage* self, PyObject* args) {
2019 AssureWritable(self);
2020 Py_RETURN_NONE;
2021}
2022
2023static PyObject* WhichOneof(CMessage* self, PyObject* arg) {
2024 Py_ssize_t name_size;
2025 char *name_data;
2026 if (PyString_AsStringAndSize(arg, &name_data, &name_size) < 0)
2027 return NULL;
2028 string oneof_name = string(name_data, name_size);
2029 const OneofDescriptor* oneof_desc =
2030 self->message->GetDescriptor()->FindOneofByName(oneof_name);
2031 if (oneof_desc == NULL) {
2032 PyErr_Format(PyExc_ValueError,
2033 "Protocol message has no oneof \"%s\" field.",
2034 oneof_name.c_str());
2035 return NULL;
2036 }
2037 const FieldDescriptor* field_in_oneof =
2038 self->message->GetReflection()->GetOneofFieldDescriptor(
2039 *self->message, oneof_desc);
2040 if (field_in_oneof == NULL) {
2041 Py_RETURN_NONE;
2042 } else {
2043 const string& name = field_in_oneof->name();
2044 return PyString_FromStringAndSize(name.c_str(), name.size());
2045 }
2046}
2047
2048static PyObject* GetExtensionDict(CMessage* self, void *closure);
2049
2050static PyObject* ListFields(CMessage* self) {
2051 vector<const FieldDescriptor*> fields;
2052 self->message->GetReflection()->ListFields(*self->message, &fields);
2053
2054 // Normally, the list will be exactly the size of the fields.
2055 ScopedPyObjectPtr all_fields(PyList_New(fields.size()));
2056 if (all_fields == NULL) {
2057 return NULL;
2058 }
2059
2060 // When there are unknown extensions, the py list will *not* contain
2061 // the field information. Thus the actual size of the py list will be
2062 // smaller than the size of fields. Set the actual size at the end.
2063 Py_ssize_t actual_size = 0;
2064 for (size_t i = 0; i < fields.size(); ++i) {
2065 ScopedPyObjectPtr t(PyTuple_New(2));
2066 if (t == NULL) {
2067 return NULL;
2068 }
2069
2070 if (fields[i]->is_extension()) {
2071 ScopedPyObjectPtr extension_field(
2072 PyFieldDescriptor_FromDescriptor(fields[i]));
2073 if (extension_field == NULL) {
2074 return NULL;
2075 }
2076 // With C++ descriptors, the field can always be retrieved, but for
2077 // unknown extensions which have not been imported in Python code, there
2078 // is no message class and we cannot retrieve the value.
2079 // TODO(amauryfa): consider building the class on the fly!
2080 if (fields[i]->message_type() != NULL &&
2081 cdescriptor_pool::GetMessageClass(
2082 GetDescriptorPoolForMessage(self),
2083 fields[i]->message_type()) == NULL) {
2084 PyErr_Clear();
2085 continue;
2086 }
2087 ScopedPyObjectPtr extensions(GetExtensionDict(self, NULL));
2088 if (extensions == NULL) {
2089 return NULL;
2090 }
2091 // 'extension' reference later stolen by PyTuple_SET_ITEM.
2092 PyObject* extension = PyObject_GetItem(
2093 extensions.get(), extension_field.get());
2094 if (extension == NULL) {
2095 return NULL;
2096 }
2097 PyTuple_SET_ITEM(t.get(), 0, extension_field.release());
2098 // Steals reference to 'extension'
2099 PyTuple_SET_ITEM(t.get(), 1, extension);
2100 } else {
2101 // Normal field
2102 const string& field_name = fields[i]->name();
2103 ScopedPyObjectPtr py_field_name(PyString_FromStringAndSize(
2104 field_name.c_str(), field_name.length()));
2105 if (py_field_name == NULL) {
2106 PyErr_SetString(PyExc_ValueError, "bad string");
2107 return NULL;
2108 }
2109 ScopedPyObjectPtr field_descriptor(
2110 PyFieldDescriptor_FromDescriptor(fields[i]));
2111 if (field_descriptor == NULL) {
2112 return NULL;
2113 }
2114
2115 PyObject* field_value = GetAttr(self, py_field_name.get());
2116 if (field_value == NULL) {
2117 PyErr_SetObject(PyExc_ValueError, py_field_name.get());
2118 return NULL;
2119 }
2120 PyTuple_SET_ITEM(t.get(), 0, field_descriptor.release());
2121 PyTuple_SET_ITEM(t.get(), 1, field_value);
2122 }
2123 PyList_SET_ITEM(all_fields.get(), actual_size, t.release());
2124 ++actual_size;
2125 }
2126 Py_SIZE(all_fields.get()) = actual_size;
2127 return all_fields.release();
2128}
2129
2130PyObject* FindInitializationErrors(CMessage* self) {
2131 Message* message = self->message;
2132 vector<string> errors;
2133 message->FindInitializationErrors(&errors);
2134
2135 PyObject* error_list = PyList_New(errors.size());
2136 if (error_list == NULL) {
2137 return NULL;
2138 }
2139 for (size_t i = 0; i < errors.size(); ++i) {
2140 const string& error = errors[i];
2141 PyObject* error_string = PyString_FromStringAndSize(
2142 error.c_str(), error.length());
2143 if (error_string == NULL) {
2144 Py_DECREF(error_list);
2145 return NULL;
2146 }
2147 PyList_SET_ITEM(error_list, i, error_string);
2148 }
2149 return error_list;
2150}
2151
2152static PyObject* RichCompare(CMessage* self, PyObject* other, int opid) {
2153 // Only equality comparisons are implemented.
2154 if (opid != Py_EQ && opid != Py_NE) {
2155 Py_INCREF(Py_NotImplemented);
2156 return Py_NotImplemented;
2157 }
2158 bool equals = true;
2159 // If other is not a message, it cannot be equal.
2160 if (!PyObject_TypeCheck(other, &CMessage_Type)) {
2161 equals = false;
2162 }
2163 const google::protobuf::Message* other_message =
2164 reinterpret_cast<CMessage*>(other)->message;
2165 // If messages don't have the same descriptors, they are not equal.
2166 if (equals &&
2167 self->message->GetDescriptor() != other_message->GetDescriptor()) {
2168 equals = false;
2169 }
2170 // Check the message contents.
2171 if (equals && !google::protobuf::util::MessageDifferencer::Equals(
2172 *self->message,
2173 *reinterpret_cast<CMessage*>(other)->message)) {
2174 equals = false;
2175 }
2176 if (equals ^ (opid == Py_EQ)) {
2177 Py_RETURN_FALSE;
2178 } else {
2179 Py_RETURN_TRUE;
2180 }
2181}
2182
2183PyObject* InternalGetScalar(const Message* message,
2184 const FieldDescriptor* field_descriptor) {
2185 const Reflection* reflection = message->GetReflection();
2186
2187 if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
2188 return NULL;
2189 }
2190
2191 PyObject* result = NULL;
2192 switch (field_descriptor->cpp_type()) {
2193 case FieldDescriptor::CPPTYPE_INT32: {
2194 int32 value = reflection->GetInt32(*message, field_descriptor);
2195 result = PyInt_FromLong(value);
2196 break;
2197 }
2198 case FieldDescriptor::CPPTYPE_INT64: {
2199 int64 value = reflection->GetInt64(*message, field_descriptor);
2200 result = PyLong_FromLongLong(value);
2201 break;
2202 }
2203 case FieldDescriptor::CPPTYPE_UINT32: {
2204 uint32 value = reflection->GetUInt32(*message, field_descriptor);
2205 result = PyInt_FromSize_t(value);
2206 break;
2207 }
2208 case FieldDescriptor::CPPTYPE_UINT64: {
2209 uint64 value = reflection->GetUInt64(*message, field_descriptor);
2210 result = PyLong_FromUnsignedLongLong(value);
2211 break;
2212 }
2213 case FieldDescriptor::CPPTYPE_FLOAT: {
2214 float value = reflection->GetFloat(*message, field_descriptor);
2215 result = PyFloat_FromDouble(value);
2216 break;
2217 }
2218 case FieldDescriptor::CPPTYPE_DOUBLE: {
2219 double value = reflection->GetDouble(*message, field_descriptor);
2220 result = PyFloat_FromDouble(value);
2221 break;
2222 }
2223 case FieldDescriptor::CPPTYPE_BOOL: {
2224 bool value = reflection->GetBool(*message, field_descriptor);
2225 result = PyBool_FromLong(value);
2226 break;
2227 }
2228 case FieldDescriptor::CPPTYPE_STRING: {
2229 string value = reflection->GetString(*message, field_descriptor);
2230 result = ToStringObject(field_descriptor, value);
2231 break;
2232 }
2233 case FieldDescriptor::CPPTYPE_ENUM: {
2234 if (!message->GetReflection()->SupportsUnknownEnumValues() &&
2235 !message->GetReflection()->HasField(*message, field_descriptor)) {
2236 // Look for the value in the unknown fields.
2237 const UnknownFieldSet& unknown_field_set =
2238 message->GetReflection()->GetUnknownFields(*message);
2239 for (int i = 0; i < unknown_field_set.field_count(); ++i) {
2240 if (unknown_field_set.field(i).number() ==
2241 field_descriptor->number() &&
2242 unknown_field_set.field(i).type() ==
2243 google::protobuf::UnknownField::TYPE_VARINT) {
2244 result = PyInt_FromLong(unknown_field_set.field(i).varint());
2245 break;
2246 }
2247 }
2248 }
2249
2250 if (result == NULL) {
2251 const EnumValueDescriptor* enum_value =
2252 message->GetReflection()->GetEnum(*message, field_descriptor);
2253 result = PyInt_FromLong(enum_value->number());
2254 }
2255 break;
2256 }
2257 default:
2258 PyErr_Format(
2259 PyExc_SystemError, "Getting a value from a field of unknown type %d",
2260 field_descriptor->cpp_type());
2261 }
2262
2263 return result;
2264}
2265
2266PyObject* InternalGetSubMessage(
2267 CMessage* self, const FieldDescriptor* field_descriptor) {
2268 const Reflection* reflection = self->message->GetReflection();
2269 PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
2270 const Message& sub_message = reflection->GetMessage(
2271 *self->message, field_descriptor, pool->message_factory);
2272
2273 PyObject *message_class = cdescriptor_pool::GetMessageClass(
2274 pool, field_descriptor->message_type());
2275 if (message_class == NULL) {
2276 return NULL;
2277 }
2278
2279 CMessage* cmsg = cmessage::NewEmptyMessage(message_class,
2280 sub_message.GetDescriptor());
2281 if (cmsg == NULL) {
2282 return NULL;
2283 }
2284
2285 cmsg->owner = self->owner;
2286 cmsg->parent = self;
2287 cmsg->parent_field_descriptor = field_descriptor;
2288 cmsg->read_only = !reflection->HasField(*self->message, field_descriptor);
2289 cmsg->message = const_cast<Message*>(&sub_message);
2290
2291 return reinterpret_cast<PyObject*>(cmsg);
2292}
2293
2294int InternalSetNonOneofScalar(
2295 Message* message,
2296 const FieldDescriptor* field_descriptor,
2297 PyObject* arg) {
2298 const Reflection* reflection = message->GetReflection();
2299
2300 if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
2301 return -1;
2302 }
2303
2304 switch (field_descriptor->cpp_type()) {
2305 case FieldDescriptor::CPPTYPE_INT32: {
2306 GOOGLE_CHECK_GET_INT32(arg, value, -1);
2307 reflection->SetInt32(message, field_descriptor, value);
2308 break;
2309 }
2310 case FieldDescriptor::CPPTYPE_INT64: {
2311 GOOGLE_CHECK_GET_INT64(arg, value, -1);
2312 reflection->SetInt64(message, field_descriptor, value);
2313 break;
2314 }
2315 case FieldDescriptor::CPPTYPE_UINT32: {
2316 GOOGLE_CHECK_GET_UINT32(arg, value, -1);
2317 reflection->SetUInt32(message, field_descriptor, value);
2318 break;
2319 }
2320 case FieldDescriptor::CPPTYPE_UINT64: {
2321 GOOGLE_CHECK_GET_UINT64(arg, value, -1);
2322 reflection->SetUInt64(message, field_descriptor, value);
2323 break;
2324 }
2325 case FieldDescriptor::CPPTYPE_FLOAT: {
2326 GOOGLE_CHECK_GET_FLOAT(arg, value, -1);
2327 reflection->SetFloat(message, field_descriptor, value);
2328 break;
2329 }
2330 case FieldDescriptor::CPPTYPE_DOUBLE: {
2331 GOOGLE_CHECK_GET_DOUBLE(arg, value, -1);
2332 reflection->SetDouble(message, field_descriptor, value);
2333 break;
2334 }
2335 case FieldDescriptor::CPPTYPE_BOOL: {
2336 GOOGLE_CHECK_GET_BOOL(arg, value, -1);
2337 reflection->SetBool(message, field_descriptor, value);
2338 break;
2339 }
2340 case FieldDescriptor::CPPTYPE_STRING: {
2341 if (!CheckAndSetString(
2342 arg, message, field_descriptor, reflection, false, -1)) {
2343 return -1;
2344 }
2345 break;
2346 }
2347 case FieldDescriptor::CPPTYPE_ENUM: {
2348 GOOGLE_CHECK_GET_INT32(arg, value, -1);
2349 if (reflection->SupportsUnknownEnumValues()) {
2350 reflection->SetEnumValue(message, field_descriptor, value);
2351 } else {
2352 const EnumDescriptor* enum_descriptor = field_descriptor->enum_type();
2353 const EnumValueDescriptor* enum_value =
2354 enum_descriptor->FindValueByNumber(value);
2355 if (enum_value != NULL) {
2356 reflection->SetEnum(message, field_descriptor, enum_value);
2357 } else {
2358 PyErr_Format(PyExc_ValueError, "Unknown enum value: %d", value);
2359 return -1;
2360 }
2361 }
2362 break;
2363 }
2364 default:
2365 PyErr_Format(
2366 PyExc_SystemError, "Setting value to a field of unknown type %d",
2367 field_descriptor->cpp_type());
2368 return -1;
2369 }
2370
2371 return 0;
2372}
2373
2374int InternalSetScalar(
2375 CMessage* self,
2376 const FieldDescriptor* field_descriptor,
2377 PyObject* arg) {
2378 if (!CheckFieldBelongsToMessage(field_descriptor, self->message)) {
2379 return -1;
2380 }
2381
2382 if (MaybeReleaseOverlappingOneofField(self, field_descriptor) < 0) {
2383 return -1;
2384 }
2385
2386 return InternalSetNonOneofScalar(self->message, field_descriptor, arg);
2387}
2388
2389PyObject* FromString(PyTypeObject* cls, PyObject* serialized) {
2390 PyObject* py_cmsg = PyObject_CallObject(
2391 reinterpret_cast<PyObject*>(cls), NULL);
2392 if (py_cmsg == NULL) {
2393 return NULL;
2394 }
2395 CMessage* cmsg = reinterpret_cast<CMessage*>(py_cmsg);
2396
2397 ScopedPyObjectPtr py_length(MergeFromString(cmsg, serialized));
2398 if (py_length == NULL) {
2399 Py_DECREF(py_cmsg);
2400 return NULL;
2401 }
2402
2403 return py_cmsg;
2404}
2405
2406PyObject* DeepCopy(CMessage* self, PyObject* arg) {
2407 PyObject* clone = PyObject_CallObject(
2408 reinterpret_cast<PyObject*>(Py_TYPE(self)), NULL);
2409 if (clone == NULL) {
2410 return NULL;
2411 }
2412 if (!PyObject_TypeCheck(clone, &CMessage_Type)) {
2413 Py_DECREF(clone);
2414 return NULL;
2415 }
2416 if (ScopedPyObjectPtr(MergeFrom(
2417 reinterpret_cast<CMessage*>(clone),
2418 reinterpret_cast<PyObject*>(self))) == NULL) {
2419 Py_DECREF(clone);
2420 return NULL;
2421 }
2422 return clone;
2423}
2424
2425PyObject* ToUnicode(CMessage* self) {
2426 // Lazy import to prevent circular dependencies
2427 ScopedPyObjectPtr text_format(
2428 PyImport_ImportModule("google.protobuf.text_format"));
2429 if (text_format == NULL) {
2430 return NULL;
2431 }
2432 ScopedPyObjectPtr method_name(PyString_FromString("MessageToString"));
2433 if (method_name == NULL) {
2434 return NULL;
2435 }
2436 Py_INCREF(Py_True);
2437 ScopedPyObjectPtr encoded(PyObject_CallMethodObjArgs(
2438 text_format.get(), method_name.get(), self, Py_True, NULL));
2439 Py_DECREF(Py_True);
2440 if (encoded == NULL) {
2441 return NULL;
2442 }
2443#if PY_MAJOR_VERSION < 3
2444 PyObject* decoded = PyString_AsDecodedObject(encoded.get(), "utf-8", NULL);
2445#else
2446 PyObject* decoded = PyUnicode_FromEncodedObject(encoded.get(), "utf-8", NULL);
2447#endif
2448 if (decoded == NULL) {
2449 return NULL;
2450 }
2451 return decoded;
2452}
2453
2454PyObject* Reduce(CMessage* self) {
2455 ScopedPyObjectPtr constructor(reinterpret_cast<PyObject*>(Py_TYPE(self)));
2456 constructor.inc();
2457 ScopedPyObjectPtr args(PyTuple_New(0));
2458 if (args == NULL) {
2459 return NULL;
2460 }
2461 ScopedPyObjectPtr state(PyDict_New());
2462 if (state == NULL) {
2463 return NULL;
2464 }
2465 ScopedPyObjectPtr serialized(SerializePartialToString(self));
2466 if (serialized == NULL) {
2467 return NULL;
2468 }
2469 if (PyDict_SetItemString(state.get(), "serialized", serialized.get()) < 0) {
2470 return NULL;
2471 }
2472 return Py_BuildValue("OOO", constructor.get(), args.get(), state.get());
2473}
2474
2475PyObject* SetState(CMessage* self, PyObject* state) {
2476 if (!PyDict_Check(state)) {
2477 PyErr_SetString(PyExc_TypeError, "state not a dict");
2478 return NULL;
2479 }
2480 PyObject* serialized = PyDict_GetItemString(state, "serialized");
2481 if (serialized == NULL) {
2482 return NULL;
2483 }
2484 if (ScopedPyObjectPtr(ParseFromString(self, serialized)) == NULL) {
2485 return NULL;
2486 }
2487 Py_RETURN_NONE;
2488}
2489
2490// CMessage static methods:
2491PyObject* _CheckCalledFromGeneratedFile(PyObject* unused,
2492 PyObject* unused_arg) {
2493 if (!_CalledFromGeneratedFile(1)) {
2494 PyErr_SetString(PyExc_TypeError,
2495 "Descriptors should not be created directly, "
2496 "but only retrieved from their parent.");
2497 return NULL;
2498 }
2499 Py_RETURN_NONE;
2500}
2501
2502static PyObject* GetExtensionDict(CMessage* self, void *closure) {
2503 if (self->extensions) {
2504 Py_INCREF(self->extensions);
2505 return reinterpret_cast<PyObject*>(self->extensions);
2506 }
2507
2508 // If there are extension_ranges, the message is "extendable". Allocate a
2509 // dictionary to store the extension fields.
2510 const Descriptor* descriptor = GetMessageDescriptor(Py_TYPE(self));
2511 if (descriptor->extension_range_count() > 0) {
2512 ExtensionDict* extension_dict = extension_dict::NewExtensionDict(self);
2513 if (extension_dict == NULL) {
2514 return NULL;
2515 }
2516 self->extensions = extension_dict;
2517 Py_INCREF(self->extensions);
2518 return reinterpret_cast<PyObject*>(self->extensions);
2519 }
2520
2521 PyErr_SetNone(PyExc_AttributeError);
2522 return NULL;
2523}
2524
// Attribute table installed as tp_getset of CMessage_Type. Exposes the
// read-only "Extensions" attribute (no setter), served by GetExtensionDict.
// The {NULL} entry terminates the table.
static PyGetSetDef Getters[] = {
  {"Extensions", (getter)GetExtensionDict, NULL, "Extension dict"},
  {NULL}
};
2529
// Method table installed as tp_methods of CMessage_Type.
// Each entry is {Python name, C implementation, calling-convention flags,
// docstring}. The flags must match the C signature of the implementation:
// METH_NOARGS and METH_O functions receive (self, arg), and METH_CLASS /
// METH_STATIC change what is passed as the first argument.
// The table ends with a NULL sentinel entry.
static PyMethodDef Methods[] = {
  { "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
    "Makes a deep copy of the class." },
  { "__reduce__", (PyCFunction)Reduce, METH_NOARGS,
    "Outputs picklable representation of the message." },
  { "__setstate__", (PyCFunction)SetState, METH_O,
    "Inputs picklable representation of the message." },
  { "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
    "Outputs a unicode representation of the message." },
  { "ByteSize", (PyCFunction)ByteSize, METH_NOARGS,
    "Returns the size of the message in bytes." },
  { "Clear", (PyCFunction)Clear, METH_NOARGS,
    "Clears the message." },
  { "ClearExtension", (PyCFunction)ClearExtension, METH_O,
    "Clears a message field." },
  { "ClearField", (PyCFunction)ClearField, METH_O,
    "Clears a message field." },
  { "CopyFrom", (PyCFunction)CopyFrom, METH_O,
    "Copies a protocol message into the current message." },
  { "FindInitializationErrors", (PyCFunction)FindInitializationErrors,
    METH_NOARGS,
    "Finds unset required fields." },
  { "FromString", (PyCFunction)FromString, METH_O | METH_CLASS,
    "Creates new method instance from given serialized data." },
  { "HasExtension", (PyCFunction)HasExtension, METH_O,
    "Checks if a message field is set." },
  { "HasField", (PyCFunction)HasField, METH_O,
    "Checks if a message field is set." },
  { "IsInitialized", (PyCFunction)IsInitialized, METH_VARARGS,
    "Checks if all required fields of a protocol message are set." },
  { "ListFields", (PyCFunction)ListFields, METH_NOARGS,
    "Lists all set fields of a message." },
  { "MergeFrom", (PyCFunction)MergeFrom, METH_O,
    "Merges a protocol message into the current message." },
  { "MergeFromString", (PyCFunction)MergeFromString, METH_O,
    "Merges a serialized message into the current message." },
  { "ParseFromString", (PyCFunction)ParseFromString, METH_O,
    "Parses a serialized message into the current message." },
  { "RegisterExtension", (PyCFunction)RegisterExtension, METH_O | METH_CLASS,
    "Registers an extension with the current message." },
  { "SerializePartialToString", (PyCFunction)SerializePartialToString,
    METH_NOARGS,
    "Serializes the message to a string, even if it isn't initialized." },
  { "SerializeToString", (PyCFunction)SerializeToString, METH_NOARGS,
    "Serializes the message to a string, only for initialized messages." },
  { "SetInParent", (PyCFunction)SetInParent, METH_NOARGS,
    "Sets the has bit of the given field in its parent message." },
  { "WhichOneof", (PyCFunction)WhichOneof, METH_O,
    "Returns the name of the field set inside a oneof, "
    "or None if no field is set." },

  // Static Methods.
  { "_CheckCalledFromGeneratedFile", (PyCFunction)_CheckCalledFromGeneratedFile,
    METH_NOARGS | METH_STATIC,
    "Raises TypeError if the caller is not in a _pb2.py file."},
  { NULL, NULL}
};
2587
2588static bool SetCompositeField(
2589 CMessage* self, PyObject* name, PyObject* value) {
2590 if (self->composite_fields == NULL) {
2591 self->composite_fields = PyDict_New();
2592 if (self->composite_fields == NULL) {
2593 return false;
2594 }
2595 }
2596 return PyDict_SetItem(self->composite_fields, name, value) == 0;
2597}
2598
2599PyObject* GetAttr(CMessage* self, PyObject* name) {
2600 PyObject* value = self->composite_fields ?
2601 PyDict_GetItem(self->composite_fields, name) : NULL;
2602 if (value != NULL) {
2603 Py_INCREF(value);
2604 return value;
2605 }
2606
2607 const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
2608 if (field_descriptor == NULL) {
2609 return CMessage_Type.tp_base->tp_getattro(
2610 reinterpret_cast<PyObject*>(self), name);
2611 }
2612
2613 if (field_descriptor->is_map()) {
2614 PyObject* py_container = NULL;
2615 const Descriptor* entry_type = field_descriptor->message_type();
2616 const FieldDescriptor* value_type = entry_type->FindFieldByName("value");
2617 if (value_type->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2618 PyObject* value_class = cdescriptor_pool::GetMessageClass(
2619 GetDescriptorPoolForMessage(self), value_type->message_type());
2620 if (value_class == NULL) {
2621 return NULL;
2622 }
2623 py_container =
2624 NewMessageMapContainer(self, field_descriptor, value_class);
2625 } else {
2626 py_container = NewScalarMapContainer(self, field_descriptor);
2627 }
2628 if (py_container == NULL) {
2629 return NULL;
2630 }
2631 if (!SetCompositeField(self, name, py_container)) {
2632 Py_DECREF(py_container);
2633 return NULL;
2634 }
2635 return py_container;
2636 }
2637
2638 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
2639 PyObject* py_container = NULL;
2640 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2641 PyObject *message_class = cdescriptor_pool::GetMessageClass(
2642 GetDescriptorPoolForMessage(self), field_descriptor->message_type());
2643 if (message_class == NULL) {
2644 return NULL;
2645 }
2646 py_container = repeated_composite_container::NewContainer(
2647 self, field_descriptor, message_class);
2648 } else {
2649 py_container = repeated_scalar_container::NewContainer(
2650 self, field_descriptor);
2651 }
2652 if (py_container == NULL) {
2653 return NULL;
2654 }
2655 if (!SetCompositeField(self, name, py_container)) {
2656 Py_DECREF(py_container);
2657 return NULL;
2658 }
2659 return py_container;
2660 }
2661
2662 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2663 PyObject* sub_message = InternalGetSubMessage(self, field_descriptor);
2664 if (sub_message == NULL) {
2665 return NULL;
2666 }
2667 if (!SetCompositeField(self, name, sub_message)) {
2668 Py_DECREF(sub_message);
2669 return NULL;
2670 }
2671 return sub_message;
2672 }
2673
2674 return InternalGetScalar(self->message, field_descriptor);
2675}
2676
2677int SetAttr(CMessage* self, PyObject* name, PyObject* value) {
2678 if (self->composite_fields && PyDict_Contains(self->composite_fields, name)) {
2679 PyErr_SetString(PyExc_TypeError, "Can't set composite field");
2680 return -1;
2681 }
2682
2683 const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
2684 if (field_descriptor != NULL) {
2685 AssureWritable(self);
2686 if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
2687 PyErr_Format(PyExc_AttributeError, "Assignment not allowed to repeated "
2688 "field \"%s\" in protocol message object.",
2689 field_descriptor->name().c_str());
2690 return -1;
2691 } else {
2692 if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2693 PyErr_Format(PyExc_AttributeError, "Assignment not allowed to "
2694 "field \"%s\" in protocol message object.",
2695 field_descriptor->name().c_str());
2696 return -1;
2697 } else {
2698 return InternalSetScalar(self, field_descriptor, value);
2699 }
2700 }
2701 }
2702
2703 PyErr_Format(PyExc_AttributeError,
2704 "Assignment not allowed "
2705 "(no field \"%s\"in protocol message object).",
2706 PyString_AsString(name));
2707 return -1;
2708}
2709
2710} // namespace cmessage
2711
// Python type object for CMessage. Its metatype is PyMessageMeta_Type.
// The initializer is positional, so the order of the slots below must not be
// changed. Notable slots: instances are unhashable
// (PyObject_HashNotImplemented), str() goes through cmessage::ToStr,
// attribute access through cmessage::GetAttr / cmessage::SetAttr, and
// comparison through cmessage::RichCompare. tp_base is left 0 here;
// cmessage::GetAttr dereferences CMessage_Type.tp_base, so it has to be
// filled in before attribute lookups happen.
// NOTE(review): presumably PyType_Ready supplies tp_base during module
// initialization -- confirm.
PyTypeObject CMessage_Type = {
  PyVarObject_HEAD_INIT(&PyMessageMeta_Type, 0)
  FULL_MODULE_NAME ".CMessage",        // tp_name
  sizeof(CMessage),                    // tp_basicsize
  0,                                   //  tp_itemsize
  (destructor)cmessage::Dealloc,       //  tp_dealloc
  0,                                   //  tp_print
  0,                                   //  tp_getattr
  0,                                   //  tp_setattr
  0,                                   //  tp_compare
  0,                                   //  tp_repr
  0,                                   //  tp_as_number
  0,                                   //  tp_as_sequence
  0,                                   //  tp_as_mapping
  PyObject_HashNotImplemented,         //  tp_hash
  0,                                   //  tp_call
  (reprfunc)cmessage::ToStr,           //  tp_str
  (getattrofunc)cmessage::GetAttr,     //  tp_getattro
  (setattrofunc)cmessage::SetAttr,     //  tp_setattro
  0,                                   //  tp_as_buffer
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  //  tp_flags
  "A ProtocolMessage",                 //  tp_doc
  0,                                   //  tp_traverse
  0,                                   //  tp_clear
  (richcmpfunc)cmessage::RichCompare,  //  tp_richcompare
  0,                                   //  tp_weaklistoffset
  0,                                   //  tp_iter
  0,                                   //  tp_iternext
  cmessage::Methods,                   //  tp_methods
  0,                                   //  tp_members
  cmessage::Getters,                   //  tp_getset
  0,                                   //  tp_base
  0,                                   //  tp_dict
  0,                                   //  tp_descr_get
  0,                                   //  tp_descr_set
  0,                                   //  tp_dictoffset
  (initproc)cmessage::Init,            //  tp_init
  0,                                   //  tp_alloc
  cmessage::New,                       //  tp_new
};
2752
2753// --- Exposing the C proto living inside Python proto to C code:
2754
// Function-pointer hooks through which other C++ code can reach the C++
// message held by a Python message object: one for read-only access and one
// for mutable access. They are not assigned at their definition.
// NOTE(review): presumably pointed at GetCProtoInsidePyProtoImpl and
// MutableCProtoInsidePyProtoImpl during module initialization -- confirm.
const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg);
Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg);
2757
2758static const Message* GetCProtoInsidePyProtoImpl(PyObject* msg) {
2759 if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
2760 return NULL;
2761 }
2762 CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
2763 return cmsg->message;
2764}
2765
2766static Message* MutableCProtoInsidePyProtoImpl(PyObject* msg) {
2767 if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
2768 return NULL;
2769 }
2770 CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
2771 if ((cmsg->composite_fields && PyDict_Size(cmsg->composite_fields) != 0) ||
2772 (cmsg->extensions != NULL &&
2773 PyDict_Size(cmsg->extensions->values) != 0)) {
2774 // There is currently no way of accurately syncing arbitrary changes to
2775 // the underlying C++ message back to the CMessage (e.g. removed repeated
2776 // composite containers). We only allow direct mutation of the underlying
2777 // C++ message if there is no child data in the CMessage.
2778 return NULL;
2779 }
2780 cmessage::AssureWritable(cmsg);
2781 return cmsg->message;
2782}
2783
// Human-readable description of this extension module.
// NOTE(review): presumably passed as the module docstring when the module
// object is created -- the use site is outside this part of the file.
static const char module_docstring[] =
"python-proto2 is a module that can be used to enhance proto2 Python API\n"
"performance.\n"
"\n"
"It provides access to the protocol buffers C++ reflection API that\n"
"implements the basic protocol buffer functions.";
2790
2791void InitGlobals() {
2792 // TODO(gps): Check all return values in this function for NULL and propagate
2793 // the error (MemoryError) on up to result in an import failure. These should
2794 // also be freed and reset to NULL during finalization.
2795 kPythonZero = PyInt_FromLong(0);
2796 kint32min_py = PyInt_FromLong(kint32min);
2797 kint32max_py = PyInt_FromLong(kint32max);
2798 kuint32max_py = PyLong_FromLongLong(kuint32max);
2799 kint64min_py = PyLong_FromLongLong(kint64min);
2800 kint64max_py = PyLong_FromLongLong(kint64max);
2801 kuint64max_py = PyLong_FromUnsignedLongLong(kuint64max);
2802
2803 kDESCRIPTOR = PyString_FromString("DESCRIPTOR");
2804 k_cdescriptor = PyString_FromString("_cdescriptor");
2805 kfull_name = PyString_FromString("full_name");
2806 k_extensions_by_name = PyString_FromString("_extensions_by_name");
2807 k_extensions_by_number = PyString_FromString("_extensions_by_number");
2808
2809 PyObject *dummy_obj = PySet_New(NULL);
2810 kEmptyWeakref = PyWeakref_NewRef(dummy_obj, NULL);
2811 Py_DECREF(dummy_obj);
2812}
2813
2814bool InitProto2MessageModule(PyObject *m) {
2815 // Initialize types and globals in descriptor.cc
2816 if (!InitDescriptor()) {
2817 return false;
2818 }
2819
2820 // Initialize types and globals in descriptor_pool.cc
2821 if (!InitDescriptorPool()) {
2822 return false;
2823 }
2824
2825 // Initialize constants defined in this file.
2826 InitGlobals();
2827
2828 PyMessageMeta_Type.tp_base = &PyType_Type;
2829 if (PyType_Ready(&PyMessageMeta_Type) < 0) {
2830 return false;
2831 }
2832 PyModule_AddObject(m, "MessageMeta",
2833 reinterpret_cast<PyObject*>(&PyMessageMeta_Type));
2834
2835 if (PyType_Ready(&CMessage_Type) < 0) {
2836 return false;
2837 }
2838
2839 // DESCRIPTOR is set on each protocol buffer message class elsewhere, but set
2840 // it here as well to document that subclasses need to set it.
2841 PyDict_SetItem(CMessage_Type.tp_dict, kDESCRIPTOR, Py_None);
2842 // Subclasses with message extensions will override _extensions_by_name and
2843 // _extensions_by_number with fresh mutable dictionaries in AddDescriptors.
2844 // All other classes can share this same immutable mapping.
2845 ScopedPyObjectPtr empty_dict(PyDict_New());
2846 if (empty_dict == NULL) {
2847 return false;
2848 }
2849 ScopedPyObjectPtr immutable_dict(PyDictProxy_New(empty_dict.get()));
2850 if (immutable_dict == NULL) {
2851 return false;
2852 }
2853 if (PyDict_SetItem(CMessage_Type.tp_dict,
2854 k_extensions_by_name, immutable_dict.get()) < 0) {
2855 return false;
2856 }
2857 if (PyDict_SetItem(CMessage_Type.tp_dict,
2858 k_extensions_by_number, immutable_dict.get()) < 0) {
2859 return false;
2860 }
2861
2862 PyModule_AddObject(m, "Message", reinterpret_cast<PyObject*>(&CMessage_Type));
2863
2864 // Initialize Repeated container types.
2865 {
2866 if (PyType_Ready(&RepeatedScalarContainer_Type) < 0) {
2867 return false;
2868 }
2869
2870 PyModule_AddObject(m, "RepeatedScalarContainer",
2871 reinterpret_cast<PyObject*>(
2872 &RepeatedScalarContainer_Type));
2873
2874 if (PyType_Ready(&RepeatedCompositeContainer_Type) < 0) {
2875 return false;
2876 }
2877
2878 PyModule_AddObject(
2879 m, "RepeatedCompositeContainer",
2880 reinterpret_cast<PyObject*>(
2881 &RepeatedCompositeContainer_Type));
2882
2883 // Register them as collections.Sequence
2884 ScopedPyObjectPtr collections(PyImport_ImportModule("collections"));
2885 if (collections == NULL) {
2886 return false;
2887 }
2888 ScopedPyObjectPtr mutable_sequence(
2889 PyObject_GetAttrString(collections.get(), "MutableSequence"));
2890 if (mutable_sequence == NULL) {
2891 return false;
2892 }
2893 if (ScopedPyObjectPtr(
2894 PyObject_CallMethod(mutable_sequence.get(), "register", "O",
2895 &RepeatedScalarContainer_Type)) == NULL) {
2896 return false;
2897 }
2898 if (ScopedPyObjectPtr(
2899 PyObject_CallMethod(mutable_sequence.get(), "register", "O",
2900 &RepeatedCompositeContainer_Type)) == NULL) {
2901 return false;
2902 }
2903 }
2904
2905 // Initialize Map container types.
2906 {
2907 // ScalarMapContainer_Type derives from our MutableMapping type.
2908 ScopedPyObjectPtr containers(PyImport_ImportModule(
2909 "google.protobuf.internal.containers"));
2910 if (containers == NULL) {
2911 return false;
2912 }
2913
2914 ScopedPyObjectPtr mutable_mapping(
2915 PyObject_GetAttrString(containers.get(), "MutableMapping"));
2916 if (mutable_mapping == NULL) {
2917 return false;
2918 }
2919
2920 if (!PyObject_TypeCheck(mutable_mapping.get(), &PyType_Type)) {
2921 return false;
2922 }
2923
2924 Py_INCREF(mutable_mapping.get());
2925#if PY_MAJOR_VERSION >= 3
2926 PyObject* bases = PyTuple_New(1);
2927 PyTuple_SET_ITEM(bases, 0, mutable_mapping.get());
2928
2929 ScalarMapContainer_Type =
2930 PyType_FromSpecWithBases(&ScalarMapContainer_Type_spec, bases);
2931 PyModule_AddObject(m, "ScalarMapContainer", ScalarMapContainer_Type);
2932#else
2933 ScalarMapContainer_Type.tp_base =
2934 reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
2935
2936 if (PyType_Ready(&ScalarMapContainer_Type) < 0) {
2937 return false;
2938 }
2939
2940 PyModule_AddObject(m, "ScalarMapContainer",
2941 reinterpret_cast<PyObject*>(&ScalarMapContainer_Type));
2942#endif
2943
2944 if (PyType_Ready(&MapIterator_Type) < 0) {
2945 return false;
2946 }
2947
2948 PyModule_AddObject(m, "MapIterator",
2949 reinterpret_cast<PyObject*>(&MapIterator_Type));
2950
2951
2952#if PY_MAJOR_VERSION >= 3
2953 MessageMapContainer_Type =
2954 PyType_FromSpecWithBases(&MessageMapContainer_Type_spec, bases);
2955 PyModule_AddObject(m, "MessageMapContainer", MessageMapContainer_Type);
2956#else
2957 Py_INCREF(mutable_mapping.get());
2958 MessageMapContainer_Type.tp_base =
2959 reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
2960
2961 if (PyType_Ready(&MessageMapContainer_Type) < 0) {
2962 return false;
2963 }
2964
2965 PyModule_AddObject(m, "MessageMapContainer",
2966 reinterpret_cast<PyObject*>(&MessageMapContainer_Type));
2967#endif
2968 }
2969
2970 if (PyType_Ready(&ExtensionDict_Type) < 0) {
2971 return false;
2972 }
2973 PyModule_AddObject(
2974 m, "ExtensionDict",
2975 reinterpret_cast<PyObject*>(&ExtensionDict_Type));
2976
2977 // Expose the DescriptorPool used to hold all descriptors added from generated
2978 // pb2.py files.
2979 // PyModule_AddObject steals a reference.
2980 Py_INCREF(GetDefaultDescriptorPool());
2981 PyModule_AddObject(m, "default_pool",
2982 reinterpret_cast<PyObject*>(GetDefaultDescriptorPool()));
2983
2984 PyModule_AddObject(m, "DescriptorPool", reinterpret_cast<PyObject*>(
2985 &PyDescriptorPool_Type));
2986
2987 // This implementation provides full Descriptor types, we advertise it so that
2988 // descriptor.py can use them in replacement of the Python classes.
2989 PyModule_AddIntConstant(m, "_USE_C_DESCRIPTORS", 1);
2990
2991 PyModule_AddObject(m, "Descriptor", reinterpret_cast<PyObject*>(
2992 &PyMessageDescriptor_Type));
2993 PyModule_AddObject(m, "FieldDescriptor", reinterpret_cast<PyObject*>(
2994 &PyFieldDescriptor_Type));
2995 PyModule_AddObject(m, "EnumDescriptor", reinterpret_cast<PyObject*>(
2996 &PyEnumDescriptor_Type));
2997 PyModule_AddObject(m, "EnumValueDescriptor", reinterpret_cast<PyObject*>(
2998 &PyEnumValueDescriptor_Type));
2999 PyModule_AddObject(m, "FileDescriptor", reinterpret_cast<PyObject*>(
3000 &PyFileDescriptor_Type));
3001 PyModule_AddObject(m, "OneofDescriptor", reinterpret_cast<PyObject*>(
3002 &PyOneofDescriptor_Type));
3003
3004 PyObject* enum_type_wrapper = PyImport_ImportModule(
3005 "google.protobuf.internal.enum_type_wrapper");
3006 if (enum_type_wrapper == NULL) {
3007 return false;
3008 }
3009 EnumTypeWrapper_class =
3010 PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
3011 Py_DECREF(enum_type_wrapper);
3012
3013 PyObject* message_module = PyImport_ImportModule(
3014 "google.protobuf.message");
3015 if (message_module == NULL) {
3016 return false;
3017 }
3018 EncodeError_class = PyObject_GetAttrString(message_module, "EncodeError");
3019 DecodeError_class = PyObject_GetAttrString(message_module, "DecodeError");
3020 PythonMessage_class = PyObject_GetAttrString(message_module, "Message");
3021 Py_DECREF(message_module);
3022
3023 PyObject* pickle_module = PyImport_ImportModule("pickle");
3024 if (pickle_module == NULL) {
3025 return false;
3026 }
3027 PickleError_class = PyObject_GetAttrString(pickle_module, "PickleError");
3028 Py_DECREF(pickle_module);
3029
3030 // Override {Get,Mutable}CProtoInsidePyProto.
3031 GetCProtoInsidePyProtoPtr = GetCProtoInsidePyProtoImpl;
3032 MutableCProtoInsidePyProtoPtr = MutableCProtoInsidePyProtoImpl;
3033
3034 return true;
3035}
3036
3037} // namespace python
3038} // namespace protobuf
3039
3040
#if PY_MAJOR_VERSION >= 3
// Module definition handed to PyModule_Create() by the Python 3 entry point.
// Field names follow CPython's PyModuleDef layout.
static PyModuleDef _module = {
  PyModuleDef_HEAD_INIT,                       // m_base
  "_message",                                  // m_name
  google::protobuf::python::module_docstring,  // m_doc
  -1,                                          // m_size
  NULL,                                        // m_methods
  NULL,                                        // m_slots (m_reload before 3.5)
  NULL,                                        // m_traverse
  NULL,                                        // m_clear
  NULL                                         // m_free
};
#endif

// Name of the entry point and the value it returns on failure.  The Python 2
// init function returns void, so its error value expands to nothing.
#if PY_MAJOR_VERSION >= 3
#define INITFUNC PyInit__message
#define INITFUNC_ERRORVAL NULL
#else  // Python 2
#define INITFUNC init_message
#define INITFUNC_ERRORVAL
#endif
3059
3060extern "C" {
3061 PyMODINIT_FUNC INITFUNC(void) {
3062 PyObject* m;
3063#if PY_MAJOR_VERSION >= 3
3064 m = PyModule_Create(&_module);
3065#else
3066 m = Py_InitModule3("_message", NULL, google::protobuf::python::module_docstring);
3067#endif
3068 if (m == NULL) {
3069 return INITFUNC_ERRORVAL;
3070 }
3071
3072 if (!google::protobuf::python::InitProto2MessageModule(m)) {
3073 Py_DECREF(m);
3074 return INITFUNC_ERRORVAL;
3075 }
3076
3077#if PY_MAJOR_VERSION >= 3
3078 return m;
3079#endif
3080 }
3081}
3082} // namespace google