Blame - python/google/protobuf/text_format.py - RealtimeRoboticsGroup/test

blob: 8d256076c28ad4c3245aaccbbbd8284aeb4bc961 [file] [log] [blame]

Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame^]	1	# Protocol Buffers - Google's data interchange format
				2	# Copyright 2008 Google Inc. All rights reserved.
				3	# https://developers.google.com/protocol-buffers/
				4	#
				5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
				8	#
				9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
				18	#
				19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				30
				31	"""Contains routines for printing protocol messages in text format.
				32
				33	Simple usage example:
				34
				35	# Create a proto object and serialize it to a text proto string.
				36	message = my_proto_pb2.MyMessage(foo='bar')
				37	text_proto = text_format.MessageToString(message)
				38
				39	# Parse a text proto string.
				40	message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
				41	"""
				42
				43	__author__ = 'kenton@google.com (Kenton Varda)'
				44
				45	import io
				46	import re
				47
				48	import six
				49
				50	if six.PY3:
				51	long = int
				52
				53	from google.protobuf.internal import type_checkers
				54	from google.protobuf import descriptor
				55	from google.protobuf import text_encoding
				56
				57	__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
				58	'PrintFieldValue', 'Merge']
				59
				60
# Value checkers for the four integer flavours, in the order
# uint32, int32, uint64, int64.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Textual infinity: optional '-', "inf" or "infinity", optional trailing
# "f"; case-insensitive.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
# Textual NaN: "nan" with an optional trailing "f"; case-insensitive.
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types that PrintFieldValue formats with float_format when one
# is supplied.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
# Characters that may delimit a quoted string literal.
_QUOTES = frozenset(("'", '"'))
				70
				71
class Error(Exception):
    """Top-level module error for text_format.

    ParseError derives from this class, so catching Error also catches
    parsing failures.
    """
				74
				75
class ParseError(Error):
    """Thrown in case of text parsing error.

    The tokenizer's TryConsume* helpers catch this type to turn a failed
    parse attempt into a False return value.
    """
				78
				79
				80	class TextWriter(object):
				81	def __init__(self, as_utf8):
				82	if six.PY2:
				83	self._writer = io.BytesIO()
				84	else:
				85	self._writer = io.StringIO()
				86
				87	def write(self, val):
				88	if six.PY2:
				89	if isinstance(val, six.text_type):
				90	val = val.encode('utf-8')
				91	return self._writer.write(val)
				92
				93	def close(self):
				94	return self._writer.close()
				95
				96	def getvalue(self):
				97	return self._writer.getvalue()
				98
				99
				100	def MessageToString(message, as_utf8=False, as_one_line=False,
				101	pointy_brackets=False, use_index_order=False,
				102	float_format=None):
				103	"""Convert protobuf message to text format.
				104
				105	Floating point values can be formatted compactly with 15 digits of
				106	precision (which is the most that IEEE 754 "double" can guarantee)
				107	using float_format='.15g'. To ensure that converting to text and back to a
				108	proto will result in an identical value, float_format='.17g' should be used.
				109
				110	Args:
				111	message: The protocol buffers message.
				112	as_utf8: Produce text output in UTF8 format.
				113	as_one_line: Don't introduce newlines between fields.
				114	pointy_brackets: If True, use angle brackets instead of curly braces for
				115	nesting.
				116	use_index_order: If True, print fields of a proto message using the order
				117	defined in source code instead of the field number. By default, use the
				118	field number order.
				119	float_format: If set, use this to specify floating point number formatting
				120	(per the "Format Specification Mini-Language"); otherwise, str() is used.
				121
				122	Returns:
				123	A string of the text formatted protocol buffer message.
				124	"""
				125	out = TextWriter(as_utf8)
				126	PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
				127	pointy_brackets=pointy_brackets,
				128	use_index_order=use_index_order,
				129	float_format=float_format)
				130	result = out.getvalue()
				131	out.close()
				132	if as_one_line:
				133	return result.rstrip()
				134	return result
				135
				136
				137	def _IsMapEntry(field):
				138	return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
				139	field.message_type.has_options and
				140	field.message_type.GetOptions().map_entry)
				141
				142
				143	def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
				144	pointy_brackets=False, use_index_order=False,
				145	float_format=None):
				146	fields = message.ListFields()
				147	if use_index_order:
				148	fields.sort(key=lambda x: x[0].index)
				149	for field, value in fields:
				150	if _IsMapEntry(field):
				151	for key in sorted(value):
				152	# This is slow for maps with submessage entires because it copies the
				153	# entire tree. Unfortunately this would take significant refactoring
				154	# of this file to work around.
				155	#
				156	# TODO(haberman): refactor and optimize if this becomes an issue.
				157	entry_submsg = field.message_type._concrete_class(
				158	key=key, value=value[key])
				159	PrintField(field, entry_submsg, out, indent, as_utf8, as_one_line,
				160	pointy_brackets=pointy_brackets,
				161	use_index_order=use_index_order, float_format=float_format)
				162	elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
				163	for element in value:
				164	PrintField(field, element, out, indent, as_utf8, as_one_line,
				165	pointy_brackets=pointy_brackets,
				166	use_index_order=use_index_order,
				167	float_format=float_format)
				168	else:
				169	PrintField(field, value, out, indent, as_utf8, as_one_line,
				170	pointy_brackets=pointy_brackets,
				171	use_index_order=use_index_order,
				172	float_format=float_format)
				173
				174
				175	def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
				176	pointy_brackets=False, use_index_order=False, float_format=None):
				177	"""Print a single field name/value pair. For repeated fields, the value
				178	should be a single element.
				179	"""
				180
				181	out.write(' ' * indent)
				182	if field.is_extension:
				183	out.write('[')
				184	if (field.containing_type.GetOptions().message_set_wire_format and
				185	field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
				186	field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
				187	out.write(field.message_type.full_name)
				188	else:
				189	out.write(field.full_name)
				190	out.write(']')
				191	elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
				192	# For groups, use the capitalized name.
				193	out.write(field.message_type.name)
				194	else:
				195	out.write(field.name)
				196
				197	if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
				198	# The colon is optional in this case, but our cross-language golden files
				199	# don't include it.
				200	out.write(': ')
				201
				202	PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
				203	pointy_brackets=pointy_brackets,
				204	use_index_order=use_index_order,
				205	float_format=float_format)
				206	if as_one_line:
				207	out.write(' ')
				208	else:
				209	out.write('\n')
				210
				211
				212	def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
				213	as_one_line=False, pointy_brackets=False,
				214	use_index_order=False,
				215	float_format=None):
				216	"""Print a single field value (not including name). For repeated fields,
				217	the value should be a single element."""
				218
				219	if pointy_brackets:
				220	openb = '<'
				221	closeb = '>'
				222	else:
				223	openb = '{'
				224	closeb = '}'
				225
				226	if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
				227	if as_one_line:
				228	out.write(' %s ' % openb)
				229	PrintMessage(value, out, indent, as_utf8, as_one_line,
				230	pointy_brackets=pointy_brackets,
				231	use_index_order=use_index_order,
				232	float_format=float_format)
				233	out.write(closeb)
				234	else:
				235	out.write(' %s\n' % openb)
				236	PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
				237	pointy_brackets=pointy_brackets,
				238	use_index_order=use_index_order,
				239	float_format=float_format)
				240	out.write(' ' * indent + closeb)
				241	elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
				242	enum_value = field.enum_type.values_by_number.get(value, None)
				243	if enum_value is not None:
				244	out.write(enum_value.name)
				245	else:
				246	out.write(str(value))
				247	elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
				248	out.write('\"')
				249	if isinstance(value, six.text_type):
				250	out_value = value.encode('utf-8')
				251	else:
				252	out_value = value
				253	if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
				254	# We need to escape non-UTF8 chars in TYPE_BYTES field.
				255	out_as_utf8 = False
				256	else:
				257	out_as_utf8 = as_utf8
				258	out.write(text_encoding.CEscape(out_value, out_as_utf8))
				259	out.write('\"')
				260	elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
				261	if value:
				262	out.write('true')
				263	else:
				264	out.write('false')
				265	elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
				266	out.write('{1:{0}}'.format(float_format, value))
				267	else:
				268	out.write(str(value))
				269
				270
				271	def Parse(text, message, allow_unknown_extension=False):
				272	"""Parses an text representation of a protocol message into a message.
				273
				274	Args:
				275	text: Message text representation.
				276	message: A protocol buffer message to merge into.
				277	allow_unknown_extension: if True, skip over missing extensions and keep
				278	parsing
				279
				280	Returns:
				281	The same message passed as argument.
				282
				283	Raises:
				284	ParseError: On text parsing problems.
				285	"""
				286	if not isinstance(text, str):
				287	text = text.decode('utf-8')
				288	return ParseLines(text.split('\n'), message, allow_unknown_extension)
				289
				290
				291	def Merge(text, message, allow_unknown_extension=False):
				292	"""Parses an text representation of a protocol message into a message.
				293
				294	Like Parse(), but allows repeated values for a non-repeated field, and uses
				295	the last one.
				296
				297	Args:
				298	text: Message text representation.
				299	message: A protocol buffer message to merge into.
				300	allow_unknown_extension: if True, skip over missing extensions and keep
				301	parsing
				302
				303	Returns:
				304	The same message passed as argument.
				305
				306	Raises:
				307	ParseError: On text parsing problems.
				308	"""
				309	return MergeLines(text.split('\n'), message, allow_unknown_extension)
				310
				311
				312	def ParseLines(lines, message, allow_unknown_extension=False):
				313	"""Parses an text representation of a protocol message into a message.
				314
				315	Args:
				316	lines: An iterable of lines of a message's text representation.
				317	message: A protocol buffer message to merge into.
				318	allow_unknown_extension: if True, skip over missing extensions and keep
				319	parsing
				320
				321	Returns:
				322	The same message passed as argument.
				323
				324	Raises:
				325	ParseError: On text parsing problems.
				326	"""
				327	_ParseOrMerge(lines, message, False, allow_unknown_extension)
				328	return message
				329
				330
				331	def MergeLines(lines, message, allow_unknown_extension=False):
				332	"""Parses an text representation of a protocol message into a message.
				333
				334	Args:
				335	lines: An iterable of lines of a message's text representation.
				336	message: A protocol buffer message to merge into.
				337	allow_unknown_extension: if True, skip over missing extensions and keep
				338	parsing
				339
				340	Returns:
				341	The same message passed as argument.
				342
				343	Raises:
				344	ParseError: On text parsing problems.
				345	"""
				346	_ParseOrMerge(lines, message, True, allow_unknown_extension)
				347	return message
				348
				349
				350	def _ParseOrMerge(lines,
				351	message,
				352	allow_multiple_scalars,
				353	allow_unknown_extension=False):
				354	"""Converts an text representation of a protocol message into a message.
				355
				356	Args:
				357	lines: Lines of a message's text representation.
				358	message: A protocol buffer message to merge into.
				359	allow_multiple_scalars: Determines if repeated values for a non-repeated
				360	field are permitted, e.g., the string "foo: 1 foo: 2" for a
				361	required/optional field named "foo".
				362	allow_unknown_extension: if True, skip over missing extensions and keep
				363	parsing
				364
				365	Raises:
				366	ParseError: On text parsing problems.
				367	"""
				368	tokenizer = _Tokenizer(lines)
				369	while not tokenizer.AtEnd():
				370	_MergeField(tokenizer, message, allow_multiple_scalars,
				371	allow_unknown_extension)
				372
				373
				374	def _MergeField(tokenizer,
				375	message,
				376	allow_multiple_scalars,
				377	allow_unknown_extension=False):
				378	"""Merges a single protocol message field into a message.
				379
				380	Args:
				381	tokenizer: A tokenizer to parse the field name and values.
				382	message: A protocol message to record the data.
				383	allow_multiple_scalars: Determines if repeated values for a non-repeated
				384	field are permitted, e.g., the string "foo: 1 foo: 2" for a
				385	required/optional field named "foo".
				386	allow_unknown_extension: if True, skip over missing extensions and keep
				387	parsing
				388
				389	Raises:
				390	ParseError: In case of text parsing problems.
				391	"""
				392	message_descriptor = message.DESCRIPTOR
				393	if (hasattr(message_descriptor, 'syntax') and
				394	message_descriptor.syntax == 'proto3'):
				395	# Proto3 doesn't represent presence so we can't test if multiple
				396	# scalars have occurred. We have to allow them.
				397	allow_multiple_scalars = True
				398	if tokenizer.TryConsume('['):
				399	name = [tokenizer.ConsumeIdentifier()]
				400	while tokenizer.TryConsume('.'):
				401	name.append(tokenizer.ConsumeIdentifier())
				402	name = '.'.join(name)
				403
				404	if not message_descriptor.is_extendable:
				405	raise tokenizer.ParseErrorPreviousToken(
				406	'Message type "%s" does not have extensions.' %
				407	message_descriptor.full_name)
				408	# pylint: disable=protected-access
				409	field = message.Extensions._FindExtensionByName(name)
				410	# pylint: enable=protected-access
				411	if not field:
				412	if allow_unknown_extension:
				413	field = None
				414	else:
				415	raise tokenizer.ParseErrorPreviousToken(
				416	'Extension "%s" not registered.' % name)
				417	elif message_descriptor != field.containing_type:
				418	raise tokenizer.ParseErrorPreviousToken(
				419	'Extension "%s" does not extend message type "%s".' % (
				420	name, message_descriptor.full_name))
				421
				422	tokenizer.Consume(']')
				423
				424	else:
				425	name = tokenizer.ConsumeIdentifier()
				426	field = message_descriptor.fields_by_name.get(name, None)
				427
				428	# Group names are expected to be capitalized as they appear in the
				429	# .proto file, which actually matches their type names, not their field
				430	# names.
				431	if not field:
				432	field = message_descriptor.fields_by_name.get(name.lower(), None)
				433	if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
				434	field = None
				435
				436	if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
				437	field.message_type.name != name):
				438	field = None
				439
				440	if not field:
				441	raise tokenizer.ParseErrorPreviousToken(
				442	'Message type "%s" has no field named "%s".' % (
				443	message_descriptor.full_name, name))
				444
				445	if field and field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
				446	is_map_entry = _IsMapEntry(field)
				447	tokenizer.TryConsume(':')
				448
				449	if tokenizer.TryConsume('<'):
				450	end_token = '>'
				451	else:
				452	tokenizer.Consume('{')
				453	end_token = '}'
				454
				455	if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
				456	if field.is_extension:
				457	sub_message = message.Extensions[field].add()
				458	elif is_map_entry:
				459	sub_message = field.message_type._concrete_class()
				460	else:
				461	sub_message = getattr(message, field.name).add()
				462	else:
				463	if field.is_extension:
				464	sub_message = message.Extensions[field]
				465	else:
				466	sub_message = getattr(message, field.name)
				467	sub_message.SetInParent()
				468
				469	while not tokenizer.TryConsume(end_token):
				470	if tokenizer.AtEnd():
				471	raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
				472	_MergeField(tokenizer, sub_message, allow_multiple_scalars,
				473	allow_unknown_extension)
				474
				475	if is_map_entry:
				476	value_cpptype = field.message_type.fields_by_name['value'].cpp_type
				477	if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
				478	value = getattr(message, field.name)[sub_message.key]
				479	value.MergeFrom(sub_message.value)
				480	else:
				481	getattr(message, field.name)[sub_message.key] = sub_message.value
				482	elif field:
				483	tokenizer.Consume(':')
				484	if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
				485	tokenizer.TryConsume('[')):
				486	# Short repeated format, e.g. "foo: [1, 2, 3]"
				487	while True:
				488	_MergeScalarField(tokenizer, message, field, allow_multiple_scalars)
				489	if tokenizer.TryConsume(']'):
				490	break
				491	tokenizer.Consume(',')
				492	else:
				493	_MergeScalarField(tokenizer, message, field, allow_multiple_scalars)
				494	else: # Proto field is unknown.
				495	assert allow_unknown_extension
				496	_SkipFieldContents(tokenizer)
				497
				498	# For historical reasons, fields may optionally be separated by commas or
				499	# semicolons.
				500	if not tokenizer.TryConsume(','):
				501	tokenizer.TryConsume(';')
				502
				503
				504	def _SkipFieldContents(tokenizer):
				505	"""Skips over contents (value or message) of a field.
				506
				507	Args:
				508	tokenizer: A tokenizer to parse the field name and values.
				509	"""
				510	# Try to guess the type of this field.
				511	# If this field is not a message, there should be a ":" between the
				512	# field name and the field value and also the field value should not
				513	# start with "{" or "<" which indicates the beginning of a message body.
				514	# If there is no ":" or there is a "{" or "<" after ":", this field has
				515	# to be a message or the input is ill-formed.
				516	if tokenizer.TryConsume(':') and not tokenizer.LookingAt(
				517	'{') and not tokenizer.LookingAt('<'):
				518	_SkipFieldValue(tokenizer)
				519	else:
				520	_SkipFieldMessage(tokenizer)
				521
				522
				523	def _SkipField(tokenizer):
				524	"""Skips over a complete field (name and value/message).
				525
				526	Args:
				527	tokenizer: A tokenizer to parse the field name and values.
				528	"""
				529	if tokenizer.TryConsume('['):
				530	# Consume extension name.
				531	tokenizer.ConsumeIdentifier()
				532	while tokenizer.TryConsume('.'):
				533	tokenizer.ConsumeIdentifier()
				534	tokenizer.Consume(']')
				535	else:
				536	tokenizer.ConsumeIdentifier()
				537
				538	_SkipFieldContents(tokenizer)
				539
				540	# For historical reasons, fields may optionally be separated by commas or
				541	# semicolons.
				542	if not tokenizer.TryConsume(','):
				543	tokenizer.TryConsume(';')
				544
				545
				546	def _SkipFieldMessage(tokenizer):
				547	"""Skips over a field message.
				548
				549	Args:
				550	tokenizer: A tokenizer to parse the field name and values.
				551	"""
				552
				553	if tokenizer.TryConsume('<'):
				554	delimiter = '>'
				555	else:
				556	tokenizer.Consume('{')
				557	delimiter = '}'
				558
				559	while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
				560	_SkipField(tokenizer)
				561
				562	tokenizer.Consume(delimiter)
				563
				564
				565	def _SkipFieldValue(tokenizer):
				566	"""Skips over a field value.
				567
				568	Args:
				569	tokenizer: A tokenizer to parse the field name and values.
				570
				571	Raises:
				572	ParseError: In case an invalid field value is found.
				573	"""
				574	# String tokens can come in multiple adjacent string literals.
				575	# If we can consume one, consume as many as we can.
				576	if tokenizer.TryConsumeString():
				577	while tokenizer.TryConsumeString():
				578	pass
				579	return
				580
				581	if (not tokenizer.TryConsumeIdentifier() and
				582	not tokenizer.TryConsumeInt64() and
				583	not tokenizer.TryConsumeUint64() and
				584	not tokenizer.TryConsumeFloat()):
				585	raise ParseError('Invalid field value: ' + tokenizer.token)
				586
				587
				588	def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
				589	"""Merges a single protocol message scalar field into a message.
				590
				591	Args:
				592	tokenizer: A tokenizer to parse the field value.
				593	message: A protocol message to record the data.
				594	field: The descriptor of the field to be merged.
				595	allow_multiple_scalars: Determines if repeated values for a non-repeated
				596	field are permitted, e.g., the string "foo: 1 foo: 2" for a
				597	required/optional field named "foo".
				598
				599	Raises:
				600	ParseError: In case of text parsing problems.
				601	RuntimeError: On runtime errors.
				602	"""
				603	value = None
				604
				605	if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
				606	descriptor.FieldDescriptor.TYPE_SINT32,
				607	descriptor.FieldDescriptor.TYPE_SFIXED32):
				608	value = tokenizer.ConsumeInt32()
				609	elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
				610	descriptor.FieldDescriptor.TYPE_SINT64,
				611	descriptor.FieldDescriptor.TYPE_SFIXED64):
				612	value = tokenizer.ConsumeInt64()
				613	elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
				614	descriptor.FieldDescriptor.TYPE_FIXED32):
				615	value = tokenizer.ConsumeUint32()
				616	elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
				617	descriptor.FieldDescriptor.TYPE_FIXED64):
				618	value = tokenizer.ConsumeUint64()
				619	elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
				620	descriptor.FieldDescriptor.TYPE_DOUBLE):
				621	value = tokenizer.ConsumeFloat()
				622	elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
				623	value = tokenizer.ConsumeBool()
				624	elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
				625	value = tokenizer.ConsumeString()
				626	elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
				627	value = tokenizer.ConsumeByteString()
				628	elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
				629	value = tokenizer.ConsumeEnum(field)
				630	else:
				631	raise RuntimeError('Unknown field type %d' % field.type)
				632
				633	if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
				634	if field.is_extension:
				635	message.Extensions[field].append(value)
				636	else:
				637	getattr(message, field.name).append(value)
				638	else:
				639	if field.is_extension:
				640	if not allow_multiple_scalars and message.HasExtension(field):
				641	raise tokenizer.ParseErrorPreviousToken(
				642	'Message type "%s" should not have multiple "%s" extensions.' %
				643	(message.DESCRIPTOR.full_name, field.full_name))
				644	else:
				645	message.Extensions[field] = value
				646	else:
				647	if not allow_multiple_scalars and message.HasField(field.name):
				648	raise tokenizer.ParseErrorPreviousToken(
				649	'Message type "%s" should not have multiple "%s" fields.' %
				650	(message.DESCRIPTOR.full_name, field.name))
				651	else:
				652	setattr(message, field.name, value)
				653
				654
				655	class _Tokenizer(object):
				656	"""Protocol buffer text representation tokenizer.
				657
				658	This class handles the lower level string parsing by splitting it into
				659	meaningful tokens.
				660
				661	It was directly ported from the Java protocol buffer API.
				662	"""
				663
    # Runs of whitespace and '#' comments; these separate tokens and are
    # skipped by _SkipWhitespace.
    _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
    # One token: an identifier, a number, or a quoted string. The
    # alternatives are joined with a plain '|' so that they act as regex
    # alternation; an escaped bar would match a literal pipe character.
    _TOKEN = re.compile('|'.join([
        r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
        r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
    ] + [  # quoted str for each quote mark
        r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark)
        for mark in _QUOTES
    ]))

    # Used by ConsumeIdentifier to validate the current token.
    _IDENTIFIER = re.compile(r'\w+')
				673
    def __init__(self, lines):
        """Set up the tokenizer and load the first token.

        Args:
          lines: An iterable of lines of text to tokenize.
        """
        # Input source and the line currently being scanned.
        self._lines = iter(lines)
        self._current_line = ''
        self._more_lines = True

        # Scan position; _line becomes 0 once the first line is popped.
        self._position = 0
        self._line = -1
        self._column = 0
        self._previous_line = 0
        self._previous_column = 0

        # Current token state.
        self._token_start = None
        self.token = ''

        self._SkipWhitespace()
        self.NextToken()
				687
				688	def LookingAt(self, token):
				689	return self.token == token
				690
				691	def AtEnd(self):
				692	"""Checks the end of the text was reached.
				693
				694	Returns:
				695	True iff the end was reached.
				696	"""
				697	return not self.token
				698
				699	def _PopLine(self):
				700	while len(self._current_line) <= self._column:
				701	try:
				702	self._current_line = next(self._lines)
				703	except StopIteration:
				704	self._current_line = ''
				705	self._more_lines = False
				706	return
				707	else:
				708	self._line += 1
				709	self._column = 0
				710
				711	def _SkipWhitespace(self):
				712	while True:
				713	self._PopLine()
				714	match = self._WHITESPACE.match(self._current_line, self._column)
				715	if not match:
				716	break
				717	length = len(match.group(0))
				718	self._column += length
				719
				720	def TryConsume(self, token):
				721	"""Tries to consume a given piece of text.
				722
				723	Args:
				724	token: Text to consume.
				725
				726	Returns:
				727	True iff the text was consumed.
				728	"""
				729	if self.token == token:
				730	self.NextToken()
				731	return True
				732	return False
				733
    def Consume(self, token):
        """Consumes a piece of text.

        Args:
          token: Text to consume.

        Raises:
          ParseError: If the text couldn't be consumed.
        """
        if self.TryConsume(token):
            return
        raise self._ParseError('Expected "%s".' % token)
				745
    def TryConsumeIdentifier(self):
        """Consume an identifier if possible; return whether one was consumed."""
        try:
            self.ConsumeIdentifier()
        except ParseError:
            return False
        return True
				752
    def ConsumeIdentifier(self):
        """Consumes protocol message field identifier.

        Returns:
          Identifier string.

        Raises:
          ParseError: If an identifier couldn't be consumed.
        """
        identifier = self.token
        if self._IDENTIFIER.match(identifier) is None:
            raise self._ParseError('Expected identifier.')
        self.NextToken()
        return identifier
				767
    def ConsumeInt32(self):
        """Consumes a signed 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 32bit integer couldn't be consumed.
        """
        try:
            parsed = ParseInteger(self.token, is_signed=True, is_long=False)
        except ValueError as err:
            raise self._ParseError(str(err))
        else:
            self.NextToken()
            return parsed
				783
    def ConsumeUint32(self):
        """Consumes an unsigned 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 32bit integer couldn't be consumed.
        """
        try:
            parsed = ParseInteger(self.token, is_signed=False, is_long=False)
        except ValueError as err:
            raise self._ParseError(str(err))
        else:
            self.NextToken()
            return parsed
				799
    def TryConsumeInt64(self):
        """Consume a signed 64bit integer if possible; return success."""
        try:
            self.ConsumeInt64()
        except ParseError:
            return False
        return True
				806
    def ConsumeInt64(self):
        """Consumes a signed 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 64bit integer couldn't be consumed.
        """
        try:
            parsed = ParseInteger(self.token, is_signed=True, is_long=True)
        except ValueError as err:
            raise self._ParseError(str(err))
        else:
            self.NextToken()
            return parsed
				822
    def TryConsumeUint64(self):
        """Consume an unsigned 64bit integer if possible; return success."""
        try:
            self.ConsumeUint64()
        except ParseError:
            return False
        return True
				829
    def ConsumeUint64(self):
        """Consumes an unsigned 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 64bit integer couldn't be consumed.
        """
        try:
            parsed = ParseInteger(self.token, is_signed=False, is_long=True)
        except ValueError as err:
            raise self._ParseError(str(err))
        else:
            self.NextToken()
            return parsed
				845
    def TryConsumeFloat(self):
        """Consume a floating point number if possible; return success."""
        try:
            self.ConsumeFloat()
        except ParseError:
            return False
        return True
				852
    def ConsumeFloat(self):
        """Consumes a floating point number.

        Returns:
          The number parsed.

        Raises:
          ParseError: If a floating point number couldn't be consumed.
        """
        try:
            parsed = ParseFloat(self.token)
        except ValueError as err:
            raise self._ParseError(str(err))
        else:
            self.NextToken()
            return parsed
				868
    def ConsumeBool(self):
        """Consumes a boolean value.

        Returns:
          The bool parsed.

        Raises:
          ParseError: If a boolean value couldn't be consumed.
        """
        try:
            parsed = ParseBool(self.token)
        except ValueError as err:
            raise self._ParseError(str(err))
        else:
            self.NextToken()
            return parsed
				884
    def TryConsumeString(self):
        """Consume a string value if possible; return whether one was consumed."""
        try:
            self.ConsumeString()
        except ParseError:
            return False
        return True
				891
				892	def ConsumeString(self):
				893	"""Consumes a string value.
				894
				895	Returns:
				896	The string parsed.
				897
				898	Raises:
				899	ParseError: If a string value couldn't be consumed.
				900	"""
				901	the_bytes = self.ConsumeByteString()
				902	try:
				903	return six.text_type(the_bytes, 'utf-8')
				904	except UnicodeDecodeError as e:
				905	raise self._StringParseError(e)
				906
				907	def ConsumeByteString(self):
				908	"""Consumes a byte array value.
				909
				910	Returns:
				911	The array parsed (as a string).
				912
				913	Raises:
				914	ParseError: If a byte array value couldn't be consumed.
				915	"""
				916	the_list = [self._ConsumeSingleByteString()]
				917	while self.token and self.token[0] in _QUOTES:
				918	the_list.append(self._ConsumeSingleByteString())
				919	return b''.join(the_list)
				920
				921	def _ConsumeSingleByteString(self):
				922	"""Consume one token of a string literal.
				923
				924	String literals (whether bytes or text) can come in multiple adjacent
				925	tokens which are automatically concatenated, like in C or Python. This
				926	method only consumes one token.
				927
				928	Returns:
				929	The token parsed.
				930	Raises:
				931	ParseError: When the wrong format data is found.
				932	"""
				933	text = self.token
				934	if len(text) < 1 or text[0] not in _QUOTES:
				935	raise self._ParseError('Expected string but found: %r' % (text,))
				936
				937	if len(text) < 2 or text[-1] != text[0]:
				938	raise self._ParseError('String missing ending quote: %r' % (text,))
				939
				940	try:
				941	result = text_encoding.CUnescape(text[1:-1])
				942	except ValueError as e:
				943	raise self._ParseError(str(e))
				944	self.NextToken()
				945	return result
				946
				947	def ConsumeEnum(self, field):
				948	try:
				949	result = ParseEnum(field, self.token)
				950	except ValueError as e:
				951	raise self._ParseError(str(e))
				952	self.NextToken()
				953	return result
				954
				955	def ParseErrorPreviousToken(self, message):
				956	"""Creates and returns a ParseError for the previously read token.
				957
				958	Args:
				959	message: A message to set for the exception.
				960
				961	Returns:
				962	A ParseError instance.
				963	"""
				964	return ParseError('%d:%d : %s' % (
				965	self._previous_line + 1, self._previous_column + 1, message))
				966
				967	def _ParseError(self, message):
				968	"""Creates and returns a ParseError for the current token."""
				969	return ParseError('%d:%d : %s' % (
				970	self._line + 1, self._column + 1, message))
				971
				972	def _StringParseError(self, e):
				973	return self._ParseError('Couldn\'t parse string: ' + str(e))
				974
				975	def NextToken(self):
				976	"""Reads the next meaningful token."""
				977	self._previous_line = self._line
				978	self._previous_column = self._column
				979
				980	self._column += len(self.token)
				981	self._SkipWhitespace()
				982
				983	if not self._more_lines:
				984	self.token = ''
				985	return
				986
				987	match = self._TOKEN.match(self._current_line, self._column)
				988	if match:
				989	token = match.group(0)
				990	self.token = token
				991	else:
				992	self.token = self._current_line[self._column]
				993
				994
				995	def ParseInteger(text, is_signed=False, is_long=False):
				996	"""Parses an integer.
				997
				998	Args:
				999	text: The text to parse.
				1000	is_signed: True if a signed integer must be parsed.
				1001	is_long: True if a long integer must be parsed.
				1002
				1003	Returns:
				1004	The integer value.
				1005
				1006	Raises:
				1007	ValueError: Thrown Iff the text is not a valid integer.
				1008	"""
				1009	# Do the actual parsing. Exception handling is propagated to caller.
				1010	try:
				1011	# We force 32-bit values to int and 64-bit values to long to make
				1012	# alternate implementations where the distinction is more significant
				1013	# (e.g. the C++ implementation) simpler.
				1014	if is_long:
				1015	result = long(text, 0)
				1016	else:
				1017	result = int(text, 0)
				1018	except ValueError:
				1019	raise ValueError('Couldn\'t parse integer: %s' % text)
				1020
				1021	# Check if the integer is sane. Exceptions handled by callers.
				1022	checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
				1023	checker.CheckValue(result)
				1024	return result
				1025
				1026
				1027	def ParseFloat(text):
				1028	"""Parse a floating point number.
				1029
				1030	Args:
				1031	text: Text to parse.
				1032
				1033	Returns:
				1034	The number parsed.
				1035
				1036	Raises:
				1037	ValueError: If a floating point number couldn't be parsed.
				1038	"""
				1039	try:
				1040	# Assume Python compatible syntax.
				1041	return float(text)
				1042	except ValueError:
				1043	# Check alternative spellings.
				1044	if _FLOAT_INFINITY.match(text):
				1045	if text[0] == '-':
				1046	return float('-inf')
				1047	else:
				1048	return float('inf')
				1049	elif _FLOAT_NAN.match(text):
				1050	return float('nan')
				1051	else:
				1052	# assume '1.0f' format
				1053	try:
				1054	return float(text.rstrip('f'))
				1055	except ValueError:
				1056	raise ValueError('Couldn\'t parse float: %s' % text)
				1057
				1058
				1059	def ParseBool(text):
				1060	"""Parse a boolean value.
				1061
				1062	Args:
				1063	text: Text to parse.
				1064
				1065	Returns:
				1066	Boolean values parsed
				1067
				1068	Raises:
				1069	ValueError: If text is not a valid boolean.
				1070	"""
				1071	if text in ('true', 't', '1'):
				1072	return True
				1073	elif text in ('false', 'f', '0'):
				1074	return False
				1075	else:
				1076	raise ValueError('Expected "true" or "false".')
				1077
				1078
				1079	def ParseEnum(field, value):
				1080	"""Parse an enum value.
				1081
				1082	The value can be specified by a number (the enum value), or by
				1083	a string literal (the enum name).
				1084
				1085	Args:
				1086	field: Enum field descriptor.
				1087	value: String value.
				1088
				1089	Returns:
				1090	Enum value number.
				1091
				1092	Raises:
				1093	ValueError: If the enum value could not be parsed.
				1094	"""
				1095	enum_descriptor = field.enum_type
				1096	try:
				1097	number = int(value, 0)
				1098	except ValueError:
				1099	# Identifier.
				1100	enum_value = enum_descriptor.values_by_name.get(value, None)
				1101	if enum_value is None:
				1102	raise ValueError(
				1103	'Enum type "%s" has no value named %s.' % (
				1104	enum_descriptor.full_name, value))
				1105	else:
				1106	# Numeric value.
				1107	enum_value = enum_descriptor.values_by_number.get(number, None)
				1108	if enum_value is None:
				1109	raise ValueError(
				1110	'Enum type "%s" has no value with number %d.' % (
				1111	enum_descriptor.full_name, number))
				1112	return enum_value.number