blob: 2cbd21bc2561819c55009c4cd1cf4efcfbaa0f09 [file] [log] [blame]
Brian Silverman9c614bc2016-02-15 20:20:02 -05001# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc. All rights reserved.
3# https://developers.google.com/protocol-buffers/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Contains routines for printing protocol messages in text format.
32
33Simple usage example:
34
35 # Create a proto object and serialize it to a text proto string.
36 message = my_proto_pb2.MyMessage(foo='bar')
37 text_proto = text_format.MessageToString(message)
38
39 # Parse a text proto string.
40 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
41"""
42
43__author__ = 'kenton@google.com (Kenton Varda)'
44
45import io
46import re
47
48import six
49
50if six.PY3:
Austin Schuh40c16522018-10-28 20:27:54 -070051 long = int # pylint: disable=redefined-builtin,invalid-name
Brian Silverman9c614bc2016-02-15 20:20:02 -050052
Austin Schuh40c16522018-10-28 20:27:54 -070053# pylint: disable=g-import-not-at-top
Brian Silverman9c614bc2016-02-15 20:20:02 -050054from google.protobuf.internal import type_checkers
55from google.protobuf import descriptor
56from google.protobuf import text_encoding
57
__all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue',
           'Merge']

# Value checkers used to validate and range-check integer tokens for each of
# the four fixed-width integer field types.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Text-format spellings of special float values: "inf"/"infinity" (optionally
# negated) and "nan", case-insensitive, with an optional trailing 'f'.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# cpp_type values treated as floating point when applying float_format.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
# Quote characters accepted around string literals in text format.
_QUOTES = frozenset(("'", '"'))
# Full name of the well-known Any type, which gets special expanded handling.
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
Brian Silverman9c614bc2016-02-15 20:20:02 -050071
72
class Error(Exception):
  """Base class for all errors raised by the text_format module."""
75
76
class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    """Builds the error, prefixing message with 'line[:column] : ' if known."""
    if message is not None and line is not None:
      location = str(line) if column is None else '{0}:{1}'.format(line, column)
      message = '{0} : {1}'.format(location, message)
    if message is None:
      super(ParseError, self).__init__()
    else:
      super(ParseError, self).__init__(message)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the line of the error, or None if it was not supplied."""
    return self._line

  def GetColumn(self):
    """Returns the column of the error, or None if it was not supplied."""
    return self._column
Brian Silverman9c614bc2016-02-15 20:20:02 -050098
99
class TextWriter(object):
  """In-memory text sink that hides the PY2 (bytes) vs PY3 (str) difference."""

  def __init__(self, as_utf8):
    # Python 2 accumulates UTF-8 encoded bytes; Python 3 accumulates str.
    self._writer = io.BytesIO() if six.PY2 else io.StringIO()

  def write(self, val):
    if six.PY2 and isinstance(val, six.text_type):
      val = val.encode('utf-8')
    return self._writer.write(val)

  def close(self):
    return self._writer.close()

  def getvalue(self):
    return self._writer.getvalue()
119
120
def MessageToString(message,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    use_field_number=False,
                    descriptor_pool=None,
                    indent=0,
                    message_formatter=None):
  """Convert protobuf message to text format.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, float_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number, extensions
      will be printed at the end of the message and their relative order is
      determined by the extension number. By default, use the field number
      order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    indent: The indent level, in terms of spaces, for pretty print.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  _Printer(writer, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format, use_field_number,
           descriptor_pool, message_formatter).PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # One-line output ends with a trailing separator space; strip it.
  return text.rstrip() if as_one_line else text
171
172
def _IsMapEntry(field):
  """Returns True iff field is a synthesized map-entry message field."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry
177
178
def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None):
  """Converts message to text format and writes it to the out stream."""
  _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format, use_field_number,
           descriptor_pool, message_formatter).PrintMessage(message)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500194
195
def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               message_formatter=None):
  """Print a single field name/value pair.

  Args:
    field: The descriptor of the field to be printed.
    value: The value of the field.  For repeated fields, a single element.
    out: A stream to write the output to.
    indent: The indent level, in terms of spaces, for pretty print.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields using the order defined in source
      code instead of the field number.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages.
  """
  # Bug fix: message_formatter used to be passed positionally, which made it
  # land in _Printer's use_field_number parameter slot, so the formatter was
  # silently dropped (and misread as a truthy use_field_number).  Pass it by
  # keyword so it binds to the correct parameter.
  printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
                     use_index_order, float_format,
                     message_formatter=message_formatter)
  printer.PrintField(field, value)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500210
211
def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    message_formatter=None):
  """Print a single field value (not including name).

  Args:
    field: The descriptor of the field to be printed.
    value: The value of the field.  For repeated fields, a single element.
    out: A stream to write the output to.
    indent: The indent level, in terms of spaces, for pretty print.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields using the order defined in source
      code instead of the field number.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages.
  """
  # Bug fix: message_formatter used to be passed positionally, which made it
  # land in _Printer's use_field_number parameter slot, so the formatter was
  # silently dropped.  Pass it by keyword so it binds correctly.
  printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
                     use_index_order, float_format,
                     message_formatter=message_formatter)
  printer.PrintFieldValue(field, value)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500226
Brian Silverman9c614bc2016-02-15 20:20:02 -0500227
def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a protobuf message instance.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance.

  Returns:
    A Message instance of type matching type_name, or None if no Descriptor
    matching type_name was found.
  """
  # Imported here to avoid a cycle at module import time.
  # pylint: disable=g-import-not-at-top
  if descriptor_pool is None:
    from google.protobuf import descriptor_pool as pool_mod
    descriptor_pool = pool_mod.Default()
  from google.protobuf import symbol_database
  database = symbol_database.Default()
  try:
    msg_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    # Unknown type name: the caller treats None as "could not resolve".
    return None
  return database.GetPrototype(msg_descriptor)()
251
252
class _Printer(object):
  """Text format printer for protocol message.

  Holds the formatting options given to MessageToString()/PrintMessage() and
  walks a message tree, writing its text representation to self.out.
  """

  def __init__(self,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               use_field_number=False,
               descriptor_pool=None,
               message_formatter=None):
    """Initialize the Printer.

    Floating point values can be formatted compactly with 15 digits of
    precision (which is the most that IEEE 754 "double" can guarantee)
    using float_format='.15g'. To ensure that converting to text and back to a
    proto will result in an identical value, float_format='.17g' should be used.

    Args:
      out: To record the text format result.
      indent: The indent level for pretty print.
      as_utf8: Produce text output in UTF8 format.
      as_one_line: Don't introduce newlines between fields.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      float_format: If set, use this to specify floating point number formatting
        (per the "Format Specification Mini-Language"); otherwise, str() is
        used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message type).
        Use to pretty print parts of the protobuf for easier diffing.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field.

    Returns True if the Any payload type could be resolved and was printed in
    expanded "[type_url] { ... }" form; False to fall back to normal printing.
    """
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message:
      packed_message.MergeFromString(message.value)
      self.out.write('%s[%s]' % (self.indent * ' ', message.type_url))
      self._PrintMessageFieldValue(packed_message)
      self.out.write(' ' if self.as_one_line else '\n')
      return True
    else:
      return False

  def _TryCustomFormatMessage(self, message):
    """Gives self.message_formatter a chance to format message.

    Returns True if the formatter produced output (which was written), False
    if it returned None and default formatting should be used.
    """
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    # Custom formatting and expanded-Any printing take precedence, in that
    # order, over the default field-by-field walk below.
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      # Regular fields sort by declaration order; extensions by number.
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree.  Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO(haberman): refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        for element in value:
          self.PrintField(field, element)
      else:
        self.PrintField(field, value)

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        # MessageSet extensions print the message type's name instead of the
        # extension field's name.
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      # The colon is optional in this case, but our cross-language golden files
      # don't include it.
      out.write(': ')

    self.PrintFieldValue(field, value)
    if self.as_one_line:
      out.write(' ')
    else:
      out.write('\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a nested message value surrounded by braces/brackets.

    In multi-line mode the nested fields are indented two extra spaces.
    """
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write(' %s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write(' %s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      # Unknown enum values (proto3) fall back to the raw number.
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, six.text_type):
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We need to escape non-UTF8 chars in TYPE_BYTES field.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      if value:
        out.write('true')
      else:
        out.write('false')
    elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None:
      out.write('{1:{0}}'.format(self.float_format, value))
    else:
      out.write(str(value))
Brian Silverman9c614bc2016-02-15 20:20:02 -0500451
452
def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message. This is different from what the binary msg.ParseFrom(...) does.

  Example
    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    # Normalize to the native str type: decode on PY3, encode on PY2.
    text = text.decode('utf-8') if six.PY3 else text.encode('utf-8')
  return ParseLines(text.split('\n'),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500500
501
def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    # Normalize to the native str type: decode on PY3, encode on PY2.
    text = text.decode('utf-8') if six.PY3 else text.encode('utf-8')
  return MergeLines(text.split('\n'),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500537
538
def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension, allow_field_number,
                 descriptor_pool=descriptor_pool).ParseLines(lines, message)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500564
565
def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Like ParseLines(), but allows repeated values for a non-repeated field, and
  uses the last one.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension, allow_field_number,
                 descriptor_pool=descriptor_pool).MergeLines(lines, message)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500591
592
Austin Schuh40c16522018-10-28 20:27:54 -0700593class _Parser(object):
594 """Text format parser for protocol message."""
Brian Silverman9c614bc2016-02-15 20:20:02 -0500595
Austin Schuh40c16522018-10-28 20:27:54 -0700596 def __init__(self,
597 allow_unknown_extension=False,
598 allow_field_number=False,
599 descriptor_pool=None):
600 self.allow_unknown_extension = allow_unknown_extension
601 self.allow_field_number = allow_field_number
602 self.descriptor_pool = descriptor_pool
Brian Silverman9c614bc2016-02-15 20:20:02 -0500603
Austin Schuh40c16522018-10-28 20:27:54 -0700604 def ParseFromString(self, text, message):
605 """Parses a text representation of a protocol message into a message."""
606 if not isinstance(text, str):
607 text = text.decode('utf-8')
608 return self.ParseLines(text.split('\n'), message)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500609
Austin Schuh40c16522018-10-28 20:27:54 -0700610 def ParseLines(self, lines, message):
611 """Parses a text representation of a protocol message into a message."""
612 self._allow_multiple_scalars = False
613 self._ParseOrMerge(lines, message)
614 return message
Brian Silverman9c614bc2016-02-15 20:20:02 -0500615
Austin Schuh40c16522018-10-28 20:27:54 -0700616 def MergeFromString(self, text, message):
617 """Merges a text representation of a protocol message into a message."""
618 return self._MergeLines(text.split('\n'), message)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500619
Austin Schuh40c16522018-10-28 20:27:54 -0700620 def MergeLines(self, lines, message):
621 """Merges a text representation of a protocol message into a message."""
622 self._allow_multiple_scalars = True
623 self._ParseOrMerge(lines, message)
624 return message
Brian Silverman9c614bc2016-02-15 20:20:02 -0500625
Austin Schuh40c16522018-10-28 20:27:54 -0700626 def _ParseOrMerge(self, lines, message):
627 """Converts a text representation of a protocol message into a message.
628
629 Args:
630 lines: Lines of a message's text representation.
631 message: A protocol buffer message to merge into.
632
633 Raises:
634 ParseError: On text parsing problems.
635 """
636 tokenizer = Tokenizer(lines)
637 while not tokenizer.AtEnd():
638 self._MergeField(tokenizer, message)
639
  def _MergeField(self, tokenizer, message):
    """Merges a single protocol message field into a message.

    Handles expanded Any syntax, extension fields (bracketed names), field
    numbers (when allowed), group-name capitalization, oneof conflict
    checking, and the short repeated format "foo: [1, 2, 3]".

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      message: A protocol message to record the data.

    Raises:
      ParseError: In case of text parsing problems.
    """
    message_descriptor = message.DESCRIPTOR
    # Expanded Any form: "[type_url_prefix/full.TypeName] { ... }".  The
    # payload is parsed into a temporary message and then Pack()ed back.
    if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
        tokenizer.TryConsume('[')):
      type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
      tokenizer.Consume(']')
      # The colon before the opening brace is optional.
      tokenizer.TryConsume(':')
      if tokenizer.TryConsume('<'):
        expanded_any_end_token = '>'
      else:
        tokenizer.Consume('{')
        expanded_any_end_token = '}'
      expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
                                                           self.descriptor_pool)
      if not expanded_any_sub_message:
        raise ParseError('Type %s not found in descriptor pool' %
                         packed_type_name)
      while not tokenizer.TryConsume(expanded_any_end_token):
        if tokenizer.AtEnd():
          raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
                                                  (expanded_any_end_token,))
        self._MergeField(tokenizer, expanded_any_sub_message)
      message.Pack(expanded_any_sub_message,
                   type_url_prefix=type_url_prefix)
      return

    # Extension field: "[qualified.extension.name]".
    if tokenizer.TryConsume('['):
      name = [tokenizer.ConsumeIdentifier()]
      while tokenizer.TryConsume('.'):
        name.append(tokenizer.ConsumeIdentifier())
      name = '.'.join(name)

      if not message_descriptor.is_extendable:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" does not have extensions.' %
            message_descriptor.full_name)
      # pylint: disable=protected-access
      field = message.Extensions._FindExtensionByName(name)
      # pylint: enable=protected-access
      if not field:
        if self.allow_unknown_extension:
          # field stays None; the contents are skipped below.
          field = None
        else:
          raise tokenizer.ParseErrorPreviousToken(
              'Extension "%s" not registered. '
              'Did you import the _pb2 module which defines it? '
              'If you are trying to place the extension in the MessageSet '
              'field of another message that is in an Any or MessageSet field, '
              'that message\'s _pb2 module must be imported as well' % name)
      elif message_descriptor != field.containing_type:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" does not extend message type "%s".' %
            (name, message_descriptor.full_name))

      tokenizer.Consume(']')

    else:
      name = tokenizer.ConsumeIdentifierOrNumber()
      if self.allow_field_number and name.isdigit():
        number = ParseInteger(name, True, True)
        field = message_descriptor.fields_by_number.get(number, None)
        if not field and message_descriptor.is_extendable:
          field = message.Extensions._FindExtensionByNumber(number)
      else:
        field = message_descriptor.fields_by_name.get(name, None)

        # Group names are expected to be capitalized as they appear in the
        # .proto file, which actually matches their type names, not their field
        # names.
        if not field:
          field = message_descriptor.fields_by_name.get(name.lower(), None)
          if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
            field = None

        if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
            field.message_type.name != name):
          field = None

      if not field:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" has no field named "%s".' %
            (message_descriptor.full_name, name))

    if field:
      if not self._allow_multiple_scalars and field.containing_oneof:
        # Check if there's a different field set in this oneof.
        # Note that we ignore the case if the same field was set before, and we
        # apply _allow_multiple_scalars to non-scalar fields as well.
        which_oneof = message.WhichOneof(field.containing_oneof.name)
        if which_oneof is not None and which_oneof != field.name:
          raise tokenizer.ParseErrorPreviousToken(
              'Field "%s" is specified along with field "%s", another member '
              'of oneof "%s" for message type "%s".' %
              (field.name, which_oneof, field.containing_oneof.name,
               message_descriptor.full_name))

      # The colon is required before scalar values, optional before messages.
      if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        tokenizer.TryConsume(':')
        merger = self._MergeMessageField
      else:
        tokenizer.Consume(':')
        merger = self._MergeScalarField

      if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
          tokenizer.TryConsume('[')):
        # Short repeated format, e.g. "foo: [1, 2, 3]"
        if not tokenizer.TryConsume(']'):
          while True:
            merger(tokenizer, message, field)
            if tokenizer.TryConsume(']'):
              break
            tokenizer.Consume(',')

      else:
        merger(tokenizer, message, field)

    else:  # Proto field is unknown.
      assert self.allow_unknown_extension
      _SkipFieldContents(tokenizer)

    # For historical reasons, fields may optionally be separated by commas or
    # semicolons.
    if not tokenizer.TryConsume(','):
      tokenizer.TryConsume(';')
773
774 def _ConsumeAnyTypeUrl(self, tokenizer):
775 """Consumes a google.protobuf.Any type URL and returns the type name."""
776 # Consume "type.googleapis.com/".
777 prefix = [tokenizer.ConsumeIdentifier()]
778 tokenizer.Consume('.')
779 prefix.append(tokenizer.ConsumeIdentifier())
780 tokenizer.Consume('.')
781 prefix.append(tokenizer.ConsumeIdentifier())
782 tokenizer.Consume('/')
783 # Consume the fully-qualified type name.
Brian Silverman9c614bc2016-02-15 20:20:02 -0500784 name = [tokenizer.ConsumeIdentifier()]
785 while tokenizer.TryConsume('.'):
786 name.append(tokenizer.ConsumeIdentifier())
Austin Schuh40c16522018-10-28 20:27:54 -0700787 return '.'.join(prefix), '.'.join(name)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500788
  def _MergeMessageField(self, tokenizer, message, field):
    """Merges a single message field into a message.

    Args:
      tokenizer: A tokenizer to parse the field value.
      message: The message of which field is a member.
      field: The descriptor of the field to be merged.

    Raises:
      ParseError: In case of text parsing problems.
    """
    is_map_entry = _IsMapEntry(field)

    # Nested messages may be delimited by <...> or {...}.
    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      elif is_map_entry:
        # Map entries are parsed into a detached entry message and copied
        # into the map container at the end of this method.
        sub_message = getattr(message, field.name).GetEntryClass()()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        if (not self._allow_multiple_scalars and
            message.HasExtension(field)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" extensions.' %
              (message.DESCRIPTOR.full_name, field.full_name))
        sub_message = message.Extensions[field]
      else:
        # Also apply _allow_multiple_scalars to message field.
        # TODO(jieluo): Change to _allow_singular_overwrites.
        if (not self._allow_multiple_scalars and
            message.HasField(field.name)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" fields.' %
              (message.DESCRIPTOR.full_name, field.name))
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message present even if no fields follow.
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
      self._MergeField(tokenizer, sub_message)

    if is_map_entry:
      value_cpptype = field.message_type.fields_by_name['value'].cpp_type
      if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        value = getattr(message, field.name)[sub_message.key]
        value.MergeFrom(sub_message.value)
      else:
        getattr(message, field.name)[sub_message.key] = sub_message.value
Brian Silverman9c614bc2016-02-15 20:20:02 -0500846
Austin Schuh40c16522018-10-28 20:27:54 -0700847 @staticmethod
848 def _IsProto3Syntax(message):
849 message_descriptor = message.DESCRIPTOR
850 return (hasattr(message_descriptor, 'syntax') and
851 message_descriptor.syntax == 'proto3')
852
853 def _MergeScalarField(self, tokenizer, message, field):
854 """Merges a single scalar field into a message.
855
856 Args:
857 tokenizer: A tokenizer to parse the field value.
858 message: A protocol message to record the data.
859 field: The descriptor of the field to be merged.
860
861 Raises:
862 ParseError: In case of text parsing problems.
863 RuntimeError: On runtime errors.
864 """
865 _ = self.allow_unknown_extension
866 value = None
867
868 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
869 descriptor.FieldDescriptor.TYPE_SINT32,
870 descriptor.FieldDescriptor.TYPE_SFIXED32):
871 value = _ConsumeInt32(tokenizer)
872 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
873 descriptor.FieldDescriptor.TYPE_SINT64,
874 descriptor.FieldDescriptor.TYPE_SFIXED64):
875 value = _ConsumeInt64(tokenizer)
876 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
877 descriptor.FieldDescriptor.TYPE_FIXED32):
878 value = _ConsumeUint32(tokenizer)
879 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
880 descriptor.FieldDescriptor.TYPE_FIXED64):
881 value = _ConsumeUint64(tokenizer)
882 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
883 descriptor.FieldDescriptor.TYPE_DOUBLE):
884 value = tokenizer.ConsumeFloat()
885 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
886 value = tokenizer.ConsumeBool()
887 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
888 value = tokenizer.ConsumeString()
889 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
890 value = tokenizer.ConsumeByteString()
891 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
892 value = tokenizer.ConsumeEnum(field)
893 else:
894 raise RuntimeError('Unknown field type %d' % field.type)
895
896 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
897 if field.is_extension:
898 message.Extensions[field].append(value)
899 else:
900 getattr(message, field.name).append(value)
901 else:
902 # Proto3 doesn't represent presence so we can't test if multiple scalars
903 # have occurred. We have to allow them.
904 can_check_presence = not self._IsProto3Syntax(message)
905 if field.is_extension:
906 if (not self._allow_multiple_scalars and can_check_presence and
907 message.HasExtension(field)):
908 raise tokenizer.ParseErrorPreviousToken(
909 'Message type "%s" should not have multiple "%s" extensions.' %
910 (message.DESCRIPTOR.full_name, field.full_name))
911 else:
912 message.Extensions[field] = value
913 else:
914 if (not self._allow_multiple_scalars and can_check_presence and
915 message.HasField(field.name)):
916 raise tokenizer.ParseErrorPreviousToken(
917 'Message type "%s" should not have multiple "%s" fields.' %
918 (message.DESCRIPTOR.full_name, field.name))
919 else:
920 setattr(message, field.name, value)
Brian Silverman9c614bc2016-02-15 20:20:02 -0500921
922
def _SkipFieldContents(tokenizer):
  """Skips over contents (value or message) of a field.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  # Heuristic: a scalar value is introduced by ':' followed by anything
  # other than '{' or '<'. Everything else (no colon, or colon followed by
  # an opening delimiter) must be a message body — or ill-formed input,
  # which the message-skipping path will report.
  is_scalar = (tokenizer.TryConsume(':') and
               not tokenizer.LookingAt('{') and
               not tokenizer.LookingAt('<'))
  if is_scalar:
    _SkipFieldValue(tokenizer)
  else:
    _SkipFieldMessage(tokenizer)
940
941
def _SkipField(tokenizer):
  """Skips over a complete field (name and value/message).

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  if not tokenizer.TryConsume('['):
    # Plain field name (or unknown-field number).
    tokenizer.ConsumeIdentifierOrNumber()
  else:
    # Bracketed extension name: dotted identifiers up to the closing ']'.
    tokenizer.ConsumeIdentifier()
    while tokenizer.TryConsume('.'):
      tokenizer.ConsumeIdentifier()
    tokenizer.Consume(']')

  _SkipFieldContents(tokenizer)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
963
964
def _SkipFieldMessage(tokenizer):
  """Skips over a field message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  # Accept either angle-bracket or curly-brace delimiters, and require the
  # close to match the open.
  delimiter = '>' if tokenizer.TryConsume('<') else None
  if delimiter is None:
    tokenizer.Consume('{')
    delimiter = '}'

  while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
    _SkipField(tokenizer)

  tokenizer.Consume(delimiter)
982
983
def _SkipFieldValue(tokenizer):
  """Skips over a field value.

  Args:
    tokenizer: A tokenizer to parse the field name and values.

  Raises:
    ParseError: In case an invalid field value is found.
  """
  # String/bytes values may span several adjacent quoted tokens; if the
  # first token is a string, swallow the whole run.
  if tokenizer.TryConsumeByteString():
    while tokenizer.TryConsumeByteString():
      pass
    return

  # Otherwise the value must be an identifier, an integer, or a float.
  consumed = (tokenizer.TryConsumeIdentifier() or
              _TryConsumeInt64(tokenizer) or
              _TryConsumeUint64(tokenizer) or
              tokenizer.TryConsumeFloat())
  if not consumed:
    raise ParseError('Invalid field value: ' + tokenizer.token)
1004
1005
Austin Schuh40c16522018-10-28 20:27:54 -07001006class Tokenizer(object):
Brian Silverman9c614bc2016-02-15 20:20:02 -05001007 """Protocol buffer text representation tokenizer.
1008
1009 This class handles the lower level string parsing by splitting it into
1010 meaningful tokens.
1011
1012 It was directly ported from the Java protocol buffer API.
1013 """
1014
Austin Schuh40c16522018-10-28 20:27:54 -07001015 _WHITESPACE = re.compile(r'\s+')
1016 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
1017 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001018 _TOKEN = re.compile('|'.join([
Austin Schuh40c16522018-10-28 20:27:54 -07001019 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier
Brian Silverman9c614bc2016-02-15 20:20:02 -05001020 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number
Austin Schuh40c16522018-10-28 20:27:54 -07001021 ] + [ # quoted str for each quote mark
Brian Silverman9c614bc2016-02-15 20:20:02 -05001022 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
1023 ]))
1024
Austin Schuh40c16522018-10-28 20:27:54 -07001025 _IDENTIFIER = re.compile(r'[^\d\W]\w*')
1026 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
Brian Silverman9c614bc2016-02-15 20:20:02 -05001027
Austin Schuh40c16522018-10-28 20:27:54 -07001028 def __init__(self, lines, skip_comments=True):
Brian Silverman9c614bc2016-02-15 20:20:02 -05001029 self._position = 0
1030 self._line = -1
1031 self._column = 0
1032 self._token_start = None
1033 self.token = ''
1034 self._lines = iter(lines)
1035 self._current_line = ''
1036 self._previous_line = 0
1037 self._previous_column = 0
1038 self._more_lines = True
Austin Schuh40c16522018-10-28 20:27:54 -07001039 self._skip_comments = skip_comments
1040 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
1041 or self._WHITESPACE)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001042 self._SkipWhitespace()
1043 self.NextToken()
1044
1045 def LookingAt(self, token):
1046 return self.token == token
1047
1048 def AtEnd(self):
1049 """Checks the end of the text was reached.
1050
1051 Returns:
1052 True iff the end was reached.
1053 """
1054 return not self.token
1055
1056 def _PopLine(self):
1057 while len(self._current_line) <= self._column:
1058 try:
1059 self._current_line = next(self._lines)
1060 except StopIteration:
1061 self._current_line = ''
1062 self._more_lines = False
1063 return
1064 else:
1065 self._line += 1
1066 self._column = 0
1067
1068 def _SkipWhitespace(self):
1069 while True:
1070 self._PopLine()
Austin Schuh40c16522018-10-28 20:27:54 -07001071 match = self._whitespace_pattern.match(self._current_line, self._column)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001072 if not match:
1073 break
1074 length = len(match.group(0))
1075 self._column += length
1076
1077 def TryConsume(self, token):
1078 """Tries to consume a given piece of text.
1079
1080 Args:
1081 token: Text to consume.
1082
1083 Returns:
1084 True iff the text was consumed.
1085 """
1086 if self.token == token:
1087 self.NextToken()
1088 return True
1089 return False
1090
1091 def Consume(self, token):
1092 """Consumes a piece of text.
1093
1094 Args:
1095 token: Text to consume.
1096
1097 Raises:
1098 ParseError: If the text couldn't be consumed.
1099 """
1100 if not self.TryConsume(token):
Austin Schuh40c16522018-10-28 20:27:54 -07001101 raise self.ParseError('Expected "%s".' % token)
1102
1103 def ConsumeComment(self):
1104 result = self.token
1105 if not self._COMMENT.match(result):
1106 raise self.ParseError('Expected comment.')
1107 self.NextToken()
1108 return result
1109
1110 def ConsumeCommentOrTrailingComment(self):
1111 """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""
1112
1113 # Tokenizer initializes _previous_line and _previous_column to 0. As the
1114 # tokenizer starts, it looks like there is a previous token on the line.
1115 just_started = self._line == 0 and self._column == 0
1116
1117 before_parsing = self._previous_line
1118 comment = self.ConsumeComment()
1119
1120 # A trailing comment is a comment on the same line than the previous token.
1121 trailing = (self._previous_line == before_parsing
1122 and not just_started)
1123
1124 return trailing, comment
Brian Silverman9c614bc2016-02-15 20:20:02 -05001125
1126 def TryConsumeIdentifier(self):
1127 try:
1128 self.ConsumeIdentifier()
1129 return True
1130 except ParseError:
1131 return False
1132
1133 def ConsumeIdentifier(self):
1134 """Consumes protocol message field identifier.
1135
1136 Returns:
1137 Identifier string.
1138
1139 Raises:
1140 ParseError: If an identifier couldn't be consumed.
1141 """
1142 result = self.token
1143 if not self._IDENTIFIER.match(result):
Austin Schuh40c16522018-10-28 20:27:54 -07001144 raise self.ParseError('Expected identifier.')
Brian Silverman9c614bc2016-02-15 20:20:02 -05001145 self.NextToken()
1146 return result
1147
Austin Schuh40c16522018-10-28 20:27:54 -07001148 def TryConsumeIdentifierOrNumber(self):
Brian Silverman9c614bc2016-02-15 20:20:02 -05001149 try:
Austin Schuh40c16522018-10-28 20:27:54 -07001150 self.ConsumeIdentifierOrNumber()
Brian Silverman9c614bc2016-02-15 20:20:02 -05001151 return True
1152 except ParseError:
1153 return False
1154
Austin Schuh40c16522018-10-28 20:27:54 -07001155 def ConsumeIdentifierOrNumber(self):
1156 """Consumes protocol message field identifier.
Brian Silverman9c614bc2016-02-15 20:20:02 -05001157
1158 Returns:
Austin Schuh40c16522018-10-28 20:27:54 -07001159 Identifier string.
Brian Silverman9c614bc2016-02-15 20:20:02 -05001160
1161 Raises:
Austin Schuh40c16522018-10-28 20:27:54 -07001162 ParseError: If an identifier couldn't be consumed.
Brian Silverman9c614bc2016-02-15 20:20:02 -05001163 """
Austin Schuh40c16522018-10-28 20:27:54 -07001164 result = self.token
1165 if not self._IDENTIFIER_OR_NUMBER.match(result):
1166 raise self.ParseError('Expected identifier or number, got %s.' % result)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001167 self.NextToken()
1168 return result
1169
Austin Schuh40c16522018-10-28 20:27:54 -07001170 def TryConsumeInteger(self):
Brian Silverman9c614bc2016-02-15 20:20:02 -05001171 try:
Austin Schuh40c16522018-10-28 20:27:54 -07001172 # Note: is_long only affects value type, not whether an error is raised.
1173 self.ConsumeInteger()
Brian Silverman9c614bc2016-02-15 20:20:02 -05001174 return True
1175 except ParseError:
1176 return False
1177
Austin Schuh40c16522018-10-28 20:27:54 -07001178 def ConsumeInteger(self, is_long=False):
1179 """Consumes an integer number.
Brian Silverman9c614bc2016-02-15 20:20:02 -05001180
Austin Schuh40c16522018-10-28 20:27:54 -07001181 Args:
1182 is_long: True if the value should be returned as a long integer.
Brian Silverman9c614bc2016-02-15 20:20:02 -05001183 Returns:
1184 The integer parsed.
1185
1186 Raises:
Austin Schuh40c16522018-10-28 20:27:54 -07001187 ParseError: If an integer couldn't be consumed.
Brian Silverman9c614bc2016-02-15 20:20:02 -05001188 """
1189 try:
Austin Schuh40c16522018-10-28 20:27:54 -07001190 result = _ParseAbstractInteger(self.token, is_long=is_long)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001191 except ValueError as e:
Austin Schuh40c16522018-10-28 20:27:54 -07001192 raise self.ParseError(str(e))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001193 self.NextToken()
1194 return result
1195
1196 def TryConsumeFloat(self):
1197 try:
1198 self.ConsumeFloat()
1199 return True
1200 except ParseError:
1201 return False
1202
1203 def ConsumeFloat(self):
1204 """Consumes an floating point number.
1205
1206 Returns:
1207 The number parsed.
1208
1209 Raises:
1210 ParseError: If a floating point number couldn't be consumed.
1211 """
1212 try:
1213 result = ParseFloat(self.token)
1214 except ValueError as e:
Austin Schuh40c16522018-10-28 20:27:54 -07001215 raise self.ParseError(str(e))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001216 self.NextToken()
1217 return result
1218
1219 def ConsumeBool(self):
1220 """Consumes a boolean value.
1221
1222 Returns:
1223 The bool parsed.
1224
1225 Raises:
1226 ParseError: If a boolean value couldn't be consumed.
1227 """
1228 try:
1229 result = ParseBool(self.token)
1230 except ValueError as e:
Austin Schuh40c16522018-10-28 20:27:54 -07001231 raise self.ParseError(str(e))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001232 self.NextToken()
1233 return result
1234
Austin Schuh40c16522018-10-28 20:27:54 -07001235 def TryConsumeByteString(self):
Brian Silverman9c614bc2016-02-15 20:20:02 -05001236 try:
Austin Schuh40c16522018-10-28 20:27:54 -07001237 self.ConsumeByteString()
Brian Silverman9c614bc2016-02-15 20:20:02 -05001238 return True
1239 except ParseError:
1240 return False
1241
1242 def ConsumeString(self):
1243 """Consumes a string value.
1244
1245 Returns:
1246 The string parsed.
1247
1248 Raises:
1249 ParseError: If a string value couldn't be consumed.
1250 """
1251 the_bytes = self.ConsumeByteString()
1252 try:
1253 return six.text_type(the_bytes, 'utf-8')
1254 except UnicodeDecodeError as e:
1255 raise self._StringParseError(e)
1256
1257 def ConsumeByteString(self):
1258 """Consumes a byte array value.
1259
1260 Returns:
1261 The array parsed (as a string).
1262
1263 Raises:
1264 ParseError: If a byte array value couldn't be consumed.
1265 """
1266 the_list = [self._ConsumeSingleByteString()]
1267 while self.token and self.token[0] in _QUOTES:
1268 the_list.append(self._ConsumeSingleByteString())
1269 return b''.join(the_list)
1270
1271 def _ConsumeSingleByteString(self):
1272 """Consume one token of a string literal.
1273
1274 String literals (whether bytes or text) can come in multiple adjacent
1275 tokens which are automatically concatenated, like in C or Python. This
1276 method only consumes one token.
1277
1278 Returns:
1279 The token parsed.
1280 Raises:
1281 ParseError: When the wrong format data is found.
1282 """
1283 text = self.token
1284 if len(text) < 1 or text[0] not in _QUOTES:
Austin Schuh40c16522018-10-28 20:27:54 -07001285 raise self.ParseError('Expected string but found: %r' % (text,))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001286
1287 if len(text) < 2 or text[-1] != text[0]:
Austin Schuh40c16522018-10-28 20:27:54 -07001288 raise self.ParseError('String missing ending quote: %r' % (text,))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001289
1290 try:
1291 result = text_encoding.CUnescape(text[1:-1])
1292 except ValueError as e:
Austin Schuh40c16522018-10-28 20:27:54 -07001293 raise self.ParseError(str(e))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001294 self.NextToken()
1295 return result
1296
1297 def ConsumeEnum(self, field):
1298 try:
1299 result = ParseEnum(field, self.token)
1300 except ValueError as e:
Austin Schuh40c16522018-10-28 20:27:54 -07001301 raise self.ParseError(str(e))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001302 self.NextToken()
1303 return result
1304
1305 def ParseErrorPreviousToken(self, message):
1306 """Creates and *returns* a ParseError for the previously read token.
1307
1308 Args:
1309 message: A message to set for the exception.
1310
1311 Returns:
1312 A ParseError instance.
1313 """
Austin Schuh40c16522018-10-28 20:27:54 -07001314 return ParseError(message, self._previous_line + 1,
1315 self._previous_column + 1)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001316
Austin Schuh40c16522018-10-28 20:27:54 -07001317 def ParseError(self, message):
Brian Silverman9c614bc2016-02-15 20:20:02 -05001318 """Creates and *returns* a ParseError for the current token."""
Austin Schuh40c16522018-10-28 20:27:54 -07001319 return ParseError(message, self._line + 1, self._column + 1)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001320
1321 def _StringParseError(self, e):
Austin Schuh40c16522018-10-28 20:27:54 -07001322 return self.ParseError('Couldn\'t parse string: ' + str(e))
Brian Silverman9c614bc2016-02-15 20:20:02 -05001323
1324 def NextToken(self):
1325 """Reads the next meaningful token."""
1326 self._previous_line = self._line
1327 self._previous_column = self._column
1328
1329 self._column += len(self.token)
1330 self._SkipWhitespace()
1331
1332 if not self._more_lines:
1333 self.token = ''
1334 return
1335
1336 match = self._TOKEN.match(self._current_line, self._column)
Austin Schuh40c16522018-10-28 20:27:54 -07001337 if not match and not self._skip_comments:
1338 match = self._COMMENT.match(self._current_line, self._column)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001339 if match:
1340 token = match.group(0)
1341 self.token = token
1342 else:
1343 self.token = self._current_line[self._column]
1344
Austin Schuh40c16522018-10-28 20:27:54 -07001345# Aliased so it can still be accessed by current visibility violators.
1346# TODO(dbarnett): Migrate violators to textformat_tokenizer.
1347_Tokenizer = Tokenizer # pylint: disable=invalid-name
1348
1349
def _ConsumeInt32(tokenizer):
  """Consumes a signed 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with 32-bit signed semantics.
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=True)
1363
1364
def _ConsumeUint32(tokenizer):
  """Consumes an unsigned 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with 32-bit unsigned semantics.
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=False)
1378
1379
def _TryConsumeInt64(tokenizer):
  """Returns True and consumes the token iff it is a signed 64bit integer."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True
1386
1387
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1401
1402
def _TryConsumeUint64(tokenizer):
  """Returns True and consumes the token iff it is an unsigned 64bit integer."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True
1409
1410
def _ConsumeUint64(tokenizer):
  """Consumes an unsigned 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with 64-bit unsigned semantics.
  return _ConsumeInteger(tokenizer, is_long=True, is_signed=False)
1424
1425
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Returns True and consumes the token iff it is a matching integer."""
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
  except ParseError:
    return False
  return True
1432
1433
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  token = tokenizer.token
  try:
    parsed = ParseInteger(token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise as a ParseError anchored at the offending token.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return parsed
1454
Brian Silverman9c614bc2016-02-15 20:20:02 -05001455
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse first (ValueError propagates to the caller), then range-check
  # against the checker that matches this signedness/width combination.
  result = _ParseAbstractInteger(text, is_long=is_long)
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result
1477
1478
Austin Schuh40c16522018-10-28 20:27:54 -07001479def _ParseAbstractInteger(text, is_long=False):
1480 """Parses an integer without checking size/signedness.
1481
1482 Args:
1483 text: The text to parse.
1484 is_long: True if the value should be returned as a long integer.
1485
1486 Returns:
1487 The integer value.
1488
1489 Raises:
1490 ValueError: Thrown Iff the text is not a valid integer.
1491 """
1492 # Do the actual parsing. Exception handling is propagated to caller.
1493 try:
1494 # We force 32-bit values to int and 64-bit values to long to make
1495 # alternate implementations where the distinction is more significant
1496 # (e.g. the C++ implementation) simpler.
1497 if is_long:
1498 return long(text, 0)
1499 else:
1500 return int(text, 0)
1501 except ValueError:
1502 raise ValueError('Couldn\'t parse integer: %s' % text)
1503
1504
Brian Silverman9c614bc2016-02-15 20:20:02 -05001505def ParseFloat(text):
1506 """Parse a floating point number.
1507
1508 Args:
1509 text: Text to parse.
1510
1511 Returns:
1512 The number parsed.
1513
1514 Raises:
1515 ValueError: If a floating point number couldn't be parsed.
1516 """
1517 try:
1518 # Assume Python compatible syntax.
1519 return float(text)
1520 except ValueError:
1521 # Check alternative spellings.
1522 if _FLOAT_INFINITY.match(text):
1523 if text[0] == '-':
1524 return float('-inf')
1525 else:
1526 return float('inf')
1527 elif _FLOAT_NAN.match(text):
1528 return float('nan')
1529 else:
1530 # assume '1.0f' format
1531 try:
1532 return float(text.rstrip('f'))
1533 except ValueError:
1534 raise ValueError('Couldn\'t parse float: %s' % text)
1535
1536
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  # Text format accepts several spellings for each boolean value.
  if text in ('true', 't', '1', 'True'):
    return True
  if text in ('false', 'f', '0', 'False'):
    return False
  raise ValueError('Expected "true" or "false".')
1555
1556
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_type = field.enum_type
  try:
    numeric = int(value, 0)
  except ValueError:
    # Not a number, so look the value up as an enum name.
    named = enum_type.values_by_name.get(value, None)
    if named is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_type.full_name, value))
    return named.number
  # Numeric value. The 'syntax' attribute is checked for compatibility with
  # older descriptors; proto3 accepts unknown numeric enum values.
  if hasattr(field.file, 'syntax') and field.file.syntax == 'proto3':
    return numeric
  known = enum_type.values_by_number.get(numeric, None)
  if known is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_type.full_name, numeric))
  return known.number