Blame - absl/debugging/internal/demangle.cc - RealtimeRoboticsGroup/test

blob: 46cdb67b1fbfdaecf489d8b8b684b90a0c7897d8 [file] [log] [blame]

Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1	// Copyright 2018 The Abseil Authors.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// https://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	// For reference check out:
				16	// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
				17	//
				18	// Note that we only have partial C++11 support yet.
				19
				20	#include "absl/debugging/internal/demangle.h"
				21
				22	#include <cstdint>
				23	#include <cstdio>
				24	#include <limits>
				25
				26	namespace absl {
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	27	ABSL_NAMESPACE_BEGIN
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	28	namespace debugging_internal {
				29
				30	typedef struct {
				31	const char *abbrev;
				32	const char *real_name;
				33	// Number of arguments in <expression> context, or 0 if disallowed.
				34	int arity;
				35	} AbbrevPair;
				36
				37	// List of operators from Itanium C++ ABI.
				38	static const AbbrevPair kOperatorList[] = {
				39	// New has special syntax (not currently supported).
				40	{"nw", "new", 0},
				41	{"na", "new[]", 0},
				42
				43	// Works except that the 'gs' prefix is not supported.
				44	{"dl", "delete", 1},
				45	{"da", "delete[]", 1},
				46
				47	{"ps", "+", 1}, // "positive"
				48	{"ng", "-", 1}, // "negative"
				49	{"ad", "&", 1}, // "address-of"
				50	{"de", "*", 1}, // "dereference"
				51	{"co", "~", 1},
				52
				53	{"pl", "+", 2},
				54	{"mi", "-", 2},
				55	{"ml", "*", 2},
				56	{"dv", "/", 2},
				57	{"rm", "%", 2},
				58	{"an", "&", 2},
				59	{"or", "\|", 2},
				60	{"eo", "^", 2},
				61	{"aS", "=", 2},
				62	{"pL", "+=", 2},
				63	{"mI", "-=", 2},
				64	{"mL", "*=", 2},
				65	{"dV", "/=", 2},
				66	{"rM", "%=", 2},
				67	{"aN", "&=", 2},
				68	{"oR", "\|=", 2},
				69	{"eO", "^=", 2},
				70	{"ls", "<<", 2},
				71	{"rs", ">>", 2},
				72	{"lS", "<<=", 2},
				73	{"rS", ">>=", 2},
				74	{"eq", "==", 2},
				75	{"ne", "!=", 2},
				76	{"lt", "<", 2},
				77	{"gt", ">", 2},
				78	{"le", "<=", 2},
				79	{"ge", ">=", 2},
				80	{"nt", "!", 1},
				81	{"aa", "&&", 2},
				82	{"oo", "\|\|", 2},
				83	{"pp", "++", 1},
				84	{"mm", "--", 1},
				85	{"cm", ",", 2},
				86	{"pm", "->*", 2},
				87	{"pt", "->", 0}, // Special syntax
				88	{"cl", "()", 0}, // Special syntax
				89	{"ix", "[]", 2},
				90	{"qu", "?", 3},
				91	{"st", "sizeof", 0}, // Special syntax
				92	{"sz", "sizeof", 1}, // Not a real operator name, but used in expressions.
				93	{nullptr, nullptr, 0},
				94	};
				95
				96	// List of builtin types from Itanium C++ ABI.
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	97	//
				98	// Invariant: only one- or two-character type abbreviations here.
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	99	static const AbbrevPair kBuiltinTypeList[] = {
				100	{"v", "void", 0},
				101	{"w", "wchar_t", 0},
				102	{"b", "bool", 0},
				103	{"c", "char", 0},
				104	{"a", "signed char", 0},
				105	{"h", "unsigned char", 0},
				106	{"s", "short", 0},
				107	{"t", "unsigned short", 0},
				108	{"i", "int", 0},
				109	{"j", "unsigned int", 0},
				110	{"l", "long", 0},
				111	{"m", "unsigned long", 0},
				112	{"x", "long long", 0},
				113	{"y", "unsigned long long", 0},
				114	{"n", "__int128", 0},
				115	{"o", "unsigned __int128", 0},
				116	{"f", "float", 0},
				117	{"d", "double", 0},
				118	{"e", "long double", 0},
				119	{"g", "__float128", 0},
				120	{"z", "ellipsis", 0},
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	121
				122	{"De", "decimal128", 0}, // IEEE 754r decimal floating point (128 bits)
				123	{"Dd", "decimal64", 0}, // IEEE 754r decimal floating point (64 bits)
				124	{"Dc", "decltype(auto)", 0},
				125	{"Da", "auto", 0},
				126	{"Dn", "std::nullptr_t", 0}, // i.e., decltype(nullptr)
				127	{"Df", "decimal32", 0}, // IEEE 754r decimal floating point (32 bits)
				128	{"Di", "char32_t", 0},
				129	{"Du", "char8_t", 0},
				130	{"Ds", "char16_t", 0},
				131	{"Dh", "float16", 0}, // IEEE 754r half-precision float (16 bits)
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	132	{nullptr, nullptr, 0},
				133	};
				134
				135	// List of substitutions Itanium C++ ABI.
				136	static const AbbrevPair kSubstitutionList[] = {
				137	{"St", "", 0},
				138	{"Sa", "allocator", 0},
				139	{"Sb", "basic_string", 0},
				140	// std::basic_string<char, std::char_traits<char>,std::allocator<char> >
				141	{"Ss", "string", 0},
				142	// std::basic_istream<char, std::char_traits<char> >
				143	{"Si", "istream", 0},
				144	// std::basic_ostream<char, std::char_traits<char> >
				145	{"So", "ostream", 0},
				146	// std::basic_iostream<char, std::char_traits<char> >
				147	{"Sd", "iostream", 0},
				148	{nullptr, nullptr, 0},
				149	};
				150
				151	// State needed for demangling. This struct is copied in almost every stack
				152	// frame, so every byte counts.
				153	typedef struct {
				154	int mangled_idx; // Cursor of mangled name.
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	155	int out_cur_idx; // Cursor of output string.
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	156	int prev_name_idx; // For constructors/destructors.
				157	signed int prev_name_length : 16; // For constructors/destructors.
				158	signed int nest_level : 15; // For nested names.
				159	unsigned int append : 1; // Append flag.
				160	// Note: for some reason MSVC can't pack "bool append : 1" into the same int
				161	// with the above two fields, so we use an int instead. Amusingly it can pack
				162	// "signed bool" as expected, but relying on that to continue to be a legal
				163	// type seems ill-advised (as it's illegal in at least clang).
				164	} ParseState;
				165
				166	static_assert(sizeof(ParseState) == 4 * sizeof(int),
				167	"unexpected size of ParseState");
				168
				169	// One-off state for demangling that's not subject to backtracking -- either
				170	// constant data, data that's intentionally immune to backtracking (steps), or
				171	// data that would never be changed by backtracking anyway (recursion_depth).
				172	//
				173	// Only one copy of this exists for each call to Demangle, so the size of this
				174	// struct is nearly inconsequential.
				175	typedef struct {
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	176	const char *mangled_begin; // Beginning of input string.
				177	char *out; // Beginning of output string.
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	178	int out_end_idx; // One past last allowed output character.
				179	int recursion_depth; // For stack exhaustion prevention.
				180	int steps; // Cap how much work we'll do, regardless of depth.
				181	ParseState parse_state; // Backtrackable state copied for most frames.
				182	} State;
				183
				184	namespace {
				185	// Prevent deep recursion / stack exhaustion.
				186	// Also prevent unbounded handling of complex inputs.
				187	class ComplexityGuard {
				188	public:
				189	explicit ComplexityGuard(State *state) : state_(state) {
				190	++state->recursion_depth;
				191	++state->steps;
				192	}
				193	~ComplexityGuard() { --state_->recursion_depth; }
				194
				195	// 256 levels of recursion seems like a reasonable upper limit on depth.
				196	// 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
				197	// "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
				198	static constexpr int kRecursionDepthLimit = 256;
				199
				200	// We're trying to pick a charitable upper-limit on how many parse steps are
				201	// necessary to handle something that a human could actually make use of.
				202	// This is mostly in place as a bound on how much work we'll do if we are
				203	// asked to demangle an mangled name from an untrusted source, so it should be
				204	// much larger than the largest expected symbol, but much smaller than the
				205	// amount of work we can do in, e.g., a second.
				206	//
				207	// Some real-world symbols from an arbitrary binary started failing between
				208	// 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set
				209	// the limit.
				210	//
				211	// Spending one second on 2^17 parse steps would require each step to take
				212	// 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in
				213	// under a second.
				214	static constexpr int kParseStepsLimit = 1 << 17;
				215
				216	bool IsTooComplex() const {
				217	return state_->recursion_depth > kRecursionDepthLimit \|\|
				218	state_->steps > kParseStepsLimit;
				219	}
				220
				221	private:
				222	State *state_;
				223	};
				224	} // namespace
				225
				226	// We don't use strlen() in libc since it's not guaranteed to be async
				227	// signal safe.
				228	static size_t StrLen(const char *str) {
				229	size_t len = 0;
				230	while (*str != '\0') {
				231	++str;
				232	++len;
				233	}
				234	return len;
				235	}
				236
				237	// Returns true if "str" has at least "n" characters remaining.
				238	static bool AtLeastNumCharsRemaining(const char *str, int n) {
				239	for (int i = 0; i < n; ++i) {
				240	if (str[i] == '\0') {
				241	return false;
				242	}
				243	}
				244	return true;
				245	}
				246
				247	// Returns true if "str" has "prefix" as a prefix.
				248	static bool StrPrefix(const char str, const char prefix) {
				249	size_t i = 0;
				250	while (str[i] != '\0' && prefix[i] != '\0' && str[i] == prefix[i]) {
				251	++i;
				252	}
				253	return prefix[i] == '\0'; // Consumed everything in "prefix".
				254	}
				255
				256	static void InitState(State state, const char mangled, char *out,
				257	int out_size) {
				258	state->mangled_begin = mangled;
				259	state->out = out;
				260	state->out_end_idx = out_size;
				261	state->recursion_depth = 0;
				262	state->steps = 0;
				263
				264	state->parse_state.mangled_idx = 0;
				265	state->parse_state.out_cur_idx = 0;
				266	state->parse_state.prev_name_idx = 0;
				267	state->parse_state.prev_name_length = -1;
				268	state->parse_state.nest_level = -1;
				269	state->parse_state.append = true;
				270	}
				271
				272	static inline const char RemainingInput(State state) {
				273	return &state->mangled_begin[state->parse_state.mangled_idx];
				274	}
				275
				276	// Returns true and advances "mangled_idx" if we find "one_char_token"
				277	// at "mangled_idx" position. It is assumed that "one_char_token" does
				278	// not contain '\0'.
				279	static bool ParseOneCharToken(State *state, const char one_char_token) {
				280	ComplexityGuard guard(state);
				281	if (guard.IsTooComplex()) return false;
				282	if (RemainingInput(state)[0] == one_char_token) {
				283	++state->parse_state.mangled_idx;
				284	return true;
				285	}
				286	return false;
				287	}
				288
				289	// Returns true and advances "mangled_cur" if we find "two_char_token"
				290	// at "mangled_cur" position. It is assumed that "two_char_token" does
				291	// not contain '\0'.
				292	static bool ParseTwoCharToken(State state, const char two_char_token) {
				293	ComplexityGuard guard(state);
				294	if (guard.IsTooComplex()) return false;
				295	if (RemainingInput(state)[0] == two_char_token[0] &&
				296	RemainingInput(state)[1] == two_char_token[1]) {
				297	state->parse_state.mangled_idx += 2;
				298	return true;
				299	}
				300	return false;
				301	}
				302
				303	// Returns true and advances "mangled_cur" if we find any character in
				304	// "char_class" at "mangled_cur" position.
				305	static bool ParseCharClass(State state, const char char_class) {
				306	ComplexityGuard guard(state);
				307	if (guard.IsTooComplex()) return false;
				308	if (RemainingInput(state)[0] == '\0') {
				309	return false;
				310	}
				311	const char *p = char_class;
				312	for (; *p != '\0'; ++p) {
				313	if (RemainingInput(state)[0] == *p) {
				314	++state->parse_state.mangled_idx;
				315	return true;
				316	}
				317	}
				318	return false;
				319	}
				320
				321	static bool ParseDigit(State state, int digit) {
				322	char c = RemainingInput(state)[0];
				323	if (ParseCharClass(state, "0123456789")) {
				324	if (digit != nullptr) {
				325	*digit = c - '0';
				326	}
				327	return true;
				328	}
				329	return false;
				330	}
				331
				332	// This function is used for handling an optional non-terminal.
				333	static bool Optional(bool /status/) { return true; }
				334
				335	// This function is used for handling <non-terminal>+ syntax.
				336	typedef bool (ParseFunc)(State );
				337	static bool OneOrMore(ParseFunc parse_func, State *state) {
				338	if (parse_func(state)) {
				339	while (parse_func(state)) {
				340	}
				341	return true;
				342	}
				343	return false;
				344	}
				345
				346	// This function is used for handling <non-terminal>* syntax. The function
				347	// always returns true and must be followed by a termination token or a
				348	// terminating sequence not handled by parse_func (e.g.
				349	// ParseOneCharToken(state, 'E')).
				350	static bool ZeroOrMore(ParseFunc parse_func, State *state) {
				351	while (parse_func(state)) {
				352	}
				353	return true;
				354	}
				355
				356	// Append "str" at "out_cur_idx". If there is an overflow, out_cur_idx is
				357	// set to out_end_idx+1. The output string is ensured to
				358	// always terminate with '\0' as long as there is no overflow.
				359	static void Append(State state, const char const str, const int length) {
				360	for (int i = 0; i < length; ++i) {
				361	if (state->parse_state.out_cur_idx + 1 <
				362	state->out_end_idx) { // +1 for '\0'
				363	state->out[state->parse_state.out_cur_idx++] = str[i];
				364	} else {
				365	// signal overflow
				366	state->parse_state.out_cur_idx = state->out_end_idx + 1;
				367	break;
				368	}
				369	}
				370	if (state->parse_state.out_cur_idx < state->out_end_idx) {
				371	state->out[state->parse_state.out_cur_idx] =
				372	'\0'; // Terminate it with '\0'
				373	}
				374	}
				375
				376	// We don't use equivalents in libc to avoid locale issues.
				377	static bool IsLower(char c) { return c >= 'a' && c <= 'z'; }
				378
				379	static bool IsAlpha(char c) {
				380	return (c >= 'a' && c <= 'z') \|\| (c >= 'A' && c <= 'Z');
				381	}
				382
				383	static bool IsDigit(char c) { return c >= '0' && c <= '9'; }
				384
				385	// Returns true if "str" is a function clone suffix. These suffixes are used
				386	// by GCC 4.5.x and later versions (and our locally-modified version of GCC
				387	// 4.4.x) to indicate functions which have been cloned during optimization.
				388	// We treat any sequence (.<alpha>+.<digit>+)+ as a function clone suffix.
				389	static bool IsFunctionCloneSuffix(const char *str) {
				390	size_t i = 0;
				391	while (str[i] != '\0') {
				392	// Consume a single .<alpha>+.<digit>+ sequence.
				393	if (str[i] != '.' \|\| !IsAlpha(str[i + 1])) {
				394	return false;
				395	}
				396	i += 2;
				397	while (IsAlpha(str[i])) {
				398	++i;
				399	}
				400	if (str[i] != '.' \|\| !IsDigit(str[i + 1])) {
				401	return false;
				402	}
				403	i += 2;
				404	while (IsDigit(str[i])) {
				405	++i;
				406	}
				407	}
				408	return true; // Consumed everything in "str".
				409	}
				410
				411	static bool EndsWith(State *state, const char chr) {
				412	return state->parse_state.out_cur_idx > 0 &&
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	413	state->parse_state.out_cur_idx < state->out_end_idx &&
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	414	chr == state->out[state->parse_state.out_cur_idx - 1];
				415	}
				416
				417	// Append "str" with some tweaks, iff "append" state is true.
				418	static void MaybeAppendWithLength(State state, const char const str,
				419	const int length) {
				420	if (state->parse_state.append && length > 0) {
				421	// Append a space if the output buffer ends with '<' and "str"
				422	// starts with '<' to avoid <<<.
				423	if (str[0] == '<' && EndsWith(state, '<')) {
				424	Append(state, " ", 1);
				425	}
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	426	// Remember the last identifier name for ctors/dtors,
				427	// but only if we haven't yet overflown the buffer.
				428	if (state->parse_state.out_cur_idx < state->out_end_idx &&
				429	(IsAlpha(str[0]) \|\| str[0] == '_')) {
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	430	state->parse_state.prev_name_idx = state->parse_state.out_cur_idx;
				431	state->parse_state.prev_name_length = length;
				432	}
				433	Append(state, str, length);
				434	}
				435	}
				436
				437	// Appends a positive decimal number to the output if appending is enabled.
				438	static bool MaybeAppendDecimal(State *state, unsigned int val) {
				439	// Max {32-64}-bit unsigned int is 20 digits.
				440	constexpr size_t kMaxLength = 20;
				441	char buf[kMaxLength];
				442
				443	// We can't use itoa or sprintf as neither is specified to be
				444	// async-signal-safe.
				445	if (state->parse_state.append) {
				446	// We can't have a one-before-the-beginning pointer, so instead start with
				447	// one-past-the-end and manipulate one character before the pointer.
				448	char *p = &buf[kMaxLength];
				449	do { // val=0 is the only input that should write a leading zero digit.
				450	*--p = (val % 10) + '0';
				451	val /= 10;
				452	} while (p > buf && val != 0);
				453
				454	// 'p' landed on the last character we set. How convenient.
				455	Append(state, p, kMaxLength - (p - buf));
				456	}
				457
				458	return true;
				459	}
				460
				461	// A convenient wrapper around MaybeAppendWithLength().
				462	// Returns true so that it can be placed in "if" conditions.
				463	static bool MaybeAppend(State state, const char const str) {
				464	if (state->parse_state.append) {
				465	int length = StrLen(str);
				466	MaybeAppendWithLength(state, str, length);
				467	}
				468	return true;
				469	}
				470
				471	// This function is used for handling nested names.
				472	static bool EnterNestedName(State *state) {
				473	state->parse_state.nest_level = 0;
				474	return true;
				475	}
				476
				477	// This function is used for handling nested names.
				478	static bool LeaveNestedName(State *state, int16_t prev_value) {
				479	state->parse_state.nest_level = prev_value;
				480	return true;
				481	}
				482
				483	// Disable the append mode not to print function parameters, etc.
				484	static bool DisableAppend(State *state) {
				485	state->parse_state.append = false;
				486	return true;
				487	}
				488
				489	// Restore the append mode to the previous state.
				490	static bool RestoreAppend(State *state, bool prev_value) {
				491	state->parse_state.append = prev_value;
				492	return true;
				493	}
				494
				495	// Increase the nest level for nested names.
				496	static void MaybeIncreaseNestLevel(State *state) {
				497	if (state->parse_state.nest_level > -1) {
				498	++state->parse_state.nest_level;
				499	}
				500	}
				501
				502	// Appends :: for nested names if necessary.
				503	static void MaybeAppendSeparator(State *state) {
				504	if (state->parse_state.nest_level >= 1) {
				505	MaybeAppend(state, "::");
				506	}
				507	}
				508
				509	// Cancel the last separator if necessary.
				510	static void MaybeCancelLastSeparator(State *state) {
				511	if (state->parse_state.nest_level >= 1 && state->parse_state.append &&
				512	state->parse_state.out_cur_idx >= 2) {
				513	state->parse_state.out_cur_idx -= 2;
				514	state->out[state->parse_state.out_cur_idx] = '\0';
				515	}
				516	}
				517
				518	// Returns true if the identifier of the given length pointed to by
				519	// "mangled_cur" is anonymous namespace.
				520	static bool IdentifierIsAnonymousNamespace(State *state, int length) {
				521	// Returns true if "anon_prefix" is a proper prefix of "mangled_cur".
				522	static const char anon_prefix[] = "_GLOBAL__N_";
				523	return (length > static_cast<int>(sizeof(anon_prefix) - 1) &&
				524	StrPrefix(RemainingInput(state), anon_prefix));
				525	}
				526
				527	// Forward declarations of our parsing functions.
				528	static bool ParseMangledName(State *state);
				529	static bool ParseEncoding(State *state);
				530	static bool ParseName(State *state);
				531	static bool ParseUnscopedName(State *state);
				532	static bool ParseNestedName(State *state);
				533	static bool ParsePrefix(State *state);
				534	static bool ParseUnqualifiedName(State *state);
				535	static bool ParseSourceName(State *state);
				536	static bool ParseLocalSourceName(State *state);
				537	static bool ParseUnnamedTypeName(State *state);
				538	static bool ParseNumber(State state, int number_out);
				539	static bool ParseFloatNumber(State *state);
				540	static bool ParseSeqId(State *state);
				541	static bool ParseIdentifier(State *state, int length);
				542	static bool ParseOperatorName(State state, int arity);
				543	static bool ParseSpecialName(State *state);
				544	static bool ParseCallOffset(State *state);
				545	static bool ParseNVOffset(State *state);
				546	static bool ParseVOffset(State *state);
				547	static bool ParseCtorDtorName(State *state);
				548	static bool ParseDecltype(State *state);
				549	static bool ParseType(State *state);
				550	static bool ParseCVQualifiers(State *state);
				551	static bool ParseBuiltinType(State *state);
				552	static bool ParseFunctionType(State *state);
				553	static bool ParseBareFunctionType(State *state);
				554	static bool ParseClassEnumType(State *state);
				555	static bool ParseArrayType(State *state);
				556	static bool ParsePointerToMemberType(State *state);
				557	static bool ParseTemplateParam(State *state);
				558	static bool ParseTemplateTemplateParam(State *state);
				559	static bool ParseTemplateArgs(State *state);
				560	static bool ParseTemplateArg(State *state);
				561	static bool ParseBaseUnresolvedName(State *state);
				562	static bool ParseUnresolvedName(State *state);
				563	static bool ParseExpression(State *state);
				564	static bool ParseExprPrimary(State *state);
				565	static bool ParseExprCastValue(State *state);
				566	static bool ParseLocalName(State *state);
				567	static bool ParseLocalNameSuffix(State *state);
				568	static bool ParseDiscriminator(State *state);
				569	static bool ParseSubstitution(State *state, bool accept_std);
				570
				571	// Implementation note: the following code is a straightforward
				572	// translation of the Itanium C++ ABI defined in BNF with a couple of
				573	// exceptions.
				574	//
				575	// - Support GNU extensions not defined in the Itanium C++ ABI
				576	// - <prefix> and <template-prefix> are combined to avoid infinite loop
				577	// - Reorder patterns to shorten the code
				578	// - Reorder patterns to give greedier functions precedence
				579	// We'll mark "Less greedy than" for these cases in the code
				580	//
				581	// Each parsing function changes the parse state and returns true on
				582	// success, or returns false and doesn't change the parse state (note:
				583	// the parse-steps counter increases regardless of success or failure).
				584	// To ensure that the parse state isn't changed in the latter case, we
				585	// save the original state before we call multiple parsing functions
				586	// consecutively with &&, and restore it if unsuccessful. See
				587	// ParseEncoding() as an example of this convention. We follow the
				588	// convention throughout the code.
				589	//
				590	// Originally we tried to do demangling without following the full ABI
				591	// syntax but it turned out we needed to follow the full syntax to
				592	// parse complicated cases like nested template arguments. Note that
				593	// implementing a full-fledged demangler isn't trivial (libiberty's
				594	// cp-demangle.c has +4300 lines).
				595	//
				596	// Note that (foo) in <(foo) ...> is a modifier to be ignored.
				597	//
				598	// Reference:
				599	// - Itanium C++ ABI
//   <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
				601
				602	// <mangled-name> ::= _Z <encoding>
				603	static bool ParseMangledName(State *state) {
				604	ComplexityGuard guard(state);
				605	if (guard.IsTooComplex()) return false;
				606	return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
				607	}
				608
				609	// <encoding> ::= <(function) name> <bare-function-type>
				610	// ::= <(data) name>
				611	// ::= <special-name>
				612	static bool ParseEncoding(State *state) {
				613	ComplexityGuard guard(state);
				614	if (guard.IsTooComplex()) return false;
				615	// Implementing the first two productions together as <name>
				616	// [<bare-function-type>] avoids exponential blowup of backtracking.
				617	//
				618	// Since Optional(...) can't fail, there's no need to copy the state for
				619	// backtracking.
				620	if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
				621	return true;
				622	}
				623
				624	if (ParseSpecialName(state)) {
				625	return true;
				626	}
				627	return false;
				628	}
				629
				630	// <name> ::= <nested-name>
				631	// ::= <unscoped-template-name> <template-args>
				632	// ::= <unscoped-name>
				633	// ::= <local-name>
				634	static bool ParseName(State *state) {
				635	ComplexityGuard guard(state);
				636	if (guard.IsTooComplex()) return false;
				637	if (ParseNestedName(state) \|\| ParseLocalName(state)) {
				638	return true;
				639	}
				640
				641	// We reorganize the productions to avoid re-parsing unscoped names.
				642	// - Inline <unscoped-template-name> productions:
				643	// <name> ::= <substitution> <template-args>
				644	// ::= <unscoped-name> <template-args>
				645	// ::= <unscoped-name>
				646	// - Merge the two productions that start with unscoped-name:
				647	// <name> ::= <unscoped-name> [<template-args>]
				648
				649	ParseState copy = state->parse_state;
				650	// "std<...>" isn't a valid name.
				651	if (ParseSubstitution(state, /accept_std=/false) &&
				652	ParseTemplateArgs(state)) {
				653	return true;
				654	}
				655	state->parse_state = copy;
				656
				657	// Note there's no need to restore state after this since only the first
				658	// subparser can fail.
				659	return ParseUnscopedName(state) && Optional(ParseTemplateArgs(state));
				660	}
				661
				662	// <unscoped-name> ::= <unqualified-name>
				663	// ::= St <unqualified-name>
				664	static bool ParseUnscopedName(State *state) {
				665	ComplexityGuard guard(state);
				666	if (guard.IsTooComplex()) return false;
				667	if (ParseUnqualifiedName(state)) {
				668	return true;
				669	}
				670
				671	ParseState copy = state->parse_state;
				672	if (ParseTwoCharToken(state, "St") && MaybeAppend(state, "std::") &&
				673	ParseUnqualifiedName(state)) {
				674	return true;
				675	}
				676	state->parse_state = copy;
				677	return false;
				678	}
				679
				680	// <ref-qualifer> ::= R // lvalue method reference qualifier
				681	// ::= O // rvalue method reference qualifier
				682	static inline bool ParseRefQualifier(State *state) {
				683	return ParseCharClass(state, "OR");
				684	}
				685
				686	// <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix>
				687	// <unqualified-name> E
				688	// ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix>
				689	// <template-args> E
				690	static bool ParseNestedName(State *state) {
				691	ComplexityGuard guard(state);
				692	if (guard.IsTooComplex()) return false;
				693	ParseState copy = state->parse_state;
				694	if (ParseOneCharToken(state, 'N') && EnterNestedName(state) &&
				695	Optional(ParseCVQualifiers(state)) &&
				696	Optional(ParseRefQualifier(state)) && ParsePrefix(state) &&
				697	LeaveNestedName(state, copy.nest_level) &&
				698	ParseOneCharToken(state, 'E')) {
				699	return true;
				700	}
				701	state->parse_state = copy;
				702	return false;
				703	}
				704
				705	// This part is tricky. If we literally translate them to code, we'll
				706	// end up infinite loop. Hence we merge them to avoid the case.
				707	//
				708	// <prefix> ::= <prefix> <unqualified-name>
				709	// ::= <template-prefix> <template-args>
				710	// ::= <template-param>
				711	// ::= <substitution>
				712	// ::= # empty
				713	// <template-prefix> ::= <prefix> <(template) unqualified-name>
				714	// ::= <template-param>
				715	// ::= <substitution>
				716	static bool ParsePrefix(State *state) {
				717	ComplexityGuard guard(state);
				718	if (guard.IsTooComplex()) return false;
				719	bool has_something = false;
				720	while (true) {
				721	MaybeAppendSeparator(state);
				722	if (ParseTemplateParam(state) \|\|
				723	ParseSubstitution(state, /accept_std=/true) \|\|
				724	ParseUnscopedName(state) \|\|
				725	(ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
				726	has_something = true;
				727	MaybeIncreaseNestLevel(state);
				728	continue;
				729	}
				730	MaybeCancelLastSeparator(state);
				731	if (has_something && ParseTemplateArgs(state)) {
				732	return ParsePrefix(state);
				733	} else {
				734	break;
				735	}
				736	}
				737	return true;
				738	}
				739
				740	// <unqualified-name> ::= <operator-name>
				741	// ::= <ctor-dtor-name>
				742	// ::= <source-name>
				743	// ::= <local-source-name> // GCC extension; see below.
				744	// ::= <unnamed-type-name>
				745	static bool ParseUnqualifiedName(State *state) {
				746	ComplexityGuard guard(state);
				747	if (guard.IsTooComplex()) return false;
				748	return (ParseOperatorName(state, nullptr) \|\| ParseCtorDtorName(state) \|\|
				749	ParseSourceName(state) \|\| ParseLocalSourceName(state) \|\|
				750	ParseUnnamedTypeName(state));
				751	}
				752
				753	// <source-name> ::= <positive length number> <identifier>
				754	static bool ParseSourceName(State *state) {
				755	ComplexityGuard guard(state);
				756	if (guard.IsTooComplex()) return false;
				757	ParseState copy = state->parse_state;
				758	int length = -1;
				759	if (ParseNumber(state, &length) && ParseIdentifier(state, length)) {
				760	return true;
				761	}
				762	state->parse_state = copy;
				763	return false;
				764	}
				765
				766	// <local-source-name> ::= L <source-name> [<discriminator>]
				767	//
				768	// References:
				769	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
				770	// https://gcc.gnu.org/viewcvs?view=rev&revision=124467
				771	static bool ParseLocalSourceName(State *state) {
				772	ComplexityGuard guard(state);
				773	if (guard.IsTooComplex()) return false;
				774	ParseState copy = state->parse_state;
				775	if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
				776	Optional(ParseDiscriminator(state))) {
				777	return true;
				778	}
				779	state->parse_state = copy;
				780	return false;
				781	}
				782
				783	// <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
				784	// ::= <closure-type-name>
				785	// <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
				786	// <lambda-sig> ::= <(parameter) type>+
				787	static bool ParseUnnamedTypeName(State *state) {
				788	ComplexityGuard guard(state);
				789	if (guard.IsTooComplex()) return false;
				790	ParseState copy = state->parse_state;
				791	// Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }.
				792	// Optionally parse the encoded value into 'which' and add 2 to get the index.
				793	int which = -1;
				794
				795	// Unnamed type local to function or class.
				796	if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) &&
				797	which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
				798	ParseOneCharToken(state, '_')) {
				799	MaybeAppend(state, "{unnamed type#");
				800	MaybeAppendDecimal(state, 2 + which);
				801	MaybeAppend(state, "}");
				802	return true;
				803	}
				804	state->parse_state = copy;
				805
				806	// Closure type.
				807	which = -1;
				808	if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
				809	OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
				810	ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
				811	which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
				812	ParseOneCharToken(state, '_')) {
				813	MaybeAppend(state, "{lambda()#");
				814	MaybeAppendDecimal(state, 2 + which);
				815	MaybeAppend(state, "}");
				816	return true;
				817	}
				818	state->parse_state = copy;
				819
				820	return false;
				821	}
				822
				823	// <number> ::= [n] <non-negative decimal integer>
				824	// If "number_out" is non-null, then *number_out is set to the value of the
				825	// parsed number on success.
				826	static bool ParseNumber(State state, int number_out) {
				827	ComplexityGuard guard(state);
				828	if (guard.IsTooComplex()) return false;
				829	bool negative = false;
				830	if (ParseOneCharToken(state, 'n')) {
				831	negative = true;
				832	}
				833	const char *p = RemainingInput(state);
				834	uint64_t number = 0;
				835	for (; *p != '\0'; ++p) {
				836	if (IsDigit(*p)) {
				837	number = number * 10 + (*p - '0');
				838	} else {
				839	break;
				840	}
				841	}
				842	// Apply the sign with uint64_t arithmetic so overflows aren't UB. Gives
				843	// "incorrect" results for out-of-range inputs, but negative values only
				844	// appear for literals, which aren't printed.
				845	if (negative) {
				846	number = ~number + 1;
				847	}
				848	if (p != RemainingInput(state)) { // Conversion succeeded.
				849	state->parse_state.mangled_idx += p - RemainingInput(state);
				850	if (number_out != nullptr) {
				851	// Note: possibly truncate "number".
				852	*number_out = number;
				853	}
				854	return true;
				855	}
				856	return false;
				857	}
				858
				859	// Floating-point literals are encoded using a fixed-length lowercase
				860	// hexadecimal string.
				861	static bool ParseFloatNumber(State *state) {
				862	ComplexityGuard guard(state);
				863	if (guard.IsTooComplex()) return false;
				864	const char *p = RemainingInput(state);
				865	for (; *p != '\0'; ++p) {
				866	if (!IsDigit(p) && !(p >= 'a' && *p <= 'f')) {
				867	break;
				868	}
				869	}
				870	if (p != RemainingInput(state)) { // Conversion succeeded.
				871	state->parse_state.mangled_idx += p - RemainingInput(state);
				872	return true;
				873	}
				874	return false;
				875	}
				876
				877	// The <seq-id> is a sequence number in base 36,
				878	// using digits and upper case letters
				879	static bool ParseSeqId(State *state) {
				880	ComplexityGuard guard(state);
				881	if (guard.IsTooComplex()) return false;
				882	const char *p = RemainingInput(state);
				883	for (; *p != '\0'; ++p) {
				884	if (!IsDigit(p) && !(p >= 'A' && *p <= 'Z')) {
				885	break;
				886	}
				887	}
				888	if (p != RemainingInput(state)) { // Conversion succeeded.
				889	state->parse_state.mangled_idx += p - RemainingInput(state);
				890	return true;
				891	}
				892	return false;
				893	}
				894
				895	// <identifier> ::= <unqualified source code identifier> (of given length)
				896	static bool ParseIdentifier(State *state, int length) {
				897	ComplexityGuard guard(state);
				898	if (guard.IsTooComplex()) return false;
				899	if (length < 0 \|\| !AtLeastNumCharsRemaining(RemainingInput(state), length)) {
				900	return false;
				901	}
				902	if (IdentifierIsAnonymousNamespace(state, length)) {
				903	MaybeAppend(state, "(anonymous namespace)");
				904	} else {
				905	MaybeAppendWithLength(state, RemainingInput(state), length);
				906	}
				907	state->parse_state.mangled_idx += length;
				908	return true;
				909	}
				910
				911	// <operator-name> ::= nw, and other two letters cases
				912	// ::= cv <type> # (cast)
				913	// ::= v <digit> <source-name> # vendor extended operator
				914	static bool ParseOperatorName(State state, int arity) {
				915	ComplexityGuard guard(state);
				916	if (guard.IsTooComplex()) return false;
				917	if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) {
				918	return false;
				919	}
				920	// First check with "cv" (cast) case.
				921	ParseState copy = state->parse_state;
				922	if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
				923	EnterNestedName(state) && ParseType(state) &&
				924	LeaveNestedName(state, copy.nest_level)) {
				925	if (arity != nullptr) {
				926	*arity = 1;
				927	}
				928	return true;
				929	}
				930	state->parse_state = copy;
				931
				932	// Then vendor extended operators.
				933	if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
				934	ParseSourceName(state)) {
				935	return true;
				936	}
				937	state->parse_state = copy;
				938
				939	// Other operator names should start with a lower alphabet followed
				940	// by a lower/upper alphabet.
				941	if (!(IsLower(RemainingInput(state)[0]) &&
				942	IsAlpha(RemainingInput(state)[1]))) {
				943	return false;
				944	}
				945	// We may want to perform a binary search if we really need speed.
				946	const AbbrevPair *p;
				947	for (p = kOperatorList; p->abbrev != nullptr; ++p) {
				948	if (RemainingInput(state)[0] == p->abbrev[0] &&
				949	RemainingInput(state)[1] == p->abbrev[1]) {
				950	if (arity != nullptr) {
				951	*arity = p->arity;
				952	}
				953	MaybeAppend(state, "operator");
				954	if (IsLower(*p->real_name)) { // new, delete, etc.
				955	MaybeAppend(state, " ");
				956	}
				957	MaybeAppend(state, p->real_name);
				958	state->parse_state.mangled_idx += 2;
				959	return true;
				960	}
				961	}
				962	return false;
				963	}
				964
				965	// <special-name> ::= TV <type>
				966	// ::= TT <type>
				967	// ::= TI <type>
				968	// ::= TS <type>
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	969	// ::= TH <type> # thread-local
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	970	// ::= Tc <call-offset> <call-offset> <(base) encoding>
				971	// ::= GV <(object) name>
				972	// ::= T <call-offset> <(base) encoding>
				973	// G++ extensions:
				974	// ::= TC <type> <(offset) number> _ <(base) type>
				975	// ::= TF <type>
				976	// ::= TJ <type>
				977	// ::= GR <name>
				978	// ::= GA <encoding>
				979	// ::= Th <call-offset> <(base) encoding>
				980	// ::= Tv <call-offset> <(base) encoding>
				981	//
				982	// Note: we don't care much about them since they don't appear in
				983	// stack traces. The are special data.
				984	static bool ParseSpecialName(State *state) {
				985	ComplexityGuard guard(state);
				986	if (guard.IsTooComplex()) return false;
				987	ParseState copy = state->parse_state;
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	988	if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") &&
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	989	ParseType(state)) {
				990	return true;
				991	}
				992	state->parse_state = copy;
				993
				994	if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
				995	ParseCallOffset(state) && ParseEncoding(state)) {
				996	return true;
				997	}
				998	state->parse_state = copy;
				999
				1000	if (ParseTwoCharToken(state, "GV") && ParseName(state)) {
				1001	return true;
				1002	}
				1003	state->parse_state = copy;
				1004
				1005	if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
				1006	ParseEncoding(state)) {
				1007	return true;
				1008	}
				1009	state->parse_state = copy;
				1010
				1011	// G++ extensions
				1012	if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
				1013	ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
				1014	DisableAppend(state) && ParseType(state)) {
				1015	RestoreAppend(state, copy.append);
				1016	return true;
				1017	}
				1018	state->parse_state = copy;
				1019
				1020	if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
				1021	ParseType(state)) {
				1022	return true;
				1023	}
				1024	state->parse_state = copy;
				1025
				1026	if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
				1027	return true;
				1028	}
				1029	state->parse_state = copy;
				1030
				1031	if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
				1032	return true;
				1033	}
				1034	state->parse_state = copy;
				1035
				1036	if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
				1037	ParseCallOffset(state) && ParseEncoding(state)) {
				1038	return true;
				1039	}
				1040	state->parse_state = copy;
				1041	return false;
				1042	}
				1043
				1044	// <call-offset> ::= h <nv-offset> _
				1045	// ::= v <v-offset> _
				1046	static bool ParseCallOffset(State *state) {
				1047	ComplexityGuard guard(state);
				1048	if (guard.IsTooComplex()) return false;
				1049	ParseState copy = state->parse_state;
				1050	if (ParseOneCharToken(state, 'h') && ParseNVOffset(state) &&
				1051	ParseOneCharToken(state, '_')) {
				1052	return true;
				1053	}
				1054	state->parse_state = copy;
				1055
				1056	if (ParseOneCharToken(state, 'v') && ParseVOffset(state) &&
				1057	ParseOneCharToken(state, '_')) {
				1058	return true;
				1059	}
				1060	state->parse_state = copy;
				1061
				1062	return false;
				1063	}
				1064
				1065	// <nv-offset> ::= <(offset) number>
				1066	static bool ParseNVOffset(State *state) {
				1067	ComplexityGuard guard(state);
				1068	if (guard.IsTooComplex()) return false;
				1069	return ParseNumber(state, nullptr);
				1070	}
				1071
				1072	// <v-offset> ::= <(offset) number> _ <(virtual offset) number>
				1073	static bool ParseVOffset(State *state) {
				1074	ComplexityGuard guard(state);
				1075	if (guard.IsTooComplex()) return false;
				1076	ParseState copy = state->parse_state;
				1077	if (ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
				1078	ParseNumber(state, nullptr)) {
				1079	return true;
				1080	}
				1081	state->parse_state = copy;
				1082	return false;
				1083	}
				1084
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1085	// <ctor-dtor-name> ::= C1 \| C2 \| C3 \| CI1 <base-class-type> \| CI2
				1086	// <base-class-type>
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1087	// ::= D0 \| D1 \| D2
				1088	// # GCC extensions: "unified" constructor/destructor. See
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1089	// #
				1090	// https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1091	// ::= C4 \| D4
				1092	static bool ParseCtorDtorName(State *state) {
				1093	ComplexityGuard guard(state);
				1094	if (guard.IsTooComplex()) return false;
				1095	ParseState copy = state->parse_state;
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1096	if (ParseOneCharToken(state, 'C')) {
				1097	if (ParseCharClass(state, "1234")) {
				1098	const char *const prev_name =
				1099	state->out + state->parse_state.prev_name_idx;
				1100	MaybeAppendWithLength(state, prev_name,
				1101	state->parse_state.prev_name_length);
				1102	return true;
				1103	} else if (ParseOneCharToken(state, 'I') && ParseCharClass(state, "12") &&
				1104	ParseClassEnumType(state)) {
				1105	return true;
				1106	}
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1107	}
				1108	state->parse_state = copy;
				1109
				1110	if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "0124")) {
				1111	const char *const prev_name = state->out + state->parse_state.prev_name_idx;
				1112	MaybeAppend(state, "~");
				1113	MaybeAppendWithLength(state, prev_name,
				1114	state->parse_state.prev_name_length);
				1115	return true;
				1116	}
				1117	state->parse_state = copy;
				1118	return false;
				1119	}
				1120
				1121	// <decltype> ::= Dt <expression> E # decltype of an id-expression or class
				1122	// # member access (C++0x)
				1123	// ::= DT <expression> E # decltype of an expression (C++0x)
				1124	static bool ParseDecltype(State *state) {
				1125	ComplexityGuard guard(state);
				1126	if (guard.IsTooComplex()) return false;
				1127
				1128	ParseState copy = state->parse_state;
				1129	if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
				1130	ParseExpression(state) && ParseOneCharToken(state, 'E')) {
				1131	return true;
				1132	}
				1133	state->parse_state = copy;
				1134
				1135	return false;
				1136	}
				1137
				1138	// <type> ::= <CV-qualifiers> <type>
				1139	// ::= P <type> # pointer-to
				1140	// ::= R <type> # reference-to
				1141	// ::= O <type> # rvalue reference-to (C++0x)
				1142	// ::= C <type> # complex pair (C 2000)
				1143	// ::= G <type> # imaginary (C 2000)
				1144	// ::= U <source-name> <type> # vendor extended type qualifier
				1145	// ::= <builtin-type>
				1146	// ::= <function-type>
				1147	// ::= <class-enum-type> # note: just an alias for <name>
				1148	// ::= <array-type>
				1149	// ::= <pointer-to-member-type>
				1150	// ::= <template-template-param> <template-args>
				1151	// ::= <template-param>
				1152	// ::= <decltype>
				1153	// ::= <substitution>
				1154	// ::= Dp <type> # pack expansion of (C++0x)
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1155	// ::= Dv <num-elems> _ # GNU vector extension
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1156	//
				1157	static bool ParseType(State *state) {
				1158	ComplexityGuard guard(state);
				1159	if (guard.IsTooComplex()) return false;
				1160	ParseState copy = state->parse_state;
				1161
				1162	// We should check CV-qualifers, and PRGC things first.
				1163	//
				1164	// CV-qualifiers overlap with some operator names, but an operator name is not
				1165	// valid as a type. To avoid an ambiguity that can lead to exponential time
				1166	// complexity, refuse to backtrack the CV-qualifiers.
				1167	//
				1168	// _Z4aoeuIrMvvE
				1169	// => _Z 4aoeuI rM v v E
				1170	// aoeu<operator%=, void, void>
				1171	// => _Z 4aoeuI r Mv v E
				1172	// aoeu<void void::* restrict>
				1173	//
				1174	// By consuming the CV-qualifiers first, the former parse is disabled.
				1175	if (ParseCVQualifiers(state)) {
				1176	const bool result = ParseType(state);
				1177	if (!result) state->parse_state = copy;
				1178	return result;
				1179	}
				1180	state->parse_state = copy;
				1181
				1182	// Similarly, these tag characters can overlap with other <name>s resulting in
				1183	// two different parse prefixes that land on <template-args> in the same
				1184	// place, such as "C3r1xI...". So, disable the "ctor-name = C3" parse by
				1185	// refusing to backtrack the tag characters.
				1186	if (ParseCharClass(state, "OPRCG")) {
				1187	const bool result = ParseType(state);
				1188	if (!result) state->parse_state = copy;
				1189	return result;
				1190	}
				1191	state->parse_state = copy;
				1192
				1193	if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
				1194	return true;
				1195	}
				1196	state->parse_state = copy;
				1197
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1198	if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
				1199	ParseType(state)) {
				1200	return true;
				1201	}
				1202	state->parse_state = copy;
				1203
				1204	if (ParseBuiltinType(state) \|\| ParseFunctionType(state) \|\|
				1205	ParseClassEnumType(state) \|\| ParseArrayType(state) \|\|
				1206	ParsePointerToMemberType(state) \|\| ParseDecltype(state) \|\|
				1207	// "std" on its own isn't a type.
				1208	ParseSubstitution(state, /accept_std=/false)) {
				1209	return true;
				1210	}
				1211
				1212	if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) {
				1213	return true;
				1214	}
				1215	state->parse_state = copy;
				1216
				1217	// Less greedy than <template-template-param> <template-args>.
				1218	if (ParseTemplateParam(state)) {
				1219	return true;
				1220	}
				1221
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1222	if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
				1223	ParseOneCharToken(state, '_')) {
				1224	return true;
				1225	}
				1226	state->parse_state = copy;
				1227
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1228	return false;
				1229	}
				1230
				1231	// <CV-qualifiers> ::= [r] [V] [K]
				1232	// We don't allow empty <CV-qualifiers> to avoid infinite loop in
				1233	// ParseType().
				1234	static bool ParseCVQualifiers(State *state) {
				1235	ComplexityGuard guard(state);
				1236	if (guard.IsTooComplex()) return false;
				1237	int num_cv_qualifiers = 0;
				1238	num_cv_qualifiers += ParseOneCharToken(state, 'r');
				1239	num_cv_qualifiers += ParseOneCharToken(state, 'V');
				1240	num_cv_qualifiers += ParseOneCharToken(state, 'K');
				1241	return num_cv_qualifiers > 0;
				1242	}
				1243
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1244	// <builtin-type> ::= v, etc. # single-character builtin types
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1245	// ::= u <source-name>
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1246	// ::= Dd, etc. # two-character builtin types
				1247	//
				1248	// Not supported:
				1249	// ::= DF <number> _ # _FloatN (N bits)
				1250	//
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1251	static bool ParseBuiltinType(State *state) {
				1252	ComplexityGuard guard(state);
				1253	if (guard.IsTooComplex()) return false;
				1254	const AbbrevPair *p;
				1255	for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1256	// Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
				1257	if (p->abbrev[1] == '\0') {
				1258	if (ParseOneCharToken(state, p->abbrev[0])) {
				1259	MaybeAppend(state, p->real_name);
				1260	return true;
				1261	}
				1262	} else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1263	MaybeAppend(state, p->real_name);
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1264	return true;
				1265	}
				1266	}
				1267
				1268	ParseState copy = state->parse_state;
				1269	if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
				1270	return true;
				1271	}
				1272	state->parse_state = copy;
				1273	return false;
				1274	}
				1275
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1276	// <exception-spec> ::= Do # non-throwing
				1277	// exception-specification (e.g.,
				1278	// noexcept, throw())
				1279	// ::= DO <expression> E # computed (instantiation-dependent)
				1280	// noexcept
				1281	// ::= Dw <type>+ E # dynamic exception specification
				1282	// with instantiation-dependent types
				1283	static bool ParseExceptionSpec(State *state) {
				1284	ComplexityGuard guard(state);
				1285	if (guard.IsTooComplex()) return false;
				1286
				1287	if (ParseTwoCharToken(state, "Do")) return true;
				1288
				1289	ParseState copy = state->parse_state;
				1290	if (ParseTwoCharToken(state, "DO") && ParseExpression(state) &&
				1291	ParseOneCharToken(state, 'E')) {
				1292	return true;
				1293	}
				1294	state->parse_state = copy;
				1295	if (ParseTwoCharToken(state, "Dw") && OneOrMore(ParseType, state) &&
				1296	ParseOneCharToken(state, 'E')) {
				1297	return true;
				1298	}
				1299	state->parse_state = copy;
				1300
				1301	return false;
				1302	}
				1303
				1304	// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1305	static bool ParseFunctionType(State *state) {
				1306	ComplexityGuard guard(state);
				1307	if (guard.IsTooComplex()) return false;
				1308	ParseState copy = state->parse_state;
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1309	if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') &&
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1310	Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1311	Optional(ParseOneCharToken(state, 'O')) &&
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1312	ParseOneCharToken(state, 'E')) {
				1313	return true;
				1314	}
				1315	state->parse_state = copy;
				1316	return false;
				1317	}
				1318
				1319	// <bare-function-type> ::= <(signature) type>+
				1320	static bool ParseBareFunctionType(State *state) {
				1321	ComplexityGuard guard(state);
				1322	if (guard.IsTooComplex()) return false;
				1323	ParseState copy = state->parse_state;
				1324	DisableAppend(state);
				1325	if (OneOrMore(ParseType, state)) {
				1326	RestoreAppend(state, copy.append);
				1327	MaybeAppend(state, "()");
				1328	return true;
				1329	}
				1330	state->parse_state = copy;
				1331	return false;
				1332	}
				1333
				1334	// <class-enum-type> ::= <name>
				1335	static bool ParseClassEnumType(State *state) {
				1336	ComplexityGuard guard(state);
				1337	if (guard.IsTooComplex()) return false;
				1338	return ParseName(state);
				1339	}
				1340
				1341	// <array-type> ::= A <(positive dimension) number> _ <(element) type>
				1342	// ::= A [<(dimension) expression>] _ <(element) type>
				1343	static bool ParseArrayType(State *state) {
				1344	ComplexityGuard guard(state);
				1345	if (guard.IsTooComplex()) return false;
				1346	ParseState copy = state->parse_state;
				1347	if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) &&
				1348	ParseOneCharToken(state, '_') && ParseType(state)) {
				1349	return true;
				1350	}
				1351	state->parse_state = copy;
				1352
				1353	if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
				1354	ParseOneCharToken(state, '_') && ParseType(state)) {
				1355	return true;
				1356	}
				1357	state->parse_state = copy;
				1358	return false;
				1359	}
				1360
				1361	// <pointer-to-member-type> ::= M <(class) type> <(member) type>
				1362	static bool ParsePointerToMemberType(State *state) {
				1363	ComplexityGuard guard(state);
				1364	if (guard.IsTooComplex()) return false;
				1365	ParseState copy = state->parse_state;
				1366	if (ParseOneCharToken(state, 'M') && ParseType(state) && ParseType(state)) {
				1367	return true;
				1368	}
				1369	state->parse_state = copy;
				1370	return false;
				1371	}
				1372
				1373	// <template-param> ::= T_
				1374	// ::= T <parameter-2 non-negative number> _
				1375	static bool ParseTemplateParam(State *state) {
				1376	ComplexityGuard guard(state);
				1377	if (guard.IsTooComplex()) return false;
				1378	if (ParseTwoCharToken(state, "T_")) {
				1379	MaybeAppend(state, "?"); // We don't support template substitutions.
				1380	return true;
				1381	}
				1382
				1383	ParseState copy = state->parse_state;
				1384	if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
				1385	ParseOneCharToken(state, '_')) {
				1386	MaybeAppend(state, "?"); // We don't support template substitutions.
				1387	return true;
				1388	}
				1389	state->parse_state = copy;
				1390	return false;
				1391	}
				1392
				1393	// <template-template-param> ::= <template-param>
				1394	// ::= <substitution>
				1395	static bool ParseTemplateTemplateParam(State *state) {
				1396	ComplexityGuard guard(state);
				1397	if (guard.IsTooComplex()) return false;
				1398	return (ParseTemplateParam(state) \|\|
				1399	// "std" on its own isn't a template.
				1400	ParseSubstitution(state, /accept_std=/false));
				1401	}
				1402
				1403	// <template-args> ::= I <template-arg>+ E
				1404	static bool ParseTemplateArgs(State *state) {
				1405	ComplexityGuard guard(state);
				1406	if (guard.IsTooComplex()) return false;
				1407	ParseState copy = state->parse_state;
				1408	DisableAppend(state);
				1409	if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
				1410	ParseOneCharToken(state, 'E')) {
				1411	RestoreAppend(state, copy.append);
				1412	MaybeAppend(state, "<>");
				1413	return true;
				1414	}
				1415	state->parse_state = copy;
				1416	return false;
				1417	}
				1418
				1419	// <template-arg> ::= <type>
				1420	// ::= <expr-primary>
				1421	// ::= J <template-arg>* E # argument pack
				1422	// ::= X <expression> E
				1423	static bool ParseTemplateArg(State *state) {
				1424	ComplexityGuard guard(state);
				1425	if (guard.IsTooComplex()) return false;
				1426	ParseState copy = state->parse_state;
				1427	if (ParseOneCharToken(state, 'J') && ZeroOrMore(ParseTemplateArg, state) &&
				1428	ParseOneCharToken(state, 'E')) {
				1429	return true;
				1430	}
				1431	state->parse_state = copy;
				1432
				1433	// There can be significant overlap between the following leading to
				1434	// exponential backtracking:
				1435	//
				1436	// <expr-primary> ::= L <type> <expr-cast-value> E
				1437	// e.g. L 2xxIvE 1 E
				1438	// <type> ==> <local-source-name> <template-args>
				1439	// e.g. L 2xx IvE
				1440	//
				1441	// This means parsing an entire <type> twice, and <type> can contain
				1442	// <template-arg>, so this can generate exponential backtracking. There is
				1443	// only overlap when the remaining input starts with "L <source-name>", so
				1444	// parse all cases that can start this way jointly to share the common prefix.
				1445	//
				1446	// We have:
				1447	//
				1448	// <template-arg> ::= <type>
				1449	// ::= <expr-primary>
				1450	//
				1451	// First, drop all the productions of <type> that must start with something
				1452	// other than 'L'. All that's left is <class-enum-type>; inline it.
				1453	//
				1454	// <type> ::= <nested-name> # starts with 'N'
				1455	// ::= <unscoped-name>
				1456	// ::= <unscoped-template-name> <template-args>
				1457	// ::= <local-name> # starts with 'Z'
				1458	//
				1459	// Drop and inline again:
				1460	//
				1461	// <type> ::= <unscoped-name>
				1462	// ::= <unscoped-name> <template-args>
				1463	// ::= <substitution> <template-args> # starts with 'S'
				1464	//
				1465	// Merge the first two, inline <unscoped-name>, drop last:
				1466	//
				1467	// <type> ::= <unqualified-name> [<template-args>]
				1468	// ::= St <unqualified-name> [<template-args>] # starts with 'S'
				1469	//
				1470	// Drop and inline:
				1471	//
				1472	// <type> ::= <operator-name> [<template-args>] # starts with lowercase
				1473	// ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D'
				1474	// ::= <source-name> [<template-args>] # starts with digit
				1475	// ::= <local-source-name> [<template-args>]
				1476	// ::= <unnamed-type-name> [<template-args>] # starts with 'U'
				1477	//
				1478	// One more time:
				1479	//
				1480	// <type> ::= L <source-name> [<template-args>]
				1481	//
				1482	// Likewise with <expr-primary>:
				1483	//
				1484	// <expr-primary> ::= L <type> <expr-cast-value> E
				1485	// ::= LZ <encoding> E # cannot overlap; drop
				1486	// ::= L <mangled_name> E # cannot overlap; drop
				1487	//
				1488	// By similar reasoning as shown above, the only <type>s starting with
				1489	// <source-name> are "<source-name> [<template-args>]". Inline this.
				1490	//
				1491	// <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E
				1492	//
				1493	// Now inline both of these into <template-arg>:
				1494	//
				1495	// <template-arg> ::= L <source-name> [<template-args>]
				1496	// ::= L <source-name> [<template-args>] <expr-cast-value> E
				1497	//
				1498	// Merge them and we're done:
				1499	// <template-arg>
				1500	// ::= L <source-name> [<template-args>] [<expr-cast-value> E]
				1501	if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
				1502	copy = state->parse_state;
				1503	if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
				1504	return true;
				1505	}
				1506	state->parse_state = copy;
				1507	return true;
				1508	}
				1509
				1510	// Now that the overlapping cases can't reach this code, we can safely call
				1511	// both of these.
				1512	if (ParseType(state) \|\| ParseExprPrimary(state)) {
				1513	return true;
				1514	}
				1515	state->parse_state = copy;
				1516
				1517	if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
				1518	ParseOneCharToken(state, 'E')) {
				1519	return true;
				1520	}
				1521	state->parse_state = copy;
				1522	return false;
				1523	}
				1524
				1525	// <unresolved-type> ::= <template-param> [<template-args>]
				1526	// ::= <decltype>
				1527	// ::= <substitution>
				1528	static inline bool ParseUnresolvedType(State *state) {
				1529	// No ComplexityGuard because we don't copy the state in this stack frame.
				1530	return (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) \|\|
				1531	ParseDecltype(state) \|\| ParseSubstitution(state, /accept_std=/false);
				1532	}
				1533
				1534	// <simple-id> ::= <source-name> [<template-args>]
				1535	static inline bool ParseSimpleId(State *state) {
				1536	// No ComplexityGuard because we don't copy the state in this stack frame.
				1537
				1538	// Note: <simple-id> cannot be followed by a parameter pack; see comment in
				1539	// ParseUnresolvedType.
				1540	return ParseSourceName(state) && Optional(ParseTemplateArgs(state));
				1541	}
				1542
				1543	// <base-unresolved-name> ::= <source-name> [<template-args>]
				1544	// ::= on <operator-name> [<template-args>]
				1545	// ::= dn <destructor-name>
				1546	static bool ParseBaseUnresolvedName(State *state) {
				1547	ComplexityGuard guard(state);
				1548	if (guard.IsTooComplex()) return false;
				1549
				1550	if (ParseSimpleId(state)) {
				1551	return true;
				1552	}
				1553
				1554	ParseState copy = state->parse_state;
				1555	if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) &&
				1556	Optional(ParseTemplateArgs(state))) {
				1557	return true;
				1558	}
				1559	state->parse_state = copy;
				1560
				1561	if (ParseTwoCharToken(state, "dn") &&
				1562	(ParseUnresolvedType(state) \|\| ParseSimpleId(state))) {
				1563	return true;
				1564	}
				1565	state->parse_state = copy;
				1566
				1567	return false;
				1568	}
				1569
				1570	// <unresolved-name> ::= [gs] <base-unresolved-name>
				1571	// ::= sr <unresolved-type> <base-unresolved-name>
				1572	// ::= srN <unresolved-type> <unresolved-qualifier-level>+ E
				1573	// <base-unresolved-name>
				1574	// ::= [gs] sr <unresolved-qualifier-level>+ E
				1575	// <base-unresolved-name>
				1576	static bool ParseUnresolvedName(State *state) {
				1577	ComplexityGuard guard(state);
				1578	if (guard.IsTooComplex()) return false;
				1579
				1580	ParseState copy = state->parse_state;
				1581	if (Optional(ParseTwoCharToken(state, "gs")) &&
				1582	ParseBaseUnresolvedName(state)) {
				1583	return true;
				1584	}
				1585	state->parse_state = copy;
				1586
				1587	if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) &&
				1588	ParseBaseUnresolvedName(state)) {
				1589	return true;
				1590	}
				1591	state->parse_state = copy;
				1592
				1593	if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
				1594	ParseUnresolvedType(state) &&
				1595	OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
				1596	ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
				1597	return true;
				1598	}
				1599	state->parse_state = copy;
				1600
				1601	if (Optional(ParseTwoCharToken(state, "gs")) &&
				1602	ParseTwoCharToken(state, "sr") &&
				1603	OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
				1604	ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
				1605	return true;
				1606	}
				1607	state->parse_state = copy;
				1608
				1609	return false;
				1610	}
				1611
				1612	// <expression> ::= <1-ary operator-name> <expression>
				1613	// ::= <2-ary operator-name> <expression> <expression>
				1614	// ::= <3-ary operator-name> <expression> <expression> <expression>
				1615	// ::= cl <expression>+ E
				1616	// ::= cv <type> <expression> # type (expression)
				1617	// ::= cv <type> _ <expression>* E # type (expr-list)
				1618	// ::= st <type>
				1619	// ::= <template-param>
				1620	// ::= <function-param>
				1621	// ::= <expr-primary>
				1622	// ::= dt <expression> <unresolved-name> # expr.name
				1623	// ::= pt <expression> <unresolved-name> # expr->name
				1624	// ::= sp <expression> # argument pack expansion
				1625	// ::= sr <type> <unqualified-name> <template-args>
				1626	// ::= sr <type> <unqualified-name>
				1627	// <function-param> ::= fp <(top-level) CV-qualifiers> _
				1628	// ::= fp <(top-level) CV-qualifiers> <number> _
				1629	// ::= fL <number> p <(top-level) CV-qualifiers> _
				1630	// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
				1631	static bool ParseExpression(State *state) {
				1632	ComplexityGuard guard(state);
				1633	if (guard.IsTooComplex()) return false;
				1634	if (ParseTemplateParam(state) \|\| ParseExprPrimary(state)) {
				1635	return true;
				1636	}
				1637
				1638	// Object/function call expression.
				1639	ParseState copy = state->parse_state;
				1640	if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) &&
				1641	ParseOneCharToken(state, 'E')) {
				1642	return true;
				1643	}
				1644	state->parse_state = copy;
				1645
				1646	// Function-param expression (level 0).
				1647	if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
				1648	Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
				1649	return true;
				1650	}
				1651	state->parse_state = copy;
				1652
				1653	// Function-param expression (level 1+).
				1654	if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
				1655	ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
				1656	Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
				1657	return true;
				1658	}
				1659	state->parse_state = copy;
				1660
				1661	// Parse the conversion expressions jointly to avoid re-parsing the <type> in
				1662	// their common prefix. Parsed as:
				1663	// <expression> ::= cv <type> <conversion-args>
				1664	// <conversion-args> ::= _ <expression>* E
				1665	// ::= <expression>
				1666	//
				1667	// Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName
				1668	// also needs to accept "cv <type>" in other contexts.
				1669	if (ParseTwoCharToken(state, "cv")) {
				1670	if (ParseType(state)) {
				1671	ParseState copy2 = state->parse_state;
				1672	if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) &&
				1673	ParseOneCharToken(state, 'E')) {
				1674	return true;
				1675	}
				1676	state->parse_state = copy2;
				1677	if (ParseExpression(state)) {
				1678	return true;
				1679	}
				1680	}
				1681	} else {
				1682	// Parse unary, binary, and ternary operator expressions jointly, taking
				1683	// care not to re-parse subexpressions repeatedly. Parse like:
				1684	// <expression> ::= <operator-name> <expression>
				1685	// [<one-to-two-expressions>]
				1686	// <one-to-two-expressions> ::= <expression> [<expression>]
				1687	int arity = -1;
				1688	if (ParseOperatorName(state, &arity) &&
				1689	arity > 0 && // 0 arity => disabled.
				1690	(arity < 3 \|\| ParseExpression(state)) &&
				1691	(arity < 2 \|\| ParseExpression(state)) &&
				1692	(arity < 1 \|\| ParseExpression(state))) {
				1693	return true;
				1694	}
				1695	}
				1696	state->parse_state = copy;
				1697
				1698	// sizeof type
				1699	if (ParseTwoCharToken(state, "st") && ParseType(state)) {
				1700	return true;
				1701	}
				1702	state->parse_state = copy;
				1703
				1704	// Object and pointer member access expressions.
				1705	if ((ParseTwoCharToken(state, "dt") \|\| ParseTwoCharToken(state, "pt")) &&
				1706	ParseExpression(state) && ParseType(state)) {
				1707	return true;
				1708	}
				1709	state->parse_state = copy;
				1710
				1711	// Pointer-to-member access expressions. This parses the same as a binary
				1712	// operator, but it's implemented separately because "ds" shouldn't be
				1713	// accepted in other contexts that parse an operator name.
				1714	if (ParseTwoCharToken(state, "ds") && ParseExpression(state) &&
				1715	ParseExpression(state)) {
				1716	return true;
				1717	}
				1718	state->parse_state = copy;
				1719
				1720	// Parameter pack expansion
				1721	if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) {
				1722	return true;
				1723	}
				1724	state->parse_state = copy;
				1725
				1726	return ParseUnresolvedName(state);
				1727	}
				1728
				1729	// <expr-primary> ::= L <type> <(value) number> E
				1730	// ::= L <type> <(value) float> E
				1731	// ::= L <mangled-name> E
				1732	// // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
				1733	// ::= LZ <encoding> E
				1734	//
				1735	// Warning, subtle: the "bug" LZ production above is ambiguous with the first
				1736	// production where <type> starts with <local-name>, which can lead to
				1737	// exponential backtracking in two scenarios:
				1738	//
				1739	// - When whatever follows the E in the <local-name> in the first production is
				1740	// not a name, we backtrack the whole <encoding> and re-parse the whole thing.
				1741	//
				1742	// - When whatever follows the <local-name> in the first production is not a
				1743	// number and this <expr-primary> may be followed by a name, we backtrack the
				1744	// <name> and re-parse it.
				1745	//
				1746	// Moreover this ambiguity isn't always resolved -- for example, the following
				1747	// has two different parses:
				1748	//
				1749	// _ZaaILZ4aoeuE1x1EvE
				1750	// => operator&&<aoeu, x, E, void>
				1751	// => operator&&<(aoeu::x)(1), void>
				1752	//
				1753	// To resolve this, we just do what GCC's demangler does, and refuse to parse
				1754	// casts to <local-name> types.
				1755	static bool ParseExprPrimary(State *state) {
				1756	ComplexityGuard guard(state);
				1757	if (guard.IsTooComplex()) return false;
				1758	ParseState copy = state->parse_state;
				1759
				1760	// The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E"
				1761	// or fail, no backtracking.
				1762	if (ParseTwoCharToken(state, "LZ")) {
				1763	if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) {
				1764	return true;
				1765	}
				1766
				1767	state->parse_state = copy;
				1768	return false;
				1769	}
				1770
				1771	// The merged cast production.
				1772	if (ParseOneCharToken(state, 'L') && ParseType(state) &&
				1773	ParseExprCastValue(state)) {
				1774	return true;
				1775	}
				1776	state->parse_state = copy;
				1777
				1778	if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
				1779	ParseOneCharToken(state, 'E')) {
				1780	return true;
				1781	}
				1782	state->parse_state = copy;
				1783
				1784	return false;
				1785	}
				1786
				1787	// <number> or <float>, followed by 'E', as described above ParseExprPrimary.
				1788	static bool ParseExprCastValue(State *state) {
				1789	ComplexityGuard guard(state);
				1790	if (guard.IsTooComplex()) return false;
				1791	// We have to be able to backtrack after accepting a number because we could
				1792	// have e.g. "7fffE", which will accept "7" as a number but then fail to find
				1793	// the 'E'.
				1794	ParseState copy = state->parse_state;
				1795	if (ParseNumber(state, nullptr) && ParseOneCharToken(state, 'E')) {
				1796	return true;
				1797	}
				1798	state->parse_state = copy;
				1799
				1800	if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
				1801	return true;
				1802	}
				1803	state->parse_state = copy;
				1804
				1805	return false;
				1806	}
				1807
				1808	// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
				1809	// ::= Z <(function) encoding> E s [<discriminator>]
				1810	//
				1811	// Parsing a common prefix of these two productions together avoids an
				1812	// exponential blowup of backtracking. Parse like:
				1813	// <local-name> := Z <encoding> E <local-name-suffix>
				1814	// <local-name-suffix> ::= s [<discriminator>]
				1815	// ::= <name> [<discriminator>]
				1816
				1817	static bool ParseLocalNameSuffix(State *state) {
				1818	ComplexityGuard guard(state);
				1819	if (guard.IsTooComplex()) return false;
				1820
				1821	if (MaybeAppend(state, "::") && ParseName(state) &&
				1822	Optional(ParseDiscriminator(state))) {
				1823	return true;
				1824	}
				1825
				1826	// Since we're not going to overwrite the above "::" by re-parsing the
				1827	// <encoding> (whose trailing '\0' byte was in the byte now holding the
				1828	// first ':'), we have to rollback the "::" if the <name> parse failed.
				1829	if (state->parse_state.append) {
				1830	state->out[state->parse_state.out_cur_idx - 2] = '\0';
				1831	}
				1832
				1833	return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
				1834	}
				1835
				1836	static bool ParseLocalName(State *state) {
				1837	ComplexityGuard guard(state);
				1838	if (guard.IsTooComplex()) return false;
				1839	ParseState copy = state->parse_state;
				1840	if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
				1841	ParseOneCharToken(state, 'E') && ParseLocalNameSuffix(state)) {
				1842	return true;
				1843	}
				1844	state->parse_state = copy;
				1845	return false;
				1846	}
				1847
				1848	// <discriminator> := _ <(non-negative) number>
				1849	static bool ParseDiscriminator(State *state) {
				1850	ComplexityGuard guard(state);
				1851	if (guard.IsTooComplex()) return false;
				1852	ParseState copy = state->parse_state;
				1853	if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
				1854	return true;
				1855	}
				1856	state->parse_state = copy;
				1857	return false;
				1858	}
				1859
				1860	// <substitution> ::= S_
				1861	// ::= S <seq-id> _
				1862	// ::= St, etc.
				1863	//
				1864	// "St" is special in that it's not valid as a standalone name, and it is
				1865	// allowed to precede a name without being wrapped in "N...E". This means that
				1866	// if we accept it on its own, we can accept "St1a" and try to parse
				1867	// template-args, then fail and backtrack, accept "St" on its own, then "1a" as
				1868	// an unqualified name and re-parse the same template-args. To block this
				1869	// exponential backtracking, we disable it with 'accept_std=false' in
				1870	// problematic contexts.
				1871	static bool ParseSubstitution(State *state, bool accept_std) {
				1872	ComplexityGuard guard(state);
				1873	if (guard.IsTooComplex()) return false;
				1874	if (ParseTwoCharToken(state, "S_")) {
				1875	MaybeAppend(state, "?"); // We don't support substitutions.
				1876	return true;
				1877	}
				1878
				1879	ParseState copy = state->parse_state;
				1880	if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
				1881	ParseOneCharToken(state, '_')) {
				1882	MaybeAppend(state, "?"); // We don't support substitutions.
				1883	return true;
				1884	}
				1885	state->parse_state = copy;
				1886
				1887	// Expand abbreviations like "St" => "std".
				1888	if (ParseOneCharToken(state, 'S')) {
				1889	const AbbrevPair *p;
				1890	for (p = kSubstitutionList; p->abbrev != nullptr; ++p) {
				1891	if (RemainingInput(state)[0] == p->abbrev[1] &&
				1892	(accept_std \|\| p->abbrev[1] != 't')) {
				1893	MaybeAppend(state, "std");
				1894	if (p->real_name[0] != '\0') {
				1895	MaybeAppend(state, "::");
				1896	MaybeAppend(state, p->real_name);
				1897	}
				1898	++state->parse_state.mangled_idx;
				1899	return true;
				1900	}
				1901	}
				1902	}
				1903	state->parse_state = copy;
				1904	return false;
				1905	}
				1906
				1907	// Parse <mangled-name>, optionally followed by either a function-clone suffix
				1908	// or version suffix. Returns true only if all of "mangled_cur" was consumed.
				1909	static bool ParseTopLevelMangledName(State *state) {
				1910	ComplexityGuard guard(state);
				1911	if (guard.IsTooComplex()) return false;
				1912	if (ParseMangledName(state)) {
				1913	if (RemainingInput(state)[0] != '\0') {
				1914	// Drop trailing function clone suffix, if any.
				1915	if (IsFunctionCloneSuffix(RemainingInput(state))) {
				1916	return true;
				1917	}
				1918	// Append trailing version suffix if any.
				1919	// ex. _Z3foo@@GLIBCXX_3.4
				1920	if (RemainingInput(state)[0] == '@') {
				1921	MaybeAppend(state, RemainingInput(state));
				1922	return true;
				1923	}
				1924	return false; // Unconsumed suffix.
				1925	}
				1926	return true;
				1927	}
				1928	return false;
				1929	}
				1930
				1931	static bool Overflowed(const State *state) {
				1932	return state->parse_state.out_cur_idx >= state->out_end_idx;
				1933	}
				1934
				1935	// The demangler entry point.
				1936	bool Demangle(const char mangled, char out, int out_size) {
				1937	State state;
				1938	InitState(&state, mangled, out, out_size);
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1939	return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&
				1940	state.parse_state.out_cur_idx > 0;
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1941	}
				1942
				1943	} // namespace debugging_internal
Austin Schuh	b4691e9	2020-12-31 12:37:18 -0800	[diff] [blame^]	1944	ABSL_NAMESPACE_END
Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame]	1945	} // namespace absl