// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// Author: Satoru Takabayashi
31//
32// For reference check out:
33// http://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
34//
35// Note that we only have partial C++0x support yet.
36
37#include <stdio.h> // for NULL
38#include "utilities.h"
39#include "demangle.h"
40
41#if defined(OS_WINDOWS)
42#include <dbghelp.h>
43#pragma comment(lib, "dbghelp")
44#endif
45
46_START_GOOGLE_NAMESPACE_
47
48#if !defined(OS_WINDOWS)
// Pairs an abbreviation found in a mangled name with the text that is
// emitted for it.  Tables of these are terminated by a { NULL, NULL } entry.
typedef struct {
  const char *abbrev;     // Abbreviation as it appears in the mangled name.
  const char *real_name;  // Text written to the output for "abbrev".
} AbbrevPair;
53
// List of operators from Itanium C++ ABI.
// Each abbreviation is exactly two characters; ParseOperatorName() scans
// this table linearly and stops at the { NULL, NULL } sentinel.
static const AbbrevPair kOperatorList[] = {
  { "nw", "new" },
  { "na", "new[]" },
  { "dl", "delete" },
  { "da", "delete[]" },
  { "ps", "+" },
  { "ng", "-" },
  { "ad", "&" },
  { "de", "*" },
  { "co", "~" },
  { "pl", "+" },
  { "mi", "-" },
  { "ml", "*" },
  { "dv", "/" },
  { "rm", "%" },
  { "an", "&" },
  { "or", "|" },
  { "eo", "^" },
  { "aS", "=" },
  { "pL", "+=" },
  { "mI", "-=" },
  { "mL", "*=" },
  { "dV", "/=" },
  { "rM", "%=" },
  { "aN", "&=" },
  { "oR", "|=" },
  { "eO", "^=" },
  { "ls", "<<" },
  { "rs", ">>" },
  { "lS", "<<=" },
  { "rS", ">>=" },
  { "eq", "==" },
  { "ne", "!=" },
  { "lt", "<" },
  { "gt", ">" },
  { "le", "<=" },
  { "ge", ">=" },
  { "nt", "!" },
  { "aa", "&&" },
  { "oo", "||" },
  { "pp", "++" },
  { "mm", "--" },
  { "cm", "," },
  { "pm", "->*" },
  { "pt", "->" },
  { "cl", "()" },
  { "ix", "[]" },
  { "qu", "?" },
  // Both sizeof encodings are printed the same way.
  { "st", "sizeof" },
  { "sz", "sizeof" },
  { NULL, NULL },  // Sentinel marking the end of the table.
};
107
// List of builtin types from Itanium C++ ABI.
// Each abbreviation is a single character; ParseBuiltinType() compares only
// the first character and stops at the { NULL, NULL } sentinel.
static const AbbrevPair kBuiltinTypeList[] = {
  { "v", "void" },
  { "w", "wchar_t" },
  { "b", "bool" },
  { "c", "char" },
  { "a", "signed char" },
  { "h", "unsigned char" },
  { "s", "short" },
  { "t", "unsigned short" },
  { "i", "int" },
  { "j", "unsigned int" },
  { "l", "long" },
  { "m", "unsigned long" },
  { "x", "long long" },
  { "y", "unsigned long long" },
  { "n", "__int128" },
  { "o", "unsigned __int128" },
  { "f", "float" },
  { "d", "double" },
  { "e", "long double" },
  { "g", "__float128" },
  { "z", "ellipsis" },
  { NULL, NULL }  // Sentinel marking the end of the table.
};
133
// List of substitutions from the Itanium C++ ABI.
// The table ends with a { NULL, NULL } sentinel.  "St" maps to the empty
// string -- NOTE(review): presumably the consumer prints the "std" prefix
// itself; confirm against ParseSubstitution().
static const AbbrevPair kSubstitutionList[] = {
  { "St", "" },
  { "Sa", "allocator" },
  { "Sb", "basic_string" },
  // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
  { "Ss", "string"},
  // std::basic_istream<char, std::char_traits<char> >
  { "Si", "istream" },
  // std::basic_ostream<char, std::char_traits<char> >
  { "So", "ostream" },
  // std::basic_iostream<char, std::char_traits<char> >
  { "Sd", "iostream" },
  { NULL, NULL }  // Sentinel marking the end of the table.
};
149
// State needed for demangling.  The parsing functions snapshot this struct
// by value before a multi-step parse and copy the snapshot back to undo a
// failed attempt, so it must stay cheap to copy.
typedef struct {
  const char *mangled_cur;  // Cursor of mangled name.
  char *out_cur;            // Cursor of output string (next write position).
  const char *out_begin;    // Beginning of output string.
  const char *out_end;      // End of output string (out_begin + out_size).
  const char *prev_name;    // For constructors/destructors; points at the
                            // last identifier written to the output.
  int prev_name_length;     // For constructors/destructors; -1 if unset.
  short nest_level;         // For nested names; -1 outside a nested name.
  bool append;              // Append flag; output is dropped when false.
  bool overflowed;          // True if output gets overflowed.
} State;
162
// Length of the NUL-terminated string "str", excluding the terminator.
// Hand-rolled because libc's strlen() is not guaranteed to be async
// signal safe.
static size_t StrLen(const char *str) {
  const char *end = str;
  while (*end != '\0') {
    ++end;
  }
  return (size_t)(end - str);
}
173
// Returns true if at least "n" characters precede the terminating '\0' of
// "str".  A non-positive "n" is trivially satisfied.
static bool AtLeastNumCharsRemaining(const char *str, int n) {
  int remaining = n;
  while (remaining > 0) {
    if (*str == '\0') {
      return false;  // Hit the end before seeing "n" characters.
    }
    ++str;
    --remaining;
  }
  return true;
}
183
// Returns true if "str" begins with "prefix".  An empty "prefix" matches
// every string.
static bool StrPrefix(const char *str, const char *prefix) {
  for (; *prefix != '\0'; ++str, ++prefix) {
    // A mismatch also covers "str" ending before "prefix" does.
    if (*str != *prefix) {
      return false;
    }
  }
  return true;  // Consumed everything in "prefix".
}
193
194static void InitState(State *state, const char *mangled,
195 char *out, int out_size) {
196 state->mangled_cur = mangled;
197 state->out_cur = out;
198 state->out_begin = out;
199 state->out_end = out + out_size;
200 state->prev_name = NULL;
201 state->prev_name_length = -1;
202 state->nest_level = -1;
203 state->append = true;
204 state->overflowed = false;
205}
206
207// Returns true and advances "mangled_cur" if we find "one_char_token"
208// at "mangled_cur" position. It is assumed that "one_char_token" does
209// not contain '\0'.
210static bool ParseOneCharToken(State *state, const char one_char_token) {
211 if (state->mangled_cur[0] == one_char_token) {
212 ++state->mangled_cur;
213 return true;
214 }
215 return false;
216}
217
218// Returns true and advances "mangled_cur" if we find "two_char_token"
219// at "mangled_cur" position. It is assumed that "two_char_token" does
220// not contain '\0'.
221static bool ParseTwoCharToken(State *state, const char *two_char_token) {
222 if (state->mangled_cur[0] == two_char_token[0] &&
223 state->mangled_cur[1] == two_char_token[1]) {
224 state->mangled_cur += 2;
225 return true;
226 }
227 return false;
228}
229
230// Returns true and advances "mangled_cur" if we find any character in
231// "char_class" at "mangled_cur" position.
232static bool ParseCharClass(State *state, const char *char_class) {
233 const char *p = char_class;
234 for (; *p != '\0'; ++p) {
235 if (state->mangled_cur[0] == *p) {
236 ++state->mangled_cur;
237 return true;
238 }
239 }
240 return false;
241}
242
// Marks a non-terminal as optional inside an && chain: whatever the wrapped
// parse returned, the chain continues.
static bool Optional(bool status) {
  (void)status;  // The outcome of the optional parse is deliberately ignored.
  return true;
}
247
248// This function is used for handling <non-terminal>+ syntax.
249typedef bool (*ParseFunc)(State *);
250static bool OneOrMore(ParseFunc parse_func, State *state) {
251 if (parse_func(state)) {
252 while (parse_func(state)) {
253 }
254 return true;
255 }
256 return false;
257}
258
259// This function is used for handling <non-terminal>* syntax. The function
260// always returns true and must be followed by a termination token or a
261// terminating sequence not handled by parse_func (e.g.
262// ParseOneCharToken(state, 'E')).
263static bool ZeroOrMore(ParseFunc parse_func, State *state) {
264 while (parse_func(state)) {
265 }
266 return true;
267}
268
269// Append "str" at "out_cur". If there is an overflow, "overflowed"
270// is set to true for later use. The output string is ensured to
271// always terminate with '\0' as long as there is no overflow.
272static void Append(State *state, const char * const str, const int length) {
273 int i;
274 for (i = 0; i < length; ++i) {
275 if (state->out_cur + 1 < state->out_end) { // +1 for '\0'
276 *state->out_cur = str[i];
277 ++state->out_cur;
278 } else {
279 state->overflowed = true;
280 break;
281 }
282 }
283 if (!state->overflowed) {
284 *state->out_cur = '\0'; // Terminate it with '\0'
285 }
286}
287
// Locale-independent character classification; the libc equivalents are
// avoided because of locale issues.
static bool IsLower(char c) {
  if (c < 'a') {
    return false;
  }
  return c <= 'z';
}
292
// Returns true for 'a'..'z' and 'A'..'Z', independent of locale.
static bool IsAlpha(char c) {
  const bool is_lower = (c >= 'a' && c <= 'z');
  const bool is_upper = (c >= 'A' && c <= 'Z');
  return is_lower || is_upper;
}
296
// Returns true for '0'..'9', independent of locale.
static bool IsDigit(char c) {
  return !(c < '0' || c > '9');
}
300
301// Returns true if "str" is a function clone suffix. These suffixes are used
302// by GCC 4.5.x and later versions to indicate functions which have been
303// cloned during optimization. We treat any sequence (.<alpha>+.<digit>+)+ as
304// a function clone suffix.
305static bool IsFunctionCloneSuffix(const char *str) {
306 size_t i = 0;
307 while (str[i] != '\0') {
308 // Consume a single .<alpha>+.<digit>+ sequence.
309 if (str[i] != '.' || !IsAlpha(str[i + 1])) {
310 return false;
311 }
312 i += 2;
313 while (IsAlpha(str[i])) {
314 ++i;
315 }
316 if (str[i] != '.' || !IsDigit(str[i + 1])) {
317 return false;
318 }
319 i += 2;
320 while (IsDigit(str[i])) {
321 ++i;
322 }
323 }
324 return true; // Consumed everything in "str".
325}
326
327// Append "str" with some tweaks, iff "append" state is true.
328// Returns true so that it can be placed in "if" conditions.
329static void MaybeAppendWithLength(State *state, const char * const str,
330 const int length) {
331 if (state->append && length > 0) {
332 // Append a space if the output buffer ends with '<' and "str"
333 // starts with '<' to avoid <<<.
334 if (str[0] == '<' && state->out_begin < state->out_cur &&
335 state->out_cur[-1] == '<') {
336 Append(state, " ", 1);
337 }
338 // Remember the last identifier name for ctors/dtors.
339 if (IsAlpha(str[0]) || str[0] == '_') {
340 state->prev_name = state->out_cur;
341 state->prev_name_length = length;
342 }
343 Append(state, str, length);
344 }
345}
346
347// A convenient wrapper arount MaybeAppendWithLength().
348static bool MaybeAppend(State *state, const char * const str) {
349 if (state->append) {
350 int length = StrLen(str);
351 MaybeAppendWithLength(state, str, length);
352 }
353 return true;
354}
355
356// This function is used for handling nested names.
357static bool EnterNestedName(State *state) {
358 state->nest_level = 0;
359 return true;
360}
361
362// This function is used for handling nested names.
363static bool LeaveNestedName(State *state, short prev_value) {
364 state->nest_level = prev_value;
365 return true;
366}
367
368// Disable the append mode not to print function parameters, etc.
369static bool DisableAppend(State *state) {
370 state->append = false;
371 return true;
372}
373
374// Restore the append mode to the previous state.
375static bool RestoreAppend(State *state, bool prev_value) {
376 state->append = prev_value;
377 return true;
378}
379
380// Increase the nest level for nested names.
381static void MaybeIncreaseNestLevel(State *state) {
382 if (state->nest_level > -1) {
383 ++state->nest_level;
384 }
385}
386
387// Appends :: for nested names if necessary.
388static void MaybeAppendSeparator(State *state) {
389 if (state->nest_level >= 1) {
390 MaybeAppend(state, "::");
391 }
392}
393
394// Cancel the last separator if necessary.
395static void MaybeCancelLastSeparator(State *state) {
396 if (state->nest_level >= 1 && state->append &&
397 state->out_begin <= state->out_cur - 2) {
398 state->out_cur -= 2;
399 *state->out_cur = '\0';
400 }
401}
402
403// Returns true if the identifier of the given length pointed to by
404// "mangled_cur" is anonymous namespace.
405static bool IdentifierIsAnonymousNamespace(State *state, int length) {
406 static const char anon_prefix[] = "_GLOBAL__N_";
407 return (length > (int)sizeof(anon_prefix) - 1 && // Should be longer.
408 StrPrefix(state->mangled_cur, anon_prefix));
409}
410
411// Forward declarations of our parsing functions.
412static bool ParseMangledName(State *state);
413static bool ParseEncoding(State *state);
414static bool ParseName(State *state);
415static bool ParseUnscopedName(State *state);
416static bool ParseUnscopedTemplateName(State *state);
417static bool ParseNestedName(State *state);
418static bool ParsePrefix(State *state);
419static bool ParseUnqualifiedName(State *state);
420static bool ParseSourceName(State *state);
421static bool ParseLocalSourceName(State *state);
422static bool ParseNumber(State *state, int *number_out);
423static bool ParseFloatNumber(State *state);
424static bool ParseSeqId(State *state);
425static bool ParseIdentifier(State *state, int length);
426static bool ParseAbiTags(State *state);
427static bool ParseAbiTag(State *state);
428static bool ParseOperatorName(State *state);
429static bool ParseSpecialName(State *state);
430static bool ParseCallOffset(State *state);
431static bool ParseNVOffset(State *state);
432static bool ParseVOffset(State *state);
433static bool ParseCtorDtorName(State *state);
434static bool ParseType(State *state);
435static bool ParseCVQualifiers(State *state);
436static bool ParseBuiltinType(State *state);
437static bool ParseFunctionType(State *state);
438static bool ParseBareFunctionType(State *state);
439static bool ParseClassEnumType(State *state);
440static bool ParseArrayType(State *state);
441static bool ParsePointerToMemberType(State *state);
442static bool ParseTemplateParam(State *state);
443static bool ParseTemplateTemplateParam(State *state);
444static bool ParseTemplateArgs(State *state);
445static bool ParseTemplateArg(State *state);
446static bool ParseExpression(State *state);
447static bool ParseExprPrimary(State *state);
448static bool ParseLocalName(State *state);
449static bool ParseDiscriminator(State *state);
450static bool ParseSubstitution(State *state);
451
452// Implementation note: the following code is a straightforward
453// translation of the Itanium C++ ABI defined in BNF with a couple of
454// exceptions.
455//
456// - Support GNU extensions not defined in the Itanium C++ ABI
457// - <prefix> and <template-prefix> are combined to avoid infinite loop
458// - Reorder patterns to shorten the code
459// - Reorder patterns to give greedier functions precedence
460// We'll mark "Less greedy than" for these cases in the code
461//
// Each parsing function changes the state and returns true on
// success. Otherwise, it leaves the state unchanged and returns false.
// To ensure that the state isn't changed in the latter case, we save
// the original state before calling more than one parsing function
// consecutively with &&, and restore the state if unsuccessful. See
// ParseEncoding() as an example of this convention. We follow the
// convention throughout the code.
469//
470// Originally we tried to do demangling without following the full ABI
471// syntax but it turned out we needed to follow the full syntax to
472// parse complicated cases like nested template arguments. Note that
473// implementing a full-fledged demangler isn't trivial (libiberty's
474// cp-demangle.c has +4300 lines).
475//
476// Note that (foo) in <(foo) ...> is a modifier to be ignored.
477//
// Reference:
// - Itanium C++ ABI
//   <http://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
481
482// <mangled-name> ::= _Z <encoding>
483static bool ParseMangledName(State *state) {
484 return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
485}
486
487// <encoding> ::= <(function) name> <bare-function-type>
488// ::= <(data) name>
489// ::= <special-name>
490static bool ParseEncoding(State *state) {
491 State copy = *state;
492 if (ParseName(state) && ParseBareFunctionType(state)) {
493 return true;
494 }
495 *state = copy;
496
497 if (ParseName(state) || ParseSpecialName(state)) {
498 return true;
499 }
500 return false;
501}
502
503// <name> ::= <nested-name>
504// ::= <unscoped-template-name> <template-args>
505// ::= <unscoped-name>
506// ::= <local-name>
507static bool ParseName(State *state) {
508 if (ParseNestedName(state) || ParseLocalName(state)) {
509 return true;
510 }
511
512 State copy = *state;
513 if (ParseUnscopedTemplateName(state) &&
514 ParseTemplateArgs(state)) {
515 return true;
516 }
517 *state = copy;
518
519 // Less greedy than <unscoped-template-name> <template-args>.
520 if (ParseUnscopedName(state)) {
521 return true;
522 }
523 return false;
524}
525
526// <unscoped-name> ::= <unqualified-name>
527// ::= St <unqualified-name>
528static bool ParseUnscopedName(State *state) {
529 if (ParseUnqualifiedName(state)) {
530 return true;
531 }
532
533 State copy = *state;
534 if (ParseTwoCharToken(state, "St") &&
535 MaybeAppend(state, "std::") &&
536 ParseUnqualifiedName(state)) {
537 return true;
538 }
539 *state = copy;
540 return false;
541}
542
543// <unscoped-template-name> ::= <unscoped-name>
544// ::= <substitution>
545static bool ParseUnscopedTemplateName(State *state) {
546 return ParseUnscopedName(state) || ParseSubstitution(state);
547}
548
549// <nested-name> ::= N [<CV-qualifiers>] <prefix> <unqualified-name> E
550// ::= N [<CV-qualifiers>] <template-prefix> <template-args> E
551static bool ParseNestedName(State *state) {
552 State copy = *state;
553 if (ParseOneCharToken(state, 'N') &&
554 EnterNestedName(state) &&
555 Optional(ParseCVQualifiers(state)) &&
556 ParsePrefix(state) &&
557 LeaveNestedName(state, copy.nest_level) &&
558 ParseOneCharToken(state, 'E')) {
559 return true;
560 }
561 *state = copy;
562 return false;
563}
564
565// This part is tricky. If we literally translate them to code, we'll
566// end up infinite loop. Hence we merge them to avoid the case.
567//
568// <prefix> ::= <prefix> <unqualified-name>
569// ::= <template-prefix> <template-args>
570// ::= <template-param>
571// ::= <substitution>
572// ::= # empty
573// <template-prefix> ::= <prefix> <(template) unqualified-name>
574// ::= <template-param>
575// ::= <substitution>
576static bool ParsePrefix(State *state) {
577 bool has_something = false;
578 while (true) {
579 MaybeAppendSeparator(state);
580 if (ParseTemplateParam(state) ||
581 ParseSubstitution(state) ||
582 ParseUnscopedName(state)) {
583 has_something = true;
584 MaybeIncreaseNestLevel(state);
585 continue;
586 }
587 MaybeCancelLastSeparator(state);
588 if (has_something && ParseTemplateArgs(state)) {
589 return ParsePrefix(state);
590 } else {
591 break;
592 }
593 }
594 return true;
595}
596
597// <unqualified-name> ::= <operator-name>
598// ::= <ctor-dtor-name>
599// ::= <source-name> [<abi-tags>]
600// ::= <local-source-name> [<abi-tags>]
601static bool ParseUnqualifiedName(State *state) {
602 return (ParseOperatorName(state) ||
603 ParseCtorDtorName(state) ||
604 (ParseSourceName(state) && Optional(ParseAbiTags(state))) ||
605 (ParseLocalSourceName(state) && Optional(ParseAbiTags(state))));
606}
607
608// <source-name> ::= <positive length number> <identifier>
609static bool ParseSourceName(State *state) {
610 State copy = *state;
611 int length = -1;
612 if (ParseNumber(state, &length) && ParseIdentifier(state, length)) {
613 return true;
614 }
615 *state = copy;
616 return false;
617}
618
619// <local-source-name> ::= L <source-name> [<discriminator>]
620//
621// References:
622// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
623// http://gcc.gnu.org/viewcvs?view=rev&revision=124467
624static bool ParseLocalSourceName(State *state) {
625 State copy = *state;
626 if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
627 Optional(ParseDiscriminator(state))) {
628 return true;
629 }
630 *state = copy;
631 return false;
632}
633
634// <number> ::= [n] <non-negative decimal integer>
635// If "number_out" is non-null, then *number_out is set to the value of the
636// parsed number on success.
637static bool ParseNumber(State *state, int *number_out) {
638 int sign = 1;
639 if (ParseOneCharToken(state, 'n')) {
640 sign = -1;
641 }
642 const char *p = state->mangled_cur;
643 int number = 0;
644 for (;*p != '\0'; ++p) {
645 if (IsDigit(*p)) {
646 number = number * 10 + (*p - '0');
647 } else {
648 break;
649 }
650 }
651 if (p != state->mangled_cur) { // Conversion succeeded.
652 state->mangled_cur = p;
653 if (number_out != NULL) {
654 *number_out = number * sign;
655 }
656 return true;
657 }
658 return false;
659}
660
661// Floating-point literals are encoded using a fixed-length lowercase
662// hexadecimal string.
663static bool ParseFloatNumber(State *state) {
664 const char *p = state->mangled_cur;
665 for (;*p != '\0'; ++p) {
666 if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) {
667 break;
668 }
669 }
670 if (p != state->mangled_cur) { // Conversion succeeded.
671 state->mangled_cur = p;
672 return true;
673 }
674 return false;
675}
676
677// The <seq-id> is a sequence number in base 36,
678// using digits and upper case letters
679static bool ParseSeqId(State *state) {
680 const char *p = state->mangled_cur;
681 for (;*p != '\0'; ++p) {
682 if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) {
683 break;
684 }
685 }
686 if (p != state->mangled_cur) { // Conversion succeeded.
687 state->mangled_cur = p;
688 return true;
689 }
690 return false;
691}
692
693// <identifier> ::= <unqualified source code identifier> (of given length)
694static bool ParseIdentifier(State *state, int length) {
695 if (length == -1 ||
696 !AtLeastNumCharsRemaining(state->mangled_cur, length)) {
697 return false;
698 }
699 if (IdentifierIsAnonymousNamespace(state, length)) {
700 MaybeAppend(state, "(anonymous namespace)");
701 } else {
702 MaybeAppendWithLength(state, state->mangled_cur, length);
703 }
704 state->mangled_cur += length;
705 return true;
706}
707
708// <abi-tags> ::= <abi-tag> [<abi-tags>]
709static bool ParseAbiTags(State *state) {
710 State copy = *state;
711 DisableAppend(state);
712 if (OneOrMore(ParseAbiTag, state)) {
713 RestoreAppend(state, copy.append);
714 return true;
715 }
716 *state = copy;
717 return false;
718}
719
720// <abi-tag> ::= B <source-name>
721static bool ParseAbiTag(State *state) {
722 return ParseOneCharToken(state, 'B') && ParseSourceName(state);
723}
724
725// <operator-name> ::= nw, and other two letters cases
726// ::= cv <type> # (cast)
727// ::= v <digit> <source-name> # vendor extended operator
728static bool ParseOperatorName(State *state) {
729 if (!AtLeastNumCharsRemaining(state->mangled_cur, 2)) {
730 return false;
731 }
732 // First check with "cv" (cast) case.
733 State copy = *state;
734 if (ParseTwoCharToken(state, "cv") &&
735 MaybeAppend(state, "operator ") &&
736 EnterNestedName(state) &&
737 ParseType(state) &&
738 LeaveNestedName(state, copy.nest_level)) {
739 return true;
740 }
741 *state = copy;
742
743 // Then vendor extended operators.
744 if (ParseOneCharToken(state, 'v') && ParseCharClass(state, "0123456789") &&
745 ParseSourceName(state)) {
746 return true;
747 }
748 *state = copy;
749
750 // Other operator names should start with a lower alphabet followed
751 // by a lower/upper alphabet.
752 if (!(IsLower(state->mangled_cur[0]) &&
753 IsAlpha(state->mangled_cur[1]))) {
754 return false;
755 }
756 // We may want to perform a binary search if we really need speed.
757 const AbbrevPair *p;
758 for (p = kOperatorList; p->abbrev != NULL; ++p) {
759 if (state->mangled_cur[0] == p->abbrev[0] &&
760 state->mangled_cur[1] == p->abbrev[1]) {
761 MaybeAppend(state, "operator");
762 if (IsLower(*p->real_name)) { // new, delete, etc.
763 MaybeAppend(state, " ");
764 }
765 MaybeAppend(state, p->real_name);
766 state->mangled_cur += 2;
767 return true;
768 }
769 }
770 return false;
771}
772
773// <special-name> ::= TV <type>
774// ::= TT <type>
775// ::= TI <type>
776// ::= TS <type>
777// ::= Tc <call-offset> <call-offset> <(base) encoding>
778// ::= GV <(object) name>
779// ::= T <call-offset> <(base) encoding>
780// G++ extensions:
781// ::= TC <type> <(offset) number> _ <(base) type>
782// ::= TF <type>
783// ::= TJ <type>
784// ::= GR <name>
785// ::= GA <encoding>
786// ::= Th <call-offset> <(base) encoding>
787// ::= Tv <call-offset> <(base) encoding>
788//
789// Note: we don't care much about them since they don't appear in
790// stack traces. The are special data.
791static bool ParseSpecialName(State *state) {
792 State copy = *state;
793 if (ParseOneCharToken(state, 'T') &&
794 ParseCharClass(state, "VTIS") &&
795 ParseType(state)) {
796 return true;
797 }
798 *state = copy;
799
800 if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
801 ParseCallOffset(state) && ParseEncoding(state)) {
802 return true;
803 }
804 *state = copy;
805
806 if (ParseTwoCharToken(state, "GV") &&
807 ParseName(state)) {
808 return true;
809 }
810 *state = copy;
811
812 if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
813 ParseEncoding(state)) {
814 return true;
815 }
816 *state = copy;
817
818 // G++ extensions
819 if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
820 ParseNumber(state, NULL) && ParseOneCharToken(state, '_') &&
821 DisableAppend(state) &&
822 ParseType(state)) {
823 RestoreAppend(state, copy.append);
824 return true;
825 }
826 *state = copy;
827
828 if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
829 ParseType(state)) {
830 return true;
831 }
832 *state = copy;
833
834 if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
835 return true;
836 }
837 *state = copy;
838
839 if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
840 return true;
841 }
842 *state = copy;
843
844 if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
845 ParseCallOffset(state) && ParseEncoding(state)) {
846 return true;
847 }
848 *state = copy;
849 return false;
850}
851
852// <call-offset> ::= h <nv-offset> _
853// ::= v <v-offset> _
854static bool ParseCallOffset(State *state) {
855 State copy = *state;
856 if (ParseOneCharToken(state, 'h') &&
857 ParseNVOffset(state) && ParseOneCharToken(state, '_')) {
858 return true;
859 }
860 *state = copy;
861
862 if (ParseOneCharToken(state, 'v') &&
863 ParseVOffset(state) && ParseOneCharToken(state, '_')) {
864 return true;
865 }
866 *state = copy;
867
868 return false;
869}
870
871// <nv-offset> ::= <(offset) number>
872static bool ParseNVOffset(State *state) {
873 return ParseNumber(state, NULL);
874}
875
876// <v-offset> ::= <(offset) number> _ <(virtual offset) number>
877static bool ParseVOffset(State *state) {
878 State copy = *state;
879 if (ParseNumber(state, NULL) && ParseOneCharToken(state, '_') &&
880 ParseNumber(state, NULL)) {
881 return true;
882 }
883 *state = copy;
884 return false;
885}
886
887// <ctor-dtor-name> ::= C1 | C2 | C3
888// ::= D0 | D1 | D2
889static bool ParseCtorDtorName(State *state) {
890 State copy = *state;
891 if (ParseOneCharToken(state, 'C') &&
892 ParseCharClass(state, "123")) {
893 const char * const prev_name = state->prev_name;
894 const int prev_name_length = state->prev_name_length;
895 MaybeAppendWithLength(state, prev_name, prev_name_length);
896 return true;
897 }
898 *state = copy;
899
900 if (ParseOneCharToken(state, 'D') &&
901 ParseCharClass(state, "012")) {
902 const char * const prev_name = state->prev_name;
903 const int prev_name_length = state->prev_name_length;
904 MaybeAppend(state, "~");
905 MaybeAppendWithLength(state, prev_name, prev_name_length);
906 return true;
907 }
908 *state = copy;
909 return false;
910}
911
912// <type> ::= <CV-qualifiers> <type>
913// ::= P <type> # pointer-to
914// ::= R <type> # reference-to
915// ::= O <type> # rvalue reference-to (C++0x)
916// ::= C <type> # complex pair (C 2000)
917// ::= G <type> # imaginary (C 2000)
918// ::= U <source-name> <type> # vendor extended type qualifier
919// ::= <builtin-type>
920// ::= <function-type>
921// ::= <class-enum-type>
922// ::= <array-type>
923// ::= <pointer-to-member-type>
924// ::= <template-template-param> <template-args>
925// ::= <template-param>
926// ::= <substitution>
927// ::= Dp <type> # pack expansion of (C++0x)
928// ::= Dt <expression> E # decltype of an id-expression or class
929// # member access (C++0x)
930// ::= DT <expression> E # decltype of an expression (C++0x)
931//
932static bool ParseType(State *state) {
933 // We should check CV-qualifers, and PRGC things first.
934 State copy = *state;
935 if (ParseCVQualifiers(state) && ParseType(state)) {
936 return true;
937 }
938 *state = copy;
939
940 if (ParseCharClass(state, "OPRCG") && ParseType(state)) {
941 return true;
942 }
943 *state = copy;
944
945 if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
946 return true;
947 }
948 *state = copy;
949
950 if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
951 ParseExpression(state) && ParseOneCharToken(state, 'E')) {
952 return true;
953 }
954 *state = copy;
955
956 if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
957 ParseType(state)) {
958 return true;
959 }
960 *state = copy;
961
962 if (ParseBuiltinType(state) ||
963 ParseFunctionType(state) ||
964 ParseClassEnumType(state) ||
965 ParseArrayType(state) ||
966 ParsePointerToMemberType(state) ||
967 ParseSubstitution(state)) {
968 return true;
969 }
970
971 if (ParseTemplateTemplateParam(state) &&
972 ParseTemplateArgs(state)) {
973 return true;
974 }
975 *state = copy;
976
977 // Less greedy than <template-template-param> <template-args>.
978 if (ParseTemplateParam(state)) {
979 return true;
980 }
981
982 return false;
983}
984
985// <CV-qualifiers> ::= [r] [V] [K]
986// We don't allow empty <CV-qualifiers> to avoid infinite loop in
987// ParseType().
988static bool ParseCVQualifiers(State *state) {
989 int num_cv_qualifiers = 0;
990 num_cv_qualifiers += ParseOneCharToken(state, 'r');
991 num_cv_qualifiers += ParseOneCharToken(state, 'V');
992 num_cv_qualifiers += ParseOneCharToken(state, 'K');
993 return num_cv_qualifiers > 0;
994}
995
996// <builtin-type> ::= v, etc.
997// ::= u <source-name>
998static bool ParseBuiltinType(State *state) {
999 const AbbrevPair *p;
1000 for (p = kBuiltinTypeList; p->abbrev != NULL; ++p) {
1001 if (state->mangled_cur[0] == p->abbrev[0]) {
1002 MaybeAppend(state, p->real_name);
1003 ++state->mangled_cur;
1004 return true;
1005 }
1006 }
1007
1008 State copy = *state;
1009 if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
1010 return true;
1011 }
1012 *state = copy;
1013 return false;
1014}
1015
1016// <function-type> ::= F [Y] <bare-function-type> E
1017static bool ParseFunctionType(State *state) {
1018 State copy = *state;
1019 if (ParseOneCharToken(state, 'F') &&
1020 Optional(ParseOneCharToken(state, 'Y')) &&
1021 ParseBareFunctionType(state) && ParseOneCharToken(state, 'E')) {
1022 return true;
1023 }
1024 *state = copy;
1025 return false;
1026}
1027
1028// <bare-function-type> ::= <(signature) type>+
1029static bool ParseBareFunctionType(State *state) {
1030 State copy = *state;
1031 DisableAppend(state);
1032 if (OneOrMore(ParseType, state)) {
1033 RestoreAppend(state, copy.append);
1034 MaybeAppend(state, "()");
1035 return true;
1036 }
1037 *state = copy;
1038 return false;
1039}
1040
1041// <class-enum-type> ::= <name>
1042static bool ParseClassEnumType(State *state) {
1043 return ParseName(state);
1044}
1045
1046// <array-type> ::= A <(positive dimension) number> _ <(element) type>
1047// ::= A [<(dimension) expression>] _ <(element) type>
1048static bool ParseArrayType(State *state) {
1049 State copy = *state;
1050 if (ParseOneCharToken(state, 'A') && ParseNumber(state, NULL) &&
1051 ParseOneCharToken(state, '_') && ParseType(state)) {
1052 return true;
1053 }
1054 *state = copy;
1055
1056 if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
1057 ParseOneCharToken(state, '_') && ParseType(state)) {
1058 return true;
1059 }
1060 *state = copy;
1061 return false;
1062}
1063
1064// <pointer-to-member-type> ::= M <(class) type> <(member) type>
1065static bool ParsePointerToMemberType(State *state) {
1066 State copy = *state;
1067 if (ParseOneCharToken(state, 'M') && ParseType(state) &&
1068 ParseType(state)) {
1069 return true;
1070 }
1071 *state = copy;
1072 return false;
1073}
1074
1075// <template-param> ::= T_
1076// ::= T <parameter-2 non-negative number> _
1077static bool ParseTemplateParam(State *state) {
1078 if (ParseTwoCharToken(state, "T_")) {
1079 MaybeAppend(state, "?"); // We don't support template substitutions.
1080 return true;
1081 }
1082
1083 State copy = *state;
1084 if (ParseOneCharToken(state, 'T') && ParseNumber(state, NULL) &&
1085 ParseOneCharToken(state, '_')) {
1086 MaybeAppend(state, "?"); // We don't support template substitutions.
1087 return true;
1088 }
1089 *state = copy;
1090 return false;
1091}
1092
1093
1094// <template-template-param> ::= <template-param>
1095// ::= <substitution>
1096static bool ParseTemplateTemplateParam(State *state) {
1097 return (ParseTemplateParam(state) ||
1098 ParseSubstitution(state));
1099}
1100
1101// <template-args> ::= I <template-arg>+ E
1102static bool ParseTemplateArgs(State *state) {
1103 State copy = *state;
1104 DisableAppend(state);
1105 if (ParseOneCharToken(state, 'I') &&
1106 OneOrMore(ParseTemplateArg, state) &&
1107 ParseOneCharToken(state, 'E')) {
1108 RestoreAppend(state, copy.append);
1109 MaybeAppend(state, "<>");
1110 return true;
1111 }
1112 *state = copy;
1113 return false;
1114}
1115
1116// <template-arg> ::= <type>
1117// ::= <expr-primary>
1118// ::= I <template-arg>* E # argument pack
1119// ::= J <template-arg>* E # argument pack
1120// ::= X <expression> E
1121static bool ParseTemplateArg(State *state) {
1122 State copy = *state;
1123 if ((ParseOneCharToken(state, 'I') || ParseOneCharToken(state, 'J')) &&
1124 ZeroOrMore(ParseTemplateArg, state) &&
1125 ParseOneCharToken(state, 'E')) {
1126 return true;
1127 }
1128 *state = copy;
1129
1130 if (ParseType(state) ||
1131 ParseExprPrimary(state)) {
1132 return true;
1133 }
1134 *state = copy;
1135
1136 if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
1137 ParseOneCharToken(state, 'E')) {
1138 return true;
1139 }
1140 *state = copy;
1141 return false;
1142}
1143
1144// <expression> ::= <template-param>
1145// ::= <expr-primary>
1146// ::= <unary operator-name> <expression>
1147// ::= <binary operator-name> <expression> <expression>
1148// ::= <trinary operator-name> <expression> <expression>
1149// <expression>
1150// ::= st <type>
1151// ::= sr <type> <unqualified-name> <template-args>
1152// ::= sr <type> <unqualified-name>
1153static bool ParseExpression(State *state) {
1154 if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
1155 return true;
1156 }
1157
1158 State copy = *state;
1159 if (ParseOperatorName(state) &&
1160 ParseExpression(state) &&
1161 ParseExpression(state) &&
1162 ParseExpression(state)) {
1163 return true;
1164 }
1165 *state = copy;
1166
1167 if (ParseOperatorName(state) &&
1168 ParseExpression(state) &&
1169 ParseExpression(state)) {
1170 return true;
1171 }
1172 *state = copy;
1173
1174 if (ParseOperatorName(state) &&
1175 ParseExpression(state)) {
1176 return true;
1177 }
1178 *state = copy;
1179
1180 if (ParseTwoCharToken(state, "st") && ParseType(state)) {
1181 return true;
1182 }
1183 *state = copy;
1184
1185 if (ParseTwoCharToken(state, "sr") && ParseType(state) &&
1186 ParseUnqualifiedName(state) &&
1187 ParseTemplateArgs(state)) {
1188 return true;
1189 }
1190 *state = copy;
1191
1192 if (ParseTwoCharToken(state, "sr") && ParseType(state) &&
1193 ParseUnqualifiedName(state)) {
1194 return true;
1195 }
1196 *state = copy;
1197 return false;
1198}
1199
1200// <expr-primary> ::= L <type> <(value) number> E
1201// ::= L <type> <(value) float> E
1202// ::= L <mangled-name> E
1203// // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
1204// ::= LZ <encoding> E
1205static bool ParseExprPrimary(State *state) {
1206 State copy = *state;
1207 if (ParseOneCharToken(state, 'L') && ParseType(state) &&
1208 ParseNumber(state, NULL) &&
1209 ParseOneCharToken(state, 'E')) {
1210 return true;
1211 }
1212 *state = copy;
1213
1214 if (ParseOneCharToken(state, 'L') && ParseType(state) &&
1215 ParseFloatNumber(state) &&
1216 ParseOneCharToken(state, 'E')) {
1217 return true;
1218 }
1219 *state = copy;
1220
1221 if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
1222 ParseOneCharToken(state, 'E')) {
1223 return true;
1224 }
1225 *state = copy;
1226
1227 if (ParseTwoCharToken(state, "LZ") && ParseEncoding(state) &&
1228 ParseOneCharToken(state, 'E')) {
1229 return true;
1230 }
1231 *state = copy;
1232
1233 return false;
1234}
1235
1236// <local-name> := Z <(function) encoding> E <(entity) name>
1237// [<discriminator>]
1238// := Z <(function) encoding> E s [<discriminator>]
1239static bool ParseLocalName(State *state) {
1240 State copy = *state;
1241 if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
1242 ParseOneCharToken(state, 'E') && MaybeAppend(state, "::") &&
1243 ParseName(state) && Optional(ParseDiscriminator(state))) {
1244 return true;
1245 }
1246 *state = copy;
1247
1248 if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
1249 ParseTwoCharToken(state, "Es") && Optional(ParseDiscriminator(state))) {
1250 return true;
1251 }
1252 *state = copy;
1253 return false;
1254}
1255
1256// <discriminator> := _ <(non-negative) number>
1257static bool ParseDiscriminator(State *state) {
1258 State copy = *state;
1259 if (ParseOneCharToken(state, '_') && ParseNumber(state, NULL)) {
1260 return true;
1261 }
1262 *state = copy;
1263 return false;
1264}
1265
1266// <substitution> ::= S_
1267// ::= S <seq-id> _
1268// ::= St, etc.
1269static bool ParseSubstitution(State *state) {
1270 if (ParseTwoCharToken(state, "S_")) {
1271 MaybeAppend(state, "?"); // We don't support substitutions.
1272 return true;
1273 }
1274
1275 State copy = *state;
1276 if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
1277 ParseOneCharToken(state, '_')) {
1278 MaybeAppend(state, "?"); // We don't support substitutions.
1279 return true;
1280 }
1281 *state = copy;
1282
1283 // Expand abbreviations like "St" => "std".
1284 if (ParseOneCharToken(state, 'S')) {
1285 const AbbrevPair *p;
1286 for (p = kSubstitutionList; p->abbrev != NULL; ++p) {
1287 if (state->mangled_cur[0] == p->abbrev[1]) {
1288 MaybeAppend(state, "std");
1289 if (p->real_name[0] != '\0') {
1290 MaybeAppend(state, "::");
1291 MaybeAppend(state, p->real_name);
1292 }
1293 ++state->mangled_cur;
1294 return true;
1295 }
1296 }
1297 }
1298 *state = copy;
1299 return false;
1300}
1301
1302// Parse <mangled-name>, optionally followed by either a function-clone suffix
1303// or version suffix. Returns true only if all of "mangled_cur" was consumed.
1304static bool ParseTopLevelMangledName(State *state) {
1305 if (ParseMangledName(state)) {
1306 if (state->mangled_cur[0] != '\0') {
1307 // Drop trailing function clone suffix, if any.
1308 if (IsFunctionCloneSuffix(state->mangled_cur)) {
1309 return true;
1310 }
1311 // Append trailing version suffix if any.
1312 // ex. _Z3foo@@GLIBCXX_3.4
1313 if (state->mangled_cur[0] == '@') {
1314 MaybeAppend(state, state->mangled_cur);
1315 return true;
1316 }
1317 return false; // Unconsumed suffix.
1318 }
1319 return true;
1320 }
1321 return false;
1322}
1323#endif
1324
1325// The demangler entry point.
1326bool Demangle(const char *mangled, char *out, int out_size) {
1327#if defined(OS_WINDOWS)
1328 // When built with incremental linking, the Windows debugger
1329 // library provides a more complicated `Symbol->Name` with the
1330 // Incremental Linking Table offset, which looks like
1331 // `@ILT+1105(?func@Foo@@SAXH@Z)`. However, the demangler expects
1332 // only the mangled symbol, `?func@Foo@@SAXH@Z`. Fortunately, the
1333 // mangled symbol is guaranteed not to have parentheses,
1334 // so we search for `(` and extract up to `)`.
1335 //
1336 // Since we may be in a signal handler here, we cannot use `std::string`.
1337 char buffer[1024]; // Big enough for a sane symbol.
1338 const char *lparen = strchr(mangled, '(');
1339 if (lparen) {
1340 // Extract the string `(?...)`
1341 const char *rparen = strchr(lparen, ')');
1342 size_t length = rparen - lparen - 1;
1343 strncpy(buffer, lparen + 1, length);
1344 buffer[length] = '\0';
1345 mangled = buffer;
1346 } // Else the symbol wasn't inside a set of parentheses
1347 // We use the ANSI version to ensure the string type is always `char *`.
1348 return UnDecorateSymbolName(mangled, out, out_size, UNDNAME_COMPLETE);
1349#else
1350 State state;
1351 InitState(&state, mangled, out, out_size);
1352 return ParseTopLevelMangledName(&state) && !state.overflowed;
1353#endif
1354}
1355
1356_END_GOOGLE_NAMESPACE_