Austin Schuh | cbc1740 | 2019-01-21 21:00:30 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | strip_asm.py - Cleanup ASM output for the specified file |
| 5 | """ |
| 6 | |
| 7 | from argparse import ArgumentParser |
| 8 | import sys |
| 9 | import os |
| 10 | import re |
| 11 | |
def find_used_labels(asm):
    """
    find_used_labels - collect the local labels referenced by jump
    instructions.

    Each line of `asm` is checked for an instruction whose mnemonic starts
    with 'j' followed by lowercase letters and whose operand is a label
    beginning with '.L'. Lines that do not have that shape are ignored.

    Returns a set of the referenced label names, each including the leading
    '.L' (an empty set when nothing matches).
    """
    # Raw string: '\s' and '\.' are regex escapes, not Python string escapes.
    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (label_re.match(line) for line in asm.splitlines())
    return {'.L%s' % m.group(1) for m in matches if m}
| 20 | |
| 21 | |
def normalize_labels(asm):
    """
    normalize_labels - make local labels start with a '.'.

    Label declarations are lines beginning with 'L<name>:' or '.L<name>:'.
    When none of the declared labels carries the leading dot, every
    occurrence of each declared label (its declaration and whitespace
    separated references) is rewritten to the dotted spelling.

    The text is returned unchanged when it declares no labels, or when at
    least one declared label already starts with '.'; in the latter case the
    input is taken to be in dotted form already and is not touched.
    """
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))
    # Decide from the whole set rather than from one arbitrary element, so
    # the outcome does not depend on set iteration order.
    if not decls or any(d.startswith('.') for d in decls):
        return asm
    for label in sorted(decls):
        # The lookahead stops 'L1' from matching inside 'L10'; '$' also
        # covers a reference that ends the text without a trailing newline.
        pattern = r"(^|\s+)" + re.escape(label) + r"(?=:|\s|$)"
        asm = re.sub(pattern, '\\1.' + label, asm)
    return asm
| 37 | |
| 38 | |
def transform_labels(asm):
    """
    transform_labels - normalize label spelling and drop unused labels.

    The text is first passed through normalize_labels(). Every line that
    declares a '.L<name>:' label is then removed unless that label is in the
    set returned by find_used_labels(); all other lines are kept.

    Returns the remaining lines, each terminated by a newline character.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    # Raw string: '\.' is a regex escape, not a Python string escape.
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m is None or m.group(0) in used_decls:
            kept.append(line + '\n')
    return ''.join(kept)
| 50 | |
| 51 | |
def is_identifier(tk):
    """
    is_identifier - report whether `tk` is spelled like an identifier.

    The first character must be a letter or '_'; every following character
    must be a letter, a digit or '_'. An empty token is not an identifier.
    """
    if len(tk) < 1:
        return False
    head, tail = tk[0], tk[1:]
    if not (head.isalpha() or head == '_'):
        return False
    return all(ch.isalnum() or ch == '_' for ch in tail)
| 63 | |
def process_identifiers(l):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.

    Returns the line `l` with the adjusted identifiers; all other text is
    copied through unchanged.
    """
    # The capturing group makes re.split() return the word-like runs as well
    # as the text between them, so the line can be reassembled in order.
    parts = re.split(r'([a-zA-Z0-9_]+)', l)
    renamed = []
    for tk in parts:
        if is_identifier(tk):
            if tk.startswith('__Z'):
                # '__Z...' -> '_Z...' (presumably a mangled C++ name carrying
                # the extra underscore; confirm against real MachO output).
                tk = tk[1:]
            elif tk.startswith('_') and len(tk) > 1 and \
                    tk[1].isalpha() and tk[1] != 'Z':
                # '_name' -> 'name'; names starting with '_Z' are left alone.
                tk = tk[1:]
        renamed.append(tk)
    return ''.join(renamed)
| 82 | |
| 83 | |
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    The text is first run through transform_labels(). Each remaining line
    then has '@GOTPCREL' removed and is dropped if it matches one of the
    discard patterns (unless a keep pattern also matches). Surviving lines
    are passed through process_identifiers(), and a blank line is inserted
    before every function label definition except at the very start of the
    output.

    Returns the stripped text; every emitted line ends with a newline.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    # Raw strings: '\s' and '\.' are regex escapes, not Python string escapes.
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(
            r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    # Patterns listed here override a discard match; currently none.
    keep_regexes = [

    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")
    chunks = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace('@GOTPCREL', '')
        discard = any(reg.match(line) is not None for reg in discard_regexes)
        keep = any(reg.match(line) is not None for reg in keep_regexes)
        if discard and not keep:
            continue
        # Separate functions with a blank line, but not before the first
        # line of output.
        if fn_label_def.match(line) and chunks:
            chunks.append('\n')
        chunks.append(process_identifiers(line))
        chunks.append('\n')
    return ''.join(chunks)
| 122 | |
def main():
    """
    main - command line entry point.

    Expects two positional arguments: the input assembly file and the output
    file. The input is read, run through process_asm(), and the result is
    written to the output file. If the input path is not an existing file an
    error message is printed and the process exits with status 1.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # parse_known_args() tolerates extra arguments; they are ignored here.
    args, _unknown_args = parser.parse_known_args()
    # nargs=1 yields one-element lists. Local names avoid shadowing the
    # builtin input().
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(("ERROR: input file '%s' does not exist") % input_path)
        sys.exit(1)
    with open(input_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, 'w') as f:
        f.write(new_contents)


if __name__ == '__main__':
    main()
| 148 | |
| 149 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 |
| 150 | # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; |
| 151 | # kate: indent-mode python; remove-trailing-spaces modified; |