#!/usr/bin/env python3

"""
strip_asm.py - Cleanup ASM output for the specified file
"""

from argparse import ArgumentParser
import sys
import os
import re

# NOTE: every pattern below is a raw string; writing "\s" or "\." in a normal
# string literal is an invalid escape sequence and warns on modern Python.

# A jump instruction ("j" + lowercase mnemonic) whose target is a ".L" label.
_JUMP_TARGET_RE = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
# A local label declaration at the start of a line, with or without the dot.
_ANY_LABEL_DECL_RE = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
# A local label declaration in its normalized (dotted) form.
_DOT_LABEL_DECL_RE = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
# A non-local label definition; process_asm puts a blank line before these.
_FN_LABEL_DEF_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")
# Splits a line into alternating separator / word tokens.
_TOKEN_SPLIT_RE = re.compile(r'([a-zA-Z0-9_]+)')

# TODO: Add more things we want to remove
_DISCARD_REGEXES = (
    re.compile(r"\s+\..*$"),  # directive
    re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
    re.compile(r"\s*#.*$"),  # comment line
    re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
    # data definition directives
    re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
)
# Lines matching one of these are kept even if a discard pattern matched.
_KEEP_REGEXES = ()


def find_used_labels(asm):
    """
    Return the set of '.L' labels that are the target of a jump instruction.

    Only lines that start (after optional whitespace) with a 'j<mnemonic>'
    instruction followed by a '.L<name>' operand are considered.
    """
    found = set()
    for line in asm.splitlines():
        m = _JUMP_TARGET_RE.match(line)
        if m:
            found.add('.L%s' % m.group(1))
    return found


def normalize_labels(asm):
    """
    Rewrite 'L<name>' local labels to the dotted '.L<name>' form.

    Labels are discovered from their declarations ('L<name>:' or '.L<name>:'
    at the start of a line). If there are no declarations, or any declaration
    already carries the leading dot, the text is returned unchanged.
    Otherwise every declaration of, and whitespace-preceded reference to, a
    declared label gets a '.' prepended.
    """
    decls = set()
    for line in asm.splitlines():
        m = _ANY_LABEL_DECL_RE.match(line)
        if m:
            decls.add(m.group(0))
    # Decide deterministically instead of inspecting an arbitrary set element:
    # one dotted declaration means the input already uses the '.L' style.
    if not decls or any(d.startswith('.') for d in decls):
        return asm
    # Longest names first so the alternation never settles for a prefix; the
    # lookahead (':' / whitespace / end of input) enforces whole-label matches.
    names = sorted(decls, key=len, reverse=True)
    alternation = '|'.join(re.escape(name) for name in names)
    label_use = re.compile(r"(^|\s+)(" + alternation + r")(?=:|\s|$)")
    return label_use.sub(r"\g<1>.\g<2>", asm)


def transform_labels(asm):
    """
    Normalize local labels and drop declarations of labels no jump targets.

    Returns the remaining lines, each terminated by a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    kept = []
    for line in asm.splitlines():
        m = _DOT_LABEL_DECL_RE.match(line)
        if not m or m.group(0) in used_decls:
            kept.append(line + '\n')
    return ''.join(kept)


def is_identifier(tk):
    """
    Return True if tk is a non-empty identifier: a letter or underscore
    followed by any number of letters, digits or underscores.
    """
    if not tk:
        return False
    first = tk[0]
    if not first.isalpha() and first != '_':
        return False
    return all(c.isalnum() or c == '_' for c in tk[1:])


def process_identifiers(l):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    pieces = []
    for tk in _TOKEN_SPLIT_RE.split(l):
        if is_identifier(tk):
            if tk.startswith('__Z'):
                # '__Z...' loses exactly one leading underscore.
                tk = tk[1:]
            elif tk.startswith('_') and len(tk) > 1 and \
                    tk[1].isalpha() and tk[1] != 'Z':
                # '_name' -> 'name'; names starting with '_Z' are left alone.
                tk = tk[1:]
        pieces.append(tk)
    return ''.join(pieces)


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Unused local labels are removed first, then every line matching a discard
    pattern is dropped, '@GOTPCREL' is removed and identifiers are normalized.
    A blank line is emitted before each non-local label definition except at
    the very start of the output. Returns the result as a single string.
    """
    asm = transform_labels(asm)
    out = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace('@GOTPCREL', '')
        discard = any(reg.match(line) for reg in _DISCARD_REGEXES)
        keep = any(reg.match(line) for reg in _KEEP_REGEXES)
        if discard and not keep:
            continue
        if _FN_LABEL_DEF_RE.match(line) and out:
            out.append('\n')
        out.append(process_identifiers(line))
        out.append('\n')
    return ''.join(out)


def main():
    """
    Command line entry point: read the input assembly file, strip it and
    write the result to the output file. Exits with status 1 if the input
    file does not exist.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # Unrecognized extra arguments are tolerated and ignored.
    args, _unknown_args = parser.parse_known_args()
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(("ERROR: input file '%s' does not exist") % input_path)
        sys.exit(1)
    with open(input_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, 'w') as f:
        f.write(new_contents)


if __name__ == '__main__':
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;