1a8c51b3fSopenharmony_ci#!/usr/bin/env python3
2a8c51b3fSopenharmony_ci
3a8c51b3fSopenharmony_ci"""
4a8c51b3fSopenharmony_cistrip_asm.py - Cleanup ASM output for the specified file
5a8c51b3fSopenharmony_ci"""
6a8c51b3fSopenharmony_ci
7a8c51b3fSopenharmony_cifrom argparse import ArgumentParser
8a8c51b3fSopenharmony_ciimport sys
9a8c51b3fSopenharmony_ciimport os
10a8c51b3fSopenharmony_ciimport re
11a8c51b3fSopenharmony_ci
def find_used_labels(asm):
    """Collect the local labels that are the target of a jump instruction.

    A line counts as a jump when, after optional leading whitespace, it
    starts with ``j`` followed by lowercase letters, whitespace, and a
    ``.L``-prefixed label. References made by any other kind of instruction
    are not detected.

    Args:
        asm: Assembly text, possibly spanning many lines.

    Returns:
        A set of label names, each including its leading ``.L``.
    """
    # Raw string: "\s" and "\." are regex escapes, not Python string escapes.
    jump_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (jump_re.match(line) for line in asm.splitlines())
    return {'.L%s' % m.group(1) for m in matches if m}
20a8c51b3fSopenharmony_ci
21a8c51b3fSopenharmony_ci
def normalize_labels(asm):
    """Rewrite ``L<name>`` local labels as ``.L<name>``.

    Label declarations are lines that start with an optional ``.``, then
    ``L<name>``, then ``:``. The text is rewritten only when none of the
    declared labels already carries the leading dot; if at least one does,
    the input is returned unchanged so that dotted labels are never
    double-prefixed and the outcome does not depend on set iteration order.

    Args:
        asm: Assembly text, possibly spanning many lines.

    Returns:
        The assembly text with every declared ``L<name>`` label (and each
        whitespace-preceded reference to it) prefixed with ``.``, or the
        original text when no rewrite is needed.
    """
    label_decl = re.compile(r"^\.?L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))
    # Nothing declared, or the file already uses dotted labels.
    if not decls or any(d.startswith('.') for d in decls):
        return asm
    # Sorted only to make the substitution order reproducible.
    for label in sorted(decls):
        # A label occurrence is either at the very start of the text or is
        # preceded by whitespace (which also covers line starts), and is
        # followed by ':', whitespace, or the end of the text.
        pattern = r"(^|\s+)" + re.escape(label) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r"\1." + label, asm)
    return asm
37a8c51b3fSopenharmony_ci
38a8c51b3fSopenharmony_ci
def transform_labels(asm):
    """Normalize local labels and drop declarations nothing jumps to.

    The text is first passed through ``normalize_labels``; then every line
    that starts with a ``.L<name>:`` declaration is removed unless
    ``find_used_labels`` reports that label as a jump target.

    Args:
        asm: Assembly text, possibly spanning many lines.

    Returns:
        The remaining lines, each terminated by a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    # Raw string: "\." is a regex escape, not a Python string escape.
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        # Keep every non-declaration line and every referenced declaration.
        if m is None or m.group(0) in used_decls:
            kept.append(line)
    return ''.join(line + '\n' for line in kept)
50a8c51b3fSopenharmony_ci
51a8c51b3fSopenharmony_ci
def is_identifier(tk):
    """Return True if *tk* looks like an identifier.

    An identifier is non-empty, starts with a letter or underscore, and
    continues with letters, digits or underscores only.

    Args:
        tk: The token string to test.

    Returns:
        True when the token is an identifier, False otherwise.
    """
    if not tk:
        return False
    head = tk[0]
    if head != '_' and not head.isalpha():
        return False
    return all(ch == '_' or ch.isalnum() for ch in tk[1:])
63a8c51b3fSopenharmony_ci
def process_identifiers(l):
    """Give identifiers on a line consistent names across platforms.

    The goal is for ELF and MachO output to agree. MachO inserts an
    additional underscore at the beginning of names; this function removes
    it:

    * an identifier starting with ``__Z`` loses its first underscore;
    * an identifier starting with ``_`` followed by a letter other than
      ``Z`` loses that underscore.

    All other text is passed through unchanged.

    Args:
        l: A single line of assembly text.

    Returns:
        The line with the identifiers rewritten.
    """
    pieces = []
    # The capturing group makes re.split return the word-like tokens as well
    # as the separators between them, so joining the pieces rebuilds the line.
    for tk in re.split(r'([a-zA-Z0-9_]+)', l):
        if is_identifier(tk):
            if tk.startswith('__Z'):
                tk = tk[1:]
            elif tk.startswith('_') and len(tk) > 1 and \
                    tk[1].isalpha() and tk[1] != 'Z':
                tk = tk[1:]
        pieces.append(tk)
    return ''.join(pieces)
82a8c51b3fSopenharmony_ci
83a8c51b3fSopenharmony_ci
def process_asm(asm):
    """Strip the ASM of unwanted directives and lines.

    Labels are first cleaned up with ``transform_labels``. Each remaining
    line then has the text ``@GOTPCREL`` removed, is dropped if it matches
    one of the discard patterns (unless a keep pattern also matches), and
    otherwise has its identifiers rewritten by ``process_identifiers``. A
    blank line is inserted before every function label except when it is
    the first line of the output.

    Args:
        asm: Assembly text, possibly spanning many lines.

    Returns:
        The stripped assembly text, each kept line terminated by a newline.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    # Raw strings throughout: "\s" and "\." are regex escapes, not Python
    # string escapes.
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    # Patterns that force a line to be kept even if a discard pattern
    # matched it; currently empty.
    keep_regexes = [

    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")
    pieces = []
    for l in asm.splitlines():
        # Remove the '@GOTPCREL' operand suffix.
        l = l.replace('@GOTPCREL', '')
        discarded = any(reg.match(l) is not None for reg in discard_regexes)
        kept = any(reg.match(l) is not None for reg in keep_regexes)
        if discarded and not kept:
            continue
        # Separate functions with a blank line, but never lead with one.
        if fn_label_def.match(l) and pieces:
            pieces.append('\n')
        pieces.append(process_identifiers(l))
        pieces.append('\n')
    return ''.join(pieces)
122a8c51b3fSopenharmony_ci
def main():
    """Command-line entry point: strip an assembly file and write the result.

    Expects two positional arguments, the input assembly file and the
    output file. Any additional, unrecognised arguments are ignored.
    Exits with status 1 if the input path is not an existing file.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # parse_known_args tolerates extra arguments; they are discarded.
    args, _ = parser.parse_known_args()
    # nargs=1 yields one-element lists.
    in_path = args.input[0]
    out_path = args.out[0]
    if not os.path.isfile(in_path):
        # Diagnostics belong on stderr so they never mix with regular output.
        print("ERROR: input file '%s' does not exist" % in_path,
              file=sys.stderr)
        sys.exit(1)
    with open(in_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(out_path, 'w') as f:
        f.write(new_contents)


# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
148a8c51b3fSopenharmony_ci
149a8c51b3fSopenharmony_ci# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
150a8c51b3fSopenharmony_ci# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
151a8c51b3fSopenharmony_ci# kate: indent-mode python; remove-trailing-spaces modified;
152