1b8021494Sopenharmony_ci#!/usr/bin/env python3
2b8021494Sopenharmony_ci
3b8021494Sopenharmony_ci# Copyright 2016, VIXL authors
4b8021494Sopenharmony_ci# All rights reserved.
5b8021494Sopenharmony_ci#
6b8021494Sopenharmony_ci# Redistribution and use in source and binary forms, with or without
7b8021494Sopenharmony_ci# modification, are permitted provided that the following conditions are met:
8b8021494Sopenharmony_ci#
9b8021494Sopenharmony_ci#   * Redistributions of source code must retain the above copyright notice,
10b8021494Sopenharmony_ci#     this list of conditions and the following disclaimer.
11b8021494Sopenharmony_ci#   * Redistributions in binary form must reproduce the above copyright notice,
12b8021494Sopenharmony_ci#     this list of conditions and the following disclaimer in the documentation
13b8021494Sopenharmony_ci#     and/or other materials provided with the distribution.
14b8021494Sopenharmony_ci#   * Neither the name of ARM Limited nor the names of its contributors may be
15b8021494Sopenharmony_ci#     used to endorse or promote products derived from this software without
16b8021494Sopenharmony_ci#     specific prior written permission.
17b8021494Sopenharmony_ci#
18b8021494Sopenharmony_ci# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
19b8021494Sopenharmony_ci# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20b8021494Sopenharmony_ci# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21b8021494Sopenharmony_ci# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
22b8021494Sopenharmony_ci# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23b8021494Sopenharmony_ci# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24b8021494Sopenharmony_ci# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25b8021494Sopenharmony_ci# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26b8021494Sopenharmony_ci# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27b8021494Sopenharmony_ci# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28b8021494Sopenharmony_ci
29b8021494Sopenharmony_ci"""
30b8021494Sopenharmony_ciVerify generated AArch32 assembler traces against `llvm-mc`.
31b8021494Sopenharmony_ci
This script will find all files in `test/aarch32/traces/` with names starting
with `assembler-`, and check them against `llvm-mc`. It checks that our
assembler is correct by looking up what instruction we meant to assemble,
assembling it with `llvm-mc` and checking that the result is bit-identical to
what our assembler generated.
36b8021494Sopenharmony_ci
37b8021494Sopenharmony_ciYou may run the script with no arguments from VIXL's top-level directory as long
38b8021494Sopenharmony_cias `llvm-mc` is in your PATH. You may provide a different `llvm-mc` path with
39b8021494Sopenharmony_cithe `--llvm-mc` option. This script relies on version 3.8 or higher of
40b8021494Sopenharmony_ciLLVM. Previous versions refuse to assemble some instructions that ARMv8 allows,
41b8021494Sopenharmony_cibut ARMv7 did not.
42b8021494Sopenharmony_ci
43b8021494Sopenharmony_ciFor example, let's say we have the following assembler trace for CLZ
44b8021494Sopenharmony_ci(the real trace is a lot bigger):
45b8021494Sopenharmony_ci
46b8021494Sopenharmony_ci~~~
47b8021494Sopenharmony_cistatic const byte kInstruction_Clz_eq_r0_r0[] = {
48b8021494Sopenharmony_ci  0x10, 0x0f, 0x6f, 0x01 // Clz eq r0 r0
49b8021494Sopenharmony_ci};
50b8021494Sopenharmony_cistatic const byte kInstruction_Clz_eq_r0_r1[] = {
51b8021494Sopenharmony_ci  0x11, 0x0f, 0x6f, 0x01 // Clz eq r0 r1
52b8021494Sopenharmony_ci};
53b8021494Sopenharmony_cistatic const byte kInstruction_Clz_eq_r0_r2[] = {
54b8021494Sopenharmony_ci  0x12, 0x0f, 0x6f, 0x01 // Clz eq r0 r2
55b8021494Sopenharmony_ci};
56b8021494Sopenharmony_cistatic const TestResult kReferenceClz[] = {
57b8021494Sopenharmony_ci  {
58b8021494Sopenharmony_ci    ARRAY_SIZE(kInstruction_Clz_eq_r0_r0),
59b8021494Sopenharmony_ci    kInstruction_Clz_eq_r0_r0,
60b8021494Sopenharmony_ci  },
61b8021494Sopenharmony_ci  {
62b8021494Sopenharmony_ci    ARRAY_SIZE(kInstruction_Clz_eq_r0_r1),
63b8021494Sopenharmony_ci    kInstruction_Clz_eq_r0_r1,
64b8021494Sopenharmony_ci  },
65b8021494Sopenharmony_ci  {
66b8021494Sopenharmony_ci    ARRAY_SIZE(kInstruction_Clz_eq_r0_r2),
67b8021494Sopenharmony_ci    kInstruction_Clz_eq_r0_r2,
68b8021494Sopenharmony_ci  },
69b8021494Sopenharmony_ci};
70b8021494Sopenharmony_ci~~~
71b8021494Sopenharmony_ci
72b8021494Sopenharmony_ciThe traces contain both the list of bytes that were encoded as well as a comment
73b8021494Sopenharmony_ciwith a description of the instruction this is. This script searches for these
74b8021494Sopenharmony_cilines and checks them.
75b8021494Sopenharmony_ci
76b8021494Sopenharmony_ciWith our example, the script will find the following:
77b8021494Sopenharmony_ci
78b8021494Sopenharmony_ci    [
79b8021494Sopenharmony_ci      ("Clz eq r0 r0", ["0x10", "0x0f", "0x6f", "0x01"]),
80b8021494Sopenharmony_ci      ("Clz eq r0 r1", ["0x11", "0x0f", "0x6f", "0x01"]),
81b8021494Sopenharmony_ci      ("Clz eq r0 r2", ["0x12", "0x0f", "0x6f", "0x01"])
82b8021494Sopenharmony_ci    ]
83b8021494Sopenharmony_ci
84b8021494Sopenharmony_ciThen the tricky part is to convert the description of the instruction into the
85b8021494Sopenharmony_cifollowing valid assembly syntax:
86b8021494Sopenharmony_ci
87b8021494Sopenharmony_ci    clzeq r0, r0
88b8021494Sopenharmony_ci    clzeq r0, r1
89b8021494Sopenharmony_ci    clzeq r0, r2
90b8021494Sopenharmony_ci
91b8021494Sopenharmony_ciOur example is easy, but it gets more complicated with load and store
92b8021494Sopenharmony_ciinstructions for example. We can feed this as input to `llvm-mc`:
93b8021494Sopenharmony_ci
94b8021494Sopenharmony_ci    $ echo "
95b8021494Sopenharmony_ci      clzeq r0, r0
96b8021494Sopenharmony_ci      clzeq r0, r1
97b8021494Sopenharmony_ci      clzeq r0, r2
98b8021494Sopenharmony_ci    " | llvm-mc -assemble -arch=arm -mattr=v8,crc -show-encoding
99b8021494Sopenharmony_ci
100b8021494Sopenharmony_ciAnd we will get the following output:
101b8021494Sopenharmony_ci
102b8021494Sopenharmony_ci            .text
103b8021494Sopenharmony_ci            clzeq   r0, r0                  @ encoding: [0x10,0x0f,0x6f,0x01]
104b8021494Sopenharmony_ci            clzeq   r0, r1                  @ encoding: [0x11,0x0f,0x6f,0x01]
105b8021494Sopenharmony_ci            clzeq   r0, r2                  @ encoding: [0x12,0x0f,0x6f,0x01]
106b8021494Sopenharmony_ci
107b8021494Sopenharmony_ciThe script will finally extract the encoding and compare it to what VIXL
108b8021494Sopenharmony_cigenerated.
109b8021494Sopenharmony_ci"""
110b8021494Sopenharmony_ci
111b8021494Sopenharmony_ciimport argparse
112b8021494Sopenharmony_ciimport subprocess
113b8021494Sopenharmony_ciimport os
114b8021494Sopenharmony_ciimport re
115b8021494Sopenharmony_ciimport itertools
116b8021494Sopenharmony_ciimport types
117b8021494Sopenharmony_ci
def BuildOptions(argv=None):
  """
  Parse the command line options of this script.

  `argv` is the list of arguments to parse. When it is `None` (the default),
  `argparse` falls back to `sys.argv[1:]`, which is what the script wants when
  run from the command line. Passing an explicit list allows the parser to be
  driven programmatically.

  Returns the `argparse.Namespace` holding `llvm_mc` and `verbose`.
  """
  parser = argparse.ArgumentParser(
      description = 'Use `llvm-mc` to check the assembler traces are correct.',
      formatter_class = argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('--llvm-mc', default='llvm-mc', help='Path to llvm-mc')
  parser.add_argument('--verbose', '-v', action='store_true',
                      help='Print the name of each trace file being verified')
  return parser.parse_args(argv)
125b8021494Sopenharmony_ci
126b8021494Sopenharmony_ci
def CheckLLVMVersion(llvm_mc):
  """
  Make sure the `llvm-mc` binary at `llvm_mc` is recent enough.

  Runs `llvm_mc -version` and looks for "LLVM version <major>.<minor>" in its
  output. Version components may have several digits ("14.0.6") and the line
  may carry a vendor prefix or a suffix ("3.8.0svn"); only the major and minor
  numbers are used.

  Raises `Exception` if the version cannot be found in the output or if it is
  lower than 3.8.
  """
  version = subprocess.check_output([llvm_mc, '-version']).decode()
  m = re.search(r"LLVM version (\d+)\.(\d+)", version)
  if m is None:
    raise Exception(
        "Could not determine the LLVM version from `{} -version`.".format(
            llvm_mc))
  major, minor = (int(component) for component in m.groups())
  # Tuples compare element by element, so this handles 3.7 < 3.8 < 10.0.
  if (major, minor) < (3, 8):
    raise Exception("This script requires LLVM version 3.8 or higher.")
133b8021494Sopenharmony_ci
134b8021494Sopenharmony_ci
def ConvertToLLVMFormat(vixl_instruction, triple):
  """
  Take a string representing an instruction and convert it to assembly syntax
  for LLVM. VIXL's test generation framework will print instruction
  representations as a space separated list. The first element is the mnemonic
  and the following elements are operands. Several instructions may be given
  at once, separated by ';' (an IT instruction followed by the instruction
  under test, for example).

  Arguments:
    vixl_instruction: the description found in a trace, e.g. "Clz eq r0 r1".
    triple: the target being verified. "thumbv8" enables additional
      T32-specific conversion rules; any other value uses the generic rules
      only.

  Returns the LLVM assembly as a string, with one line per instruction.

  Raises `Exception` if any of the instructions has no matching converter, or
  if `vixl_instruction` contains no instruction at all.
  """

  def DtUntypedToLLVM(matches):
    # `matches` is (mnemonic, untyped data type, rd, rn, rm). LLVM spells an
    # untyped data type with its size only, e.g. "untyped8" becomes "8".
    mnemonic, untyped_dt, rd, rn, rm = matches
    sizes = {"untyped8": "8", "untyped16": "16", "untyped32": "32"}
    if untyped_dt not in sizes:
      raise Exception()
    return "{}.{} {}, {}, {}".format(mnemonic, sizes[untyped_dt], rd, rn, rm)

  # Dictionary of patterns. The key is an identifier used in
  # `llvm_mc_instruction_converters` below. The value needs to be a capturing
  # regular expression.
  pattern_matchers = {
      # Allow an optional underscore in case this an "and" instruction.
      "mnemonic": r"(\w+?)_?",
      "condition":
          "(al|eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)",
      "register":
          "(r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|r10|r11|r12|r13|r14|r15|pc|sp|lr)",
      "immediate": "(0x[0-9a-f]+|[0-9]+)",
      "shift": "(lsl|lsr|asr|ror)",
      "dregister": "(d[0-9]|d[12][0-9]|d3[01])",
      "dt": "(s8|s16|s32|s64|u8|u16|u32|u64|f16|f32|f64|i8|i16|i32|i64|p8|p64)",
      "dt_untyped": "(untyped8|untyped16|untyped32)"
  }

  # List of converters. Each of them represents an instruction form and what to
  # convert it to. This list needs to be complete; an exception is raised if we
  # couldn't find a converter for the instruction.
  #
  # The first part of each tuple is a pattern to match. It's simply a regular
  # expression. Additionally, each identifier in curly braces is replaced by the
  # corresponding pattern from `pattern_matchers`.
  #
  # The second part of the tuple describes what the result will look like. It
  # is either a format string, in which empty curly braces are replaced by
  # matches, in order, or a function that receives the tuple of matches and
  # returns the converted instruction.
  llvm_mc_instruction_converters = [
      ("it {condition}", "it {}"),
      ("{mnemonic} {condition} {register} {immediate}",
       "{}{} {}, #{}"),
      ("{mnemonic} {condition} {register} {register} {immediate}",
       "{}{} {}, {}, #{}"),
      ("{mnemonic} {condition} {register} {register}",
       "{}{} {}, {}"),
      ("{mnemonic} {condition} {register} {register} {register}",
       "{}{} {}, {}, {}"),
      ("{mnemonic} {register} {register} {register}",
       "{} {}, {}, {}"),
      ("{mnemonic} {condition} {register} {register} {register} {shift} "
           "{immediate}",
       "{}{} {}, {}, {}, {} #{}"),
      ("{mnemonic} {condition} {register} {register} {register} {shift} "
           "{register}",
       "{}{} {}, {}, {}, {} {}"),
      ("{mnemonic} {condition} {register} {register} {shift} {immediate}",
       "{}{} {}, {}, {} #{}"),
      ("{mnemonic} {condition} {register} {register} {shift} {register}",
       "{}{} {}, {}, {} {}"),
      ("{mnemonic} {condition} {register} {register} plus {immediate} offset",
       "{}{} {}, [{}, #{}]"),
      ("{mnemonic} {condition} {register} {register} minus {immediate} offset",
       "{}{} {}, [{}, #-{}]"),
      ("{mnemonic} {condition} {register} {register} plus {immediate} postindex",
       "{}{} {}, [{}], #{}"),
      ("{mnemonic} {condition} {register} {register} minus {immediate} "
           "postindex",
       "{}{} {}, [{}], #-{}"),
      ("{mnemonic} {condition} {register} {register} plus {immediate} preindex",
       "{}{} {}, [{}, #{}]!"),
      ("{mnemonic} {condition} {register} {register} minus {immediate} "
           "preindex",
       "{}{} {}, [{}, #-{}]!"),
      ("{mnemonic} {condition} {register} {register} plus {register} offset",
       "{}{} {}, [{}, {}]"),
      ("{mnemonic} {condition} {register} {register} minus {register} offset",
       "{}{} {}, [{}, -{}]"),
      ("{mnemonic} {condition} {register} {register} plus {register} postindex",
       "{}{} {}, [{}], {}"),
      ("{mnemonic} {condition} {register} {register} minus {register} "
           "postindex",
       "{}{} {}, [{}], -{}"),
      ("{mnemonic} {condition} {register} {register} plus {register} preindex",
       "{}{} {}, [{}, {}]!"),
      ("{mnemonic} {condition} {register} {register} minus {register} preindex",
       "{}{} {}, [{}, -{}]!"),
      ("{mnemonic} {condition} {register} {register} plus {register} {shift} "
           "{immediate} offset",
       "{}{} {}, [{}, {}, {} #{}]"),
      ("{mnemonic} {condition} {register} {register} minus {register} {shift} "
           "{immediate} offset",
       "{}{} {}, [{}, -{}, {} #{}]"),
      ("{mnemonic} {condition} {register} {register} plus {register} {shift} "
           "{immediate} postindex",
       "{}{} {}, [{}], {}, {} #{}"),
      ("{mnemonic} {condition} {register} {register} minus {register} {shift} "
           "{immediate} postindex",
       "{}{} {}, [{}], -{}, {} #{}"),
      ("{mnemonic} {condition} {register} {register} plus {register} {shift} "
           "{immediate} preindex",
       "{}{} {}, [{}, {}, {} #{}]!"),
      ("{mnemonic} {condition} {register} {register} minus {register} {shift} "
           "{immediate} preindex",
       "{}{} {}, [{}, -{}, {} #{}]!"),
      ("{mnemonic} {dt} {dregister} {dregister} {dregister}",
       "{}.{} {}, {}, {}"),
      ("{mnemonic} {dt_untyped} {dregister} {dregister} {dregister}",
       DtUntypedToLLVM)
  ]

  # Work around issues in LLVM 3.8.
  if triple == "thumbv8":
    def ConvertMovRdImm(matches):
      """
      LLVM chooses the T3 encoding for `mov <rd>, #<immediate>` when the
      immediate fits both into a modified immediate (T2 encoding) and 16
      bits (T3 encoding). Adding the `.W` modifier forces the T2 encoding to
      be used.
      """
      # `matches` is (rd, immediate) from "mov al {register} {immediate}".
      # The immediate pattern accepts both "0x"-prefixed hexadecimal and plain
      # decimal, so pick the base accordingly.
      immediate = matches[1]
      imm = int(immediate, 16 if immediate.startswith("0x") else 10)
      if imm <= 0xffff:
        lsb = imm & -imm
        if (imm >> 8) < lsb:
          return "mov.w {}, #{}".format(*matches)
      # Fall back to a LLVM making the right decision.
      return "mov {}, #{}".format(*matches)
    # These rules go first so that they take precedence over the generic ones.
    llvm_mc_instruction_converters[:0] = [
        # The ARM ARM specifies that if <Rn> is PC in either an ADD or SUB
        # instruction with an immediate, the assembler should use the ADR
        # encoding. LLVM does not know about this subtlety. We get around this
        # by manually translating the instruction to their ADR form.
        ("add al {register} pc {immediate}", "adr {}, #{}"),
        ("sub al {register} pc {immediate}", "adr {}, #-{}"),

        # LLVM is (rightfully) being helpful by swapping register operands so
        # that the 16 bit encoding of the following instructions is used.
        # However, VIXL does not do this. These rules specifically add the `.w`
        # modifier to force LLVM to use the 32 bit encoding if the last register
        # is identical to first one. But at the same time, we should still use
        # the narrow encoding if all registers are the same.
        (r"adcs al {register} (\1) (\1)", "adcs.n {}, {}, {}"),
        (r"adcs al {register} {register} (\1)", "adcs.w {}, {}, {}"),
        (r"orrs al {register} (\1) (\1)", "orrs.n {}, {}, {}"),
        (r"orrs al {register} {register} (\1)", "orrs.w {}, {}, {}"),
        (r"eors al {register} (\1) (\1)", "eors.n {}, {}, {}"),
        (r"eors al {register} {register} (\1)", "eors.w {}, {}, {}"),
        (r"ands al {register} (\1) (\1)", "ands.n {}, {}, {}"),
        (r"ands al {register} {register} (\1)", "ands.w {}, {}, {}"),
        # Solve the same issue as for the previous rules, however, we need to
        # take into account that ADD instructions with the stack pointer have
        # additional 16 bit forms.
        (r"add al {register} (\1) (\1)", "add.n {}, {}, {}"),
        (r"add al {register} (\1) r13", "add.w {}, {}, sp"),
        (r"add al {register} r13 (\1)", "add.n {}, sp, {}"),
        (r"add al {register} {register} (\1)", "add.w {}, {}, {}"),
        ("mov al {register} {immediate}", ConvertMovRdImm)
    ]

  # Our test generator framework uses mnemonics starting with a capital letters.
  # We need everything to be lower case for LLVM.
  vixl_instruction = vixl_instruction.lower()

  llvm_instruction = []

  # VIXL may have generated more than one instruction separated by ';'
  # (an IT instruction for example).
  for instruction in vixl_instruction.split(';'):
    # Strip out extra white spaces.
    instruction = instruction.strip()
    if not instruction:
      # Nothing between two separators (or after a trailing one).
      continue
    # Try all converters in the list; the first one that matches wins.
    for pattern, result in llvm_mc_instruction_converters:
      # Build the regular expression for this converter.
      instruction_matcher = "^" + pattern.format(**pattern_matchers) + "$"
      match = re.match(instruction_matcher, instruction)
      if not match:
        continue
      # If we have a match, the object will contain a tuple of substrings.
      if callable(result):
        # `result` is a function, call it to produce the instruction.
        llvm_instruction.append(result(match.groups()))
      else:
        # `result` is a string, use it as the format string.
        llvm_instruction.append(result.format(*match.groups()))
      break
    else:
      # No converter worked for this instruction. Dropping it silently would
      # desynchronise LLVM's output from the trace, so fail loudly instead.
      raise Exception("Unsupported instruction {}.".format(instruction))

  if llvm_instruction:
    return "\n".join(llvm_instruction)

  # There was nothing to convert at all.
  raise Exception("Unsupported instruction {}.".format(instruction))
338b8021494Sopenharmony_ci
339b8021494Sopenharmony_ci
def ReadTrace(trace):
  """
  Receive the content of an assembler trace, extract the relevant information
  and return it as a list of tuples. The first part of each tuple is a string
  representing the instruction. The second part is a list of bytes representing
  the encoding, each byte being kept as the string found in the trace.

  Only lines indented by two spaces, holding at least two comma separated
  lower case hexadecimal bytes followed by a `//` comment, are considered.

  For example:

      [
        ("Clz eq r0 r0", ["0x10", "0x0f", "0x6f", "0x01"]),
        ("Clz eq r0 r1", ["0x11", "0x0f", "0x6f", "0x01"]),
        ("Clz eq r0 r2", ["0x12", "0x0f", "0x6f", "0x01"])
      ]
  """

  # `(?:...)` is a non-capturing group: only the two named groups matter.
  pattern = re.compile(
      r"^  (?P<encoding>(?:0x[0-9a-f]{2}, )+0x[0-9a-f]{2}) // (?P<instruction>.*)$",
      re.M)
  return [
      (m.group('instruction'), m.group('encoding').replace(" ", "").split(","))
      for m in pattern.finditer(trace)
  ]
363b8021494Sopenharmony_ci
364b8021494Sopenharmony_ci
def VerifyInstructionsWithLLVMMC(llvm_mc, f, triple):
  """
  Extract all instructions from `f`, feed them to `llvm-mc` and make sure it's
  encoded them the same way as VIXL. `triple` allows us to specify either
  "thumbv8" or "armv8".

  Arguments:
    llvm_mc: path to the `llvm-mc` executable to run.
    f: readable text file object holding an assembler trace.
    triple: value given to `llvm-mc`'s `-triple` option and to
      `ConvertToLLVMFormat`.

  Nothing is returned; problems are reported by printing to stdout. If
  `llvm-mc` writes anything to stderr, that output is printed and the
  verification of this trace stops.
  """

  vixl_reference = ReadTrace(f.read())
  if not vixl_reference:
    # `zip(*[])` yields nothing, so the unpacking below would raise a
    # confusing ValueError for a trace that contains no instruction.
    print("Warning: no instructions found in the trace.")
    return
  vixl_instructions, vixl_encodings = zip(*vixl_reference)
  instructions = [
      ConvertToLLVMFormat(instruction, triple)
      for instruction in vixl_instructions
  ]
  llvm_mc_proc = subprocess.Popen(
      [llvm_mc, '-assemble', '-triple={}'.format(triple), '-mattr=v8,crc',
       # LLVM fails to recognize some instructions as valid T32 when we do not
       # set `-mcpu`.
       '-mcpu=cortex-a53', '-show-encoding'],
      stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  out, err = llvm_mc_proc.communicate("\n".join(instructions).encode())
  # If `llvm-mc` printed something to stderr then stop.
  if err:
    print(err.decode())
    return

  # Extract list of bytes from `llvm-mc` output. It's in the following form:
  #
  #         clzeq   r0, r0                  @ encoding: [0x10,0x0f,0x6f,0x01]
  #                                                      ^^^^ ^^^^ ^^^^ ^^^^
  llvm_encodings = [
      match_object.group('encoding').replace(" ", "").split(",")
      for match_object in re.finditer(r".*@ encoding: \[(?P<encoding>.*)\]",
                                      out.decode())
  ]

  # If LLVM has generated exactly twice as much instructions, we assume this is
  # due to IT instructions preceding every instruction under test. VIXL's
  # assembly reference files will contain a single array of 4 bytes encoding
  # both the IT and the following instruction. While LLVM will have decoded them
  # into two separate 2 bytes arrays.
  if len(llvm_encodings) == 2 * len(vixl_encodings):
    # Glue each even-indexed encoding to the odd-indexed one that follows it.
    llvm_encodings = [
        first + second
        for first, second in zip(llvm_encodings[0::2], llvm_encodings[1::2])
    ]

  # Check the encodings from LLVM are identical to VIXL's.
  if len(llvm_encodings) != len(vixl_encodings):
    print("""Error: llvm-mc generated {} instructions than there are in the
generated trace.
        """.format("fewer" if len(llvm_encodings) < len(vixl_encodings) else "more"))
    return

  for vixl_instruction, llvm_encoding, vixl_encoding in zip(
      vixl_instructions, llvm_encodings, vixl_encodings):
    if llvm_encoding != vixl_encoding:
      print("""Error: llvm-mc disagrees on the encoding of \"{instruction}\":
  LLVM-MC: {llvm}
  VIXL:    {vixl}
            """.format(instruction=vixl_instruction.replace("\n", "; "),
                       llvm=llvm_encoding,
                       vixl=vixl_encoding))
425b8021494Sopenharmony_ci
426b8021494Sopenharmony_ci
if __name__ == "__main__":
  # Entry point: check every assembler trace found under `trace_dir`, which is
  # relative to the current working directory.
  options = BuildOptions()

  CheckLLVMVersion(options.llvm_mc)

  trace_dir = 'test/aarch32/traces/'
  # Only assembler traces are of interest; go through them in sorted order.
  assembler_traces = sorted(
      name for name in os.listdir(trace_dir) if name.startswith("assembler-"))
  for name in assembler_traces:
    if options.verbose:
      print("Verifying \"" + name + "\".")
    with open(os.path.join(trace_dir, name), "r") as trace:
      # The ISA is deduced from the file name; "t32" is looked for first.
      if "t32" in name:
        triple = "thumbv8"
      elif "a32" in name:
        triple = "armv8"
      else:
        raise Exception("Failed to recognize the ISA in \"" + name + "\".")
      VerifyInstructionsWithLLVMMC(options.llvm_mc, trace, triple)
449