1// Copyright 2021, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include "test-runner.h"
28#include "test-utils.h"
29
30#include "aarch64/cpu-aarch64.h"
31#include "aarch64/disasm-aarch64.h"
32#include "aarch64/macro-assembler-aarch64.h"
33#include "aarch64/simulator-aarch64.h"
34#include "aarch64/test-utils-aarch64.h"
35#include "test-assembler-aarch64.h"
36
37#define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
38
39namespace vixl {
40namespace aarch64 {
41
42TEST_SVE(sve_matmul) {
43  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
44                          CPUFeatures::kSVEI8MM,
45                          CPUFeatures::kNEON,
46                          CPUFeatures::kCRC32);
47  START();
48
49  SetInitialMachineState(&masm);
50  // state = 0xe2bd2480
51
52  {
53    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
54    __ dci(0x45179979);  // smmla z25.s, z11.b, z23.b
55    // vl128 state = 0xf1ca8a4d
56    __ dci(0x45179b51);  // smmla z17.s, z26.b, z23.b
57    // vl128 state = 0x4458ad10
58    __ dci(0x45d79b53);  // ummla z19.s, z26.b, z23.b
59    // vl128 state = 0x43d4d064
60    __ dci(0x45d69b17);  // ummla z23.s, z24.b, z22.b
61    // vl128 state = 0x601e77c8
62    __ dci(0x45c69b33);  // ummla z19.s, z25.b, z6.b
63    // vl128 state = 0x561b4e22
64    __ dci(0x45c49b1b);  // ummla z27.s, z24.b, z4.b
65    // vl128 state = 0x89b65d78
66    __ dci(0x45dc9b1a);  // ummla z26.s, z24.b, z28.b
67    // vl128 state = 0x85c9e62d
68    __ dci(0x45d99b1b);  // ummla z27.s, z24.b, z25.b
69    // vl128 state = 0x3fc74134
70    __ dci(0x45d99b19);  // ummla z25.s, z24.b, z25.b
71    // vl128 state = 0xa2fa347b
72    __ dci(0x45d99b1b);  // ummla z27.s, z24.b, z25.b
73    // vl128 state = 0xb9854782
74    __ dci(0x45899b1a);  // usmmla z26.s, z24.b, z9.b
75    // vl128 state = 0x7fd376d8
76    __ dci(0x45099b8a);  // smmla z10.s, z28.b, z9.b
77    // vl128 state = 0xb41d8433
78    __ dci(0x45019bcb);  // smmla z11.s, z30.b, z1.b
79    // vl128 state = 0xc9c0e80d
80    __ dci(0x45019bdb);  // smmla z27.s, z30.b, z1.b
81    // vl128 state = 0xf1130e02
82    __ dci(0x45019b6b);  // smmla z11.s, z27.b, z1.b
83    // vl128 state = 0x282d3dc7
84    __ dci(0x45019b6f);  // smmla z15.s, z27.b, z1.b
85    // vl128 state = 0x34570238
86    __ dci(0x45859b6b);  // usmmla z11.s, z27.b, z5.b
87    // vl128 state = 0xc451206a
88    __ dci(0x45919b6a);  // usmmla z10.s, z27.b, z17.b
89    // vl128 state = 0xa58e2ea8
90    __ dci(0x45909a62);  // usmmla z2.s, z19.b, z16.b
91    // vl128 state = 0x7b5f948d
92    __ dci(0x45809a52);  // usmmla z18.s, z18.b, z0.b
93    // vl128 state = 0xf746260d
94    __ dci(0x45889b53);  // usmmla z19.s, z26.b, z8.b
95    // vl128 state = 0xc31cc539
96    __ dci(0x45809a57);  // usmmla z23.s, z18.b, z0.b
97    // vl128 state = 0x736bb3ee
98    __ dci(0x45809a96);  // usmmla z22.s, z20.b, z0.b
99    // vl128 state = 0xbb05fef6
100    __ dci(0x45809a92);  // usmmla z18.s, z20.b, z0.b
101    // vl128 state = 0xbc594372
102    __ dci(0x45809a82);  // usmmla z2.s, z20.b, z0.b
103    // vl128 state = 0x87c5a584
104    __ dci(0x45829ad2);  // usmmla z18.s, z22.b, z2.b
105    // vl128 state = 0xa413f733
106    __ dci(0x45889ad6);  // usmmla z22.s, z22.b, z8.b
107    // vl128 state = 0x87ec445d
108    __ dci(0x45c898d2);  // ummla z18.s, z6.b, z8.b
109    // vl128 state = 0x3ca8a6e5
110    __ dci(0x450898d0);  // smmla z16.s, z6.b, z8.b
111    // vl128 state = 0x4300d87b
112    __ dci(0x45189ad8);  // smmla z24.s, z22.b, z24.b
113    // vl128 state = 0x38be2e8a
114    __ dci(0x451c9bd9);  // smmla z25.s, z30.b, z28.b
115    // vl128 state = 0x8a3e6103
116    __ dci(0x45989bc9);  // usmmla z9.s, z30.b, z24.b
117    // vl128 state = 0xc728e586
118    __ dci(0x451c9bd9);  // smmla z25.s, z30.b, z28.b
119    // vl128 state = 0x4cb44c0e
120    __ dci(0x459c99d1);  // usmmla z17.s, z14.b, z28.b
121    // vl128 state = 0x84ebcb36
122    __ dci(0x459c99d5);  // usmmla z21.s, z14.b, z28.b
123    // vl128 state = 0x8813d2e2
124    __ dci(0x451c999d);  // smmla z29.s, z12.b, z28.b
125    // vl128 state = 0x8f26ee51
126    __ dci(0x451c999f);  // smmla z31.s, z12.b, z28.b
127    // vl128 state = 0x5d626fd0
128    __ dci(0x459e998f);  // usmmla z15.s, z12.b, z30.b
129    // vl128 state = 0x6b64cc8f
130    __ dci(0x459f991f);  // usmmla z31.s, z8.b, z31.b
131    // vl128 state = 0x41648186
132    __ dci(0x4587991e);  // usmmla z30.s, z8.b, z7.b
133    // vl128 state = 0x701525ec
134    __ dci(0x45079816);  // smmla z22.s, z0.b, z7.b
135    // vl128 state = 0x61a2d024
136    __ dci(0x450f9897);  // smmla z23.s, z4.b, z15.b
137    // vl128 state = 0x82ba6bd5
138    __ dci(0x450b98d3);  // smmla z19.s, z6.b, z11.b
139    // vl128 state = 0xa842bbde
140    __ dci(0x450b98db);  // smmla z27.s, z6.b, z11.b
141    // vl128 state = 0x9977677a
142    __ dci(0x451f98d3);  // smmla z19.s, z6.b, z31.b
143    // vl128 state = 0xe6d6c2ef
144    __ dci(0x451b9adb);  // smmla z27.s, z22.b, z27.b
145    // vl128 state = 0xa535453f
146    __ dci(0x450b98d9);  // smmla z25.s, z6.b, z11.b
147    // vl128 state = 0xeda3f381
148    __ dci(0x458b9adb);  // usmmla z27.s, z22.b, z11.b
149    // vl128 state = 0xd72dbdef
150    __ dci(0x45cb98da);  // ummla z26.s, z6.b, z11.b
151    // vl128 state = 0xfae4975b
152    __ dci(0x45c999d2);  // ummla z18.s, z14.b, z9.b
153    // vl128 state = 0x0aa6e1f6
154  }
155
156  uint32_t state;
157  ComputeMachineStateHash(&masm, &state);
158  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
159  __ Ldr(w0, MemOperand(x0));
160
161  END();
162  if (CAN_RUN()) {
163    RUN();
164    uint32_t expected_hashes[] = {
165        0x0aa6e1f6,
166        0xba2d4547,
167        0x0e72a647,
168        0x15b8fc1b,
169        0x92eddc98,
170        0xe0c72bcf,
171        0x36b4e3ba,
172        0x1041114e,
173        0x4d44ebd4,
174        0xfe0e3cbf,
175        0x81c43455,
176        0x678617c5,
177        0xf72fac1f,
178        0xabdcd4e4,
179        0x108864bd,
180        0x035f6eca,
181    };
182    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
183  }
184}
185
186TEST_SVE(sve_fmatmul_s) {
187  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
188                          CPUFeatures::kSVEF32MM,
189                          CPUFeatures::kNEON,
190                          CPUFeatures::kCRC32);
191  START();
192
193  SetInitialMachineState(&masm);
194  // state = 0xe2bd2480
195
196  {
197    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
198    __ dci(0x64a1e6ee);  // fmmla z14.s, z23.s, z1.s
199    // vl128 state = 0x9db41bef
200    __ dci(0x64b1e7fe);  // fmmla z30.s, z31.s, z17.s
201    // vl128 state = 0xc1535e55
202    __ dci(0x64b9e7d6);  // fmmla z22.s, z30.s, z25.s
203    // vl128 state = 0xc65aad35
204    __ dci(0x64bde6c6);  // fmmla z6.s, z22.s, z29.s
205    // vl128 state = 0x68387c22
206    __ dci(0x64b9e4c2);  // fmmla z2.s, z6.s, z25.s
207    // vl128 state = 0xcf08b3a4
208    __ dci(0x64b9e543);  // fmmla z3.s, z10.s, z25.s
209    // vl128 state = 0x969bbe77
210    __ dci(0x64b9e553);  // fmmla z19.s, z10.s, z25.s
211    // vl128 state = 0xc3f514e1
212    __ dci(0x64b9e557);  // fmmla z23.s, z10.s, z25.s
213    // vl128 state = 0x4b351c29
214    __ dci(0x64b9e773);  // fmmla z19.s, z27.s, z25.s
215    // vl128 state = 0x5e026315
216    __ dci(0x64bbe757);  // fmmla z23.s, z26.s, z27.s
217    // vl128 state = 0x61684fe6
218    __ dci(0x64bbe755);  // fmmla z21.s, z26.s, z27.s
219    // vl128 state = 0x719b4ce0
220    __ dci(0x64bfe554);  // fmmla z20.s, z10.s, z31.s
221    // vl128 state = 0xdf3d2a1c
222    __ dci(0x64bfe550);  // fmmla z16.s, z10.s, z31.s
223    // vl128 state = 0x3279aab8
224    __ dci(0x64bfe714);  // fmmla z20.s, z24.s, z31.s
225    // vl128 state = 0x0b985869
226    __ dci(0x64b7e756);  // fmmla z22.s, z26.s, z23.s
227    // vl128 state = 0x14230587
228    __ dci(0x64b7e737);  // fmmla z23.s, z25.s, z23.s
229    // vl128 state = 0x2cb88e7f
230    __ dci(0x64bfe767);  // fmmla z7.s, z27.s, z31.s
231    // vl128 state = 0xb5ec0c65
232    __ dci(0x64bfe777);  // fmmla z23.s, z27.s, z31.s
233    // vl128 state = 0xb5e5eab0
234    __ dci(0x64bfe715);  // fmmla z21.s, z24.s, z31.s
235    // vl128 state = 0xd0491fb5
236    __ dci(0x64b7e797);  // fmmla z23.s, z28.s, z23.s
237    // vl128 state = 0x98a55a30
238  }
239
240  uint32_t state;
241  ComputeMachineStateHash(&masm, &state);
242  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
243  __ Ldr(w0, MemOperand(x0));
244
245  END();
246  if (CAN_RUN()) {
247    RUN();
248    uint32_t expected_hashes[] = {
249        0x98a55a30,
250        0x590b7715,
251        0x4562ccf3,
252        0x1f8653a6,
253        0x5fe174d5,
254        0xb300dcb8,
255        0x3cefa79e,
256        0xa22484c7,
257        0x380697ec,
258        0xde9e699b,
259        0x99d21870,
260        0x456cb46b,
261        0x207d2615,
262        0xecaf9678,
263        0x0949e2d2,
264        0xa764c43f,
265    };
266    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
267  }
268}
269
270}  // namespace aarch64
271}  // namespace vixl
272