1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27
28 #include <cstdio>
29 #include <cstring>
30 #include <string>
31
32 #include "test-runner.h"
33
34 #include "aarch64/disasm-aarch64.h"
35 #include "aarch64/macro-assembler-aarch64.h"
36 #include "aarch64/test-utils-aarch64.h"
37 #include "test-disasm-aarch64.h"
38 #include "test-utils-aarch64.h"
39
40 namespace vixl {
41 namespace aarch64 {
42
// Spot checks covering a handful of SVE instruction shapes: merging and
// zeroing predication, unpredicated immediate shifts, predicate-register
// destinations, and a multiply-accumulate form. Each COMPARE pairs an
// assembler call with the text expected back from the disassembler (the macro
// itself is defined outside this file).
TEST(sve) {
  SETUP();

  // TODO: Replace these tests when the disassembler is more capable.
  COMPARE(asrr(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
          "asrr z0.b, p7/m, z0.b, z1.b");
  COMPARE(fcmeq(p6.VnD(), p7.Zeroing(), z0.VnD(), z1.VnD()),
          "fcmeq p6.d, p7/z, z0.d, z1.d");
  COMPARE(mla(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
          "mla z0.b, p7/m, z0.b, z1.b");
  COMPARE(mla(z1.VnS(), p7.Merging(), z1.VnS(), z0.VnS()),
          "mla z1.s, p7/m, z1.s, z0.s");
  COMPARE(asr(z1.VnB(), z0.VnB(), 8), "asr z1.b, z0.b, #8");
  COMPARE(lsl(z0.VnH(), z1.VnH(), 15), "lsl z0.h, z1.h, #15");
  COMPARE(lsr(z1.VnS(), z0.VnS(), 32), "lsr z1.s, z0.s, #32");
  COMPARE(and_(p6.VnB(), p7.Zeroing(), p6.VnB(), p7.VnB()),
          "and p6.b, p7/z, p6.b, p7.b");
  COMPARE(rev(p7.VnB(), p6.VnB()), "rev p7.b, p6.b");
  COMPARE(splice(z0.VnH(), p7, z0.VnH(), z1.VnH()),
          "splice z0.h, p7, z0.h, z1.h");
  COMPARE(fnmad(z0.VnD(), p6.Merging(), z1.VnD(), z0.VnD()),
          "fnmad z0.d, p6/m, z1.d, z0.d");

  CLEANUP();
}
68
// Feeds raw 32-bit encodings to the disassembler via dci() and expects each
// one to be reported as "unallocated".
TEST(sve_unallocated_fp_byte_type) {
  // Ensure disassembly of FP instructions does not report byte-sized lanes.

  SETUP();

  // NOTE(review): COMPARE_PREFIX is defined outside this file; presumably it
  // only requires the disassembly to start with the given string.
  COMPARE_PREFIX(dci(0x650003ca), "unallocated");
  COMPARE_PREFIX(dci(0x6500230b), "unallocated");
  COMPARE_PREFIX(dci(0x6500424c), "unallocated");
  COMPARE_PREFIX(dci(0x6500618d), "unallocated");
  COMPARE_PREFIX(dci(0x6500a00f), "unallocated");
  COMPARE_PREFIX(dci(0x6500de91), "unallocated");
  COMPARE_PREFIX(dci(0x6500fdd2), "unallocated");
  COMPARE_PREFIX(dci(0x65011d13), "unallocated");
  COMPARE_PREFIX(dci(0x65015b95), "unallocated");
  COMPARE_PREFIX(dci(0x65017ad6), "unallocated");
  COMPARE_PREFIX(dci(0x65019a17), "unallocated");
  COMPARE_PREFIX(dci(0x6501b958), "unallocated");
  COMPARE_PREFIX(dci(0x6502941f), "unallocated");
  COMPARE_PREFIX(dci(0x6502b360), "unallocated");
  COMPARE_PREFIX(dci(0x6502d2a1), "unallocated");
  COMPARE_PREFIX(dci(0x65038e27), "unallocated");
  COMPARE_PREFIX(dci(0x6503ad68), "unallocated");
  COMPARE_PREFIX(dci(0x65042a6c), "unallocated");
  COMPARE_PREFIX(dci(0x6504882f), "unallocated");
  COMPARE_PREFIX(dci(0x6504a770), "unallocated");
  COMPARE_PREFIX(dci(0x65052474), "unallocated");
  COMPARE_PREFIX(dci(0x65058237), "unallocated");
  COMPARE_PREFIX(dci(0x65063dbd), "unallocated");
  COMPARE_PREFIX(dci(0x65069b80), "unallocated");
  COMPARE_PREFIX(dci(0x6506bac1), "unallocated");
  COMPARE_PREFIX(dci(0x65071884), "unallocated");
  COMPARE_PREFIX(dci(0x650737c5), "unallocated");
  COMPARE_PREFIX(dci(0x65079588), "unallocated");
  COMPARE_PREFIX(dci(0x6507b4c9), "unallocated");
  COMPARE_PREFIX(dci(0x65088f90), "unallocated");
  COMPARE_PREFIX(dci(0x65090c94), "unallocated");
  COMPARE_PREFIX(dci(0x65098998), "unallocated");
  COMPARE_PREFIX(dci(0x650a83a0), "unallocated");
  COMPARE_PREFIX(dci(0x650c96f1), "unallocated");
  COMPARE_PREFIX(dci(0x650d90f9), "unallocated");
  COMPARE_PREFIX(dci(0x65113a97), "unallocated");
  COMPARE_PREFIX(dci(0x65183010), "unallocated");
  COMPARE_PREFIX(dci(0x65200050), "unallocated");
  COMPARE_PREFIX(dci(0x65203ed2), "unallocated");
  COMPARE_PREFIX(dci(0x65205e13), "unallocated");
  COMPARE_PREFIX(dci(0x65207d54), "unallocated");
  COMPARE_PREFIX(dci(0x65209c95), "unallocated");
  COMPARE_PREFIX(dci(0x6520bbd6), "unallocated");
  COMPARE_PREFIX(dci(0x6520db17), "unallocated");
  COMPARE_PREFIX(dci(0x6520fa58), "unallocated");
  COMPARE_PREFIX(dci(0x650f31e1), "unallocated");
  COMPARE_PREFIX(dci(0x650e30f7), "unallocated");
  COMPARE_PREFIX(dci(0x6511376e), "unallocated");

  CLEANUP();
}
125
// Checks the vector-plus-vector `adr` forms: D lanes with SXTW/UXTW extended
// offsets, and S/D lanes with a plain or LSL-shifted offset. Every modifier is
// exercised with no shift amount and with shift amounts 1, 2 and 3.
TEST(sve_address_generation) {
  SETUP();

  // D-sized lanes, sign-extended 32-bit offsets.
  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW)),
          "adr z19.d, [z22.d, z11.d, sxtw]");
  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 1)),
          "adr z19.d, [z22.d, z11.d, sxtw #1]");
  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 2)),
          "adr z19.d, [z22.d, z11.d, sxtw #2]");
  COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 3)),
          "adr z19.d, [z22.d, z11.d, sxtw #3]");

  // D-sized lanes, zero-extended 32-bit offsets.
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW)),
          "adr z30.d, [z14.d, z16.d, uxtw]");
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 1)),
          "adr z30.d, [z14.d, z16.d, uxtw #1]");
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 2)),
          "adr z30.d, [z14.d, z16.d, uxtw #2]");
  COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 3)),
          "adr z30.d, [z14.d, z16.d, uxtw #3]");

  // S-sized lanes, unextended offsets.
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS())),
          "adr z8.s, [z16.s, z16.s]");
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 1)),
          "adr z8.s, [z16.s, z16.s, lsl #1]");
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 2)),
          "adr z8.s, [z16.s, z16.s, lsl #2]");
  COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 3)),
          "adr z8.s, [z16.s, z16.s, lsl #3]");

  // D-sized lanes, unextended offsets.
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD())),
          "adr z9.d, [z1.d, z16.d]");
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 1)),
          "adr z9.d, [z1.d, z16.d, lsl #1]");
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 2)),
          "adr z9.d, [z1.d, z16.d, lsl #2]");
  COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 3)),
          "adr z9.d, [z1.d, z16.d, lsl #3]");

  CLEANUP();
}
165
// Checks the code emitted by `CalculateSVEAddress` for each operand category
// it distinguishes: operands equivalent to a bare scalar, scalar-plus-immediate
// (including SVE_MUL_VL) and scalar-plus-scalar.
TEST(sve_calculate_sve_address) {
// The typedef below intentionally shadows an outer name, so -Wshadow is
// silenced for the duration of this test and restored at the end.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"

  // Shadow the `MacroAssembler` type so that the test macros work without
  // modification.
  typedef CalculateSVEAddressMacroAssembler MacroAssembler;

  SETUP();

  // It is important that we cover every branch in this test because most other
  // tests tend not to check every code path.

  // IsEquivalentToScalar()
  COMPARE_MACRO(CalculateSVEAddress(x0, SVEMemOperand(x1)), "mov x0, x1");
  COMPARE_MACRO(CalculateSVEAddress(x4, SVEMemOperand(x2, 0)), "mov x4, x2");
  COMPARE_MACRO(CalculateSVEAddress(x4, SVEMemOperand(x2, xzr, LSL, 2)),
                "mov x4, x2");

  // IsScalarPlusImmediate()
  // Simple immediates just pass through to 'Add'.
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 42)),
                "add x10, x0, #0x2a (42)");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(sp, 42)),
                "add x10, sp, #0x2a (42)");
  // SVE_MUL_VL variants use `Addpl`, which has its own tests, but
  // `CalculateSVEAddress` needs to check and handle the access size.
  // The trailing argument takes the values 0..3 here; each increment halves
  // the expected `addpl` immediate (24, 12, 6, 3).
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 0),
                "addpl x10, x0, #24");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 1),
                "addpl x10, x0, #12");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 2),
                "addpl x10, x0, #6");
  COMPARE_MACRO(CalculateSVEAddress(x10, SVEMemOperand(x0, 3, SVE_MUL_VL), 3),
                "addpl x10, x0, #3");

  // IsScalarPlusScalar()
  // All forms pass through to `Add`, but SVE_LSL must be handled correctly.
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(x2, x3)),
                "add x22, x2, x3");
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(sp, x3)),
                "add x22, sp, x3");
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(x2, x3, LSL, 2)),
                "add x22, x2, x3, lsl #2");
  COMPARE_MACRO(CalculateSVEAddress(x22, SVEMemOperand(sp, x3, LSL, 2)),
                "add x22, sp, x3, lsl #2");

  CLEANUP();

#pragma GCC diagnostic pop
}
217
// Checks SVE bitwise-immediate instructions (`and`, `eor`, `orr`, `dupm`),
// the inverted-immediate aliases (`bic`, `eon`, `orn`) and the cases where a
// `dupm` encoding is printed as `mov` instead.
TEST(sve_bitwise_imm) {
  SETUP();

  // The assembler will necessarily encode an immediate in the simplest bitset.
  // Hence a repeating pattern requested at a wide lane size is expected to be
  // printed with a narrower lane size.
  COMPARE(and_(z2.VnD(), z2.VnD(), 0x0000ffff0000ffff),
          "and z2.s, z2.s, #0xffff");
  COMPARE(dupm(z15.VnS(), 0x7f007f00), "dupm z15.h, #0x7f00");
  COMPARE(eor(z26.VnH(), z26.VnH(), 0x7ff8), "eor z26.h, z26.h, #0x7ff8");
  COMPARE(orr(z13.VnB(), z13.VnB(), 0x78), "orr z13.b, z13.b, #0x78");

  // Logical aliases.
  // These are expected to print as the base instruction with the bitwise
  // inverse of the requested immediate.
  COMPARE(bic(z21.VnD(), z21.VnD(), 0xffff00000000ffff),
          "and z21.d, z21.d, #0xffffffff0000");
  COMPARE(eon(z31.VnS(), z31.VnS(), 0x1ffe), "eor z31.s, z31.s, #0xffffe001");
  COMPARE(orn(z11.VnH(), z11.VnH(), 0x2), "orr z11.h, z11.h, #0xfffd");

  // Mov alias for dupm.
  COMPARE(mov(z0.VnH(), 0xf00f), "mov z0.h, #0xf00f");
  COMPARE_MACRO(Mov(z11.VnS(), 0xe0000003), "mov z11.s, #0xe0000003");
  COMPARE_MACRO(Mov(z22.VnD(), 0x8000), "dupm z22.d, #0x8000");

  // Test dupm versus mov disassembly.
  COMPARE(dupm(z0.VnH(), 0xfe), "dupm z0.h, #0xfe");
  COMPARE(dupm(z0.VnH(), 0xff), "dupm z0.h, #0xff");
  COMPARE(dupm(z0.VnH(), 0x1fe), "mov z0.h, #0x1fe");
  COMPARE(dupm(z0.VnH(), 0xfe00), "dupm z0.h, #0xfe00");
  COMPARE(dupm(z0.VnH(), 0xfe01), "mov z0.h, #0xfe01");
  COMPARE(dupm(z0.VnS(), 0xfe00), "dupm z0.s, #0xfe00");
  COMPARE(dupm(z0.VnS(), 0xfe000001), "mov z0.s, #0xfe000001");
  COMPARE(dupm(z0.VnS(), 0xffffff00), "dupm z0.s, #0xffffff00");
  COMPARE(dupm(z0.VnS(), 0xffffff01), "dupm z0.s, #0xffffff01");
  COMPARE(dupm(z0.VnS(), 0xfffffe01), "mov z0.s, #0xfffffe01");
  COMPARE(dupm(z0.VnS(), 0xfff), "mov z0.s, #0xfff");
  COMPARE(dupm(z0.VnD(), 0xffffffffffffff00), "dupm z0.d, #0xffffffffffffff00");
  COMPARE(dupm(z0.VnD(), 0x7fffffffffffff80), "mov z0.d, #0x7fffffffffffff80");
  COMPARE(dupm(z0.VnD(), 0x8000), "dupm z0.d, #0x8000");
  COMPARE(dupm(z0.VnD(), 0x10000), "mov z0.d, #0x10000");

  CLEANUP();
}
258
// Checks the unpredicated vector forms of `and`, `bic`, `eor` and `orr`, plus
// the `mov` alias that is printed when both `orr` sources are the same
// register.
TEST(sve_bitwise_logical_unpredicated) {
  SETUP();

  COMPARE(and_(z12.VnD(), z5.VnD(), z29.VnD()), "and z12.d, z5.d, z29.d");
  COMPARE(bic(z11.VnD(), z15.VnD(), z9.VnD()), "bic z11.d, z15.d, z9.d");
  COMPARE(eor(z9.VnD(), z31.VnD(), z29.VnD()), "eor z9.d, z31.d, z29.d");
  COMPARE(orr(z17.VnD(), z8.VnD(), z19.VnD()), "orr z17.d, z8.d, z19.d");

  // Check mov aliases.
  // The last two pass registers without a lane size; the expected text still
  // uses `.d`.
  COMPARE(orr(z17.VnD(), z8.VnD(), z8.VnD()), "mov z17.d, z8.d");
  COMPARE(mov(z18, z9), "mov z18.d, z9.d");
  COMPARE_MACRO(Mov(z19, z10), "mov z19.d, z10.d");

  CLEANUP();
}
274
// Checks the predicated (merging) SVE shifts: shift by vector, reversed-operand
// shift by vector, shift by wide (D-lane) vector, and shift by immediate. Also
// checks which instruction sequence the `Asr`/`Lsl`/`Lsr`/`Asrd` macros emit
// depending on how the destination aliases the sources.
TEST(sve_bitwise_shift_predicated) {
  SETUP();

  // Shift by vector, same lane size for all operands.
  COMPARE(asrr(z20.VnB(), p3.Merging(), z20.VnB(), z11.VnB()),
          "asrr z20.b, p3/m, z20.b, z11.b");
  COMPARE(asrr(z20.VnH(), p3.Merging(), z20.VnH(), z11.VnH()),
          "asrr z20.h, p3/m, z20.h, z11.h");
  COMPARE(asrr(z20.VnS(), p3.Merging(), z20.VnS(), z11.VnS()),
          "asrr z20.s, p3/m, z20.s, z11.s");
  COMPARE(asrr(z20.VnD(), p3.Merging(), z20.VnD(), z11.VnD()),
          "asrr z20.d, p3/m, z20.d, z11.d");
  COMPARE(asr(z26.VnB(), p2.Merging(), z26.VnB(), z17.VnB()),
          "asr z26.b, p2/m, z26.b, z17.b");
  COMPARE(asr(z26.VnH(), p2.Merging(), z26.VnH(), z17.VnH()),
          "asr z26.h, p2/m, z26.h, z17.h");
  COMPARE(asr(z26.VnS(), p2.Merging(), z26.VnS(), z17.VnS()),
          "asr z26.s, p2/m, z26.s, z17.s");
  COMPARE(asr(z26.VnD(), p2.Merging(), z26.VnD(), z17.VnD()),
          "asr z26.d, p2/m, z26.d, z17.d");
  COMPARE(lslr(z30.VnB(), p1.Merging(), z30.VnB(), z26.VnB()),
          "lslr z30.b, p1/m, z30.b, z26.b");
  COMPARE(lslr(z30.VnH(), p1.Merging(), z30.VnH(), z26.VnH()),
          "lslr z30.h, p1/m, z30.h, z26.h");
  COMPARE(lslr(z30.VnS(), p1.Merging(), z30.VnS(), z26.VnS()),
          "lslr z30.s, p1/m, z30.s, z26.s");
  COMPARE(lslr(z30.VnD(), p1.Merging(), z30.VnD(), z26.VnD()),
          "lslr z30.d, p1/m, z30.d, z26.d");
  COMPARE(lsl(z14.VnB(), p6.Merging(), z14.VnB(), z25.VnB()),
          "lsl z14.b, p6/m, z14.b, z25.b");
  COMPARE(lsl(z14.VnH(), p6.Merging(), z14.VnH(), z25.VnH()),
          "lsl z14.h, p6/m, z14.h, z25.h");
  COMPARE(lsl(z14.VnS(), p6.Merging(), z14.VnS(), z25.VnS()),
          "lsl z14.s, p6/m, z14.s, z25.s");
  COMPARE(lsl(z14.VnD(), p6.Merging(), z14.VnD(), z25.VnD()),
          "lsl z14.d, p6/m, z14.d, z25.d");
  COMPARE(lsrr(z3.VnB(), p1.Merging(), z3.VnB(), z16.VnB()),
          "lsrr z3.b, p1/m, z3.b, z16.b");
  COMPARE(lsrr(z3.VnH(), p1.Merging(), z3.VnH(), z16.VnH()),
          "lsrr z3.h, p1/m, z3.h, z16.h");
  COMPARE(lsrr(z3.VnS(), p1.Merging(), z3.VnS(), z16.VnS()),
          "lsrr z3.s, p1/m, z3.s, z16.s");
  COMPARE(lsrr(z3.VnD(), p1.Merging(), z3.VnD(), z16.VnD()),
          "lsrr z3.d, p1/m, z3.d, z16.d");
  COMPARE(lsr(z29.VnB(), p7.Merging(), z29.VnB(), z13.VnB()),
          "lsr z29.b, p7/m, z29.b, z13.b");
  COMPARE(lsr(z29.VnH(), p7.Merging(), z29.VnH(), z13.VnH()),
          "lsr z29.h, p7/m, z29.h, z13.h");
  COMPARE(lsr(z29.VnS(), p7.Merging(), z29.VnS(), z13.VnS()),
          "lsr z29.s, p7/m, z29.s, z13.s");
  COMPARE(lsr(z29.VnD(), p7.Merging(), z29.VnD(), z13.VnD()),
          "lsr z29.d, p7/m, z29.d, z13.d");

  // Shift by wide elements: the shift-amount vector always has D lanes while
  // the data lanes are B, H or S.
  COMPARE(asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnD()),
          "asr z4.b, p0/m, z4.b, z30.d");
  COMPARE(asr(z4.VnH(), p0.Merging(), z4.VnH(), z30.VnD()),
          "asr z4.h, p0/m, z4.h, z30.d");
  COMPARE(asr(z4.VnS(), p0.Merging(), z4.VnS(), z30.VnD()),
          "asr z4.s, p0/m, z4.s, z30.d");
  COMPARE(lsl(z13.VnB(), p7.Merging(), z13.VnB(), z18.VnD()),
          "lsl z13.b, p7/m, z13.b, z18.d");
  COMPARE(lsl(z13.VnH(), p7.Merging(), z13.VnH(), z18.VnD()),
          "lsl z13.h, p7/m, z13.h, z18.d");
  COMPARE(lsl(z13.VnS(), p7.Merging(), z13.VnS(), z18.VnD()),
          "lsl z13.s, p7/m, z13.s, z18.d");
  COMPARE(lsr(z1.VnB(), p4.Merging(), z1.VnB(), z14.VnD()),
          "lsr z1.b, p4/m, z1.b, z14.d");
  COMPARE(lsr(z1.VnH(), p4.Merging(), z1.VnH(), z14.VnD()),
          "lsr z1.h, p4/m, z1.h, z14.d");
  COMPARE(lsr(z1.VnS(), p4.Merging(), z1.VnS(), z14.VnD()),
          "lsr z1.s, p4/m, z1.s, z14.d");

  // Macro forms. Three aliasing cases per shift: destination equals the first
  // source (plain form), destination equals the second source (reversed
  // form), and destination distinct from both (movprfx followed by the plain
  // form).
  COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()),
                "asr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z30.VnB(), z4.VnB()),
                "asrr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z10.VnB(), z14.VnB()),
                "movprfx z4.b, p0/m, z10.b\n"
                "asr z4.b, p0/m, z4.b, z14.b");
  COMPARE_MACRO(Lsl(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()),
                "lsl z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsl(z4.VnB(), p0.Merging(), z30.VnB(), z4.VnB()),
                "lslr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsl(z4.VnB(), p0.Merging(), z10.VnB(), z14.VnB()),
                "movprfx z4.b, p0/m, z10.b\n"
                "lsl z4.b, p0/m, z4.b, z14.b");
  COMPARE_MACRO(Lsr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()),
                "lsr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsr(z4.VnB(), p0.Merging(), z30.VnB(), z4.VnB()),
                "lsrr z4.b, p0/m, z4.b, z30.b");
  COMPARE_MACRO(Lsr(z4.VnB(), p0.Merging(), z10.VnB(), z14.VnB()),
                "movprfx z4.b, p0/m, z10.b\n"
                "lsr z4.b, p0/m, z4.b, z14.b");

  // Shift by immediate. The `lsr` cases use a shift equal to the lane width
  // (8, 16, 32, 64); the `lsl` cases use 0 and 63.
  COMPARE(asrd(z0.VnB(), p4.Merging(), z0.VnB(), 1),
          "asrd z0.b, p4/m, z0.b, #1");
  COMPARE(asrd(z0.VnH(), p4.Merging(), z0.VnH(), 1),
          "asrd z0.h, p4/m, z0.h, #1");
  COMPARE(asrd(z0.VnS(), p4.Merging(), z0.VnS(), 1),
          "asrd z0.s, p4/m, z0.s, #1");
  COMPARE(asrd(z0.VnD(), p4.Merging(), z0.VnD(), 1),
          "asrd z0.d, p4/m, z0.d, #1");
  COMPARE(asr(z8.VnB(), p7.Merging(), z8.VnB(), 3), "asr z8.b, p7/m, z8.b, #3");
  COMPARE(asr(z8.VnH(), p7.Merging(), z8.VnH(), 3), "asr z8.h, p7/m, z8.h, #3");
  COMPARE(asr(z8.VnS(), p7.Merging(), z8.VnS(), 3), "asr z8.s, p7/m, z8.s, #3");
  COMPARE(asr(z8.VnD(), p7.Merging(), z8.VnD(), 3), "asr z8.d, p7/m, z8.d, #3");
  COMPARE(lsl(z29.VnB(), p6.Merging(), z29.VnB(), 0),
          "lsl z29.b, p6/m, z29.b, #0");
  COMPARE(lsl(z29.VnH(), p6.Merging(), z29.VnH(), 5),
          "lsl z29.h, p6/m, z29.h, #5");
  COMPARE(lsl(z29.VnS(), p6.Merging(), z29.VnS(), 0),
          "lsl z29.s, p6/m, z29.s, #0");
  COMPARE(lsl(z29.VnD(), p6.Merging(), z29.VnD(), 63),
          "lsl z29.d, p6/m, z29.d, #63");
  COMPARE(lsr(z24.VnB(), p2.Merging(), z24.VnB(), 8),
          "lsr z24.b, p2/m, z24.b, #8");
  COMPARE(lsr(z24.VnH(), p2.Merging(), z24.VnH(), 16),
          "lsr z24.h, p2/m, z24.h, #16");
  COMPARE(lsr(z24.VnS(), p2.Merging(), z24.VnS(), 32),
          "lsr z24.s, p2/m, z24.s, #32");
  COMPARE(lsr(z24.VnD(), p2.Merging(), z24.VnD(), 64),
          "lsr z24.d, p2/m, z24.d, #64");

  // Immediate-shift macros with a destination distinct from the source: a
  // movprfx is expected before the shift.
  COMPARE_MACRO(Asrd(z0.VnB(), p4.Merging(), z8.VnB(), 1),
                "movprfx z0.b, p4/m, z8.b\n"
                "asrd z0.b, p4/m, z0.b, #1");
  COMPARE_MACRO(Asr(z8.VnH(), p7.Merging(), z29.VnH(), 3),
                "movprfx z8.h, p7/m, z29.h\n"
                "asr z8.h, p7/m, z8.h, #3");
  COMPARE_MACRO(Lsl(z29.VnS(), p6.Merging(), z24.VnS(), 0),
                "movprfx z29.s, p6/m, z24.s\n"
                "lsl z29.s, p6/m, z29.s, #0");
  COMPARE_MACRO(Lsr(z24.VnD(), p2.Merging(), z0.VnD(), 64),
                "movprfx z24.d, p2/m, z0.d\n"
                "lsr z24.d, p2/m, z24.d, #64");

  CLEANUP();
}
412
// Checks the unpredicated SVE shifts: two raw encodings that must be reported
// as "unimplemented", shift by immediate at every lane size, and shift by a
// wide (D-lane) vector for B, H and S data lanes.
TEST(sve_bitwise_shift_unpredicated) {
  SETUP();

  // Test lsl with reserved D-sized lane field.
  COMPARE_PREFIX(dci(0x04ef8e15), "unimplemented");
  // Test asr with reserved tsz field.
  COMPARE_PREFIX(dci(0x04209345), "unimplemented");

  // Shift by immediate. `asr` is checked at shift 1 and at the lane width
  // (8/16/32/64); `lsr` and `lsl` use assorted amounts below the lane width.
  COMPARE(asr(z4.VnB(), z27.VnB(), 1), "asr z4.b, z27.b, #1");
  COMPARE(asr(z5.VnB(), z26.VnB(), 8), "asr z5.b, z26.b, #8");
  COMPARE(asr(z6.VnH(), z25.VnH(), 1), "asr z6.h, z25.h, #1");
  COMPARE(asr(z7.VnH(), z24.VnH(), 16), "asr z7.h, z24.h, #16");
  COMPARE(asr(z8.VnS(), z23.VnS(), 1), "asr z8.s, z23.s, #1");
  COMPARE(asr(z9.VnS(), z22.VnS(), 32), "asr z9.s, z22.s, #32");
  COMPARE(asr(z10.VnD(), z21.VnD(), 1), "asr z10.d, z21.d, #1");
  COMPARE(asr(z11.VnD(), z20.VnD(), 64), "asr z11.d, z20.d, #64");
  COMPARE(lsr(z4.VnB(), z27.VnB(), 3), "lsr z4.b, z27.b, #3");
  COMPARE(lsr(z5.VnB(), z26.VnB(), 7), "lsr z5.b, z26.b, #7");
  COMPARE(lsr(z6.VnH(), z25.VnH(), 8), "lsr z6.h, z25.h, #8");
  COMPARE(lsr(z7.VnH(), z24.VnH(), 15), "lsr z7.h, z24.h, #15");
  COMPARE(lsr(z8.VnS(), z23.VnS(), 14), "lsr z8.s, z23.s, #14");
  COMPARE(lsr(z9.VnS(), z22.VnS(), 31), "lsr z9.s, z22.s, #31");
  COMPARE(lsr(z10.VnD(), z21.VnD(), 30), "lsr z10.d, z21.d, #30");
  COMPARE(lsr(z11.VnD(), z20.VnD(), 63), "lsr z11.d, z20.d, #63");
  COMPARE(lsl(z4.VnB(), z27.VnB(), 4), "lsl z4.b, z27.b, #4");
  COMPARE(lsl(z5.VnB(), z26.VnB(), 6), "lsl z5.b, z26.b, #6");
  COMPARE(lsl(z6.VnH(), z25.VnH(), 10), "lsl z6.h, z25.h, #10");
  COMPARE(lsl(z7.VnH(), z24.VnH(), 14), "lsl z7.h, z24.h, #14");
  COMPARE(lsl(z8.VnS(), z23.VnS(), 21), "lsl z8.s, z23.s, #21");
  COMPARE(lsl(z9.VnS(), z22.VnS(), 30), "lsl z9.s, z22.s, #30");
  COMPARE(lsl(z10.VnD(), z21.VnD(), 44), "lsl z10.d, z21.d, #44");
  COMPARE(lsl(z11.VnD(), z20.VnD(), 62), "lsl z11.d, z20.d, #62");

  // Shift by wide elements: the shift-amount vector has D lanes.
  COMPARE(asr(z12.VnB(), z8.VnB(), z14.VnD()), "asr z12.b, z8.b, z14.d");
  COMPARE(asr(z14.VnH(), z8.VnH(), z12.VnD()), "asr z14.h, z8.h, z12.d");
  COMPARE(asr(z16.VnS(), z8.VnS(), z10.VnD()), "asr z16.s, z8.s, z10.d");
  COMPARE(lsl(z21.VnB(), z16.VnB(), z15.VnD()), "lsl z21.b, z16.b, z15.d");
  COMPARE(lsl(z23.VnH(), z16.VnH(), z13.VnD()), "lsl z23.h, z16.h, z13.d");
  COMPARE(lsl(z25.VnS(), z16.VnS(), z11.VnD()), "lsl z25.s, z16.s, z11.d");
  COMPARE(lsr(z16.VnB(), z19.VnB(), z2.VnD()), "lsr z16.b, z19.b, z2.d");
  COMPARE(lsr(z18.VnH(), z19.VnH(), z4.VnD()), "lsr z18.h, z19.h, z4.d");
  COMPARE(lsr(z20.VnS(), z19.VnS(), z6.VnD()), "lsr z20.s, z19.s, z6.d");

  CLEANUP();
}
457
458
// Checks the `Cntb`/`Cntd`/`Cnth`/`Cntw` macros with an optional pattern and
// an optional multiplier. The expected text omits the pattern when it is
// SVE_ALL with no multiplier, and omits the multiplier when it is 1.
TEST(sve_element_count) {
  SETUP();

  // Pattern only.
  COMPARE_MACRO(Cntb(x7), "cntb x7");
  COMPARE_MACRO(Cntb(x7, SVE_POW2), "cntb x7, pow2");
  COMPARE_MACRO(Cntb(x7, SVE_VL1), "cntb x7, vl1");
  COMPARE_MACRO(Cntb(x7, SVE_VL2), "cntb x7, vl2");
  COMPARE_MACRO(Cntb(x7, SVE_VL16), "cntb x7, vl16");
  COMPARE_MACRO(Cntb(x7, SVE_VL256), "cntb x7, vl256");
  COMPARE_MACRO(Cntb(x7, SVE_MUL4), "cntb x7, mul4");
  COMPARE_MACRO(Cntb(x7, SVE_MUL3), "cntb x7, mul3");
  COMPARE_MACRO(Cntb(x7, SVE_ALL), "cntb x7");

  // Pattern and multiplier. With a multiplier other than 1, `all` is printed
  // explicitly.
  COMPARE_MACRO(Cntb(x7, SVE_POW2, 1), "cntb x7, pow2");
  COMPARE_MACRO(Cntb(x7, SVE_VL1, 16), "cntb x7, vl1, mul #16");
  COMPARE_MACRO(Cntb(x7, SVE_VL2, 15), "cntb x7, vl2, mul #15");
  COMPARE_MACRO(Cntb(x7, SVE_VL16, 14), "cntb x7, vl16, mul #14");
  COMPARE_MACRO(Cntb(x7, SVE_VL256, 8), "cntb x7, vl256, mul #8");
  COMPARE_MACRO(Cntb(x7, SVE_MUL4, 4), "cntb x7, mul4, mul #4");
  COMPARE_MACRO(Cntb(x7, SVE_MUL3, 3), "cntb x7, mul3, mul #3");
  COMPARE_MACRO(Cntb(x7, SVE_ALL, 2), "cntb x7, all, mul #2");

  // Other destination registers (including xzr) and element sizes.
  COMPARE_MACRO(Cntb(x30), "cntb x30");
  COMPARE_MACRO(Cntd(xzr, SVE_POW2), "cntd xzr, pow2");
  COMPARE_MACRO(Cntd(xzr, SVE_MUL4, 1), "cntd xzr, mul4");
  COMPARE_MACRO(Cnth(x29, SVE_MUL3, 4), "cnth x29, mul3, mul #4");
  COMPARE_MACRO(Cntw(x28, SVE_VL256, 16), "cntw x28, vl256, mul #16");

  CLEANUP();
}
489
// Checks the scalar-register `Dec{b,d,h,w}` and `Inc{b,d,h,w}` macros with an
// optional pattern and an optional multiplier. The expected text omits the
// pattern when it is SVE_ALL with no multiplier, and omits the multiplier when
// it is 1.
TEST(sve_incdec_reg_element_count) {
  SETUP();

  // Decrement: pattern only.
  COMPARE_MACRO(Decb(x4), "decb x4");
  COMPARE_MACRO(Decb(x4, SVE_POW2), "decb x4, pow2");
  COMPARE_MACRO(Decb(x4, SVE_VL1), "decb x4, vl1");
  COMPARE_MACRO(Decb(x4, SVE_VL2), "decb x4, vl2");
  COMPARE_MACRO(Decb(x4, SVE_VL16), "decb x4, vl16");
  COMPARE_MACRO(Decb(x4, SVE_VL256), "decb x4, vl256");
  COMPARE_MACRO(Decb(x4, SVE_MUL4), "decb x4, mul4");
  COMPARE_MACRO(Decb(x4, SVE_MUL3), "decb x4, mul3");
  COMPARE_MACRO(Decb(x4, SVE_ALL), "decb x4");

  // Decrement: pattern and multiplier.
  COMPARE_MACRO(Decb(x4, SVE_POW2, 1), "decb x4, pow2");
  COMPARE_MACRO(Decb(x4, SVE_VL1, 16), "decb x4, vl1, mul #16");
  COMPARE_MACRO(Decb(x4, SVE_VL2, 15), "decb x4, vl2, mul #15");
  COMPARE_MACRO(Decb(x4, SVE_VL16, 14), "decb x4, vl16, mul #14");
  COMPARE_MACRO(Decb(x4, SVE_VL256, 8), "decb x4, vl256, mul #8");
  COMPARE_MACRO(Decb(x4, SVE_MUL4, 4), "decb x4, mul4, mul #4");
  COMPARE_MACRO(Decb(x4, SVE_MUL3, 3), "decb x4, mul3, mul #3");
  COMPARE_MACRO(Decb(x4, SVE_ALL, 2), "decb x4, all, mul #2");

  // Decrement: other registers (including xzr) and element sizes.
  COMPARE_MACRO(Decb(x30), "decb x30");
  COMPARE_MACRO(Decd(xzr, SVE_POW2), "decd xzr, pow2");
  COMPARE_MACRO(Decd(xzr, SVE_MUL4, 1), "decd xzr, mul4");
  COMPARE_MACRO(Dech(x29, SVE_MUL3, 4), "dech x29, mul3, mul #4");
  COMPARE_MACRO(Decw(x28, SVE_VL256, 16), "decw x28, vl256, mul #16");

  // Increment: pattern only.
  COMPARE_MACRO(Incb(x17), "incb x17");
  COMPARE_MACRO(Incb(x17, SVE_POW2), "incb x17, pow2");
  COMPARE_MACRO(Incb(x17, SVE_VL1), "incb x17, vl1");
  COMPARE_MACRO(Incb(x17, SVE_VL2), "incb x17, vl2");
  COMPARE_MACRO(Incb(x17, SVE_VL16), "incb x17, vl16");
  COMPARE_MACRO(Incb(x17, SVE_VL256), "incb x17, vl256");
  COMPARE_MACRO(Incb(x17, SVE_MUL4), "incb x17, mul4");
  COMPARE_MACRO(Incb(x17, SVE_MUL3), "incb x17, mul3");
  COMPARE_MACRO(Incb(x17, SVE_ALL), "incb x17");

  // Increment: pattern and multiplier.
  COMPARE_MACRO(Incb(x17, SVE_POW2, 1), "incb x17, pow2");
  COMPARE_MACRO(Incb(x17, SVE_VL1, 16), "incb x17, vl1, mul #16");
  COMPARE_MACRO(Incb(x17, SVE_VL2, 15), "incb x17, vl2, mul #15");
  COMPARE_MACRO(Incb(x17, SVE_VL16, 14), "incb x17, vl16, mul #14");
  COMPARE_MACRO(Incb(x17, SVE_VL256, 8), "incb x17, vl256, mul #8");
  COMPARE_MACRO(Incb(x17, SVE_MUL4, 4), "incb x17, mul4, mul #4");
  COMPARE_MACRO(Incb(x17, SVE_MUL3, 3), "incb x17, mul3, mul #3");
  COMPARE_MACRO(Incb(x17, SVE_ALL, 2), "incb x17, all, mul #2");

  // Increment: other registers (including xzr) and element sizes.
  COMPARE_MACRO(Incb(x30), "incb x30");
  COMPARE_MACRO(Incd(xzr, SVE_POW2), "incd xzr, pow2");
  COMPARE_MACRO(Incd(xzr, SVE_MUL4, 1), "incd xzr, mul4");
  COMPARE_MACRO(Inch(x29, SVE_MUL3, 4), "inch x29, mul3, mul #4");
  COMPARE_MACRO(Incw(x28, SVE_VL256, 16), "incw x28, vl256, mul #16");

  CLEANUP();
}
545
// Checks the scalar-register `Sqdec*`/`Sqinc*` macros in both operand forms
// used here: (Xd, Wn) and Xd alone. Each form is exercised with an optional
// pattern and an optional multiplier; the expected text omits an SVE_ALL
// pattern without a multiplier and omits a multiplier of 1.
TEST(sve_signed_sat_incdec_reg_element_count) {
  SETUP();

  // (X, W) form: pattern only.
  COMPARE_MACRO(Sqdecb(x12, w12), "sqdecb x12, w12");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_POW2), "sqdecb x12, w12, pow2");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL1), "sqdecb x12, w12, vl1");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL2), "sqdecb x12, w12, vl2");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL16), "sqdecb x12, w12, vl16");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL256), "sqdecb x12, w12, vl256");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL4), "sqdecb x12, w12, mul4");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL3), "sqdecb x12, w12, mul3");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_ALL), "sqdecb x12, w12");

  // (X, W) form: pattern and multiplier.
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_POW2, 1), "sqdecb x12, w12, pow2");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL1, 16), "sqdecb x12, w12, vl1, mul #16");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL2, 15), "sqdecb x12, w12, vl2, mul #15");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL16, 14),
                "sqdecb x12, w12, vl16, mul #14");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_VL256, 8),
                "sqdecb x12, w12, vl256, mul #8");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL4, 4), "sqdecb x12, w12, mul4, mul #4");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_MUL3, 3), "sqdecb x12, w12, mul3, mul #3");
  COMPARE_MACRO(Sqdecb(x12, w12, SVE_ALL, 2), "sqdecb x12, w12, all, mul #2");

  // (X, W) form with the zero register.
  COMPARE_MACRO(Sqdecb(xzr, wzr, SVE_POW2), "sqdecb xzr, wzr, pow2");
  COMPARE_MACRO(Sqdecb(xzr, wzr, SVE_MUL4, 1), "sqdecb xzr, wzr, mul4");

  // (X, W) form: increment variants across element sizes.
  COMPARE_MACRO(Sqincw(x20, w20, SVE_POW2, 1), "sqincw x20, w20, pow2");
  COMPARE_MACRO(Sqincd(x20, w20, SVE_VL1, 16), "sqincd x20, w20, vl1, mul #16");
  COMPARE_MACRO(Sqinch(x20, w20, SVE_VL2, 15), "sqinch x20, w20, vl2, mul #15");
  COMPARE_MACRO(Sqincw(x20, w20, SVE_VL16, 14),
                "sqincw x20, w20, vl16, mul #14");
  COMPARE_MACRO(Sqincd(x20, w20, SVE_VL256, 8),
                "sqincd x20, w20, vl256, mul #8");
  COMPARE_MACRO(Sqinch(x20, w20, SVE_MUL4, 4), "sqinch x20, w20, mul4, mul #4");
  COMPARE_MACRO(Sqincw(x20, w20, SVE_MUL3, 3), "sqincw x20, w20, mul3, mul #3");
  COMPARE_MACRO(Sqincd(x20, w20, SVE_ALL, 2), "sqincd x20, w20, all, mul #2");

  // X-only form: pattern only.
  COMPARE_MACRO(Sqdecb(x5), "sqdecb x5");
  COMPARE_MACRO(Sqdecb(x5, SVE_POW2), "sqdecb x5, pow2");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL1), "sqdecb x5, vl1");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL2), "sqdecb x5, vl2");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL16), "sqdecb x5, vl16");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL256), "sqdecb x5, vl256");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL4), "sqdecb x5, mul4");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL3), "sqdecb x5, mul3");
  COMPARE_MACRO(Sqdecb(x5, SVE_ALL), "sqdecb x5");

  // X-only form: pattern and multiplier.
  COMPARE_MACRO(Sqdecb(x5, SVE_POW2, 1), "sqdecb x5, pow2");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL1, 16), "sqdecb x5, vl1, mul #16");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL2, 15), "sqdecb x5, vl2, mul #15");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL16, 14), "sqdecb x5, vl16, mul #14");
  COMPARE_MACRO(Sqdecb(x5, SVE_VL256, 8), "sqdecb x5, vl256, mul #8");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL4, 4), "sqdecb x5, mul4, mul #4");
  COMPARE_MACRO(Sqdecb(x5, SVE_MUL3, 3), "sqdecb x5, mul3, mul #3");
  COMPARE_MACRO(Sqdecb(x5, SVE_ALL, 2), "sqdecb x5, all, mul #2");

  // X-only form with the zero register.
  COMPARE_MACRO(Sqdecb(xzr, SVE_POW2), "sqdecb xzr, pow2");
  COMPARE_MACRO(Sqdecb(xzr, SVE_MUL4, 1), "sqdecb xzr, mul4");

  // X-only form: increment variants across element sizes.
  COMPARE_MACRO(Sqincw(x7, SVE_POW2, 1), "sqincw x7, pow2");
  COMPARE_MACRO(Sqincd(x7, SVE_VL1, 16), "sqincd x7, vl1, mul #16");
  COMPARE_MACRO(Sqinch(x7, SVE_VL2, 15), "sqinch x7, vl2, mul #15");
  COMPARE_MACRO(Sqincw(x7, SVE_VL16, 14), "sqincw x7, vl16, mul #14");
  COMPARE_MACRO(Sqincd(x7, SVE_VL256, 8), "sqincd x7, vl256, mul #8");
  COMPARE_MACRO(Sqinch(x7, SVE_MUL4, 4), "sqinch x7, mul4, mul #4");
  COMPARE_MACRO(Sqincw(x7, SVE_MUL3, 3), "sqincw x7, mul3, mul #3");
  COMPARE_MACRO(Sqincd(x7, SVE_ALL, 2), "sqincd x7, all, mul #2");

  CLEANUP();
}
617
// Checks the scalar-register `Uqdec*`/`Uqinc*` macros with both W and X
// destinations, an optional pattern and an optional multiplier. The expected
// text omits an SVE_ALL pattern without a multiplier and omits a multiplier
// of 1.
TEST(sve_unsigned_sat_incdec_reg_element_count) {
  SETUP();

  // Decrement, W destination: pattern only.
  COMPARE_MACRO(Uqdecb(w12), "uqdecb w12");
  COMPARE_MACRO(Uqdecb(w12, SVE_POW2), "uqdecb w12, pow2");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL1), "uqdecb w12, vl1");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL2), "uqdecb w12, vl2");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL16), "uqdecb w12, vl16");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL256), "uqdecb w12, vl256");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL4), "uqdecb w12, mul4");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL3), "uqdecb w12, mul3");
  COMPARE_MACRO(Uqdecb(w12, SVE_ALL), "uqdecb w12");

  // Decrement, W destination: pattern and multiplier.
  COMPARE_MACRO(Uqdecb(w12, SVE_POW2, 1), "uqdecb w12, pow2");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL1, 16), "uqdecb w12, vl1, mul #16");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL2, 15), "uqdecb w12, vl2, mul #15");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL16, 14), "uqdecb w12, vl16, mul #14");
  COMPARE_MACRO(Uqdecb(w12, SVE_VL256, 8), "uqdecb w12, vl256, mul #8");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL4, 4), "uqdecb w12, mul4, mul #4");
  COMPARE_MACRO(Uqdecb(w12, SVE_MUL3, 3), "uqdecb w12, mul3, mul #3");
  COMPARE_MACRO(Uqdecb(w12, SVE_ALL, 2), "uqdecb w12, all, mul #2");

  // Decrement, X destination (including xzr).
  COMPARE_MACRO(Uqdecb(x30), "uqdecb x30");
  COMPARE_MACRO(Uqdecb(xzr, SVE_POW2), "uqdecb xzr, pow2");
  COMPARE_MACRO(Uqdecb(xzr, SVE_MUL4, 1), "uqdecb xzr, mul4");

  // Decrement: other element sizes, mixing W and X destinations.
  COMPARE_MACRO(Uqdecw(w22), "uqdecw w22");
  COMPARE_MACRO(Uqdecd(w22, SVE_POW2, 1), "uqdecd w22, pow2");
  COMPARE_MACRO(Uqdech(w22, SVE_VL1, 16), "uqdech w22, vl1, mul #16");
  COMPARE_MACRO(Uqdecw(x22, SVE_VL2, 15), "uqdecw x22, vl2, mul #15");
  COMPARE_MACRO(Uqdecd(x22, SVE_VL16, 14), "uqdecd x22, vl16, mul #14");
  COMPARE_MACRO(Uqdech(x22, SVE_VL256, 8), "uqdech x22, vl256, mul #8");
  COMPARE_MACRO(Uqdecw(w22, SVE_MUL4, 4), "uqdecw w22, mul4, mul #4");
  COMPARE_MACRO(Uqdecd(w22, SVE_MUL3, 3), "uqdecd w22, mul3, mul #3");
  COMPARE_MACRO(Uqdech(w22, SVE_ALL, 2), "uqdech w22, all, mul #2");

  // Increment, W destination: pattern only.
  COMPARE_MACRO(Uqincb(w29), "uqincb w29");
  COMPARE_MACRO(Uqincb(w29, SVE_POW2), "uqincb w29, pow2");
  COMPARE_MACRO(Uqincb(w29, SVE_VL1), "uqincb w29, vl1");
  COMPARE_MACRO(Uqincb(w29, SVE_VL2), "uqincb w29, vl2");
  COMPARE_MACRO(Uqincb(w29, SVE_VL16), "uqincb w29, vl16");
  COMPARE_MACRO(Uqincb(w29, SVE_VL256), "uqincb w29, vl256");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL4), "uqincb w29, mul4");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL3), "uqincb w29, mul3");
  COMPARE_MACRO(Uqincb(w29, SVE_ALL), "uqincb w29");

  // Increment, W destination: pattern and multiplier.
  COMPARE_MACRO(Uqincb(w29, SVE_POW2, 1), "uqincb w29, pow2");
  COMPARE_MACRO(Uqincb(w29, SVE_VL1, 16), "uqincb w29, vl1, mul #16");
  COMPARE_MACRO(Uqincb(w29, SVE_VL2, 15), "uqincb w29, vl2, mul #15");
  COMPARE_MACRO(Uqincb(w29, SVE_VL16, 14), "uqincb w29, vl16, mul #14");
  COMPARE_MACRO(Uqincb(w29, SVE_VL256, 8), "uqincb w29, vl256, mul #8");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL4, 4), "uqincb w29, mul4, mul #4");
  COMPARE_MACRO(Uqincb(w29, SVE_MUL3, 3), "uqincb w29, mul3, mul #3");
  COMPARE_MACRO(Uqincb(w29, SVE_ALL, 2), "uqincb w29, all, mul #2");

  // Increment, X destination (including xzr).
  COMPARE_MACRO(Uqincb(x30), "uqincb x30");
  COMPARE_MACRO(Uqincb(xzr, SVE_POW2), "uqincb xzr, pow2");
  COMPARE_MACRO(Uqincb(xzr, SVE_MUL4, 1), "uqincb xzr, mul4");

  // Increment: other element sizes, mixing W and X destinations.
  COMPARE_MACRO(Uqinch(w11), "uqinch w11");
  COMPARE_MACRO(Uqincw(w11, SVE_POW2), "uqincw w11, pow2");
  COMPARE_MACRO(Uqincd(w11, SVE_VL1), "uqincd w11, vl1");
  COMPARE_MACRO(Uqinch(x11, SVE_VL2), "uqinch x11, vl2");
  COMPARE_MACRO(Uqincw(x11, SVE_VL16), "uqincw x11, vl16");
  COMPARE_MACRO(Uqincd(x11, SVE_VL256), "uqincd x11, vl256");
  COMPARE_MACRO(Uqinch(w11, SVE_MUL4), "uqinch w11, mul4");
  COMPARE_MACRO(Uqincw(w11, SVE_MUL3), "uqincw w11, mul3");
  COMPARE_MACRO(Uqincd(w11, SVE_ALL), "uqincd w11");

  CLEANUP();
}
689
// Disassembly checks for the vector (Z register) forms of the element-count
// decrement macros: Dec{d,h,w}, Sqdec{d,h,w} and Uqdec{d,h,w}.
//
// COMPARE_MACRO is declared outside this part of the file (presumably in
// test-disasm-aarch64.h); each use pairs a MacroAssembler call with the text
// expected from the disassembler. The expected strings below pin three
// printing rules:
//  - SVE_ALL without a multiplier prints no pattern, the same text as the call
//    that passes no pattern at all;
//  - a multiplier of 1 is not printed;
//  - SVE_ALL followed by a multiplier other than 1 is printed as "all".
TEST(sve_vector_dec_element_count) {
  SETUP();

  // Decd: every pattern, no multiplier.
  COMPARE_MACRO(Decd(z14.VnD()), "decd z14.d");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_POW2), "decd z14.d, pow2");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL1), "decd z14.d, vl1");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL2), "decd z14.d, vl2");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL16), "decd z14.d, vl16");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_VL256), "decd z14.d, vl256");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_MUL4), "decd z14.d, mul4");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_MUL3), "decd z14.d, mul3");
  COMPARE_MACRO(Decd(z14.VnD(), SVE_ALL), "decd z14.d");

  // Decd: every pattern with an explicit multiplier (1 and 16 are the smallest
  // and largest values used here).
  COMPARE_MACRO(Decd(z19.VnD(), SVE_POW2, 1), "decd z19.d, pow2");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL1, 16), "decd z19.d, vl1, mul #16");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL2, 15), "decd z19.d, vl2, mul #15");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL16, 14), "decd z19.d, vl16, mul #14");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_VL256, 8), "decd z19.d, vl256, mul #8");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_MUL4, 4), "decd z19.d, mul4, mul #4");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_MUL3, 3), "decd z19.d, mul3, mul #3");
  COMPARE_MACRO(Decd(z19.VnD(), SVE_ALL, 2), "decd z19.d, all, mul #2");

  // Dech / Decw alternate over the same pattern and multiplier combinations.
  COMPARE_MACRO(Dech(z27.VnH(), SVE_POW2, 1), "dech z27.h, pow2");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_VL1, 16), "decw z27.s, vl1, mul #16");
  COMPARE_MACRO(Dech(z27.VnH(), SVE_VL2, 15), "dech z27.h, vl2, mul #15");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_VL16, 14), "decw z27.s, vl16, mul #14");
  COMPARE_MACRO(Dech(z27.VnH(), SVE_VL256, 8), "dech z27.h, vl256, mul #8");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_MUL4, 4), "decw z27.s, mul4, mul #4");
  COMPARE_MACRO(Dech(z27.VnH(), SVE_MUL3, 3), "dech z27.h, mul3, mul #3");
  COMPARE_MACRO(Decw(z27.VnS(), SVE_ALL, 2), "decw z27.s, all, mul #2");

  // Sqdecd: every pattern, no multiplier.
  COMPARE_MACRO(Sqdecd(z13.VnD()), "sqdecd z13.d");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_POW2), "sqdecd z13.d, pow2");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL1), "sqdecd z13.d, vl1");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL2), "sqdecd z13.d, vl2");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL16), "sqdecd z13.d, vl16");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_VL256), "sqdecd z13.d, vl256");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_MUL4), "sqdecd z13.d, mul4");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_MUL3), "sqdecd z13.d, mul3");
  COMPARE_MACRO(Sqdecd(z13.VnD(), SVE_ALL), "sqdecd z13.d");

  // Sqdecd: every pattern with an explicit multiplier.
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_POW2, 1), "sqdecd z9.d, pow2");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL1, 16), "sqdecd z9.d, vl1, mul #16");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL2, 15), "sqdecd z9.d, vl2, mul #15");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL16, 14), "sqdecd z9.d, vl16, mul #14");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_VL256, 8), "sqdecd z9.d, vl256, mul #8");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_MUL4, 4), "sqdecd z9.d, mul4, mul #4");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_MUL3, 3), "sqdecd z9.d, mul3, mul #3");
  COMPARE_MACRO(Sqdecd(z9.VnD(), SVE_ALL, 2), "sqdecd z9.d, all, mul #2");

  // Sqdech / Sqdecw alternate over the same combinations.
  COMPARE_MACRO(Sqdech(z31.VnH(), SVE_POW2, 1), "sqdech z31.h, pow2");
  COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_VL1, 16), "sqdecw z31.s, vl1, mul #16");
  COMPARE_MACRO(Sqdech(z31.VnH(), SVE_VL2, 15), "sqdech z31.h, vl2, mul #15");
  COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_VL16, 14), "sqdecw z31.s, vl16, mul #14");
  COMPARE_MACRO(Sqdech(z31.VnH(), SVE_VL256, 8), "sqdech z31.h, vl256, mul #8");
  COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_MUL4, 4), "sqdecw z31.s, mul4, mul #4");
  COMPARE_MACRO(Sqdech(z31.VnH(), SVE_MUL3, 3), "sqdech z31.h, mul3, mul #3");
  COMPARE_MACRO(Sqdecw(z31.VnS(), SVE_ALL, 2), "sqdecw z31.s, all, mul #2");

  // Uqdecd: every pattern, no multiplier.
  COMPARE_MACRO(Uqdecd(z1.VnD()), "uqdecd z1.d");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_POW2), "uqdecd z1.d, pow2");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL1), "uqdecd z1.d, vl1");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL2), "uqdecd z1.d, vl2");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL16), "uqdecd z1.d, vl16");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_VL256), "uqdecd z1.d, vl256");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_MUL4), "uqdecd z1.d, mul4");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_MUL3), "uqdecd z1.d, mul3");
  COMPARE_MACRO(Uqdecd(z1.VnD(), SVE_ALL), "uqdecd z1.d");

  // Uqdecd: every pattern with an explicit multiplier.
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_POW2, 1), "uqdecd z7.d, pow2");
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL1, 16), "uqdecd z7.d, vl1, mul #16");
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL2, 15), "uqdecd z7.d, vl2, mul #15");
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL16, 14), "uqdecd z7.d, vl16, mul #14");
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_VL256, 8), "uqdecd z7.d, vl256, mul #8");
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_MUL4, 4), "uqdecd z7.d, mul4, mul #4");
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_MUL3, 3), "uqdecd z7.d, mul3, mul #3");
  COMPARE_MACRO(Uqdecd(z7.VnD(), SVE_ALL, 2), "uqdecd z7.d, all, mul #2");

  // Uqdech / Uqdecw alternate over the same combinations.
  COMPARE_MACRO(Uqdech(z26.VnH(), SVE_POW2, 1), "uqdech z26.h, pow2");
  COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_VL1, 16), "uqdecw z26.s, vl1, mul #16");
  COMPARE_MACRO(Uqdech(z26.VnH(), SVE_VL2, 15), "uqdech z26.h, vl2, mul #15");
  COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_VL16, 14), "uqdecw z26.s, vl16, mul #14");
  COMPARE_MACRO(Uqdech(z26.VnH(), SVE_VL256, 8), "uqdech z26.h, vl256, mul #8");
  COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_MUL4, 4), "uqdecw z26.s, mul4, mul #4");
  COMPARE_MACRO(Uqdech(z26.VnH(), SVE_MUL3, 3), "uqdech z26.h, mul3, mul #3");
  COMPARE_MACRO(Uqdecw(z26.VnS(), SVE_ALL, 2), "uqdecw z26.s, all, mul #2");

  CLEANUP();
}
779
// Disassembly checks for the vector (Z register) forms of the element-count
// increment macros: Inc{d,h,w}, Sqinc{d,h,w} and Uqinc{d,h,w}.
//
// COMPARE_MACRO is declared outside this part of the file (presumably in
// test-disasm-aarch64.h); each use pairs a MacroAssembler call with the text
// expected from the disassembler. As the expected strings show, SVE_ALL with
// no multiplier and a multiplier of 1 are both left out of the printed form,
// while SVE_ALL with another multiplier is printed as "all, mul #<n>".
TEST(sve_vector_inc_element_count) {
  SETUP();

  // Incd: every pattern, no multiplier.
  COMPARE_MACRO(Incd(z16.VnD()), "incd z16.d");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_POW2), "incd z16.d, pow2");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_VL1), "incd z16.d, vl1");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_VL2), "incd z16.d, vl2");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_VL16), "incd z16.d, vl16");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_VL256), "incd z16.d, vl256");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_MUL4), "incd z16.d, mul4");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_MUL3), "incd z16.d, mul3");
  COMPARE_MACRO(Incd(z16.VnD(), SVE_ALL), "incd z16.d");

  // Incd: every pattern with an explicit multiplier.
  COMPARE_MACRO(Incd(z18.VnD(), SVE_POW2, 1), "incd z18.d, pow2");
  COMPARE_MACRO(Incd(z18.VnD(), SVE_VL1, 16), "incd z18.d, vl1, mul #16");
  COMPARE_MACRO(Incd(z18.VnD(), SVE_VL2, 15), "incd z18.d, vl2, mul #15");
  COMPARE_MACRO(Incd(z18.VnD(), SVE_VL16, 14), "incd z18.d, vl16, mul #14");
  COMPARE_MACRO(Incd(z18.VnD(), SVE_VL256, 8), "incd z18.d, vl256, mul #8");
  COMPARE_MACRO(Incd(z18.VnD(), SVE_MUL4, 4), "incd z18.d, mul4, mul #4");
  COMPARE_MACRO(Incd(z18.VnD(), SVE_MUL3, 3), "incd z18.d, mul3, mul #3");
  COMPARE_MACRO(Incd(z18.VnD(), SVE_ALL, 2), "incd z18.d, all, mul #2");

  // Inch / Incw alternate over the same combinations.
  COMPARE_MACRO(Inch(z24.VnH(), SVE_POW2, 1), "inch z24.h, pow2");
  COMPARE_MACRO(Incw(z24.VnS(), SVE_VL1, 16), "incw z24.s, vl1, mul #16");
  COMPARE_MACRO(Inch(z24.VnH(), SVE_VL2, 15), "inch z24.h, vl2, mul #15");
  COMPARE_MACRO(Incw(z24.VnS(), SVE_VL16, 14), "incw z24.s, vl16, mul #14");
  COMPARE_MACRO(Inch(z24.VnH(), SVE_VL256, 8), "inch z24.h, vl256, mul #8");
  COMPARE_MACRO(Incw(z24.VnS(), SVE_MUL4, 4), "incw z24.s, mul4, mul #4");
  COMPARE_MACRO(Inch(z24.VnH(), SVE_MUL3, 3), "inch z24.h, mul3, mul #3");
  COMPARE_MACRO(Incw(z24.VnS(), SVE_ALL, 2), "incw z24.s, all, mul #2");

  // Sqincd: every pattern, no multiplier.
  COMPARE_MACRO(Sqincd(z10.VnD()), "sqincd z10.d");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_POW2), "sqincd z10.d, pow2");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL1), "sqincd z10.d, vl1");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL2), "sqincd z10.d, vl2");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL16), "sqincd z10.d, vl16");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_VL256), "sqincd z10.d, vl256");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_MUL4), "sqincd z10.d, mul4");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_MUL3), "sqincd z10.d, mul3");
  COMPARE_MACRO(Sqincd(z10.VnD(), SVE_ALL), "sqincd z10.d");

  // Sqincd: every pattern with an explicit multiplier.
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_POW2, 1), "sqincd z3.d, pow2");
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL1, 16), "sqincd z3.d, vl1, mul #16");
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL2, 15), "sqincd z3.d, vl2, mul #15");
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL16, 14), "sqincd z3.d, vl16, mul #14");
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_VL256, 8), "sqincd z3.d, vl256, mul #8");
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_MUL4, 4), "sqincd z3.d, mul4, mul #4");
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_MUL3, 3), "sqincd z3.d, mul3, mul #3");
  COMPARE_MACRO(Sqincd(z3.VnD(), SVE_ALL, 2), "sqincd z3.d, all, mul #2");

  // Sqinch / Sqincw alternate over the same combinations.
  COMPARE_MACRO(Sqinch(z24.VnH(), SVE_POW2, 1), "sqinch z24.h, pow2");
  COMPARE_MACRO(Sqincw(z24.VnS(), SVE_VL1, 16), "sqincw z24.s, vl1, mul #16");
  COMPARE_MACRO(Sqinch(z24.VnH(), SVE_VL2, 15), "sqinch z24.h, vl2, mul #15");
  COMPARE_MACRO(Sqincw(z24.VnS(), SVE_VL16, 14), "sqincw z24.s, vl16, mul #14");
  COMPARE_MACRO(Sqinch(z24.VnH(), SVE_VL256, 8), "sqinch z24.h, vl256, mul #8");
  COMPARE_MACRO(Sqincw(z24.VnS(), SVE_MUL4, 4), "sqincw z24.s, mul4, mul #4");
  COMPARE_MACRO(Sqinch(z24.VnH(), SVE_MUL3, 3), "sqinch z24.h, mul3, mul #3");
  COMPARE_MACRO(Sqincw(z24.VnS(), SVE_ALL, 2), "sqincw z24.s, all, mul #2");

  // Uqincd: every pattern, no multiplier.
  COMPARE_MACRO(Uqincd(z10.VnD()), "uqincd z10.d");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_POW2), "uqincd z10.d, pow2");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL1), "uqincd z10.d, vl1");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL2), "uqincd z10.d, vl2");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL16), "uqincd z10.d, vl16");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_VL256), "uqincd z10.d, vl256");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_MUL4), "uqincd z10.d, mul4");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_MUL3), "uqincd z10.d, mul3");
  COMPARE_MACRO(Uqincd(z10.VnD(), SVE_ALL), "uqincd z10.d");

  // Uqincd: every pattern with an explicit multiplier.
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_POW2, 1), "uqincd z6.d, pow2");
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL1, 16), "uqincd z6.d, vl1, mul #16");
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL2, 15), "uqincd z6.d, vl2, mul #15");
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL16, 14), "uqincd z6.d, vl16, mul #14");
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_VL256, 8), "uqincd z6.d, vl256, mul #8");
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_MUL4, 4), "uqincd z6.d, mul4, mul #4");
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_MUL3, 3), "uqincd z6.d, mul3, mul #3");
  COMPARE_MACRO(Uqincd(z6.VnD(), SVE_ALL, 2), "uqincd z6.d, all, mul #2");

  // Uqinch / Uqincw alternate over the same combinations.
  COMPARE_MACRO(Uqinch(z20.VnH(), SVE_POW2, 1), "uqinch z20.h, pow2");
  COMPARE_MACRO(Uqincw(z20.VnS(), SVE_VL1, 16), "uqincw z20.s, vl1, mul #16");
  COMPARE_MACRO(Uqinch(z20.VnH(), SVE_VL2, 15), "uqinch z20.h, vl2, mul #15");
  COMPARE_MACRO(Uqincw(z20.VnS(), SVE_VL16, 14), "uqincw z20.s, vl16, mul #14");
  COMPARE_MACRO(Uqinch(z20.VnH(), SVE_VL256, 8), "uqinch z20.h, vl256, mul #8");
  COMPARE_MACRO(Uqincw(z20.VnS(), SVE_MUL4, 4), "uqincw z20.s, mul4, mul #4");
  COMPARE_MACRO(Uqinch(z20.VnH(), SVE_MUL3, 3), "uqinch z20.h, mul3, mul #3");
  COMPARE_MACRO(Uqincw(z20.VnS(), SVE_ALL, 2), "uqincw z20.s, all, mul #2");

  CLEANUP();
}
869
// Disassembly checks for FADDA with H, S and D lanes. In every case the scalar
// destination is also passed as the scalar accumulator operand (h10/s10/d10).
TEST(sve_fp_accumulating_reduction) {
  SETUP();

  COMPARE(fadda(h10, p2, h10, z0.VnH()), "fadda h10, p2, h10, z0.h");
  COMPARE(fadda(s10, p2, s10, z0.VnS()), "fadda s10, p2, s10, z0.s");
  COMPARE(fadda(d10, p2, d10, z0.VnD()), "fadda d10, p2, d10, z0.d");

  CLEANUP();
}
879
// Disassembly checks for predicated (merging) SVE floating-point arithmetic.
//
// COMPARE is used with lower-case assembler methods and COMPARE_MACRO with
// capitalised MacroAssembler methods; both are declared outside this part of
// the file (presumably in test-disasm-aarch64.h) and pair a call with the
// disassembly text expected for it. The macro cases additionally pin the
// multi-instruction sequences expected when the destination differs from, or
// aliases, one of the sources.
TEST(sve_fp_arithmetic_predicated) {
  SETUP();

  // Assembler forms: the destination is always the first source operand.
  COMPARE(fdiv(z9.VnH(), p4.Merging(), z9.VnH(), z4.VnH()),
          "fdiv z9.h, p4/m, z9.h, z4.h");
  COMPARE(fdiv(z19.VnS(), p5.Merging(), z19.VnS(), z14.VnS()),
          "fdiv z19.s, p5/m, z19.s, z14.s");
  COMPARE(fdiv(z29.VnD(), p6.Merging(), z29.VnD(), z24.VnD()),
          "fdiv z29.d, p6/m, z29.d, z24.d");
  COMPARE(fdivr(z21.VnH(), p3.Merging(), z21.VnH(), z11.VnH()),
          "fdivr z21.h, p3/m, z21.h, z11.h");
  COMPARE(fdivr(z23.VnS(), p5.Merging(), z23.VnS(), z15.VnS()),
          "fdivr z23.s, p5/m, z23.s, z15.s");
  COMPARE(fdivr(z25.VnD(), p7.Merging(), z25.VnD(), z19.VnD()),
          "fdivr z25.d, p7/m, z25.d, z19.d");
  COMPARE(fmax(z4.VnH(), p1.Merging(), z4.VnH(), z29.VnH()),
          "fmax z4.h, p1/m, z4.h, z29.h");
  COMPARE(fmax(z14.VnS(), p3.Merging(), z14.VnS(), z29.VnS()),
          "fmax z14.s, p3/m, z14.s, z29.s");
  COMPARE(fmax(z24.VnD(), p5.Merging(), z24.VnD(), z29.VnD()),
          "fmax z24.d, p5/m, z24.d, z29.d");
  COMPARE(fmin(z1.VnH(), p2.Merging(), z1.VnH(), z30.VnH()),
          "fmin z1.h, p2/m, z1.h, z30.h");
  COMPARE(fmin(z11.VnS(), p4.Merging(), z11.VnS(), z30.VnS()),
          "fmin z11.s, p4/m, z11.s, z30.s");
  COMPARE(fmin(z21.VnD(), p6.Merging(), z21.VnD(), z30.VnD()),
          "fmin z21.d, p6/m, z21.d, z30.d");

  // ftmad (unpredicated, destructive) with immediates 0, 2 and 7 per lane
  // size.
  COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 0),
          "ftmad z21.h, z21.h, z22.h, #0");
  COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 2),
          "ftmad z21.h, z21.h, z22.h, #2");
  COMPARE(ftmad(z2.VnH(), z2.VnH(), z21.VnH(), 7),
          "ftmad z2.h, z2.h, z21.h, #7");
  COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 0),
          "ftmad z21.s, z21.s, z22.s, #0");
  COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 2),
          "ftmad z21.s, z21.s, z22.s, #2");
  COMPARE(ftmad(z2.VnS(), z2.VnS(), z21.VnS(), 7),
          "ftmad z2.s, z2.s, z21.s, #7");
  COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 0),
          "ftmad z21.d, z21.d, z22.d, #0");
  COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 2),
          "ftmad z21.d, z21.d, z22.d, #2");
  COMPARE(ftmad(z2.VnD(), z2.VnD(), z21.VnD(), 7),
          "ftmad z2.d, z2.d, z21.d, #7");

  // Ftmad macro: a distinct destination is expected to be set up with
  // movprfx. When the destination aliases the second source (z6), that source
  // is expected to be copied to z31 before the destination is overwritten.
  COMPARE_MACRO(Ftmad(z3.VnH(), z2.VnH(), z1.VnH(), 1),
                "movprfx z3, z2\n"
                "ftmad z3.h, z3.h, z1.h, #1");
  COMPARE_MACRO(Ftmad(z6.VnS(), z4.VnS(), z6.VnS(), 1),
                "mov z31.d, z6.d\n"
                "movprfx z6, z4\n"
                "ftmad z6.s, z6.s, z31.s, #1");

  // More assembler forms, again with the destination as the first source.
  COMPARE(fabd(z31.VnH(), p7.Merging(), z31.VnH(), z17.VnH()),
          "fabd z31.h, p7/m, z31.h, z17.h");
  COMPARE(fabd(z31.VnS(), p7.Merging(), z31.VnS(), z17.VnS()),
          "fabd z31.s, p7/m, z31.s, z17.s");
  COMPARE(fabd(z31.VnD(), p7.Merging(), z31.VnD(), z17.VnD()),
          "fabd z31.d, p7/m, z31.d, z17.d");
  COMPARE(fadd(z24.VnH(), p2.Merging(), z24.VnH(), z15.VnH()),
          "fadd z24.h, p2/m, z24.h, z15.h");
  COMPARE(fadd(z24.VnS(), p2.Merging(), z24.VnS(), z15.VnS()),
          "fadd z24.s, p2/m, z24.s, z15.s");
  COMPARE(fadd(z24.VnD(), p2.Merging(), z24.VnD(), z15.VnD()),
          "fadd z24.d, p2/m, z24.d, z15.d");
  COMPARE(fmaxnm(z15.VnH(), p4.Merging(), z15.VnH(), z3.VnH()),
          "fmaxnm z15.h, p4/m, z15.h, z3.h");
  COMPARE(fmaxnm(z15.VnS(), p4.Merging(), z15.VnS(), z3.VnS()),
          "fmaxnm z15.s, p4/m, z15.s, z3.s");
  COMPARE(fmaxnm(z15.VnD(), p4.Merging(), z15.VnD(), z3.VnD()),
          "fmaxnm z15.d, p4/m, z15.d, z3.d");
  COMPARE(fminnm(z19.VnH(), p2.Merging(), z19.VnH(), z29.VnH()),
          "fminnm z19.h, p2/m, z19.h, z29.h");
  COMPARE(fminnm(z19.VnS(), p2.Merging(), z19.VnS(), z29.VnS()),
          "fminnm z19.s, p2/m, z19.s, z29.s");
  COMPARE(fminnm(z19.VnD(), p2.Merging(), z19.VnD(), z29.VnD()),
          "fminnm z19.d, p2/m, z19.d, z29.d");
  COMPARE(fmulx(z30.VnH(), p6.Merging(), z30.VnH(), z20.VnH()),
          "fmulx z30.h, p6/m, z30.h, z20.h");
  COMPARE(fmulx(z30.VnS(), p6.Merging(), z30.VnS(), z20.VnS()),
          "fmulx z30.s, p6/m, z30.s, z20.s");
  COMPARE(fmulx(z30.VnD(), p6.Merging(), z30.VnD(), z20.VnD()),
          "fmulx z30.d, p6/m, z30.d, z20.d");
  COMPARE(fmul(z26.VnH(), p2.Merging(), z26.VnH(), z6.VnH()),
          "fmul z26.h, p2/m, z26.h, z6.h");
  COMPARE(fmul(z26.VnS(), p2.Merging(), z26.VnS(), z6.VnS()),
          "fmul z26.s, p2/m, z26.s, z6.s");
  COMPARE(fmul(z26.VnD(), p2.Merging(), z26.VnD(), z6.VnD()),
          "fmul z26.d, p2/m, z26.d, z6.d");
  COMPARE(fscale(z8.VnH(), p3.Merging(), z8.VnH(), z6.VnH()),
          "fscale z8.h, p3/m, z8.h, z6.h");
  COMPARE(fscale(z8.VnS(), p3.Merging(), z8.VnS(), z6.VnS()),
          "fscale z8.s, p3/m, z8.s, z6.s");
  COMPARE(fscale(z8.VnD(), p3.Merging(), z8.VnD(), z6.VnD()),
          "fscale z8.d, p3/m, z8.d, z6.d");
  COMPARE(fsubr(z16.VnH(), p5.Merging(), z16.VnH(), z15.VnH()),
          "fsubr z16.h, p5/m, z16.h, z15.h");
  COMPARE(fsubr(z16.VnS(), p5.Merging(), z16.VnS(), z15.VnS()),
          "fsubr z16.s, p5/m, z16.s, z15.s");
  COMPARE(fsubr(z16.VnD(), p5.Merging(), z16.VnD(), z15.VnD()),
          "fsubr z16.d, p5/m, z16.d, z15.d");
  COMPARE(fsub(z16.VnH(), p5.Merging(), z16.VnH(), z26.VnH()),
          "fsub z16.h, p5/m, z16.h, z26.h");
  COMPARE(fsub(z16.VnS(), p5.Merging(), z16.VnS(), z26.VnS()),
          "fsub z16.s, p5/m, z16.s, z26.s");
  COMPARE(fsub(z16.VnD(), p5.Merging(), z16.VnD(), z26.VnD()),
          "fsub z16.d, p5/m, z16.d, z26.d");

  // Fsub macro with the destination aliasing the second source: the reversed
  // instruction (fsubr) is expected, with no extra moves.
  COMPARE_MACRO(Fsub(z0.VnH(), p0.Merging(), z1.VnH(), z0.VnH()),
                "fsubr z0.h, p0/m, z0.h, z1.h");

  // Macros taking an explicit NaN propagation option. For Fadd with the
  // destination aliasing the second source, FastNaNPropagation is expected to
  // swap the operands, while StrictNaNPropagation is expected to keep the
  // operand order by computing into z31 and moving the result back.
  COMPARE_MACRO(Fadd(z0.VnH(),
                     p0.Merging(),
                     z1.VnH(),
                     z2.VnH(),
                     FastNaNPropagation),
                "movprfx z0.h, p0/m, z1.h\n"
                "fadd z0.h, p0/m, z0.h, z2.h");
  COMPARE_MACRO(Fadd(z0.VnH(),
                     p0.Merging(),
                     z1.VnH(),
                     z0.VnH(),
                     FastNaNPropagation),
                "fadd z0.h, p0/m, z0.h, z1.h");
  COMPARE_MACRO(Fadd(z0.VnH(),
                     p0.Merging(),
                     z1.VnH(),
                     z0.VnH(),
                     StrictNaNPropagation),
                "movprfx z31.h, p0/m, z1.h\n"
                "fadd z31.h, p0/m, z31.h, z0.h\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Fmul(z1.VnS(),
                     p1.Merging(),
                     z2.VnS(),
                     z3.VnS(),
                     FastNaNPropagation),
                "movprfx z1.s, p1/m, z2.s\n"
                "fmul z1.s, p1/m, z1.s, z3.s");
  COMPARE_MACRO(Fmulx(z2.VnD(),
                      p2.Merging(),
                      z3.VnD(),
                      z4.VnD(),
                      FastNaNPropagation),
                "movprfx z2.d, p2/m, z3.d\n"
                "fmulx z2.d, p2/m, z2.d, z4.d");
  COMPARE_MACRO(Fminnm(z3.VnH(),
                       p3.Merging(),
                       z4.VnH(),
                       z5.VnH(),
                       FastNaNPropagation),
                "movprfx z3.h, p3/m, z4.h\n"
                "fminnm z3.h, p3/m, z3.h, z5.h");
  COMPARE_MACRO(Fmaxnm(z4.VnS(),
                       p4.Merging(),
                       z5.VnS(),
                       z6.VnS(),
                       FastNaNPropagation),
                "movprfx z4.s, p4/m, z5.s\n"
                "fmaxnm z4.s, p4/m, z4.s, z6.s");

  // Macros without a NaN propagation argument. The last Fscale case has the
  // destination aliasing the second source and expects that source to be
  // preserved in z31 first.
  COMPARE_MACRO(Fsub(z5.VnD(), p5.Merging(), z6.VnD(), z7.VnD()),
                "movprfx z5.d, p5/m, z6.d\n"
                "fsub z5.d, p5/m, z5.d, z7.d");
  COMPARE_MACRO(Fscale(z6.VnH(), p6.Merging(), z7.VnH(), z8.VnH()),
                "movprfx z6.h, p6/m, z7.h\n"
                "fscale z6.h, p6/m, z6.h, z8.h");
  COMPARE_MACRO(Fscale(z7.VnS(), p7.Merging(), z8.VnS(), z7.VnS()),
                "mov z31.d, z7.d\n"
                "movprfx z7.s, p7/m, z8.s\n"
                "fscale z7.s, p7/m, z7.s, z31.s");

  // Assembler forms taking a floating-point immediate. The values used are
  // 0.5/1.0 for fadd, fsub and fsubr, 0.0/1.0 for the min/max family and
  // 0.5/2.0 for fmul.
  COMPARE(fadd(z18.VnH(), p0.Merging(), z18.VnH(), 0.5),
          "fadd z18.h, p0/m, z18.h, #0.5");
  COMPARE(fadd(z18.VnS(), p0.Merging(), z18.VnS(), 1.0),
          "fadd z18.s, p0/m, z18.s, #1.0");
  COMPARE(fadd(z18.VnD(), p0.Merging(), z18.VnD(), 1.0),
          "fadd z18.d, p0/m, z18.d, #1.0");
  COMPARE(fmaxnm(z6.VnH(), p1.Merging(), z6.VnH(), 0.0),
          "fmaxnm z6.h, p1/m, z6.h, #0.0");
  COMPARE(fmaxnm(z6.VnS(), p1.Merging(), z6.VnS(), 1.0),
          "fmaxnm z6.s, p1/m, z6.s, #1.0");
  COMPARE(fmaxnm(z6.VnD(), p1.Merging(), z6.VnD(), 1.0),
          "fmaxnm z6.d, p1/m, z6.d, #1.0");
  COMPARE(fmax(z8.VnH(), p6.Merging(), z8.VnH(), 0.0),
          "fmax z8.h, p6/m, z8.h, #0.0");
  COMPARE(fmax(z8.VnS(), p6.Merging(), z8.VnS(), 0.0),
          "fmax z8.s, p6/m, z8.s, #0.0");
  COMPARE(fmax(z8.VnD(), p6.Merging(), z8.VnD(), 1.0),
          "fmax z8.d, p6/m, z8.d, #1.0");
  COMPARE(fminnm(z26.VnH(), p0.Merging(), z26.VnH(), 1.0),
          "fminnm z26.h, p0/m, z26.h, #1.0");
  COMPARE(fminnm(z26.VnS(), p0.Merging(), z26.VnS(), 0.0),
          "fminnm z26.s, p0/m, z26.s, #0.0");
  COMPARE(fminnm(z26.VnD(), p0.Merging(), z26.VnD(), 1.0),
          "fminnm z26.d, p0/m, z26.d, #1.0");
  COMPARE(fmin(z22.VnH(), p0.Merging(), z22.VnH(), 1.0),
          "fmin z22.h, p0/m, z22.h, #1.0");
  COMPARE(fmin(z22.VnS(), p0.Merging(), z22.VnS(), 1.0),
          "fmin z22.s, p0/m, z22.s, #1.0");
  COMPARE(fmin(z22.VnD(), p0.Merging(), z22.VnD(), 0.0),
          "fmin z22.d, p0/m, z22.d, #0.0");
  COMPARE(fmul(z21.VnH(), p3.Merging(), z21.VnH(), 0.5),
          "fmul z21.h, p3/m, z21.h, #0.5");
  COMPARE(fmul(z21.VnS(), p3.Merging(), z21.VnS(), 2.0),
          "fmul z21.s, p3/m, z21.s, #2.0");
  COMPARE(fmul(z21.VnD(), p3.Merging(), z21.VnD(), 2.0),
          "fmul z21.d, p3/m, z21.d, #2.0");
  COMPARE(fsubr(z21.VnH(), p3.Merging(), z21.VnH(), 1.0),
          "fsubr z21.h, p3/m, z21.h, #1.0");
  COMPARE(fsubr(z21.VnS(), p3.Merging(), z21.VnS(), 0.5),
          "fsubr z21.s, p3/m, z21.s, #0.5");
  COMPARE(fsubr(z21.VnD(), p3.Merging(), z21.VnD(), 1.0),
          "fsubr z21.d, p3/m, z21.d, #1.0");
  COMPARE(fsub(z26.VnH(), p4.Merging(), z26.VnH(), 0.5),
          "fsub z26.h, p4/m, z26.h, #0.5");
  COMPARE(fsub(z26.VnS(), p4.Merging(), z26.VnS(), 1.0),
          "fsub z26.s, p4/m, z26.s, #1.0");
  COMPARE(fsub(z26.VnD(), p4.Merging(), z26.VnD(), 0.5),
          "fsub z26.d, p4/m, z26.d, #0.5");

  // Immediate macros with a distinct destination: movprfx followed by the
  // destructive immediate form. Fsub with the immediate as its first operand
  // is expected to be emitted as fsubr.
  COMPARE_MACRO(Fadd(z18.VnH(), p0.Merging(), z8.VnH(), 1.0),
                "movprfx z18.h, p0/m, z8.h\n"
                "fadd z18.h, p0/m, z18.h, #1.0");
  COMPARE_MACRO(Fsub(z19.VnH(), p1.Merging(), z9.VnH(), 0.5),
                "movprfx z19.h, p1/m, z9.h\n"
                "fsub z19.h, p1/m, z19.h, #0.5");
  COMPARE_MACRO(Fsub(z20.VnH(), p2.Merging(), 1.0, z10.VnH()),
                "movprfx z20.h, p2/m, z10.h\n"
                "fsubr z20.h, p2/m, z20.h, #1.0");
  COMPARE_MACRO(Fmul(z21.VnH(), p3.Merging(), z11.VnH(), 2.0),
                "movprfx z21.h, p3/m, z11.h\n"
                "fmul z21.h, p3/m, z21.h, #2.0");
  COMPARE_MACRO(Fmin(z22.VnH(), p4.Merging(), z12.VnH(), 0.0),
                "movprfx z22.h, p4/m, z12.h\n"
                "fmin z22.h, p4/m, z22.h, #0.0");
  COMPARE_MACRO(Fminnm(z22.VnH(), p4.Merging(), z12.VnH(), 0.0),
                "movprfx z22.h, p4/m, z12.h\n"
                "fminnm z22.h, p4/m, z22.h, #0.0");
  COMPARE_MACRO(Fmax(z23.VnH(), p5.Merging(), z13.VnH(), 1.0),
                "movprfx z23.h, p5/m, z13.h\n"
                "fmax z23.h, p5/m, z23.h, #1.0");
  COMPARE_MACRO(Fmaxnm(z23.VnH(), p5.Merging(), z13.VnH(), 1.0),
                "movprfx z23.h, p5/m, z13.h\n"
                "fmaxnm z23.h, p5/m, z23.h, #1.0");
  CLEANUP();
}
1128
// Disassembly checks for the Fdiv, Fmax and Fmin macros when the test's
// MacroAssembler type is FastNaNPropagationMacroAssembler.
//
// Each macro is exercised with: destination == first source (three lane
// sizes), all three registers equal, destination == second source, and all
// registers distinct. With the destination aliasing the second source, Fdiv is
// expected to become fdivr, and Fmax/Fmin are expected to be emitted with
// their operands swapped, in a single instruction.
TEST(sve_fp_arithmetic_predicated_macro_fast_nan_propagation) {
// The local typedef below intentionally hides an outer name, so -Wshadow is
// disabled for the body of this test.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"

  // Shadow the `MacroAssembler` type so that the test macros work without
  // modification.
  typedef FastNaNPropagationMacroAssembler MacroAssembler;

  SETUP();

  COMPARE_MACRO(Fdiv(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()),
                "fdiv z8.h, p4/m, z8.h, z4.h");
  COMPARE_MACRO(Fdiv(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()),
                "fdiv z18.s, p5/m, z18.s, z14.s");
  COMPARE_MACRO(Fdiv(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()),
                "fdiv z28.d, p6/m, z28.d, z24.d");
  COMPARE_MACRO(Fdiv(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()),
                "fdiv z17.h, p2/m, z17.h, z17.h");
  COMPARE_MACRO(Fdiv(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()),
                "fdivr z20.s, p5/m, z20.s, z23.s");
  COMPARE_MACRO(Fdiv(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()),
                "movprfx z15.d, p6/m, z30.d\n"
                "fdiv z15.d, p6/m, z15.d, z8.d");

  COMPARE_MACRO(Fmax(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()),
                "fmax z8.h, p4/m, z8.h, z4.h");
  COMPARE_MACRO(Fmax(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()),
                "fmax z18.s, p5/m, z18.s, z14.s");
  COMPARE_MACRO(Fmax(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()),
                "fmax z28.d, p6/m, z28.d, z24.d");
  COMPARE_MACRO(Fmax(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()),
                "fmax z17.h, p2/m, z17.h, z17.h");
  COMPARE_MACRO(Fmax(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()),
                "fmax z20.s, p5/m, z20.s, z23.s");
  COMPARE_MACRO(Fmax(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()),
                "movprfx z15.d, p6/m, z30.d\n"
                "fmax z15.d, p6/m, z15.d, z8.d");

  COMPARE_MACRO(Fmin(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()),
                "fmin z8.h, p4/m, z8.h, z4.h");
  COMPARE_MACRO(Fmin(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()),
                "fmin z18.s, p5/m, z18.s, z14.s");
  COMPARE_MACRO(Fmin(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()),
                "fmin z28.d, p6/m, z28.d, z24.d");
  COMPARE_MACRO(Fmin(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()),
                "fmin z17.h, p2/m, z17.h, z17.h");
  COMPARE_MACRO(Fmin(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()),
                "fmin z20.s, p5/m, z20.s, z23.s");
  COMPARE_MACRO(Fmin(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()),
                "movprfx z15.d, p6/m, z30.d\n"
                "fmin z15.d, p6/m, z15.d, z8.d");

  CLEANUP();

#pragma GCC diagnostic pop
}
1185
// Disassembly checks for the Fdiv, Fmax and Fmin macros when the test's
// MacroAssembler type is StrictNaNPropagationMacroAssembler.
//
// Each macro is exercised with: destination == first source (three lane
// sizes), all three registers equal, destination == second source, and all
// registers distinct. With the destination aliasing the second source, Fdiv is
// expected to become fdivr, whereas Fmax/Fmin are expected to keep their
// operand order by computing into z31 and then moving the result into the
// destination.
TEST(sve_fp_arithmetic_predicated_macro_strict_nan_propagation) {
// The local typedef below intentionally hides an outer name, so -Wshadow is
// disabled for the body of this test.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"

  // Shadow the `MacroAssembler` type so that the test macros work without
  // modification.
  typedef StrictNaNPropagationMacroAssembler MacroAssembler;

  SETUP();

  COMPARE_MACRO(Fdiv(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()),
                "fdiv z8.h, p4/m, z8.h, z4.h");
  COMPARE_MACRO(Fdiv(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()),
                "fdiv z18.s, p5/m, z18.s, z14.s");
  COMPARE_MACRO(Fdiv(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()),
                "fdiv z28.d, p6/m, z28.d, z24.d");
  COMPARE_MACRO(Fdiv(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()),
                "fdiv z17.h, p2/m, z17.h, z17.h");
  COMPARE_MACRO(Fdiv(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()),
                "fdivr z20.s, p5/m, z20.s, z23.s");
  COMPARE_MACRO(Fdiv(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()),
                "movprfx z15.d, p6/m, z30.d\n"
                "fdiv z15.d, p6/m, z15.d, z8.d");

  COMPARE_MACRO(Fmax(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()),
                "fmax z8.h, p4/m, z8.h, z4.h");
  COMPARE_MACRO(Fmax(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()),
                "fmax z18.s, p5/m, z18.s, z14.s");
  COMPARE_MACRO(Fmax(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()),
                "fmax z28.d, p6/m, z28.d, z24.d");
  COMPARE_MACRO(Fmax(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()),
                "fmax z17.h, p2/m, z17.h, z17.h");
  COMPARE_MACRO(Fmax(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()),
                "movprfx z31.s, p5/m, z23.s\n"
                "fmax z31.s, p5/m, z31.s, z20.s\n"
                "mov z20.d, z31.d");
  COMPARE_MACRO(Fmax(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()),
                "movprfx z15.d, p6/m, z30.d\n"
                "fmax z15.d, p6/m, z15.d, z8.d");

  COMPARE_MACRO(Fmin(z8.VnH(), p4.Merging(), z8.VnH(), z4.VnH()),
                "fmin z8.h, p4/m, z8.h, z4.h");
  COMPARE_MACRO(Fmin(z18.VnS(), p5.Merging(), z18.VnS(), z14.VnS()),
                "fmin z18.s, p5/m, z18.s, z14.s");
  COMPARE_MACRO(Fmin(z28.VnD(), p6.Merging(), z28.VnD(), z24.VnD()),
                "fmin z28.d, p6/m, z28.d, z24.d");
  COMPARE_MACRO(Fmin(z17.VnH(), p2.Merging(), z17.VnH(), z17.VnH()),
                "fmin z17.h, p2/m, z17.h, z17.h");
  COMPARE_MACRO(Fmin(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()),
                "movprfx z31.s, p5/m, z23.s\n"
                "fmin z31.s, p5/m, z31.s, z20.s\n"
                "mov z20.d, z31.d");
  COMPARE_MACRO(Fmin(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()),
                "movprfx z15.d, p6/m, z30.d\n"
                "fmin z15.d, p6/m, z15.d, z8.d");

  CLEANUP();

#pragma GCC diagnostic pop
}
1246
// Disassembly checks for unpredicated, three-register SVE floating-point
// instructions (fadd, fmul, fsub, frecps, frsqrts, ftsmul), each with H, S and
// D lanes. The destination is distinct from both sources in every case.
TEST(sve_fp_arithmetic_unpredicated) {
  SETUP();

  COMPARE(fadd(z5.VnH(), z24.VnH(), z11.VnH()), "fadd z5.h, z24.h, z11.h");
  COMPARE(fadd(z15.VnS(), z14.VnS(), z12.VnS()), "fadd z15.s, z14.s, z12.s");
  COMPARE(fadd(z25.VnD(), z4.VnD(), z13.VnD()), "fadd z25.d, z4.d, z13.d");
  COMPARE(fmul(z9.VnH(), z24.VnH(), z10.VnH()), "fmul z9.h, z24.h, z10.h");
  COMPARE(fmul(z19.VnS(), z14.VnS(), z0.VnS()), "fmul z19.s, z14.s, z0.s");
  COMPARE(fmul(z29.VnD(), z4.VnD(), z20.VnD()), "fmul z29.d, z4.d, z20.d");
  COMPARE(fsub(z4.VnH(), z14.VnH(), z29.VnH()), "fsub z4.h, z14.h, z29.h");
  COMPARE(fsub(z14.VnS(), z24.VnS(), z9.VnS()), "fsub z14.s, z24.s, z9.s");
  COMPARE(fsub(z14.VnD(), z4.VnD(), z19.VnD()), "fsub z14.d, z4.d, z19.d");
  COMPARE(frecps(z14.VnH(), z29.VnH(), z18.VnH()),
          "frecps z14.h, z29.h, z18.h");
  COMPARE(frecps(z14.VnS(), z29.VnS(), z18.VnS()),
          "frecps z14.s, z29.s, z18.s");
  COMPARE(frecps(z14.VnD(), z29.VnD(), z18.VnD()),
          "frecps z14.d, z29.d, z18.d");
  COMPARE(frsqrts(z5.VnH(), z6.VnH(), z28.VnH()), "frsqrts z5.h, z6.h, z28.h");
  COMPARE(frsqrts(z5.VnS(), z6.VnS(), z28.VnS()), "frsqrts z5.s, z6.s, z28.s");
  COMPARE(frsqrts(z5.VnD(), z6.VnD(), z28.VnD()), "frsqrts z5.d, z6.d, z28.d");
  COMPARE(ftsmul(z21.VnH(), z17.VnH(), z24.VnH()),
          "ftsmul z21.h, z17.h, z24.h");
  COMPARE(ftsmul(z21.VnS(), z17.VnS(), z24.VnS()),
          "ftsmul z21.s, z17.s, z24.s");
  COMPARE(ftsmul(z21.VnD(), z17.VnD(), z24.VnD()),
          "ftsmul z21.d, z17.d, z24.d");

  CLEANUP();
}
1277
// Disassembly checks for the SVE floating-point vector-vector compares under
// a zeroing predicate. Each mnemonic is tried with H, S and D lanes.
TEST(sve_fp_compare_vectors) {
  SETUP();

  // facge
  COMPARE(facge(p1.VnH(), p3.Zeroing(), z22.VnH(), z25.VnH()),
          "facge p1.h, p3/z, z22.h, z25.h");
  COMPARE(facge(p1.VnS(), p3.Zeroing(), z22.VnS(), z25.VnS()),
          "facge p1.s, p3/z, z22.s, z25.s");
  COMPARE(facge(p1.VnD(), p3.Zeroing(), z22.VnD(), z25.VnD()),
          "facge p1.d, p3/z, z22.d, z25.d");

  // facgt
  COMPARE(facgt(p8.VnH(), p7.Zeroing(), z25.VnH(), z17.VnH()),
          "facgt p8.h, p7/z, z25.h, z17.h");
  COMPARE(facgt(p8.VnS(), p7.Zeroing(), z25.VnS(), z17.VnS()),
          "facgt p8.s, p7/z, z25.s, z17.s");
  COMPARE(facgt(p8.VnD(), p7.Zeroing(), z25.VnD(), z17.VnD()),
          "facgt p8.d, p7/z, z25.d, z17.d");

  // fcmeq
  COMPARE(fcmeq(p10.VnH(), p2.Zeroing(), z1.VnH(), z17.VnH()),
          "fcmeq p10.h, p2/z, z1.h, z17.h");
  COMPARE(fcmeq(p10.VnS(), p2.Zeroing(), z1.VnS(), z17.VnS()),
          "fcmeq p10.s, p2/z, z1.s, z17.s");
  COMPARE(fcmeq(p10.VnD(), p2.Zeroing(), z1.VnD(), z17.VnD()),
          "fcmeq p10.d, p2/z, z1.d, z17.d");

  // fcmge
  COMPARE(fcmge(p0.VnH(), p0.Zeroing(), z1.VnH(), z0.VnH()),
          "fcmge p0.h, p0/z, z1.h, z0.h");
  COMPARE(fcmge(p0.VnS(), p0.Zeroing(), z1.VnS(), z0.VnS()),
          "fcmge p0.s, p0/z, z1.s, z0.s");
  COMPARE(fcmge(p0.VnD(), p0.Zeroing(), z1.VnD(), z0.VnD()),
          "fcmge p0.d, p0/z, z1.d, z0.d");

  // fcmgt
  COMPARE(fcmgt(p15.VnH(), p5.Zeroing(), z26.VnH(), z5.VnH()),
          "fcmgt p15.h, p5/z, z26.h, z5.h");
  COMPARE(fcmgt(p15.VnS(), p5.Zeroing(), z26.VnS(), z5.VnS()),
          "fcmgt p15.s, p5/z, z26.s, z5.s");
  COMPARE(fcmgt(p15.VnD(), p5.Zeroing(), z26.VnD(), z5.VnD()),
          "fcmgt p15.d, p5/z, z26.d, z5.d");

  // fcmne
  COMPARE(fcmne(p2.VnH(), p1.Zeroing(), z9.VnH(), z4.VnH()),
          "fcmne p2.h, p1/z, z9.h, z4.h");
  COMPARE(fcmne(p2.VnS(), p1.Zeroing(), z9.VnS(), z4.VnS()),
          "fcmne p2.s, p1/z, z9.s, z4.s");
  COMPARE(fcmne(p2.VnD(), p1.Zeroing(), z9.VnD(), z4.VnD()),
          "fcmne p2.d, p1/z, z9.d, z4.d");

  // fcmuo
  COMPARE(fcmuo(p6.VnH(), p4.Zeroing(), z10.VnH(), z21.VnH()),
          "fcmuo p6.h, p4/z, z10.h, z21.h");
  COMPARE(fcmuo(p6.VnS(), p4.Zeroing(), z10.VnS(), z21.VnS()),
          "fcmuo p6.s, p4/z, z10.s, z21.s");
  COMPARE(fcmuo(p6.VnD(), p4.Zeroing(), z10.VnD(), z21.VnD()),
          "fcmuo p6.d, p4/z, z10.d, z21.d");

  // The "less than" style macros below are expected to come out as the
  // matching "greater than" instruction with the two vector operands swapped.

  // Facle -> facge
  COMPARE_MACRO(Facle(p2.VnH(), p0.Zeroing(), z11.VnH(), z15.VnH()),
                "facge p2.h, p0/z, z15.h, z11.h");
  COMPARE_MACRO(Facle(p2.VnS(), p0.Zeroing(), z11.VnS(), z15.VnS()),
                "facge p2.s, p0/z, z15.s, z11.s");
  COMPARE_MACRO(Facle(p2.VnD(), p0.Zeroing(), z11.VnD(), z15.VnD()),
                "facge p2.d, p0/z, z15.d, z11.d");

  // Faclt -> facgt
  COMPARE_MACRO(Faclt(p9.VnH(), p4.Zeroing(), z27.VnH(), z5.VnH()),
                "facgt p9.h, p4/z, z5.h, z27.h");
  COMPARE_MACRO(Faclt(p9.VnS(), p4.Zeroing(), z27.VnS(), z5.VnS()),
                "facgt p9.s, p4/z, z5.s, z27.s");
  COMPARE_MACRO(Faclt(p9.VnD(), p4.Zeroing(), z27.VnD(), z5.VnD()),
                "facgt p9.d, p4/z, z5.d, z27.d");

  // Fcmle -> fcmge
  COMPARE_MACRO(Fcmle(p12.VnH(), p2.Zeroing(), z21.VnH(), z29.VnH()),
                "fcmge p12.h, p2/z, z29.h, z21.h");
  COMPARE_MACRO(Fcmle(p12.VnS(), p2.Zeroing(), z21.VnS(), z29.VnS()),
                "fcmge p12.s, p2/z, z29.s, z21.s");
  COMPARE_MACRO(Fcmle(p12.VnD(), p2.Zeroing(), z21.VnD(), z29.VnD()),
                "fcmge p12.d, p2/z, z29.d, z21.d");

  // Fcmlt -> fcmgt
  COMPARE_MACRO(Fcmlt(p7.VnH(), p3.Zeroing(), z7.VnH(), z14.VnH()),
                "fcmgt p7.h, p3/z, z14.h, z7.h");
  COMPARE_MACRO(Fcmlt(p7.VnS(), p3.Zeroing(), z7.VnS(), z14.VnS()),
                "fcmgt p7.s, p3/z, z14.s, z7.s");
  COMPARE_MACRO(Fcmlt(p7.VnD(), p3.Zeroing(), z7.VnD(), z14.VnD()),
                "fcmgt p7.d, p3/z, z14.d, z7.d");

  CLEANUP();
}
1353
// Disassembly checks for the SVE floating-point compares whose last operand
// is a literal zero; the expected text shows that operand as "#0.0".
TEST(sve_fp_compare_with_zero) {
  SETUP();

  // fcmeq
  COMPARE(fcmeq(p9.VnH(), p1.Zeroing(), z17.VnH(), 0),
          "fcmeq p9.h, p1/z, z17.h, #0.0");
  COMPARE(fcmeq(p9.VnS(), p1.Zeroing(), z17.VnS(), 0),
          "fcmeq p9.s, p1/z, z17.s, #0.0");
  COMPARE(fcmeq(p9.VnD(), p1.Zeroing(), z17.VnD(), 0),
          "fcmeq p9.d, p1/z, z17.d, #0.0");

  // fcmge
  COMPARE(fcmge(p13.VnH(), p3.Zeroing(), z13.VnH(), 0),
          "fcmge p13.h, p3/z, z13.h, #0.0");
  COMPARE(fcmge(p13.VnS(), p3.Zeroing(), z13.VnS(), 0),
          "fcmge p13.s, p3/z, z13.s, #0.0");
  COMPARE(fcmge(p13.VnD(), p3.Zeroing(), z13.VnD(), 0),
          "fcmge p13.d, p3/z, z13.d, #0.0");

  // fcmgt
  COMPARE(fcmgt(p10.VnH(), p2.Zeroing(), z24.VnH(), 0),
          "fcmgt p10.h, p2/z, z24.h, #0.0");
  COMPARE(fcmgt(p10.VnS(), p2.Zeroing(), z24.VnS(), 0),
          "fcmgt p10.s, p2/z, z24.s, #0.0");
  COMPARE(fcmgt(p10.VnD(), p2.Zeroing(), z24.VnD(), 0),
          "fcmgt p10.d, p2/z, z24.d, #0.0");

  // fcmle
  COMPARE(fcmle(p4.VnH(), p7.Zeroing(), z1.VnH(), 0),
          "fcmle p4.h, p7/z, z1.h, #0.0");
  COMPARE(fcmle(p4.VnS(), p7.Zeroing(), z1.VnS(), 0),
          "fcmle p4.s, p7/z, z1.s, #0.0");
  COMPARE(fcmle(p4.VnD(), p7.Zeroing(), z1.VnD(), 0),
          "fcmle p4.d, p7/z, z1.d, #0.0");

  // fcmlt
  COMPARE(fcmlt(p15.VnH(), p7.Zeroing(), z9.VnH(), 0),
          "fcmlt p15.h, p7/z, z9.h, #0.0");
  COMPARE(fcmlt(p15.VnS(), p7.Zeroing(), z9.VnS(), 0),
          "fcmlt p15.s, p7/z, z9.s, #0.0");
  COMPARE(fcmlt(p15.VnD(), p7.Zeroing(), z9.VnD(), 0),
          "fcmlt p15.d, p7/z, z9.d, #0.0");

  // fcmne
  COMPARE(fcmne(p14.VnH(), p7.Zeroing(), z28.VnH(), 0),
          "fcmne p14.h, p7/z, z28.h, #0.0");
  COMPARE(fcmne(p14.VnS(), p7.Zeroing(), z28.VnS(), 0),
          "fcmne p14.s, p7/z, z28.s, #0.0");
  COMPARE(fcmne(p14.VnD(), p7.Zeroing(), z28.VnD(), 0),
          "fcmne p14.d, p7/z, z28.d, #0.0");

  CLEANUP();
}
1396
// Disassembly checks for fcadd (rotations #90 and #270, H/S/D lanes) and for
// the sequences expected from the Fcadd macro.
TEST(sve_fp_complex_addition) {
  SETUP();

  // Rotation #90.
  COMPARE(fcadd(z12.VnH(), p5.Merging(), z12.VnH(), z13.VnH(), 90),
          "fcadd z12.h, p5/m, z12.h, z13.h, #90");
  COMPARE(fcadd(z12.VnS(), p5.Merging(), z12.VnS(), z13.VnS(), 90),
          "fcadd z12.s, p5/m, z12.s, z13.s, #90");
  COMPARE(fcadd(z12.VnD(), p5.Merging(), z12.VnD(), z13.VnD(), 90),
          "fcadd z12.d, p5/m, z12.d, z13.d, #90");

  // Rotation #270.
  COMPARE(fcadd(z22.VnH(), p0.Merging(), z22.VnH(), z23.VnH(), 270),
          "fcadd z22.h, p0/m, z22.h, z23.h, #270");
  COMPARE(fcadd(z22.VnS(), p0.Merging(), z22.VnS(), z23.VnS(), 270),
          "fcadd z22.s, p0/m, z22.s, z23.s, #270");
  COMPARE(fcadd(z22.VnD(), p0.Merging(), z22.VnD(), z23.VnD(), 270),
          "fcadd z22.d, p0/m, z22.d, z23.d, #270");

  // Macro, destination distinct from both sources: a predicated movprfx is
  // expected ahead of the fcadd.
  COMPARE_MACRO(Fcadd(z12.VnH(), p5.Merging(), z1.VnH(), z13.VnH(), 90),
                "movprfx z12.h, p5/m, z1.h\n"
                "fcadd z12.h, p5/m, z12.h, z13.h, #90");

  // Macro, destination the same register as the last source (z12): the
  // expected sequence computes into z31 and then moves the result to z12.
  COMPARE_MACRO(Fcadd(z12.VnH(), p5.Merging(), z1.VnH(), z12.VnH(), 90),
                "movprfx z31.h, p5/m, z1.h\n"
                "fcadd z31.h, p5/m, z31.h, z12.h, #90\n"
                "mov z12.d, z31.d");

  CLEANUP();
}
1422
// Disassembly checks for the predicated Fcmla macro: lane sizes, the four
// rotations, and the sequences expected for various register aliasings.
TEST(sve_fp_complex_mul_add) {
  SETUP();

  // Destination equals the first vector argument; H, S and D lanes.
  COMPARE_MACRO(Fcmla(z19.VnH(),
                      p7.Merging(),
                      z19.VnH(),
                      z16.VnH(),
                      z0.VnH(),
                      90),
                "fcmla z19.h, p7/m, z16.h, z0.h, #90");
  COMPARE_MACRO(Fcmla(z19.VnS(),
                      p7.Merging(),
                      z19.VnS(),
                      z16.VnS(),
                      z0.VnS(),
                      90),
                "fcmla z19.s, p7/m, z16.s, z0.s, #90");
  COMPARE_MACRO(Fcmla(z19.VnD(),
                      p7.Merging(),
                      z19.VnD(),
                      z16.VnD(),
                      z0.VnD(),
                      90),
                "fcmla z19.d, p7/m, z16.d, z0.d, #90");

  // Remaining rotations: #0, #180 and #270.
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z20.VnD(),
                      z15.VnD(),
                      z1.VnD(),
                      0),
                "fcmla z20.d, p6/m, z15.d, z1.d, #0");
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z20.VnD(),
                      z15.VnD(),
                      z1.VnD(),
                      180),
                "fcmla z20.d, p6/m, z15.d, z1.d, #180");
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z20.VnD(),
                      z15.VnD(),
                      z1.VnD(),
                      270),
                "fcmla z20.d, p6/m, z15.d, z1.d, #270");

  // Destination equals both the first and the last vector argument: still a
  // single fcmla.
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z20.VnD(),
                      z15.VnD(),
                      z20.VnD(),
                      270),
                "fcmla z20.d, p6/m, z15.d, z20.d, #270");

  // Destination distinct from every source: predicated movprfx, then fcmla.
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z21.VnD(),
                      z15.VnD(),
                      z1.VnD(),
                      270),
                "movprfx z20.d, p6/m, z21.d\n"
                "fcmla z20.d, p6/m, z15.d, z1.d, #270");

  // Destination reused as a multiplicand only: the expected sequence builds
  // the result in z31 and finishes with a predicated mov into z20.
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z21.VnD(),
                      z20.VnD(),
                      z1.VnD(),
                      270),
                "movprfx z31, z21\n"
                "fcmla z31.d, p6/m, z20.d, z1.d, #270\n"
                "mov z20.d, p6/m, z31.d");
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z21.VnD(),
                      z15.VnD(),
                      z20.VnD(),
                      270),
                "movprfx z31, z21\n"
                "fcmla z31.d, p6/m, z15.d, z20.d, #270\n"
                "mov z20.d, p6/m, z31.d");
  COMPARE_MACRO(Fcmla(z20.VnD(),
                      p6.Merging(),
                      z21.VnD(),
                      z20.VnD(),
                      z20.VnD(),
                      270),
                "movprfx z31, z21\n"
                "fcmla z31.d, p6/m, z20.d, z20.d, #270\n"
                "mov z20.d, p6/m, z31.d");

  CLEANUP();
}
1515
// Disassembly checks for the indexed form of fcmla with H and S lanes,
// covering several index and rotation combinations.
TEST(sve_fp_complex_mul_add_index) {
  SETUP();

  // H lanes.
  COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 0),
          "fcmla z30.h, z20.h, z3.h[0], #0");
  COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 1, 0),
          "fcmla z30.h, z20.h, z3.h[1], #0");
  COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 2, 90),
          "fcmla z30.h, z20.h, z3.h[2], #90");
  COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 270),
          "fcmla z30.h, z20.h, z3.h[0], #270");

  // S lanes.
  COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 0),
          "fcmla z10.s, z20.s, z1.s[0], #0");
  COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 0),
          "fcmla z10.s, z20.s, z1.s[1], #0");
  COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 90),
          "fcmla z10.s, z20.s, z1.s[1], #90");
  COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 270),
          "fcmla z10.s, z20.s, z1.s[0], #270");

  CLEANUP();
}
1538
// Disassembly checks for the SVE floating-point reductions that write a
// scalar h/s/d register from a predicated vector source.
TEST(sve_fp_fast_reduction) {
  SETUP();

  // faddv
  COMPARE(faddv(h26, p6, z16.VnH()), "faddv h26, p6, z16.h");
  COMPARE(faddv(s26, p6, z16.VnS()), "faddv s26, p6, z16.s");
  COMPARE(faddv(d26, p6, z16.VnD()), "faddv d26, p6, z16.d");

  // fmaxnmv
  COMPARE(fmaxnmv(h28, p1, z0.VnH()), "fmaxnmv h28, p1, z0.h");
  COMPARE(fmaxnmv(s28, p1, z0.VnS()), "fmaxnmv s28, p1, z0.s");
  COMPARE(fmaxnmv(d28, p1, z0.VnD()), "fmaxnmv d28, p1, z0.d");

  // fmaxv
  COMPARE(fmaxv(h3, p1, z23.VnH()), "fmaxv h3, p1, z23.h");
  COMPARE(fmaxv(s3, p1, z23.VnS()), "fmaxv s3, p1, z23.s");
  COMPARE(fmaxv(d3, p1, z23.VnD()), "fmaxv d3, p1, z23.d");

  // fminnmv
  COMPARE(fminnmv(h20, p6, z21.VnH()), "fminnmv h20, p6, z21.h");
  COMPARE(fminnmv(s20, p6, z21.VnS()), "fminnmv s20, p6, z21.s");
  COMPARE(fminnmv(d20, p6, z21.VnD()), "fminnmv d20, p6, z21.d");

  // fminv
  COMPARE(fminv(h10, p4, z27.VnH()), "fminv h10, p4, z27.h");
  COMPARE(fminv(s10, p4, z27.VnS()), "fminv s10, p4, z27.s");
  COMPARE(fminv(d10, p4, z27.VnD()), "fminv d10, p4, z27.d");

  CLEANUP();
}
1560
// Disassembly checks for the predicated SVE floating-point multiply-add
// family. Each mnemonic is tried with H, S and D lanes.
TEST(sve_fp_mul_add) {
  SETUP();

  // fmad
  COMPARE(fmad(z31.VnH(), p2.Merging(), z8.VnH(), z1.VnH()),
          "fmad z31.h, p2/m, z8.h, z1.h");
  COMPARE(fmad(z31.VnS(), p2.Merging(), z8.VnS(), z1.VnS()),
          "fmad z31.s, p2/m, z8.s, z1.s");
  COMPARE(fmad(z31.VnD(), p2.Merging(), z8.VnD(), z1.VnD()),
          "fmad z31.d, p2/m, z8.d, z1.d");

  // fmla
  COMPARE(fmla(z26.VnH(), p7.Merging(), z19.VnH(), z16.VnH()),
          "fmla z26.h, p7/m, z19.h, z16.h");
  COMPARE(fmla(z26.VnS(), p7.Merging(), z19.VnS(), z16.VnS()),
          "fmla z26.s, p7/m, z19.s, z16.s");
  COMPARE(fmla(z26.VnD(), p7.Merging(), z19.VnD(), z16.VnD()),
          "fmla z26.d, p7/m, z19.d, z16.d");

  // fmls
  COMPARE(fmls(z20.VnH(), p6.Merging(), z28.VnH(), z0.VnH()),
          "fmls z20.h, p6/m, z28.h, z0.h");
  COMPARE(fmls(z20.VnS(), p6.Merging(), z28.VnS(), z0.VnS()),
          "fmls z20.s, p6/m, z28.s, z0.s");
  COMPARE(fmls(z20.VnD(), p6.Merging(), z28.VnD(), z0.VnD()),
          "fmls z20.d, p6/m, z28.d, z0.d");

  // fmsb
  COMPARE(fmsb(z3.VnH(), p4.Merging(), z8.VnH(), z22.VnH()),
          "fmsb z3.h, p4/m, z8.h, z22.h");
  COMPARE(fmsb(z3.VnS(), p4.Merging(), z8.VnS(), z22.VnS()),
          "fmsb z3.s, p4/m, z8.s, z22.s");
  COMPARE(fmsb(z3.VnD(), p4.Merging(), z8.VnD(), z22.VnD()),
          "fmsb z3.d, p4/m, z8.d, z22.d");

  // fnmad
  COMPARE(fnmad(z0.VnH(), p5.Merging(), z20.VnH(), z17.VnH()),
          "fnmad z0.h, p5/m, z20.h, z17.h");
  COMPARE(fnmad(z0.VnS(), p5.Merging(), z20.VnS(), z17.VnS()),
          "fnmad z0.s, p5/m, z20.s, z17.s");
  COMPARE(fnmad(z0.VnD(), p5.Merging(), z20.VnD(), z17.VnD()),
          "fnmad z0.d, p5/m, z20.d, z17.d");

  // fnmla
  COMPARE(fnmla(z31.VnH(), p6.Merging(), z14.VnH(), z8.VnH()),
          "fnmla z31.h, p6/m, z14.h, z8.h");
  COMPARE(fnmla(z31.VnS(), p6.Merging(), z14.VnS(), z8.VnS()),
          "fnmla z31.s, p6/m, z14.s, z8.s");
  COMPARE(fnmla(z31.VnD(), p6.Merging(), z14.VnD(), z8.VnD()),
          "fnmla z31.d, p6/m, z14.d, z8.d");

  // fnmls
  COMPARE(fnmls(z2.VnH(), p1.Merging(), z23.VnH(), z15.VnH()),
          "fnmls z2.h, p1/m, z23.h, z15.h");
  COMPARE(fnmls(z2.VnS(), p1.Merging(), z23.VnS(), z15.VnS()),
          "fnmls z2.s, p1/m, z23.s, z15.s");
  COMPARE(fnmls(z2.VnD(), p1.Merging(), z23.VnD(), z15.VnD()),
          "fnmls z2.d, p1/m, z23.d, z15.d");

  // fnmsb
  COMPARE(fnmsb(z28.VnH(), p3.Merging(), z26.VnH(), z11.VnH()),
          "fnmsb z28.h, p3/m, z26.h, z11.h");
  COMPARE(fnmsb(z28.VnS(), p3.Merging(), z26.VnS(), z11.VnS()),
          "fnmsb z28.s, p3/m, z26.s, z11.s");
  COMPARE(fnmsb(z28.VnD(), p3.Merging(), z26.VnD(), z11.VnD()),
          "fnmsb z28.d, p3/m, z26.d, z11.d");

  CLEANUP();
}
1615
// Expected output of the Fmla/Fmls/Fnmla/Fnmls macros when the test is built
// against StrictNaNPropagationMacroAssembler. Each macro is exercised with
// four register arrangements, in this order:
//   1. destination == first source after the predicate: one fmla-style
//      instruction;
//   2. destination == second source: the fmad-style counterpart;
//   3. destination == third source: movprfx into z31, the operation on z31,
//      then a mov back to the destination;
//   4. no aliasing: predicated movprfx into the destination, then the
//      operation.
TEST(sve_fp_mul_add_macro_strict_nan_propagation) {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"

  // Locally rebind the `MacroAssembler` name (hence the -Wshadow suppression)
  // so the unmodified test macros pick up the strict variant.
  using MacroAssembler = StrictNaNPropagationMacroAssembler;

  SETUP();

  // Fmla
  COMPARE_MACRO(Fmla(z0.VnH(), p1.Merging(), z0.VnH(), z2.VnH(), z4.VnH()),
                "fmla z0.h, p1/m, z2.h, z4.h");
  COMPARE_MACRO(Fmla(z3.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), z5.VnH()),
                "fmad z3.h, p2/m, z5.h, z4.h");
  COMPARE_MACRO(Fmla(z4.VnS(), p3.Merging(), z5.VnS(), z6.VnS(), z4.VnS()),
                "movprfx z31.s, p3/m, z5.s\n"
                "fmla z31.s, p3/m, z6.s, z4.s\n"
                "mov z4.d, z31.d");
  COMPARE_MACRO(Fmla(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()),
                "movprfx z5.d, p4/m, z6.d\n"
                "fmla z5.d, p4/m, z7.d, z8.d");

  // Fmls
  COMPARE_MACRO(Fmls(z0.VnD(), p1.Merging(), z0.VnD(), z2.VnD(), z4.VnD()),
                "fmls z0.d, p1/m, z2.d, z4.d");
  COMPARE_MACRO(Fmls(z3.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), z5.VnS()),
                "fmsb z3.s, p2/m, z5.s, z4.s");
  COMPARE_MACRO(Fmls(z4.VnH(), p3.Merging(), z5.VnH(), z6.VnH(), z4.VnH()),
                "movprfx z31.h, p3/m, z5.h\n"
                "fmls z31.h, p3/m, z6.h, z4.h\n"
                "mov z4.d, z31.d");
  COMPARE_MACRO(Fmls(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()),
                "movprfx z5.d, p4/m, z6.d\n"
                "fmls z5.d, p4/m, z7.d, z8.d");

  // Fnmla
  COMPARE_MACRO(Fnmla(z10.VnH(), p5.Merging(), z10.VnH(), z12.VnH(), z14.VnH()),
                "fnmla z10.h, p5/m, z12.h, z14.h");
  COMPARE_MACRO(Fnmla(z13.VnH(), p6.Merging(), z14.VnH(), z13.VnH(), z15.VnH()),
                "fnmad z13.h, p6/m, z15.h, z14.h");
  COMPARE_MACRO(Fnmla(z14.VnS(), p7.Merging(), z15.VnS(), z16.VnS(), z14.VnS()),
                "movprfx z31.s, p7/m, z15.s\n"
                "fnmla z31.s, p7/m, z16.s, z14.s\n"
                "mov z14.d, z31.d");
  COMPARE_MACRO(Fnmla(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()),
                "movprfx z15.d, p0/m, z16.d\n"
                "fnmla z15.d, p0/m, z17.d, z18.d");

  // Fnmls
  COMPARE_MACRO(Fnmls(z10.VnD(), p5.Merging(), z10.VnD(), z12.VnD(), z14.VnD()),
                "fnmls z10.d, p5/m, z12.d, z14.d");
  COMPARE_MACRO(Fnmls(z13.VnS(), p6.Merging(), z14.VnS(), z13.VnS(), z15.VnS()),
                "fnmsb z13.s, p6/m, z15.s, z14.s");
  COMPARE_MACRO(Fnmls(z14.VnH(), p7.Merging(), z15.VnH(), z16.VnH(), z14.VnH()),
                "movprfx z31.h, p7/m, z15.h\n"
                "fnmls z31.h, p7/m, z16.h, z14.h\n"
                "mov z14.d, z31.d");
  COMPARE_MACRO(Fnmls(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()),
                "movprfx z15.d, p0/m, z16.d\n"
                "fnmls z15.d, p0/m, z17.d, z18.d");

  CLEANUP();

#pragma GCC diagnostic pop
}
1678
// Expected output of the Fmla/Fmls/Fnmla/Fnmls macros when the test is built
// against FastNaNPropagationMacroAssembler. Each macro is exercised with four
// register arrangements, in this order:
//   1. destination == first source after the predicate: one fmla-style
//      instruction;
//   2. destination == second source: the fmad-style counterpart;
//   3. destination == third source: again a single fmad-style instruction,
//      with no temporary register in the expected text;
//   4. no aliasing: predicated movprfx into the destination, then the
//      operation.
TEST(sve_fp_mul_add_macro_fast_nan_propagation) {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"

  // Locally rebind the `MacroAssembler` name (hence the -Wshadow suppression)
  // so the unmodified test macros pick up the fast variant.
  using MacroAssembler = FastNaNPropagationMacroAssembler;

  SETUP();

  // Fmla
  COMPARE_MACRO(Fmla(z0.VnH(), p1.Merging(), z0.VnH(), z2.VnH(), z4.VnH()),
                "fmla z0.h, p1/m, z2.h, z4.h");
  COMPARE_MACRO(Fmla(z3.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), z5.VnH()),
                "fmad z3.h, p2/m, z5.h, z4.h");
  COMPARE_MACRO(Fmla(z4.VnS(), p3.Merging(), z5.VnS(), z6.VnS(), z4.VnS()),
                "fmad z4.s, p3/m, z6.s, z5.s");
  COMPARE_MACRO(Fmla(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()),
                "movprfx z5.d, p4/m, z6.d\n"
                "fmla z5.d, p4/m, z7.d, z8.d");

  // Fmls
  COMPARE_MACRO(Fmls(z0.VnD(), p1.Merging(), z0.VnD(), z2.VnD(), z4.VnD()),
                "fmls z0.d, p1/m, z2.d, z4.d");
  COMPARE_MACRO(Fmls(z3.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), z5.VnS()),
                "fmsb z3.s, p2/m, z5.s, z4.s");
  COMPARE_MACRO(Fmls(z4.VnH(), p3.Merging(), z5.VnH(), z6.VnH(), z4.VnH()),
                "fmsb z4.h, p3/m, z6.h, z5.h");
  COMPARE_MACRO(Fmls(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()),
                "movprfx z5.d, p4/m, z6.d\n"
                "fmls z5.d, p4/m, z7.d, z8.d");

  // Fnmla
  COMPARE_MACRO(Fnmla(z10.VnH(), p5.Merging(), z10.VnH(), z12.VnH(), z14.VnH()),
                "fnmla z10.h, p5/m, z12.h, z14.h");
  COMPARE_MACRO(Fnmla(z13.VnH(), p6.Merging(), z14.VnH(), z13.VnH(), z15.VnH()),
                "fnmad z13.h, p6/m, z15.h, z14.h");
  COMPARE_MACRO(Fnmla(z14.VnS(), p7.Merging(), z15.VnS(), z16.VnS(), z14.VnS()),
                "fnmad z14.s, p7/m, z16.s, z15.s");
  COMPARE_MACRO(Fnmla(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()),
                "movprfx z15.d, p0/m, z16.d\n"
                "fnmla z15.d, p0/m, z17.d, z18.d");

  // Fnmls
  COMPARE_MACRO(Fnmls(z10.VnD(), p5.Merging(), z10.VnD(), z12.VnD(), z14.VnD()),
                "fnmls z10.d, p5/m, z12.d, z14.d");
  COMPARE_MACRO(Fnmls(z13.VnS(), p6.Merging(), z14.VnS(), z13.VnS(), z15.VnS()),
                "fnmsb z13.s, p6/m, z15.s, z14.s");
  COMPARE_MACRO(Fnmls(z14.VnH(), p7.Merging(), z15.VnH(), z16.VnH(), z14.VnH()),
                "fnmsb z14.h, p7/m, z16.h, z15.h");
  COMPARE_MACRO(Fnmls(z15.VnD(), p0.Merging(), z16.VnD(), z17.VnD(), z18.VnD()),
                "movprfx z15.d, p0/m, z16.d\n"
                "fnmls z15.d, p0/m, z17.d, z18.d");

  CLEANUP();

#pragma GCC diagnostic pop
}
1733
// Disassembly checks for the indexed forms of fmla and fmls (D, H and S
// lanes with a range of indices), followed by the sequences expected from the
// indexed Fmla and Fmls macros.
TEST(sve_fp_mul_add_index) {
  SETUP();

  // fmla, D lanes.
  COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 0), "fmla z25.d, z9.d, z1.d[0]");
  COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 1), "fmla z25.d, z9.d, z1.d[1]");

  // fmla, H lanes.
  COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 0), "fmla z13.h, z7.h, z7.h[0]");
  COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 2), "fmla z13.h, z7.h, z7.h[2]");
  COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 5), "fmla z13.h, z7.h, z7.h[5]");
  COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 7), "fmla z13.h, z7.h, z7.h[7]");

  // fmla, S lanes.
  COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 0),
          "fmla z17.s, z27.s, z2.s[0]");
  COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 1),
          "fmla z17.s, z27.s, z2.s[1]");
  COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 2),
          "fmla z17.s, z27.s, z2.s[2]");
  COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 3),
          "fmla z17.s, z27.s, z2.s[3]");

  // fmls, D lanes.
  COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 0), "fmls z28.d, z2.d, z0.d[0]");
  COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 1), "fmls z28.d, z2.d, z0.d[1]");

  // fmls, H lanes.
  COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 1),
          "fmls z30.h, z29.h, z7.h[1]");
  COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 4),
          "fmls z30.h, z29.h, z7.h[4]");
  COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 3),
          "fmls z30.h, z29.h, z7.h[3]");
  COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 6),
          "fmls z30.h, z29.h, z7.h[6]");

  // fmls, S lanes.
  COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 0), "fmls z30.s, z1.s, z6.s[0]");
  COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 1), "fmls z30.s, z1.s, z6.s[1]");
  COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 2), "fmls z30.s, z1.s, z6.s[2]");
  COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 3), "fmls z30.s, z1.s, z6.s[3]");

  // Fmla macro. In order: no aliasing (movprfx + fmla); destination equal to
  // the second argument (single fmla); destination reused as the third or
  // fourth argument (result built in z31, then moved to the destination).
  COMPARE_MACRO(Fmla(z10.VnH(), z11.VnH(), z12.VnH(), z4.VnH(), 7),
                "movprfx z10, z11\n"
                "fmla z10.h, z12.h, z4.h[7]");
  COMPARE_MACRO(Fmla(z10.VnH(), z10.VnH(), z12.VnH(), z4.VnH(), 6),
                "fmla z10.h, z12.h, z4.h[6]");
  COMPARE_MACRO(Fmla(z11.VnS(), z12.VnS(), z11.VnS(), z5.VnS(), 3),
                "movprfx z31, z12\n"
                "fmla z31.s, z11.s, z5.s[3]\n"
                "mov z11.d, z31.d");
  COMPARE_MACRO(Fmla(z12.VnD(), z13.VnD(), z14.VnD(), z12.VnD(), 1),
                "movprfx z31, z13\n"
                "fmla z31.d, z14.d, z12.d[1]\n"
                "mov z12.d, z31.d");

  // Fmls macro, same four arrangements as for Fmla.
  COMPARE_MACRO(Fmls(z10.VnH(), z11.VnH(), z12.VnH(), z4.VnH(), 7),
                "movprfx z10, z11\n"
                "fmls z10.h, z12.h, z4.h[7]");
  COMPARE_MACRO(Fmls(z10.VnH(), z10.VnH(), z12.VnH(), z4.VnH(), 6),
                "fmls z10.h, z12.h, z4.h[6]");
  COMPARE_MACRO(Fmls(z11.VnS(), z12.VnS(), z11.VnS(), z5.VnS(), 3),
                "movprfx z31, z12\n"
                "fmls z31.s, z11.s, z5.s[3]\n"
                "mov z11.d, z31.d");
  COMPARE_MACRO(Fmls(z12.VnD(), z13.VnD(), z14.VnD(), z12.VnD(), 1),
                "movprfx z31, z13\n"
                "fmls z31.d, z14.d, z12.d[1]\n"
                "mov z12.d, z31.d");

  CLEANUP();
}
1801
// Disassembly checks for the indexed form of fmul with D, H and S lanes.
TEST(sve_fp_mul_index) {
  SETUP();

  // D lanes: indices 0 and 1.
  COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 0), "fmul z12.d, z3.d, z4.d[0]");
  COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 1), "fmul z12.d, z3.d, z4.d[1]");

  // H lanes: indices 0, 3, 4 and 7.
  COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 0), "fmul z22.h, z2.h, z3.h[0]");
  COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 3), "fmul z22.h, z2.h, z3.h[3]");
  COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 4), "fmul z22.h, z2.h, z3.h[4]");
  COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 7), "fmul z22.h, z2.h, z3.h[7]");

  // S lanes: indices 0 to 3.
  COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 0), "fmul z2.s, z8.s, z7.s[0]");
  COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 1), "fmul z2.s, z8.s, z7.s[1]");
  COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 2), "fmul z2.s, z8.s, z7.s[2]");
  COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 3), "fmul z2.s, z8.s, z7.s[3]");

  CLEANUP();
}
1820
// Disassembly checks for the merging-predicated SVE floating-point unary
// operations: conversions (including mixed source/destination lane sizes),
// the frint* family, frecpx and fsqrt.
TEST(sve_fp_unary_op_predicated) {
  SETUP();

  // fcvtzs
  COMPARE(fcvtzs(z29.VnS(), p5.Merging(), z8.VnD()),
          "fcvtzs z29.s, p5/m, z8.d");
  COMPARE(fcvtzs(z30.VnD(), p5.Merging(), z8.VnD()),
          "fcvtzs z30.d, p5/m, z8.d");
  COMPARE(fcvtzs(z14.VnH(), p1.Merging(), z29.VnH()),
          "fcvtzs z14.h, p1/m, z29.h");
  COMPARE(fcvtzs(z11.VnS(), p3.Merging(), z16.VnH()),
          "fcvtzs z11.s, p3/m, z16.h");
  COMPARE(fcvtzs(z4.VnD(), p7.Merging(), z4.VnH()), "fcvtzs z4.d, p7/m, z4.h");
  COMPARE(fcvtzs(z24.VnS(), p1.Merging(), z4.VnS()),
          "fcvtzs z24.s, p1/m, z4.s");
  COMPARE(fcvtzs(z25.VnD(), p4.Merging(), z24.VnS()),
          "fcvtzs z25.d, p4/m, z24.s");

  // fcvtzu
  COMPARE(fcvtzu(z16.VnS(), p7.Merging(), z14.VnD()),
          "fcvtzu z16.s, p7/m, z14.d");
  COMPARE(fcvtzu(z31.VnD(), p1.Merging(), z16.VnD()),
          "fcvtzu z31.d, p1/m, z16.d");
  COMPARE(fcvtzu(z12.VnH(), p2.Merging(), z27.VnH()),
          "fcvtzu z12.h, p2/m, z27.h");
  COMPARE(fcvtzu(z26.VnS(), p6.Merging(), z29.VnH()),
          "fcvtzu z26.s, p6/m, z29.h");
  COMPARE(fcvtzu(z29.VnD(), p5.Merging(), z27.VnH()),
          "fcvtzu z29.d, p5/m, z27.h");
  COMPARE(fcvtzu(z13.VnS(), p2.Merging(), z17.VnS()),
          "fcvtzu z13.s, p2/m, z17.s");
  COMPARE(fcvtzu(z25.VnD(), p7.Merging(), z28.VnS()),
          "fcvtzu z25.d, p7/m, z28.s");

  // scvtf
  COMPARE(scvtf(z16.VnH(), p6.Merging(), z5.VnH()), "scvtf z16.h, p6/m, z5.h");
  COMPARE(scvtf(z31.VnD(), p5.Merging(), z26.VnS()),
          "scvtf z31.d, p5/m, z26.s");
  COMPARE(scvtf(z0.VnH(), p7.Merging(), z0.VnS()), "scvtf z0.h, p7/m, z0.s");
  COMPARE(scvtf(z12.VnS(), p7.Merging(), z0.VnS()), "scvtf z12.s, p7/m, z0.s");
  COMPARE(scvtf(z17.VnD(), p1.Merging(), z17.VnD()),
          "scvtf z17.d, p1/m, z17.d");
  COMPARE(scvtf(z2.VnH(), p0.Merging(), z9.VnD()), "scvtf z2.h, p0/m, z9.d");
  COMPARE(scvtf(z26.VnS(), p5.Merging(), z4.VnD()), "scvtf z26.s, p5/m, z4.d");

  // ucvtf
  COMPARE(ucvtf(z27.VnH(), p4.Merging(), z25.VnH()),
          "ucvtf z27.h, p4/m, z25.h");
  COMPARE(ucvtf(z3.VnD(), p4.Merging(), z3.VnS()), "ucvtf z3.d, p4/m, z3.s");
  COMPARE(ucvtf(z24.VnH(), p2.Merging(), z29.VnS()),
          "ucvtf z24.h, p2/m, z29.s");
  COMPARE(ucvtf(z29.VnS(), p5.Merging(), z14.VnS()),
          "ucvtf z29.s, p5/m, z14.s");
  COMPARE(ucvtf(z7.VnD(), p2.Merging(), z14.VnD()), "ucvtf z7.d, p2/m, z14.d");
  COMPARE(ucvtf(z20.VnH(), p2.Merging(), z14.VnD()),
          "ucvtf z20.h, p2/m, z14.d");
  COMPARE(ucvtf(z26.VnS(), p1.Merging(), z18.VnD()),
          "ucvtf z26.s, p1/m, z18.d");

  // frinta
  COMPARE(frinta(z11.VnH(), p0.Merging(), z3.VnH()),
          "frinta z11.h, p0/m, z3.h");
  COMPARE(frinta(z11.VnS(), p0.Merging(), z3.VnS()),
          "frinta z11.s, p0/m, z3.s");
  COMPARE(frinta(z11.VnD(), p0.Merging(), z3.VnD()),
          "frinta z11.d, p0/m, z3.d");

  // frinti
  COMPARE(frinti(z17.VnH(), p0.Merging(), z16.VnH()),
          "frinti z17.h, p0/m, z16.h");
  COMPARE(frinti(z17.VnS(), p0.Merging(), z16.VnS()),
          "frinti z17.s, p0/m, z16.s");
  COMPARE(frinti(z17.VnD(), p0.Merging(), z16.VnD()),
          "frinti z17.d, p0/m, z16.d");

  // frintm
  COMPARE(frintm(z2.VnH(), p7.Merging(), z15.VnH()),
          "frintm z2.h, p7/m, z15.h");
  COMPARE(frintm(z2.VnS(), p7.Merging(), z15.VnS()),
          "frintm z2.s, p7/m, z15.s");
  COMPARE(frintm(z2.VnD(), p7.Merging(), z15.VnD()),
          "frintm z2.d, p7/m, z15.d");

  // frintn
  COMPARE(frintn(z14.VnH(), p5.Merging(), z18.VnH()),
          "frintn z14.h, p5/m, z18.h");
  COMPARE(frintn(z14.VnS(), p5.Merging(), z18.VnS()),
          "frintn z14.s, p5/m, z18.s");
  COMPARE(frintn(z14.VnD(), p5.Merging(), z18.VnD()),
          "frintn z14.d, p5/m, z18.d");

  // frintp
  COMPARE(frintp(z20.VnH(), p6.Merging(), z23.VnH()),
          "frintp z20.h, p6/m, z23.h");
  COMPARE(frintp(z20.VnS(), p6.Merging(), z23.VnS()),
          "frintp z20.s, p6/m, z23.s");
  COMPARE(frintp(z20.VnD(), p6.Merging(), z23.VnD()),
          "frintp z20.d, p6/m, z23.d");

  // frintx
  COMPARE(frintx(z2.VnH(), p6.Merging(), z18.VnH()),
          "frintx z2.h, p6/m, z18.h");
  COMPARE(frintx(z2.VnS(), p6.Merging(), z18.VnS()),
          "frintx z2.s, p6/m, z18.s");
  COMPARE(frintx(z2.VnD(), p6.Merging(), z18.VnD()),
          "frintx z2.d, p6/m, z18.d");

  // frintz
  COMPARE(frintz(z26.VnH(), p7.Merging(), z25.VnH()),
          "frintz z26.h, p7/m, z25.h");
  COMPARE(frintz(z26.VnS(), p7.Merging(), z25.VnS()),
          "frintz z26.s, p7/m, z25.s");
  COMPARE(frintz(z26.VnD(), p7.Merging(), z25.VnD()),
          "frintz z26.d, p7/m, z25.d");

  // fcvt
  COMPARE(fcvt(z5.VnH(), p2.Merging(), z11.VnD()), "fcvt z5.h, p2/m, z11.d");
  COMPARE(fcvt(z30.VnS(), p7.Merging(), z0.VnD()), "fcvt z30.s, p7/m, z0.d");
  COMPARE(fcvt(z10.VnD(), p0.Merging(), z17.VnH()), "fcvt z10.d, p0/m, z17.h");
  COMPARE(fcvt(z28.VnS(), p3.Merging(), z27.VnH()), "fcvt z28.s, p3/m, z27.h");
  COMPARE(fcvt(z9.VnD(), p7.Merging(), z0.VnS()), "fcvt z9.d, p7/m, z0.s");
  COMPARE(fcvt(z27.VnH(), p7.Merging(), z9.VnS()), "fcvt z27.h, p7/m, z9.s");

  // frecpx
  COMPARE(frecpx(z16.VnH(), p1.Merging(), z29.VnH()),
          "frecpx z16.h, p1/m, z29.h");
  COMPARE(frecpx(z16.VnS(), p1.Merging(), z29.VnS()),
          "frecpx z16.s, p1/m, z29.s");
  COMPARE(frecpx(z16.VnD(), p1.Merging(), z29.VnD()),
          "frecpx z16.d, p1/m, z29.d");

  // fsqrt
  COMPARE(fsqrt(z30.VnH(), p3.Merging(), z13.VnH()),
          "fsqrt z30.h, p3/m, z13.h");
  COMPARE(fsqrt(z30.VnS(), p3.Merging(), z13.VnS()),
          "fsqrt z30.s, p3/m, z13.s");
  COMPARE(fsqrt(z30.VnD(), p3.Merging(), z13.VnD()),
          "fsqrt z30.d, p3/m, z13.d");

  CLEANUP();
}
1935
// Expected output of the floating-point unary macros when called with a
// zeroing predicate: in every case a zeroing movprfx followed by the
// merging-predicated instruction.
TEST(sve_fp_unary_op_predicated_macro) {
  SETUP();

  // Fcvt. In the expected text the movprfx lane size matches the wider of the
  // source and destination lane sizes.
  COMPARE_MACRO(Fcvt(z5.VnH(), p2.Zeroing(), z11.VnD()),
                "movprfx z5.d, p2/z, z11.d\n"
                "fcvt z5.h, p2/m, z11.d");
  COMPARE_MACRO(Fcvt(z30.VnS(), p7.Zeroing(), z0.VnD()),
                "movprfx z30.d, p7/z, z0.d\n"
                "fcvt z30.s, p7/m, z0.d");
  COMPARE_MACRO(Fcvt(z10.VnD(), p0.Zeroing(), z17.VnH()),
                "movprfx z10.d, p0/z, z17.d\n"
                "fcvt z10.d, p0/m, z17.h");
  COMPARE_MACRO(Fcvt(z28.VnS(), p3.Zeroing(), z27.VnH()),
                "movprfx z28.s, p3/z, z27.s\n"
                "fcvt z28.s, p3/m, z27.h");
  COMPARE_MACRO(Fcvt(z9.VnD(), p7.Zeroing(), z0.VnS()),
                "movprfx z9.d, p7/z, z0.d\n"
                "fcvt z9.d, p7/m, z0.s");
  COMPARE_MACRO(Fcvt(z27.VnH(), p7.Zeroing(), z9.VnS()),
                "movprfx z27.s, p7/z, z9.s\n"
                "fcvt z27.h, p7/m, z9.s");

  // Frecpx
  COMPARE_MACRO(Frecpx(z16.VnH(), p1.Zeroing(), z29.VnH()),
                "movprfx z16.h, p1/z, z29.h\n"
                "frecpx z16.h, p1/m, z29.h");
  COMPARE_MACRO(Frecpx(z17.VnS(), p2.Zeroing(), z30.VnS()),
                "movprfx z17.s, p2/z, z30.s\n"
                "frecpx z17.s, p2/m, z30.s");
  COMPARE_MACRO(Frecpx(z18.VnD(), p3.Zeroing(), z31.VnD()),
                "movprfx z18.d, p3/z, z31.d\n"
                "frecpx z18.d, p3/m, z31.d");

  // Frinta, Frinti, Frintm, Frintn, Frintp, Frintx, Frintz.
  COMPARE_MACRO(Frinta(z6.VnD(), p3.Zeroing(), z12.VnD()),
                "movprfx z6.d, p3/z, z12.d\n"
                "frinta z6.d, p3/m, z12.d");
  COMPARE_MACRO(Frinti(z7.VnS(), p3.Zeroing(), z11.VnS()),
                "movprfx z7.s, p3/z, z11.s\n"
                "frinti z7.s, p3/m, z11.s");
  COMPARE_MACRO(Frintm(z8.VnH(), p3.Zeroing(), z10.VnH()),
                "movprfx z8.h, p3/z, z10.h\n"
                "frintm z8.h, p3/m, z10.h");
  COMPARE_MACRO(Frintn(z9.VnD(), p3.Zeroing(), z9.VnD()),
                "movprfx z9.d, p3/z, z9.d\n"
                "frintn z9.d, p3/m, z9.d");
  COMPARE_MACRO(Frintp(z10.VnS(), p3.Zeroing(), z8.VnS()),
                "movprfx z10.s, p3/z, z8.s\n"
                "frintp z10.s, p3/m, z8.s");
  COMPARE_MACRO(Frintx(z11.VnH(), p3.Zeroing(), z7.VnH()),
                "movprfx z11.h, p3/z, z7.h\n"
                "frintx z11.h, p3/m, z7.h");
  COMPARE_MACRO(Frintz(z12.VnD(), p3.Zeroing(), z6.VnD()),
                "movprfx z12.d, p3/z, z6.d\n"
                "frintz z12.d, p3/m, z6.d");

  // Fsqrt
  COMPARE_MACRO(Fsqrt(z30.VnH(), p3.Zeroing(), z13.VnH()),
                "movprfx z30.h, p3/z, z13.h\n"
                "fsqrt z30.h, p3/m, z13.h");
  COMPARE_MACRO(Fsqrt(z29.VnS(), p3.Zeroing(), z14.VnS()),
                "movprfx z29.s, p3/z, z14.s\n"
                "fsqrt z29.s, p3/m, z14.s");
  COMPARE_MACRO(Fsqrt(z28.VnD(), p3.Zeroing(), z15.VnD()),
                "movprfx z28.d, p3/z, z15.d\n"
                "fsqrt z28.d, p3/m, z15.d");

  CLEANUP();
}
1999
// Disassembly checks for the unpredicated frecpe and frsqrte instructions
// with H, S and D lanes.
TEST(sve_fp_unary_op_unpredicated) {
  SETUP();

  // frecpe
  COMPARE(frecpe(z0.VnH(), z2.VnH()), "frecpe z0.h, z2.h");
  COMPARE(frecpe(z0.VnS(), z2.VnS()), "frecpe z0.s, z2.s");
  COMPARE(frecpe(z0.VnD(), z2.VnD()), "frecpe z0.d, z2.d");

  // frsqrte
  COMPARE(frsqrte(z27.VnH(), z14.VnH()), "frsqrte z27.h, z14.h");
  COMPARE(frsqrte(z27.VnS(), z14.VnS()), "frsqrte z27.s, z14.s");
  COMPARE(frsqrte(z27.VnD(), z14.VnD()), "frsqrte z27.d, z14.d");

  CLEANUP();
}
2012
// Disassembly checks for the increment/decrement-by-predicate-count
// instructions, in plain and saturating forms, with scalar (x/w) and vector
// destinations.
TEST(sve_inc_dec_by_predicate_count) {
  SETUP();

  // decp: scalar destination, then vector destination.
  COMPARE(decp(x17, p0.VnB()), "decp x17, p0.b");
  COMPARE(decp(x17, p0.VnH()), "decp x17, p0.h");
  COMPARE(decp(x17, p0.VnS()), "decp x17, p0.s");
  COMPARE(decp(x17, p0.VnD()), "decp x17, p0.d");
  COMPARE(decp(z2.VnH(), p11), "decp z2.h, p11");
  COMPARE(decp(z2.VnS(), p11), "decp z2.s, p11");
  COMPARE(decp(z2.VnD(), p11), "decp z2.d, p11");

  // incp: scalar destination, then vector destination.
  COMPARE(incp(x26, p8.VnB()), "incp x26, p8.b");
  COMPARE(incp(x26, p8.VnH()), "incp x26, p8.h");
  COMPARE(incp(x26, p8.VnS()), "incp x26, p8.s");
  COMPARE(incp(x26, p8.VnD()), "incp x26, p8.d");
  COMPARE(incp(z27.VnH(), p9), "incp z27.h, p9");
  COMPARE(incp(z27.VnS(), p9), "incp z27.s, p9");
  COMPARE(incp(z27.VnD(), p9), "incp z27.d, p9");

  // sqdecp: x destination with a trailing w operand, x destination alone,
  // then vector destination.
  COMPARE(sqdecp(x12, p7.VnB(), w12), "sqdecp x12, p7.b, w12");
  COMPARE(sqdecp(x12, p7.VnH(), w12), "sqdecp x12, p7.h, w12");
  COMPARE(sqdecp(x12, p7.VnS(), w12), "sqdecp x12, p7.s, w12");
  COMPARE(sqdecp(x12, p7.VnD(), w12), "sqdecp x12, p7.d, w12");
  COMPARE(sqdecp(x30, p5.VnB()), "sqdecp x30, p5.b");
  COMPARE(sqdecp(x30, p5.VnH()), "sqdecp x30, p5.h");
  COMPARE(sqdecp(x30, p5.VnS()), "sqdecp x30, p5.s");
  COMPARE(sqdecp(x30, p5.VnD()), "sqdecp x30, p5.d");
  COMPARE(sqdecp(z13.VnH(), p1), "sqdecp z13.h, p1");
  COMPARE(sqdecp(z13.VnS(), p1), "sqdecp z13.s, p1");
  COMPARE(sqdecp(z13.VnD(), p1), "sqdecp z13.d, p1");

  // sqincp: same three forms as sqdecp.
  COMPARE(sqincp(x26, p5.VnB(), w26), "sqincp x26, p5.b, w26");
  COMPARE(sqincp(x26, p5.VnH(), w26), "sqincp x26, p5.h, w26");
  COMPARE(sqincp(x26, p5.VnS(), w26), "sqincp x26, p5.s, w26");
  COMPARE(sqincp(x26, p5.VnD(), w26), "sqincp x26, p5.d, w26");
  COMPARE(sqincp(x5, p15.VnB()), "sqincp x5, p15.b");
  COMPARE(sqincp(x5, p15.VnH()), "sqincp x5, p15.h");
  COMPARE(sqincp(x5, p15.VnS()), "sqincp x5, p15.s");
  COMPARE(sqincp(x5, p15.VnD()), "sqincp x5, p15.d");
  COMPARE(sqincp(z14.VnH(), p4), "sqincp z14.h, p4");
  COMPARE(sqincp(z14.VnS(), p4), "sqincp z14.s, p4");
  COMPARE(sqincp(z14.VnD(), p4), "sqincp z14.d, p4");

  // uqdecp: w destination, x destination, then vector destination.
  COMPARE(uqdecp(w3, p13.VnB()), "uqdecp w3, p13.b");
  COMPARE(uqdecp(w3, p13.VnH()), "uqdecp w3, p13.h");
  COMPARE(uqdecp(w3, p13.VnS()), "uqdecp w3, p13.s");
  COMPARE(uqdecp(w3, p13.VnD()), "uqdecp w3, p13.d");
  COMPARE(uqdecp(x19, p0.VnB()), "uqdecp x19, p0.b");
  COMPARE(uqdecp(x19, p0.VnH()), "uqdecp x19, p0.h");
  COMPARE(uqdecp(x19, p0.VnS()), "uqdecp x19, p0.s");
  COMPARE(uqdecp(x19, p0.VnD()), "uqdecp x19, p0.d");
  COMPARE(uqdecp(z15.VnH(), p9), "uqdecp z15.h, p9");
  COMPARE(uqdecp(z15.VnS(), p9), "uqdecp z15.s, p9");
  COMPARE(uqdecp(z15.VnD(), p9), "uqdecp z15.d, p9");

  // uqincp: same three forms as uqdecp.
  COMPARE(uqincp(w18, p1.VnB()), "uqincp w18, p1.b");
  COMPARE(uqincp(w18, p1.VnH()), "uqincp w18, p1.h");
  COMPARE(uqincp(w18, p1.VnS()), "uqincp w18, p1.s");
  COMPARE(uqincp(w18, p1.VnD()), "uqincp w18, p1.d");
  COMPARE(uqincp(x17, p15.VnB()), "uqincp x17, p15.b");
  COMPARE(uqincp(x17, p15.VnH()), "uqincp x17, p15.h");
  COMPARE(uqincp(x17, p15.VnS()), "uqincp x17, p15.s");
  COMPARE(uqincp(x17, p15.VnD()), "uqincp x17, p15.d");
  COMPARE(uqincp(z4.VnH(), p3), "uqincp z4.h, p3");
  COMPARE(uqincp(z4.VnS(), p3), "uqincp z4.s, p3");
  COMPARE(uqincp(z4.VnD(), p3), "uqincp z4.d, p3");

  CLEANUP();
}
2077
// MacroAssembler counterparts of the inc/dec-by-predicate-count checks.
//
// For each of Decp, Incp, Sqdecp, Sqincp, Uqdecp and Uqincp the vector form
// is exercised three ways:
//  - two-operand form (destination and predicate only),
//  - three-operand form whose source equals the destination, expected to
//    produce a single instruction,
//  - three-operand form whose source differs from the destination, expected
//    to produce a movprfx followed by the instruction.
//
// NOTE(review): COMPARE_MACRO is defined outside this part of the file;
// presumably it checks the disassembly of everything the macro emits.
TEST(sve_inc_dec_by_predicate_count_macro) {
  SETUP();

  // The MacroAssembler automatically generates movprfx where it can.
  COMPARE_MACRO(Decp(z0.VnD(), p1), "decp z0.d, p1");
  COMPARE_MACRO(Decp(z2.VnS(), p3, z2.VnS()), "decp z2.s, p3");
  COMPARE_MACRO(Decp(z3.VnS(), p3, z3.VnS()), "decp z3.s, p3");
  COMPARE_MACRO(Decp(z4.VnH(), p5, z6.VnH()),
                "movprfx z4, z6\n"
                "decp z4.h, p5");
  COMPARE_MACRO(Incp(z7.VnD(), p8), "incp z7.d, p8");
  COMPARE_MACRO(Incp(z9.VnS(), p10, z9.VnS()), "incp z9.s, p10");
  COMPARE_MACRO(Incp(z10.VnS(), p10, z10.VnS()), "incp z10.s, p10");
  COMPARE_MACRO(Incp(z10.VnH(), p11, z12.VnH()),
                "movprfx z10, z12\n"
                "incp z10.h, p11");
  COMPARE_MACRO(Sqdecp(z0.VnD(), p1), "sqdecp z0.d, p1");
  COMPARE_MACRO(Sqdecp(z2.VnS(), p3, z2.VnS()), "sqdecp z2.s, p3");
  COMPARE_MACRO(Sqdecp(z3.VnS(), p3, z3.VnS()), "sqdecp z3.s, p3");
  COMPARE_MACRO(Sqdecp(z4.VnH(), p5, z6.VnH()),
                "movprfx z4, z6\n"
                "sqdecp z4.h, p5");
  COMPARE_MACRO(Sqincp(z7.VnD(), p8), "sqincp z7.d, p8");
  COMPARE_MACRO(Sqincp(z9.VnS(), p10, z9.VnS()), "sqincp z9.s, p10");
  COMPARE_MACRO(Sqincp(z10.VnS(), p10, z10.VnS()), "sqincp z10.s, p10");
  COMPARE_MACRO(Sqincp(z10.VnH(), p11, z12.VnH()),
                "movprfx z10, z12\n"
                "sqincp z10.h, p11");
  COMPARE_MACRO(Uqdecp(z0.VnD(), p1), "uqdecp z0.d, p1");
  COMPARE_MACRO(Uqdecp(z2.VnS(), p3, z2.VnS()), "uqdecp z2.s, p3");
  COMPARE_MACRO(Uqdecp(z3.VnS(), p3, z3.VnS()), "uqdecp z3.s, p3");
  COMPARE_MACRO(Uqdecp(z4.VnH(), p5, z6.VnH()),
                "movprfx z4, z6\n"
                "uqdecp z4.h, p5");
  COMPARE_MACRO(Uqincp(z7.VnD(), p8), "uqincp z7.d, p8");
  COMPARE_MACRO(Uqincp(z9.VnS(), p10, z9.VnS()), "uqincp z9.s, p10");
  COMPARE_MACRO(Uqincp(z10.VnS(), p10, z10.VnS()), "uqincp z10.s, p10");
  COMPARE_MACRO(Uqincp(z10.VnH(), p11, z12.VnH()),
                "movprfx z10, z12\n"
                "uqincp z10.h, p11");

  // Sqdecp cannot write into a W register, but Uqdecp can.
  COMPARE_MACRO(Uqdecp(w6, p7.VnD()), "uqdecp w6, p7.d");
  COMPARE_MACRO(Uqdecp(x10, p11.VnH()), "uqdecp x10, p11.h");
  COMPARE_MACRO(Uqdecp(x12, p13.VnS()), "uqdecp x12, p13.s");
  COMPARE_MACRO(Uqdecp(w14, p15.VnD()), "uqdecp w14, p15.d");

  CLEANUP();
}
2127
// Checks for the SVE `index` instruction and the `Index` macro.
//
// The four operand combinations (imm/imm, imm/reg, reg/imm, reg/reg) are each
// exercised, first through the assembler and then through the macro. In the
// expected strings, scalar operands of B, H and S lane forms are printed as W
// registers even when an X register is passed; D lane forms print X registers.
TEST(sve_index_generation) {
  SETUP();

  // Immediate start, immediate step.
  COMPARE(index(z21.VnB(), -16, 15), "index z21.b, #-16, #15");
  COMPARE(index(z22.VnB(), -2, 1), "index z22.b, #-2, #1");
  COMPARE(index(z23.VnH(), -1, 0), "index z23.h, #-1, #0");
  COMPARE(index(z24.VnS(), 0, -1), "index z24.s, #0, #-1");
  COMPARE(index(z25.VnD(), 1, -2), "index z25.d, #1, #-2");
  COMPARE(index(z26.VnB(), 15, -16), "index z26.b, #15, #-16");
  // Immediate start, register step.
  COMPARE(index(z23.VnB(), -16, w8), "index z23.b, #-16, w8");
  COMPARE(index(z24.VnH(), -1, x9), "index z24.h, #-1, w9");
  COMPARE(index(z25.VnS(), 0, w10), "index z25.s, #0, w10");
  COMPARE(index(z26.VnD(), 15, x11), "index z26.d, #15, x11");
  // Register start, immediate step.
  COMPARE(index(z14.VnB(), w15, 15), "index z14.b, w15, #15");
  COMPARE(index(z15.VnH(), x16, 1), "index z15.h, w16, #1");
  COMPARE(index(z16.VnS(), w17, 0), "index z16.s, w17, #0");
  COMPARE(index(z17.VnD(), x18, -16), "index z17.d, x18, #-16");
  // Register start, register step.
  COMPARE(index(z20.VnB(), w23, w21), "index z20.b, w23, w21");
  COMPARE(index(z21.VnH(), x24, w22), "index z21.h, w24, w22");
  COMPARE(index(z22.VnS(), w25, x23), "index z22.s, w25, w23");
  COMPARE(index(z23.VnD(), x26, x24), "index z23.d, x26, x24");

  // Simple pass-through macros.
  COMPARE_MACRO(Index(z21.VnB(), -16, 15), "index z21.b, #-16, #15");
  COMPARE_MACRO(Index(z22.VnB(), -2, 1), "index z22.b, #-2, #1");
  COMPARE_MACRO(Index(z23.VnH(), -1, 0), "index z23.h, #-1, #0");
  COMPARE_MACRO(Index(z24.VnS(), 0, -1), "index z24.s, #0, #-1");
  COMPARE_MACRO(Index(z25.VnD(), 1, -2), "index z25.d, #1, #-2");
  COMPARE_MACRO(Index(z26.VnB(), 15, -16), "index z26.b, #15, #-16");
  COMPARE_MACRO(Index(z23.VnB(), -16, w8), "index z23.b, #-16, w8");
  COMPARE_MACRO(Index(z24.VnH(), -1, x9), "index z24.h, #-1, w9");
  COMPARE_MACRO(Index(z25.VnS(), 0, w10), "index z25.s, #0, w10");
  COMPARE_MACRO(Index(z26.VnD(), 15, x11), "index z26.d, #15, x11");
  COMPARE_MACRO(Index(z14.VnB(), w15, 15), "index z14.b, w15, #15");
  COMPARE_MACRO(Index(z15.VnH(), x16, 1), "index z15.h, w16, #1");
  COMPARE_MACRO(Index(z16.VnS(), w17, 0), "index z16.s, w17, #0");
  COMPARE_MACRO(Index(z17.VnD(), x18, -16), "index z17.d, x18, #-16");
  COMPARE_MACRO(Index(z20.VnB(), w23, w21), "index z20.b, w23, w21");
  COMPARE_MACRO(Index(z21.VnH(), x24, w22), "index z21.h, w24, w22");
  COMPARE_MACRO(Index(z22.VnS(), w25, x23), "index z22.s, w25, w23");
  COMPARE_MACRO(Index(z23.VnD(), x26, x24), "index z23.d, x26, x24");

  // Argument synthesis. The immediates 16 and -17 lie just beyond the
  // -16..15 values used above; the macro is expected to move them into
  // x16/x17 (or their W views) and use the register form.
  COMPARE_MACRO(Index(z0.VnB(), 16, -17),
                "mov w16, #0x10\n"
                "mov w17, #0xffffffef\n"
                "index z0.b, w16, w17");
  COMPARE_MACRO(Index(z1.VnH(), x2, -17),
                "mov w16, #0xffffffef\n"
                "index z1.h, w2, w16");
  COMPARE_MACRO(Index(z3.VnS(), 16, w4),
                "mov w16, #0x10\n"
                "index z3.s, w16, w4");
  COMPARE_MACRO(Index(z4.VnD(), -17, 16),
                "mov x16, #0xffffffffffffffef\n"
                "mov x17, #0x10\n"
                "index z4.d, x16, x17");

  CLEANUP();
}
2188
// Checks for the unpredicated, three-operand SVE integer arithmetic
// instructions add, sqadd, sqsub, sub, uqadd and uqsub. Each instruction is
// exercised with B, H, S and D lanes.
TEST(sve_int_arithmetic_unpredicated) {
  SETUP();

  COMPARE(add(z23.VnB(), z30.VnB(), z31.VnB()), "add z23.b, z30.b, z31.b");
  COMPARE(add(z24.VnH(), z29.VnH(), z30.VnH()), "add z24.h, z29.h, z30.h");
  COMPARE(add(z25.VnS(), z28.VnS(), z29.VnS()), "add z25.s, z28.s, z29.s");
  COMPARE(add(z26.VnD(), z27.VnD(), z28.VnD()), "add z26.d, z27.d, z28.d");
  COMPARE(sqadd(z26.VnB(), z21.VnB(), z1.VnB()), "sqadd z26.b, z21.b, z1.b");
  COMPARE(sqadd(z25.VnH(), z20.VnH(), z2.VnH()), "sqadd z25.h, z20.h, z2.h");
  COMPARE(sqadd(z24.VnS(), z19.VnS(), z3.VnS()), "sqadd z24.s, z19.s, z3.s");
  COMPARE(sqadd(z23.VnD(), z18.VnD(), z4.VnD()), "sqadd z23.d, z18.d, z4.d");
  COMPARE(sqsub(z1.VnB(), z10.VnB(), z0.VnB()), "sqsub z1.b, z10.b, z0.b");
  COMPARE(sqsub(z2.VnH(), z11.VnH(), z1.VnH()), "sqsub z2.h, z11.h, z1.h");
  COMPARE(sqsub(z3.VnS(), z12.VnS(), z2.VnS()), "sqsub z3.s, z12.s, z2.s");
  COMPARE(sqsub(z4.VnD(), z13.VnD(), z3.VnD()), "sqsub z4.d, z13.d, z3.d");
  COMPARE(sub(z9.VnB(), z7.VnB(), z25.VnB()), "sub z9.b, z7.b, z25.b");
  COMPARE(sub(z8.VnH(), z8.VnH(), z26.VnH()), "sub z8.h, z8.h, z26.h");
  COMPARE(sub(z7.VnS(), z9.VnS(), z27.VnS()), "sub z7.s, z9.s, z27.s");
  COMPARE(sub(z6.VnD(), z10.VnD(), z28.VnD()), "sub z6.d, z10.d, z28.d");
  COMPARE(uqadd(z13.VnB(), z15.VnB(), z3.VnB()), "uqadd z13.b, z15.b, z3.b");
  COMPARE(uqadd(z12.VnH(), z16.VnH(), z2.VnH()), "uqadd z12.h, z16.h, z2.h");
  COMPARE(uqadd(z11.VnS(), z17.VnS(), z1.VnS()), "uqadd z11.s, z17.s, z1.s");
  COMPARE(uqadd(z10.VnD(), z18.VnD(), z0.VnD()), "uqadd z10.d, z18.d, z0.d");
  COMPARE(uqsub(z9.VnB(), z13.VnB(), z13.VnB()), "uqsub z9.b, z13.b, z13.b");
  COMPARE(uqsub(z11.VnH(), z15.VnH(), z11.VnH()), "uqsub z11.h, z15.h, z11.h");
  COMPARE(uqsub(z13.VnS(), z17.VnS(), z13.VnS()), "uqsub z13.s, z17.s, z13.s");
  COMPARE(uqsub(z15.VnD(), z19.VnD(), z15.VnD()), "uqsub z15.d, z19.d, z15.d");

  CLEANUP();
}
2219
// Checks for the predicated (merging) SVE integer binary arithmetic
// instructions at assembler level. Every case here passes the destination
// register again as the first source operand. Most instructions are exercised
// with B, H, S and D lanes; the divide instructions are exercised with a
// single lane size each (sdivr/udivr with S, sdiv/udiv with D).
TEST(sve_int_binary_arithmetic_predicated) {
  SETUP();

  COMPARE(add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()),
          "add z22.b, p4/m, z22.b, z20.b");
  COMPARE(add(z22.VnH(), p4.Merging(), z22.VnH(), z20.VnH()),
          "add z22.h, p4/m, z22.h, z20.h");
  COMPARE(add(z22.VnS(), p4.Merging(), z22.VnS(), z20.VnS()),
          "add z22.s, p4/m, z22.s, z20.s");
  COMPARE(add(z22.VnD(), p4.Merging(), z22.VnD(), z20.VnD()),
          "add z22.d, p4/m, z22.d, z20.d");
  COMPARE(and_(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()),
          "and z22.b, p3/m, z22.b, z3.b");
  COMPARE(and_(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()),
          "and z22.h, p3/m, z22.h, z3.h");
  COMPARE(and_(z22.VnS(), p3.Merging(), z22.VnS(), z3.VnS()),
          "and z22.s, p3/m, z22.s, z3.s");
  COMPARE(and_(z22.VnD(), p3.Merging(), z22.VnD(), z3.VnD()),
          "and z22.d, p3/m, z22.d, z3.d");
  COMPARE(bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()),
          "bic z17.b, p7/m, z17.b, z10.b");
  COMPARE(bic(z17.VnH(), p7.Merging(), z17.VnH(), z10.VnH()),
          "bic z17.h, p7/m, z17.h, z10.h");
  COMPARE(bic(z17.VnS(), p7.Merging(), z17.VnS(), z10.VnS()),
          "bic z17.s, p7/m, z17.s, z10.s");
  COMPARE(bic(z17.VnD(), p7.Merging(), z17.VnD(), z10.VnD()),
          "bic z17.d, p7/m, z17.d, z10.d");
  COMPARE(eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()),
          "eor z23.b, p4/m, z23.b, z15.b");
  COMPARE(eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()),
          "eor z23.h, p4/m, z23.h, z15.h");
  COMPARE(eor(z23.VnS(), p4.Merging(), z23.VnS(), z15.VnS()),
          "eor z23.s, p4/m, z23.s, z15.s");
  COMPARE(eor(z23.VnD(), p4.Merging(), z23.VnD(), z15.VnD()),
          "eor z23.d, p4/m, z23.d, z15.d");
  COMPARE(mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()),
          "mul z15.b, p5/m, z15.b, z15.b");
  COMPARE(mul(z15.VnH(), p5.Merging(), z15.VnH(), z15.VnH()),
          "mul z15.h, p5/m, z15.h, z15.h");
  COMPARE(mul(z15.VnS(), p5.Merging(), z15.VnS(), z15.VnS()),
          "mul z15.s, p5/m, z15.s, z15.s");
  COMPARE(mul(z15.VnD(), p5.Merging(), z15.VnD(), z15.VnD()),
          "mul z15.d, p5/m, z15.d, z15.d");
  COMPARE(orr(z9.VnB(), p1.Merging(), z9.VnB(), z28.VnB()),
          "orr z9.b, p1/m, z9.b, z28.b");
  COMPARE(orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()),
          "orr z9.h, p1/m, z9.h, z28.h");
  COMPARE(orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()),
          "orr z9.s, p1/m, z9.s, z28.s");
  COMPARE(orr(z9.VnD(), p1.Merging(), z9.VnD(), z28.VnD()),
          "orr z9.d, p1/m, z9.d, z28.d");
  COMPARE(sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()),
          "sabd z11.b, p6/m, z11.b, z31.b");
  COMPARE(sabd(z11.VnH(), p6.Merging(), z11.VnH(), z31.VnH()),
          "sabd z11.h, p6/m, z11.h, z31.h");
  COMPARE(sabd(z11.VnS(), p6.Merging(), z11.VnS(), z31.VnS()),
          "sabd z11.s, p6/m, z11.s, z31.s");
  COMPARE(sabd(z11.VnD(), p6.Merging(), z11.VnD(), z31.VnD()),
          "sabd z11.d, p6/m, z11.d, z31.d");
  COMPARE(sdivr(z20.VnS(), p5.Merging(), z20.VnS(), z23.VnS()),
          "sdivr z20.s, p5/m, z20.s, z23.s");
  COMPARE(sdiv(z15.VnD(), p6.Merging(), z15.VnD(), z8.VnD()),
          "sdiv z15.d, p6/m, z15.d, z8.d");
  COMPARE(smax(z30.VnB(), p4.Merging(), z30.VnB(), z30.VnB()),
          "smax z30.b, p4/m, z30.b, z30.b");
  COMPARE(smax(z30.VnH(), p4.Merging(), z30.VnH(), z30.VnH()),
          "smax z30.h, p4/m, z30.h, z30.h");
  COMPARE(smax(z30.VnS(), p4.Merging(), z30.VnS(), z30.VnS()),
          "smax z30.s, p4/m, z30.s, z30.s");
  COMPARE(smax(z30.VnD(), p4.Merging(), z30.VnD(), z30.VnD()),
          "smax z30.d, p4/m, z30.d, z30.d");
  COMPARE(smin(z20.VnB(), p7.Merging(), z20.VnB(), z19.VnB()),
          "smin z20.b, p7/m, z20.b, z19.b");
  COMPARE(smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()),
          "smin z20.h, p7/m, z20.h, z19.h");
  COMPARE(smin(z20.VnS(), p7.Merging(), z20.VnS(), z19.VnS()),
          "smin z20.s, p7/m, z20.s, z19.s");
  COMPARE(smin(z20.VnD(), p7.Merging(), z20.VnD(), z19.VnD()),
          "smin z20.d, p7/m, z20.d, z19.d");
  COMPARE(smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()),
          "smulh z23.b, p0/m, z23.b, z3.b");
  COMPARE(smulh(z23.VnH(), p0.Merging(), z23.VnH(), z3.VnH()),
          "smulh z23.h, p0/m, z23.h, z3.h");
  COMPARE(smulh(z23.VnS(), p0.Merging(), z23.VnS(), z3.VnS()),
          "smulh z23.s, p0/m, z23.s, z3.s");
  COMPARE(smulh(z23.VnD(), p0.Merging(), z23.VnD(), z3.VnD()),
          "smulh z23.d, p0/m, z23.d, z3.d");
  COMPARE(subr(z1.VnB(), p6.Merging(), z1.VnB(), z1.VnB()),
          "subr z1.b, p6/m, z1.b, z1.b");
  COMPARE(subr(z1.VnH(), p6.Merging(), z1.VnH(), z1.VnH()),
          "subr z1.h, p6/m, z1.h, z1.h");
  COMPARE(subr(z1.VnS(), p6.Merging(), z1.VnS(), z1.VnS()),
          "subr z1.s, p6/m, z1.s, z1.s");
  COMPARE(subr(z1.VnD(), p6.Merging(), z1.VnD(), z1.VnD()),
          "subr z1.d, p6/m, z1.d, z1.d");
  COMPARE(sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()),
          "sub z28.b, p2/m, z28.b, z0.b");
  COMPARE(sub(z28.VnH(), p2.Merging(), z28.VnH(), z0.VnH()),
          "sub z28.h, p2/m, z28.h, z0.h");
  COMPARE(sub(z28.VnS(), p2.Merging(), z28.VnS(), z0.VnS()),
          "sub z28.s, p2/m, z28.s, z0.s");
  COMPARE(sub(z28.VnD(), p2.Merging(), z28.VnD(), z0.VnD()),
          "sub z28.d, p2/m, z28.d, z0.d");
  COMPARE(uabd(z14.VnB(), p6.Merging(), z14.VnB(), z22.VnB()),
          "uabd z14.b, p6/m, z14.b, z22.b");
  COMPARE(uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()),
          "uabd z14.h, p6/m, z14.h, z22.h");
  COMPARE(uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()),
          "uabd z14.s, p6/m, z14.s, z22.s");
  COMPARE(uabd(z14.VnD(), p6.Merging(), z14.VnD(), z22.VnD()),
          "uabd z14.d, p6/m, z14.d, z22.d");
  COMPARE(udivr(z27.VnS(), p5.Merging(), z27.VnS(), z31.VnS()),
          "udivr z27.s, p5/m, z27.s, z31.s");
  COMPARE(udiv(z13.VnD(), p4.Merging(), z13.VnD(), z11.VnD()),
          "udiv z13.d, p4/m, z13.d, z11.d");
  COMPARE(umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()),
          "umax z0.b, p5/m, z0.b, z14.b");
  COMPARE(umax(z0.VnH(), p5.Merging(), z0.VnH(), z14.VnH()),
          "umax z0.h, p5/m, z0.h, z14.h");
  COMPARE(umax(z0.VnS(), p5.Merging(), z0.VnS(), z14.VnS()),
          "umax z0.s, p5/m, z0.s, z14.s");
  COMPARE(umax(z0.VnD(), p5.Merging(), z0.VnD(), z14.VnD()),
          "umax z0.d, p5/m, z0.d, z14.d");
  COMPARE(umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()),
          "umin z26.b, p5/m, z26.b, z12.b");
  COMPARE(umin(z26.VnH(), p5.Merging(), z26.VnH(), z12.VnH()),
          "umin z26.h, p5/m, z26.h, z12.h");
  COMPARE(umin(z26.VnS(), p5.Merging(), z26.VnS(), z12.VnS()),
          "umin z26.s, p5/m, z26.s, z12.s");
  COMPARE(umin(z26.VnD(), p5.Merging(), z26.VnD(), z12.VnD()),
          "umin z26.d, p5/m, z26.d, z12.d");
  COMPARE(umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()),
          "umulh z12.b, p2/m, z12.b, z17.b");
  COMPARE(umulh(z12.VnH(), p2.Merging(), z12.VnH(), z17.VnH()),
          "umulh z12.h, p2/m, z12.h, z17.h");
  COMPARE(umulh(z12.VnS(), p2.Merging(), z12.VnS(), z17.VnS()),
          "umulh z12.s, p2/m, z12.s, z17.s");
  COMPARE(umulh(z12.VnD(), p2.Merging(), z12.VnD(), z17.VnD()),
          "umulh z12.d, p2/m, z12.d, z17.d");

  CLEANUP();
}
2361
// MacroAssembler counterparts of the predicated integer binary arithmetic
// checks. The expected strings cover three register-aliasing situations:
//  - destination equals the first source: a single instruction;
//  - destination equals the second source: the operands appear swapped in
//    the expected output (Add, And, Mul, Sabd, Smax, Smin, Smulh, Umax, Umin,
//    Umulh), or the reversed instruction is expected (Sdiv -> sdivr,
//    Sub -> subr, Udiv -> udivr); for Bic the second source is first copied
//    to z31;
//  - destination distinct from both sources: a predicated movprfx followed
//    by the instruction.
TEST(sve_int_binary_arithmetic_predicated_macro) {
  SETUP();

  COMPARE_MACRO(Add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()),
                "add z22.b, p4/m, z22.b, z20.b");
  COMPARE_MACRO(Add(z22.VnH(), p4.Merging(), z20.VnH(), z22.VnH()),
                "add z22.h, p4/m, z22.h, z20.h");
  COMPARE_MACRO(Add(z22.VnS(), p4.Merging(), z21.VnS(), z20.VnS()),
                "movprfx z22.s, p4/m, z21.s\n"
                "add z22.s, p4/m, z22.s, z20.s");

  COMPARE_MACRO(And(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()),
                "and z22.h, p3/m, z22.h, z3.h");
  COMPARE_MACRO(And(z22.VnS(), p3.Merging(), z3.VnS(), z22.VnS()),
                "and z22.s, p3/m, z22.s, z3.s");
  COMPARE_MACRO(And(z22.VnD(), p3.Merging(), z2.VnD(), z3.VnD()),
                "movprfx z22.d, p3/m, z2.d\n"
                "and z22.d, p3/m, z22.d, z3.d");

  COMPARE_MACRO(Bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()),
                "bic z17.b, p7/m, z17.b, z10.b");
  // The destination aliases the second source, so that source is expected to
  // be preserved in z31 before the destination is overwritten.
  COMPARE_MACRO(Bic(z17.VnS(), p7.Merging(), z10.VnS(), z17.VnS()),
                "mov z31.d, z17.d\n"
                "movprfx z17.s, p7/m, z10.s\n"
                "bic z17.s, p7/m, z17.s, z31.s");
  COMPARE_MACRO(Bic(z17.VnD(), p7.Merging(), z7.VnD(), z27.VnD()),
                "movprfx z17.d, p7/m, z7.d\n"
                "bic z17.d, p7/m, z17.d, z27.d");

  COMPARE_MACRO(Eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()),
                "eor z23.b, p4/m, z23.b, z15.b");
  COMPARE_MACRO(Eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()),
                "eor z23.h, p4/m, z23.h, z15.h");
  COMPARE_MACRO(Eor(z23.VnD(), p4.Merging(), z18.VnD(), z15.VnD()),
                "movprfx z23.d, p4/m, z18.d\n"
                "eor z23.d, p4/m, z23.d, z15.d");

  COMPARE_MACRO(Mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()),
                "mul z15.b, p5/m, z15.b, z15.b");
  COMPARE_MACRO(Mul(z15.VnH(), p5.Merging(), z7.VnH(), z15.VnH()),
                "mul z15.h, p5/m, z15.h, z7.h");
  COMPARE_MACRO(Mul(z15.VnS(), p5.Merging(), z0.VnS(), z1.VnS()),
                "movprfx z15.s, p5/m, z0.s\n"
                "mul z15.s, p5/m, z15.s, z1.s");

  COMPARE_MACRO(Orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()),
                "orr z9.h, p1/m, z9.h, z28.h");
  COMPARE_MACRO(Orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()),
                "orr z9.s, p1/m, z9.s, z28.s");
  COMPARE_MACRO(Orr(z9.VnD(), p1.Merging(), z6.VnD(), z7.VnD()),
                "movprfx z9.d, p1/m, z6.d\n"
                "orr z9.d, p1/m, z9.d, z7.d");

  COMPARE_MACRO(Sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()),
                "sabd z11.b, p6/m, z11.b, z31.b");
  COMPARE_MACRO(Sabd(z11.VnH(), p6.Merging(), z31.VnH(), z11.VnH()),
                "sabd z11.h, p6/m, z11.h, z31.h");
  COMPARE_MACRO(Sabd(z11.VnS(), p6.Merging(), z21.VnS(), z31.VnS()),
                "movprfx z11.s, p6/m, z21.s\n"
                "sabd z11.s, p6/m, z11.s, z31.s");

  COMPARE_MACRO(Sdiv(z20.VnS(), p5.Merging(), z23.VnS(), z20.VnS()),
                "sdivr z20.s, p5/m, z20.s, z23.s");
  COMPARE_MACRO(Sdiv(z15.VnD(), p6.Merging(), z30.VnD(), z8.VnD()),
                "movprfx z15.d, p6/m, z30.d\n"
                "sdiv z15.d, p6/m, z15.d, z8.d");

  COMPARE_MACRO(Smax(z30.VnB(), p4.Merging(), z30.VnB(), z31.VnB()),
                "smax z30.b, p4/m, z30.b, z31.b");
  COMPARE_MACRO(Smax(z30.VnS(), p4.Merging(), z3.VnS(), z30.VnS()),
                "smax z30.s, p4/m, z30.s, z3.s");
  COMPARE_MACRO(Smax(z30.VnD(), p4.Merging(), z1.VnD(), z5.VnD()),
                "movprfx z30.d, p4/m, z1.d\n"
                "smax z30.d, p4/m, z30.d, z5.d");

  COMPARE_MACRO(Smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()),
                "smin z20.h, p7/m, z20.h, z19.h");
  COMPARE_MACRO(Smin(z20.VnS(), p7.Merging(), z19.VnS(), z20.VnS()),
                "smin z20.s, p7/m, z20.s, z19.s");
  COMPARE_MACRO(Smin(z20.VnD(), p7.Merging(), z14.VnD(), z15.VnD()),
                "movprfx z20.d, p7/m, z14.d\n"
                "smin z20.d, p7/m, z20.d, z15.d");

  COMPARE_MACRO(Smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()),
                "smulh z23.b, p0/m, z23.b, z3.b");
  COMPARE_MACRO(Smulh(z23.VnH(), p0.Merging(), z13.VnH(), z23.VnH()),
                "smulh z23.h, p0/m, z23.h, z13.h");
  COMPARE_MACRO(Smulh(z23.VnD(), p0.Merging(), z30.VnD(), z31.VnD()),
                "movprfx z23.d, p0/m, z30.d\n"
                "smulh z23.d, p0/m, z23.d, z31.d");

  COMPARE_MACRO(Sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()),
                "sub z28.b, p2/m, z28.b, z0.b");
  COMPARE_MACRO(Sub(z28.VnH(), p2.Merging(), z14.VnH(), z28.VnH()),
                "subr z28.h, p2/m, z28.h, z14.h");
  COMPARE_MACRO(Sub(z28.VnS(), p2.Merging(), z7.VnS(), z13.VnS()),
                "movprfx z28.s, p2/m, z7.s\n"
                "sub z28.s, p2/m, z28.s, z13.s");

  COMPARE_MACRO(Uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()),
                "uabd z14.h, p6/m, z14.h, z22.h");
  COMPARE_MACRO(Uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()),
                "uabd z14.s, p6/m, z14.s, z22.s");
  COMPARE_MACRO(Uabd(z14.VnD(), p6.Merging(), z13.VnD(), z22.VnD()),
                "movprfx z14.d, p6/m, z13.d\n"
                "uabd z14.d, p6/m, z14.d, z22.d");

  COMPARE_MACRO(Udiv(z27.VnS(), p5.Merging(), z16.VnS(), z27.VnS()),
                "udivr z27.s, p5/m, z27.s, z16.s");
  COMPARE_MACRO(Udiv(z13.VnD(), p4.Merging(), z22.VnD(), z11.VnD()),
                "movprfx z13.d, p4/m, z22.d\n"
                "udiv z13.d, p4/m, z13.d, z11.d");

  COMPARE_MACRO(Umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()),
                "umax z0.b, p5/m, z0.b, z14.b");
  COMPARE_MACRO(Umax(z0.VnS(), p5.Merging(), z14.VnS(), z0.VnS()),
                "umax z0.s, p5/m, z0.s, z14.s");
  COMPARE_MACRO(Umax(z0.VnD(), p5.Merging(), z29.VnD(), z14.VnD()),
                "movprfx z0.d, p5/m, z29.d\n"
                "umax z0.d, p5/m, z0.d, z14.d");

  COMPARE_MACRO(Umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()),
                "umin z26.b, p5/m, z26.b, z12.b");
  COMPARE_MACRO(Umin(z26.VnH(), p5.Merging(), z27.VnH(), z26.VnH()),
                "umin z26.h, p5/m, z26.h, z27.h");
  COMPARE_MACRO(Umin(z26.VnD(), p5.Merging(), z13.VnD(), z12.VnD()),
                "movprfx z26.d, p5/m, z13.d\n"
                "umin z26.d, p5/m, z26.d, z12.d");

  COMPARE_MACRO(Umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()),
                "umulh z12.b, p2/m, z12.b, z17.b");
  COMPARE_MACRO(Umulh(z12.VnH(), p2.Merging(), z0.VnH(), z12.VnH()),
                "umulh z12.h, p2/m, z12.h, z0.h");
  COMPARE_MACRO(Umulh(z12.VnS(), p2.Merging(), z25.VnS(), z17.VnS()),
                "movprfx z12.s, p2/m, z25.s\n"
                "umulh z12.s, p2/m, z12.s, z17.s");

  CLEANUP();
}
2501
// Checks for the SVE scalar-compare instructions: ctermeq/ctermne and the
// while{le,lo,ls,lt} predicate generators. The while* cases mix W and X
// scalar operands across B, H, S and D predicate lanes.
TEST(sve_int_compare_scalars) {
  SETUP();

  COMPARE(ctermeq(w30, w26), "ctermeq w30, w26");
  COMPARE(ctermne(x21, x18), "ctermne x21, x18");
  COMPARE(whilele(p10.VnB(), x11, x6), "whilele p10.b, x11, x6");
  COMPARE(whilele(p10.VnH(), w11, w6), "whilele p10.h, w11, w6");
  COMPARE(whilele(p10.VnH(), x11, x6), "whilele p10.h, x11, x6");
  COMPARE(whilele(p10.VnS(), w11, w6), "whilele p10.s, w11, w6");
  COMPARE(whilele(p10.VnD(), x11, x6), "whilele p10.d, x11, x6");
  COMPARE(whilelo(p4.VnB(), w3, w25), "whilelo p4.b, w3, w25");
  COMPARE(whilelo(p4.VnH(), x3, x25), "whilelo p4.h, x3, x25");
  COMPARE(whilelo(p4.VnS(), w3, w25), "whilelo p4.s, w3, w25");
  COMPARE(whilelo(p4.VnD(), x3, x25), "whilelo p4.d, x3, x25");
  COMPARE(whilels(p7.VnB(), w15, w15), "whilels p7.b, w15, w15");
  COMPARE(whilels(p7.VnH(), x15, x15), "whilels p7.h, x15, x15");
  COMPARE(whilels(p7.VnS(), w15, w15), "whilels p7.s, w15, w15");
  COMPARE(whilels(p7.VnD(), x15, x15), "whilels p7.d, x15, x15");
  COMPARE(whilelt(p14.VnB(), w11, w14), "whilelt p14.b, w11, w14");
  COMPARE(whilelt(p14.VnH(), x11, x14), "whilelt p14.h, x11, x14");
  COMPARE(whilelt(p14.VnS(), w11, w14), "whilelt p14.s, w11, w14");
  COMPARE(whilelt(p14.VnD(), x11, x14), "whilelt p14.d, x11, x14");

  CLEANUP();
}
2527
// Checks for the SVE vector-versus-immediate compares cmpeq, cmpge, cmpgt,
// cmple, cmplt and cmpne. Each is exercised with zeroing predication, with B,
// H, S and D lanes, and with both positive and negative immediates.
TEST(sve_int_compare_signed_imm) {
  SETUP();

  COMPARE(cmpeq(p0.VnB(), p3.Zeroing(), z1.VnB(), 15),
          "cmpeq p0.b, p3/z, z1.b, #15");
  COMPARE(cmpeq(p0.VnH(), p3.Zeroing(), z1.VnH(), 7),
          "cmpeq p0.h, p3/z, z1.h, #7");
  COMPARE(cmpeq(p0.VnS(), p3.Zeroing(), z1.VnS(), -3),
          "cmpeq p0.s, p3/z, z1.s, #-3");
  COMPARE(cmpeq(p0.VnD(), p3.Zeroing(), z1.VnD(), -14),
          "cmpeq p0.d, p3/z, z1.d, #-14");
  COMPARE(cmpge(p9.VnB(), p6.Zeroing(), z12.VnB(), 14),
          "cmpge p9.b, p6/z, z12.b, #14");
  COMPARE(cmpge(p9.VnH(), p6.Zeroing(), z12.VnH(), 6),
          "cmpge p9.h, p6/z, z12.h, #6");
  COMPARE(cmpge(p9.VnS(), p6.Zeroing(), z12.VnS(), -4),
          "cmpge p9.s, p6/z, z12.s, #-4");
  COMPARE(cmpge(p9.VnD(), p6.Zeroing(), z12.VnD(), -13),
          "cmpge p9.d, p6/z, z12.d, #-13");
  COMPARE(cmpgt(p15.VnB(), p4.Zeroing(), z23.VnB(), 13),
          "cmpgt p15.b, p4/z, z23.b, #13");
  COMPARE(cmpgt(p15.VnH(), p4.Zeroing(), z23.VnH(), 5),
          "cmpgt p15.h, p4/z, z23.h, #5");
  COMPARE(cmpgt(p15.VnS(), p4.Zeroing(), z23.VnS(), -12),
          "cmpgt p15.s, p4/z, z23.s, #-12");
  COMPARE(cmpgt(p15.VnD(), p4.Zeroing(), z23.VnD(), -5),
          "cmpgt p15.d, p4/z, z23.d, #-5");
  COMPARE(cmple(p4.VnB(), p3.Zeroing(), z5.VnB(), 12),
          "cmple p4.b, p3/z, z5.b, #12");
  COMPARE(cmple(p4.VnH(), p3.Zeroing(), z5.VnH(), 4),
          "cmple p4.h, p3/z, z5.h, #4");
  COMPARE(cmple(p4.VnS(), p3.Zeroing(), z5.VnS(), -11),
          "cmple p4.s, p3/z, z5.s, #-11");
  COMPARE(cmple(p4.VnD(), p3.Zeroing(), z5.VnD(), -6),
          "cmple p4.d, p3/z, z5.d, #-6");
  COMPARE(cmplt(p3.VnB(), p7.Zeroing(), z15.VnB(), 11),
          "cmplt p3.b, p7/z, z15.b, #11");
  COMPARE(cmplt(p3.VnH(), p7.Zeroing(), z15.VnH(), 3),
          "cmplt p3.h, p7/z, z15.h, #3");
  COMPARE(cmplt(p3.VnS(), p7.Zeroing(), z15.VnS(), -10),
          "cmplt p3.s, p7/z, z15.s, #-10");
  COMPARE(cmplt(p3.VnD(), p7.Zeroing(), z15.VnD(), -7),
          "cmplt p3.d, p7/z, z15.d, #-7");
  COMPARE(cmpne(p13.VnB(), p5.Zeroing(), z20.VnB(), 10),
          "cmpne p13.b, p5/z, z20.b, #10");
  COMPARE(cmpne(p13.VnH(), p5.Zeroing(), z20.VnH(), 2),
          "cmpne p13.h, p5/z, z20.h, #2");
  COMPARE(cmpne(p13.VnS(), p5.Zeroing(), z20.VnS(), -9),
          "cmpne p13.s, p5/z, z20.s, #-9");
  COMPARE(cmpne(p13.VnD(), p5.Zeroing(), z20.VnD(), -8),
          "cmpne p13.d, p5/z, z20.d, #-8");

  CLEANUP();
}
2582
// Checks for the SVE vector-versus-immediate compares cmphi, cmphs, cmplo
// and cmpls. Each is exercised with zeroing predication and B, H, S and D
// lanes; the immediates used here range from 1 to 127.
TEST(sve_int_compare_unsigned_imm) {
  SETUP();

  COMPARE(cmphi(p8.VnB(), p6.Zeroing(), z1.VnB(), 127),
          "cmphi p8.b, p6/z, z1.b, #127");
  COMPARE(cmphi(p8.VnH(), p6.Zeroing(), z1.VnH(), 126),
          "cmphi p8.h, p6/z, z1.h, #126");
  COMPARE(cmphi(p8.VnS(), p6.Zeroing(), z1.VnS(), 99),
          "cmphi p8.s, p6/z, z1.s, #99");
  COMPARE(cmphi(p8.VnD(), p6.Zeroing(), z1.VnD(), 78),
          "cmphi p8.d, p6/z, z1.d, #78");
  COMPARE(cmphs(p11.VnB(), p2.Zeroing(), z8.VnB(), 67),
          "cmphs p11.b, p2/z, z8.b, #67");
  COMPARE(cmphs(p11.VnH(), p2.Zeroing(), z8.VnH(), 63),
          "cmphs p11.h, p2/z, z8.h, #63");
  COMPARE(cmphs(p11.VnS(), p2.Zeroing(), z8.VnS(), 51),
          "cmphs p11.s, p2/z, z8.s, #51");
  COMPARE(cmphs(p11.VnD(), p2.Zeroing(), z8.VnD(), 40),
          "cmphs p11.d, p2/z, z8.d, #40");
  COMPARE(cmplo(p9.VnB(), p4.Zeroing(), z4.VnB(), 32),
          "cmplo p9.b, p4/z, z4.b, #32");
  COMPARE(cmplo(p9.VnH(), p4.Zeroing(), z4.VnH(), 22),
          "cmplo p9.h, p4/z, z4.h, #22");
  COMPARE(cmplo(p9.VnS(), p4.Zeroing(), z4.VnS(), 15),
          "cmplo p9.s, p4/z, z4.s, #15");
  COMPARE(cmplo(p9.VnD(), p4.Zeroing(), z4.VnD(), 11),
          "cmplo p9.d, p4/z, z4.d, #11");
  COMPARE(cmpls(p14.VnB(), p5.Zeroing(), z9.VnB(), 7),
          "cmpls p14.b, p5/z, z9.b, #7");
  COMPARE(cmpls(p14.VnH(), p5.Zeroing(), z9.VnH(), 4),
          "cmpls p14.h, p5/z, z9.h, #4");
  COMPARE(cmpls(p14.VnS(), p5.Zeroing(), z9.VnS(), 3),
          "cmpls p14.s, p5/z, z9.s, #3");
  COMPARE(cmpls(p14.VnD(), p5.Zeroing(), z9.VnD(), 1),
          "cmpls p14.d, p5/z, z9.d, #1");

  CLEANUP();
}
2621
TEST(sve_int_compare_vectors)2622 TEST(sve_int_compare_vectors) {
2623 SETUP();
2624
2625 COMPARE(cmpeq(p13.VnB(), p0.Zeroing(), z26.VnB(), z10.VnD()),
2626 "cmpeq p13.b, p0/z, z26.b, z10.d");
2627 COMPARE(cmpeq(p13.VnH(), p0.Zeroing(), z26.VnH(), z10.VnD()),
2628 "cmpeq p13.h, p0/z, z26.h, z10.d");
2629 COMPARE(cmpeq(p13.VnS(), p0.Zeroing(), z26.VnS(), z10.VnD()),
2630 "cmpeq p13.s, p0/z, z26.s, z10.d");
2631 COMPARE(cmpeq(p14.VnB(), p3.Zeroing(), z18.VnB(), z15.VnB()),
2632 "cmpeq p14.b, p3/z, z18.b, z15.b");
2633 COMPARE(cmpeq(p14.VnH(), p3.Zeroing(), z18.VnH(), z15.VnH()),
2634 "cmpeq p14.h, p3/z, z18.h, z15.h");
2635 COMPARE(cmpeq(p14.VnS(), p3.Zeroing(), z18.VnS(), z15.VnS()),
2636 "cmpeq p14.s, p3/z, z18.s, z15.s");
2637 COMPARE(cmpeq(p14.VnD(), p3.Zeroing(), z18.VnD(), z15.VnD()),
2638 "cmpeq p14.d, p3/z, z18.d, z15.d");
2639 COMPARE(cmpge(p8.VnB(), p3.Zeroing(), z13.VnB(), z0.VnD()),
2640 "cmpge p8.b, p3/z, z13.b, z0.d");
2641 COMPARE(cmpge(p8.VnH(), p3.Zeroing(), z13.VnH(), z0.VnD()),
2642 "cmpge p8.h, p3/z, z13.h, z0.d");
2643 COMPARE(cmpge(p8.VnS(), p3.Zeroing(), z13.VnS(), z0.VnD()),
2644 "cmpge p8.s, p3/z, z13.s, z0.d");
2645 COMPARE(cmpge(p3.VnB(), p4.Zeroing(), z6.VnB(), z1.VnB()),
2646 "cmpge p3.b, p4/z, z6.b, z1.b");
2647 COMPARE(cmpge(p3.VnH(), p4.Zeroing(), z6.VnH(), z1.VnH()),
2648 "cmpge p3.h, p4/z, z6.h, z1.h");
2649 COMPARE(cmpge(p3.VnS(), p4.Zeroing(), z6.VnS(), z1.VnS()),
2650 "cmpge p3.s, p4/z, z6.s, z1.s");
2651 COMPARE(cmpge(p3.VnD(), p4.Zeroing(), z6.VnD(), z1.VnD()),
2652 "cmpge p3.d, p4/z, z6.d, z1.d");
2653 COMPARE(cmpgt(p4.VnB(), p2.Zeroing(), z24.VnB(), z1.VnD()),
2654 "cmpgt p4.b, p2/z, z24.b, z1.d");
2655 COMPARE(cmpgt(p4.VnH(), p2.Zeroing(), z24.VnH(), z1.VnD()),
2656 "cmpgt p4.h, p2/z, z24.h, z1.d");
2657 COMPARE(cmpgt(p4.VnS(), p2.Zeroing(), z24.VnS(), z1.VnD()),
2658 "cmpgt p4.s, p2/z, z24.s, z1.d");
2659 COMPARE(cmpgt(p10.VnB(), p3.Zeroing(), z23.VnB(), z19.VnB()),
2660 "cmpgt p10.b, p3/z, z23.b, z19.b");
2661 COMPARE(cmpgt(p10.VnH(), p3.Zeroing(), z23.VnH(), z19.VnH()),
2662 "cmpgt p10.h, p3/z, z23.h, z19.h");
2663 COMPARE(cmpgt(p10.VnS(), p3.Zeroing(), z23.VnS(), z19.VnS()),
2664 "cmpgt p10.s, p3/z, z23.s, z19.s");
2665 COMPARE(cmpgt(p10.VnD(), p3.Zeroing(), z23.VnD(), z19.VnD()),
2666 "cmpgt p10.d, p3/z, z23.d, z19.d");
2667 COMPARE(cmphi(p10.VnB(), p6.Zeroing(), z6.VnB(), z11.VnD()),
2668 "cmphi p10.b, p6/z, z6.b, z11.d");
2669 COMPARE(cmphi(p10.VnH(), p6.Zeroing(), z6.VnH(), z11.VnD()),
2670 "cmphi p10.h, p6/z, z6.h, z11.d");
2671 COMPARE(cmphi(p10.VnS(), p6.Zeroing(), z6.VnS(), z11.VnD()),
2672 "cmphi p10.s, p6/z, z6.s, z11.d");
2673 COMPARE(cmphi(p1.VnB(), p0.Zeroing(), z4.VnB(), z2.VnB()),
2674 "cmphi p1.b, p0/z, z4.b, z2.b");
2675 COMPARE(cmphi(p1.VnH(), p0.Zeroing(), z4.VnH(), z2.VnH()),
2676 "cmphi p1.h, p0/z, z4.h, z2.h");
2677 COMPARE(cmphi(p1.VnS(), p0.Zeroing(), z4.VnS(), z2.VnS()),
2678 "cmphi p1.s, p0/z, z4.s, z2.s");
2679 COMPARE(cmphi(p1.VnD(), p0.Zeroing(), z4.VnD(), z2.VnD()),
2680 "cmphi p1.d, p0/z, z4.d, z2.d");
2681 COMPARE(cmphs(p10.VnB(), p5.Zeroing(), z22.VnB(), z5.VnD()),
2682 "cmphs p10.b, p5/z, z22.b, z5.d");
2683 COMPARE(cmphs(p10.VnH(), p5.Zeroing(), z22.VnH(), z5.VnD()),
2684 "cmphs p10.h, p5/z, z22.h, z5.d");
2685 COMPARE(cmphs(p10.VnS(), p5.Zeroing(), z22.VnS(), z5.VnD()),
2686 "cmphs p10.s, p5/z, z22.s, z5.d");
2687 COMPARE(cmphs(p12.VnB(), p6.Zeroing(), z20.VnB(), z24.VnB()),
2688 "cmphs p12.b, p6/z, z20.b, z24.b");
2689 COMPARE(cmphs(p12.VnH(), p6.Zeroing(), z20.VnH(), z24.VnH()),
2690 "cmphs p12.h, p6/z, z20.h, z24.h");
2691 COMPARE(cmphs(p12.VnS(), p6.Zeroing(), z20.VnS(), z24.VnS()),
2692 "cmphs p12.s, p6/z, z20.s, z24.s");
2693 COMPARE(cmphs(p12.VnD(), p6.Zeroing(), z20.VnD(), z24.VnD()),
2694 "cmphs p12.d, p6/z, z20.d, z24.d");
2695 COMPARE(cmple(p11.VnB(), p2.Zeroing(), z18.VnB(), z0.VnD()),
2696 "cmple p11.b, p2/z, z18.b, z0.d");
2697 COMPARE(cmple(p11.VnH(), p2.Zeroing(), z18.VnH(), z0.VnD()),
2698 "cmple p11.h, p2/z, z18.h, z0.d");
2699 COMPARE(cmple(p11.VnS(), p2.Zeroing(), z18.VnS(), z0.VnD()),
2700 "cmple p11.s, p2/z, z18.s, z0.d");
2701 COMPARE(cmplo(p12.VnB(), p6.Zeroing(), z21.VnB(), z10.VnD()),
2702 "cmplo p12.b, p6/z, z21.b, z10.d");
2703 COMPARE(cmplo(p12.VnH(), p6.Zeroing(), z21.VnH(), z10.VnD()),
2704 "cmplo p12.h, p6/z, z21.h, z10.d");
2705 COMPARE(cmplo(p12.VnS(), p6.Zeroing(), z21.VnS(), z10.VnD()),
2706 "cmplo p12.s, p6/z, z21.s, z10.d");
2707 COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnD()),
2708 "cmpls p8.b, p4/z, z9.b, z15.d");
2709 COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnD()),
2710 "cmpls p8.h, p4/z, z9.h, z15.d");
2711 COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnD()),
2712 "cmpls p8.s, p4/z, z9.s, z15.d");
2713 COMPARE(cmplt(p6.VnB(), p6.Zeroing(), z4.VnB(), z8.VnD()),
2714 "cmplt p6.b, p6/z, z4.b, z8.d");
2715 COMPARE(cmplt(p6.VnH(), p6.Zeroing(), z4.VnH(), z8.VnD()),
2716 "cmplt p6.h, p6/z, z4.h, z8.d");
2717 COMPARE(cmplt(p6.VnS(), p6.Zeroing(), z4.VnS(), z8.VnD()),
2718 "cmplt p6.s, p6/z, z4.s, z8.d");
2719 COMPARE(cmpne(p1.VnB(), p6.Zeroing(), z31.VnB(), z16.VnD()),
2720 "cmpne p1.b, p6/z, z31.b, z16.d");
2721 COMPARE(cmpne(p1.VnH(), p6.Zeroing(), z31.VnH(), z16.VnD()),
2722 "cmpne p1.h, p6/z, z31.h, z16.d");
2723 COMPARE(cmpne(p1.VnS(), p6.Zeroing(), z31.VnS(), z16.VnD()),
2724 "cmpne p1.s, p6/z, z31.s, z16.d");
2725 COMPARE(cmpne(p11.VnB(), p1.Zeroing(), z3.VnB(), z24.VnB()),
2726 "cmpne p11.b, p1/z, z3.b, z24.b");
2727 COMPARE(cmpne(p11.VnH(), p1.Zeroing(), z3.VnH(), z24.VnH()),
2728 "cmpne p11.h, p1/z, z3.h, z24.h");
2729 COMPARE(cmpne(p11.VnS(), p1.Zeroing(), z3.VnS(), z24.VnS()),
2730 "cmpne p11.s, p1/z, z3.s, z24.s");
2731 COMPARE(cmpne(p11.VnD(), p1.Zeroing(), z3.VnD(), z24.VnD()),
2732 "cmpne p11.d, p1/z, z3.d, z24.d");
2733 COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnB()),
2734 "cmphs p8.b, p4/z, z15.b, z9.b");
2735 COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnH()),
2736 "cmphs p8.h, p4/z, z15.h, z9.h");
2737 COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnS()),
2738 "cmphs p8.s, p4/z, z15.s, z9.s");
2739 COMPARE(cmpls(p8.VnD(), p4.Zeroing(), z9.VnD(), z15.VnD()),
2740 "cmphs p8.d, p4/z, z15.d, z9.d");
2741 COMPARE(cmplo(p10.VnB(), p3.Zeroing(), z14.VnB(), z20.VnB()),
2742 "cmphi p10.b, p3/z, z20.b, z14.b");
2743 COMPARE(cmplo(p10.VnH(), p3.Zeroing(), z14.VnH(), z20.VnH()),
2744 "cmphi p10.h, p3/z, z20.h, z14.h");
2745 COMPARE(cmplo(p10.VnS(), p3.Zeroing(), z14.VnS(), z20.VnS()),
2746 "cmphi p10.s, p3/z, z20.s, z14.s");
2747 COMPARE(cmplo(p10.VnD(), p3.Zeroing(), z14.VnD(), z20.VnD()),
2748 "cmphi p10.d, p3/z, z20.d, z14.d");
2749 COMPARE(cmple(p12.VnB(), p2.Zeroing(), z19.VnB(), z25.VnB()),
2750 "cmpge p12.b, p2/z, z25.b, z19.b");
2751 COMPARE(cmple(p12.VnH(), p2.Zeroing(), z19.VnH(), z25.VnH()),
2752 "cmpge p12.h, p2/z, z25.h, z19.h");
2753 COMPARE(cmple(p12.VnS(), p2.Zeroing(), z19.VnS(), z25.VnS()),
2754 "cmpge p12.s, p2/z, z25.s, z19.s");
2755 COMPARE(cmple(p12.VnD(), p2.Zeroing(), z19.VnD(), z25.VnD()),
2756 "cmpge p12.d, p2/z, z25.d, z19.d");
2757 COMPARE(cmplt(p14.VnB(), p1.Zeroing(), z24.VnB(), z30.VnB()),
2758 "cmpgt p14.b, p1/z, z30.b, z24.b");
2759 COMPARE(cmplt(p14.VnH(), p1.Zeroing(), z24.VnH(), z30.VnH()),
2760 "cmpgt p14.h, p1/z, z30.h, z24.h");
2761 COMPARE(cmplt(p14.VnS(), p1.Zeroing(), z24.VnS(), z30.VnS()),
2762 "cmpgt p14.s, p1/z, z30.s, z24.s");
2763 COMPARE(cmplt(p14.VnD(), p1.Zeroing(), z24.VnD(), z30.VnD()),
2764 "cmpgt p14.d, p1/z, z30.d, z24.d");
2765
2766 CLEANUP();
2767 }
2768
// Disassembly checks for fexpa, ftssel and the unpredicated form of movprfx.
TEST(sve_int_misc_unpredicated) {
  SETUP();

  // fexpa with H, S and D lanes.
  COMPARE(fexpa(z5.VnH(), z9.VnH()), "fexpa z5.h, z9.h");
  COMPARE(fexpa(z5.VnS(), z9.VnS()), "fexpa z5.s, z9.s");
  COMPARE(fexpa(z5.VnD(), z9.VnD()), "fexpa z5.d, z9.d");

  // ftssel with H, S and D lanes.
  COMPARE(ftssel(z17.VnH(), z24.VnH(), z14.VnH()),
          "ftssel z17.h, z24.h, z14.h");
  COMPARE(ftssel(z17.VnS(), z24.VnS(), z14.VnS()),
          "ftssel z17.s, z24.s, z14.s");
  COMPARE(ftssel(z17.VnD(), z24.VnD(), z14.VnD()),
          "ftssel z17.d, z24.d, z14.d");

  // The unpredicated movprfx is expected to print without lane suffixes.
  COMPARE(movprfx(z24, z1), "movprfx z24, z1");

  CLEANUP();
}
2785
// Disassembly checks for the predicated (merging) integer multiply-add
// instructions mad, mla, mls and msb, each with B, H, S and D lanes.
TEST(sve_int_mul_add_predicated) {
  SETUP();

  // mad
  COMPARE(mad(z29.VnB(), p6.Merging(), z22.VnB(), z21.VnB()),
          "mad z29.b, p6/m, z22.b, z21.b");
  COMPARE(mad(z29.VnH(), p6.Merging(), z22.VnH(), z21.VnH()),
          "mad z29.h, p6/m, z22.h, z21.h");
  COMPARE(mad(z29.VnS(), p6.Merging(), z22.VnS(), z21.VnS()),
          "mad z29.s, p6/m, z22.s, z21.s");
  COMPARE(mad(z29.VnD(), p6.Merging(), z22.VnD(), z21.VnD()),
          "mad z29.d, p6/m, z22.d, z21.d");

  // mla
  COMPARE(mla(z23.VnB(), p1.Merging(), z21.VnB(), z23.VnB()),
          "mla z23.b, p1/m, z21.b, z23.b");
  COMPARE(mla(z23.VnH(), p1.Merging(), z21.VnH(), z23.VnH()),
          "mla z23.h, p1/m, z21.h, z23.h");
  COMPARE(mla(z23.VnS(), p1.Merging(), z21.VnS(), z23.VnS()),
          "mla z23.s, p1/m, z21.s, z23.s");
  COMPARE(mla(z23.VnD(), p1.Merging(), z21.VnD(), z23.VnD()),
          "mla z23.d, p1/m, z21.d, z23.d");

  // mls
  COMPARE(mls(z4.VnB(), p6.Merging(), z17.VnB(), z28.VnB()),
          "mls z4.b, p6/m, z17.b, z28.b");
  COMPARE(mls(z4.VnH(), p6.Merging(), z17.VnH(), z28.VnH()),
          "mls z4.h, p6/m, z17.h, z28.h");
  COMPARE(mls(z4.VnS(), p6.Merging(), z17.VnS(), z28.VnS()),
          "mls z4.s, p6/m, z17.s, z28.s");
  COMPARE(mls(z4.VnD(), p6.Merging(), z17.VnD(), z28.VnD()),
          "mls z4.d, p6/m, z17.d, z28.d");

  // msb
  COMPARE(msb(z27.VnB(), p7.Merging(), z29.VnB(), z1.VnB()),
          "msb z27.b, p7/m, z29.b, z1.b");
  COMPARE(msb(z27.VnH(), p7.Merging(), z29.VnH(), z1.VnH()),
          "msb z27.h, p7/m, z29.h, z1.h");
  COMPARE(msb(z27.VnS(), p7.Merging(), z29.VnS(), z1.VnS()),
          "msb z27.s, p7/m, z29.s, z1.s");
  COMPARE(msb(z27.VnD(), p7.Merging(), z29.VnD(), z1.VnD()),
          "msb z27.d, p7/m, z29.d, z1.d");

  CLEANUP();
}
2824
// Checks the instruction sequences expected from the Mla and Mls macros for
// the different ways the destination can overlap the source operands.
TEST(sve_int_mul_add_predicated_macro) {
  SETUP();

  // Mla: destination equal to the third argument -> a single mla.
  COMPARE_MACRO(Mla(z0.VnB(), p1.Merging(), z0.VnB(), z2.VnB(), z4.VnB()),
                "mla z0.b, p1/m, z2.b, z4.b");
  // Mla: destination equal to the fourth argument -> a single mad.
  COMPARE_MACRO(Mla(z3.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), z5.VnH()),
                "mad z3.h, p2/m, z5.h, z4.h");
  // Mla: destination equal to the fifth argument -> a single mad.
  COMPARE_MACRO(Mla(z4.VnS(), p3.Merging(), z5.VnS(), z6.VnS(), z4.VnS()),
                "mad z4.s, p3/m, z6.s, z5.s");
  // Mla: all registers distinct -> predicated movprfx followed by mla.
  COMPARE_MACRO(Mla(z5.VnD(), p4.Merging(), z6.VnD(), z7.VnD(), z8.VnD()),
                "movprfx z5.d, p4/m, z6.d\n"
                "mla z5.d, p4/m, z7.d, z8.d");

  // Mls: the same four overlap patterns, expecting mls / msb instead.
  COMPARE_MACRO(Mls(z0.VnD(), p1.Merging(), z0.VnD(), z2.VnD(), z4.VnD()),
                "mls z0.d, p1/m, z2.d, z4.d");
  COMPARE_MACRO(Mls(z3.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), z5.VnS()),
                "msb z3.s, p2/m, z5.s, z4.s");
  COMPARE_MACRO(Mls(z4.VnH(), p3.Merging(), z5.VnH(), z6.VnH(), z4.VnH()),
                "msb z4.h, p3/m, z6.h, z5.h");
  COMPARE_MACRO(Mls(z5.VnB(), p4.Merging(), z6.VnB(), z7.VnB(), z8.VnB()),
                "movprfx z5.b, p4/m, z6.b\n"
                "mls z5.b, p4/m, z7.b, z8.b");

  CLEANUP();
}
2850
// Disassembly checks for the unpredicated dot-product instructions sdot and
// udot, in their S <- B and D <- H forms.
TEST(sve_int_mul_add_unpredicated) {
  SETUP();

  // Signed dot product.
  COMPARE(sdot(z13.VnS(), z12.VnB(), z12.VnB()), "sdot z13.s, z12.b, z12.b");
  COMPARE(sdot(z18.VnD(), z27.VnH(), z22.VnH()), "sdot z18.d, z27.h, z22.h");

  // Unsigned dot product.
  COMPARE(udot(z23.VnS(), z22.VnB(), z11.VnB()), "udot z23.s, z22.b, z11.b");
  COMPARE(udot(z21.VnD(), z27.VnH(), z27.VnH()), "udot z21.d, z27.h, z27.h");

  CLEANUP();
}
2861
// Checks the instruction sequences expected from the Sdot and Udot macros for
// the different ways the destination can overlap the source operands.
TEST(sve_int_mul_add_unpredicated_macro) {
  SETUP();

  // Sdot: destination equal to the second argument -> a single sdot.
  COMPARE_MACRO(Sdot(z0.VnS(), z0.VnS(), z2.VnB(), z4.VnB()),
                "sdot z0.s, z2.b, z4.b");
  // Sdot: destination equal to the third argument -> the expected sequence
  // computes into z31 and then copies the result to the destination.
  COMPARE_MACRO(Sdot(z3.VnD(), z4.VnD(), z3.VnH(), z5.VnH()),
                "movprfx z31, z4\n"
                "sdot z31.d, z3.h, z5.h\n"
                "mov z3.d, z31.d");
  // Sdot: destination equal to the fourth argument -> also goes through z31.
  COMPARE_MACRO(Sdot(z4.VnS(), z5.VnS(), z6.VnB(), z4.VnB()),
                "movprfx z31, z5\n"
                "sdot z31.s, z6.b, z4.b\n"
                "mov z4.d, z31.d");
  // Sdot: all registers distinct -> movprfx into the destination, then sdot.
  COMPARE_MACRO(Sdot(z6.VnD(), z7.VnD(), z8.VnH(), z9.VnH()),
                "movprfx z6, z7\n"
                "sdot z6.d, z8.h, z9.h");
  // Sdot: every operand is the same register -> a single sdot.
  COMPARE_MACRO(Sdot(z5.VnD(), z5.VnD(), z5.VnH(), z5.VnH()),
                "sdot z5.d, z5.h, z5.h");

  // Udot: the same five overlap patterns.
  COMPARE_MACRO(Udot(z0.VnD(), z0.VnD(), z2.VnH(), z4.VnH()),
                "udot z0.d, z2.h, z4.h");
  COMPARE_MACRO(Udot(z3.VnS(), z4.VnS(), z3.VnB(), z5.VnB()),
                "movprfx z31, z4\n"
                "udot z31.s, z3.b, z5.b\n"
                "mov z3.d, z31.d");
  COMPARE_MACRO(Udot(z4.VnD(), z5.VnD(), z6.VnH(), z4.VnH()),
                "movprfx z31, z5\n"
                "udot z31.d, z6.h, z4.h\n"
                "mov z4.d, z31.d");
  COMPARE_MACRO(Udot(z6.VnS(), z7.VnS(), z8.VnB(), z9.VnB()),
                "movprfx z6, z7\n"
                "udot z6.s, z8.b, z9.b");
  COMPARE_MACRO(Udot(z5.VnS(), z5.VnS(), z5.VnB(), z5.VnB()),
                "udot z5.s, z5.b, z5.b");

  CLEANUP();
}
2898
// Disassembly checks for the integer reductions (andv, eorv, orv, saddv,
// smaxv, sminv, uaddv, umaxv, uminv) and for the predicated movprfx.
TEST(sve_int_reduction) {
  SETUP();

  // Bitwise AND / exclusive-OR reductions.
  COMPARE(andv(b15, p1, z4.VnB()), "andv b15, p1, z4.b");
  COMPARE(andv(h14, p2, z3.VnH()), "andv h14, p2, z3.h");
  COMPARE(andv(s13, p3, z2.VnS()), "andv s13, p3, z2.s");
  COMPARE(andv(d12, p4, z1.VnD()), "andv d12, p4, z1.d");
  COMPARE(eorv(b12, p0, z30.VnB()), "eorv b12, p0, z30.b");
  COMPARE(eorv(h11, p1, z29.VnH()), "eorv h11, p1, z29.h");
  COMPARE(eorv(s10, p2, z28.VnS()), "eorv s10, p2, z28.s");
  COMPARE(eorv(d9, p3, z27.VnD()), "eorv d9, p3, z27.d");

  // Predicated movprfx, with both zeroing and merging predication.
  COMPARE(movprfx(z30.VnB(), p2.Zeroing(), z23.VnB()),
          "movprfx z30.b, p2/z, z23.b");
  COMPARE(movprfx(z10.VnH(), p0.Merging(), z10.VnH()),
          "movprfx z10.h, p0/m, z10.h");
  COMPARE(movprfx(z0.VnS(), p2.Zeroing(), z23.VnS()),
          "movprfx z0.s, p2/z, z23.s");
  COMPARE(movprfx(z31.VnD(), p7.Merging(), z23.VnD()),
          "movprfx z31.d, p7/m, z23.d");

  // Bitwise OR reduction.
  COMPARE(orv(b4, p0, z16.VnB()), "orv b4, p0, z16.b");
  COMPARE(orv(h6, p2, z18.VnH()), "orv h6, p2, z18.h");
  COMPARE(orv(s8, p4, z20.VnS()), "orv s8, p4, z20.s");
  COMPARE(orv(d10, p6, z22.VnD()), "orv d10, p6, z22.d");

  // Signed add reduction; only B, H and S sources are exercised here, always
  // with a D destination.
  COMPARE(saddv(d20, p1, z12.VnB()), "saddv d20, p1, z12.b");
  COMPARE(saddv(d22, p3, z15.VnH()), "saddv d22, p3, z15.h");
  COMPARE(saddv(d24, p5, z18.VnS()), "saddv d24, p5, z18.s");

  // Signed maximum / minimum reductions.
  COMPARE(smaxv(b9, p3, z1.VnB()), "smaxv b9, p3, z1.b");
  COMPARE(smaxv(h19, p2, z1.VnH()), "smaxv h19, p2, z1.h");
  COMPARE(smaxv(s29, p1, z1.VnS()), "smaxv s29, p1, z1.s");
  COMPARE(smaxv(d9, p0, z1.VnD()), "smaxv d9, p0, z1.d");
  COMPARE(sminv(b8, p3, z14.VnB()), "sminv b8, p3, z14.b");
  COMPARE(sminv(h18, p2, z4.VnH()), "sminv h18, p2, z4.h");
  COMPARE(sminv(s28, p1, z4.VnS()), "sminv s28, p1, z4.s");
  COMPARE(sminv(d8, p0, z24.VnD()), "sminv d8, p0, z24.d");

  // Unsigned add reduction, always with a D destination.
  COMPARE(uaddv(d13, p0, z15.VnB()), "uaddv d13, p0, z15.b");
  COMPARE(uaddv(d15, p2, z20.VnH()), "uaddv d15, p2, z20.h");
  COMPARE(uaddv(d17, p4, z25.VnS()), "uaddv d17, p4, z25.s");
  COMPARE(uaddv(d19, p6, z30.VnD()), "uaddv d19, p6, z30.d");

  // Unsigned maximum / minimum reductions.
  COMPARE(umaxv(b28, p3, z4.VnB()), "umaxv b28, p3, z4.b");
  COMPARE(umaxv(h18, p6, z2.VnH()), "umaxv h18, p6, z2.h");
  COMPARE(umaxv(s18, p4, z29.VnS()), "umaxv s18, p4, z29.s");
  COMPARE(umaxv(d28, p1, z24.VnD()), "umaxv d28, p1, z24.d");
  COMPARE(uminv(b16, p3, z0.VnB()), "uminv b16, p3, z0.b");
  COMPARE(uminv(h16, p2, z3.VnH()), "uminv h16, p2, z3.h");
  COMPARE(uminv(s16, p1, z5.VnS()), "uminv s16, p1, z5.s");
  COMPARE(uminv(d16, p7, z7.VnD()), "uminv d16, p7, z7.d");

  CLEANUP();
}
2948
// Disassembly checks for the predicated (merging) unary instructions, plus a
// set of raw encodings that are expected to decode as unallocated.
TEST(sve_int_unary_arithmetic_predicated) {
  SETUP();

  // Integer unary operations: abs, cls, clz, cnot, cnt.
  COMPARE(abs(z5.VnB(), p5.Merging(), z31.VnB()), "abs z5.b, p5/m, z31.b");
  COMPARE(abs(z29.VnH(), p5.Merging(), z17.VnH()), "abs z29.h, p5/m, z17.h");
  COMPARE(abs(z6.VnS(), p4.Merging(), z24.VnS()), "abs z6.s, p4/m, z24.s");
  COMPARE(abs(z19.VnD(), p3.Merging(), z25.VnD()), "abs z19.d, p3/m, z25.d");
  COMPARE(cls(z4.VnB(), p0.Merging(), z20.VnB()), "cls z4.b, p0/m, z20.b");
  COMPARE(cls(z11.VnH(), p0.Merging(), z26.VnH()), "cls z11.h, p0/m, z26.h");
  COMPARE(cls(z10.VnS(), p1.Merging(), z10.VnS()), "cls z10.s, p1/m, z10.s");
  COMPARE(cls(z5.VnD(), p1.Merging(), z4.VnD()), "cls z5.d, p1/m, z4.d");
  COMPARE(clz(z18.VnB(), p3.Merging(), z1.VnB()), "clz z18.b, p3/m, z1.b");
  COMPARE(clz(z13.VnH(), p4.Merging(), z18.VnH()), "clz z13.h, p4/m, z18.h");
  COMPARE(clz(z15.VnS(), p4.Merging(), z24.VnS()), "clz z15.s, p4/m, z24.s");
  COMPARE(clz(z29.VnD(), p2.Merging(), z22.VnD()), "clz z29.d, p2/m, z22.d");
  COMPARE(cnot(z16.VnB(), p6.Merging(), z20.VnB()), "cnot z16.b, p6/m, z20.b");
  COMPARE(cnot(z10.VnH(), p5.Merging(), z12.VnH()), "cnot z10.h, p5/m, z12.h");
  COMPARE(cnot(z8.VnS(), p5.Merging(), z21.VnS()), "cnot z8.s, p5/m, z21.s");
  COMPARE(cnot(z3.VnD(), p3.Merging(), z18.VnD()), "cnot z3.d, p3/m, z18.d");
  COMPARE(cnt(z29.VnB(), p3.Merging(), z7.VnB()), "cnt z29.b, p3/m, z7.b");
  COMPARE(cnt(z3.VnH(), p6.Merging(), z31.VnH()), "cnt z3.h, p6/m, z31.h");
  COMPARE(cnt(z2.VnS(), p4.Merging(), z16.VnS()), "cnt z2.s, p4/m, z16.s");
  COMPARE(cnt(z0.VnD(), p0.Merging(), z24.VnD()), "cnt z0.d, p0/m, z24.d");

  // Floating-point sign operations: fabs, fneg (H, S and D lanes).
  COMPARE(fabs(z17.VnH(), p7.Merging(), z15.VnH()), "fabs z17.h, p7/m, z15.h");
  COMPARE(fabs(z18.VnS(), p0.Merging(), z29.VnS()), "fabs z18.s, p0/m, z29.s");
  COMPARE(fabs(z17.VnD(), p1.Merging(), z9.VnD()), "fabs z17.d, p1/m, z9.d");
  COMPARE(fneg(z25.VnH(), p1.Merging(), z28.VnH()), "fneg z25.h, p1/m, z28.h");
  COMPARE(fneg(z5.VnS(), p1.Merging(), z25.VnS()), "fneg z5.s, p1/m, z25.s");
  COMPARE(fneg(z6.VnD(), p1.Merging(), z17.VnD()), "fneg z6.d, p1/m, z17.d");

  // Integer negate and bitwise not.
  COMPARE(neg(z25.VnB(), p4.Merging(), z8.VnB()), "neg z25.b, p4/m, z8.b");
  COMPARE(neg(z30.VnH(), p3.Merging(), z23.VnH()), "neg z30.h, p3/m, z23.h");
  COMPARE(neg(z7.VnS(), p2.Merging(), z26.VnS()), "neg z7.s, p2/m, z26.s");
  COMPARE(neg(z21.VnD(), p3.Merging(), z5.VnD()), "neg z21.d, p3/m, z5.d");
  COMPARE(not_(z24.VnB(), p1.Merging(), z27.VnB()), "not z24.b, p1/m, z27.b");
  COMPARE(not_(z31.VnH(), p6.Merging(), z19.VnH()), "not z31.h, p6/m, z19.h");
  COMPARE(not_(z18.VnS(), p5.Merging(), z13.VnS()), "not z18.s, p5/m, z13.s");
  COMPARE(not_(z12.VnD(), p2.Merging(), z28.VnD()), "not z12.d, p2/m, z28.d");

  // Sign extension, using only lane sizes wider than the extended field.
  COMPARE(sxtb(z19.VnH(), p7.Merging(), z3.VnH()), "sxtb z19.h, p7/m, z3.h");
  COMPARE(sxtb(z3.VnS(), p1.Merging(), z17.VnS()), "sxtb z3.s, p1/m, z17.s");
  COMPARE(sxtb(z27.VnD(), p0.Merging(), z12.VnD()), "sxtb z27.d, p0/m, z12.d");
  COMPARE(sxth(z6.VnS(), p1.Merging(), z17.VnS()), "sxth z6.s, p1/m, z17.s");
  COMPARE(sxth(z8.VnD(), p6.Merging(), z2.VnD()), "sxth z8.d, p6/m, z2.d");
  COMPARE(sxtw(z13.VnD(), p3.Merging(), z27.VnD()), "sxtw z13.d, p3/m, z27.d");

  // Zero extension, with the same lane-size pattern.
  COMPARE(uxtb(z23.VnH(), p3.Merging(), z21.VnH()), "uxtb z23.h, p3/m, z21.h");
  COMPARE(uxtb(z0.VnS(), p2.Merging(), z13.VnS()), "uxtb z0.s, p2/m, z13.s");
  COMPARE(uxtb(z1.VnD(), p3.Merging(), z13.VnD()), "uxtb z1.d, p3/m, z13.d");
  COMPARE(uxth(z27.VnS(), p0.Merging(), z29.VnS()), "uxth z27.s, p0/m, z29.s");
  COMPARE(uxth(z22.VnD(), p4.Merging(), z20.VnD()), "uxth z22.d, p4/m, z20.d");
  COMPARE(uxtw(z14.VnD(), p1.Merging(), z13.VnD()), "uxtw z14.d, p1/m, z13.d");

  // Related encodings that are not defined must be reported as unallocated.
  // The trailing comment names the mnemonic and lane size each raw encoding
  // would otherwise correspond to.
  COMPARE(dci(0x0410a000), "unallocated (Unallocated)");  // sxtb b
  COMPARE(dci(0x0412a000), "unallocated (Unallocated)");  // sxth b
  COMPARE(dci(0x0452a000), "unallocated (Unallocated)");  // sxth h
  COMPARE(dci(0x0414a000), "unallocated (Unallocated)");  // sxtw b
  COMPARE(dci(0x0454a000), "unallocated (Unallocated)");  // sxtw h
  COMPARE(dci(0x0494a000), "unallocated (Unallocated)");  // sxtw s

  COMPARE(dci(0x0411a000), "unallocated (Unallocated)");  // uxtb b
  COMPARE(dci(0x0413a000), "unallocated (Unallocated)");  // uxth b
  COMPARE(dci(0x0453a000), "unallocated (Unallocated)");  // uxth h
  COMPARE(dci(0x0415a000), "unallocated (Unallocated)");  // uxtw b
  COMPARE(dci(0x0455a000), "unallocated (Unallocated)");  // uxtw h
  COMPARE(dci(0x0495a000), "unallocated (Unallocated)");  // uxtw s

  COMPARE(dci(0x041ca000), "unallocated (Unallocated)");  // fabs b
  COMPARE(dci(0x041da000), "unallocated (Unallocated)");  // fneg b

  CLEANUP();
}
3019
// Checks the sequences expected from the unpredicated Neg macro: a subr with
// a zero immediate, preceded by movprfx when source and destination differ.
TEST(sve_neg_macro) {
  SETUP();

  // In-place negation.
  COMPARE_MACRO(Neg(z0.VnB(), z0.VnB()), "subr z0.b, z0.b, #0");
  // Distinct source and destination.
  COMPARE_MACRO(Neg(z1.VnH(), z2.VnH()),
                "movprfx z1, z2\n"
                "subr z1.h, z1.h, #0");
  // In-place negation.
  COMPARE_MACRO(Neg(z29.VnS(), z29.VnS()), "subr z29.s, z29.s, #0");
  // Distinct source and destination.
  COMPARE_MACRO(Neg(z30.VnD(), z31.VnD()),
                "movprfx z30, z31\n"
                "subr z30.d, z30.d, #0");

  CLEANUP();
}
3034
// Disassembly checks for the predicated immediate copies. cpy (and its mov
// spelling) is expected to print as "mov"; fcpy (and its fmov spelling) is
// expected to print as "fmov".
TEST(sve_cpy_fcpy_imm) {
  SETUP();

  // Integer immediates, including values expected to print with "lsl #8".
  COMPARE(cpy(z25.VnB(), p13.Zeroing(), -1), "mov z25.b, p13/z, #-1");
  COMPARE(cpy(z25.VnB(), p13.Merging(), -1), "mov z25.b, p13/m, #-1");
  COMPARE(cpy(z25.VnH(), p13.Merging(), 127), "mov z25.h, p13/m, #127");
  COMPARE(cpy(z25.VnS(), p13.Merging(), 10752),
          "mov z25.s, p13/m, #42, lsl #8");
  COMPARE(cpy(z25.VnD(), p13.Merging(), -10752),
          "mov z25.d, p13/m, #-42, lsl #8");
  COMPARE(mov(z25.VnD(), p13.Merging(), -10752),
          "mov z25.d, p13/m, #-42, lsl #8");

  // Floating-point immediates; the expected text carries both the encoded
  // byte and the decoded value.
  COMPARE(fcpy(z20.VnH(), p11.Merging(), 29.0),
          "fmov z20.h, p11/m, #0x3d (29.0000)");
  COMPARE(fmov(z20.VnS(), p11.Merging(), -31.0),
          "fmov z20.s, p11/m, #0xbf (-31.0000)");
  COMPARE(fcpy(z20.VnD(), p11.Merging(), 1.0),
          "fmov z20.d, p11/m, #0x70 (1.0000)");

  CLEANUP();
}
3057
// Checks how fmov / Fmov handle a zero immediate: +0.0 is expected to come
// out as an integer "mov ..., #0", while other values print as "fmov".
TEST(sve_fmov_zero) {
  SETUP();

  // The predicated `fmov` aliases either `fcpy` or `cpy`.
  COMPARE(fmov(z13.VnS(), p0.Merging(), 1.0),
          "fmov z13.s, p0/m, #0x70 (1.0000)");
  COMPARE(fmov(z13.VnS(), p0.Merging(), 0.0), "mov z13.s, p0/m, #0");
  COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 1.0),
                "fmov z13.d, p0/m, #0x70 (1.0000)");
  COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 0.0), "mov z13.d, p0/m, #0");

  // The unpredicated `fmov` aliases either `fdup` or `dup`.
  COMPARE(fmov(z13.VnS(), 1.0), "fmov z13.s, #0x70 (1.0000)");
  COMPARE(fmov(z13.VnS(), 0.0), "mov z13.s, #0");
  COMPARE_MACRO(Fmov(z13.VnD(), 1.0), "fmov z13.d, #0x70 (1.0000)");
  COMPARE_MACRO(Fmov(z13.VnD(), 0.0), "mov z13.d, #0");

  // The alias cannot encode -0.0; the MacroAssembler still accepts it and is
  // expected to materialise the sign-bit pattern instead.
  COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), -0.0),
                "mov x16, #0x8000000000000000\n"
                "mov z13.d, p0/m, x16");
  COMPARE_MACRO(Fmov(z13.VnD(), -0.0), "mov z13.d, #0x8000000000000000");

  CLEANUP();
}
3083
// Disassembly checks for the unpredicated wide-immediate instructions. Each
// mnemonic is exercised with B, H, S and D lanes; multiples of 256 are
// expected to print as an 8-bit value followed by "lsl #8".
TEST(sve_int_wide_imm_unpredicated) {
  SETUP();

  // add
  COMPARE(add(z12.VnB(), z12.VnB(), 0), "add z12.b, z12.b, #0");
  COMPARE(add(z13.VnH(), z13.VnH(), 255), "add z13.h, z13.h, #255");
  COMPARE(add(z14.VnS(), z14.VnS(), 256), "add z14.s, z14.s, #1, lsl #8");
  COMPARE(add(z15.VnD(), z15.VnD(), 255 * 256),
          "add z15.d, z15.d, #255, lsl #8");

  // dup, and mov called with an explicit third argument of -1; both are
  // expected to print as "mov".
  COMPARE(dup(z6.VnB(), -128), "mov z6.b, #-128");
  COMPARE(dup(z7.VnH(), 127), "mov z7.h, #127");
  COMPARE(dup(z8.VnS(), -128 * 256), "mov z8.s, #-128, lsl #8");
  COMPARE(dup(z9.VnD(), 127 * 256), "mov z9.d, #127, lsl #8");
  COMPARE(mov(z8.VnS(), -128 * 256, -1), "mov z8.s, #-128, lsl #8");
  COMPARE(mov(z9.VnD(), 127 * 256, -1), "mov z9.d, #127, lsl #8");

  // sqadd
  COMPARE(sqadd(z7.VnB(), z7.VnB(), 124), "sqadd z7.b, z7.b, #124");
  COMPARE(sqadd(z8.VnH(), z8.VnH(), 131), "sqadd z8.h, z8.h, #131");
  COMPARE(sqadd(z9.VnS(), z9.VnS(), 252 * 256),
          "sqadd z9.s, z9.s, #252, lsl #8");
  COMPARE(sqadd(z10.VnD(), z10.VnD(), 20 * 256),
          "sqadd z10.d, z10.d, #20, lsl #8");

  // sqsub
  COMPARE(sqsub(z31.VnB(), z31.VnB(), 132), "sqsub z31.b, z31.b, #132");
  COMPARE(sqsub(z30.VnH(), z30.VnH(), 251), "sqsub z30.h, z30.h, #251");
  COMPARE(sqsub(z29.VnS(), z29.VnS(), 21 * 256),
          "sqsub z29.s, z29.s, #21, lsl #8");
  COMPARE(sqsub(z28.VnD(), z28.VnD(), 123 * 256),
          "sqsub z28.d, z28.d, #123, lsl #8");

  // subr
  COMPARE(subr(z20.VnB(), z20.VnB(), 250), "subr z20.b, z20.b, #250");
  COMPARE(subr(z21.VnH(), z21.VnH(), 22), "subr z21.h, z21.h, #22");
  COMPARE(subr(z22.VnS(), z22.VnS(), 122 * 256),
          "subr z22.s, z22.s, #122, lsl #8");
  COMPARE(subr(z23.VnD(), z23.VnD(), 133 * 256),
          "subr z23.d, z23.d, #133, lsl #8");

  // sub
  COMPARE(sub(z18.VnB(), z18.VnB(), 23), "sub z18.b, z18.b, #23");
  COMPARE(sub(z19.VnH(), z19.VnH(), 121), "sub z19.h, z19.h, #121");
  COMPARE(sub(z20.VnS(), z20.VnS(), 134 * 256),
          "sub z20.s, z20.s, #134, lsl #8");
  COMPARE(sub(z21.VnD(), z21.VnD(), 249 * 256),
          "sub z21.d, z21.d, #249, lsl #8");

  // uqadd
  COMPARE(uqadd(z21.VnB(), z21.VnB(), 246), "uqadd z21.b, z21.b, #246");
  COMPARE(uqadd(z22.VnH(), z22.VnH(), 26), "uqadd z22.h, z22.h, #26");
  COMPARE(uqadd(z23.VnS(), z23.VnS(), 118 * 256),
          "uqadd z23.s, z23.s, #118, lsl #8");
  COMPARE(uqadd(z24.VnD(), z24.VnD(), 137 * 256),
          "uqadd z24.d, z24.d, #137, lsl #8");

  // uqsub
  COMPARE(uqsub(z10.VnB(), z10.VnB(), 27), "uqsub z10.b, z10.b, #27");
  COMPARE(uqsub(z11.VnH(), z11.VnH(), 117), "uqsub z11.h, z11.h, #117");
  COMPARE(uqsub(z12.VnS(), z12.VnS(), 138 * 256),
          "uqsub z12.s, z12.s, #138, lsl #8");
  COMPARE(uqsub(z13.VnD(), z13.VnD(), 245 * 256),
          "uqsub z13.d, z13.d, #245, lsl #8");

  // fdup, and fmov with the same operands; both are expected to print as
  // "fmov".
  COMPARE(fdup(z26.VnH(), Float16(-5.0f)), "fmov z26.h, #0x94 (-5.0000)");
  COMPARE(fdup(z27.VnS(), -13.0f), "fmov z27.s, #0xaa (-13.0000)");
  COMPARE(fdup(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)");
  COMPARE(fmov(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)");

  // mul (signed immediates)
  COMPARE(mul(z15.VnB(), z15.VnB(), -128), "mul z15.b, z15.b, #-128");
  COMPARE(mul(z16.VnH(), z16.VnH(), -1), "mul z16.h, z16.h, #-1");
  COMPARE(mul(z17.VnS(), z17.VnS(), 17), "mul z17.s, z17.s, #17");
  COMPARE(mul(z18.VnD(), z18.VnD(), 127), "mul z18.d, z18.d, #127");

  // smax (signed immediates)
  COMPARE(smax(z7.VnB(), z7.VnB(), -2), "smax z7.b, z7.b, #-2");
  COMPARE(smax(z8.VnH(), z8.VnH(), 18), "smax z8.h, z8.h, #18");
  COMPARE(smax(z9.VnS(), z9.VnS(), 126), "smax z9.s, z9.s, #126");
  COMPARE(smax(z10.VnD(), z10.VnD(), -127), "smax z10.d, z10.d, #-127");

  // smin (signed immediates)
  COMPARE(smin(z5.VnB(), z5.VnB(), 19), "smin z5.b, z5.b, #19");
  COMPARE(smin(z6.VnH(), z6.VnH(), 125), "smin z6.h, z6.h, #125");
  COMPARE(smin(z7.VnS(), z7.VnS(), -126), "smin z7.s, z7.s, #-126");
  COMPARE(smin(z8.VnD(), z8.VnD(), -3), "smin z8.d, z8.d, #-3");

  // umax (unsigned immediates)
  COMPARE(umax(z15.VnB(), z15.VnB(), 120), "umax z15.b, z15.b, #120");
  COMPARE(umax(z16.VnH(), z16.VnH(), 135), "umax z16.h, z16.h, #135");
  COMPARE(umax(z17.VnS(), z17.VnS(), 248), "umax z17.s, z17.s, #248");
  COMPARE(umax(z18.VnD(), z18.VnD(), 24), "umax z18.d, z18.d, #24");

  // umin (unsigned immediates)
  COMPARE(umin(z22.VnB(), z22.VnB(), 136), "umin z22.b, z22.b, #136");
  COMPARE(umin(z23.VnH(), z23.VnH(), 247), "umin z23.h, z23.h, #247");
  COMPARE(umin(z24.VnS(), z24.VnS(), 25), "umin z24.s, z24.s, #25");
  COMPARE(umin(z25.VnD(), z25.VnD(), 119), "umin z25.d, z25.d, #119");

  CLEANUP();
}
3174
// Checks the sequences expected from the Add and Sub macros when given an
// immediate operand.
TEST(sve_add_sub_imm_macro) {
  SETUP();

  // When source and destination differ, a movprfx is expected in front of
  // the immediate form.
  COMPARE_MACRO(Add(z12.VnB(), z13.VnB(), 0),
                "movprfx z12, z13\n"
                "add z12.b, z12.b, #0");
  COMPARE_MACRO(Sub(z20.VnB(), 250, z2.VnB()),
                "movprfx z20, z2\n"
                "subr z20.b, z20.b, #250");
  COMPARE_MACRO(Sub(z19.VnH(), z4.VnH(), 121),
                "movprfx z19, z4\n"
                "sub z19.h, z19.h, #121");

  // Two's complement equivalences let Add be emitted as sub, and vice versa.
  COMPARE_MACRO(Add(z13.VnH(), z13.VnH(), 0xffff), "sub z13.h, z13.h, #1");
  COMPARE_MACRO(Add(z15.VnD(), z15.VnD(), 0xffffffffffffffd6),
                "sub z15.d, z15.d, #42");
  COMPARE_MACRO(Add(z16.VnH(), z16.VnH(), 0xff00),
                "add z16.h, z16.h, #255, lsl #8");
  COMPARE_MACRO(Sub(z17.VnH(), z17.VnH(), 0xfffe), "add z17.h, z17.h, #2");
  COMPARE_MACRO(Sub(z14.VnB(), z14.VnB(), 0x80), "sub z14.b, z14.b, #128");

  // Immediates that cannot be encoded are expected to be materialised in z31
  // (through x16 where needed) and used with the vector form.
  COMPARE_MACRO(Add(z15.VnD(), z20.VnD(), 1234567890),
                "mov x16, #0x2d2\n"
                "movk x16, #0x4996, lsl #16\n"
                "mov z31.d, x16\n"
                "add z15.d, z20.d, z31.d");
  COMPARE_MACRO(Sub(z22.VnS(), 256 * 256, z2.VnS()),
                "mov z31.s, #0x10000\n"
                "sub z22.s, z31.s, z2.s");
  COMPARE_MACRO(Sub(z21.VnD(), z11.VnD(), 111111111111),
                "mov x16, #0x1c7\n"
                "movk x16, #0xdebd, lsl #16\n"
                "movk x16, #0x19, lsl #32\n"
                "mov z31.d, x16\n"
                "sub z21.d, z11.d, z31.d");

  CLEANUP();
}
3217
// Checks the sequences expected from the Uqadd and Uqsub macros when given an
// immediate operand.
TEST(sve_uqadd_uqsub_imm_macro) {
  SETUP();

  // Distinct source and destination: movprfx is expected first.
  COMPARE_MACRO(Uqadd(z21.VnB(), z14.VnB(), 246),
                "movprfx z21, z14\n"
                "uqadd z21.b, z21.b, #246");
  COMPARE_MACRO(Uqsub(z10.VnB(), z27.VnB(), 27),
                "movprfx z10, z27\n"
                "uqsub z10.b, z10.b, #27");
  COMPARE_MACRO(Uqadd(z1.VnS(), z2.VnS(), 42 * 256),
                "movprfx z1, z2\n"
                "uqadd z1.s, z1.s, #42, lsl #8");

  // In-place operation: a single instruction is expected.
  COMPARE_MACRO(Uqsub(z3.VnB(), z3.VnB(), 0xff), "uqsub z3.b, z3.b, #255");
  COMPARE_MACRO(Uqadd(z8.VnS(), z8.VnS(), 0xff00),
                "uqadd z8.s, z8.s, #255, lsl #8");

  CLEANUP();
}
3238
// Checks the sequences expected from the Sqadd and Sqsub macros when given an
// immediate operand.
TEST(sve_sqadd_sqsub_imm_macro) {
  SETUP();

  // Distinct source and destination: movprfx is expected first.
  COMPARE_MACRO(Sqadd(z21.VnB(), z14.VnB(), 123),
                "movprfx z21, z14\n"
                "sqadd z21.b, z21.b, #123");
  COMPARE_MACRO(Sqsub(z10.VnB(), z27.VnB(), 27),
                "movprfx z10, z27\n"
                "sqsub z10.b, z10.b, #27");
  COMPARE_MACRO(Sqadd(z22.VnS(), z15.VnS(), 256),
                "movprfx z22, z15\n"
                "sqadd z22.s, z22.s, #1, lsl #8");

  // In-place operation: a single instruction is expected.
  COMPARE_MACRO(Sqsub(z3.VnB(), z3.VnB(), 0xff), "sqsub z3.b, z3.b, #255");
  COMPARE_MACRO(Sqadd(z4.VnH(), z4.VnH(), 0xff00),
                "sqadd z4.h, z4.h, #255, lsl #8");

  CLEANUP();
}
3259
// Checks the sequences expected from the wide-immediate macros (Mul, Smax,
// Smin, Umax, Umin, Dup, Mov, Fdup, Fmov), both for immediates the
// instruction can encode directly and for immediates it cannot.
TEST(sve_int_wide_imm_unpredicated_macro) {
  SETUP();

  // Encodable immediates with distinct source and destination: movprfx is
  // expected in front of the immediate form.
  COMPARE_MACRO(Mul(z1.VnD(), z18.VnD(), 127),
                "movprfx z1, z18\n"
                "mul z1.d, z1.d, #127");
  COMPARE_MACRO(Smax(z3.VnS(), z9.VnS(), 126),
                "movprfx z3, z9\n"
                "smax z3.s, z3.s, #126");
  COMPARE_MACRO(Smin(z26.VnH(), z6.VnH(), 125),
                "movprfx z26, z6\n"
                "smin z26.h, z26.h, #125");
  COMPARE_MACRO(Umax(z25.VnB(), z15.VnB(), 120),
                "movprfx z25, z15\n"
                "umax z25.b, z25.b, #120");
  COMPARE_MACRO(Umin(z13.VnD(), z25.VnD(), 119),
                "movprfx z13, z25\n"
                "umin z13.d, z13.d, #119");

  // Dup with this immediate is expected to build the value in w16 first.
  COMPARE_MACRO(Dup(z8.VnS(), -7654321),
                "mov w16, #0x344f\n"
                "movk w16, #0xff8b, lsl #16\n"
                "mov z8.s, w16");

  // The MacroAssembler automatically falls back to another sequence when an
  // immediate isn't encodable, for example when it is out of range.
  COMPARE_MACRO(Dup(z9.VnD(), 0x80000000), "mov z9.d, #0x80000000");
  COMPARE_MACRO(Mov(z9.VnD(), 0x80000000), "mov z9.d, #0x80000000");
  COMPARE_MACRO(Fdup(z26.VnH(), Float16(0.0)), "mov z26.h, #0");
  COMPARE_MACRO(Fdup(z27.VnS(), 255.0f),
                "mov w16, #0x437f0000\n"
                "mov z27.s, w16");
  COMPARE_MACRO(Fdup(z28.VnD(), 12.3456),
                "mov x16, #0xfec5\n"
                "movk x16, #0x7bb2, lsl #16\n"
                "movk x16, #0xb0f2, lsl #32\n"
                "movk x16, #0x4028, lsl #48\n"
                "mov z28.d, x16");
  COMPARE_MACRO(Fmov(z26.VnH(), Float16(0.0)), "mov z26.h, #0");
  COMPARE_MACRO(Fmov(z27.VnS(), 255.0f),
                "mov w16, #0x437f0000\n"
                "mov z27.s, w16");
  COMPARE_MACRO(Fmov(z28.VnD(), 12.3456),
                "mov x16, #0xfec5\n"
                "movk x16, #0x7bb2, lsl #16\n"
                "movk x16, #0xb0f2, lsl #32\n"
                "movk x16, #0x4028, lsl #48\n"
                "mov z28.d, x16");

  // Only the predicated version of these instructions is supported for an
  // unencodable immediate, so a governing predicate is needed. The scope
  // below adds p7 and p15 to the scratch register list; every expected
  // sequence starts with a "ptrue" on p7.
  {
    UseScratchRegisterScope temps(&masm);
    temps.Include(p7, p15);
    COMPARE_MACRO(Mul(z18.VnD(), z18.VnD(), -1270000000),
                  "ptrue p7.d\n"
                  "mov x16, #0xffffffffffff5680\n"
                  "movk x16, #0xb44d, lsl #16\n"
                  "mov z31.d, x16\n"
                  "mul z18.d, p7/m, z18.d, z31.d");
    COMPARE_MACRO(Smax(z9.VnS(), z11.VnS(), -0x70000001),
                  "ptrue p7.s\n"
                  "mov z9.s, #0x8fffffff\n"
                  "smax z9.s, p7/m, z9.s, z11.s");
    COMPARE_MACRO(Smin(z6.VnH(), z6.VnH(), -0x7eef),
                  "ptrue p7.h\n"
                  "mov w16, #0xffff8111\n"
                  "mov z31.h, w16\n"
                  "smin z6.h, p7/m, z6.h, z31.h");
    COMPARE_MACRO(Umax(z15.VnH(), z7.VnH(), 0xfeee),
                  "ptrue p7.h\n"
                  "mov w16, #0xfeee\n"
                  "mov z15.h, w16\n"
                  "umax z15.h, p7/m, z15.h, z7.h");
    COMPARE_MACRO(Umin(z25.VnD(), z25.VnD(), 123123123),
                  "ptrue p7.d\n"
                  "mov x16, #0xb5b3\n"
                  "movk x16, #0x756, lsl #16\n"
                  "mov z31.d, x16\n"
                  "umin z25.d, p7/m, z25.d, z31.d");
  }

  // Pair SETUP() with CLEANUP(), after the scratch-register scope has closed.
  CLEANUP();
}
3344
// Checks the sequences expected from the 32-bit gather-load macros when given
// a vector-plus-immediate address, for offsets that print directly in that
// form and for offsets that are expected to be moved into x16 instead.
TEST(sve_mem_32bit_gather_vector_plus_immediate_macro) {
  SETUP();

  // Simple cases: a single vector-plus-immediate instruction is expected. A
  // zero or omitted offset is expected to print as a bare "[zN.s]".
  COMPARE_MACRO(Ld1b(z4.VnS(), p4.Zeroing(), SVEMemOperand(z12.VnS(), 31)),
                "ld1b {z4.s}, p4/z, [z12.s, #31]");
  COMPARE_MACRO(Ld1h(z10.VnS(), p6.Zeroing(), SVEMemOperand(z4.VnS(), 10)),
                "ld1h {z10.s}, p6/z, [z4.s, #10]");
  COMPARE_MACRO(Ld1w(z16.VnS(), p0.Zeroing(), SVEMemOperand(z26.VnS(), 124)),
                "ld1w {z16.s}, p0/z, [z26.s, #124]");
  COMPARE_MACRO(Ld1sb(z9.VnS(), p3.Zeroing(), SVEMemOperand(z22.VnS())),
                "ld1sb {z9.s}, p3/z, [z22.s]");
  COMPARE_MACRO(Ld1sh(z22.VnS(), p1.Zeroing(), SVEMemOperand(z9.VnS(), 62)),
                "ld1sh {z22.s}, p1/z, [z9.s, #62]");
  COMPARE_MACRO(Ldff1b(z17.VnS(), p2.Zeroing(), SVEMemOperand(z29.VnS(), 0)),
                "ldff1b {z17.s}, p2/z, [z29.s]");
  COMPARE_MACRO(Ldff1h(z16.VnS(), p3.Zeroing(), SVEMemOperand(z15.VnS())),
                "ldff1h {z16.s}, p3/z, [z15.s]");
  COMPARE_MACRO(Ldff1w(z7.VnS(), p3.Zeroing(), SVEMemOperand(z20.VnS(), 4)),
                "ldff1w {z7.s}, p3/z, [z20.s, #4]");
  COMPARE_MACRO(Ldff1sb(z7.VnS(), p1.Zeroing(), SVEMemOperand(z10.VnS(), 21)),
                "ldff1sb {z7.s}, p1/z, [z10.s, #21]");
  COMPARE_MACRO(Ldff1sh(z0.VnS(), p1.Zeroing(), SVEMemOperand(z23.VnS(), 42)),
                "ldff1sh {z0.s}, p1/z, [z23.s, #42]");

  // Unencodable cases: the offset is expected to be moved into x16 and the
  // load emitted in scalar-plus-vector form with a uxtw extend.
  COMPARE_MACRO(Ld1b(z17.VnS(), p2.Zeroing(), SVEMemOperand(z11.VnS(), 32)),
                "mov x16, #0x20\n"
                "ld1b {z17.s}, p2/z, [x16, z11.s, uxtw]");
  COMPARE_MACRO(Ld1h(z11.VnS(), p1.Zeroing(), SVEMemOperand(z8.VnS(), -2)),
                "mov x16, #0xfffffffffffffffe\n"
                "ld1h {z11.s}, p1/z, [x16, z8.s, uxtw]");
  COMPARE_MACRO(Ld1w(z5.VnS(), p6.Zeroing(), SVEMemOperand(z9.VnS(), 42)),
                "mov x16, #0x2a\n"
                "ld1w {z5.s}, p6/z, [x16, z9.s, uxtw]");
  COMPARE_MACRO(Ld1sb(z28.VnS(), p5.Zeroing(), SVEMemOperand(z12.VnS(), -1)),
                "mov x16, #0xffffffffffffffff\n"
                "ld1sb {z28.s}, p5/z, [x16, z12.s, uxtw]");
  COMPARE_MACRO(Ld1sh(z30.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), 64)),
                "mov x16, #0x40\n"
                "ld1sh {z30.s}, p5/z, [x16, z21.s, uxtw]");
  COMPARE_MACRO(Ldff1b(z19.VnS(), p6.Zeroing(), SVEMemOperand(z24.VnS(), 32)),
                "mov x16, #0x20\n"
                "ldff1b {z19.s}, p6/z, [x16, z24.s, uxtw]");
  COMPARE_MACRO(Ldff1h(z5.VnS(), p1.Zeroing(), SVEMemOperand(z24.VnS(), -2)),
                "mov x16, #0xfffffffffffffffe\n"
                "ldff1h {z5.s}, p1/z, [x16, z24.s, uxtw]");
  COMPARE_MACRO(Ldff1w(z17.VnS(), p6.Zeroing(), SVEMemOperand(z18.VnS(), 42)),
                "mov x16, #0x2a\n"
                "ldff1w {z17.s}, p6/z, [x16, z18.s, uxtw]");
  COMPARE_MACRO(Ldff1sb(z31.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), -1)),
                "mov x16, #0xffffffffffffffff\n"
                "ldff1sb {z31.s}, p5/z, [x16, z21.s, uxtw]");
  COMPARE_MACRO(Ldff1sh(z6.VnS(), p5.Zeroing(), SVEMemOperand(z22.VnS(), 64)),
                "mov x16, #0x40\n"
                "ldff1sh {z6.s}, p5/z, [x16, z22.s, uxtw]");

  CLEANUP();
}
3404
// Checks the disassembly of .s-lane SVE gather loads and first-fault gather
// loads that take a scalar base plus an extended vector offset, and of the
// load-and-broadcast (ld1r*) instructions that take a scalar base plus an
// immediate.
TEST(sve_mem_32bit_gather_and_unsized_contiguous) {
  SETUP();

  // Gather loads: scalar base, sxtw/uxtw-extended .s offsets, no shift.
  COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(x2, z1.VnS(), SXTW)),
          "ld1b {z9.s}, p5/z, [x2, z1.s, sxtw]");
  COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(sp, z1.VnS(), UXTW)),
          "ld1b {z9.s}, p5/z, [sp, z1.s, uxtw]");
  COMPARE(ld1h(z17.VnS(), p2.Zeroing(), SVEMemOperand(x11, z24.VnS(), SXTW)),
          "ld1h {z17.s}, p2/z, [x11, z24.s, sxtw]");
  COMPARE(ld1w(z22.VnS(), p6.Zeroing(), SVEMemOperand(sp, z5.VnS(), UXTW)),
          "ld1w {z22.s}, p6/z, [sp, z5.s, uxtw]");
  COMPARE(ld1sb(z12.VnS(), p7.Zeroing(), SVEMemOperand(x17, z23.VnS(), UXTW)),
          "ld1sb {z12.s}, p7/z, [x17, z23.s, uxtw]");
  COMPARE(ld1sb(z22.VnS(), p3.Zeroing(), SVEMemOperand(x23, z23.VnS(), SXTW)),
          "ld1sb {z22.s}, p3/z, [x23, z23.s, sxtw]");
  COMPARE(ld1sh(z11.VnS(), p2.Zeroing(), SVEMemOperand(x18, z10.VnS(), UXTW)),
          "ld1sh {z11.s}, p2/z, [x18, z10.s, uxtw]");

  // Gather loads: as above, but the extended offset also carries a shift
  // amount (#1 for halfword accesses, #2 for word accesses).
  COMPARE(ld1h(z9.VnS(), p3.Zeroing(), SVEMemOperand(sp, z4.VnS(), UXTW, 1)),
          "ld1h {z9.s}, p3/z, [sp, z4.s, uxtw #1]");
  COMPARE(ld1w(z0.VnS(), p6.Zeroing(), SVEMemOperand(x28, z21.VnS(), SXTW, 2)),
          "ld1w {z0.s}, p6/z, [x28, z21.s, sxtw #2]");
  COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(sp, z0.VnS(), SXTW, 1)),
          "ld1sh {z11.s}, p4/z, [sp, z0.s, sxtw #1]");

  // First-fault gather loads: scalar base, sxtw/uxtw-extended .s offsets,
  // no shift.
  COMPARE(ldff1b(z18.VnS(), p6.Zeroing(), SVEMemOperand(x27, z24.VnS(), UXTW)),
          "ldff1b {z18.s}, p6/z, [x27, z24.s, uxtw]");
  COMPARE(ldff1h(z28.VnS(), p6.Zeroing(), SVEMemOperand(x1, z30.VnS(), UXTW)),
          "ldff1h {z28.s}, p6/z, [x1, z30.s, uxtw]");
  COMPARE(ldff1w(z12.VnS(), p3.Zeroing(), SVEMemOperand(x25, z27.VnS(), SXTW)),
          "ldff1w {z12.s}, p3/z, [x25, z27.s, sxtw]");
  COMPARE(ldff1sb(z15.VnS(), p5.Zeroing(), SVEMemOperand(x5, z14.VnS(), SXTW)),
          "ldff1sb {z15.s}, p5/z, [x5, z14.s, sxtw]");
  COMPARE(ldff1sh(z18.VnS(), p4.Zeroing(), SVEMemOperand(x25, z25.VnS(), SXTW)),
          "ldff1sh {z18.s}, p4/z, [x25, z25.s, sxtw]");

  // First-fault gather loads: extended .s offsets with a shift amount.
  COMPARE(ldff1h(z25.VnS(),
                 p3.Zeroing(),
                 SVEMemOperand(x17, z15.VnS(), SXTW, 1)),
          "ldff1h {z25.s}, p3/z, [x17, z15.s, sxtw #1]");
  COMPARE(ldff1w(z5.VnS(),
                 p4.Zeroing(),
                 SVEMemOperand(x23, z31.VnS(), UXTW, 2)),
          "ldff1w {z5.s}, p4/z, [x23, z31.s, uxtw #2]");
  COMPARE(ldff1sh(z10.VnS(),
                  p0.Zeroing(),
                  SVEMemOperand(x19, z15.VnS(), UXTW, 1)),
          "ldff1sh {z10.s}, p0/z, [x19, z15.s, uxtw #1]");

  // Load-and-broadcast: scalar base plus immediate. A zero immediate is
  // expected to be omitted from the disassembly.
  COMPARE(ld1rb(z2.VnH(), p0.Zeroing(), SVEMemOperand(x30, 0)),
          "ld1rb {z2.h}, p0/z, [x30]");
  COMPARE(ld1rb(z14.VnS(), p2.Zeroing(), SVEMemOperand(x11, 63)),
          "ld1rb {z14.s}, p2/z, [x11, #63]");
  COMPARE(ld1rb(z27.VnD(), p1.Zeroing(), SVEMemOperand(x29, 2)),
          "ld1rb {z27.d}, p1/z, [x29, #2]");
  COMPARE(ld1rb(z0.VnB(), p3.Zeroing(), SVEMemOperand(sp, 59)),
          "ld1rb {z0.b}, p3/z, [sp, #59]");
  COMPARE(ld1rh(z19.VnH(), p5.Zeroing(), SVEMemOperand(x1, 0)),
          "ld1rh {z19.h}, p5/z, [x1]");
  COMPARE(ld1rh(z4.VnS(), p7.Zeroing(), SVEMemOperand(x29, 126)),
          "ld1rh {z4.s}, p7/z, [x29, #126]");
  COMPARE(ld1rh(z24.VnD(), p0.Zeroing(), SVEMemOperand(sp, 78)),
          "ld1rh {z24.d}, p0/z, [sp, #78]");
  COMPARE(ld1rw(z19.VnS(), p5.Zeroing(), SVEMemOperand(x4, 252)),
          "ld1rw {z19.s}, p5/z, [x4, #252]");
  COMPARE(ld1rw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x2, 100)),
          "ld1rw {z13.d}, p3/z, [x2, #100]");
  COMPARE(ld1rd(z19.VnD(), p7.Zeroing(), SVEMemOperand(x14, 504)),
          "ld1rd {z19.d}, p7/z, [x14, #504]");
  COMPARE(ld1rsb(z16.VnH(), p1.Zeroing(), SVEMemOperand(x29, 0)),
          "ld1rsb {z16.h}, p1/z, [x29]");
  COMPARE(ld1rsb(z8.VnS(), p6.Zeroing(), SVEMemOperand(sp, 33)),
          "ld1rsb {z8.s}, p6/z, [sp, #33]");
  COMPARE(ld1rsb(z25.VnD(), p2.Zeroing(), SVEMemOperand(x18, 63)),
          "ld1rsb {z25.d}, p2/z, [x18, #63]");
  COMPARE(ld1rsh(z11.VnS(), p5.Zeroing(), SVEMemOperand(x14, 2)),
          "ld1rsh {z11.s}, p5/z, [x14, #2]");
  COMPARE(ld1rsh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x19, 124)),
          "ld1rsh {z28.d}, p1/z, [x19, #124]");
  COMPARE(ld1rsw(z23.VnD(), p4.Zeroing(), SVEMemOperand(x10, 8)),
          "ld1rsw {z23.d}, p4/z, [x10, #8]");

  CLEANUP();
}
3496
// Checks the code emitted by the load-and-broadcast macros (Ld1r*) for the
// immediates below: the expected output computes the address into x16 with
// an add/sub and then loads from [x16] with no immediate.
TEST(sve_mem_32bit_gather_and_unsized_contiguous_macro) {
  SETUP();

  // Positive offsets: expected to be added to the base register.
  COMPARE_MACRO(Ld1rb(z2.VnB(), p0.Zeroing(), SVEMemOperand(x30, 100)),
                "add x16, x30, #0x64 (100)\n"
                "ld1rb {z2.b}, p0/z, [x16]");
  COMPARE_MACRO(Ld1rh(z4.VnH(), p1.Zeroing(), SVEMemOperand(x21, 201)),
                "add x16, x21, #0xc9 (201)\n"
                "ld1rh {z4.h}, p1/z, [x16]");
  COMPARE_MACRO(Ld1rw(z6.VnS(), p2.Zeroing(), SVEMemOperand(x14, 512)),
                "add x16, x14, #0x200 (512)\n"
                "ld1rw {z6.s}, p2/z, [x16]");
  COMPARE_MACRO(Ld1rd(z8.VnD(), p3.Zeroing(), SVEMemOperand(x3, 1024)),
                "add x16, x3, #0x400 (1024)\n"
                "ld1rd {z8.d}, p3/z, [x16]");

  // Negative offsets: expected to be subtracted from the base register.
  COMPARE_MACRO(Ld1rsb(z10.VnH(), p4.Zeroing(), SVEMemOperand(sp, -100)),
                "sub x16, sp, #0x64 (100)\n"
                "ld1rsb {z10.h}, p4/z, [x16]");
  COMPARE_MACRO(Ld1rsh(z12.VnS(), p5.Zeroing(), SVEMemOperand(x30, -255)),
                "sub x16, x30, #0xff (255)\n"
                "ld1rsh {z12.s}, p5/z, [x16]");
  COMPARE_MACRO(Ld1rsw(z14.VnD(), p6.Zeroing(), SVEMemOperand(x1, -1024)),
                "sub x16, x1, #0x400 (1024)\n"
                "ld1rsw {z14.d}, p6/z, [x16]");

  // Pair the SETUP() above with CLEANUP(), as the neighbouring tests do.
  CLEANUP();
}
3522
// Checks the disassembly of .d-lane SVE gather loads and first-fault gather
// loads that take a vector base plus an immediate. Operands built with a zero
// immediate, or with no immediate at all, are expected to print just the
// vector base.
TEST(sve_mem_64bit_gather_vector_plus_immediate) {
  SETUP();

  // Gather loads.
  COMPARE(ld1b(z2.VnD(), p2.Zeroing(), SVEMemOperand(z12.VnD(), 31)),
          "ld1b {z2.d}, p2/z, [z12.d, #31]");
  COMPARE(ld1h(z30.VnD(), p7.Zeroing(), SVEMemOperand(z28.VnD(), 10)),
          "ld1h {z30.d}, p7/z, [z28.d, #10]");
  COMPARE(ld1w(z10.VnD(), p5.Zeroing(), SVEMemOperand(z4.VnD(), 124)),
          "ld1w {z10.d}, p5/z, [z4.d, #124]");
  COMPARE(ld1d(z13.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)),
          "ld1d {z13.d}, p3/z, [z19.d, #248]");
  COMPARE(ld1sb(z16.VnD(), p7.Zeroing(), SVEMemOperand(z31.VnD())),
          "ld1sb {z16.d}, p7/z, [z31.d]");
  COMPARE(ld1sh(z20.VnD(), p2.Zeroing(), SVEMemOperand(z2.VnD(), 62)),
          "ld1sh {z20.d}, p2/z, [z2.d, #62]");
  COMPARE(ld1sw(z2.VnD(), p7.Zeroing(), SVEMemOperand(z25.VnD())),
          "ld1sw {z2.d}, p7/z, [z25.d]");

  // First-fault gather loads.
  COMPARE(ldff1b(z24.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), 0)),
          "ldff1b {z24.d}, p5/z, [z8.d]");
  COMPARE(ldff1h(z9.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD())),
          "ldff1h {z9.d}, p3/z, [z19.d]");
  COMPARE(ldff1w(z26.VnD(), p6.Zeroing(), SVEMemOperand(z15.VnD(), 4)),
          "ldff1w {z26.d}, p6/z, [z15.d, #4]");
  COMPARE(ldff1d(z19.VnD(), p1.Zeroing(), SVEMemOperand(z14.VnD())),
          "ldff1d {z19.d}, p1/z, [z14.d]");
  COMPARE(ldff1sb(z26.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD(), 21)),
          "ldff1sb {z26.d}, p5/z, [z14.d, #21]");
  COMPARE(ldff1sh(z6.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 42)),
          "ldff1sh {z6.d}, p3/z, [z19.d, #42]");
  COMPARE(ldff1sw(z19.VnD(), p7.Zeroing(), SVEMemOperand(z14.VnD(), 84)),
          "ldff1sw {z19.d}, p7/z, [z14.d, #84]");

  CLEANUP();
}
3557
// Checks the code emitted by the .d-lane gather-load macros (Ld1* / Ldff1*)
// for vector-plus-immediate operands, both when the expected output keeps the
// immediate in the load and when it moves the immediate into x16 first.
TEST(sve_mem_64bit_gather_vector_plus_immediate_macro) {
  SETUP();

  // Operands expected to produce a single vector-plus-immediate load.
  COMPARE_MACRO(Ld1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(z31.VnD(), 31)),
                "ld1b {z18.d}, p6/z, [z31.d, #31]");
  COMPARE_MACRO(Ld1h(z5.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), 10)),
                "ld1h {z5.d}, p3/z, [z18.d, #10]");
  COMPARE_MACRO(Ld1w(z0.VnD(), p6.Zeroing(), SVEMemOperand(z22.VnD(), 124)),
                "ld1w {z0.d}, p6/z, [z22.d, #124]");
  COMPARE_MACRO(Ld1d(z18.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)),
                "ld1d {z18.d}, p3/z, [z19.d, #248]");
  COMPARE_MACRO(Ld1sb(z18.VnD(), p6.Zeroing(), SVEMemOperand(z17.VnD())),
                "ld1sb {z18.d}, p6/z, [z17.d]");
  COMPARE_MACRO(Ld1sh(z14.VnD(), p3.Zeroing(), SVEMemOperand(z11.VnD(), 62)),
                "ld1sh {z14.d}, p3/z, [z11.d, #62]");
  COMPARE_MACRO(Ld1sw(z18.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD())),
                "ld1sw {z18.d}, p5/z, [z14.d]");
  COMPARE_MACRO(Ldff1b(z20.VnD(), p5.Zeroing(), SVEMemOperand(z17.VnD(), 0)),
                "ldff1b {z20.d}, p5/z, [z17.d]");
  COMPARE_MACRO(Ldff1h(z20.VnD(), p5.Zeroing(), SVEMemOperand(z16.VnD())),
                "ldff1h {z20.d}, p5/z, [z16.d]");
  COMPARE_MACRO(Ldff1w(z1.VnD(), p6.Zeroing(), SVEMemOperand(z16.VnD(), 4)),
                "ldff1w {z1.d}, p6/z, [z16.d, #4]");
  COMPARE_MACRO(Ldff1d(z16.VnD(), p1.Zeroing(), SVEMemOperand(z3.VnD())),
                "ldff1d {z16.d}, p1/z, [z3.d]");
  COMPARE_MACRO(Ldff1sb(z26.VnD(), p7.Zeroing(), SVEMemOperand(z3.VnD(), 21)),
                "ldff1sb {z26.d}, p7/z, [z3.d, #21]");
  COMPARE_MACRO(Ldff1sh(z1.VnD(), p7.Zeroing(), SVEMemOperand(z9.VnD(), 42)),
                "ldff1sh {z1.d}, p7/z, [z9.d, #42]");
  COMPARE_MACRO(Ldff1sw(z19.VnD(), p4.Zeroing(), SVEMemOperand(z3.VnD(), 84)),
                "ldff1sw {z19.d}, p4/z, [z3.d, #84]");

  // Operands whose immediate cannot be encoded: the expected output moves the
  // immediate into x16 and uses a scalar-plus-vector load instead.
  COMPARE_MACRO(Ld1b(z23.VnD(), p6.Zeroing(), SVEMemOperand(z16.VnD(), 32)),
                "mov x16, #0x20\n"
                "ld1b {z23.d}, p6/z, [x16, z16.d]");
  COMPARE_MACRO(Ld1h(z10.VnD(), p6.Zeroing(), SVEMemOperand(z11.VnD(), -2)),
                "mov x16, #0xfffffffffffffffe\n"
                "ld1h {z10.d}, p6/z, [x16, z11.d]");
  COMPARE_MACRO(Ld1w(z14.VnD(), p3.Zeroing(), SVEMemOperand(z11.VnD(), 42)),
                "mov x16, #0x2a\n"
                "ld1w {z14.d}, p3/z, [x16, z11.d]");
  COMPARE_MACRO(Ld1d(z10.VnD(), p4.Zeroing(), SVEMemOperand(z3.VnD(), 256)),
                "mov x16, #0x100\n"
                "ld1d {z10.d}, p4/z, [x16, z3.d]");
  COMPARE_MACRO(Ld1sb(z14.VnD(), p2.Zeroing(), SVEMemOperand(z11.VnD(), -1)),
                "mov x16, #0xffffffffffffffff\n"
                "ld1sb {z14.d}, p2/z, [x16, z11.d]");
  COMPARE_MACRO(Ld1sh(z20.VnD(), p7.Zeroing(), SVEMemOperand(z12.VnD(), 64)),
                "mov x16, #0x40\n"
                "ld1sh {z20.d}, p7/z, [x16, z12.d]");
  COMPARE_MACRO(Ld1sw(z15.VnD(), p6.Zeroing(), SVEMemOperand(z18.VnD(), 42)),
                "mov x16, #0x2a\n"
                "ld1sw {z15.d}, p6/z, [x16, z18.d]");
  COMPARE_MACRO(Ldff1b(z15.VnD(), p0.Zeroing(), SVEMemOperand(z0.VnD(), 32)),
                "mov x16, #0x20\n"
                "ldff1b {z15.d}, p0/z, [x16, z0.d]");
  COMPARE_MACRO(Ldff1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(z31.VnD(), -2)),
                "mov x16, #0xfffffffffffffffe\n"
                "ldff1h {z23.d}, p3/z, [x16, z31.d]");
  COMPARE_MACRO(Ldff1w(z28.VnD(), p3.Zeroing(), SVEMemOperand(z17.VnD(), 42)),
                "mov x16, #0x2a\n"
                "ldff1w {z28.d}, p3/z, [x16, z17.d]");
  COMPARE_MACRO(Ldff1d(z18.VnD(), p3.Zeroing(), SVEMemOperand(z13.VnD(), 256)),
                "mov x16, #0x100\n"
                "ldff1d {z18.d}, p3/z, [x16, z13.d]");
  COMPARE_MACRO(Ldff1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(z3.VnD(), -1)),
                "mov x16, #0xffffffffffffffff\n"
                "ldff1sb {z31.d}, p7/z, [x16, z3.d]");
  COMPARE_MACRO(Ldff1sh(z13.VnD(), p0.Zeroing(), SVEMemOperand(z15.VnD(), 64)),
                "mov x16, #0x40\n"
                "ldff1sh {z13.d}, p0/z, [x16, z15.d]");
  COMPARE_MACRO(Ldff1sw(z30.VnD(), p7.Zeroing(), SVEMemOperand(z10.VnD(), 42)),
                "mov x16, #0x2a\n"
                "ldff1sw {z30.d}, p7/z, [x16, z10.d]");

  CLEANUP();
}
3637
// Checks the disassembly of .d-lane SVE gather loads that take a scalar base
// plus a vector offset, covering plain, lsl-shifted, sxtw/uxtw-extended and
// extended-and-shifted offsets.
TEST(sve_mem_64bit_gather_scalar_plus_vector) {
  SETUP();

  // Vector offset with no modifier.
  COMPARE(ld1b(z30.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD())),
          "ld1b {z30.d}, p6/z, [sp, z24.d]");
  COMPARE(ld1d(z18.VnD(), p5.Zeroing(), SVEMemOperand(x11, z11.VnD())),
          "ld1d {z18.d}, p5/z, [x11, z11.d]");
  COMPARE(ld1h(z2.VnD(), p3.Zeroing(), SVEMemOperand(x16, z18.VnD())),
          "ld1h {z2.d}, p3/z, [x16, z18.d]");
  COMPARE(ld1sb(z11.VnD(), p3.Zeroing(), SVEMemOperand(x24, z21.VnD())),
          "ld1sb {z11.d}, p3/z, [x24, z21.d]");
  COMPARE(ld1sh(z7.VnD(), p7.Zeroing(), SVEMemOperand(x28, z23.VnD())),
          "ld1sh {z7.d}, p7/z, [x28, z23.d]");
  COMPARE(ld1sw(z29.VnD(), p7.Zeroing(), SVEMemOperand(x27, z4.VnD())),
          "ld1sw {z29.d}, p7/z, [x27, z4.d]");
  COMPARE(ld1w(z19.VnD(), p1.Zeroing(), SVEMemOperand(x27, z4.VnD())),
          "ld1w {z19.d}, p1/z, [x27, z4.d]");

  // Vector offset shifted left (lsl).
  COMPARE(ld1d(z20.VnD(), p3.Zeroing(), SVEMemOperand(x3, z15.VnD(), LSL, 3)),
          "ld1d {z20.d}, p3/z, [x3, z15.d, lsl #3]");
  COMPARE(ld1h(z24.VnD(), p4.Zeroing(), SVEMemOperand(x6, z11.VnD(), LSL, 1)),
          "ld1h {z24.d}, p4/z, [x6, z11.d, lsl #1]");
  COMPARE(ld1sh(z22.VnD(), p6.Zeroing(), SVEMemOperand(x7, z31.VnD(), LSL, 1)),
          "ld1sh {z22.d}, p6/z, [x7, z31.d, lsl #1]");
  COMPARE(ld1sw(z9.VnD(), p0.Zeroing(), SVEMemOperand(x2, z27.VnD(), LSL, 2)),
          "ld1sw {z9.d}, p0/z, [x2, z27.d, lsl #2]");
  COMPARE(ld1w(z9.VnD(), p2.Zeroing(), SVEMemOperand(x0, z0.VnD(), LSL, 2)),
          "ld1w {z9.d}, p2/z, [x0, z0.d, lsl #2]");

  // Vector offset extended with sxtw/uxtw, no shift.
  COMPARE(ld1b(z19.VnD(), p5.Zeroing(), SVEMemOperand(x21, z29.VnD(), UXTW)),
          "ld1b {z19.d}, p5/z, [x21, z29.d, uxtw]");
  COMPARE(ld1d(z9.VnD(), p5.Zeroing(), SVEMemOperand(x5, z21.VnD(), SXTW)),
          "ld1d {z9.d}, p5/z, [x5, z21.d, sxtw]");
  COMPARE(ld1h(z26.VnD(), p3.Zeroing(), SVEMemOperand(x1, z10.VnD(), UXTW)),
          "ld1h {z26.d}, p3/z, [x1, z10.d, uxtw]");
  COMPARE(ld1sb(z4.VnD(), p1.Zeroing(), SVEMemOperand(x24, z15.VnD(), SXTW)),
          "ld1sb {z4.d}, p1/z, [x24, z15.d, sxtw]");
  COMPARE(ld1sh(z9.VnD(), p1.Zeroing(), SVEMemOperand(x0, z12.VnD(), UXTW)),
          "ld1sh {z9.d}, p1/z, [x0, z12.d, uxtw]");
  COMPARE(ld1sw(z19.VnD(), p2.Zeroing(), SVEMemOperand(x19, z16.VnD(), SXTW)),
          "ld1sw {z19.d}, p2/z, [x19, z16.d, sxtw]");
  COMPARE(ld1w(z13.VnD(), p3.Zeroing(), SVEMemOperand(x8, z10.VnD(), UXTW)),
          "ld1w {z13.d}, p3/z, [x8, z10.d, uxtw]");

  // Vector offset extended with sxtw/uxtw and shifted.
  COMPARE(ld1d(z25.VnD(), p3.Zeroing(), SVEMemOperand(x14, z0.VnD(), UXTW, 3)),
          "ld1d {z25.d}, p3/z, [x14, z0.d, uxtw #3]");
  COMPARE(ld1h(z21.VnD(), p5.Zeroing(), SVEMemOperand(x13, z8.VnD(), SXTW, 1)),
          "ld1h {z21.d}, p5/z, [x13, z8.d, sxtw #1]");
  COMPARE(ld1sh(z29.VnD(), p0.Zeroing(), SVEMemOperand(x9, z10.VnD(), UXTW, 1)),
          "ld1sh {z29.d}, p0/z, [x9, z10.d, uxtw #1]");
  COMPARE(ld1sw(z5.VnD(), p2.Zeroing(), SVEMemOperand(x1, z23.VnD(), SXTW, 2)),
          "ld1sw {z5.d}, p2/z, [x1, z23.d, sxtw #2]");
  COMPARE(ld1w(z21.VnD(), p1.Zeroing(), SVEMemOperand(x7, z8.VnD(), UXTW, 2)),
          "ld1w {z21.d}, p1/z, [x7, z8.d, uxtw #2]");

  CLEANUP();
}
3695
// Checks the disassembly of the SVE prefetch instructions (prfb, prfh, prfw,
// prfd). A loop first checks that prfh/prfw/prfd print the right mnemonic and
// operation name for each listed operation in every addressing form; a set of
// exact-match checks then covers complete operand strings.
TEST(sve_mem_prefetch) {
  SETUP();

  // Prefetch operations under test. Entry i is paired with entry i of
  // kExpectedOperationText below.
  const PrefetchOperation kSVEPrfOperations[] = {PLDL1KEEP,
                                                 PLDL1STRM,
                                                 PLDL2KEEP,
                                                 PLDL2STRM,
                                                 PLDL3KEEP,
                                                 PLDL3STRM,
                                                 PSTL1KEEP,
                                                 PSTL1STRM,
                                                 PSTL2KEEP,
                                                 PSTL2STRM,
                                                 PSTL3KEEP,
                                                 PSTL3STRM};

  // Text expected immediately after the mnemonic. The leading space separates
  // the mnemonic from the operation name.
  const char* kExpectedOperationText[] = {" pldl1keep",
                                          " pldl1strm",
                                          " pldl2keep",
                                          " pldl2strm",
                                          " pldl3keep",
                                          " pldl3strm",
                                          " pstl1keep",
                                          " pstl1strm",
                                          " pstl2keep",
                                          " pstl2strm",
                                          " pstl3keep",
                                          " pstl3strm"};

  VIXL_STATIC_ASSERT(ArrayLength(kExpectedOperationText) ==
                     ArrayLength(kSVEPrfOperations));

// For each operation, assemble INSN in each addressing form and check that the
// disassembly starts with NAME followed by the operation name. SH is the shift
// amount used by the scalar-plus-vector and scalar-plus-scalar forms.
#define VIXL_DISASM_PREFETCH_TEST(INSN, NAME, SH)                            \
  do {                                                                       \
    for (size_t op_index = 0; op_index < ArrayLength(kSVEPrfOperations);     \
         op_index++) {                                                       \
      PrefetchOperation op = kSVEPrfOperations[op_index];                    \
      std::string expected_prefix(NAME);                                     \
      expected_prefix.append(kExpectedOperationText[op_index]);              \
      /* Vector plus immediate */                                            \
      COMPARE_PREFIX(INSN(op, p6, SVEMemOperand(z30.VnS(), 31)),             \
                     expected_prefix.c_str());                               \
      COMPARE_PREFIX(INSN(op, p5, SVEMemOperand(z29.VnD(), 17)),             \
                     expected_prefix.c_str());                               \
      /* Scalar plus immediate */                                            \
      COMPARE_PREFIX(INSN(op, p4, SVEMemOperand(x11, -32, SVE_MUL_VL)),      \
                     expected_prefix.c_str());                               \
      COMPARE_PREFIX(INSN(op, p4, SVEMemOperand(sp, 31, SVE_MUL_VL)),        \
                     expected_prefix.c_str());                               \
      /* Scalar plus vector */                                               \
      COMPARE_PREFIX(INSN(op, p3, SVEMemOperand(x24, z22.VnS(), UXTW, SH)),  \
                     expected_prefix.c_str());                               \
      COMPARE_PREFIX(INSN(op, p2, SVEMemOperand(x24, z22.VnD(), SXTW, SH)),  \
                     expected_prefix.c_str());                               \
      COMPARE_PREFIX(INSN(op, p1, SVEMemOperand(x4, z2.VnD(), LSL, SH)),     \
                     expected_prefix.c_str());                               \
      /* Scalar plus scalar */                                               \
      COMPARE_PREFIX(INSN(op, p1, SVEMemOperand(x8, x29, LSL, SH)),          \
                     expected_prefix.c_str());                               \
      COMPARE_PREFIX(INSN(op, p0, SVEMemOperand(sp, x6, LSL, SH)),           \
                     expected_prefix.c_str());                               \
    }                                                                        \
  } while (0)

  VIXL_DISASM_PREFETCH_TEST(prfh, "prfh", 1);
  VIXL_DISASM_PREFETCH_TEST(prfw, "prfw", 2);
  VIXL_DISASM_PREFETCH_TEST(prfd, "prfd", 3);
#undef VIXL_DISASM_PREFETCH_TEST

  // Exact-match checks: byte prefetches.
  COMPARE(prfb(PLDL1KEEP, p5, SVEMemOperand(z30.VnS(), 0)),
          "prfb pldl1keep, p5, [z30.s]");
  COMPARE(prfb(PLDL1STRM, p5, SVEMemOperand(x28, -11, SVE_MUL_VL)),
          "prfb pldl1strm, p5, [x28, #-11, mul vl]");
  COMPARE(prfb(PLDL2KEEP, p6, SVEMemOperand(x30, x29)),
          "prfb pldl2keep, p6, [x30, x29]");
  COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnD())),
          "prfb pldl2strm, p6, [x7, z12.d]");
  COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnS(), UXTW)),
          "prfb pldl2strm, p6, [x7, z12.s, uxtw]");

  // Exact-match checks: doubleword prefetches.
  COMPARE(prfd(PLDL3KEEP, p5, SVEMemOperand(z11.VnD(), 9)),
          "prfd pldl3keep, p5, [z11.d, #9]");
  COMPARE(prfd(PLDL3STRM, p3, SVEMemOperand(x0, 0, SVE_MUL_VL)),
          "prfd pldl3strm, p3, [x0]");
  COMPARE(prfd(PSTL1KEEP, p7, SVEMemOperand(x5, x5, LSL, 3)),
          "prfd pstl1keep, p7, [x5, x5, lsl #3]");
  COMPARE(prfd(PSTL1STRM, p1, SVEMemOperand(x19, z18.VnS(), SXTW, 3)),
          "prfd pstl1strm, p1, [x19, z18.s, sxtw #3]");

  // Exact-match checks: halfword prefetches.
  COMPARE(prfh(PSTL2KEEP, p6, SVEMemOperand(z0.VnS(), 31)),
          "prfh pstl2keep, p6, [z0.s, #31]");
  COMPARE(prfh(PSTL2STRM, p4, SVEMemOperand(x17, -3, SVE_MUL_VL)),
          "prfh pstl2strm, p4, [x17, #-3, mul vl]");
  COMPARE(prfh(PSTL3KEEP, p3, SVEMemOperand(x0, x0, LSL, 1)),
          "prfh pstl3keep, p3, [x0, x0, lsl #1]");
  COMPARE(prfh(PSTL3STRM, p4, SVEMemOperand(x20, z0.VnD(), LSL, 1)),
          "prfh pstl3strm, p4, [x20, z0.d, lsl #1]");

  // Exact-match checks: word prefetches.
  COMPARE(prfw(PLDL1KEEP, p3, SVEMemOperand(z23.VnD(), 5)),
          "prfw pldl1keep, p3, [z23.d, #5]");
  COMPARE(prfw(PLDL1STRM, p1, SVEMemOperand(x4, 31, SVE_MUL_VL)),
          "prfw pldl1strm, p1, [x4, #31, mul vl]");
  COMPARE(prfw(PLDL2KEEP, p2, SVEMemOperand(x22, x22, LSL, 2)),
          "prfw pldl2keep, p2, [x22, x22, lsl #2]");
  COMPARE(prfw(PLDL2STRM, p1, SVEMemOperand(x2, z6.VnS(), SXTW, 2)),
          "prfw pldl2strm, p1, [x2, z6.s, sxtw #2]");

  CLEANUP();
}
3800
// Checks the disassembly of .d-lane SVE first-fault gather loads that take a
// scalar base plus a vector offset, covering plain, lsl-shifted,
// sxtw/uxtw-extended and extended-and-shifted offsets.
TEST(sve_mem_64bit_ff_gather_scalar_plus_vector) {
  SETUP();

  // Vector offset with no modifier.
  COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())),
          "ldff1b {z18.d}, p6/z, [x27, z24.d]");
  COMPARE(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())),
          "ldff1h {z28.d}, p6/z, [x1, z30.d]");
  COMPARE(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())),
          "ldff1w {z12.d}, p3/z, [x25, z27.d]");
  COMPARE(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())),
          "ldff1d {z23.d}, p5/z, [x29, z31.d]");
  COMPARE(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())),
          "ldff1sb {z15.d}, p5/z, [x5, z14.d]");
  COMPARE(ldff1sh(z18.VnD(), p4.Zeroing(), SVEMemOperand(x25, z25.VnD())),
          "ldff1sh {z18.d}, p4/z, [x25, z25.d]");
  COMPARE(ldff1sw(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())),
          "ldff1sw {z12.d}, p3/z, [x25, z27.d]");

  // Vector offset shifted left (lsl).
  COMPARE(ldff1h(z25.VnD(),
                 p3.Zeroing(),
                 SVEMemOperand(x17, z15.VnD(), LSL, 1)),
          "ldff1h {z25.d}, p3/z, [x17, z15.d, lsl #1]");
  COMPARE(ldff1w(z5.VnD(), p4.Zeroing(), SVEMemOperand(x23, z31.VnD(), LSL, 2)),
          "ldff1w {z5.d}, p4/z, [x23, z31.d, lsl #2]");
  COMPARE(ldff1d(z2.VnD(), p0.Zeroing(), SVEMemOperand(sp, z7.VnD(), LSL, 3)),
          "ldff1d {z2.d}, p0/z, [sp, z7.d, lsl #3]");
  COMPARE(ldff1sh(z10.VnD(),
                  p0.Zeroing(),
                  SVEMemOperand(x19, z15.VnD(), LSL, 1)),
          "ldff1sh {z10.d}, p0/z, [x19, z15.d, lsl #1]");
  COMPARE(ldff1sw(z5.VnD(),
                  p4.Zeroing(),
                  SVEMemOperand(x23, z31.VnD(), LSL, 2)),
          "ldff1sw {z5.d}, p4/z, [x23, z31.d, lsl #2]");

  // Vector offset extended with sxtw/uxtw, no shift.
  COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD(), UXTW)),
          "ldff1b {z18.d}, p6/z, [sp, z24.d, uxtw]");
  COMPARE(ldff1h(z20.VnD(), p5.Zeroing(), SVEMemOperand(x7, z14.VnD(), SXTW)),
          "ldff1h {z20.d}, p5/z, [x7, z14.d, sxtw]");
  COMPARE(ldff1w(z22.VnD(), p4.Zeroing(), SVEMemOperand(x17, z4.VnD(), UXTW)),
          "ldff1w {z22.d}, p4/z, [x17, z4.d, uxtw]");
  COMPARE(ldff1d(z24.VnD(), p3.Zeroing(), SVEMemOperand(x3, z24.VnD(), SXTW)),
          "ldff1d {z24.d}, p3/z, [x3, z24.d, sxtw]");
  COMPARE(ldff1sb(z26.VnD(), p2.Zeroing(), SVEMemOperand(x13, z14.VnD(), UXTW)),
          "ldff1sb {z26.d}, p2/z, [x13, z14.d, uxtw]");
  COMPARE(ldff1sh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x23, z4.VnD(), SXTW)),
          "ldff1sh {z28.d}, p1/z, [x23, z4.d, sxtw]");
  COMPARE(ldff1sw(z30.VnD(), p0.Zeroing(), SVEMemOperand(x8, z24.VnD(), UXTW)),
          "ldff1sw {z30.d}, p0/z, [x8, z24.d, uxtw]");

  // Vector offset extended with sxtw/uxtw and shifted.
  COMPARE(ldff1h(z4.VnD(), p5.Zeroing(), SVEMemOperand(x7, z1.VnD(), SXTW, 1)),
          "ldff1h {z4.d}, p5/z, [x7, z1.d, sxtw #1]");
  COMPARE(ldff1w(z5.VnD(),
                 p4.Zeroing(),
                 SVEMemOperand(x17, z11.VnD(), UXTW, 2)),
          "ldff1w {z5.d}, p4/z, [x17, z11.d, uxtw #2]");
  COMPARE(ldff1d(z6.VnD(), p3.Zeroing(), SVEMemOperand(x3, z31.VnD(), SXTW, 3)),
          "ldff1d {z6.d}, p3/z, [x3, z31.d, sxtw #3]");
  COMPARE(ldff1sh(z7.VnD(),
                  p1.Zeroing(),
                  SVEMemOperand(x23, z7.VnD(), UXTW, 1)),
          "ldff1sh {z7.d}, p1/z, [x23, z7.d, uxtw #1]");
  COMPARE(ldff1sw(z8.VnD(),
                  p0.Zeroing(),
                  SVEMemOperand(x8, z17.VnD(), SXTW, 2)),
          "ldff1sw {z8.d}, p0/z, [x8, z17.d, sxtw #2]");

  CLEANUP();
}
3874
// Checks the disassembly of the two-register structure loads (ld2b, ld2h,
// ld2w, ld2d) with a scalar base plus a "mul vl" immediate. Each element size
// is checked with no immediate, with #14 and with #-16.
TEST(sve_ld2_scalar_plus_immediate) {
  SETUP();

  // Byte elements.
  COMPARE(ld2b(z31.VnB(), z0.VnB(), p6.Zeroing(), SVEMemOperand(x19)),
          "ld2b {z31.b, z0.b}, p6/z, [x19]");
  COMPARE(ld2b(z31.VnB(),
               z0.VnB(),
               p6.Zeroing(),
               SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "ld2b {z31.b, z0.b}, p6/z, [x19, #14, mul vl]");
  COMPARE(ld2b(z15.VnB(),
               z16.VnB(),
               p6.Zeroing(),
               SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "ld2b {z15.b, z16.b}, p6/z, [x19, #-16, mul vl]");

  // Halfword elements.
  COMPARE(ld2h(z15.VnH(), z16.VnH(), p6.Zeroing(), SVEMemOperand(x19)),
          "ld2h {z15.h, z16.h}, p6/z, [x19]");
  COMPARE(ld2h(z15.VnH(),
               z16.VnH(),
               p0.Zeroing(),
               SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "ld2h {z15.h, z16.h}, p0/z, [x19, #14, mul vl]");
  COMPARE(ld2h(z15.VnH(),
               z16.VnH(),
               p0.Zeroing(),
               SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "ld2h {z15.h, z16.h}, p0/z, [x19, #-16, mul vl]");

  // Word elements.
  COMPARE(ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x19)),
          "ld2w {z0.s, z1.s}, p0/z, [x19]");
  COMPARE(ld2w(z0.VnS(),
               z1.VnS(),
               p0.Zeroing(),
               SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "ld2w {z0.s, z1.s}, p0/z, [x19, #14, mul vl]");
  COMPARE(ld2w(z0.VnS(),
               z1.VnS(),
               p7.Zeroing(),
               SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "ld2w {z0.s, z1.s}, p7/z, [x19, #-16, mul vl]");

  // Doubleword elements.
  COMPARE(ld2d(z0.VnD(), z1.VnD(), p7.Zeroing(), SVEMemOperand(x19)),
          "ld2d {z0.d, z1.d}, p7/z, [x19]");
  COMPARE(ld2d(z31.VnD(),
               z0.VnD(),
               p7.Zeroing(),
               SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "ld2d {z31.d, z0.d}, p7/z, [x19, #14, mul vl]");
  COMPARE(ld2d(z31.VnD(),
               z0.VnD(),
               p7.Zeroing(),
               SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "ld2d {z31.d, z0.d}, p7/z, [x19, #-16, mul vl]");

  CLEANUP();
}
3932
// Checks the disassembly of the three-register structure loads (ld3b, ld3h,
// ld3w, ld3d) with a scalar base plus a "mul vl" immediate. Each element size
// is checked with no immediate, with #21 and with #-24.
TEST(sve_ld3_scalar_plus_immediate) {
  SETUP();

  // Byte elements.
  COMPARE(ld3b(z30.VnB(),
               z31.VnB(),
               z0.VnB(),
               p7.Zeroing(),
               SVEMemOperand(x19)),
          "ld3b {z30.b, z31.b, z0.b}, p7/z, [x19]");
  COMPARE(ld3b(z30.VnB(),
               z31.VnB(),
               z0.VnB(),
               p6.Zeroing(),
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #21, mul vl]");
  COMPARE(ld3b(z30.VnB(),
               z31.VnB(),
               z0.VnB(),
               p6.Zeroing(),
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #-24, mul vl]");

  // Halfword elements.
  COMPARE(ld3h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               p6.Zeroing(),
               SVEMemOperand(x19)),
          "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19]");
  COMPARE(ld3h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               p6.Zeroing(),
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19, #21, mul vl]");
  COMPARE(ld3h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               p0.Zeroing(),
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "ld3h {z15.h, z16.h, z17.h}, p0/z, [x19, #-24, mul vl]");

  // Word elements.
  COMPARE(ld3w(z15.VnS(),
               z16.VnS(),
               z17.VnS(),
               p0.Zeroing(),
               SVEMemOperand(x19)),
          "ld3w {z15.s, z16.s, z17.s}, p0/z, [x19]");
  COMPARE(ld3w(z0.VnS(),
               z1.VnS(),
               z2.VnS(),
               p0.Zeroing(),
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #21, mul vl]");
  COMPARE(ld3w(z0.VnS(),
               z1.VnS(),
               z2.VnS(),
               p0.Zeroing(),
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #-24, mul vl]");

  // Doubleword elements.
  COMPARE(ld3d(z0.VnD(), z1.VnD(), z2.VnD(), p7.Zeroing(), SVEMemOperand(x19)),
          "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19]");
  COMPARE(ld3d(z0.VnD(),
               z1.VnD(),
               z2.VnD(),
               p7.Zeroing(),
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19, #21, mul vl]");
  COMPARE(ld3d(z30.VnD(),
               z31.VnD(),
               z0.VnD(),
               p7.Zeroing(),
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "ld3d {z30.d, z31.d, z0.d}, p7/z, [x19, #-24, mul vl]");

  CLEANUP();
}
4010
// Checks the disassembly of the four-register structure loads (ld4b, ld4h,
// ld4w, ld4d) with a scalar base plus a "mul vl" immediate. Each element size
// is checked with no immediate, with #28 and with #-32.
TEST(sve_ld4_scalar_plus_immediate) {
  SETUP();

  // Byte elements.
  COMPARE(ld4b(z31.VnB(),
               z0.VnB(),
               z1.VnB(),
               z2.VnB(),
               p7.Zeroing(),
               SVEMemOperand(x19)),
          "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19]");
  COMPARE(ld4b(z31.VnB(),
               z0.VnB(),
               z1.VnB(),
               z2.VnB(),
               p7.Zeroing(),
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19, #28, mul vl]");
  COMPARE(ld4b(z31.VnB(),
               z0.VnB(),
               z1.VnB(),
               z2.VnB(),
               p6.Zeroing(),
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x19, #-32, mul vl]");

  // Halfword elements.
  COMPARE(ld4h(z31.VnH(),
               z0.VnH(),
               z1.VnH(),
               z2.VnH(),
               p6.Zeroing(),
               SVEMemOperand(x19)),
          "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x19]");
  COMPARE(ld4h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               z18.VnH(),
               p6.Zeroing(),
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, "
          "[x19, #28, mul vl]");
  COMPARE(ld4h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               z18.VnH(),
               p6.Zeroing(),
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, "
          "[x19, #-32, mul vl]");

  // Word elements.
  COMPARE(ld4w(z15.VnS(),
               z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               p0.Zeroing(),
               SVEMemOperand(x19)),
          "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, [x19]");
  COMPARE(ld4w(z15.VnS(),
               z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               p0.Zeroing(),
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, "
          "[x19, #28, mul vl]");
  COMPARE(ld4w(z0.VnS(),
               z1.VnS(),
               z2.VnS(),
               z3.VnS(),
               p0.Zeroing(),
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [x19, #-32, mul vl]");

  // Doubleword elements.
  COMPARE(ld4d(z0.VnD(),
               z1.VnD(),
               z2.VnD(),
               z3.VnD(),
               p0.Zeroing(),
               SVEMemOperand(x19)),
          "ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [x19]");
  COMPARE(ld4d(z0.VnD(),
               z1.VnD(),
               z2.VnD(),
               z3.VnD(),
               p7.Zeroing(),
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #28, mul vl]");
  COMPARE(ld4d(z0.VnD(),
               z1.VnD(),
               z2.VnD(),
               z3.VnD(),
               p7.Zeroing(),
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #-32, mul vl]");

  CLEANUP();
}
4107
// Checks the disassembly of the two-register structure loads (ld2b, ld2h,
// ld2w, ld2d) with a scalar base plus a scalar index. The halfword, word and
// doubleword forms shift the index by #1, #2 and #3 respectively. Within each
// element size the cases vary the base register (x20 or sp), the governing
// predicate and the destination register pair.
TEST(sve_ld2_scalar_plus_scalar) {
  SETUP();

  // Byte elements: unshifted index.
  COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(x20, x19)),
          "ld2b {z25.b, z26.b}, p1/z, [x20, x19]");
  COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)),
          "ld2b {z25.b, z26.b}, p1/z, [sp, x19]");
  COMPARE(ld2b(z31.VnB(), z0.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)),
          "ld2b {z31.b, z0.b}, p1/z, [sp, x19]");

  // Halfword elements: index shifted by lsl #1.
  COMPARE(ld2h(z31.VnH(),
               z0.VnH(),
               p1.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 1)),
          "ld2h {z31.h, z0.h}, p1/z, [x20, x19, lsl #1]");
  // This case used to be an exact copy of the one after it; it now checks the
  // sp base with the p1 predicate, which was otherwise not covered.
  COMPARE(ld2h(z31.VnH(),
               z0.VnH(),
               p1.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 1)),
          "ld2h {z31.h, z0.h}, p1/z, [sp, x19, lsl #1]");
  COMPARE(ld2h(z31.VnH(),
               z0.VnH(),
               p7.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 1)),
          "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");

  // Word elements: index shifted by lsl #2.
  COMPARE(ld2w(z16.VnS(),
               z17.VnS(),
               p7.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 2)),
          "ld2w {z16.s, z17.s}, p7/z, [x20, x19, lsl #2]");
  COMPARE(ld2w(z16.VnS(),
               z17.VnS(),
               p7.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 2)),
          "ld2w {z16.s, z17.s}, p7/z, [sp, x19, lsl #2]");
  COMPARE(ld2w(z16.VnS(),
               z17.VnS(),
               p0.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 2)),
          "ld2w {z16.s, z17.s}, p0/z, [sp, x19, lsl #2]");

  // Doubleword elements: index shifted by lsl #3.
  COMPARE(ld2d(z16.VnD(),
               z17.VnD(),
               p0.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 3)),
          "ld2d {z16.d, z17.d}, p0/z, [x20, x19, lsl #3]");
  // This case used to be an exact copy of the one after it; it now checks the
  // sp base with the z16/z17 pair, which was otherwise not covered.
  COMPARE(ld2d(z16.VnD(),
               z17.VnD(),
               p0.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 3)),
          "ld2d {z16.d, z17.d}, p0/z, [sp, x19, lsl #3]");
  COMPARE(ld2d(z25.VnD(),
               z26.VnD(),
               p0.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 3)),
          "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]");

  CLEANUP();
}
4168
// Checks the disassembly of the SVE three-register structure loads (ld3b,
// ld3h, ld3w and ld3d) with scalar-plus-scalar addressing. Each element size
// is exercised with a general-purpose base register and with sp as the base.
TEST(sve_ld3_scalar_plus_scalar) {
  SETUP();

  // Byte elements: the index register is printed without a shift.
  COMPARE(ld3b(z25.VnB(), z26.VnB(), z27.VnB(), p1.Zeroing(),
               SVEMemOperand(x20, x19)),
          "ld3b {z25.b, z26.b, z27.b}, p1/z, [x20, x19]");
  COMPARE(ld3b(z25.VnB(), z26.VnB(), z27.VnB(), p1.Zeroing(),
               SVEMemOperand(sp, x19)),
          "ld3b {z25.b, z26.b, z27.b}, p1/z, [sp, x19]");
  // Register list that runs from z30 through z31 and on to z0.
  COMPARE(ld3b(z30.VnB(), z31.VnB(), z0.VnB(), p1.Zeroing(),
               SVEMemOperand(sp, x19)),
          "ld3b {z30.b, z31.b, z0.b}, p1/z, [sp, x19]");

  // Halfword elements: index shifted left by one.
  COMPARE(ld3h(z30.VnH(), z31.VnH(), z0.VnH(), p1.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 1)),
          "ld3h {z30.h, z31.h, z0.h}, p1/z, [x20, x19, lsl #1]");
  COMPARE(ld3h(z30.VnH(), z31.VnH(), z0.VnH(), p7.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 1)),
          "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");

  // Word elements: index shifted left by two.
  COMPARE(ld3w(z16.VnS(), z17.VnS(), z18.VnS(), p7.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 2)),
          "ld3w {z16.s, z17.s, z18.s}, p7/z, [x20, x19, lsl #2]");
  COMPARE(ld3w(z16.VnS(), z17.VnS(), z18.VnS(), p7.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 2)),
          "ld3w {z16.s, z17.s, z18.s}, p7/z, [sp, x19, lsl #2]");
  COMPARE(ld3w(z16.VnS(), z17.VnS(), z18.VnS(), p0.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 2)),
          "ld3w {z16.s, z17.s, z18.s}, p0/z, [sp, x19, lsl #2]");

  // Doubleword elements: index shifted left by three.
  COMPARE(ld3d(z16.VnD(), z17.VnD(), z18.VnD(), p0.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 3)),
          "ld3d {z16.d, z17.d, z18.d}, p0/z, [x20, x19, lsl #3]");
  COMPARE(ld3d(z25.VnD(), z26.VnD(), z27.VnD(), p0.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 3)),
          "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]");

  CLEANUP();
}
4250
// Checks the disassembly of the SVE four-register structure loads (ld4b,
// ld4h, ld4w and ld4d) with scalar-plus-scalar addressing. Each element size
// is exercised with a general-purpose base register and with sp as the base.
TEST(sve_ld4_scalar_plus_scalar) {
  SETUP();

  // Byte elements: the index register is printed without a shift.
  COMPARE(ld4b(z25.VnB(), z26.VnB(), z27.VnB(), z28.VnB(), p0.Zeroing(),
               SVEMemOperand(x20, x19)),
          "ld4b {z25.b, z26.b, z27.b, z28.b}, p0/z, [x20, x19]");
  COMPARE(ld4b(z25.VnB(), z26.VnB(), z27.VnB(), z28.VnB(), p1.Zeroing(),
               SVEMemOperand(sp, x19)),
          "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]");

  // Halfword elements: index shifted left by one. The register list starts at
  // z31 and continues with z0.
  COMPARE(ld4h(z31.VnH(), z0.VnH(), z1.VnH(), z2.VnH(), p1.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 1)),
          "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [x20, x19, lsl #1]");
  COMPARE(ld4h(z31.VnH(), z0.VnH(), z1.VnH(), z2.VnH(), p1.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 1)),
          "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [sp, x19, lsl #1]");
  COMPARE(ld4h(z31.VnH(), z0.VnH(), z1.VnH(), z2.VnH(), p7.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 1)),
          "ld4h {z31.h, z0.h, z1.h, z2.h}, p7/z, [sp, x19, lsl #1]");

  // Word elements: index shifted left by two.
  COMPARE(ld4w(z31.VnS(), z0.VnS(), z1.VnS(), z2.VnS(), p7.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 2)),
          "ld4w {z31.s, z0.s, z1.s, z2.s}, p7/z, [x20, x19, lsl #2]");
  COMPARE(ld4w(z16.VnS(), z17.VnS(), z18.VnS(), z19.VnS(), p7.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 2)),
          "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, "
          "[sp, x19, lsl #2]");

  // Doubleword elements: index shifted left by three.
  COMPARE(ld4d(z16.VnD(), z17.VnD(), z18.VnD(), z19.VnD(), p0.Zeroing(),
               SVEMemOperand(x20, x19, LSL, 3)),
          "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, "
          "[x20, x19, lsl #3]");
  COMPARE(ld4d(z16.VnD(), z17.VnD(), z18.VnD(), z19.VnD(), p0.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 3)),
          "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, "
          "[sp, x19, lsl #3]");
  COMPARE(ld4d(z25.VnD(), z26.VnD(), z27.VnD(), z28.VnD(), p0.Zeroing(),
               SVEMemOperand(sp, x19, LSL, 3)),
          "ld4d {z25.d, z26.d, z27.d, z28.d}, p0/z, "
          "[sp, x19, lsl #3]");

  CLEANUP();
}
4349
// Checks the disassembly of the contiguous ldff1* loads, covering both the
// unsigned forms (ldff1b/h/w/d) and the signed forms (ldff1sb/sh/sw).
TEST(sve_ff_contiguous) {
  SETUP();

  // Unsigned loads. Where xzr is given as the index, the expected text shows
  // only the base register.
  COMPARE(
      ldff1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x21)),
      "ldff1b {z24.b}, p1/z, [x21]");
  COMPARE(
      ldff1b(z22.VnH(), p5.Zeroing(), SVEMemOperand(x5, x28)),
      "ldff1b {z22.h}, p5/z, [x5, x28]");
  COMPARE(
      ldff1b(z2.VnS(), p5.Zeroing(), SVEMemOperand(sp, x11)),
      "ldff1b {z2.s}, p5/z, [sp, x11]");
  COMPARE(
      ldff1b(z12.VnD(), p3.Zeroing(), SVEMemOperand(x26, xzr)),
      "ldff1b {z12.d}, p3/z, [x26]");
  COMPARE(
      ldff1h(z21.VnH(), p3.Zeroing(), SVEMemOperand(x27)),
      "ldff1h {z21.h}, p3/z, [x27]");
  COMPARE(
      ldff1h(z11.VnS(), p6.Zeroing(), SVEMemOperand(sp, x15, LSL, 1)),
      "ldff1h {z11.s}, p6/z, [sp, x15, lsl #1]");
  COMPARE(
      ldff1h(z6.VnD(), p7.Zeroing(), SVEMemOperand(x8, xzr, LSL, 1)),
      "ldff1h {z6.d}, p7/z, [x8]");
  COMPARE(
      ldff1w(z11.VnS(), p7.Zeroing(), SVEMemOperand(sp)),
      "ldff1w {z11.s}, p7/z, [sp]");
  COMPARE(
      ldff1w(z6.VnD(), p6.Zeroing(), SVEMemOperand(x5, x0, LSL, 2)),
      "ldff1w {z6.d}, p6/z, [x5, x0, lsl #2]");
  COMPARE(
      ldff1d(z0.VnD(), p3.Zeroing(), SVEMemOperand(x15, x1, LSL, 3)),
      "ldff1d {z0.d}, p3/z, [x15, x1, lsl #3]");

  // Signed loads.
  COMPARE(
      ldff1sb(z31.VnH(), p4.Zeroing(), SVEMemOperand(x10, x25)),
      "ldff1sb {z31.h}, p4/z, [x10, x25]");
  COMPARE(
      ldff1sb(z25.VnS(), p7.Zeroing(), SVEMemOperand(sp, x20)),
      "ldff1sb {z25.s}, p7/z, [sp, x20]");
  COMPARE(
      ldff1sb(z20.VnD(), p3.Zeroing(), SVEMemOperand(x19, xzr)),
      "ldff1sb {z20.d}, p3/z, [x19]");
  COMPARE(
      ldff1sh(z18.VnS(), p3.Zeroing(), SVEMemOperand(sp, x0, LSL, 1)),
      "ldff1sh {z18.s}, p3/z, [sp, x0, lsl #1]");
  COMPARE(
      ldff1sh(z30.VnD(), p1.Zeroing(), SVEMemOperand(x28, xzr, LSL, 1)),
      "ldff1sh {z30.d}, p1/z, [x28]");
  COMPARE(
      ldff1sw(z3.VnD(), p4.Zeroing(), SVEMemOperand(x22, x18, LSL, 2)),
      "ldff1sw {z3.d}, p4/z, [x22, x18, lsl #2]");

  CLEANUP();
}
4389
// Checks the disassembly of the ld1rq*, ldnt1* and ldnf1* contiguous loads.
// COMPARE cases assemble a single instruction directly; COMPARE_MACRO cases go
// through the MacroAssembler, whose expected output may span several
// instructions.
TEST(sve_mem_contiguous_load) {
  SETUP();

  // ld1rq*: scalar-plus-scalar, then scalar-plus-immediate operands.
  COMPARE(
      ld1rqb(z3.VnB(), p2.Zeroing(), SVEMemOperand(x22, x18)),
      "ld1rqb {z3.b}, p2/z, [x22, x18]");
  COMPARE(
      ld1rqd(z6.VnD(), p0.Zeroing(), SVEMemOperand(x18, x9, LSL, 3)),
      "ld1rqd {z6.d}, p0/z, [x18, x9, lsl #3]");
  COMPARE(
      ld1rqh(z1.VnH(), p7.Zeroing(), SVEMemOperand(x9, x6, LSL, 1)),
      "ld1rqh {z1.h}, p7/z, [x9, x6, lsl #1]");
  COMPARE(
      ld1rqw(z12.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
      "ld1rqw {z12.s}, p4/z, [sp, xzr, lsl #2]");
  COMPARE(
      ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 0)),
      "ld1rqb {z18.b}, p2/z, [x18]");
  COMPARE(
      ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 16)),
      "ld1rqb {z18.b}, p2/z, [x18, #16]");
  COMPARE(
      ld1rqd(z11.VnD(), p1.Zeroing(), SVEMemOperand(x23, -16)),
      "ld1rqd {z11.d}, p1/z, [x23, #-16]");
  COMPARE(
      ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)),
      "ld1rqh {z11.h}, p1/z, [x0, #112]");
  COMPARE(
      ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)),
      "ld1rqw {z22.s}, p3/z, [sp, #-128]");

  // Ld1rq* macros whose operand is expected to come out as one instruction.
  COMPARE_MACRO(
      Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)),
      "ld1rqb {z0.b}, p0/z, [x0, x1]");
  COMPARE_MACRO(
      Ld1rqh(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)),
      "ld1rqh {z0.h}, p0/z, [x0, x1, lsl #1]");
  COMPARE_MACRO(
      Ld1rqw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)),
      "ld1rqw {z0.s}, p0/z, [x0, x1, lsl #2]");
  COMPARE_MACRO(
      Ld1rqd(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
      "ld1rqd {z0.d}, p0/z, [x0, x1, lsl #3]");
  COMPARE_MACRO(
      Ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)),
      "ld1rqh {z11.h}, p1/z, [x0, #112]");
  COMPARE_MACRO(
      Ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)),
      "ld1rqw {z22.s}, p3/z, [sp, #-128]");

  // Ld1rq* macros where the expected output first forms the address in x16.
  COMPARE_MACRO(
      Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2222)),
      "add x16, x0, #0x8ae (2222)\n"
      "ld1rqb {z0.b}, p0/z, [x16]");
  COMPARE_MACRO(
      Ld1rqw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1)),
      "add x16, x0, x1\n"
      "ld1rqw {z0.s}, p0/z, [x16]");
  COMPARE_MACRO(
      Ld1rqd(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)),
      "add x16, x0, x1, lsl #1\n"
      "ld1rqd {z0.d}, p0/z, [x16]");

  // ldnt1*: scalar-plus-scalar operands.
  COMPARE(
      ldnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)),
      "ldnt1b {z21.b}, p5/z, [x1, x23]");
  COMPARE(
      ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)),
      "ldnt1d {z10.d}, p0/z, [x23, x6, lsl #3]");
  COMPARE(
      ldnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)),
      "ldnt1h {z30.h}, p4/z, [x6, x11, lsl #1]");
  COMPARE(
      ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)),
      "ldnt1w {z0.s}, p4/z, [x11, x1, lsl #2]");
  COMPARE(
      ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
      "ldnt1w {z0.s}, p4/z, [sp, xzr, lsl #2]");

  // Ldnt1* macros with a shift equal to log2 of the element size in bytes.
  COMPARE_MACRO(
      Ldnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)),
      "ldnt1b {z0.b}, p0/z, [x0, x1]");
  COMPARE_MACRO(
      Ldnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)),
      "ldnt1h {z0.h}, p0/z, [x0, x1, lsl #1]");
  COMPARE_MACRO(
      Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)),
      "ldnt1w {z0.s}, p0/z, [x0, x1, lsl #2]");
  COMPARE_MACRO(
      Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
      "ldnt1d {z0.d}, p0/z, [x0, x1, lsl #3]");

  // Ldnt1* macros with a larger shift: the expected output adds the shifted
  // index into x16 and then loads from [x16].
  COMPARE_MACRO(
      Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
      "add x16, x0, x1, lsl #3\n"
      "ldnt1w {z0.s}, p0/z, [x16]");
  COMPARE_MACRO(
      Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 4)),
      "add x16, x0, x1, lsl #4\n"
      "ldnt1d {z0.d}, p0/z, [x16]");

  // ldnt1*: plain base and "mul vl" immediate operands.
  COMPARE(
      ldnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)),
      "ldnt1b {z1.b}, p3/z, [x11]");
  COMPARE(
      ldnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)),
      "ldnt1b {z2.b}, p2/z, [x12, #-8, mul vl]");
  COMPARE(
      ldnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)),
      "ldnt1d {z2.d}, p7/z, [x13, #-2, mul vl]");
  COMPARE(
      ldnt1h(z26.VnH(), p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)),
      "ldnt1h {z26.h}, p4/z, [x16, #3, mul vl]");
  COMPARE(
      ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
      "ldnt1w {z17.s}, p4/z, [x15, #7, mul vl]");
  COMPARE(
      ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)),
      "ldnt1w {z17.s}, p4/z, [sp, #7, mul vl]");

  // Ldnt1* macros with immediates beyond those used above: the expected
  // output computes the address into x16 (using x17 as well in the rdvl/madd
  // sequences).
  COMPARE_MACRO(
      Ldnt1b(z2.VnB(), p0.Zeroing(), SVEMemOperand(x10, 42, SVE_MUL_VL)),
      "mov x16, #0x2a\n"
      "rdvl x17, #1\n"
      "madd x16, x16, x17, x10\n"
      "ldnt1b {z2.b}, p0/z, [x16]");
  COMPARE_MACRO(
      Ldnt1h(z3.VnH(), p1.Zeroing(), SVEMemOperand(x11, 31, SVE_MUL_VL)),
      "addvl x16, x11, #31\n"
      "ldnt1h {z3.h}, p1/z, [x16]");
  COMPARE_MACRO(
      Ldnt1w(z4.VnS(), p2.Zeroing(), SVEMemOperand(x12, -35, SVE_MUL_VL)),
      "mov x16, #0xffffffffffffffdd\n"
      "rdvl x17, #1\n"
      "madd x16, x16, x17, x12\n"
      "ldnt1w {z4.s}, p2/z, [x16]");
  COMPARE_MACRO(
      Ldnt1d(z5.VnD(), p3.Zeroing(), SVEMemOperand(x13, 3)),
      "add x16, x13, #0x3 (3)\n"
      "ldnt1d {z5.d}, p3/z, [x16]");

  // ldnf1*: "mul vl" immediates stepping from -8 up to 7, then a plain base.
  COMPARE(
      ldnf1b(z1.VnH(), p0.Zeroing(), SVEMemOperand(x25, -8, SVE_MUL_VL)),
      "ldnf1b {z1.h}, p0/z, [x25, #-8, mul vl]");
  COMPARE(
      ldnf1b(z0.VnS(), p0.Zeroing(), SVEMemOperand(x2, 7, SVE_MUL_VL)),
      "ldnf1b {z0.s}, p0/z, [x2, #7, mul vl]");
  COMPARE(
      ldnf1b(z31.VnD(), p6.Zeroing(), SVEMemOperand(x0, -7, SVE_MUL_VL)),
      "ldnf1b {z31.d}, p6/z, [x0, #-7, mul vl]");
  COMPARE(
      ldnf1b(z25.VnB(), p1.Zeroing(), SVEMemOperand(x5, 6, SVE_MUL_VL)),
      "ldnf1b {z25.b}, p1/z, [x5, #6, mul vl]");
  COMPARE(
      ldnf1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(x11, -6, SVE_MUL_VL)),
      "ldnf1d {z25.d}, p0/z, [x11, #-6, mul vl]");
  COMPARE(
      ldnf1h(z22.VnH(), p4.Zeroing(), SVEMemOperand(x7, 5, SVE_MUL_VL)),
      "ldnf1h {z22.h}, p4/z, [x7, #5, mul vl]");
  COMPARE(
      ldnf1h(z7.VnS(), p2.Zeroing(), SVEMemOperand(x1, -5, SVE_MUL_VL)),
      "ldnf1h {z7.s}, p2/z, [x1, #-5, mul vl]");
  COMPARE(
      ldnf1h(z5.VnD(), p3.Zeroing(), SVEMemOperand(x29, 4, SVE_MUL_VL)),
      "ldnf1h {z5.d}, p3/z, [x29, #4, mul vl]");
  COMPARE(
      ldnf1sb(z12.VnH(), p5.Zeroing(), SVEMemOperand(x27, -4, SVE_MUL_VL)),
      "ldnf1sb {z12.h}, p5/z, [x27, #-4, mul vl]");
  COMPARE(
      ldnf1sb(z10.VnS(), p2.Zeroing(), SVEMemOperand(x13, 3, SVE_MUL_VL)),
      "ldnf1sb {z10.s}, p2/z, [x13, #3, mul vl]");
  COMPARE(
      ldnf1sb(z25.VnD(), p6.Zeroing(), SVEMemOperand(x26, -3, SVE_MUL_VL)),
      "ldnf1sb {z25.d}, p6/z, [x26, #-3, mul vl]");
  COMPARE(
      ldnf1sh(z3.VnS(), p5.Zeroing(), SVEMemOperand(x1, 2, SVE_MUL_VL)),
      "ldnf1sh {z3.s}, p5/z, [x1, #2, mul vl]");
  COMPARE(
      ldnf1sh(z8.VnD(), p6.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)),
      "ldnf1sh {z8.d}, p6/z, [x13, #-2, mul vl]");
  COMPARE(
      ldnf1sw(z5.VnD(), p6.Zeroing(), SVEMemOperand(x2, 1, SVE_MUL_VL)),
      "ldnf1sw {z5.d}, p6/z, [x2, #1, mul vl]");
  COMPARE(
      ldnf1w(z11.VnS(), p3.Zeroing(), SVEMemOperand(sp, -1, SVE_MUL_VL)),
      "ldnf1w {z11.s}, p3/z, [sp, #-1, mul vl]");
  COMPARE(
      ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)),
      "ldnf1w {z10.d}, p6/z, [x12]");

  CLEANUP();
}
4533
// Checks the disassembly of the stnt1* contiguous stores, both assembled
// directly (COMPARE) and through the MacroAssembler (COMPARE_MACRO).
//
// NOTE(review): every predicate here is passed as pN.Zeroing(), yet the
// expected text prints the predicate without a "/z" qualifier. Confirm
// whether the Zeroing() qualifier is intentional for stores.
TEST(sve_mem_contiguous_store) {
  SETUP();

  // Scalar-plus-scalar operands.
  COMPARE(
      stnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)),
      "stnt1b {z21.b}, p5, [x1, x23]");
  COMPARE(
      stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)),
      "stnt1d {z10.d}, p0, [x23, x6, lsl #3]");
  COMPARE(
      stnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)),
      "stnt1h {z30.h}, p4, [x6, x11, lsl #1]");
  COMPARE(
      stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)),
      "stnt1w {z0.s}, p4, [x11, x1, lsl #2]");
  COMPARE(
      stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
      "stnt1w {z0.s}, p4, [sp, xzr, lsl #2]");

  // Plain base and "mul vl" immediate operands.
  COMPARE(
      stnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)),
      "stnt1b {z1.b}, p3, [x11]");
  COMPARE(
      stnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)),
      "stnt1b {z2.b}, p2, [x12, #-8, mul vl]");
  COMPARE(
      stnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)),
      "stnt1d {z2.d}, p7, [x13, #-2, mul vl]");
  COMPARE(
      stnt1h(z26.VnH(), p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)),
      "stnt1h {z26.h}, p4, [x16, #3, mul vl]");
  COMPARE(
      stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
      "stnt1w {z17.s}, p4, [x15, #7, mul vl]");
  COMPARE(
      stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)),
      "stnt1w {z17.s}, p4, [sp, #7, mul vl]");

  // Macros with larger "mul vl" immediates: the expected output computes the
  // address into x16 before the store.
  COMPARE_MACRO(
      Stnt1b(z2.VnB(), p0.Zeroing(), SVEMemOperand(x10, 42, SVE_MUL_VL)),
      "mov x16, #0x2a\n"
      "rdvl x17, #1\n"
      "madd x16, x16, x17, x10\n"
      "stnt1b {z2.b}, p0, [x16]");
  COMPARE_MACRO(
      Stnt1h(z3.VnH(), p1.Zeroing(), SVEMemOperand(x11, 31, SVE_MUL_VL)),
      "addvl x16, x11, #31\n"
      "stnt1h {z3.h}, p1, [x16]");
  COMPARE_MACRO(
      Stnt1w(z4.VnS(), p2.Zeroing(), SVEMemOperand(x12, -35, SVE_MUL_VL)),
      "mov x16, #0xffffffffffffffdd\n"
      "rdvl x17, #1\n"
      "madd x16, x16, x17, x12\n"
      "stnt1w {z4.s}, p2, [x16]");

  // Macros with scalar-plus-scalar operands expected as a single instruction.
  COMPARE_MACRO(
      Stnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)),
      "stnt1b {z0.b}, p0, [x0, x1]");
  COMPARE_MACRO(
      Stnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)),
      "stnt1h {z0.h}, p0, [x0, x1, lsl #1]");
  COMPARE_MACRO(
      Stnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)),
      "stnt1w {z0.s}, p0, [x0, x1, lsl #2]");
  COMPARE_MACRO(
      Stnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
      "stnt1d {z0.d}, p0, [x0, x1, lsl #3]");

  CLEANUP();
}
4592
// Checks the disassembly produced for the Ld1rob, Ld1roh, Ld1row and Ld1rod
// macros with scalar-plus-scalar and scalar-plus-immediate operands.
TEST(sve_load_broadcast_octo) {
  SETUP();

  // Scalar-plus-scalar operands. With x31 as the index, the expected text
  // shows only the base register.
  COMPARE_MACRO(
      Ld1rob(z3.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1)),
      "ld1rob {z3.b}, p1/z, [x0, x1]");
  COMPARE_MACRO(
      Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x31, LSL, 1)),
      "ld1roh {z6.h}, p4/z, [sp]");
  COMPARE_MACRO(
      Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x30, LSL, 1)),
      "ld1roh {z6.h}, p4/z, [sp, x30, lsl #1]");
  COMPARE_MACRO(
      Ld1row(z2.VnS(), p6.Zeroing(), SVEMemOperand(x30, x30, LSL, 2)),
      "ld1row {z2.s}, p6/z, [x30, x30, lsl #2]");
  COMPARE_MACRO(
      Ld1rod(z30.VnD(), p7.Zeroing(), SVEMemOperand(x21, x5, LSL, 3)),
      "ld1rod {z30.d}, p7/z, [x21, x5, lsl #3]");

  // Plain base and scalar-plus-immediate operands.
  COMPARE_MACRO(
      Ld1rob(z9.VnB(), p0.Zeroing(), SVEMemOperand(sp, 32)),
      "ld1rob {z9.b}, p0/z, [sp, #32]");
  COMPARE_MACRO(
      Ld1roh(z19.VnH(), p3.Zeroing(), SVEMemOperand(x4)),
      "ld1roh {z19.h}, p3/z, [x4]");
  COMPARE_MACRO(
      Ld1row(z21.VnS(), p3.Zeroing(), SVEMemOperand(x11, 224)),
      "ld1row {z21.s}, p3/z, [x11, #224]");
  COMPARE_MACRO(
      Ld1rod(z0.VnD(), p2.Zeroing(), SVEMemOperand(x16, -256)),
      "ld1rod {z0.d}, p2/z, [x16, #-256]");

  CLEANUP();
}
4617
// Checks the disassembly of SVE ldr/str for predicate (p) and vector (z)
// registers, first assembled directly and then through the Ldr/Str macros.
TEST(sve_ldr_str_simple) {
  SETUP();

  // Direct assembly: plain base, then "mul vl" immediates.
  COMPARE(str(p14, SVEMemOperand(x0)), "str p14, [x0]");
  COMPARE(str(z14, SVEMemOperand(sp)), "str z14, [sp]");
  COMPARE(ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]");
  COMPARE(ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]");
  COMPARE(str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)),
          "str p15, [sp, #-256, mul vl]");
  COMPARE(str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)),
          "str z16, [x13, #255, mul vl]");
  COMPARE(ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)),
          "ldr p5, [sp, #-42, mul vl]");
  COMPARE(ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)),
          "ldr z6, [x28, #42, mul vl]");

  // The same operands through the macros: each is expected to come out as the
  // single instruction checked above.
  COMPARE_MACRO(Str(p14, SVEMemOperand(x0)), "str p14, [x0]");
  COMPARE_MACRO(Str(z14, SVEMemOperand(sp)), "str z14, [sp]");
  COMPARE_MACRO(Ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]");
  COMPARE_MACRO(Ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]");
  COMPARE_MACRO(Str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)),
                "str p15, [sp, #-256, mul vl]");
  COMPARE_MACRO(Str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)),
                "str z16, [x13, #255, mul vl]");
  COMPARE_MACRO(Ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)),
                "ldr p5, [sp, #-42, mul vl]");
  COMPARE_MACRO(Ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)),
                "ldr z6, [x28, #42, mul vl]");

  // IsEquivalentToScalar
  COMPARE_MACRO(Str(p0, SVEMemOperand(x0, xzr)), "str p0, [x0]");
  COMPARE_MACRO(Ldr(p1, SVEMemOperand(sp, xzr)), "ldr p1, [sp]");
  COMPARE_MACRO(Str(z2, SVEMemOperand(x12, xzr)), "str z2, [x12]");
  COMPARE_MACRO(Ldr(z3, SVEMemOperand(x7, xzr)), "ldr z3, [x7]");

  // Other cases fall back on Adr. We test Adr separately, so here we just test
  // sequences that stress scratch register allocation.
  COMPARE_MACRO(Str(p4, SVEMemOperand(x5, 4242, SVE_MUL_VL)),
                "mov x16, #0x1092\n"
                "rdvl x17, #1\n"
                "mul x16, x16, x17\n"
                "add x16, x5, x16, asr #3\n"
                "str p4, [x16]");
  COMPARE_MACRO(Ldr(p6, SVEMemOperand(sp, 4242, SVE_MUL_VL)),
                "mov x16, #0x1092\n"
                "rdvl x17, #1\n"
                "mul x16, x16, x17\n"
                "asr x16, x16, #3\n"
                "add x16, sp, x16\n"
                "ldr p6, [x16]");
  COMPARE_MACRO(Str(z7, SVEMemOperand(sp, 4242, SVE_MUL_VL)),
                "mov x16, #0x1092\n"
                "rdvl x17, #1\n"
                "mul x16, x16, x17\n"
                "add x16, sp, x16\n"
                "str z7, [x16]");
  COMPARE_MACRO(Ldr(z8, SVEMemOperand(x9, 4242, SVE_MUL_VL)),
                "mov x16, #0x1092\n"
                "rdvl x17, #1\n"
                "madd x16, x16, x17, x9\n"
                "ldr z8, [x16]");

  CLEANUP();
}
4685
// Checks the disassembly of directly assembled st1{b,h,w,d} stores and
// ld1{b,h,w,d} / ld1s{b,h,w} loads. The stores are checked with
// scalar-plus-immediate, scalar-plus-scalar, vector-plus-immediate and
// scalar-plus-vector operands; the loads with scalar-plus-immediate and
// scalar-plus-scalar operands.
TEST(sve_ld1_st1) {
  SETUP();

  // st1b
  COMPARE(st1b(z11.VnB(), p0, SVEMemOperand(x22)), "st1b {z11.b}, p0, [x22]");
  COMPARE(st1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
          "st1b {z15.h}, p1, [x15, #7, mul vl]");
  COMPARE(st1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "st1b {z19.s}, p2, [sp, #-8, mul vl]");
  COMPARE(st1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "st1b {z23.d}, p3, [x1]");
  COMPARE(st1b(z2.VnB(), p4, SVEMemOperand(x1, x2)),
          "st1b {z2.b}, p4, [x1, x2]");
  COMPARE(st1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)),
          "st1b {z31.d}, p7, [x9, x9]");
  COMPARE(st1b(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)),
          "st1b {z3.s}, p0, [z14.s, #30]");
  COMPARE(st1b(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 31)),
          "st1b {z14.d}, p4, [z3.d, #31]");
  COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z5.VnD())),
          "st1b {z15.d}, p5, [x0, z5.d]");
  COMPARE(st1b(z15.VnS(), p5, SVEMemOperand(sp, z2.VnS(), UXTW)),
          "st1b {z15.s}, p5, [sp, z2.s, uxtw]");
  COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z25.VnD(), SXTW)),
          "st1b {z15.d}, p5, [x0, z25.d, sxtw]");

  // st1h
  COMPARE(st1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
          "st1h {z15.h}, p1, [x15, #7, mul vl]");
  COMPARE(st1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "st1h {z19.s}, p2, [sp, #-8, mul vl]");
  COMPARE(st1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "st1h {z23.d}, p3, [x1]");
  COMPARE(st1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)),
          "st1h {z2.h}, p4, [x1, x2, lsl #1]");
  COMPARE(st1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)),
          "st1h {z31.d}, p7, [x9, x9, lsl #1]");
  COMPARE(st1h(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)),
          "st1h {z3.s}, p0, [z14.s, #30]");
  COMPARE(st1h(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 62)),
          "st1h {z14.d}, p4, [z3.d, #62]");
  COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD())),
          "st1h {z15.d}, p6, [sp, z6.d]");
  COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD(), LSL, 1)),
          "st1h {z15.d}, p6, [sp, z6.d, lsl #1]");
  COMPARE(st1h(z15.VnS(), p3, SVEMemOperand(x25, z3.VnS(), SXTW)),
          "st1h {z15.s}, p3, [x25, z3.s, sxtw]");
  COMPARE(st1h(z15.VnS(), p6, SVEMemOperand(x7, z15.VnS(), SXTW, 1)),
          "st1h {z15.s}, p6, [x7, z15.s, sxtw #1]");
  COMPARE(st1h(z17.VnD(), p3, SVEMemOperand(sp, z26.VnD(), SXTW)),
          "st1h {z17.d}, p3, [sp, z26.d, sxtw]");
  COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(x13, z9.VnD(), UXTW, 1)),
          "st1h {z15.d}, p6, [x13, z9.d, uxtw #1]");

  // st1w
  COMPARE(st1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "st1w {z19.s}, p2, [sp, #-8, mul vl]");
  COMPARE(st1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "st1w {z23.d}, p3, [x1]");
  COMPARE(st1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)),
          "st1w {z2.s}, p4, [x1, x2, lsl #2]");
  COMPARE(st1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)),
          "st1w {z31.d}, p7, [x9, x9, lsl #2]");
  COMPARE(st1w(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 32)),
          "st1w {z3.s}, p0, [z14.s, #32]");
  COMPARE(st1w(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 124)),
          "st1w {z14.d}, p4, [z3.d, #124]");
  COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD())),
          "st1w {z17.d}, p2, [x30, z5.d]");
  COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD(), LSL, 2)),
          "st1w {z17.d}, p2, [x30, z5.d, lsl #2]");
  COMPARE(st1w(z15.VnS(), p7, SVEMemOperand(x26, z4.VnS(), UXTW)),
          "st1w {z15.s}, p7, [x26, z4.s, uxtw]");
  COMPARE(st1w(z15.VnS(), p4, SVEMemOperand(x8, z16.VnS(), UXTW, 2)),
          "st1w {z15.s}, p4, [x8, z16.s, uxtw #2]");
  COMPARE(st1w(z19.VnD(), p7, SVEMemOperand(x1, z27.VnD(), UXTW)),
          "st1w {z19.d}, p7, [x1, z27.d, uxtw]");
  COMPARE(st1w(z15.VnD(), p4, SVEMemOperand(sp, z10.VnD(), SXTW, 2)),
          "st1w {z15.d}, p4, [sp, z10.d, sxtw #2]");

  // st1d
  COMPARE(st1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "st1d {z23.d}, p3, [x1]");
  COMPARE(st1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)),
          "st1d {z31.d}, p7, [x9, x9, lsl #3]");
  COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 32)),
          "st1d {z14.d}, p4, [z3.d, #32]");
  COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 248)),
          "st1d {z14.d}, p4, [z3.d, #248]");
  COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD())),
          "st1d {z19.d}, p2, [x29, z22.d]");
  COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD(), LSL, 3)),
          "st1d {z19.d}, p2, [x29, z22.d, lsl #3]");
  COMPARE(st1d(z21.VnD(), p1, SVEMemOperand(x2, z28.VnD(), SXTW)),
          "st1d {z21.d}, p1, [x2, z28.d, sxtw]");
  COMPARE(st1d(z15.VnD(), p2, SVEMemOperand(x14, z11.VnD(), UXTW, 3)),
          "st1d {z15.d}, p2, [x14, z11.d, uxtw #3]");

  // ld1b
  COMPARE(ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)),
          "ld1b {z11.b}, p0/z, [x22]");
  COMPARE(ld1b(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
          "ld1b {z15.h}, p1/z, [x15, #7, mul vl]");
  COMPARE(ld1b(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "ld1b {z19.s}, p2/z, [sp, #-8, mul vl]");
  COMPARE(ld1b(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "ld1b {z23.d}, p3/z, [x1]");
  COMPARE(ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)),
          "ld1b {z2.b}, p4/z, [x1, x2]");
  COMPARE(ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)),
          "ld1b {z31.d}, p7/z, [x9, x9]");

  // ld1h
  COMPARE(ld1h(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
          "ld1h {z15.h}, p1/z, [x15, #7, mul vl]");
  COMPARE(ld1h(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]");
  COMPARE(ld1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "ld1h {z23.d}, p3/z, [x1]");
  COMPARE(ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)),
          "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]");
  COMPARE(ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)),
          "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]");

  // ld1w
  COMPARE(ld1w(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]");
  COMPARE(ld1w(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "ld1w {z23.d}, p3/z, [x1]");
  COMPARE(ld1w(z2.VnS(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 2)),
          "ld1w {z2.s}, p4/z, [x1, x2, lsl #2]");
  COMPARE(ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)),
          "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]");

  // ld1d
  COMPARE(ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "ld1d {z23.d}, p3/z, [x1]");
  COMPARE(ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)),
          "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]");

  // ld1sb: H, S and D lanes for both addressing forms.
  COMPARE(ld1sb(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
          "ld1sb {z15.h}, p1/z, [x15, #7, mul vl]");
  COMPARE(ld1sb(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "ld1sb {z19.s}, p2/z, [sp, #-8, mul vl]");
  COMPARE(ld1sb(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "ld1sb {z23.d}, p3/z, [x1]");
  COMPARE(ld1sb(z5.VnH(), p1.Zeroing(), SVEMemOperand(x15, x1, LSL, 0)),
          "ld1sb {z5.h}, p1/z, [x15, x1]");
  COMPARE(ld1sb(z9.VnS(), p2.Zeroing(), SVEMemOperand(x29, x3, LSL, 0)),
          "ld1sb {z9.s}, p2/z, [x29, x3]");
  COMPARE(ld1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)),
          "ld1sb {z31.d}, p7/z, [x9, x9]");

  // ld1sh
  COMPARE(ld1sh(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
          "ld1sh {z19.s}, p2/z, [sp, #-8, mul vl]");
  COMPARE(ld1sh(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "ld1sh {z23.d}, p3/z, [x1]");
  COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(x22, x10, LSL, 1)),
          "ld1sh {z11.s}, p4/z, [x22, x10, lsl #1]");
  COMPARE(ld1sh(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)),
          "ld1sh {z31.d}, p7/z, [x9, x9, lsl #1]");

  // ld1sw
  COMPARE(ld1sw(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
          "ld1sw {z23.d}, p3/z, [x1]");
  COMPARE(ld1sw(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)),
          "ld1sw {z31.d}, p7/z, [x9, x9, lsl #2]");

  CLEANUP();
}
4847
TEST(sve_ld1_st1_macro)4848 TEST(sve_ld1_st1_macro) {
4849 SETUP();
4850
4851 // Pass-through cases.
4852 COMPARE_MACRO(St1b(z11.VnB(), p0, SVEMemOperand(x22)),
4853 "st1b {z11.b}, p0, [x22]");
4854 COMPARE_MACRO(St1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
4855 "st1b {z15.h}, p1, [x15, #7, mul vl]");
4856 COMPARE_MACRO(St1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
4857 "st1b {z19.s}, p2, [sp, #-8, mul vl]");
4858 COMPARE_MACRO(St1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
4859 "st1b {z23.d}, p3, [x1]");
4860 COMPARE_MACRO(St1b(z2.VnB(), p4, SVEMemOperand(x1, x2)),
4861 "st1b {z2.b}, p4, [x1, x2]");
4862 COMPARE_MACRO(St1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)),
4863 "st1b {z31.d}, p7, [x9, x9]");
4864 COMPARE_MACRO(St1b(z3.VnS(), p6, SVEMemOperand(z4.VnS(), 22)),
4865 "st1b {z3.s}, p6, [z4.s, #22]");
4866
4867 COMPARE_MACRO(St1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
4868 "st1h {z15.h}, p1, [x15, #7, mul vl]");
4869 COMPARE_MACRO(St1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
4870 "st1h {z19.s}, p2, [sp, #-8, mul vl]");
4871 COMPARE_MACRO(St1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
4872 "st1h {z23.d}, p3, [x1]");
4873 COMPARE_MACRO(St1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)),
4874 "st1h {z2.h}, p4, [x1, x2, lsl #1]");
4875 COMPARE_MACRO(St1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)),
4876 "st1h {z31.d}, p7, [x9, x9, lsl #1]");
4877 COMPARE_MACRO(St1h(z3.VnD(), p5, SVEMemOperand(z0.VnD())),
4878 "st1h {z3.d}, p5, [z0.d]");
4879
4880 COMPARE_MACRO(St1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
4881 "st1w {z19.s}, p2, [sp, #-8, mul vl]");
4882 COMPARE_MACRO(St1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
4883 "st1w {z23.d}, p3, [x1]");
4884 COMPARE_MACRO(St1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)),
4885 "st1w {z2.s}, p4, [x1, x2, lsl #2]");
4886 COMPARE_MACRO(St1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)),
4887 "st1w {z31.d}, p7, [x9, x9, lsl #2]");
4888 COMPARE_MACRO(St1w(z12.VnS(), p2, SVEMemOperand(z13.VnS(), 124)),
4889 "st1w {z12.s}, p2, [z13.s, #124]");
4890
4891 COMPARE_MACRO(St1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
4892 "st1d {z23.d}, p3, [x1]");
4893 COMPARE_MACRO(St1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)),
4894 "st1d {z31.d}, p7, [x9, x9, lsl #3]");
4895 COMPARE_MACRO(St1d(z13.VnD(), p3, SVEMemOperand(z12.VnD(), 248)),
4896 "st1d {z13.d}, p3, [z12.d, #248]");
4897
4898 // SVEMemOperand synthesis.
4899 // Check that the MacroAssembler falls back on `CalculateSVEAddress` at the
4900 // boundary conditions. We test this helper independently.
4901 COMPARE_MACRO(St1b(z10.VnB(), p7, SVEMemOperand(x0, 8, SVE_MUL_VL)),
4902 "addvl x16, x0, #8\n"
4903 "st1b {z10.b}, p7, [x16]");
4904 COMPARE_MACRO(St1h(z11.VnS(), p5, SVEMemOperand(sp, -9, SVE_MUL_VL)),
4905 "mov x16, #0xffffffffffffffdc\n"
4906 "rdvl x17, #1\n"
4907 "mul x16, x16, x17\n"
4908 "asr x16, x16, #3\n"
4909 "add x16, sp, x16\n"
4910 "st1h {z11.s}, p5, [x16]");
4911 COMPARE_MACRO(St1w(z22.VnS(), p3, SVEMemOperand(sp, 42)),
4912 "add x16, sp, #0x2a (42)\n"
4913 "st1w {z22.s}, p3, [x16]");
4914 COMPARE_MACRO(St1d(z22.VnD(), p1, SVEMemOperand(x3, x4)),
4915 "add x16, x3, x4\n"
4916 "st1d {z22.d}, p1, [x16]");
4917 COMPARE_MACRO(St1b(z30.VnD(), p0, SVEMemOperand(x9, xzr)),
4918 "st1b {z30.d}, p0, [x9]");
4919
4920 // TODO: Fix these - they need scatter-store-to-scalar-plus-vector support.
4921 #if 0
4922 COMPARE_MACRO(St1b(z1.VnD(), p6, SVEMemOperand(z0.VnD(), 32)), "mov x16, #0x20\n" "st1b {z1.d}, p6/z, [x16, z0.d]");
4923 COMPARE_MACRO(St1h(z1.VnS(), p6, SVEMemOperand(z0.VnS(), -1)), "mov x16, #0xffffffffffffffff\n" "st1h {z1.s}, p6/z, [x16, z0.s]");
4924 #endif
4925
4926 COMPARE_MACRO(Ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)),
4927 "ld1b {z11.b}, p0/z, [x22]");
4928 COMPARE_MACRO(Ld1b(z15.VnH(),
4929 p1.Zeroing(),
4930 SVEMemOperand(x15, 7, SVE_MUL_VL)),
4931 "ld1b {z15.h}, p1/z, [x15, #7, mul vl]");
4932 COMPARE_MACRO(Ld1b(z19.VnS(),
4933 p2.Zeroing(),
4934 SVEMemOperand(sp, -8, SVE_MUL_VL)),
4935 "ld1b {z19.s}, p2/z, [sp, #-8, mul vl]");
4936 COMPARE_MACRO(Ld1b(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
4937 "ld1b {z23.d}, p3/z, [x1]");
4938 COMPARE_MACRO(Ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)),
4939 "ld1b {z2.b}, p4/z, [x1, x2]");
4940 COMPARE_MACRO(Ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)),
4941 "ld1b {z31.d}, p7/z, [x9, x9]");
4942
4943 COMPARE_MACRO(Ld1h(z15.VnH(),
4944 p1.Zeroing(),
4945 SVEMemOperand(x15, 7, SVE_MUL_VL)),
4946 "ld1h {z15.h}, p1/z, [x15, #7, mul vl]");
4947 COMPARE_MACRO(Ld1h(z19.VnS(),
4948 p2.Zeroing(),
4949 SVEMemOperand(sp, -8, SVE_MUL_VL)),
4950 "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]");
4951 COMPARE_MACRO(Ld1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
4952 "ld1h {z23.d}, p3/z, [x1]");
4953 COMPARE_MACRO(Ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)),
4954 "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]");
4955 COMPARE_MACRO(Ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)),
4956 "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]");
4957
4958 COMPARE_MACRO(Ld1w(z19.VnS(),
4959 p2.Zeroing(),
4960 SVEMemOperand(sp, -8, SVE_MUL_VL)),
4961 "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]");
4962 COMPARE_MACRO(Ld1w(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
4963 "ld1w {z23.d}, p3/z, [x1]");
4964 COMPARE_MACRO(Ld1w(z2.VnS(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 2)),
4965 "ld1w {z2.s}, p4/z, [x1, x2, lsl #2]");
4966 COMPARE_MACRO(Ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)),
4967 "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]");
4968
4969 COMPARE_MACRO(Ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
4970 "ld1d {z23.d}, p3/z, [x1]");
4971 COMPARE_MACRO(Ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)),
4972 "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]");
4973
4974 // SVEMemOperand synthesis.
4975 // Check that the MacroAssembler falls back on `CalculateSVEAddress` at the
4976 // boundary conditions. We test this helper independently.
4977 COMPARE_MACRO(Ld1b(z10.VnB(), p7.Zeroing(), SVEMemOperand(x0, 8, SVE_MUL_VL)),
4978 "addvl x16, x0, #8\n"
4979 "ld1b {z10.b}, p7/z, [x16]");
4980 COMPARE_MACRO(Ld1h(z11.VnS(),
4981 p5.Zeroing(),
4982 SVEMemOperand(sp, -9, SVE_MUL_VL)),
4983 "mov x16, #0xffffffffffffffdc\n"
4984 "rdvl x17, #1\n"
4985 "mul x16, x16, x17\n"
4986 "asr x16, x16, #3\n"
4987 "add x16, sp, x16\n"
4988 "ld1h {z11.s}, p5/z, [x16]");
4989 COMPARE_MACRO(Ld1w(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, 42)),
4990 "add x16, sp, #0x2a (42)\n"
4991 "ld1w {z22.s}, p3/z, [x16]");
4992 COMPARE_MACRO(Ld1d(z22.VnD(), p1.Zeroing(), SVEMemOperand(x3, x4)),
4993 "add x16, x3, x4\n"
4994 "ld1d {z22.d}, p1/z, [x16]");
4995 COMPARE_MACRO(Ld1b(z30.VnD(), p0.Zeroing(), SVEMemOperand(x9, xzr)),
4996 "ld1b {z30.d}, p0/z, [x9]");
4997 CLEANUP();
4998 }
4999
// Disassembly of st2b/st2h/st2w/st2d (two-register structured stores) with
// scalar-plus-immediate addressing. Every element size is checked with no
// offset and with the offsets 14 and -16 (printed as "mul vl"). Some register
// lists run from z31 on to z0.
TEST(sve_st2_scalar_plus_immediate) {
  SETUP();

  COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19)),
          "st2b {z31.b, z0.b}, p6, [x19]");
  COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "st2b {z31.b, z0.b}, p6, [x19, #14, mul vl]");
  COMPARE(st2b(z15.VnB(), z16.VnB(), p6, SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "st2b {z15.b, z16.b}, p6, [x19, #-16, mul vl]");

  COMPARE(st2h(z15.VnH(), z16.VnH(), p6, SVEMemOperand(x19)),
          "st2h {z15.h, z16.h}, p6, [x19]");
  COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "st2h {z15.h, z16.h}, p0, [x19, #14, mul vl]");
  COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "st2h {z15.h, z16.h}, p0, [x19, #-16, mul vl]");

  COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19)),
          "st2w {z0.s, z1.s}, p0, [x19]");
  COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "st2w {z0.s, z1.s}, p0, [x19, #14, mul vl]");
  COMPARE(st2w(z0.VnS(), z1.VnS(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "st2w {z0.s, z1.s}, p7, [x19, #-16, mul vl]");

  COMPARE(st2d(z0.VnD(), z1.VnD(), p7, SVEMemOperand(x19)),
          "st2d {z0.d, z1.d}, p7, [x19]");
  COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, 14, SVE_MUL_VL)),
          "st2d {z31.d, z0.d}, p7, [x19, #14, mul vl]");
  COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)),
          "st2d {z31.d, z0.d}, p7, [x19, #-16, mul vl]");

  CLEANUP();
}
5033
// Disassembly of st3b/st3h/st3w/st3d (three-register structured stores) with
// scalar-plus-immediate addressing. Every element size is checked with no
// offset and with the offsets 21 and -24 (printed as "mul vl"). Some register
// lists run from z31 on to z0.
TEST(sve_st3_scalar_plus_immediate) {
  SETUP();

  COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p7, SVEMemOperand(x19)),
          "st3b {z30.b, z31.b, z0.b}, p7, [x19]");
  COMPARE(st3b(z30.VnB(),
               z31.VnB(),
               z0.VnB(),
               p6,
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "st3b {z30.b, z31.b, z0.b}, p6, [x19, #21, mul vl]");
  COMPARE(st3b(z30.VnB(),
               z31.VnB(),
               z0.VnB(),
               p6,
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "st3b {z30.b, z31.b, z0.b}, p6, [x19, #-24, mul vl]");

  COMPARE(st3h(z15.VnH(), z16.VnH(), z17.VnH(), p6, SVEMemOperand(x19)),
          "st3h {z15.h, z16.h, z17.h}, p6, [x19]");
  COMPARE(st3h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               p6,
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "st3h {z15.h, z16.h, z17.h}, p6, [x19, #21, mul vl]");
  COMPARE(st3h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               p0,
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "st3h {z15.h, z16.h, z17.h}, p0, [x19, #-24, mul vl]");

  COMPARE(st3w(z15.VnS(), z16.VnS(), z17.VnS(), p0, SVEMemOperand(x19)),
          "st3w {z15.s, z16.s, z17.s}, p0, [x19]");
  COMPARE(st3w(z0.VnS(),
               z1.VnS(),
               z2.VnS(),
               p0,
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "st3w {z0.s, z1.s, z2.s}, p0, [x19, #21, mul vl]");
  COMPARE(st3w(z0.VnS(),
               z1.VnS(),
               z2.VnS(),
               p0,
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "st3w {z0.s, z1.s, z2.s}, p0, [x19, #-24, mul vl]");

  COMPARE(st3d(z0.VnD(), z1.VnD(), z2.VnD(), p7, SVEMemOperand(x19)),
          "st3d {z0.d, z1.d, z2.d}, p7, [x19]");
  COMPARE(st3d(z0.VnD(),
               z1.VnD(),
               z2.VnD(),
               p7,
               SVEMemOperand(x19, 21, SVE_MUL_VL)),
          "st3d {z0.d, z1.d, z2.d}, p7, [x19, #21, mul vl]");
  COMPARE(st3d(z30.VnD(),
               z31.VnD(),
               z0.VnD(),
               p7,
               SVEMemOperand(x19, -24, SVE_MUL_VL)),
          "st3d {z30.d, z31.d, z0.d}, p7, [x19, #-24, mul vl]");

  CLEANUP();
}
5099
// Disassembly of st4b/st4h/st4w/st4d (four-register structured stores) with
// scalar-plus-immediate addressing. Every element size is checked with no
// offset and with the offsets 28 and -32 (printed as "mul vl"). Some register
// lists run from z31 on to z0.
TEST(sve_st4_scalar_plus_immediate) {
  SETUP();

  COMPARE(st4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p7, SVEMemOperand(x19)),
          "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19]");
  COMPARE(st4b(z31.VnB(),
               z0.VnB(),
               z1.VnB(),
               z2.VnB(),
               p7,
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19, #28, mul vl]");
  COMPARE(st4b(z31.VnB(),
               z0.VnB(),
               z1.VnB(),
               z2.VnB(),
               p6,
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x19, #-32, mul vl]");

  COMPARE(st4h(z31.VnH(), z0.VnH(), z1.VnH(), z2.VnH(), p6, SVEMemOperand(x19)),
          "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x19]");
  COMPARE(st4h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               z18.VnH(),
               p6,
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "st4h {z15.h, z16.h, z17.h, z18.h}, p6, [x19, #28, mul vl]");
  COMPARE(st4h(z15.VnH(),
               z16.VnH(),
               z17.VnH(),
               z18.VnH(),
               p6,
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "st4h {z15.h, z16.h, z17.h, z18.h}, p6, "
          "[x19, #-32, mul vl]");

  COMPARE(st4w(z15.VnS(),
               z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               p0,
               SVEMemOperand(x19)),
          "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19]");
  COMPARE(st4w(z15.VnS(),
               z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               p0,
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19, #28, mul vl]");
  COMPARE(st4w(z0.VnS(),
               z1.VnS(),
               z2.VnS(),
               z3.VnS(),
               p0,
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x19, #-32, mul vl]");

  COMPARE(st4d(z0.VnD(), z1.VnD(), z2.VnD(), z3.VnD(), p0, SVEMemOperand(x19)),
          "st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x19]");
  COMPARE(st4d(z0.VnD(),
               z1.VnD(),
               z2.VnD(),
               z3.VnD(),
               p7,
               SVEMemOperand(x19, 28, SVE_MUL_VL)),
          "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #28, mul vl]");
  COMPARE(st4d(z0.VnD(),
               z1.VnD(),
               z2.VnD(),
               z3.VnD(),
               p7,
               SVEMemOperand(x19, -32, SVE_MUL_VL)),
          "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #-32, mul vl]");

  CLEANUP();
}
5179
// Disassembly of st2b/st2h/st2w/st2d with scalar-plus-scalar addressing. The
// base is either x20 or sp, the index is x19, and the index shift written in
// the operand (none, LSL #1, #2, #3) follows the element size of the store.
TEST(sve_st2_scalar_plus_scalar) {
  SETUP();

  COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(x20, x19)),
          "st2b {z25.b, z26.b}, p1, [x20, x19]");
  COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(sp, x19)),
          "st2b {z25.b, z26.b}, p1, [sp, x19]");
  COMPARE(st2b(z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)),
          "st2b {z31.b, z0.b}, p1, [sp, x19]");

  COMPARE(st2h(z31.VnH(), z0.VnH(), p1, SVEMemOperand(x20, x19, LSL, 1)),
          "st2h {z31.h, z0.h}, p1, [x20, x19, lsl #1]");
  COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)),
          "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]");
  // NOTE(review): identical to the preceding case; possibly meant to vary a
  // register or the predicate.
  COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)),
          "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]");

  COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(x20, x19, LSL, 2)),
          "st2w {z16.s, z17.s}, p7, [x20, x19, lsl #2]");
  COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(sp, x19, LSL, 2)),
          "st2w {z16.s, z17.s}, p7, [sp, x19, lsl #2]");
  COMPARE(st2w(z16.VnS(), z17.VnS(), p0, SVEMemOperand(sp, x19, LSL, 2)),
          "st2w {z16.s, z17.s}, p0, [sp, x19, lsl #2]");

  COMPARE(st2d(z16.VnD(), z17.VnD(), p0, SVEMemOperand(x20, x19, LSL, 3)),
          "st2d {z16.d, z17.d}, p0, [x20, x19, lsl #3]");
  COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)),
          "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]");
  // NOTE(review): identical to the preceding case; possibly meant to vary a
  // register or the predicate.
  COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)),
          "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]");

  CLEANUP();
}
5213
// Disassembly of st3b/st3h/st3w/st3d with scalar-plus-scalar addressing. The
// base is either x20 or sp, the index is x19, and the index shift written in
// the operand (none, LSL #1, #2, #3) follows the element size of the store.
TEST(sve_st3_scalar_plus_scalar) {
  SETUP();

  COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(x20, x19)),
          "st3b {z25.b, z26.b, z27.b}, p1, [x20, x19]");
  COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(sp, x19)),
          "st3b {z25.b, z26.b, z27.b}, p1, [sp, x19]");
  COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)),
          "st3b {z30.b, z31.b, z0.b}, p1, [sp, x19]");

  COMPARE(st3h(z30.VnH(),
               z31.VnH(),
               z0.VnH(),
               p1,
               SVEMemOperand(x20, x19, LSL, 1)),
          "st3h {z30.h, z31.h, z0.h}, p1, [x20, x19, lsl #1]");
  COMPARE(st3h(z30.VnH(),
               z31.VnH(),
               z0.VnH(),
               p7,
               SVEMemOperand(sp, x19, LSL, 1)),
          "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]");
  // NOTE(review): identical to the preceding case; possibly meant to vary a
  // register or the predicate.
  COMPARE(st3h(z30.VnH(),
               z31.VnH(),
               z0.VnH(),
               p7,
               SVEMemOperand(sp, x19, LSL, 1)),
          "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]");

  COMPARE(st3w(z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               p7,
               SVEMemOperand(x20, x19, LSL, 2)),
          "st3w {z16.s, z17.s, z18.s}, p7, [x20, x19, lsl #2]");
  COMPARE(st3w(z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               p7,
               SVEMemOperand(sp, x19, LSL, 2)),
          "st3w {z16.s, z17.s, z18.s}, p7, [sp, x19, lsl #2]");
  COMPARE(st3w(z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               p0,
               SVEMemOperand(sp, x19, LSL, 2)),
          "st3w {z16.s, z17.s, z18.s}, p0, [sp, x19, lsl #2]");

  COMPARE(st3d(z16.VnD(),
               z17.VnD(),
               z18.VnD(),
               p0,
               SVEMemOperand(x20, x19, LSL, 3)),
          "st3d {z16.d, z17.d, z18.d}, p0, [x20, x19, lsl #3]");
  COMPARE(st3d(z25.VnD(),
               z26.VnD(),
               z27.VnD(),
               p0,
               SVEMemOperand(sp, x19, LSL, 3)),
          "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]");
  // NOTE(review): identical to the preceding case; possibly meant to vary a
  // register or the predicate.
  COMPARE(st3d(z25.VnD(),
               z26.VnD(),
               z27.VnD(),
               p0,
               SVEMemOperand(sp, x19, LSL, 3)),
          "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]");

  CLEANUP();
}
5283
// Disassembly of st4b/st4h/st4w/st4d with scalar-plus-scalar addressing. The
// base is either x20 or sp, the index is x19, and the index shift written in
// the operand (none, LSL #1, #2, #3) follows the element size of the store.
TEST(sve_st4_scalar_plus_scalar) {
  SETUP();

  COMPARE(st4b(z25.VnB(),
               z26.VnB(),
               z27.VnB(),
               z28.VnB(),
               p0,
               SVEMemOperand(x20, x19)),
          "st4b {z25.b, z26.b, z27.b, z28.b}, p0, [x20, x19]");
  COMPARE(st4b(z25.VnB(),
               z26.VnB(),
               z27.VnB(),
               z28.VnB(),
               p1,
               SVEMemOperand(sp, x19)),
          "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]");
  // NOTE(review): identical to the preceding case; possibly meant to vary a
  // register or the predicate.
  COMPARE(st4b(z25.VnB(),
               z26.VnB(),
               z27.VnB(),
               z28.VnB(),
               p1,
               SVEMemOperand(sp, x19)),
          "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]");

  COMPARE(st4h(z31.VnH(),
               z0.VnH(),
               z1.VnH(),
               z2.VnH(),
               p1,
               SVEMemOperand(x20, x19, LSL, 1)),
          "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [x20, x19, lsl #1]");
  COMPARE(st4h(z31.VnH(),
               z0.VnH(),
               z1.VnH(),
               z2.VnH(),
               p1,
               SVEMemOperand(sp, x19, LSL, 1)),
          "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [sp, x19, lsl #1]");
  COMPARE(st4h(z31.VnH(),
               z0.VnH(),
               z1.VnH(),
               z2.VnH(),
               p7,
               SVEMemOperand(sp, x19, LSL, 1)),
          "st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x19, lsl #1]");

  COMPARE(st4w(z31.VnS(),
               z0.VnS(),
               z1.VnS(),
               z2.VnS(),
               p7,
               SVEMemOperand(x20, x19, LSL, 2)),
          "st4w {z31.s, z0.s, z1.s, z2.s}, p7, [x20, x19, lsl #2]");
  COMPARE(st4w(z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               z19.VnS(),
               p7,
               SVEMemOperand(sp, x19, LSL, 2)),
          "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]");
  // NOTE(review): identical to the preceding case; possibly meant to vary a
  // register or the predicate.
  COMPARE(st4w(z16.VnS(),
               z17.VnS(),
               z18.VnS(),
               z19.VnS(),
               p7,
               SVEMemOperand(sp, x19, LSL, 2)),
          "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]");

  COMPARE(st4d(z16.VnD(),
               z17.VnD(),
               z18.VnD(),
               z19.VnD(),
               p0,
               SVEMemOperand(x20, x19, LSL, 3)),
          "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [x20, x19, lsl #3]");
  COMPARE(st4d(z16.VnD(),
               z17.VnD(),
               z18.VnD(),
               z19.VnD(),
               p0,
               SVEMemOperand(sp, x19, LSL, 3)),
          "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [sp, x19, lsl #3]");
  COMPARE(st4d(z25.VnD(),
               z26.VnD(),
               z27.VnD(),
               z28.VnD(),
               p0,
               SVEMemOperand(sp, x19, LSL, 3)),
          "st4d {z25.d, z26.d, z27.d, z28.d}, p0, [sp, x19, lsl #3]");

  CLEANUP();
}
5377
// Disassembly of the indexed forms of sdot and udot: a D-lane accumulator
// with H-lane sources and an S-lane accumulator with B-lane sources, each
// with an element index on the last operand.
TEST(sve_mul_index) {
  SETUP();

  COMPARE(sdot(z17.VnD(), z21.VnH(), z15.VnH(), 0),
          "sdot z17.d, z21.h, z15.h[0]");
  COMPARE(sdot(z28.VnS(), z9.VnB(), z7.VnB(), 1), "sdot z28.s, z9.b, z7.b[1]");
  COMPARE(udot(z26.VnD(), z15.VnH(), z1.VnH(), 1),
          "udot z26.d, z15.h, z1.h[1]");
  COMPARE(udot(z23.VnS(), z24.VnB(), z5.VnB(), 3),
          "udot z23.s, z24.b, z5.b[3]");

  CLEANUP();
}
5391
// Expected instruction sequences for the indexed Sdot and Udot macros. The
// same five register arrangements are exercised for each macro:
//  - destination equal to the accumulator: a single dot instruction;
//  - destination equal to the first multiplicand: movprfx into z31, the dot
//    instruction on z31, then a mov back to the destination;
//  - destination equal to the indexed multiplicand: same z31 sequence;
//  - destination distinct from every source: movprfx straight into the
//    destination followed by the dot instruction;
//  - all operands the same register: a single dot instruction.
TEST(sve_mul_index_macro) {
  SETUP();

  COMPARE_MACRO(Sdot(z0.VnS(), z0.VnS(), z2.VnB(), z4.VnB(), 0),
                "sdot z0.s, z2.b, z4.b[0]");
  COMPARE_MACRO(Sdot(z3.VnD(), z4.VnD(), z3.VnH(), z5.VnH(), 1),
                "movprfx z31, z4\n"
                "sdot z31.d, z3.h, z5.h[1]\n"
                "mov z3.d, z31.d");
  COMPARE_MACRO(Sdot(z4.VnS(), z5.VnS(), z6.VnB(), z4.VnB(), 2),
                "movprfx z31, z5\n"
                "sdot z31.s, z6.b, z4.b[2]\n"
                "mov z4.d, z31.d");
  COMPARE_MACRO(Sdot(z6.VnD(), z7.VnD(), z8.VnH(), z9.VnH(), 0),
                "movprfx z6, z7\n"
                "sdot z6.d, z8.h, z9.h[0]");
  COMPARE_MACRO(Sdot(z5.VnD(), z5.VnD(), z5.VnH(), z5.VnH(), 1),
                "sdot z5.d, z5.h, z5.h[1]");

  COMPARE_MACRO(Udot(z0.VnD(), z0.VnD(), z2.VnH(), z4.VnH(), 1),
                "udot z0.d, z2.h, z4.h[1]");
  COMPARE_MACRO(Udot(z3.VnS(), z4.VnS(), z3.VnB(), z5.VnB(), 3),
                "movprfx z31, z4\n"
                "udot z31.s, z3.b, z5.b[3]\n"
                "mov z3.d, z31.d");
  COMPARE_MACRO(Udot(z4.VnD(), z5.VnD(), z6.VnH(), z4.VnH(), 0),
                "movprfx z31, z5\n"
                "udot z31.d, z6.h, z4.h[0]\n"
                "mov z4.d, z31.d");
  COMPARE_MACRO(Udot(z9.VnS(), z8.VnS(), z7.VnB(), z6.VnB(), 2),
                "movprfx z9, z8\n"
                "udot z9.s, z7.b, z6.b[2]");
  COMPARE_MACRO(Udot(z5.VnS(), z5.VnS(), z5.VnB(), z5.VnB(), 1),
                "udot z5.s, z5.b, z5.b[1]");
  CLEANUP();
}
5428
// Disassembly of the partition-break instructions (brka, brkas, brkb, brkbs,
// brkn, brkns) with zeroing and, for brka/brkb, merging predication. The
// Brkn/Brkns macro cases use a destination that differs from the last source
// and expect a predicate mov ahead of the break instruction.
TEST(sve_partition_break) {
  SETUP();

  COMPARE(brkas(p8.VnB(), p5.Zeroing(), p4.VnB()), "brkas p8.b, p5/z, p4.b");
  COMPARE(brka(p11.VnB(), p7.Zeroing(), p15.VnB()), "brka p11.b, p7/z, p15.b");
  COMPARE(brka(p12.VnB(), p8.Merging(), p13.VnB()), "brka p12.b, p8/m, p13.b");
  COMPARE(brkbs(p6.VnB(), p9.Zeroing(), p14.VnB()), "brkbs p6.b, p9/z, p14.b");
  COMPARE(brkb(p11.VnB(), p6.Zeroing(), p4.VnB()), "brkb p11.b, p6/z, p4.b");
  COMPARE(brkb(p12.VnB(), p7.Merging(), p5.VnB()), "brkb p12.b, p7/m, p5.b");
  COMPARE(brkns(p2.VnB(), p11.Zeroing(), p0.VnB(), p2.VnB()),
          "brkns p2.b, p11/z, p0.b, p2.b");
  COMPARE(brkn(p4.VnB(), p3.Zeroing(), p1.VnB(), p4.VnB()),
          "brkn p4.b, p3/z, p1.b, p4.b");

  COMPARE_MACRO(Brkns(p3.VnB(), p10.Zeroing(), p2.VnB(), p5.VnB()),
                "mov p3.b, p5.b\n"
                "brkns p3.b, p10/z, p2.b, p3.b");
  COMPARE_MACRO(Brkn(p5.VnB(), p4.Zeroing(), p3.VnB(), p7.VnB()),
                "mov p5.b, p7.b\n"
                "brkn p5.b, p4/z, p3.b, p5.b");

  CLEANUP();
}
5452
// Disassembly of the predicate-register permutes: rev, trn1/trn2, uzp1/uzp2
// and zip1/zip2 for each of the B, H, S and D lane sizes, plus punpkhi and
// punpklo (B-lane source, H-lane destination).
TEST(sve_permute_predicate) {
  SETUP();

  COMPARE(rev(p15.VnB(), p6.VnB()), "rev p15.b, p6.b");
  COMPARE(rev(p15.VnH(), p6.VnH()), "rev p15.h, p6.h");
  COMPARE(rev(p15.VnS(), p6.VnS()), "rev p15.s, p6.s");
  COMPARE(rev(p15.VnD(), p6.VnD()), "rev p15.d, p6.d");
  COMPARE(trn1(p13.VnB(), p15.VnB(), p12.VnB()), "trn1 p13.b, p15.b, p12.b");
  COMPARE(trn1(p13.VnH(), p15.VnH(), p12.VnH()), "trn1 p13.h, p15.h, p12.h");
  COMPARE(trn1(p13.VnS(), p15.VnS(), p12.VnS()), "trn1 p13.s, p15.s, p12.s");
  COMPARE(trn1(p13.VnD(), p15.VnD(), p12.VnD()), "trn1 p13.d, p15.d, p12.d");
  COMPARE(trn2(p5.VnB(), p5.VnB(), p6.VnB()), "trn2 p5.b, p5.b, p6.b");
  COMPARE(trn2(p5.VnH(), p5.VnH(), p6.VnH()), "trn2 p5.h, p5.h, p6.h");
  COMPARE(trn2(p5.VnS(), p5.VnS(), p6.VnS()), "trn2 p5.s, p5.s, p6.s");
  COMPARE(trn2(p5.VnD(), p5.VnD(), p6.VnD()), "trn2 p5.d, p5.d, p6.d");
  COMPARE(uzp1(p14.VnB(), p4.VnB(), p14.VnB()), "uzp1 p14.b, p4.b, p14.b");
  COMPARE(uzp1(p14.VnH(), p4.VnH(), p14.VnH()), "uzp1 p14.h, p4.h, p14.h");
  COMPARE(uzp1(p14.VnS(), p4.VnS(), p14.VnS()), "uzp1 p14.s, p4.s, p14.s");
  COMPARE(uzp1(p14.VnD(), p4.VnD(), p14.VnD()), "uzp1 p14.d, p4.d, p14.d");
  COMPARE(uzp2(p6.VnB(), p11.VnB(), p2.VnB()), "uzp2 p6.b, p11.b, p2.b");
  COMPARE(uzp2(p6.VnH(), p11.VnH(), p2.VnH()), "uzp2 p6.h, p11.h, p2.h");
  COMPARE(uzp2(p6.VnS(), p11.VnS(), p2.VnS()), "uzp2 p6.s, p11.s, p2.s");
  COMPARE(uzp2(p6.VnD(), p11.VnD(), p2.VnD()), "uzp2 p6.d, p11.d, p2.d");
  COMPARE(zip1(p13.VnB(), p4.VnB(), p12.VnB()), "zip1 p13.b, p4.b, p12.b");
  COMPARE(zip1(p13.VnH(), p4.VnH(), p12.VnH()), "zip1 p13.h, p4.h, p12.h");
  COMPARE(zip1(p13.VnS(), p4.VnS(), p12.VnS()), "zip1 p13.s, p4.s, p12.s");
  COMPARE(zip1(p13.VnD(), p4.VnD(), p12.VnD()), "zip1 p13.d, p4.d, p12.d");
  COMPARE(zip2(p1.VnB(), p15.VnB(), p2.VnB()), "zip2 p1.b, p15.b, p2.b");
  COMPARE(zip2(p1.VnH(), p15.VnH(), p2.VnH()), "zip2 p1.h, p15.h, p2.h");
  COMPARE(zip2(p1.VnS(), p15.VnS(), p2.VnS()), "zip2 p1.s, p15.s, p2.s");
  COMPARE(zip2(p1.VnD(), p15.VnD(), p2.VnD()), "zip2 p1.d, p15.d, p2.d");
  COMPARE(punpkhi(p12.VnH(), p6.VnB()), "punpkhi p12.h, p6.b");
  COMPARE(punpklo(p4.VnH(), p14.VnB()), "punpklo p4.h, p14.b");

  CLEANUP();
}
5489
// Expected output of the Ext macro when the destination is also the first
// source, using the byte offsets 0, 1, 254 and 255. Each case expects a
// single ext instruction.
TEST(sve_permute_vector_extract) {
  SETUP();

  COMPARE_MACRO(Ext(z10.VnB(), z10.VnB(), z2.VnB(), 0),
                "ext z10.b, z10.b, z2.b, #0");
  COMPARE_MACRO(Ext(z10.VnB(), z10.VnB(), z2.VnB(), 1),
                "ext z10.b, z10.b, z2.b, #1");
  COMPARE_MACRO(Ext(z2.VnB(), z2.VnB(), z10.VnB(), 254),
                "ext z2.b, z2.b, z10.b, #254");
  COMPARE_MACRO(Ext(z2.VnB(), z2.VnB(), z10.VnB(), 255),
                "ext z2.b, z2.b, z10.b, #255");

  CLEANUP();
}
5504
// Disassembly of the vector-register interleaving permutes: trn1/trn2,
// uzp1/uzp2 and zip1/zip2, each for the B, H, S and D lane sizes.
TEST(sve_permute_vector_interleaving) {
  SETUP();

  COMPARE(trn1(z25.VnB(), z31.VnB(), z17.VnB()), "trn1 z25.b, z31.b, z17.b");
  COMPARE(trn1(z25.VnH(), z31.VnH(), z17.VnH()), "trn1 z25.h, z31.h, z17.h");
  COMPARE(trn1(z25.VnS(), z31.VnS(), z17.VnS()), "trn1 z25.s, z31.s, z17.s");
  COMPARE(trn1(z25.VnD(), z31.VnD(), z17.VnD()), "trn1 z25.d, z31.d, z17.d");
  COMPARE(trn2(z23.VnB(), z19.VnB(), z5.VnB()), "trn2 z23.b, z19.b, z5.b");
  COMPARE(trn2(z23.VnH(), z19.VnH(), z5.VnH()), "trn2 z23.h, z19.h, z5.h");
  COMPARE(trn2(z23.VnS(), z19.VnS(), z5.VnS()), "trn2 z23.s, z19.s, z5.s");
  COMPARE(trn2(z23.VnD(), z19.VnD(), z5.VnD()), "trn2 z23.d, z19.d, z5.d");
  COMPARE(uzp1(z3.VnB(), z27.VnB(), z10.VnB()), "uzp1 z3.b, z27.b, z10.b");
  COMPARE(uzp1(z3.VnH(), z27.VnH(), z10.VnH()), "uzp1 z3.h, z27.h, z10.h");
  COMPARE(uzp1(z3.VnS(), z27.VnS(), z10.VnS()), "uzp1 z3.s, z27.s, z10.s");
  COMPARE(uzp1(z3.VnD(), z27.VnD(), z10.VnD()), "uzp1 z3.d, z27.d, z10.d");
  COMPARE(uzp2(z22.VnB(), z26.VnB(), z15.VnB()), "uzp2 z22.b, z26.b, z15.b");
  COMPARE(uzp2(z22.VnH(), z26.VnH(), z15.VnH()), "uzp2 z22.h, z26.h, z15.h");
  COMPARE(uzp2(z22.VnS(), z26.VnS(), z15.VnS()), "uzp2 z22.s, z26.s, z15.s");
  COMPARE(uzp2(z22.VnD(), z26.VnD(), z15.VnD()), "uzp2 z22.d, z26.d, z15.d");
  COMPARE(zip1(z31.VnB(), z2.VnB(), z20.VnB()), "zip1 z31.b, z2.b, z20.b");
  COMPARE(zip1(z31.VnH(), z2.VnH(), z20.VnH()), "zip1 z31.h, z2.h, z20.h");
  COMPARE(zip1(z31.VnS(), z2.VnS(), z20.VnS()), "zip1 z31.s, z2.s, z20.s");
  COMPARE(zip1(z31.VnD(), z2.VnD(), z20.VnD()), "zip1 z31.d, z2.d, z20.d");
  COMPARE(zip2(z15.VnB(), z23.VnB(), z12.VnB()), "zip2 z15.b, z23.b, z12.b");
  COMPARE(zip2(z15.VnH(), z23.VnH(), z12.VnH()), "zip2 z15.h, z23.h, z12.h");
  COMPARE(zip2(z15.VnS(), z23.VnS(), z12.VnS()), "zip2 z15.s, z23.s, z12.s");
  COMPARE(zip2(z15.VnD(), z23.VnD(), z12.VnD()), "zip2 z15.d, z23.d, z12.d");

  CLEANUP();
}
5535
// Disassembly of the predicated (merging) copy from a scalar register, issued
// as cpy, as mov and through the Mov macro. Sources cover general-purpose
// registers (including wsp/sp) and SIMD&FP scalar registers. Every case is
// expected to print as "mov".
TEST(sve_cpy_reg) {
  SETUP();

  COMPARE(cpy(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
  COMPARE(cpy(z2.VnH(), p6.Merging(), w3), "mov z2.h, p6/m, w3");
  // An X register paired with S lanes is expected to print as a W register.
  COMPARE(cpy(z3.VnS(), p7.Merging(), x5), "mov z3.s, p7/m, w5");
  COMPARE(cpy(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
  COMPARE(cpy(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp");

  COMPARE(cpy(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23");
  COMPARE(cpy(z27.VnH(), p3.Merging(), h23), "mov z27.h, p3/m, h23");
  COMPARE(cpy(z27.VnS(), p3.Merging(), s23), "mov z27.s, p3/m, s23");
  COMPARE(cpy(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23");

  COMPARE(mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
  COMPARE(mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
  COMPARE(mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp");
  COMPARE(mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23");
  COMPARE(mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23");

  COMPARE_MACRO(Mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
  COMPARE_MACRO(Mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
  COMPARE_MACRO(Mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp");
  COMPARE_MACRO(Mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23");
  COMPARE_MACRO(Mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23");

  CLEANUP();
}
5564
// Disassembly of the predicated vector permutes: compact, splice,
// clasta/clastb (vector, general-purpose and SIMD&FP scalar destinations) and
// lasta/lastb. The Splice, Clasta and Clastb macro cases list the sequences
// expected when the destination differs from the first source (movprfx), when
// it also matches the last source (z31 temporary plus a final mov), and when
// all registers are the same (single instruction).
TEST(sve_permute_vector_predicated) {
  SETUP();

  COMPARE(compact(z13.VnS(), p7, z1.VnS()), "compact z13.s, p7, z1.s");
  COMPARE(compact(z13.VnD(), p7, z1.VnD()), "compact z13.d, p7, z1.d");
  COMPARE(splice(z7.VnB(), p6, z7.VnB(), z2.VnB()),
          "splice z7.b, p6, z7.b, z2.b");
  COMPARE(splice(z7.VnH(), p6, z7.VnH(), z2.VnH()),
          "splice z7.h, p6, z7.h, z2.h");
  COMPARE(splice(z7.VnS(), p6, z7.VnS(), z2.VnS()),
          "splice z7.s, p6, z7.s, z2.s");
  COMPARE(splice(z7.VnD(), p6, z7.VnD(), z2.VnD()),
          "splice z7.d, p6, z7.d, z2.d");

  COMPARE_MACRO(Splice(z0.VnB(), p1, z2.VnB(), z4.VnB()),
                "movprfx z0, z2\n"
                "splice z0.b, p1, z0.b, z4.b");
  COMPARE_MACRO(Splice(z0.VnH(), p1, z2.VnH(), z0.VnH()),
                "movprfx z31, z2\n"
                "splice z31.h, p1, z31.h, z0.h\n"
                "mov z0.d, z31.d");

  COMPARE(clasta(z4.VnB(), p2, z4.VnB(), z12.VnB()),
          "clasta z4.b, p2, z4.b, z12.b");
  COMPARE(clasta(z4.VnH(), p2, z4.VnH(), z12.VnH()),
          "clasta z4.h, p2, z4.h, z12.h");
  COMPARE(clasta(z4.VnS(), p2, z4.VnS(), z12.VnS()),
          "clasta z4.s, p2, z4.s, z12.s");
  COMPARE(clasta(z4.VnD(), p2, z4.VnD(), z12.VnD()),
          "clasta z4.d, p2, z4.d, z12.d");
  COMPARE(clastb(z29.VnB(), p7, z29.VnB(), z26.VnB()),
          "clastb z29.b, p7, z29.b, z26.b");
  COMPARE(clastb(z29.VnH(), p7, z29.VnH(), z26.VnH()),
          "clastb z29.h, p7, z29.h, z26.h");
  COMPARE(clastb(z29.VnS(), p7, z29.VnS(), z26.VnS()),
          "clastb z29.s, p7, z29.s, z26.s");
  COMPARE(clastb(z29.VnD(), p7, z29.VnD(), z26.VnD()),
          "clastb z29.d, p7, z29.d, z26.d");

  COMPARE_MACRO(Clasta(z5.VnD(), p2, z4.VnD(), z12.VnD()),
                "movprfx z5, z4\n"
                "clasta z5.d, p2, z5.d, z12.d");
  COMPARE_MACRO(Clastb(z30.VnD(), p7, z29.VnD(), z26.VnD()),
                "movprfx z30, z29\n"
                "clastb z30.d, p7, z30.d, z26.d");
  COMPARE_MACRO(Clasta(z9.VnH(), p3, z8.VnH(), z9.VnH()),
                "movprfx z31, z8\n"
                "clasta z31.h, p3, z31.h, z9.h\n"
                "mov z9.d, z31.d");
  COMPARE_MACRO(Clastb(z1.VnS(), p1, z1.VnS(), z1.VnS()),
                "clastb z1.s, p1, z1.s, z1.s");

  COMPARE(clasta(w6, p0, w6, z13.VnB()), "clasta w6, p0, w6, z13.b");
  COMPARE(clasta(w6, p0, w6, z13.VnH()), "clasta w6, p0, w6, z13.h");
  COMPARE(clasta(w6, p0, w6, z13.VnS()), "clasta w6, p0, w6, z13.s");
  COMPARE(clasta(x6, p0, x6, z13.VnD()), "clasta x6, p0, x6, z13.d");
  COMPARE(clastb(w21, p2, w21, z27.VnB()), "clastb w21, p2, w21, z27.b");
  COMPARE(clastb(w21, p2, w21, z27.VnH()), "clastb w21, p2, w21, z27.h");
  COMPARE(clastb(w21, p2, w21, z27.VnS()), "clastb w21, p2, w21, z27.s");
  COMPARE(clastb(x21, p2, x21, z27.VnD()), "clastb x21, p2, x21, z27.d");

  COMPARE(clasta(b8, p6, b8, z7.VnB()), "clasta b8, p6, b8, z7.b");
  COMPARE(clasta(h8, p6, h8, z7.VnH()), "clasta h8, p6, h8, z7.h");
  COMPARE(clasta(s8, p6, s8, z7.VnS()), "clasta s8, p6, s8, z7.s");
  COMPARE(clasta(d8, p6, d8, z7.VnD()), "clasta d8, p6, d8, z7.d");
  COMPARE(clastb(b17, p0, b17, z19.VnB()), "clastb b17, p0, b17, z19.b");
  COMPARE(clastb(h17, p0, h17, z19.VnH()), "clastb h17, p0, h17, z19.h");
  COMPARE(clastb(s17, p0, s17, z19.VnS()), "clastb s17, p0, s17, z19.s");
  COMPARE(clastb(d17, p0, d17, z19.VnD()), "clastb d17, p0, d17, z19.d");

  COMPARE(lasta(w15, p3, z3.VnB()), "lasta w15, p3, z3.b");
  COMPARE(lasta(w15, p3, z3.VnH()), "lasta w15, p3, z3.h");
  COMPARE(lasta(w15, p3, z3.VnS()), "lasta w15, p3, z3.s");
  COMPARE(lasta(x15, p3, z3.VnD()), "lasta x15, p3, z3.d");
  COMPARE(lasta(b30, p4, z24.VnB()), "lasta b30, p4, z24.b");
  COMPARE(lasta(h30, p4, z24.VnH()), "lasta h30, p4, z24.h");
  COMPARE(lasta(s30, p4, z24.VnS()), "lasta s30, p4, z24.s");
  COMPARE(lasta(d30, p4, z24.VnD()), "lasta d30, p4, z24.d");

  COMPARE(lastb(w9, p2, z16.VnB()), "lastb w9, p2, z16.b");
  COMPARE(lastb(w9, p2, z16.VnH()), "lastb w9, p2, z16.h");
  COMPARE(lastb(w9, p2, z16.VnS()), "lastb w9, p2, z16.s");
  COMPARE(lastb(x9, p2, z16.VnD()), "lastb x9, p2, z16.d");
  COMPARE(lastb(b14, p5, z2.VnB()), "lastb b14, p5, z2.b");
  COMPARE(lastb(h14, p5, z2.VnH()), "lastb h14, p5, z2.h");
  COMPARE(lastb(s14, p5, z2.VnS()), "lastb s14, p5, z2.s");
  COMPARE(lastb(d14, p5, z2.VnD()), "lastb d14, p5, z2.d");

  CLEANUP();
}
5655
// Expected output of the Rbit, Revb, Revh and Revw macros with merging
// predication and distinct destination/source registers. Rbit is checked for
// B, H, S and D lanes, Revb for H, S and D, Revh for S and D, Revw for D.
TEST(sve_reverse) {
  SETUP();

  COMPARE_MACRO(Rbit(z22.VnB(), p2.Merging(), z24.VnB()),
                "rbit z22.b, p2/m, z24.b");
  COMPARE_MACRO(Rbit(z22.VnH(), p2.Merging(), z24.VnH()),
                "rbit z22.h, p2/m, z24.h");
  COMPARE_MACRO(Rbit(z22.VnS(), p2.Merging(), z24.VnS()),
                "rbit z22.s, p2/m, z24.s");
  COMPARE_MACRO(Rbit(z22.VnD(), p2.Merging(), z24.VnD()),
                "rbit z22.d, p2/m, z24.d");
  COMPARE_MACRO(Revb(z14.VnH(), p7.Merging(), z25.VnH()),
                "revb z14.h, p7/m, z25.h");
  COMPARE_MACRO(Revb(z14.VnS(), p7.Merging(), z25.VnS()),
                "revb z14.s, p7/m, z25.s");
  COMPARE_MACRO(Revb(z14.VnD(), p7.Merging(), z25.VnD()),
                "revb z14.d, p7/m, z25.d");
  COMPARE_MACRO(Revh(z16.VnS(), p2.Merging(), z4.VnS()),
                "revh z16.s, p2/m, z4.s");
  COMPARE_MACRO(Revh(z16.VnD(), p2.Merging(), z4.VnD()),
                "revh z16.d, p2/m, z4.d");
  COMPARE_MACRO(Revw(z26.VnD(), p5.Merging(), z10.VnD()),
                "revw z26.d, p5/m, z10.d");

  CLEANUP();
}
5682
// Disassembly of the unpredicated vector permutes: dup (scalar and indexed
// forms) together with its mov spelling and the Mov macro, insr, rev,
// sunpkhi/sunpklo, uunpkhi/uunpklo and single-register tbl. Every dup case is
// expected to print as "mov"; the index-0 H-lane case is expected to print
// the source as the scalar register h12.
TEST(sve_permute_vector_unpredicated) {
  SETUP();

  COMPARE(dup(z4.VnB(), w7), "mov z4.b, w7");
  COMPARE(dup(z5.VnH(), w6), "mov z5.h, w6");
  // sp paired with S lanes is expected to print as wsp.
  COMPARE(dup(z6.VnS(), sp), "mov z6.s, wsp");
  COMPARE(dup(z7.VnD(), x4), "mov z7.d, x4");
  COMPARE(dup(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
  COMPARE(dup(z26.VnH(), z12.VnH(), 0), "mov z26.h, h12");

  COMPARE(mov(z4.VnB(), w7), "mov z4.b, w7");
  COMPARE(mov(z5.VnH(), w6), "mov z5.h, w6");
  COMPARE(mov(z6.VnS(), sp), "mov z6.s, wsp");
  COMPARE(mov(z7.VnD(), x4), "mov z7.d, x4");
  COMPARE(mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
  COMPARE(mov(z0.VnS(), s1), "mov z0.s, s1");

  COMPARE_MACRO(Mov(z7.VnD(), x4), "mov z7.d, x4");
  COMPARE_MACRO(Mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
  COMPARE_MACRO(Mov(z2.VnB(), b13), "mov z2.b, b13");
  COMPARE_MACRO(Mov(z31.VnQ(), q31), "mov z31.q, q31");

  // Test dup with reserved tsz field: the raw encoding is emitted directly and
  // the output is only required to start with "unimplemented".
  COMPARE_PREFIX(dci(0x05202000), "unimplemented");

  COMPARE(insr(z15.VnB(), w13), "insr z15.b, w13");
  COMPARE(insr(z16.VnH(), w14), "insr z16.h, w14");
  COMPARE(insr(z17.VnS(), w15), "insr z17.s, w15");
  COMPARE(insr(z18.VnD(), x16), "insr z18.d, x16");
  COMPARE(insr(z5.VnB(), b3), "insr z5.b, b3");
  COMPARE(insr(z6.VnH(), h15), "insr z6.h, h15");
  COMPARE(insr(z7.VnS(), s22), "insr z7.s, s22");
  COMPARE(insr(z8.VnD(), d30), "insr z8.d, d30");
  COMPARE(rev(z13.VnB(), z10.VnB()), "rev z13.b, z10.b");
  COMPARE(rev(z14.VnH(), z10.VnH()), "rev z14.h, z10.h");
  COMPARE(rev(z15.VnS(), z10.VnS()), "rev z15.s, z10.s");
  COMPARE(rev(z16.VnD(), z10.VnD()), "rev z16.d, z10.d");
  COMPARE(sunpkhi(z10.VnH(), z11.VnB()), "sunpkhi z10.h, z11.b");
  COMPARE(sunpkhi(z11.VnS(), z11.VnH()), "sunpkhi z11.s, z11.h");
  COMPARE(sunpkhi(z12.VnD(), z11.VnS()), "sunpkhi z12.d, z11.s");
  COMPARE(sunpklo(z20.VnH(), z12.VnB()), "sunpklo z20.h, z12.b");
  COMPARE(sunpklo(z21.VnS(), z12.VnH()), "sunpklo z21.s, z12.h");
  COMPARE(sunpklo(z22.VnD(), z12.VnS()), "sunpklo z22.d, z12.s");
  COMPARE(uunpkhi(z17.VnH(), z14.VnB()), "uunpkhi z17.h, z14.b");
  COMPARE(uunpkhi(z18.VnS(), z14.VnH()), "uunpkhi z18.s, z14.h");
  COMPARE(uunpkhi(z19.VnD(), z14.VnS()), "uunpkhi z19.d, z14.s");
  COMPARE(uunpklo(z27.VnH(), z6.VnB()), "uunpklo z27.h, z6.b");
  COMPARE(uunpklo(z28.VnS(), z6.VnH()), "uunpklo z28.s, z6.h");
  COMPARE(uunpklo(z29.VnD(), z6.VnS()), "uunpklo z29.d, z6.s");
  COMPARE(tbl(z24.VnB(), z29.VnB(), z0.VnB()), "tbl z24.b, {z29.b}, z0.b");
  COMPARE(tbl(z25.VnH(), z29.VnH(), z1.VnH()), "tbl z25.h, {z29.h}, z1.h");
  COMPARE(tbl(z26.VnS(), z29.VnS(), z2.VnS()), "tbl z26.s, {z29.s}, z2.s");
  COMPARE(tbl(z27.VnD(), z29.VnD(), z3.VnD()), "tbl z27.d, {z29.d}, z3.d");

  CLEANUP();
}
5739
// Disassembly checks for `cntp` at each predicate lane size, through both the
// assembler and the MacroAssembler.
TEST(sve_predicate_count) {
  SETUP();

  COMPARE(cntp(x9, p1, p0.VnB()), "cntp x9, p1, p0.b");
  COMPARE(cntp(x10, p12, p1.VnH()), "cntp x10, p12, p1.h");
  COMPARE(cntp(x11, p13, p14.VnS()), "cntp x11, p13, p14.s");
  COMPARE(cntp(x12, p4, p15.VnD()), "cntp x12, p4, p15.d");

  COMPARE_MACRO(Cntp(x0, p1, p2.VnB()), "cntp x0, p1, p2.b");
  // The macro is given a W destination here, but the expected text still
  // names the X register.
  COMPARE_MACRO(Cntp(w10, p11, p12.VnH()), "cntp x10, p11, p12.h");

  CLEANUP();
}
5753
// Disassembly checks for the predicate logical operations (and their
// flag-setting `s` variants), `sel` on predicates, and the aliases that are
// expected to be printed for particular operand combinations.
TEST(sve_predicate_logical_op) {
  SETUP();

  // Canonical forms: operands are chosen so that no alias is expected.
  COMPARE(ands(p13.VnB(), p9.Zeroing(), p5.VnB(), p15.VnB()),
          "ands p13.b, p9/z, p5.b, p15.b");
  COMPARE(and_(p9.VnB(), p3.Zeroing(), p0.VnB(), p14.VnB()),
          "and p9.b, p3/z, p0.b, p14.b");
  COMPARE(bics(p8.VnB(), p5.Zeroing(), p3.VnB(), p1.VnB()),
          "bics p8.b, p5/z, p3.b, p1.b");
  COMPARE(bic(p5.VnB(), p5.Zeroing(), p9.VnB(), p9.VnB()),
          "bic p5.b, p5/z, p9.b, p9.b");
  COMPARE(eors(p11.VnB(), p1.Zeroing(), p1.VnB(), p2.VnB()),
          "eors p11.b, p1/z, p1.b, p2.b");
  COMPARE(eor(p8.VnB(), p6.Zeroing(), p1.VnB(), p11.VnB()),
          "eor p8.b, p6/z, p1.b, p11.b");
  COMPARE(nands(p13.VnB(), p0.Zeroing(), p9.VnB(), p4.VnB()),
          "nands p13.b, p0/z, p9.b, p4.b");
  COMPARE(nand(p7.VnB(), p7.Zeroing(), p15.VnB(), p2.VnB()),
          "nand p7.b, p7/z, p15.b, p2.b");
  COMPARE(nors(p8.VnB(), p8.Zeroing(), p12.VnB(), p11.VnB()),
          "nors p8.b, p8/z, p12.b, p11.b");
  COMPARE(nor(p3.VnB(), p6.Zeroing(), p15.VnB(), p12.VnB()),
          "nor p3.b, p6/z, p15.b, p12.b");
  COMPARE(orns(p10.VnB(), p11.Zeroing(), p0.VnB(), p15.VnB()),
          "orns p10.b, p11/z, p0.b, p15.b");
  COMPARE(orn(p0.VnB(), p1.Zeroing(), p7.VnB(), p4.VnB()),
          "orn p0.b, p1/z, p7.b, p4.b");
  COMPARE(orrs(p14.VnB(), p6.Zeroing(), p1.VnB(), p5.VnB()),
          "orrs p14.b, p6/z, p1.b, p5.b");
  COMPARE(orr(p13.VnB(), p7.Zeroing(), p10.VnB(), p4.VnB()),
          "orr p13.b, p7/z, p10.b, p4.b");
  COMPARE(sel(p9.VnB(), p15, p15.VnB(), p7.VnB()),
          "sel p9.b, p15, p15.b, p7.b");

  // Aliases. Each one is checked twice: first through the underlying
  // instruction with operands that select the alias, then through the alias's
  // own assembler method.
  // eor/eors whose last operand equals the governing predicate: not/nots.
  COMPARE(eor(p7.VnB(), p6.Zeroing(), p1.VnB(), p6.VnB()),
          "not p7.b, p6/z, p1.b");
  COMPARE(not_(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b");
  COMPARE(eors(p6.VnB(), p5.Zeroing(), p2.VnB(), p5.VnB()),
          "nots p6.b, p5/z, p2.b");
  COMPARE(nots(p6.VnB(), p5.Zeroing(), p2.VnB()), "nots p6.b, p5/z, p2.b");
  // and/ands with identical source operands: zeroing-predicated mov/movs.
  COMPARE(ands(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()),
          "movs p5.b, p4/z, p3.b");
  COMPARE(movs(p5.VnB(), p4.Zeroing(), p3.VnB()), "movs p5.b, p4/z, p3.b");
  COMPARE(and_(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()),
          "mov p5.b, p4/z, p3.b");
  COMPARE(mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b");
  // orr/orrs where the governing predicate and both sources are the same
  // register: unpredicated mov/movs.
  COMPARE(orrs(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), "movs p4.b, p3.b");
  COMPARE(movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b");
  COMPARE(orr(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), "mov p4.b, p3.b");
  COMPARE(mov(p4.VnB(), p3.VnB()), "mov p4.b, p3.b");
  // sel whose last operand equals the destination: merging-predicated mov.
  COMPARE(sel(p3.VnB(), p2, p4.VnB(), p3.VnB()), "mov p3.b, p2/m, p4.b");
  COMPARE(mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b");

  // The MacroAssembler wrappers for the same aliases.
  COMPARE_MACRO(Not(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b");
  COMPARE_MACRO(Nots(p6.VnB(), p5.Zeroing(), p2.VnB()),
                "nots p6.b, p5/z, p2.b");
  COMPARE_MACRO(Movs(p5.VnB(), p4.Zeroing(), p3.VnB()),
                "movs p5.b, p4/z, p3.b");
  COMPARE_MACRO(Mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b");
  COMPARE_MACRO(Movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b");
  // `Mov` is called without lane sizes here; the expected text uses `.b`.
  COMPARE_MACRO(Mov(p4, p3), "mov p4.b, p3.b");
  COMPARE_MACRO(Mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b");

  CLEANUP();
}
5820
// Disassembly checks for `pfirst`, plus the instruction sequences expected
// from the `Pfirst` macro when its destination and source registers differ.
TEST(sve_predicate_first_active) {
  SETUP();

  COMPARE(pfirst(p0.VnB(), p7, p0.VnB()), "pfirst p0.b, p7, p0.b");
  COMPARE(pfirst(p7.VnB(), p0, p7.VnB()), "pfirst p7.b, p0, p7.b");

  // Destination equal to the source: a single instruction is expected.
  COMPARE_MACRO(Pfirst(p1.VnB(), p2, p1.VnB()), "pfirst p1.b, p2, p1.b");
  // Destination different from the source: the source is expected to be
  // copied into the destination first.
  COMPARE_MACRO(Pfirst(p3.VnB(), p4, p5.VnB()),
                "mov p3.b, p5.b\n"
                "pfirst p3.b, p4, p3.b");

  {
    // NOTE(review): presumably this makes p7 and p15 available as scratch
    // predicates; confirm against UseScratchRegisterScope.
    UseScratchRegisterScope temps(&masm);
    temps.Include(p7, p15);
    // The destination is also the governing predicate, so the expected
    // sequence saves the governing predicate in p15 before overwriting p6.
    COMPARE_MACRO(Pfirst(p6.VnB(), p6, p0.VnB()),
                  "mov p15.b, p6.b\n"
                  "mov p6.b, p0.b\n"
                  "pfirst p6.b, p15, p6.b");
  }

  CLEANUP();
}
5843
// Disassembly checks for `pnext` at every lane size, plus the instruction
// sequences expected from the `Pnext` macro when its destination and source
// registers differ.
TEST(sve_predicate_next_active) {
  SETUP();

  COMPARE(pnext(p0.VnB(), p8, p0.VnB()), "pnext p0.b, p8, p0.b");
  COMPARE(pnext(p1.VnH(), p9, p1.VnH()), "pnext p1.h, p9, p1.h");
  COMPARE(pnext(p2.VnS(), p10, p2.VnS()), "pnext p2.s, p10, p2.s");
  COMPARE(pnext(p3.VnD(), p11, p3.VnD()), "pnext p3.d, p11, p3.d");

  COMPARE(pnext(p12.VnB(), p4, p12.VnB()), "pnext p12.b, p4, p12.b");
  COMPARE(pnext(p13.VnH(), p5, p13.VnH()), "pnext p13.h, p5, p13.h");
  COMPARE(pnext(p14.VnS(), p6, p14.VnS()), "pnext p14.s, p6, p14.s");
  COMPARE(pnext(p15.VnD(), p7, p15.VnD()), "pnext p15.d, p7, p15.d");

  // Destination equal to the source: a single instruction is expected.
  COMPARE_MACRO(Pnext(p5.VnB(), p9, p5.VnB()), "pnext p5.b, p9, p5.b");
  COMPARE_MACRO(Pnext(p6.VnH(), p8, p6.VnH()), "pnext p6.h, p8, p6.h");
  COMPARE_MACRO(Pnext(p7.VnS(), p5, p7.VnS()), "pnext p7.s, p5, p7.s");
  COMPARE_MACRO(Pnext(p8.VnD(), p6, p8.VnD()), "pnext p8.d, p6, p8.d");

  // Destination different from the source: the source is expected to be
  // copied into the destination first. The copy is printed with `.b` lanes
  // whatever lane size `pnext` itself uses.
  COMPARE_MACRO(Pnext(p6.VnB(), p4, p5.VnB()),
                "mov p6.b, p5.b\n"
                "pnext p6.b, p4, p6.b");
  COMPARE_MACRO(Pnext(p7.VnH(), p3, p8.VnH()),
                "mov p7.b, p8.b\n"
                "pnext p7.h, p3, p7.h");
  COMPARE_MACRO(Pnext(p8.VnS(), p2, p9.VnS()),
                "mov p8.b, p9.b\n"
                "pnext p8.s, p2, p8.s");
  COMPARE_MACRO(Pnext(p9.VnD(), p1, p10.VnD()),
                "mov p9.b, p10.b\n"
                "pnext p9.d, p1, p9.d");

  {
    // NOTE(review): presumably this makes p7 and p8 available as scratch
    // predicates; confirm against UseScratchRegisterScope.
    UseScratchRegisterScope temps(&masm);
    temps.Include(p7, p8);
    // The destination is also the governing predicate, so each expected
    // sequence saves the governing predicate in p8 before overwriting the
    // destination. That holds even in the second case, where the governing
    // predicate is p7 itself.
    COMPARE_MACRO(Pnext(p6.VnB(), p6, p0.VnB()),
                  "mov p8.b, p6.b\n"
                  "mov p6.b, p0.b\n"
                  "pnext p6.b, p8, p6.b");
    COMPARE_MACRO(Pnext(p7.VnH(), p7, p1.VnH()),
                  "mov p8.b, p7.b\n"
                  "mov p7.b, p1.b\n"
                  "pnext p7.h, p8, p7.h");
    COMPARE_MACRO(Pnext(p10.VnS(), p10, p2.VnS()),
                  "mov p8.b, p10.b\n"
                  "mov p10.b, p2.b\n"
                  "pnext p10.s, p8, p10.s");
    COMPARE_MACRO(Pnext(p11.VnD(), p11, p3.VnD()),
                  "mov p8.b, p11.b\n"
                  "mov p11.b, p3.b\n"
                  "pnext p11.d, p8, p11.d");
  }

  CLEANUP();
}
5898
// Disassembly checks for `ptrue` and `ptrues`: named constraint patterns, raw
// immediate patterns, the default pattern, and the MacroAssembler wrappers.
TEST(sve_predicate_initialize) {
  SETUP();

  // Basic forms.
  COMPARE(ptrue(p0.VnB(), SVE_POW2), "ptrue p0.b, pow2");
  COMPARE(ptrue(p1.VnH(), SVE_VL1), "ptrue p1.h, vl1");
  COMPARE(ptrue(p2.VnS(), SVE_VL8), "ptrue p2.s, vl8");
  COMPARE(ptrue(p3.VnD(), SVE_VL16), "ptrue p3.d, vl16");
  COMPARE(ptrue(p4.VnB(), SVE_VL256), "ptrue p4.b, vl256");
  COMPARE(ptrue(p5.VnH(), SVE_MUL3), "ptrue p5.h, mul3");
  COMPARE(ptrue(p6.VnS(), SVE_MUL4), "ptrue p6.s, mul4");
  // SVE_ALL is expected to be printed with no pattern operand at all.
  COMPARE(ptrue(p7.VnD(), SVE_ALL), "ptrue p7.d");

  COMPARE(ptrues(p8.VnB(), SVE_ALL), "ptrues p8.b");
  COMPARE(ptrues(p9.VnH(), SVE_MUL4), "ptrues p9.h, mul4");
  COMPARE(ptrues(p10.VnS(), SVE_MUL3), "ptrues p10.s, mul3");
  COMPARE(ptrues(p11.VnD(), SVE_VL256), "ptrues p11.d, vl256");
  COMPARE(ptrues(p12.VnB(), SVE_VL16), "ptrues p12.b, vl16");
  COMPARE(ptrues(p13.VnH(), SVE_VL8), "ptrues p13.h, vl8");
  COMPARE(ptrues(p14.VnS(), SVE_VL1), "ptrues p14.s, vl1");
  COMPARE(ptrues(p15.VnD(), SVE_POW2), "ptrues p15.d, pow2");

  // The Assembler supports arbitrary immediates. Values that correspond to a
  // named pattern (0xd, 0x1d) are expected to be printed by name; the others
  // are expected to be printed as `#imm`.
  COMPARE(ptrue(p7.VnS(), 0xd), "ptrue p7.s, vl256");
  COMPARE(ptrue(p8.VnD(), 0xe), "ptrue p8.d, #0xe");
  COMPARE(ptrue(p9.VnB(), 0x15), "ptrue p9.b, #0x15");
  COMPARE(ptrue(p10.VnH(), 0x19), "ptrue p10.h, #0x19");
  COMPARE(ptrue(p11.VnS(), 0x1a), "ptrue p11.s, #0x1a");
  COMPARE(ptrue(p12.VnD(), 0x1c), "ptrue p12.d, #0x1c");
  COMPARE(ptrue(p13.VnB(), 0x1d), "ptrue p13.b, mul4");

  COMPARE(ptrues(p14.VnS(), 0xd), "ptrues p14.s, vl256");
  COMPARE(ptrues(p15.VnD(), 0xe), "ptrues p15.d, #0xe");
  COMPARE(ptrues(p0.VnB(), 0x15), "ptrues p0.b, #0x15");
  COMPARE(ptrues(p1.VnH(), 0x19), "ptrues p1.h, #0x19");
  COMPARE(ptrues(p2.VnS(), 0x1a), "ptrues p2.s, #0x1a");
  COMPARE(ptrues(p3.VnD(), 0x1c), "ptrues p3.d, #0x1c");
  COMPARE(ptrues(p4.VnB(), 0x1d), "ptrues p4.b, mul4");

  // SVE_ALL is the default.
  COMPARE(ptrue(p15.VnS()), "ptrue p15.s");
  COMPARE(ptrues(p0.VnS()), "ptrues p0.s");

  // The MacroAssembler provides a `FlagsUpdate` argument. With `SetFlags`,
  // `Ptrue` is expected to emit `ptrues`.
  COMPARE_MACRO(Ptrue(p0.VnB(), SVE_MUL3), "ptrue p0.b, mul3");
  COMPARE_MACRO(Ptrues(p1.VnH(), SVE_MUL4), "ptrues p1.h, mul4");
  COMPARE_MACRO(Ptrue(p2.VnS(), SVE_VL32, LeaveFlags), "ptrue p2.s, vl32");
  COMPARE_MACRO(Ptrue(p3.VnD(), SVE_VL64, SetFlags), "ptrues p3.d, vl64");

  // Pair SETUP() with CLEANUP(), as the other tests in this file do.
  CLEANUP();
}
5948
// Disassembly checks for `pfalse` through the assembler and the
// MacroAssembler.
TEST(sve_pfalse) {
  SETUP();

  COMPARE(pfalse(p0.VnB()), "pfalse p0.b");
  COMPARE(pfalse(p15.VnB()), "pfalse p15.b");

  // The macro is called with B, H, S and D lane sizes; the expected text uses
  // `.b` in every case.
  COMPARE_MACRO(Pfalse(p1.VnB()), "pfalse p1.b");
  COMPARE_MACRO(Pfalse(p4.VnH()), "pfalse p4.b");
  COMPARE_MACRO(Pfalse(p9.VnS()), "pfalse p9.b");
  COMPARE_MACRO(Pfalse(p14.VnD()), "pfalse p14.b");

  // Pair SETUP() with CLEANUP(), as the other tests in this file do.
  CLEANUP();
}
5960
// Disassembly checks for `ptest`, including the case where the governing
// predicate and the tested predicate are the same register.
TEST(sve_ptest) {
  SETUP();

  COMPARE(ptest(p15, p0.VnB()), "ptest p15, p0.b");
  COMPARE(ptest(p0, p15.VnB()), "ptest p0, p15.b");
  COMPARE(ptest(p6, p6.VnB()), "ptest p6, p6.b");

  COMPARE_MACRO(Ptest(p0, p1.VnB()), "ptest p0, p1.b");

  // Pair SETUP() with CLEANUP(), as the other tests in this file do.
  CLEANUP();
}
5970
// Checks that the macros below accept registers given without a lane size,
// or with a lane size other than the one that is printed, and still produce
// the single expected instruction.
TEST(sve_lane_size_relaxing) {
  SETUP();

  // FFR read/write: no lane size, and an explicit B lane size.
  COMPARE_MACRO(Rdffr(p3), "rdffr p3.b");
  COMPARE_MACRO(Rdffr(p8.VnB()), "rdffr p8.b");

  COMPARE_MACRO(Wrffr(p9), "wrffr p9.b");
  COMPARE_MACRO(Wrffr(p8.VnB()), "wrffr p8.b");

  // Unpredicated bitwise logical macros without lane sizes: the expected text
  // uses `.d`.
  COMPARE_MACRO(And(z10, z1, z15), "and z10.d, z1.d, z15.d");
  COMPARE_MACRO(Bic(z11, z2, z16), "bic z11.d, z2.d, z16.d");
  COMPARE_MACRO(Eor(z12, z3, z17), "eor z12.d, z3.d, z17.d");
  COMPARE_MACRO(Orr(z13, z4, z18), "orr z13.d, z4.d, z18.d");

  // The same macros with explicit D, S, H and B lane sizes: the expected text
  // still uses `.d`.
  COMPARE_MACRO(And(z10.VnD(), z1.VnD(), z15.VnD()), "and z10.d, z1.d, z15.d");
  COMPARE_MACRO(Bic(z11.VnS(), z2.VnS(), z16.VnS()), "bic z11.d, z2.d, z16.d");
  COMPARE_MACRO(Eor(z12.VnH(), z3.VnH(), z17.VnH()), "eor z12.d, z3.d, z17.d");
  COMPARE_MACRO(Orr(z13.VnB(), z4.VnB(), z18.VnB()), "orr z13.d, z4.d, z18.d");

  COMPARE_MACRO(Pfalse(p1), "pfalse p1.b");

  CLEANUP();
}
5994
// Disassembly checks for the FFR read instructions: unpredicated `rdffr`, and
// the zeroing-predicated `rdffr` and `rdffrs`.
TEST(sve_read_ffr) {
  SETUP();

  COMPARE(rdffr(p13.VnB()), "rdffr p13.b");
  COMPARE(rdffrs(p14.VnB(), p9.Zeroing()), "rdffrs p14.b, p9/z");
  COMPARE(rdffr(p5.VnB(), p14.Zeroing()), "rdffr p5.b, p14/z");

  CLEANUP();
}
6004
// Disassembly checks for brkpa/brkpb and their flag-setting `s` variants.
TEST(sve_propagate_break) {
  SETUP();

  COMPARE(brkpas(p12.VnB(), p0.Zeroing(), p12.VnB(), p11.VnB()),
          "brkpas p12.b, p0/z, p12.b, p11.b");
  COMPARE(brkpa(p1.VnB(), p2.Zeroing(), p13.VnB(), p8.VnB()),
          "brkpa p1.b, p2/z, p13.b, p8.b");
  COMPARE(brkpbs(p14.VnB(), p1.Zeroing(), p8.VnB(), p3.VnB()),
          "brkpbs p14.b, p1/z, p8.b, p3.b");
  COMPARE(brkpb(p2.VnB(), p5.Zeroing(), p0.VnB(), p14.VnB()),
          "brkpb p2.b, p5/z, p0.b, p14.b");

  CLEANUP();
}
6019
// Disassembly checks for rdvl, addvl and addpl with zero, positive and
// negative multipliers (including 31 and -32), and with xzr or sp operands.
TEST(sve_stack_allocation) {
  SETUP();

  COMPARE(rdvl(x26, 0), "rdvl x26, #0");
  COMPARE(rdvl(x27, 31), "rdvl x27, #31");
  COMPARE(rdvl(x28, -32), "rdvl x28, #-32");
  // For rdvl, the destination is expected to be printed as xzr.
  COMPARE(rdvl(xzr, 9), "rdvl xzr, #9");

  COMPARE(addvl(x6, x20, 0), "addvl x6, x20, #0");
  COMPARE(addvl(x7, x21, 31), "addvl x7, x21, #31");
  COMPARE(addvl(x8, x22, -32), "addvl x8, x22, #-32");
  // For addvl, sp is used as the destination and as the source.
  COMPARE(addvl(sp, x1, 5), "addvl sp, x1, #5");
  COMPARE(addvl(x9, sp, -16), "addvl x9, sp, #-16");

  COMPARE(addpl(x20, x6, 0), "addpl x20, x6, #0");
  COMPARE(addpl(x21, x7, 31), "addpl x21, x7, #31");
  COMPARE(addpl(x22, x8, -32), "addpl x22, x8, #-32");
  // Likewise for addpl.
  COMPARE(addpl(sp, x1, 5), "addpl sp, x1, #5");
  COMPARE(addpl(x9, sp, -16), "addpl x9, sp, #-16");

  CLEANUP();
}
6042
// Checks the instruction sequences expected from the `Rdvl` macro for
// multipliers that `rdvl` can and cannot encode directly.
TEST(sve_rdvl_macro) {
  SETUP();

  // Encodable cases use rdvl directly.
  COMPARE_MACRO(Rdvl(x0, 3), "rdvl x0, #3");
  COMPARE_MACRO(Rdvl(x0, 31), "rdvl x0, #31");
  COMPARE_MACRO(Rdvl(x0, -32), "rdvl x0, #-32");

  // Unencodable cases fall back on `VL * multiplier`: the multiplier is
  // materialised in the destination, VL is read into x16, and the two are
  // multiplied.
  COMPARE_MACRO(Rdvl(x2, 0x1234),
                "mov x2, #0x1234\n"
                "rdvl x16, #1\n"
                "mul x2, x2, x16");

  CLEANUP();
}
6059
// Checks the instruction sequences expected from the `Addvl` macro for
// encodable multipliers, an xzr base, and unencodable multipliers with
// various destination/base combinations.
TEST(sve_addvl_macro) {
  SETUP();

  // Encodable cases use addvl directly.
  COMPARE_MACRO(Addvl(sp, sp, -3), "addvl sp, sp, #-3");
  COMPARE_MACRO(Addvl(x0, x1, 8), "addvl x0, x1, #8");

  // If xn is xzr, `Addvl` behaves like `Rdvl`.
  COMPARE_MACRO(Addvl(x7, xzr, 8), "rdvl x7, #8");

  // Unencodable cases fall back on `xn + (VL * multiplier)`.
  // Distinct destination and base: the destination holds the multiplier, and
  // a single `madd` finishes the computation.
  COMPARE_MACRO(Addvl(x7, x8, 42),
                "mov x7, #0x2a\n"
                "rdvl x16, #1\n"
                "madd x7, x7, x16, x8");
  // Destination equal to the base: the multiplier and VL are expected in x16
  // and x17 instead, so the base is still intact for the `madd`.
  COMPARE_MACRO(Addvl(x10, x10, 42),
                "mov x16, #0x2a\n"
                "rdvl x17, #1\n"
                "madd x10, x16, x17, x10");
  // With sp as the base or the destination, a separate `mul` and `add` are
  // expected in place of `madd`.
  COMPARE_MACRO(Addvl(x10, sp, 42),
                "mov x10, #0x2a\n"
                "rdvl x16, #1\n"
                "mul x10, x10, x16\n"
                "add x10, sp, x10");
  COMPARE_MACRO(Addvl(sp, x10, 42),
                "mov x16, #0x2a\n"
                "rdvl x17, #1\n"
                "mul x16, x16, x17\n"
                "add sp, x10, x16");

  CLEANUP();
}
6092
// Checks the instruction sequences expected from the `Rdpl` macro.
TEST(sve_rdpl_macro) {
  SETUP();

  // There is no `rdpl` instruction. `Rdpl` is implemented as `Addpl` (with
  // xzr). However, since `addpl` operates on the stack pointer, some special
  // cases exist.

  // If the multiplier is a multiple of 8, `Rdpl` will pass through to `Rdvl`.
  // The expected `rdvl` multiplier is the `Rdpl` multiplier divided by 8.
  COMPARE_MACRO(Rdpl(x0, 0), "rdvl x0, #0");
  COMPARE_MACRO(Rdpl(x0, 8), "rdvl x0, #1");

  // If the multiplier is encodable with `addpl`, we use that with an
  // explicitly-zeroed register.
  COMPARE_MACRO(Rdpl(x1, 7),
                "mov x1, #0x0\n"
                "addpl x1, x1, #7");
  COMPARE_MACRO(Rdpl(x1, -31),
                "mov x1, #0x0\n"
                "addpl x1, x1, #-31");

  // All other cases use `Rdvl`, and scale the result. The scaling is the
  // final `asr #3`.
  COMPARE_MACRO(Rdpl(x2, 37),
                "mov x2, #0x25\n"
                "rdvl x16, #1\n"
                "mul x2, x2, x16\n"
                "asr x2, x2, #3");

  CLEANUP();
}
6122
// Checks the instruction sequences expected from the `Addpl` macro for
// encodable multipliers, multiples of 8, an xzr base, and the general
// fallback.
TEST(sve_addpl_macro) {
  SETUP();

  // Encodable cases use addpl directly.
  COMPARE_MACRO(Addpl(x22, x22, -3), "addpl x22, x22, #-3");
  COMPARE_MACRO(Addpl(x10, x11, 8), "addpl x10, x11, #8");
  COMPARE_MACRO(Addpl(x7, sp, 31), "addpl x7, sp, #31");

  // Otherwise, if the multiplier is a multiple of 8, `Addpl` will pass through
  // to `Addvl`.
  COMPARE_MACRO(Addpl(sp, x0, 48), "addvl sp, x0, #6");
  COMPARE_MACRO(Addpl(x2, sp, -48), "addvl x2, sp, #-6");

  // If xn is xzr, `Addpl` behaves like `Rdpl`.
  COMPARE_MACRO(Addpl(x7, xzr, 8), "rdvl x7, #1");
  COMPARE_MACRO(Addpl(x29, xzr, 13),
                "mov x29, #0x0\n"
                "addpl x29, x29, #13");

  // All other cases use `Rdvl`, and scale the result before adding it to `xn`.
  // Where possible, the scaling `asr` is merged with the `add`.
  COMPARE_MACRO(Addpl(x7, x8, 123),
                "mov x7, #0x7b\n"
                "rdvl x16, #1\n"
                "mul x7, x7, x16\n"
                "add x7, x8, x7, asr #3");
  // Destination equal to the base: x16 and x17 are expected as temporaries.
  COMPARE_MACRO(Addpl(x9, x9, 122),
                "mov x16, #0x7a\n"
                "rdvl x17, #1\n"
                "mul x16, x16, x17\n"
                "add x9, x9, x16, asr #3");
  // If the stack pointer is used, the `asr` and `add` must be separate.
  COMPARE_MACRO(Addpl(sp, x0, 33),
                "mov x16, #0x21\n"
                "rdvl x17, #1\n"
                "mul x16, x16, x17\n"
                "asr x16, x16, #3\n"
                "add sp, x0, x16");

  CLEANUP();
}
6164
// Disassembly checks for the vector `Sel` macro at every lane size, and for
// the predicated `mov` spelling expected when the destination is also the
// last source operand.
TEST(sve_vector_select) {
  SETUP();

  COMPARE_MACRO(Sel(z13.VnB(), p3, z3.VnB(), z25.VnB()),
                "sel z13.b, p3, z3.b, z25.b");
  COMPARE_MACRO(Sel(z13.VnH(), p3, z3.VnH(), z25.VnH()),
                "sel z13.h, p3, z3.h, z25.h");
  COMPARE_MACRO(Sel(z13.VnS(), p3, z3.VnS(), z25.VnS()),
                "sel z13.s, p3, z3.s, z25.s");
  COMPARE_MACRO(Sel(z13.VnD(), p3, z3.VnD(), z25.VnD()),
                "sel z13.d, p3, z3.d, z25.d");

  // Check that sel() where zd == zm disassembles as predicated mov.
  COMPARE_MACRO(Sel(z1.VnB(), p4.Merging(), z30.VnB(), z1.VnB()),
                "mov z1.b, p4/m, z30.b");
  COMPARE_MACRO(Sel(z1.VnH(), p4.Merging(), z30.VnH(), z1.VnH()),
                "mov z1.h, p4/m, z30.h");
  COMPARE_MACRO(Sel(z1.VnS(), p4.Merging(), z30.VnS(), z1.VnS()),
                "mov z1.s, p4/m, z30.s");
  COMPARE_MACRO(Sel(z1.VnD(), p4.Merging(), z30.VnD(), z1.VnD()),
                "mov z1.d, p4/m, z30.d");

  // Check predicated mov() directly.
  COMPARE_MACRO(Mov(z2.VnB(), p10.Merging(), z22.VnB()),
                "mov z2.b, p10/m, z22.b");
  COMPARE_MACRO(Mov(z2.VnH(), p10.Merging(), z22.VnH()),
                "mov z2.h, p10/m, z22.h");
  COMPARE_MACRO(Mov(z2.VnS(), p10.Merging(), z22.VnS()),
                "mov z2.s, p10/m, z22.s");
  COMPARE_MACRO(Mov(z2.VnD(), p10.Merging(), z22.VnD()),
                "mov z2.d, p10/m, z22.d");

  CLEANUP();
}
6199
// Disassembly checks for the FFR write instructions `setffr` and `wrffr`.
TEST(sve_write_ffr) {
  SETUP();

  // Only the start of the `setffr` output is compared.
  COMPARE_PREFIX(setffr(), "setffr");
  COMPARE(wrffr(p9.VnB()), "wrffr p9.b");

  CLEANUP();
}
6208
// Disassembly checks for `match` and `nmatch` with B and H lane sizes.
TEST(sve2_match_nmatch) {
  SETUP();

  COMPARE(match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB()),
          "match p15.b, p1/z, z18.b, z5.b");
  COMPARE(match(p15.VnH(), p1.Zeroing(), z18.VnH(), z5.VnH()),
          "match p15.h, p1/z, z18.h, z5.h");
  COMPARE(nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB()),
          "nmatch p1.b, p1/z, z20.b, z17.b");
  COMPARE(nmatch(p1.VnH(), p1.Zeroing(), z20.VnH(), z17.VnH()),
          "nmatch p1.h, p1/z, z20.h, z17.h");

  CLEANUP();
}
6223
// Disassembly checks for `saba`/`uaba` at every lane size, followed by the
// instruction sequences expected from the four-operand `Saba`/`Uaba` macros
// for each way their register arguments can alias. The macro arguments are
// read here as (zd, za, zn, zm), where za is the accumulator input.
TEST(sve2_saba_uaba) {
  SETUP();

  COMPARE(saba(z13.VnB(), z2.VnB(), z31.VnB()), "saba z13.b, z2.b, z31.b");
  COMPARE(saba(z13.VnD(), z2.VnD(), z31.VnD()), "saba z13.d, z2.d, z31.d");
  COMPARE(saba(z13.VnH(), z2.VnH(), z31.VnH()), "saba z13.h, z2.h, z31.h");
  COMPARE(saba(z13.VnS(), z2.VnS(), z31.VnS()), "saba z13.s, z2.s, z31.s");
  COMPARE(uaba(z23.VnB(), z22.VnB(), z20.VnB()), "uaba z23.b, z22.b, z20.b");
  COMPARE(uaba(z23.VnD(), z22.VnD(), z20.VnD()), "uaba z23.d, z22.d, z20.d");
  COMPARE(uaba(z23.VnH(), z22.VnH(), z20.VnH()), "uaba z23.h, z22.h, z20.h");
  COMPARE(uaba(z23.VnS(), z22.VnS(), z20.VnS()), "uaba z23.s, z22.s, z20.s");

  // zd == za: a single `saba` is expected, even when zn or zm also aliases zd.
  COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()),
                "saba z12.b, z3.b, z30.b");
  COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()),
                "saba z12.b, z3.b, z12.b");
  COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z12.VnB(), z30.VnB()),
                "saba z12.b, z12.b, z30.b");
  // All four operands identical: no instructions are expected.
  COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), "");
  // zd aliases no input: `movprfx` from za is expected before `saba`,
  // including when za is the same register as zn or zm.
  COMPARE_MACRO(Saba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()),
                "movprfx z12, z13\n"
                "saba z12.b, z3.b, z30.b");
  COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()),
                "movprfx z12, z3\n"
                "saba z12.b, z3.b, z30.b");
  COMPARE_MACRO(Saba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()),
                "movprfx z12, z30\n"
                "saba z12.b, z3.b, z30.b");
  // zn == zm: only a move of za into zd is expected (presumably because the
  // absolute difference is then zero).
  COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()),
                "mov z12.d, z3.d");
  // zd aliases zn or zm but not za: the aliased input is expected to be
  // copied to z31 before zd is overwritten by `movprfx`.
  COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "saba z12.b, z31.b, z3.b");
  COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "saba z12.b, z3.b, z31.b");

  // The same aliasing cases for `Uaba`.
  COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()),
                "uaba z12.b, z3.b, z30.b");
  COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()),
                "uaba z12.b, z3.b, z12.b");
  COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z30.VnB()),
                "uaba z12.b, z12.b, z30.b");
  COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), "");
  COMPARE_MACRO(Uaba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()),
                "movprfx z12, z13\n"
                "uaba z12.b, z3.b, z30.b");
  COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()),
                "movprfx z12, z3\n"
                "uaba z12.b, z3.b, z30.b");
  COMPARE_MACRO(Uaba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()),
                "movprfx z12, z30\n"
                "uaba z12.b, z3.b, z30.b");
  COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()),
                "mov z12.d, z3.d");
  COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "uaba z12.b, z31.b, z3.b");
  COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "uaba z12.b, z3.b, z31.b");

  CLEANUP();
}
6292
// Disassembly checks for the predicated halving add/subtract instructions
// (signed and unsigned, plain, reversed and rounding) at every lane size,
// followed by the sequences expected from the corresponding macros.
TEST(sve2_halving_arithmetic) {
  SETUP();

  // Signed forms.
  COMPARE(shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()),
          "shadd z20.b, p3/m, z20.b, z7.b");
  COMPARE(shadd(z20.VnD(), p3.Merging(), z20.VnD(), z7.VnD()),
          "shadd z20.d, p3/m, z20.d, z7.d");
  COMPARE(shadd(z20.VnH(), p3.Merging(), z20.VnH(), z7.VnH()),
          "shadd z20.h, p3/m, z20.h, z7.h");
  COMPARE(shadd(z20.VnS(), p3.Merging(), z20.VnS(), z7.VnS()),
          "shadd z20.s, p3/m, z20.s, z7.s");
  COMPARE(shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()),
          "shsub z21.b, p0/m, z21.b, z0.b");
  COMPARE(shsub(z21.VnD(), p0.Merging(), z21.VnD(), z0.VnD()),
          "shsub z21.d, p0/m, z21.d, z0.d");
  COMPARE(shsub(z21.VnH(), p0.Merging(), z21.VnH(), z0.VnH()),
          "shsub z21.h, p0/m, z21.h, z0.h");
  COMPARE(shsub(z21.VnS(), p0.Merging(), z21.VnS(), z0.VnS()),
          "shsub z21.s, p0/m, z21.s, z0.s");
  COMPARE(shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
          "shsubr z1.b, p0/m, z1.b, z2.b");
  COMPARE(shsubr(z1.VnD(), p0.Merging(), z1.VnD(), z2.VnD()),
          "shsubr z1.d, p0/m, z1.d, z2.d");
  COMPARE(shsubr(z1.VnH(), p0.Merging(), z1.VnH(), z2.VnH()),
          "shsubr z1.h, p0/m, z1.h, z2.h");
  COMPARE(shsubr(z1.VnS(), p0.Merging(), z1.VnS(), z2.VnS()),
          "shsubr z1.s, p0/m, z1.s, z2.s");
  COMPARE(srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()),
          "srhadd z23.b, p4/m, z23.b, z14.b");
  COMPARE(srhadd(z23.VnD(), p4.Merging(), z23.VnD(), z14.VnD()),
          "srhadd z23.d, p4/m, z23.d, z14.d");
  COMPARE(srhadd(z23.VnH(), p4.Merging(), z23.VnH(), z14.VnH()),
          "srhadd z23.h, p4/m, z23.h, z14.h");
  COMPARE(srhadd(z23.VnS(), p4.Merging(), z23.VnS(), z14.VnS()),
          "srhadd z23.s, p4/m, z23.s, z14.s");

  // Unsigned forms.
  COMPARE(uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()),
          "uhadd z21.b, p2/m, z21.b, z19.b");
  COMPARE(uhadd(z21.VnD(), p2.Merging(), z21.VnD(), z19.VnD()),
          "uhadd z21.d, p2/m, z21.d, z19.d");
  COMPARE(uhadd(z21.VnH(), p2.Merging(), z21.VnH(), z19.VnH()),
          "uhadd z21.h, p2/m, z21.h, z19.h");
  COMPARE(uhadd(z21.VnS(), p2.Merging(), z21.VnS(), z19.VnS()),
          "uhadd z21.s, p2/m, z21.s, z19.s");
  COMPARE(uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()),
          "uhsub z1.b, p4/m, z1.b, z9.b");
  COMPARE(uhsub(z1.VnD(), p4.Merging(), z1.VnD(), z9.VnD()),
          "uhsub z1.d, p4/m, z1.d, z9.d");
  COMPARE(uhsub(z1.VnH(), p4.Merging(), z1.VnH(), z9.VnH()),
          "uhsub z1.h, p4/m, z1.h, z9.h");
  COMPARE(uhsub(z1.VnS(), p4.Merging(), z1.VnS(), z9.VnS()),
          "uhsub z1.s, p4/m, z1.s, z9.s");
  COMPARE(uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()),
          "uhsubr z18.b, p0/m, z18.b, z1.b");
  COMPARE(uhsubr(z18.VnD(), p0.Merging(), z18.VnD(), z1.VnD()),
          "uhsubr z18.d, p0/m, z18.d, z1.d");
  COMPARE(uhsubr(z18.VnH(), p0.Merging(), z18.VnH(), z1.VnH()),
          "uhsubr z18.h, p0/m, z18.h, z1.h");
  COMPARE(uhsubr(z18.VnS(), p0.Merging(), z18.VnS(), z1.VnS()),
          "uhsubr z18.s, p0/m, z18.s, z1.s");
  COMPARE(urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()),
          "urhadd z29.b, p4/m, z29.b, z10.b");
  COMPARE(urhadd(z29.VnD(), p4.Merging(), z29.VnD(), z10.VnD()),
          "urhadd z29.d, p4/m, z29.d, z10.d");
  COMPARE(urhadd(z29.VnH(), p4.Merging(), z29.VnH(), z10.VnH()),
          "urhadd z29.h, p4/m, z29.h, z10.h");
  COMPARE(urhadd(z29.VnS(), p4.Merging(), z29.VnS(), z10.VnS()),
          "urhadd z29.s, p4/m, z29.s, z10.s");

  // Add macros. With a destination distinct from both sources, a predicated
  // `movprfx` from the first source is expected. With the destination equal
  // to the second source, the sources are expected to be swapped instead.
  COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
                "movprfx z0.b, p0/m, z1.b\n"
                "shadd z0.b, p0/m, z0.b, z2.b");
  COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
                "shadd z0.b, p0/m, z0.b, z1.b");
  COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
                "movprfx z0.b, p0/m, z1.b\n"
                "srhadd z0.b, p0/m, z0.b, z2.b");
  COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
                "srhadd z0.b, p0/m, z0.b, z1.b");
  COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
                "movprfx z0.b, p0/m, z1.b\n"
                "uhadd z0.b, p0/m, z0.b, z2.b");
  COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
                "uhadd z0.b, p0/m, z0.b, z1.b");
  COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
                "movprfx z0.b, p0/m, z1.b\n"
                "urhadd z0.b, p0/m, z0.b, z2.b");
  COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
                "urhadd z0.b, p0/m, z0.b, z1.b");

  // Subtract macros. With the destination equal to the second source, the
  // reversed instruction (`shsubr`/`uhsubr`) is expected.
  COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()),
                "shsub z0.b, p0/m, z0.b, z1.b");
  COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
                "shsubr z0.b, p0/m, z0.b, z1.b");
  COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()),
                "uhsub z0.b, p0/m, z0.b, z1.b");
  COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
                "uhsubr z0.b, p0/m, z0.b, z1.b");

  CLEANUP();
}
6394
// Disassembly checks for the shift-right-and-accumulate instructions (ssra,
// srsra, usra, ursra) with a selection of shift amounts at each lane size,
// followed by the sequences expected from the corresponding macros.
TEST(sve2_sra) {
  SETUP();

  COMPARE(ssra(z0.VnB(), z8.VnB(), 1), "ssra z0.b, z8.b, #1");
  COMPARE(ssra(z0.VnB(), z8.VnB(), 2), "ssra z0.b, z8.b, #2");
  COMPARE(ssra(z0.VnB(), z8.VnB(), 5), "ssra z0.b, z8.b, #5");
  COMPARE(ssra(z0.VnB(), z8.VnB(), 8), "ssra z0.b, z8.b, #8");
  COMPARE(ssra(z0.VnH(), z8.VnH(), 1), "ssra z0.h, z8.h, #1");
  COMPARE(ssra(z0.VnH(), z8.VnH(), 16), "ssra z0.h, z8.h, #16");
  COMPARE(ssra(z0.VnS(), z8.VnS(), 1), "ssra z0.s, z8.s, #1");
  COMPARE(ssra(z0.VnS(), z8.VnS(), 31), "ssra z0.s, z8.s, #31");
  COMPARE(ssra(z0.VnD(), z8.VnD(), 1), "ssra z0.d, z8.d, #1");
  COMPARE(ssra(z0.VnD(), z8.VnD(), 64), "ssra z0.d, z8.d, #64");

  COMPARE(srsra(z0.VnB(), z8.VnB(), 1), "srsra z0.b, z8.b, #1");
  COMPARE(srsra(z0.VnB(), z8.VnB(), 2), "srsra z0.b, z8.b, #2");
  COMPARE(srsra(z0.VnB(), z8.VnB(), 5), "srsra z0.b, z8.b, #5");
  COMPARE(srsra(z0.VnB(), z8.VnB(), 8), "srsra z0.b, z8.b, #8");
  COMPARE(srsra(z0.VnH(), z8.VnH(), 1), "srsra z0.h, z8.h, #1");
  COMPARE(srsra(z0.VnH(), z8.VnH(), 16), "srsra z0.h, z8.h, #16");
  COMPARE(srsra(z0.VnS(), z8.VnS(), 1), "srsra z0.s, z8.s, #1");
  COMPARE(srsra(z0.VnS(), z8.VnS(), 31), "srsra z0.s, z8.s, #31");
  COMPARE(srsra(z0.VnD(), z8.VnD(), 1), "srsra z0.d, z8.d, #1");
  COMPARE(srsra(z0.VnD(), z8.VnD(), 64), "srsra z0.d, z8.d, #64");

  COMPARE(usra(z0.VnB(), z8.VnB(), 1), "usra z0.b, z8.b, #1");
  COMPARE(usra(z0.VnB(), z8.VnB(), 2), "usra z0.b, z8.b, #2");
  COMPARE(usra(z0.VnB(), z8.VnB(), 5), "usra z0.b, z8.b, #5");
  COMPARE(usra(z0.VnB(), z8.VnB(), 8), "usra z0.b, z8.b, #8");
  COMPARE(usra(z0.VnH(), z8.VnH(), 1), "usra z0.h, z8.h, #1");
  COMPARE(usra(z0.VnH(), z8.VnH(), 16), "usra z0.h, z8.h, #16");
  COMPARE(usra(z0.VnS(), z8.VnS(), 1), "usra z0.s, z8.s, #1");
  COMPARE(usra(z0.VnS(), z8.VnS(), 31), "usra z0.s, z8.s, #31");
  COMPARE(usra(z0.VnD(), z8.VnD(), 1), "usra z0.d, z8.d, #1");
  COMPARE(usra(z0.VnD(), z8.VnD(), 64), "usra z0.d, z8.d, #64");

  COMPARE(ursra(z0.VnB(), z8.VnB(), 1), "ursra z0.b, z8.b, #1");
  COMPARE(ursra(z0.VnB(), z8.VnB(), 2), "ursra z0.b, z8.b, #2");
  COMPARE(ursra(z0.VnB(), z8.VnB(), 5), "ursra z0.b, z8.b, #5");
  COMPARE(ursra(z0.VnB(), z8.VnB(), 8), "ursra z0.b, z8.b, #8");
  COMPARE(ursra(z0.VnH(), z8.VnH(), 1), "ursra z0.h, z8.h, #1");
  COMPARE(ursra(z0.VnH(), z8.VnH(), 16), "ursra z0.h, z8.h, #16");
  COMPARE(ursra(z0.VnS(), z8.VnS(), 1), "ursra z0.s, z8.s, #1");
  COMPARE(ursra(z0.VnS(), z8.VnS(), 31), "ursra z0.s, z8.s, #31");
  COMPARE(ursra(z0.VnD(), z8.VnD(), 1), "ursra z0.d, z8.d, #1");
  COMPARE(ursra(z0.VnD(), z8.VnD(), 64), "ursra z0.d, z8.d, #64");

  // Macro forms take a separate accumulator input (second argument).
  // Destination distinct from the accumulator input: `movprfx` is expected.
  COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z2.VnB(), 2),
                "movprfx z0, z1\n"
                "ssra z0.b, z2.b, #2");
  // Destination equal to the accumulator input: a single instruction.
  COMPARE_MACRO(Ssra(z0.VnB(), z0.VnB(), z2.VnB(), 2), "ssra z0.b, z2.b, #2");
  COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z1.VnB(), 2),
                "movprfx z0, z1\n"
                "ssra z0.b, z1.b, #2");
  // Destination equal to the shifted source only: the source is expected to
  // be copied to z31 before the destination is overwritten by `movprfx`.
  COMPARE_MACRO(Ssra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
                "mov z31.d, z2.d\n"
                "movprfx z2, z1\n"
                "ssra z2.b, z31.b, #2");
  COMPARE_MACRO(Ssra(z0.VnB(), z0.VnB(), z0.VnB(), 2), "ssra z0.b, z0.b, #2");

  // The same aliasing case for the other three macros.
  COMPARE_MACRO(Srsra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
                "mov z31.d, z2.d\n"
                "movprfx z2, z1\n"
                "srsra z2.b, z31.b, #2");
  COMPARE_MACRO(Usra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
                "mov z31.d, z2.d\n"
                "movprfx z2, z1\n"
                "usra z2.b, z31.b, #2");
  COMPARE_MACRO(Ursra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
                "mov z31.d, z2.d\n"
                "movprfx z2, z1\n"
                "ursra z2.b, z31.b, #2");
  CLEANUP();
}
6469
// Disassembly checks for the shift-and-insert instructions `sri` and `sli`
// with a selection of shift amounts at each lane size.
TEST(sve2_sri_sli) {
  SETUP();

  // The `sri` shift amounts tested start at 1.
  COMPARE(sri(z6.VnB(), z9.VnB(), 1), "sri z6.b, z9.b, #1");
  COMPARE(sri(z6.VnB(), z9.VnB(), 2), "sri z6.b, z9.b, #2");
  COMPARE(sri(z6.VnB(), z9.VnB(), 5), "sri z6.b, z9.b, #5");
  COMPARE(sri(z6.VnB(), z9.VnB(), 8), "sri z6.b, z9.b, #8");
  COMPARE(sri(z6.VnH(), z9.VnH(), 1), "sri z6.h, z9.h, #1");
  COMPARE(sri(z6.VnH(), z9.VnH(), 16), "sri z6.h, z9.h, #16");
  COMPARE(sri(z6.VnS(), z9.VnS(), 1), "sri z6.s, z9.s, #1");
  COMPARE(sri(z6.VnS(), z9.VnS(), 31), "sri z6.s, z9.s, #31");
  COMPARE(sri(z6.VnD(), z9.VnD(), 1), "sri z6.d, z9.d, #1");
  COMPARE(sri(z6.VnD(), z9.VnD(), 64), "sri z6.d, z9.d, #64");

  // The `sli` shift amounts tested start at 0.
  COMPARE(sli(z29.VnB(), z7.VnB(), 0), "sli z29.b, z7.b, #0");
  COMPARE(sli(z29.VnB(), z7.VnB(), 2), "sli z29.b, z7.b, #2");
  COMPARE(sli(z29.VnB(), z7.VnB(), 5), "sli z29.b, z7.b, #5");
  COMPARE(sli(z29.VnB(), z7.VnB(), 7), "sli z29.b, z7.b, #7");
  COMPARE(sli(z29.VnH(), z7.VnH(), 0), "sli z29.h, z7.h, #0");
  COMPARE(sli(z29.VnH(), z7.VnH(), 15), "sli z29.h, z7.h, #15");
  COMPARE(sli(z29.VnS(), z7.VnS(), 0), "sli z29.s, z7.s, #0");
  COMPARE(sli(z29.VnS(), z7.VnS(), 31), "sli z29.s, z7.s, #31");
  COMPARE(sli(z29.VnD(), z7.VnD(), 0), "sli z29.d, z7.d, #0");
  COMPARE(sli(z29.VnD(), z7.VnD(), 63), "sli z29.d, z7.d, #63");

  CLEANUP();
}
6497
// Disassembly checks for the predicated SVE2 shift-by-immediate instructions
// (sqshl, sqshlu, srshr, uqshl, urshr) on B, H, S and D lanes.
TEST(sve2_shift_imm) {
  SETUP();

  // Merging governing predicates, named after their expected "pN/m" text.
  const auto p0_m = p0.Merging();
  const auto p1_m = p1.Merging();
  const auto p2_m = p2.Merging();
  const auto p5_m = p5.Merging();
  const auto p7_m = p7.Merging();

  // sqshl: left shifts from #0 up to one less than the lane width.
  COMPARE(sqshl(z0.VnB(), p5_m, z0.VnB(), 0), "sqshl z0.b, p5/m, z0.b, #0");
  COMPARE(sqshl(z0.VnB(), p5_m, z0.VnB(), 2), "sqshl z0.b, p5/m, z0.b, #2");
  COMPARE(sqshl(z0.VnB(), p5_m, z0.VnB(), 5), "sqshl z0.b, p5/m, z0.b, #5");
  COMPARE(sqshl(z0.VnB(), p5_m, z0.VnB(), 7), "sqshl z0.b, p5/m, z0.b, #7");
  COMPARE(sqshl(z0.VnH(), p5_m, z0.VnH(), 0), "sqshl z0.h, p5/m, z0.h, #0");
  COMPARE(sqshl(z0.VnH(), p5_m, z0.VnH(), 15), "sqshl z0.h, p5/m, z0.h, #15");
  COMPARE(sqshl(z0.VnS(), p5_m, z0.VnS(), 0), "sqshl z0.s, p5/m, z0.s, #0");
  COMPARE(sqshl(z0.VnS(), p5_m, z0.VnS(), 31), "sqshl z0.s, p5/m, z0.s, #31");
  COMPARE(sqshl(z0.VnD(), p5_m, z0.VnD(), 0), "sqshl z0.d, p5/m, z0.d, #0");
  COMPARE(sqshl(z0.VnD(), p5_m, z0.VnD(), 63), "sqshl z0.d, p5/m, z0.d, #63");

  // sqshlu: same immediate range as sqshl.
  COMPARE(sqshlu(z10.VnB(), p1_m, z10.VnB(), 0),
          "sqshlu z10.b, p1/m, z10.b, #0");
  COMPARE(sqshlu(z10.VnB(), p1_m, z10.VnB(), 2),
          "sqshlu z10.b, p1/m, z10.b, #2");
  COMPARE(sqshlu(z10.VnB(), p1_m, z10.VnB(), 5),
          "sqshlu z10.b, p1/m, z10.b, #5");
  COMPARE(sqshlu(z10.VnB(), p1_m, z10.VnB(), 7),
          "sqshlu z10.b, p1/m, z10.b, #7");
  COMPARE(sqshlu(z10.VnH(), p1_m, z10.VnH(), 0),
          "sqshlu z10.h, p1/m, z10.h, #0");
  COMPARE(sqshlu(z10.VnH(), p1_m, z10.VnH(), 15),
          "sqshlu z10.h, p1/m, z10.h, #15");
  COMPARE(sqshlu(z10.VnS(), p1_m, z10.VnS(), 0),
          "sqshlu z10.s, p1/m, z10.s, #0");
  COMPARE(sqshlu(z10.VnS(), p1_m, z10.VnS(), 31),
          "sqshlu z10.s, p1/m, z10.s, #31");
  COMPARE(sqshlu(z10.VnD(), p1_m, z10.VnD(), 0),
          "sqshlu z10.d, p1/m, z10.d, #0");
  COMPARE(sqshlu(z10.VnD(), p1_m, z10.VnD(), 63),
          "sqshlu z10.d, p1/m, z10.d, #63");

  // srshr: right shifts from #1 up to the lane width.
  COMPARE(srshr(z12.VnB(), p0_m, z12.VnB(), 1),
          "srshr z12.b, p0/m, z12.b, #1");
  COMPARE(srshr(z12.VnB(), p0_m, z12.VnB(), 2),
          "srshr z12.b, p0/m, z12.b, #2");
  COMPARE(srshr(z12.VnB(), p0_m, z12.VnB(), 5),
          "srshr z12.b, p0/m, z12.b, #5");
  COMPARE(srshr(z12.VnB(), p0_m, z12.VnB(), 8),
          "srshr z12.b, p0/m, z12.b, #8");
  COMPARE(srshr(z12.VnH(), p0_m, z12.VnH(), 1),
          "srshr z12.h, p0/m, z12.h, #1");
  COMPARE(srshr(z12.VnH(), p0_m, z12.VnH(), 16),
          "srshr z12.h, p0/m, z12.h, #16");
  COMPARE(srshr(z12.VnS(), p0_m, z12.VnS(), 1),
          "srshr z12.s, p0/m, z12.s, #1");
  COMPARE(srshr(z12.VnS(), p0_m, z12.VnS(), 32),
          "srshr z12.s, p0/m, z12.s, #32");
  COMPARE(srshr(z12.VnD(), p0_m, z12.VnD(), 1),
          "srshr z12.d, p0/m, z12.d, #1");
  COMPARE(srshr(z12.VnD(), p0_m, z12.VnD(), 64),
          "srshr z12.d, p0/m, z12.d, #64");

  // uqshl: same immediate range as sqshl.
  COMPARE(uqshl(z29.VnB(), p7_m, z29.VnB(), 0),
          "uqshl z29.b, p7/m, z29.b, #0");
  COMPARE(uqshl(z29.VnB(), p7_m, z29.VnB(), 2),
          "uqshl z29.b, p7/m, z29.b, #2");
  COMPARE(uqshl(z29.VnB(), p7_m, z29.VnB(), 5),
          "uqshl z29.b, p7/m, z29.b, #5");
  COMPARE(uqshl(z29.VnB(), p7_m, z29.VnB(), 7),
          "uqshl z29.b, p7/m, z29.b, #7");
  COMPARE(uqshl(z29.VnH(), p7_m, z29.VnH(), 0),
          "uqshl z29.h, p7/m, z29.h, #0");
  COMPARE(uqshl(z29.VnH(), p7_m, z29.VnH(), 15),
          "uqshl z29.h, p7/m, z29.h, #15");
  COMPARE(uqshl(z29.VnS(), p7_m, z29.VnS(), 0),
          "uqshl z29.s, p7/m, z29.s, #0");
  COMPARE(uqshl(z29.VnS(), p7_m, z29.VnS(), 31),
          "uqshl z29.s, p7/m, z29.s, #31");
  COMPARE(uqshl(z29.VnD(), p7_m, z29.VnD(), 0),
          "uqshl z29.d, p7/m, z29.d, #0");
  COMPARE(uqshl(z29.VnD(), p7_m, z29.VnD(), 63),
          "uqshl z29.d, p7/m, z29.d, #63");

  // urshr: same immediate range as srshr.
  COMPARE(urshr(z31.VnB(), p2_m, z31.VnB(), 1),
          "urshr z31.b, p2/m, z31.b, #1");
  COMPARE(urshr(z31.VnB(), p2_m, z31.VnB(), 2),
          "urshr z31.b, p2/m, z31.b, #2");
  COMPARE(urshr(z31.VnB(), p2_m, z31.VnB(), 5),
          "urshr z31.b, p2/m, z31.b, #5");
  COMPARE(urshr(z31.VnB(), p2_m, z31.VnB(), 8),
          "urshr z31.b, p2/m, z31.b, #8");
  COMPARE(urshr(z31.VnH(), p2_m, z31.VnH(), 1),
          "urshr z31.h, p2/m, z31.h, #1");
  COMPARE(urshr(z31.VnH(), p2_m, z31.VnH(), 16),
          "urshr z31.h, p2/m, z31.h, #16");
  COMPARE(urshr(z31.VnS(), p2_m, z31.VnS(), 1),
          "urshr z31.s, p2/m, z31.s, #1");
  COMPARE(urshr(z31.VnS(), p2_m, z31.VnS(), 32),
          "urshr z31.s, p2/m, z31.s, #32");
  COMPARE(urshr(z31.VnD(), p2_m, z31.VnD(), 1),
          "urshr z31.d, p2/m, z31.d, #1");
  COMPARE(urshr(z31.VnD(), p2_m, z31.VnD(), 64),
          "urshr z31.d, p2/m, z31.d, #64");

  CLEANUP();
}
6608
// Disassembly checks for the predicated SVE2 shift-by-vector instructions
// (rounding, saturating and saturating-rounding, plus their reversed forms).
// Each instruction is checked on B, D, H and S lanes, in that order.
TEST(sve2_shift_sat) {
  SETUP();

  // Merging governing predicates, named after their expected "pN/m" text.
  const auto p0_m = p0.Merging();
  const auto p1_m = p1.Merging();
  const auto p2_m = p2.Merging();
  const auto p3_m = p3.Merging();
  const auto p4_m = p4.Merging();
  const auto p5_m = p5.Merging();
  const auto p6_m = p6.Merging();
  const auto p7_m = p7.Merging();

  // srshl / srshlr
  COMPARE(srshl(z31.VnB(), p7_m, z31.VnB(), z3.VnB()),
          "srshl z31.b, p7/m, z31.b, z3.b");
  COMPARE(srshl(z31.VnD(), p7_m, z31.VnD(), z3.VnD()),
          "srshl z31.d, p7/m, z31.d, z3.d");
  COMPARE(srshl(z31.VnH(), p7_m, z31.VnH(), z3.VnH()),
          "srshl z31.h, p7/m, z31.h, z3.h");
  COMPARE(srshl(z31.VnS(), p7_m, z31.VnS(), z3.VnS()),
          "srshl z31.s, p7/m, z31.s, z3.s");

  COMPARE(srshlr(z16.VnB(), p7_m, z16.VnB(), z29.VnB()),
          "srshlr z16.b, p7/m, z16.b, z29.b");
  COMPARE(srshlr(z16.VnD(), p7_m, z16.VnD(), z29.VnD()),
          "srshlr z16.d, p7/m, z16.d, z29.d");
  COMPARE(srshlr(z16.VnH(), p7_m, z16.VnH(), z29.VnH()),
          "srshlr z16.h, p7/m, z16.h, z29.h");
  COMPARE(srshlr(z16.VnS(), p7_m, z16.VnS(), z29.VnS()),
          "srshlr z16.s, p7/m, z16.s, z29.s");

  // urshl / urshlr
  COMPARE(urshl(z15.VnB(), p2_m, z15.VnB(), z3.VnB()),
          "urshl z15.b, p2/m, z15.b, z3.b");
  COMPARE(urshl(z15.VnD(), p2_m, z15.VnD(), z3.VnD()),
          "urshl z15.d, p2/m, z15.d, z3.d");
  COMPARE(urshl(z15.VnH(), p2_m, z15.VnH(), z3.VnH()),
          "urshl z15.h, p2/m, z15.h, z3.h");
  COMPARE(urshl(z15.VnS(), p2_m, z15.VnS(), z3.VnS()),
          "urshl z15.s, p2/m, z15.s, z3.s");

  COMPARE(urshlr(z27.VnB(), p1_m, z27.VnB(), z30.VnB()),
          "urshlr z27.b, p1/m, z27.b, z30.b");
  COMPARE(urshlr(z27.VnD(), p1_m, z27.VnD(), z30.VnD()),
          "urshlr z27.d, p1/m, z27.d, z30.d");
  COMPARE(urshlr(z27.VnH(), p1_m, z27.VnH(), z30.VnH()),
          "urshlr z27.h, p1/m, z27.h, z30.h");
  COMPARE(urshlr(z27.VnS(), p1_m, z27.VnS(), z30.VnS()),
          "urshlr z27.s, p1/m, z27.s, z30.s");

  // sqshl / sqshlr
  COMPARE(sqshl(z22.VnB(), p4_m, z22.VnB(), z21.VnB()),
          "sqshl z22.b, p4/m, z22.b, z21.b");
  COMPARE(sqshl(z22.VnD(), p4_m, z22.VnD(), z21.VnD()),
          "sqshl z22.d, p4/m, z22.d, z21.d");
  COMPARE(sqshl(z22.VnH(), p4_m, z22.VnH(), z21.VnH()),
          "sqshl z22.h, p4/m, z22.h, z21.h");
  COMPARE(sqshl(z22.VnS(), p4_m, z22.VnS(), z21.VnS()),
          "sqshl z22.s, p4/m, z22.s, z21.s");

  COMPARE(sqshlr(z7.VnB(), p3_m, z7.VnB(), z5.VnB()),
          "sqshlr z7.b, p3/m, z7.b, z5.b");
  COMPARE(sqshlr(z7.VnD(), p3_m, z7.VnD(), z5.VnD()),
          "sqshlr z7.d, p3/m, z7.d, z5.d");
  COMPARE(sqshlr(z7.VnH(), p3_m, z7.VnH(), z5.VnH()),
          "sqshlr z7.h, p3/m, z7.h, z5.h");
  COMPARE(sqshlr(z7.VnS(), p3_m, z7.VnS(), z5.VnS()),
          "sqshlr z7.s, p3/m, z7.s, z5.s");

  // uqshl / uqshlr
  COMPARE(uqshl(z10.VnB(), p0_m, z10.VnB(), z21.VnB()),
          "uqshl z10.b, p0/m, z10.b, z21.b");
  COMPARE(uqshl(z10.VnD(), p0_m, z10.VnD(), z21.VnD()),
          "uqshl z10.d, p0/m, z10.d, z21.d");
  COMPARE(uqshl(z10.VnH(), p0_m, z10.VnH(), z21.VnH()),
          "uqshl z10.h, p0/m, z10.h, z21.h");
  COMPARE(uqshl(z10.VnS(), p0_m, z10.VnS(), z21.VnS()),
          "uqshl z10.s, p0/m, z10.s, z21.s");

  COMPARE(uqshlr(z12.VnB(), p1_m, z12.VnB(), z12.VnB()),
          "uqshlr z12.b, p1/m, z12.b, z12.b");
  COMPARE(uqshlr(z12.VnD(), p1_m, z12.VnD(), z12.VnD()),
          "uqshlr z12.d, p1/m, z12.d, z12.d");
  COMPARE(uqshlr(z12.VnH(), p1_m, z12.VnH(), z12.VnH()),
          "uqshlr z12.h, p1/m, z12.h, z12.h");
  COMPARE(uqshlr(z12.VnS(), p1_m, z12.VnS(), z12.VnS()),
          "uqshlr z12.s, p1/m, z12.s, z12.s");

  // sqrshl / sqrshlr
  COMPARE(sqrshl(z31.VnB(), p5_m, z31.VnB(), z27.VnB()),
          "sqrshl z31.b, p5/m, z31.b, z27.b");
  COMPARE(sqrshl(z31.VnD(), p5_m, z31.VnD(), z27.VnD()),
          "sqrshl z31.d, p5/m, z31.d, z27.d");
  COMPARE(sqrshl(z31.VnH(), p5_m, z31.VnH(), z27.VnH()),
          "sqrshl z31.h, p5/m, z31.h, z27.h");
  COMPARE(sqrshl(z31.VnS(), p5_m, z31.VnS(), z27.VnS()),
          "sqrshl z31.s, p5/m, z31.s, z27.s");

  COMPARE(sqrshlr(z25.VnB(), p6_m, z25.VnB(), z7.VnB()),
          "sqrshlr z25.b, p6/m, z25.b, z7.b");
  COMPARE(sqrshlr(z25.VnD(), p6_m, z25.VnD(), z7.VnD()),
          "sqrshlr z25.d, p6/m, z25.d, z7.d");
  COMPARE(sqrshlr(z25.VnH(), p6_m, z25.VnH(), z7.VnH()),
          "sqrshlr z25.h, p6/m, z25.h, z7.h");
  COMPARE(sqrshlr(z25.VnS(), p6_m, z25.VnS(), z7.VnS()),
          "sqrshlr z25.s, p6/m, z25.s, z7.s");

  // uqrshl / uqrshlr
  COMPARE(uqrshl(z20.VnB(), p1_m, z20.VnB(), z30.VnB()),
          "uqrshl z20.b, p1/m, z20.b, z30.b");
  COMPARE(uqrshl(z20.VnD(), p1_m, z20.VnD(), z30.VnD()),
          "uqrshl z20.d, p1/m, z20.d, z30.d");
  COMPARE(uqrshl(z20.VnH(), p1_m, z20.VnH(), z30.VnH()),
          "uqrshl z20.h, p1/m, z20.h, z30.h");
  COMPARE(uqrshl(z20.VnS(), p1_m, z20.VnS(), z30.VnS()),
          "uqrshl z20.s, p1/m, z20.s, z30.s");

  COMPARE(uqrshlr(z8.VnB(), p5_m, z8.VnB(), z9.VnB()),
          "uqrshlr z8.b, p5/m, z8.b, z9.b");
  COMPARE(uqrshlr(z8.VnD(), p5_m, z8.VnD(), z9.VnD()),
          "uqrshlr z8.d, p5/m, z8.d, z9.d");
  COMPARE(uqrshlr(z8.VnH(), p5_m, z8.VnH(), z9.VnH()),
          "uqrshlr z8.h, p5/m, z8.h, z9.h");
  COMPARE(uqrshlr(z8.VnS(), p5_m, z8.VnS(), z9.VnS()),
          "uqrshlr z8.s, p5/m, z8.s, z9.s");

  CLEANUP();
}
6722
// Disassembly checks for the predicated SVE2 saturating add/subtract
// instructions, followed by the MacroAssembler wrappers and the sequences they
// are expected to emit for different register-aliasing patterns.
TEST(sve2_sat_arith) {
  SETUP();

  // Merging governing predicates, named after their expected "pN/m" text.
  const auto p0_m = p0.Merging();
  const auto p2_m = p2.Merging();
  const auto p3_m = p3.Merging();
  const auto p4_m = p4.Merging();
  const auto p7_m = p7.Merging();

  // Assembler forms, each on B, D, H and S lanes.
  COMPARE(sqadd(z28.VnB(), p0_m, z28.VnB(), z3.VnB()),
          "sqadd z28.b, p0/m, z28.b, z3.b");
  COMPARE(sqadd(z28.VnD(), p0_m, z28.VnD(), z3.VnD()),
          "sqadd z28.d, p0/m, z28.d, z3.d");
  COMPARE(sqadd(z28.VnH(), p0_m, z28.VnH(), z3.VnH()),
          "sqadd z28.h, p0/m, z28.h, z3.h");
  COMPARE(sqadd(z28.VnS(), p0_m, z28.VnS(), z3.VnS()),
          "sqadd z28.s, p0/m, z28.s, z3.s");
  COMPARE(sqsub(z6.VnB(), p0_m, z6.VnB(), z12.VnB()),
          "sqsub z6.b, p0/m, z6.b, z12.b");
  COMPARE(sqsub(z6.VnD(), p0_m, z6.VnD(), z12.VnD()),
          "sqsub z6.d, p0/m, z6.d, z12.d");
  COMPARE(sqsub(z6.VnH(), p0_m, z6.VnH(), z12.VnH()),
          "sqsub z6.h, p0/m, z6.h, z12.h");
  COMPARE(sqsub(z6.VnS(), p0_m, z6.VnS(), z12.VnS()),
          "sqsub z6.s, p0/m, z6.s, z12.s");
  COMPARE(sqsubr(z16.VnB(), p7_m, z16.VnB(), z22.VnB()),
          "sqsubr z16.b, p7/m, z16.b, z22.b");
  COMPARE(sqsubr(z16.VnD(), p7_m, z16.VnD(), z22.VnD()),
          "sqsubr z16.d, p7/m, z16.d, z22.d");
  COMPARE(sqsubr(z16.VnH(), p7_m, z16.VnH(), z22.VnH()),
          "sqsubr z16.h, p7/m, z16.h, z22.h");
  COMPARE(sqsubr(z16.VnS(), p7_m, z16.VnS(), z22.VnS()),
          "sqsubr z16.s, p7/m, z16.s, z22.s");
  COMPARE(suqadd(z26.VnB(), p2_m, z26.VnB(), z28.VnB()),
          "suqadd z26.b, p2/m, z26.b, z28.b");
  COMPARE(suqadd(z26.VnD(), p2_m, z26.VnD(), z28.VnD()),
          "suqadd z26.d, p2/m, z26.d, z28.d");
  COMPARE(suqadd(z26.VnH(), p2_m, z26.VnH(), z28.VnH()),
          "suqadd z26.h, p2/m, z26.h, z28.h");
  COMPARE(suqadd(z26.VnS(), p2_m, z26.VnS(), z28.VnS()),
          "suqadd z26.s, p2/m, z26.s, z28.s");
  COMPARE(usqadd(z25.VnB(), p4_m, z25.VnB(), z6.VnB()),
          "usqadd z25.b, p4/m, z25.b, z6.b");
  COMPARE(usqadd(z25.VnD(), p4_m, z25.VnD(), z6.VnD()),
          "usqadd z25.d, p4/m, z25.d, z6.d");
  COMPARE(usqadd(z25.VnH(), p4_m, z25.VnH(), z6.VnH()),
          "usqadd z25.h, p4/m, z25.h, z6.h");
  COMPARE(usqadd(z25.VnS(), p4_m, z25.VnS(), z6.VnS()),
          "usqadd z25.s, p4/m, z25.s, z6.s");
  COMPARE(uqadd(z24.VnB(), p7_m, z24.VnB(), z1.VnB()),
          "uqadd z24.b, p7/m, z24.b, z1.b");
  COMPARE(uqadd(z24.VnD(), p7_m, z24.VnD(), z1.VnD()),
          "uqadd z24.d, p7/m, z24.d, z1.d");
  COMPARE(uqadd(z24.VnH(), p7_m, z24.VnH(), z1.VnH()),
          "uqadd z24.h, p7/m, z24.h, z1.h");
  COMPARE(uqadd(z24.VnS(), p7_m, z24.VnS(), z1.VnS()),
          "uqadd z24.s, p7/m, z24.s, z1.s");
  COMPARE(uqsub(z10.VnB(), p3_m, z10.VnB(), z1.VnB()),
          "uqsub z10.b, p3/m, z10.b, z1.b");
  COMPARE(uqsub(z10.VnD(), p3_m, z10.VnD(), z1.VnD()),
          "uqsub z10.d, p3/m, z10.d, z1.d");
  COMPARE(uqsub(z10.VnH(), p3_m, z10.VnH(), z1.VnH()),
          "uqsub z10.h, p3/m, z10.h, z1.h");
  COMPARE(uqsub(z10.VnS(), p3_m, z10.VnS(), z1.VnS()),
          "uqsub z10.s, p3/m, z10.s, z1.s");
  COMPARE(uqsubr(z20.VnB(), p0_m, z20.VnB(), z6.VnB()),
          "uqsubr z20.b, p0/m, z20.b, z6.b");
  COMPARE(uqsubr(z20.VnD(), p0_m, z20.VnD(), z6.VnD()),
          "uqsubr z20.d, p0/m, z20.d, z6.d");
  COMPARE(uqsubr(z20.VnH(), p0_m, z20.VnH(), z6.VnH()),
          "uqsubr z20.h, p0/m, z20.h, z6.h");
  COMPARE(uqsubr(z20.VnS(), p0_m, z20.VnS(), z6.VnS()),
          "uqsubr z20.s, p0/m, z20.s, z6.s");

  // Sqadd / Uqadd macros: when zd aliases the second source, the expected
  // output is the same instruction with its sources swapped.
  COMPARE_MACRO(Sqadd(z28.VnB(), p0_m, z28.VnB(), z28.VnB()),
                "sqadd z28.b, p0/m, z28.b, z28.b");
  COMPARE_MACRO(Sqadd(z29.VnB(), p0_m, z28.VnB(), z3.VnB()),
                "movprfx z29.b, p0/m, z28.b\n"
                "sqadd z29.b, p0/m, z29.b, z3.b");
  COMPARE_MACRO(Sqadd(z29.VnB(), p0_m, z28.VnB(), z29.VnB()),
                "sqadd z29.b, p0/m, z29.b, z28.b");
  COMPARE_MACRO(Uqadd(z28.VnB(), p0_m, z28.VnB(), z28.VnB()),
                "uqadd z28.b, p0/m, z28.b, z28.b");
  COMPARE_MACRO(Uqadd(z29.VnB(), p0_m, z28.VnB(), z3.VnB()),
                "movprfx z29.b, p0/m, z28.b\n"
                "uqadd z29.b, p0/m, z29.b, z3.b");
  COMPARE_MACRO(Uqadd(z29.VnB(), p0_m, z28.VnB(), z29.VnB()),
                "uqadd z29.b, p0/m, z29.b, z28.b");

  // Sqsub / Uqsub macros: when zd aliases the second source, the expected
  // output is the reversed instruction (sqsubr / uqsubr).
  COMPARE_MACRO(Sqsub(z28.VnB(), p0_m, z28.VnB(), z28.VnB()),
                "sqsub z28.b, p0/m, z28.b, z28.b");
  COMPARE_MACRO(Sqsub(z29.VnB(), p0_m, z28.VnB(), z3.VnB()),
                "movprfx z29.b, p0/m, z28.b\n"
                "sqsub z29.b, p0/m, z29.b, z3.b");
  COMPARE_MACRO(Sqsub(z29.VnB(), p0_m, z28.VnB(), z29.VnB()),
                "sqsubr z29.b, p0/m, z29.b, z28.b");
  COMPARE_MACRO(Uqsub(z28.VnB(), p0_m, z28.VnB(), z28.VnB()),
                "uqsub z28.b, p0/m, z28.b, z28.b");
  COMPARE_MACRO(Uqsub(z29.VnB(), p0_m, z28.VnB(), z3.VnB()),
                "movprfx z29.b, p0/m, z28.b\n"
                "uqsub z29.b, p0/m, z29.b, z3.b");
  COMPARE_MACRO(Uqsub(z29.VnB(), p0_m, z28.VnB(), z29.VnB()),
                "uqsubr z29.b, p0/m, z29.b, z28.b");

  // Suqadd / Usqadd macros: when zd aliases the second source, the expected
  // output first copies it to z31 and then uses that copy as the operand.
  COMPARE_MACRO(Suqadd(z28.VnB(), p0_m, z28.VnB(), z28.VnB()),
                "suqadd z28.b, p0/m, z28.b, z28.b");
  COMPARE_MACRO(Suqadd(z29.VnB(), p0_m, z28.VnB(), z3.VnB()),
                "movprfx z29.b, p0/m, z28.b\n"
                "suqadd z29.b, p0/m, z29.b, z3.b");
  COMPARE_MACRO(Suqadd(z29.VnB(), p0_m, z28.VnB(), z29.VnB()),
                "mov z31.d, z29.d\n"
                "movprfx z29.b, p0/m, z28.b\n"
                "suqadd z29.b, p0/m, z29.b, z31.b");
  COMPARE_MACRO(Usqadd(z28.VnB(), p0_m, z28.VnB(), z28.VnB()),
                "usqadd z28.b, p0/m, z28.b, z28.b");
  COMPARE_MACRO(Usqadd(z29.VnB(), p0_m, z28.VnB(), z3.VnB()),
                "movprfx z29.b, p0/m, z28.b\n"
                "usqadd z29.b, p0/m, z29.b, z3.b");
  COMPARE_MACRO(Usqadd(z29.VnB(), p0_m, z28.VnB(), z29.VnB()),
                "mov z31.d, z29.d\n"
                "movprfx z29.b, p0/m, z28.b\n"
                "usqadd z29.b, p0/m, z29.b, z31.b");

  CLEANUP();
}
6842
// Disassembly checks for the predicated SVE2 pairwise instructions (addp,
// smaxp, sminp, umaxp, uminp), then the MacroAssembler wrappers and the
// sequences they are expected to emit when the destination aliases a source.
TEST(sve2_pair_arith) {
  SETUP();

  // Merging governing predicates, named after their expected "pN/m" text.
  const auto p0_m = p0.Merging();
  const auto p1_m = p1.Merging();
  const auto p2_m = p2.Merging();
  const auto p3_m = p3.Merging();
  const auto p4_m = p4.Merging();

  // Assembler forms, each on B, D, H and S lanes.
  COMPARE(addp(z3.VnB(), p1_m, z3.VnB(), z0.VnB()),
          "addp z3.b, p1/m, z3.b, z0.b");
  COMPARE(addp(z3.VnD(), p1_m, z3.VnD(), z0.VnD()),
          "addp z3.d, p1/m, z3.d, z0.d");
  COMPARE(addp(z3.VnH(), p1_m, z3.VnH(), z0.VnH()),
          "addp z3.h, p1/m, z3.h, z0.h");
  COMPARE(addp(z3.VnS(), p1_m, z3.VnS(), z0.VnS()),
          "addp z3.s, p1/m, z3.s, z0.s");
  COMPARE(smaxp(z5.VnB(), p4_m, z5.VnB(), z10.VnB()),
          "smaxp z5.b, p4/m, z5.b, z10.b");
  COMPARE(smaxp(z5.VnD(), p4_m, z5.VnD(), z10.VnD()),
          "smaxp z5.d, p4/m, z5.d, z10.d");
  COMPARE(smaxp(z5.VnH(), p4_m, z5.VnH(), z10.VnH()),
          "smaxp z5.h, p4/m, z5.h, z10.h");
  COMPARE(smaxp(z5.VnS(), p4_m, z5.VnS(), z10.VnS()),
          "smaxp z5.s, p4/m, z5.s, z10.s");
  COMPARE(sminp(z27.VnB(), p3_m, z27.VnB(), z1.VnB()),
          "sminp z27.b, p3/m, z27.b, z1.b");
  COMPARE(sminp(z27.VnD(), p3_m, z27.VnD(), z1.VnD()),
          "sminp z27.d, p3/m, z27.d, z1.d");
  COMPARE(sminp(z27.VnH(), p3_m, z27.VnH(), z1.VnH()),
          "sminp z27.h, p3/m, z27.h, z1.h");
  COMPARE(sminp(z27.VnS(), p3_m, z27.VnS(), z1.VnS()),
          "sminp z27.s, p3/m, z27.s, z1.s");
  COMPARE(umaxp(z7.VnB(), p2_m, z7.VnB(), z23.VnB()),
          "umaxp z7.b, p2/m, z7.b, z23.b");
  COMPARE(umaxp(z7.VnD(), p2_m, z7.VnD(), z23.VnD()),
          "umaxp z7.d, p2/m, z7.d, z23.d");
  COMPARE(umaxp(z7.VnH(), p2_m, z7.VnH(), z23.VnH()),
          "umaxp z7.h, p2/m, z7.h, z23.h");
  COMPARE(umaxp(z7.VnS(), p2_m, z7.VnS(), z23.VnS()),
          "umaxp z7.s, p2/m, z7.s, z23.s");
  COMPARE(uminp(z10.VnB(), p0_m, z10.VnB(), z22.VnB()),
          "uminp z10.b, p0/m, z10.b, z22.b");
  COMPARE(uminp(z10.VnD(), p0_m, z10.VnD(), z22.VnD()),
          "uminp z10.d, p0/m, z10.d, z22.d");
  COMPARE(uminp(z10.VnH(), p0_m, z10.VnH(), z22.VnH()),
          "uminp z10.h, p0/m, z10.h, z22.h");
  COMPARE(uminp(z10.VnS(), p0_m, z10.VnS(), z22.VnS()),
          "uminp z10.s, p0/m, z10.s, z22.s");

  // Addp macro: in-place, distinct destination, and destination aliasing the
  // second source (expected to go through a copy in z31).
  COMPARE_MACRO(Addp(z3.VnB(), p1_m, z3.VnB(), z3.VnB()),
                "addp z3.b, p1/m, z3.b, z3.b");
  COMPARE_MACRO(Addp(z4.VnB(), p1_m, z3.VnB(), z3.VnB()),
                "movprfx z4.b, p1/m, z3.b\n"
                "addp z4.b, p1/m, z4.b, z3.b");
  COMPARE_MACRO(Addp(z4.VnB(), p1_m, z3.VnB(), z4.VnB()),
                "mov z31.d, z4.d\n"
                "movprfx z4.b, p1/m, z3.b\n"
                "addp z4.b, p1/m, z4.b, z31.b");

  // Remaining macros: only the destination-aliases-second-source case.
  COMPARE_MACRO(Smaxp(z4.VnB(), p1_m, z3.VnB(), z4.VnB()),
                "mov z31.d, z4.d\n"
                "movprfx z4.b, p1/m, z3.b\n"
                "smaxp z4.b, p1/m, z4.b, z31.b");
  COMPARE_MACRO(Sminp(z4.VnB(), p1_m, z3.VnB(), z4.VnB()),
                "mov z31.d, z4.d\n"
                "movprfx z4.b, p1/m, z3.b\n"
                "sminp z4.b, p1/m, z4.b, z31.b");
  COMPARE_MACRO(Umaxp(z4.VnB(), p1_m, z3.VnB(), z4.VnB()),
                "mov z31.d, z4.d\n"
                "movprfx z4.b, p1/m, z3.b\n"
                "umaxp z4.b, p1/m, z4.b, z31.b");
  COMPARE_MACRO(Uminp(z4.VnB(), p1_m, z3.VnB(), z4.VnB()),
                "mov z31.d, z4.d\n"
                "movprfx z4.b, p1/m, z3.b\n"
                "uminp z4.b, p1/m, z4.b, z31.b");

  CLEANUP();
}
6914
// Disassembly checks for the SVE2 saturating extract-narrow instructions.
// Every case narrows by one lane size: H to B, S to H, then D to S.
TEST(sve2_extract_narrow) {
  SETUP();

  // sqxtnb / sqxtnt
  COMPARE(sqxtnb(z2.VnB(), z0.VnH()), "sqxtnb z2.b, z0.h");
  COMPARE(sqxtnb(z2.VnH(), z0.VnS()), "sqxtnb z2.h, z0.s");
  COMPARE(sqxtnb(z2.VnS(), z0.VnD()), "sqxtnb z2.s, z0.d");
  COMPARE(sqxtnt(z31.VnB(), z18.VnH()), "sqxtnt z31.b, z18.h");
  COMPARE(sqxtnt(z31.VnH(), z18.VnS()), "sqxtnt z31.h, z18.s");
  COMPARE(sqxtnt(z31.VnS(), z18.VnD()), "sqxtnt z31.s, z18.d");

  // sqxtunb / sqxtunt
  COMPARE(sqxtunb(z28.VnB(), z6.VnH()), "sqxtunb z28.b, z6.h");
  COMPARE(sqxtunb(z28.VnH(), z6.VnS()), "sqxtunb z28.h, z6.s");
  COMPARE(sqxtunb(z28.VnS(), z6.VnD()), "sqxtunb z28.s, z6.d");
  COMPARE(sqxtunt(z14.VnB(), z31.VnH()), "sqxtunt z14.b, z31.h");
  COMPARE(sqxtunt(z14.VnH(), z31.VnS()), "sqxtunt z14.h, z31.s");
  COMPARE(sqxtunt(z14.VnS(), z31.VnD()), "sqxtunt z14.s, z31.d");

  // uqxtnb / uqxtnt
  COMPARE(uqxtnb(z28.VnB(), z4.VnH()), "uqxtnb z28.b, z4.h");
  COMPARE(uqxtnb(z28.VnH(), z4.VnS()), "uqxtnb z28.h, z4.s");
  COMPARE(uqxtnb(z28.VnS(), z4.VnD()), "uqxtnb z28.s, z4.d");
  COMPARE(uqxtnt(z19.VnB(), z7.VnH()), "uqxtnt z19.b, z7.h");
  COMPARE(uqxtnt(z19.VnH(), z7.VnS()), "uqxtnt z19.h, z7.s");
  COMPARE(uqxtnt(z19.VnS(), z7.VnD()), "uqxtnt z19.s, z7.d");

  CLEANUP();
}
6939
// Disassembly checks for the SVE2 eorbt and eortb instructions on B, D, H and
// S lanes.
TEST(sve2_eorbt_eortb) {
  SETUP();

  // eorbt: destination distinct from both sources.
  COMPARE(eorbt(z3.VnB(), z10.VnB(), z8.VnB()), "eorbt z3.b, z10.b, z8.b");
  COMPARE(eorbt(z3.VnD(), z10.VnD(), z8.VnD()), "eorbt z3.d, z10.d, z8.d");
  COMPARE(eorbt(z3.VnH(), z10.VnH(), z8.VnH()), "eorbt z3.h, z10.h, z8.h");
  COMPARE(eorbt(z3.VnS(), z10.VnS(), z8.VnS()), "eorbt z3.s, z10.s, z8.s");

  // eortb: destination is also the first source.
  COMPARE(eortb(z21.VnB(), z21.VnB(), z15.VnB()), "eortb z21.b, z21.b, z15.b");
  COMPARE(eortb(z21.VnD(), z21.VnD(), z15.VnD()), "eortb z21.d, z21.d, z15.d");
  COMPARE(eortb(z21.VnH(), z21.VnH(), z15.VnH()), "eortb z21.h, z21.h, z15.h");
  COMPARE(eortb(z21.VnS(), z21.VnS(), z15.VnS()), "eortb z21.s, z21.s, z15.s");

  CLEANUP();
}
6954
// Disassembly checks for SVE2 sqrdmlah / sqrdmlsh in both their vector and
// indexed forms, plus the MacroAssembler wrappers and the sequences they are
// expected to emit for different register-aliasing patterns.
TEST(sve2_saturating_multiply_add_high) {
  SETUP();

  // Vector forms on B, D, H and S lanes.
  COMPARE(sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()),
          "sqrdmlah z27.b, z28.b, z19.b");
  COMPARE(sqrdmlah(z27.VnD(), z28.VnD(), z19.VnD()),
          "sqrdmlah z27.d, z28.d, z19.d");
  COMPARE(sqrdmlah(z27.VnH(), z28.VnH(), z19.VnH()),
          "sqrdmlah z27.h, z28.h, z19.h");
  COMPARE(sqrdmlah(z27.VnS(), z28.VnS(), z19.VnS()),
          "sqrdmlah z27.s, z28.s, z19.s");
  COMPARE(sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()),
          "sqrdmlsh z11.b, z16.b, z31.b");
  COMPARE(sqrdmlsh(z11.VnD(), z16.VnD(), z31.VnD()),
          "sqrdmlsh z11.d, z16.d, z31.d");
  COMPARE(sqrdmlsh(z11.VnH(), z16.VnH(), z31.VnH()),
          "sqrdmlsh z11.h, z16.h, z31.h");
  COMPARE(sqrdmlsh(z11.VnS(), z16.VnS(), z31.VnS()),
          "sqrdmlsh z11.s, z16.s, z31.s");

  // Vector-form macros. Where the destination aliases a multiplicand, the
  // expected sequence accumulates in z31 and then moves the result to zd.
  COMPARE_MACRO(Sqrdmlah(z29.VnD(), z0.VnD(), z29.VnD(), z26.VnD()),
                "movprfx z31, z0\n"
                "sqrdmlah z31.d, z29.d, z26.d\n"
                "mov z29.d, z31.d");
  COMPARE_MACRO(Sqrdmlah(z26.VnH(), z0.VnH(), z29.VnH(), z26.VnH()),
                "movprfx z31, z0\n"
                "sqrdmlah z31.h, z29.h, z26.h\n"
                "mov z26.d, z31.d");
  COMPARE_MACRO(Sqrdmlsh(z23.VnS(), z31.VnS(), z26.VnS(), z29.VnS()),
                "movprfx z23, z31\n"
                "sqrdmlsh z23.s, z26.s, z29.s");
  COMPARE_MACRO(Sqrdmlsh(z4.VnB(), z31.VnB(), z4.VnB(), z4.VnB()),
                "sqrdmlsh z31.b, z4.b, z4.b\n"
                "mov z4.d, z31.d");

  // Indexed forms on D, H and S lanes.
  COMPARE(sqrdmlah(z10.VnD(), z30.VnD(), z11.VnD(), 1),
          "sqrdmlah z10.d, z30.d, z11.d[1]");
  COMPARE(sqrdmlah(z11.VnH(), z8.VnH(), z3.VnH(), 7),
          "sqrdmlah z11.h, z8.h, z3.h[7]");
  COMPARE(sqrdmlah(z21.VnS(), z29.VnS(), z7.VnS(), 3),
          "sqrdmlah z21.s, z29.s, z7.s[3]");
  COMPARE(sqrdmlsh(z2.VnD(), z16.VnD(), z14.VnD(), 0),
          "sqrdmlsh z2.d, z16.d, z14.d[0]");
  COMPARE(sqrdmlsh(z23.VnH(), z13.VnH(), z6.VnH(), 5),
          "sqrdmlsh z23.h, z13.h, z6.h[5]");
  COMPARE(sqrdmlsh(z27.VnS(), z8.VnS(), z4.VnS(), 2),
          "sqrdmlsh z27.s, z8.s, z4.s[2]");

  // Indexed-form macros, mirroring the vector-form aliasing cases above.
  COMPARE_MACRO(Sqrdmlah(z24.VnD(), z0.VnD(), z24.VnD(), z13.VnD(), 0),
                "movprfx z31, z0\n"
                "sqrdmlah z31.d, z24.d, z13.d[0]\n"
                "mov z24.d, z31.d");
  COMPARE_MACRO(Sqrdmlah(z4.VnH(), z0.VnH(), z29.VnH(), z4.VnH(), 6),
                "movprfx z31, z0\n"
                "sqrdmlah z31.h, z29.h, z4.h[6]\n"
                "mov z4.d, z31.d");
  COMPARE_MACRO(Sqrdmlsh(z12.VnS(), z31.VnS(), z26.VnS(), z2.VnS(), 2),
                "movprfx z12, z31\n"
                "sqrdmlsh z12.s, z26.s, z2.s[2]");
  COMPARE_MACRO(Sqrdmlsh(z0.VnD(), z31.VnD(), z0.VnD(), z0.VnD(), 1),
                "sqrdmlsh z31.d, z0.d, z0.d[1]\n"
                "mov z0.d, z31.d");

  CLEANUP();
}
7020
// Disassembly checks for SVE2 sadalp / uadalp. The destination lane is always
// twice the width of the source lane (D from S, H from B, S from H).
TEST(sve2_integer_pairwise_add_accumulate_long) {
  SETUP();

  // Merging governing predicates, named after their expected "pN/m" text.
  const auto p4_m = p4.Merging();
  const auto p5_m = p5.Merging();

  COMPARE(sadalp(z19.VnD(), p5_m, z9.VnS()), "sadalp z19.d, p5/m, z9.s");
  COMPARE(sadalp(z19.VnH(), p5_m, z9.VnB()), "sadalp z19.h, p5/m, z9.b");
  COMPARE(sadalp(z19.VnS(), p5_m, z9.VnH()), "sadalp z19.s, p5/m, z9.h");

  COMPARE(uadalp(z20.VnD(), p4_m, z5.VnS()), "uadalp z20.d, p4/m, z5.s");
  COMPARE(uadalp(z20.VnH(), p4_m, z5.VnB()), "uadalp z20.h, p4/m, z5.b");
  COMPARE(uadalp(z20.VnS(), p4_m, z5.VnH()), "uadalp z20.s, p4/m, z5.h");

  CLEANUP();
}
7039
// Disassembly checks for the unpredicated SVE2 vector multiplies: mul, pmul,
// smulh and umulh.
TEST(sve2_integer_multiply_vectors_unpredicated) {
  SETUP();

  // mul on B, D, H and S lanes.
  COMPARE(mul(z23.VnB(), z0.VnB(), z12.VnB()), "mul z23.b, z0.b, z12.b");
  COMPARE(mul(z24.VnD(), z1.VnD(), z14.VnD()), "mul z24.d, z1.d, z14.d");
  COMPARE(mul(z25.VnH(), z2.VnH(), z16.VnH()), "mul z25.h, z2.h, z16.h");
  COMPARE(mul(z26.VnS(), z3.VnS(), z18.VnS()), "mul z26.s, z3.s, z18.s");

  // pmul is only checked on B lanes here.
  COMPARE(pmul(z0.VnB(), z5.VnB(), z5.VnB()), "pmul z0.b, z5.b, z5.b");

  // smulh on B, D, H and S lanes.
  COMPARE(smulh(z11.VnB(), z9.VnB(), z1.VnB()), "smulh z11.b, z9.b, z1.b");
  COMPARE(smulh(z21.VnD(), z19.VnD(), z16.VnD()), "smulh z21.d, z19.d, z16.d");
  COMPARE(smulh(z11.VnH(), z9.VnH(), z1.VnH()), "smulh z11.h, z9.h, z1.h");
  COMPARE(smulh(z21.VnS(), z19.VnS(), z16.VnS()), "smulh z21.s, z19.s, z16.s");

  // umulh on B, D, H and S lanes, including cases where operands repeat.
  COMPARE(umulh(z5.VnB(), z9.VnB(), z5.VnB()), "umulh z5.b, z9.b, z5.b");
  COMPARE(umulh(z18.VnD(), z9.VnD(), z5.VnD()), "umulh z18.d, z9.d, z5.d");
  COMPARE(umulh(z18.VnH(), z9.VnH(), z9.VnH()), "umulh z18.h, z9.h, z9.h");
  COMPARE(umulh(z18.VnS(), z9.VnS(), z18.VnS()), "umulh z18.s, z9.s, z18.s");

  CLEANUP();
}
7062
// Disassembly checks for SVE2 saddlbt, ssublbt and ssubltb. The destination
// lane is always twice the width of the source lanes (D from S, H from B,
// S from H).
TEST(sve2_arith_interleaved_long) {
  SETUP();

  // saddlbt
  COMPARE(saddlbt(z15.VnD(), z6.VnS(), z18.VnS()),
          "saddlbt z15.d, z6.s, z18.s");
  COMPARE(saddlbt(z15.VnH(), z6.VnB(), z18.VnB()),
          "saddlbt z15.h, z6.b, z18.b");
  COMPARE(saddlbt(z15.VnS(), z6.VnH(), z18.VnH()),
          "saddlbt z15.s, z6.h, z18.h");

  // ssublbt
  COMPARE(ssublbt(z6.VnD(), z28.VnS(), z12.VnS()),
          "ssublbt z6.d, z28.s, z12.s");
  COMPARE(ssublbt(z6.VnH(), z28.VnB(), z12.VnB()),
          "ssublbt z6.h, z28.b, z12.b");
  COMPARE(ssublbt(z6.VnS(), z28.VnH(), z12.VnH()),
          "ssublbt z6.s, z28.h, z12.h");

  // ssubltb
  COMPARE(ssubltb(z11.VnD(), z18.VnS(), z19.VnS()),
          "ssubltb z11.d, z18.s, z19.s");
  COMPARE(ssubltb(z11.VnH(), z18.VnB(), z19.VnB()),
          "ssubltb z11.h, z18.b, z19.b");
  COMPARE(ssubltb(z11.VnS(), z18.VnH(), z19.VnH()),
          "ssubltb z11.s, z18.h, z19.h");

  CLEANUP();
}
7087
// Disassembly checks for the Sqabs, Sqneg, Urecpe and Ursqrte macros. With a
// merging predicate a single instruction is expected; with a zeroing predicate
// a zeroing movprfx of the destination is expected first.
TEST(sve2_int_unary_predicated) {
  SETUP();

  // Governing predicates, named after their expected "pN/m" or "pN/z" text.
  const auto p0_m = p0.Merging();
  const auto p1_m = p1.Merging();
  const auto p3_m = p3.Merging();
  const auto p7_m = p7.Merging();
  const auto p0_z = p0.Zeroing();
  const auto p1_z = p1.Zeroing();
  const auto p3_z = p3.Zeroing();
  const auto p7_z = p7.Zeroing();

  // Merging predication.
  COMPARE_MACRO(Sqabs(z29.VnB(), p1_m, z18.VnB()), "sqabs z29.b, p1/m, z18.b");
  COMPARE_MACRO(Sqabs(z29.VnD(), p1_m, z18.VnD()), "sqabs z29.d, p1/m, z18.d");
  COMPARE_MACRO(Sqabs(z29.VnH(), p1_m, z18.VnH()), "sqabs z29.h, p1/m, z18.h");
  COMPARE_MACRO(Sqabs(z29.VnS(), p1_m, z18.VnS()), "sqabs z29.s, p1/m, z18.s");
  COMPARE_MACRO(Sqneg(z21.VnB(), p0_m, z17.VnB()), "sqneg z21.b, p0/m, z17.b");
  COMPARE_MACRO(Sqneg(z21.VnD(), p0_m, z17.VnD()), "sqneg z21.d, p0/m, z17.d");
  COMPARE_MACRO(Sqneg(z21.VnH(), p0_m, z17.VnH()), "sqneg z21.h, p0/m, z17.h");
  COMPARE_MACRO(Sqneg(z21.VnS(), p0_m, z17.VnS()), "sqneg z21.s, p0/m, z17.s");
  COMPARE_MACRO(Urecpe(z25.VnS(), p7_m, z2.VnS()), "urecpe z25.s, p7/m, z2.s");
  COMPARE_MACRO(Ursqrte(z4.VnS(), p3_m, z3.VnS()), "ursqrte z4.s, p3/m, z3.s");

  // Zeroing predication.
  COMPARE_MACRO(Sqabs(z29.VnS(), p1_z, z18.VnS()),
                "movprfx z29.s, p1/z, z29.s\n"
                "sqabs z29.s, p1/m, z18.s");
  COMPARE_MACRO(Sqneg(z21.VnB(), p0_z, z17.VnB()),
                "movprfx z21.b, p0/z, z21.b\n"
                "sqneg z21.b, p0/m, z17.b");
  COMPARE_MACRO(Urecpe(z25.VnS(), p7_z, z2.VnS()),
                "movprfx z25.s, p7/z, z25.s\n"
                "urecpe z25.s, p7/m, z2.s");
  COMPARE_MACRO(Ursqrte(z4.VnS(), p3_z, z3.VnS()),
                "movprfx z4.s, p3/z, z4.s\n"
                "ursqrte z4.s, p3/m, z3.s");

  CLEANUP();
}
7126
// Disassembly checks for the SVE2 long arithmetic macros (absolute difference,
// add and subtract; signed and unsigned; bottom and top variants). Each is
// checked with D, H and S destinations fed by S, B and H sources respectively.
TEST(sve2_arith_long) {
  SETUP();

  // Signed absolute difference long.
  COMPARE_MACRO(Sabdlb(z2.VnD(), z21.VnS(), z3.VnS()),
                "sabdlb z2.d, z21.s, z3.s");
  COMPARE_MACRO(Sabdlb(z2.VnH(), z21.VnB(), z3.VnB()),
                "sabdlb z2.h, z21.b, z3.b");
  COMPARE_MACRO(Sabdlb(z2.VnS(), z21.VnH(), z3.VnH()),
                "sabdlb z2.s, z21.h, z3.h");
  COMPARE_MACRO(Sabdlt(z25.VnD(), z23.VnS(), z17.VnS()),
                "sabdlt z25.d, z23.s, z17.s");
  COMPARE_MACRO(Sabdlt(z25.VnH(), z23.VnB(), z17.VnB()),
                "sabdlt z25.h, z23.b, z17.b");
  COMPARE_MACRO(Sabdlt(z25.VnS(), z23.VnH(), z17.VnH()),
                "sabdlt z25.s, z23.h, z17.h");

  // Signed add long.
  COMPARE_MACRO(Saddlb(z24.VnD(), z30.VnS(), z16.VnS()),
                "saddlb z24.d, z30.s, z16.s");
  COMPARE_MACRO(Saddlb(z24.VnH(), z30.VnB(), z16.VnB()),
                "saddlb z24.h, z30.b, z16.b");
  COMPARE_MACRO(Saddlb(z24.VnS(), z30.VnH(), z16.VnH()),
                "saddlb z24.s, z30.h, z16.h");
  COMPARE_MACRO(Saddlt(z21.VnD(), z29.VnS(), z31.VnS()),
                "saddlt z21.d, z29.s, z31.s");
  COMPARE_MACRO(Saddlt(z21.VnH(), z29.VnB(), z31.VnB()),
                "saddlt z21.h, z29.b, z31.b");
  COMPARE_MACRO(Saddlt(z21.VnS(), z29.VnH(), z31.VnH()),
                "saddlt z21.s, z29.h, z31.h");

  // Signed subtract long.
  COMPARE_MACRO(Ssublb(z4.VnD(), z23.VnS(), z7.VnS()),
                "ssublb z4.d, z23.s, z7.s");
  COMPARE_MACRO(Ssublb(z4.VnH(), z23.VnB(), z7.VnB()),
                "ssublb z4.h, z23.b, z7.b");
  COMPARE_MACRO(Ssublb(z4.VnS(), z23.VnH(), z7.VnH()),
                "ssublb z4.s, z23.h, z7.h");
  COMPARE_MACRO(Ssublt(z12.VnD(), z13.VnS(), z6.VnS()),
                "ssublt z12.d, z13.s, z6.s");
  COMPARE_MACRO(Ssublt(z12.VnH(), z13.VnB(), z6.VnB()),
                "ssublt z12.h, z13.b, z6.b");
  COMPARE_MACRO(Ssublt(z12.VnS(), z13.VnH(), z6.VnH()),
                "ssublt z12.s, z13.h, z6.h");

  // Unsigned absolute difference long.
  COMPARE_MACRO(Uabdlb(z1.VnD(), z26.VnS(), z12.VnS()),
                "uabdlb z1.d, z26.s, z12.s");
  COMPARE_MACRO(Uabdlb(z1.VnH(), z26.VnB(), z12.VnB()),
                "uabdlb z1.h, z26.b, z12.b");
  COMPARE_MACRO(Uabdlb(z1.VnS(), z26.VnH(), z12.VnH()),
                "uabdlb z1.s, z26.h, z12.h");
  COMPARE_MACRO(Uabdlt(z25.VnD(), z29.VnS(), z14.VnS()),
                "uabdlt z25.d, z29.s, z14.s");
  COMPARE_MACRO(Uabdlt(z25.VnH(), z29.VnB(), z14.VnB()),
                "uabdlt z25.h, z29.b, z14.b");
  COMPARE_MACRO(Uabdlt(z25.VnS(), z29.VnH(), z14.VnH()),
                "uabdlt z25.s, z29.h, z14.h");

  // Unsigned add long.
  COMPARE_MACRO(Uaddlb(z3.VnD(), z5.VnS(), z2.VnS()),
                "uaddlb z3.d, z5.s, z2.s");
  COMPARE_MACRO(Uaddlb(z3.VnH(), z5.VnB(), z2.VnB()),
                "uaddlb z3.h, z5.b, z2.b");
  COMPARE_MACRO(Uaddlb(z3.VnS(), z5.VnH(), z2.VnH()),
                "uaddlb z3.s, z5.h, z2.h");
  COMPARE_MACRO(Uaddlt(z15.VnD(), z28.VnS(), z20.VnS()),
                "uaddlt z15.d, z28.s, z20.s");
  COMPARE_MACRO(Uaddlt(z15.VnH(), z28.VnB(), z20.VnB()),
                "uaddlt z15.h, z28.b, z20.b");
  COMPARE_MACRO(Uaddlt(z15.VnS(), z28.VnH(), z20.VnH()),
                "uaddlt z15.s, z28.h, z20.h");

  // Unsigned subtract long.
  COMPARE_MACRO(Usublb(z25.VnD(), z9.VnS(), z17.VnS()),
                "usublb z25.d, z9.s, z17.s");
  COMPARE_MACRO(Usublb(z25.VnH(), z9.VnB(), z17.VnB()),
                "usublb z25.h, z9.b, z17.b");
  COMPARE_MACRO(Usublb(z25.VnS(), z9.VnH(), z17.VnH()),
                "usublb z25.s, z9.h, z17.h");
  COMPARE_MACRO(Usublt(z5.VnD(), z11.VnS(), z15.VnS()),
                "usublt z5.d, z11.s, z15.s");
  COMPARE_MACRO(Usublt(z5.VnH(), z11.VnB(), z15.VnB()),
                "usublt z5.h, z11.b, z15.b");
  COMPARE_MACRO(Usublt(z5.VnS(), z11.VnH(), z15.VnH()),
                "usublt z5.s, z11.h, z15.h");

  CLEANUP();
}
7205
// Expected-text checks for the SVE2 "wide" add/subtract macros: Saddwb/Saddwt,
// Ssubwb/Ssubwt, Uaddwb/Uaddwt and Usubwb/Usubwt.
// Every macro is tried with D, H and S destinations; the first source uses the
// destination's lane size and the second source the next narrower one.
// NOTE(review): SETUP, COMPARE_MACRO and CLEANUP are defined outside this
// chunk - presumably each COMPARE_MACRO assembles the call and compares its
// disassembly with the string; confirm in test-disasm-aarch64.h.
TEST(sve2_arith_wide) {
  SETUP();

  // saddwb / saddwt (the saddwb cases reuse z8 for both sources).
  COMPARE_MACRO(Saddwb(z12.VnD(), z8.VnD(), z8.VnS()),
                "saddwb z12.d, z8.d, z8.s");
  COMPARE_MACRO(Saddwb(z12.VnH(), z8.VnH(), z8.VnB()),
                "saddwb z12.h, z8.h, z8.b");
  COMPARE_MACRO(Saddwb(z12.VnS(), z8.VnS(), z8.VnH()),
                "saddwb z12.s, z8.s, z8.h");
  COMPARE_MACRO(Saddwt(z24.VnD(), z0.VnD(), z3.VnS()),
                "saddwt z24.d, z0.d, z3.s");
  COMPARE_MACRO(Saddwt(z24.VnH(), z0.VnH(), z3.VnB()),
                "saddwt z24.h, z0.h, z3.b");
  COMPARE_MACRO(Saddwt(z24.VnS(), z0.VnS(), z3.VnH()),
                "saddwt z24.s, z0.s, z3.h");
  // ssubwb / ssubwt.
  COMPARE_MACRO(Ssubwb(z7.VnD(), z28.VnD(), z11.VnS()),
                "ssubwb z7.d, z28.d, z11.s");
  COMPARE_MACRO(Ssubwb(z7.VnH(), z28.VnH(), z11.VnB()),
                "ssubwb z7.h, z28.h, z11.b");
  COMPARE_MACRO(Ssubwb(z7.VnS(), z28.VnS(), z11.VnH()),
                "ssubwb z7.s, z28.s, z11.h");
  COMPARE_MACRO(Ssubwt(z29.VnD(), z25.VnD(), z20.VnS()),
                "ssubwt z29.d, z25.d, z20.s");
  COMPARE_MACRO(Ssubwt(z29.VnH(), z25.VnH(), z20.VnB()),
                "ssubwt z29.h, z25.h, z20.b");
  COMPARE_MACRO(Ssubwt(z29.VnS(), z25.VnS(), z20.VnH()),
                "ssubwt z29.s, z25.s, z20.h");
  // uaddwb / uaddwt.
  COMPARE_MACRO(Uaddwb(z31.VnD(), z8.VnD(), z25.VnS()),
                "uaddwb z31.d, z8.d, z25.s");
  COMPARE_MACRO(Uaddwb(z31.VnH(), z8.VnH(), z25.VnB()),
                "uaddwb z31.h, z8.h, z25.b");
  COMPARE_MACRO(Uaddwb(z31.VnS(), z8.VnS(), z25.VnH()),
                "uaddwb z31.s, z8.s, z25.h");
  COMPARE_MACRO(Uaddwt(z17.VnD(), z15.VnD(), z2.VnS()),
                "uaddwt z17.d, z15.d, z2.s");
  COMPARE_MACRO(Uaddwt(z17.VnH(), z15.VnH(), z2.VnB()),
                "uaddwt z17.h, z15.h, z2.b");
  COMPARE_MACRO(Uaddwt(z17.VnS(), z15.VnS(), z2.VnH()),
                "uaddwt z17.s, z15.s, z2.h");
  // usubwb / usubwt.
  COMPARE_MACRO(Usubwb(z10.VnD(), z13.VnD(), z20.VnS()),
                "usubwb z10.d, z13.d, z20.s");
  COMPARE_MACRO(Usubwb(z10.VnH(), z13.VnH(), z20.VnB()),
                "usubwb z10.h, z13.h, z20.b");
  COMPARE_MACRO(Usubwb(z10.VnS(), z13.VnS(), z20.VnH()),
                "usubwb z10.s, z13.s, z20.h");
  COMPARE_MACRO(Usubwt(z15.VnD(), z8.VnD(), z23.VnS()),
                "usubwt z15.d, z8.d, z23.s");
  COMPARE_MACRO(Usubwt(z15.VnH(), z8.VnH(), z23.VnB()),
                "usubwt z15.h, z8.h, z23.b");
  COMPARE_MACRO(Usubwt(z15.VnS(), z8.VnS(), z23.VnH()),
                "usubwt z15.s, z8.s, z23.h");

  CLEANUP();
}
7260
// Expected-text checks for the Sshllb, Sshllt, Ushllb and Ushllt macros.
// The destination lane is H, S or D with a source one size narrower (B, H or
// S). Shift immediates used are 0 and 7 for B sources, 0 and 15 for H sources
// and 0 and 31 for S sources; Sshllb on B sources additionally tries 1 and 5.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_shift_long) {
  SETUP();

  // sshllb.
  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 0), "sshllb z2.h, z20.b, #0");
  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 1), "sshllb z2.h, z20.b, #1");
  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 5), "sshllb z2.h, z20.b, #5");
  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 7), "sshllb z2.h, z20.b, #7");
  COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 0), "sshllb z2.s, z20.h, #0");
  COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 15), "sshllb z2.s, z20.h, #15");
  COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 0), "sshllb z2.d, z20.s, #0");
  COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 31), "sshllb z2.d, z20.s, #31");
  // sshllt.
  COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 0), "sshllt z27.h, z8.b, #0");
  COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 7), "sshllt z27.h, z8.b, #7");
  COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 0), "sshllt z27.s, z8.h, #0");
  COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 15), "sshllt z27.s, z8.h, #15");
  COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 0), "sshllt z27.d, z8.s, #0");
  COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 31), "sshllt z27.d, z8.s, #31");
  // ushllb.
  COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 0), "ushllb z8.h, z31.b, #0");
  COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 7), "ushllb z8.h, z31.b, #7");
  COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 0), "ushllb z8.s, z31.h, #0");
  COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 15), "ushllb z8.s, z31.h, #15");
  COMPARE_MACRO(Ushllb(z8.VnD(), z31.VnS(), 0), "ushllb z8.d, z31.s, #0");
  COMPARE_MACRO(Ushllb(z8.VnD(), z31.VnS(), 31), "ushllb z8.d, z31.s, #31");
  // ushllt.
  COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 0), "ushllt z3.h, z21.b, #0");
  COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 7), "ushllt z3.h, z21.b, #7");
  COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 0), "ushllt z3.s, z21.h, #0");
  COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 15), "ushllt z3.s, z21.h, #15");
  COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 0), "ushllt z3.d, z21.s, #0");
  COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 31), "ushllt z3.d, z21.s, #31");

  CLEANUP();
}
7293
// Expected-text checks for the narrowing right-shift macros: Shrn, Rshrn,
// Sqrshrn, Sqshrn, Uqrshrn, Uqshrn, Sqrshrun and Sqshrun, each in its "b" and
// "t" variant.
// The destination lane is B, H or S with a source one size wider (H, S or D).
// Shift immediates used are 1 and 8/16/32 for B/H/S destinations; Shrnb on a
// B destination additionally tries 2 and 5.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_shift_narrow) {
  SETUP();

  // shrnb / shrnt.
  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 1), "shrnb z7.b, z4.h, #1");
  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 2), "shrnb z7.b, z4.h, #2");
  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 5), "shrnb z7.b, z4.h, #5");
  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 8), "shrnb z7.b, z4.h, #8");
  COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 1), "shrnb z7.h, z4.s, #1");
  COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 16), "shrnb z7.h, z4.s, #16");
  COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 1), "shrnb z7.s, z4.d, #1");
  COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 32), "shrnb z7.s, z4.d, #32");
  COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 1), "shrnt z21.b, z29.h, #1");
  COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 8), "shrnt z21.b, z29.h, #8");
  COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 1), "shrnt z21.h, z29.s, #1");
  COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 16), "shrnt z21.h, z29.s, #16");
  COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 1), "shrnt z21.s, z29.d, #1");
  COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 32), "shrnt z21.s, z29.d, #32");

  // rshrnb / rshrnt.
  COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 1), "rshrnb z5.b, z1.h, #1");
  COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 8), "rshrnb z5.b, z1.h, #8");
  COMPARE_MACRO(Rshrnb(z5.VnH(), z1.VnS(), 16), "rshrnb z5.h, z1.s, #16");
  COMPARE_MACRO(Rshrnb(z5.VnS(), z1.VnD(), 32), "rshrnb z5.s, z1.d, #32");
  COMPARE_MACRO(Rshrnt(z5.VnB(), z1.VnH(), 8), "rshrnt z5.b, z1.h, #8");
  COMPARE_MACRO(Rshrnt(z5.VnH(), z1.VnS(), 16), "rshrnt z5.h, z1.s, #16");
  COMPARE_MACRO(Rshrnt(z5.VnS(), z1.VnD(), 32), "rshrnt z5.s, z1.d, #32");

  // sqrshrnb / sqrshrnt (the "b" cases use z1 as both destination and source).
  COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 1), "sqrshrnb z1.b, z1.h, #1");
  COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 8), "sqrshrnb z1.b, z1.h, #8");
  COMPARE_MACRO(Sqrshrnb(z1.VnH(), z1.VnS(), 16), "sqrshrnb z1.h, z1.s, #16");
  COMPARE_MACRO(Sqrshrnb(z1.VnS(), z1.VnD(), 32), "sqrshrnb z1.s, z1.d, #32");
  COMPARE_MACRO(Sqrshrnt(z24.VnB(), z19.VnH(), 8), "sqrshrnt z24.b, z19.h, #8");
  COMPARE_MACRO(Sqrshrnt(z24.VnH(), z19.VnS(), 16),
                "sqrshrnt z24.h, z19.s, #16");
  COMPARE_MACRO(Sqrshrnt(z24.VnS(), z19.VnD(), 32),
                "sqrshrnt z24.s, z19.d, #32");

  // sqshrnb / sqshrnt.
  COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 1), "sqshrnb z25.b, z1.h, #1");
  COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 8), "sqshrnb z25.b, z1.h, #8");
  COMPARE_MACRO(Sqshrnb(z25.VnH(), z1.VnS(), 16), "sqshrnb z25.h, z1.s, #16");
  COMPARE_MACRO(Sqshrnb(z25.VnS(), z1.VnD(), 32), "sqshrnb z25.s, z1.d, #32");
  COMPARE_MACRO(Sqshrnt(z0.VnB(), z25.VnH(), 8), "sqshrnt z0.b, z25.h, #8");
  COMPARE_MACRO(Sqshrnt(z0.VnH(), z25.VnS(), 16), "sqshrnt z0.h, z25.s, #16");
  COMPARE_MACRO(Sqshrnt(z0.VnS(), z25.VnD(), 32), "sqshrnt z0.s, z25.d, #32");

  // uqrshrnb / uqrshrnt.
  COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 1), "uqrshrnb z30.b, z25.h, #1");
  COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 8), "uqrshrnb z30.b, z25.h, #8");
  COMPARE_MACRO(Uqrshrnb(z30.VnH(), z25.VnS(), 16),
                "uqrshrnb z30.h, z25.s, #16");
  COMPARE_MACRO(Uqrshrnb(z30.VnS(), z25.VnD(), 32),
                "uqrshrnb z30.s, z25.d, #32");
  COMPARE_MACRO(Uqrshrnt(z3.VnB(), z25.VnH(), 8), "uqrshrnt z3.b, z25.h, #8");
  COMPARE_MACRO(Uqrshrnt(z3.VnH(), z25.VnS(), 16), "uqrshrnt z3.h, z25.s, #16");
  COMPARE_MACRO(Uqrshrnt(z3.VnS(), z25.VnD(), 32), "uqrshrnt z3.s, z25.d, #32");

  // uqshrnb / uqshrnt.
  COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 1), "uqshrnb z17.b, z4.h, #1");
  COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 8), "uqshrnb z17.b, z4.h, #8");
  COMPARE_MACRO(Uqshrnb(z17.VnH(), z4.VnS(), 16), "uqshrnb z17.h, z4.s, #16");
  COMPARE_MACRO(Uqshrnb(z17.VnS(), z4.VnD(), 32), "uqshrnb z17.s, z4.d, #32");
  COMPARE_MACRO(Uqshrnt(z28.VnB(), z18.VnH(), 8), "uqshrnt z28.b, z18.h, #8");
  COMPARE_MACRO(Uqshrnt(z28.VnH(), z18.VnS(), 16), "uqshrnt z28.h, z18.s, #16");
  COMPARE_MACRO(Uqshrnt(z28.VnS(), z18.VnD(), 32), "uqshrnt z28.s, z18.d, #32");

  // sqrshrunb / sqrshrunt.
  COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 1),
                "sqrshrunb z23.b, z28.h, #1");
  COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 8),
                "sqrshrunb z23.b, z28.h, #8");
  COMPARE_MACRO(Sqrshrunb(z23.VnH(), z28.VnS(), 16),
                "sqrshrunb z23.h, z28.s, #16");
  COMPARE_MACRO(Sqrshrunb(z23.VnS(), z28.VnD(), 32),
                "sqrshrunb z23.s, z28.d, #32");
  COMPARE_MACRO(Sqrshrunt(z9.VnB(), z15.VnH(), 8), "sqrshrunt z9.b, z15.h, #8");
  COMPARE_MACRO(Sqrshrunt(z9.VnH(), z15.VnS(), 16),
                "sqrshrunt z9.h, z15.s, #16");
  COMPARE_MACRO(Sqrshrunt(z9.VnS(), z15.VnD(), 32),
                "sqrshrunt z9.s, z15.d, #32");

  // sqshrunb / sqshrunt.
  COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 1), "sqshrunb z25.b, z10.h, #1");
  COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 8), "sqshrunb z25.b, z10.h, #8");
  COMPARE_MACRO(Sqshrunb(z25.VnH(), z10.VnS(), 16),
                "sqshrunb z25.h, z10.s, #16");
  COMPARE_MACRO(Sqshrunb(z25.VnS(), z10.VnD(), 32),
                "sqshrunb z25.s, z10.d, #32");
  COMPARE_MACRO(Sqshrunt(z20.VnB(), z3.VnH(), 8), "sqshrunt z20.b, z3.h, #8");
  COMPARE_MACRO(Sqshrunt(z20.VnH(), z3.VnS(), 16), "sqshrunt z20.h, z3.s, #16");
  COMPARE_MACRO(Sqshrunt(z20.VnS(), z3.VnD(), 32), "sqshrunt z20.s, z3.d, #32");

  CLEANUP();
}
7382
// Expected-text checks for sabalb/sabalt/uabalb/uabalt, first through the
// three-operand assembler calls (COMPARE) and then through the four-operand
// macros (COMPARE_MACRO), whose first two operands are a destination and a
// separate accumulator input.
// NOTE(review): COMPARE and COMPARE_MACRO are defined outside this chunk; they
// presumably compare the disassembly of the generated code with the string.
TEST(sve2_aba_long) {
  SETUP();

  // Assembler-level forms, with D, H and S destinations.
  COMPARE(sabalb(z13.VnD(), z20.VnS(), z26.VnS()),
          "sabalb z13.d, z20.s, z26.s");
  COMPARE(sabalb(z13.VnH(), z20.VnB(), z26.VnB()),
          "sabalb z13.h, z20.b, z26.b");
  COMPARE(sabalb(z13.VnS(), z20.VnH(), z26.VnH()),
          "sabalb z13.s, z20.h, z26.h");
  COMPARE(sabalt(z14.VnD(), z19.VnS(), z10.VnS()),
          "sabalt z14.d, z19.s, z10.s");
  COMPARE(sabalt(z14.VnH(), z19.VnB(), z10.VnB()),
          "sabalt z14.h, z19.b, z10.b");
  COMPARE(sabalt(z14.VnS(), z19.VnH(), z10.VnH()),
          "sabalt z14.s, z19.h, z10.h");
  COMPARE(uabalb(z11.VnD(), z25.VnS(), z11.VnS()),
          "uabalb z11.d, z25.s, z11.s");
  COMPARE(uabalb(z11.VnH(), z25.VnB(), z11.VnB()),
          "uabalb z11.h, z25.b, z11.b");
  COMPARE(uabalb(z11.VnS(), z25.VnH(), z11.VnH()),
          "uabalb z11.s, z25.h, z11.h");
  COMPARE(uabalt(z4.VnD(), z2.VnS(), z31.VnS()), "uabalt z4.d, z2.s, z31.s");
  COMPARE(uabalt(z4.VnH(), z2.VnB(), z31.VnB()), "uabalt z4.h, z2.b, z31.b");
  COMPARE(uabalt(z4.VnS(), z2.VnH(), z31.VnH()), "uabalt z4.s, z2.h, z31.h");

  // Signed macros. Destination == accumulator: a single instruction is
  // expected, even when a source also names the destination.
  COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()),
                "sabalb z12.h, z3.b, z30.b");
  COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()),
                "sabalt z12.h, z3.b, z12.b");
  COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()),
                "sabalb z12.h, z12.b, z30.b");
  // All four operands are z12: the expected text is empty.
  COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), "");
  // Destination != accumulator: a movprfx from the accumulator is expected.
  COMPARE_MACRO(Sabalb(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()),
                "movprfx z12, z13\n"
                "sabalb z12.h, z3.b, z30.b");
  COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()),
                "movprfx z12, z3\n"
                "sabalt z12.h, z3.b, z30.b");
  COMPARE_MACRO(Sabalb(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()),
                "movprfx z12, z30\n"
                "sabalb z12.h, z3.b, z30.b");
  // Both sources are the same register (z3): only a mov of the accumulator is
  // expected.
  COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()),
                "mov z12.d, z3.d");
  // Destination also used as a source: the expected text first copies it to
  // z31 and uses z31 in its place.
  COMPARE_MACRO(Sabalb(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "sabalb z12.h, z31.b, z3.b");
  COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "sabalt z12.h, z3.b, z31.b");

  // Unsigned macros: the same operand patterns as the signed ones above.
  COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()),
                "uabalt z12.h, z3.b, z30.b");
  COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()),
                "uabalb z12.h, z3.b, z12.b");
  COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()),
                "uabalt z12.h, z12.b, z30.b");
  COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), "");
  COMPARE_MACRO(Uabalt(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()),
                "movprfx z12, z13\n"
                "uabalt z12.h, z3.b, z30.b");
  COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()),
                "movprfx z12, z3\n"
                "uabalb z12.h, z3.b, z30.b");
  COMPARE_MACRO(Uabalt(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()),
                "movprfx z12, z30\n"
                "uabalt z12.h, z3.b, z30.b");
  COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()),
                "mov z12.d, z3.d");
  COMPARE_MACRO(Uabalt(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "uabalt z12.h, z31.b, z3.b");
  COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()),
                "mov z31.d, z12.d\n"
                "movprfx z12, z3\n"
                "uabalb z12.h, z3.b, z31.b");
  CLEANUP();
}
7463
// Expected-text checks for adclb/adclt/sbclb/sbclt, first through the
// three-operand assembler calls (COMPARE, S and D lanes) and then through the
// four-operand macros (COMPARE_MACRO, S lanes), whose first two operands are a
// destination and a separate accumulator input.
// NOTE(review): COMPARE and COMPARE_MACRO are defined outside this chunk; they
// presumably compare the disassembly of the generated code with the string.
TEST(sve2_add_sub_carry) {
  SETUP();

  // Assembler-level forms.
  COMPARE(adclb(z25.VnS(), z17.VnS(), z24.VnS()), "adclb z25.s, z17.s, z24.s");
  COMPARE(adclb(z25.VnD(), z17.VnD(), z24.VnD()), "adclb z25.d, z17.d, z24.d");
  COMPARE(adclt(z0.VnS(), z2.VnS(), z15.VnS()), "adclt z0.s, z2.s, z15.s");
  COMPARE(adclt(z0.VnD(), z2.VnD(), z15.VnD()), "adclt z0.d, z2.d, z15.d");
  COMPARE(sbclb(z17.VnS(), z10.VnS(), z8.VnS()), "sbclb z17.s, z10.s, z8.s");
  COMPARE(sbclb(z17.VnD(), z10.VnD(), z8.VnD()), "sbclb z17.d, z10.d, z8.d");
  COMPARE(sbclt(z20.VnS(), z0.VnS(), z13.VnS()), "sbclt z20.s, z0.s, z13.s");
  COMPARE(sbclt(z20.VnD(), z0.VnD(), z13.VnD()), "sbclt z20.d, z0.d, z13.d");

  // Adclb/Adclt macros.
  // Destination == accumulator: a single instruction is expected.
  COMPARE_MACRO(Adclb(z25.VnS(), z25.VnS(), z17.VnS(), z24.VnS()),
                "adclb z25.s, z17.s, z24.s");
  // Destination distinct from every input: movprfx from the accumulator.
  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z17.VnS(), z24.VnS()),
                "movprfx z25, z20\n"
                "adclb z25.s, z17.s, z24.s");
  // Destination also used as a source: the expected text computes into z31
  // and then moves z31 to the destination.
  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z24.VnS()),
                "movprfx z31, z20\n"
                "adclb z31.s, z25.s, z24.s\n"
                "mov z25.d, z31.d");
  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z24.VnS(), z25.VnS()),
                "movprfx z31, z20\n"
                "adclb z31.s, z24.s, z25.s\n"
                "mov z25.d, z31.d");
  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()),
                "movprfx z31, z20\n"
                "adclb z31.s, z25.s, z25.s\n"
                "mov z25.d, z31.d");
  COMPARE_MACRO(Adclt(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()),
                "movprfx z31, z20\n"
                "adclt z31.s, z25.s, z25.s\n"
                "mov z25.d, z31.d");

  // Sbclb/Sbclt macros: the same operand patterns as for Adclb/Adclt.
  COMPARE_MACRO(Sbclb(z30.VnS(), z30.VnS(), z7.VnS(), z29.VnS()),
                "sbclb z30.s, z7.s, z29.s");
  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z7.VnS(), z29.VnS()),
                "movprfx z30, z2\n"
                "sbclb z30.s, z7.s, z29.s");
  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z29.VnS()),
                "movprfx z31, z2\n"
                "sbclb z31.s, z30.s, z29.s\n"
                "mov z30.d, z31.d");
  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z29.VnS(), z30.VnS()),
                "movprfx z31, z2\n"
                "sbclb z31.s, z29.s, z30.s\n"
                "mov z30.d, z31.d");
  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()),
                "movprfx z31, z2\n"
                "sbclb z31.s, z30.s, z30.s\n"
                "mov z30.d, z31.d");
  COMPARE_MACRO(Sbclt(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()),
                "movprfx z31, z2\n"
                "sbclt z31.s, z30.s, z30.s\n"
                "mov z30.d, z31.d");
  CLEANUP();
}
7521
// Expected-text checks for the Addhn, Raddhn, Rsubhn and Subhn macros, each in
// its "b" and "t" variant.
// Every macro is tried with S, B and H destinations; both sources use the next
// wider lane size (D, H and S respectively).
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_add_sub_high) {
  SETUP();

  // addhnb / addhnt.
  COMPARE_MACRO(Addhnb(z29.VnS(), z19.VnD(), z2.VnD()),
                "addhnb z29.s, z19.d, z2.d");
  COMPARE_MACRO(Addhnb(z29.VnB(), z19.VnH(), z2.VnH()),
                "addhnb z29.b, z19.h, z2.h");
  COMPARE_MACRO(Addhnb(z29.VnH(), z19.VnS(), z2.VnS()),
                "addhnb z29.h, z19.s, z2.s");
  COMPARE_MACRO(Addhnt(z8.VnS(), z12.VnD(), z6.VnD()),
                "addhnt z8.s, z12.d, z6.d");
  COMPARE_MACRO(Addhnt(z8.VnB(), z12.VnH(), z6.VnH()),
                "addhnt z8.b, z12.h, z6.h");
  COMPARE_MACRO(Addhnt(z8.VnH(), z12.VnS(), z6.VnS()),
                "addhnt z8.h, z12.s, z6.s");
  // raddhnb / raddhnt.
  COMPARE_MACRO(Raddhnb(z0.VnS(), z11.VnD(), z10.VnD()),
                "raddhnb z0.s, z11.d, z10.d");
  COMPARE_MACRO(Raddhnb(z0.VnB(), z11.VnH(), z10.VnH()),
                "raddhnb z0.b, z11.h, z10.h");
  COMPARE_MACRO(Raddhnb(z0.VnH(), z11.VnS(), z10.VnS()),
                "raddhnb z0.h, z11.s, z10.s");
  COMPARE_MACRO(Raddhnt(z23.VnS(), z27.VnD(), z9.VnD()),
                "raddhnt z23.s, z27.d, z9.d");
  COMPARE_MACRO(Raddhnt(z23.VnB(), z27.VnH(), z9.VnH()),
                "raddhnt z23.b, z27.h, z9.h");
  COMPARE_MACRO(Raddhnt(z23.VnH(), z27.VnS(), z9.VnS()),
                "raddhnt z23.h, z27.s, z9.s");
  // rsubhnb / rsubhnt.
  COMPARE_MACRO(Rsubhnb(z30.VnS(), z29.VnD(), z11.VnD()),
                "rsubhnb z30.s, z29.d, z11.d");
  COMPARE_MACRO(Rsubhnb(z30.VnB(), z29.VnH(), z11.VnH()),
                "rsubhnb z30.b, z29.h, z11.h");
  COMPARE_MACRO(Rsubhnb(z30.VnH(), z29.VnS(), z11.VnS()),
                "rsubhnb z30.h, z29.s, z11.s");
  COMPARE_MACRO(Rsubhnt(z25.VnS(), z7.VnD(), z18.VnD()),
                "rsubhnt z25.s, z7.d, z18.d");
  COMPARE_MACRO(Rsubhnt(z25.VnB(), z7.VnH(), z18.VnH()),
                "rsubhnt z25.b, z7.h, z18.h");
  COMPARE_MACRO(Rsubhnt(z25.VnH(), z7.VnS(), z18.VnS()),
                "rsubhnt z25.h, z7.s, z18.s");
  // subhnb / subhnt (the subhnb cases use z31 as destination and first
  // source).
  COMPARE_MACRO(Subhnb(z31.VnS(), z31.VnD(), z7.VnD()),
                "subhnb z31.s, z31.d, z7.d");
  COMPARE_MACRO(Subhnb(z31.VnB(), z31.VnH(), z7.VnH()),
                "subhnb z31.b, z31.h, z7.h");
  COMPARE_MACRO(Subhnb(z31.VnH(), z31.VnS(), z7.VnS()),
                "subhnb z31.h, z31.s, z7.s");
  COMPARE_MACRO(Subhnt(z31.VnS(), z22.VnD(), z27.VnD()),
                "subhnt z31.s, z22.d, z27.d");
  COMPARE_MACRO(Subhnt(z31.VnB(), z22.VnH(), z27.VnH()),
                "subhnt z31.b, z22.h, z27.h");
  COMPARE_MACRO(Subhnt(z31.VnH(), z22.VnS(), z27.VnS()),
                "subhnt z31.h, z22.s, z27.s");

  CLEANUP();
}
7576
// Expected-text checks for the Cadd and Sqcadd macros.
// Each macro is tried with B, D, H and S lanes and with the immediates 90 and
// 270, followed by cases where the destination differs from the first source.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_complex_addition) {
  SETUP();

  // Cadd, destination == first source: a single instruction is expected.
  COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90),
                "cadd z5.b, z5.b, z12.b, #90");
  COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 90),
                "cadd z5.d, z5.d, z12.d, #90");
  COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 90),
                "cadd z5.h, z5.h, z12.h, #90");
  COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 90),
                "cadd z5.s, z5.s, z12.s, #90");
  COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 270),
                "cadd z5.b, z5.b, z12.b, #270");
  COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 270),
                "cadd z5.d, z5.d, z12.d, #270");
  COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 270),
                "cadd z5.h, z5.h, z12.h, #270");
  COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 270),
                "cadd z5.s, z5.s, z12.s, #270");
  // Destination != first source: movprfx from the first source is expected.
  COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z12.VnS(), 270),
                "movprfx z5, z6\n"
                "cadd z5.s, z5.s, z12.s, #270");
  // Destination == second source: the expected text first copies it to z31.
  COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z5.VnS(), 270),
                "mov z31.d, z5.d\n"
                "movprfx z5, z6\n"
                "cadd z5.s, z5.s, z31.s, #270");

  // Sqcadd: the same lane/immediate matrix as Cadd above.
  COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90),
                "sqcadd z20.b, z20.b, z23.b, #90");
  COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 90),
                "sqcadd z20.d, z20.d, z23.d, #90");
  COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 90),
                "sqcadd z20.h, z20.h, z23.h, #90");
  // The S-lane / #90 combination completes the matrix; it mirrors the
  // corresponding Cadd case.
  COMPARE_MACRO(Sqcadd(z20.VnS(), z20.VnS(), z23.VnS(), 90),
                "sqcadd z20.s, z20.s, z23.s, #90");
  COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 270),
                "sqcadd z20.b, z20.b, z23.b, #270");
  COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 270),
                "sqcadd z20.d, z20.d, z23.d, #270");
  COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 270),
                "sqcadd z20.h, z20.h, z23.h, #270");
  COMPARE_MACRO(Sqcadd(z20.VnS(), z20.VnS(), z23.VnS(), 270),
                "sqcadd z20.s, z20.s, z23.s, #270");
  // Destination != first source, and destination == second source.
  COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z23.VnH(), 270),
                "movprfx z20, z21\n"
                "sqcadd z20.h, z20.h, z23.h, #270");
  COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z20.VnH(), 270),
                "mov z31.d, z20.d\n"
                "movprfx z20, z21\n"
                "sqcadd z20.h, z20.h, z31.h, #270");

  CLEANUP();
}
7628
// Expected-text checks for the Bdep, Bext and Bgrp macros.
// Each macro is tried with B, D, H and S lanes, using the same lane size for
// the destination and both sources.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_bit_permute) {
  SETUP();

  // bdep.
  COMPARE_MACRO(Bdep(z18.VnB(), z10.VnB(), z0.VnB()),
                "bdep z18.b, z10.b, z0.b");
  COMPARE_MACRO(Bdep(z18.VnD(), z10.VnD(), z0.VnD()),
                "bdep z18.d, z10.d, z0.d");
  COMPARE_MACRO(Bdep(z18.VnH(), z10.VnH(), z0.VnH()),
                "bdep z18.h, z10.h, z0.h");
  COMPARE_MACRO(Bdep(z18.VnS(), z10.VnS(), z0.VnS()),
                "bdep z18.s, z10.s, z0.s");
  // bext.
  COMPARE_MACRO(Bext(z6.VnB(), z2.VnB(), z5.VnB()), "bext z6.b, z2.b, z5.b");
  COMPARE_MACRO(Bext(z6.VnD(), z2.VnD(), z5.VnD()), "bext z6.d, z2.d, z5.d");
  COMPARE_MACRO(Bext(z6.VnH(), z2.VnH(), z5.VnH()), "bext z6.h, z2.h, z5.h");
  COMPARE_MACRO(Bext(z6.VnS(), z2.VnS(), z5.VnS()), "bext z6.s, z2.s, z5.s");
  // bgrp.
  COMPARE_MACRO(Bgrp(z24.VnB(), z9.VnB(), z5.VnB()), "bgrp z24.b, z9.b, z5.b");
  COMPARE_MACRO(Bgrp(z24.VnD(), z9.VnD(), z5.VnD()), "bgrp z24.d, z9.d, z5.d");
  COMPARE_MACRO(Bgrp(z24.VnH(), z9.VnH(), z5.VnH()), "bgrp z24.h, z9.h, z5.h");
  COMPARE_MACRO(Bgrp(z24.VnS(), z9.VnS(), z5.VnS()), "bgrp z24.s, z9.s, z5.s");

  CLEANUP();
}
7651
// Expected-text checks for the assembler-level long multiplies: sqdmullb,
// sqdmullt, pmullb, pmullt, smullb, smullt, umullb and umullt.
// The destination lane is one size wider than the source lanes. The sqdmull
// checks also cover the form taking a trailing integer, which the expected
// text shows as an index on the last register (e.g. "z3.h[7]").
// NOTE(review): COMPARE is defined outside this chunk; it presumably compares
// the disassembly of the generated code with the given string.
TEST(sve2_integer_multiply_long_vector) {
  SETUP();

  // sqdmullb: vector forms, then indexed forms.
  COMPARE(sqdmullb(z1.VnD(), z31.VnS(), z21.VnS()),
          "sqdmullb z1.d, z31.s, z21.s");
  COMPARE(sqdmullb(z2.VnH(), z30.VnB(), z22.VnB()),
          "sqdmullb z2.h, z30.b, z22.b");
  COMPARE(sqdmullb(z3.VnS(), z29.VnH(), z23.VnH()),
          "sqdmullb z3.s, z29.h, z23.h");
  COMPARE(sqdmullb(z1.VnS(), z27.VnH(), z3.VnH(), 7),
          "sqdmullb z1.s, z27.h, z3.h[7]");
  COMPARE(sqdmullb(z27.VnD(), z16.VnS(), z5.VnS(), 3),
          "sqdmullb z27.d, z16.s, z5.s[3]");

  // sqdmullt: vector forms, then indexed forms.
  COMPARE(sqdmullt(z2.VnD(), z1.VnS(), z5.VnS()), "sqdmullt z2.d, z1.s, z5.s");
  COMPARE(sqdmullt(z12.VnH(), z11.VnB(), z15.VnB()),
          "sqdmullt z12.h, z11.b, z15.b");
  COMPARE(sqdmullt(z20.VnS(), z21.VnH(), z25.VnH()),
          "sqdmullt z20.s, z21.h, z25.h");
  COMPARE(sqdmullt(z23.VnS(), z28.VnH(), z2.VnH(), 0),
          "sqdmullt z23.s, z28.h, z2.h[0]");
  COMPARE(sqdmullt(z7.VnD(), z4.VnS(), z0.VnS(), 0),
          "sqdmullt z7.d, z4.s, z0.s[0]");

  // Feature `SVEPmull128` is not supported.
  // COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()),
  //         "pmullb z12.q, z21.d, z12.d");
  COMPARE(pmullb(z12.VnH(), z21.VnB(), z12.VnB()),
          "pmullb z12.h, z21.b, z12.b");
  COMPARE(pmullt(z31.VnD(), z30.VnS(), z26.VnS()),
          "pmullt z31.d, z30.s, z26.s");

  // smullb (each case passes the same register for both sources).
  COMPARE(smullb(z10.VnD(), z4.VnS(), z4.VnS()), "smullb z10.d, z4.s, z4.s");
  COMPARE(smullb(z11.VnH(), z14.VnB(), z14.VnB()),
          "smullb z11.h, z14.b, z14.b");
  COMPARE(smullb(z12.VnS(), z24.VnH(), z24.VnH()),
          "smullb z12.s, z24.h, z24.h");

  // smullt.
  COMPARE(smullt(z31.VnD(), z26.VnS(), z5.VnS()), "smullt z31.d, z26.s, z5.s");
  COMPARE(smullt(z21.VnH(), z16.VnB(), z5.VnB()), "smullt z21.h, z16.b, z5.b");
  COMPARE(smullt(z11.VnS(), z6.VnH(), z5.VnH()), "smullt z11.s, z6.h, z5.h");

  // umullb.
  COMPARE(umullb(z12.VnD(), z5.VnS(), z2.VnS()), "umullb z12.d, z5.s, z2.s");
  COMPARE(umullb(z12.VnH(), z15.VnB(), z12.VnB()),
          "umullb z12.h, z15.b, z12.b");
  COMPARE(umullb(z12.VnS(), z25.VnH(), z22.VnH()),
          "umullb z12.s, z25.h, z22.h");

  // umullt.
  COMPARE(umullt(z24.VnD(), z6.VnS(), z6.VnS()), "umullt z24.d, z6.s, z6.s");
  COMPARE(umullt(z24.VnH(), z7.VnB(), z16.VnB()), "umullt z24.h, z7.b, z16.b");
  COMPARE(umullt(z24.VnS(), z8.VnH(), z26.VnH()), "umullt z24.s, z8.h, z26.h");

  CLEANUP();
}
7706
// Expected-text checks for the Xar macro.
// B, H, S and D lanes are each tried with the immediate 1 and with 8, 16, 32
// and 64 respectively, followed by cases where the destination is not the
// first source.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_xar) {
  SETUP();

  // Destination == first source: a single instruction is expected.
  COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 1),
                "xar z16.b, z16.b, z13.b, #1");
  COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 8),
                "xar z16.b, z16.b, z13.b, #8");
  COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 1),
                "xar z16.h, z16.h, z13.h, #1");
  COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 16),
                "xar z16.h, z16.h, z13.h, #16");
  COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 1),
                "xar z16.s, z16.s, z13.s, #1");
  COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 32),
                "xar z16.s, z16.s, z13.s, #32");
  COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 1),
                "xar z16.d, z16.d, z13.d, #1");
  COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 64),
                "xar z16.d, z16.d, z13.d, #64");

  // Destination == second source: the expected text has the two sources
  // swapped, so the destination appears as the first source.
  COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z16.VnD(), 64),
                "xar z16.d, z16.d, z13.d, #64");
  // Destination distinct from both sources: movprfx from the first source.
  COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z12.VnD(), 64),
                "movprfx z16, z13\n"
                "xar z16.d, z16.d, z12.d, #64");
  // All three operands are the same register.
  COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z16.VnD(), 64),
                "xar z16.d, z16.d, z16.d, #64");

  CLEANUP();
}
7737
// Expected-text checks for the Histcnt and Histseg macros.
// Histcnt takes a zeroing predicate and is tried with S and D lanes; Histseg
// takes no predicate and is tried with B lanes.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_histogram) {
  SETUP();

  COMPARE_MACRO(Histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS()),
                "histcnt z24.s, p6/z, z3.s, z10.s");
  COMPARE_MACRO(Histcnt(z24.VnD(), p6.Zeroing(), z3.VnD(), z10.VnD()),
                "histcnt z24.d, p6/z, z3.d, z10.d");
  COMPARE_MACRO(Histseg(z22.VnB(), z14.VnB(), z8.VnB()),
                "histseg z22.b, z14.b, z8.b");

  CLEANUP();
}
7750
// Expected-text checks for the Tbl macro taking two table registers and for
// the Tbx macro, each with B, D, H and S lanes.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_table) {
  SETUP();

  // The two table registers passed to Tbl are the ones listed between braces
  // in the expected text: {z3, z4} here and {z31, z0} in the S-lane case.
  COMPARE_MACRO(Tbl(z17.VnB(), z3.VnB(), z4.VnB(), z22.VnB()),
                "tbl z17.b, {z3.b, z4.b}, z22.b");
  COMPARE_MACRO(Tbl(z17.VnD(), z3.VnD(), z4.VnD(), z22.VnD()),
                "tbl z17.d, {z3.d, z4.d}, z22.d");
  COMPARE_MACRO(Tbl(z17.VnH(), z3.VnH(), z4.VnH(), z22.VnH()),
                "tbl z17.h, {z3.h, z4.h}, z22.h");
  COMPARE_MACRO(Tbl(z17.VnS(), z31.VnS(), z0.VnS(), z22.VnS()),
                "tbl z17.s, {z31.s, z0.s}, z22.s");
  // tbx.
  COMPARE_MACRO(Tbx(z22.VnB(), z15.VnB(), z19.VnB()),
                "tbx z22.b, z15.b, z19.b");
  COMPARE_MACRO(Tbx(z22.VnD(), z15.VnD(), z19.VnD()),
                "tbx z22.d, z15.d, z19.d");
  COMPARE_MACRO(Tbx(z22.VnH(), z15.VnH(), z19.VnH()),
                "tbx z22.h, z15.h, z19.h");
  COMPARE_MACRO(Tbx(z22.VnS(), z15.VnS(), z19.VnS()),
                "tbx z22.s, z15.s, z19.s");

  CLEANUP();
}
7773
// Expected-text checks for the Cdot macro in two forms:
//  - five operands: destination, accumulator, two sources and a final
//    immediate (0, 90, 180 or 270);
//  - six operands: as above with an extra integer before the immediate, which
//    the expected text shows as an index on the last register ("z7.b[2]").
// S destinations use B sources and D destinations use H sources.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_cdot) {
  SETUP();

  // Vector form, destination == accumulator: a single instruction is expected.
  COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 0),
                "cdot z7.s, z4.b, z10.b, #0");
  COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 0),
                "cdot z7.d, z4.h, z10.h, #0");
  COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 90),
                "cdot z7.s, z4.b, z10.b, #90");
  COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 90),
                "cdot z7.d, z4.h, z10.h, #90");
  COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 180),
                "cdot z7.s, z4.b, z10.b, #180");
  COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 180),
                "cdot z7.d, z4.h, z10.h, #180");
  COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 270),
                "cdot z7.s, z4.b, z10.b, #270");
  COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 270),
                "cdot z7.d, z4.h, z10.h, #270");

  // Vector form, destination != accumulator: movprfx from the accumulator.
  // When the destination is also a source, the expected text first copies it
  // to z31 and uses z31 in its place.
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0),
                "movprfx z0, z1\n"
                "cdot z0.s, z2.b, z3.b, #0");
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 0),
                "mov z31.d, z0.d\n"
                "movprfx z0, z1\n"
                "cdot z0.s, z31.b, z3.b, #0");
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 0),
                "mov z31.d, z0.d\n"
                "movprfx z0, z1\n"
                "cdot z0.s, z2.b, z31.b, #0");
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 0),
                "mov z31.d, z0.d\n"
                "movprfx z0, z1\n"
                "cdot z0.s, z31.b, z31.b, #0");

  // Indexed form, destination == accumulator. S destinations use indices 0-3
  // and D destinations use indices 0-1.
  COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 0, 0),
                "cdot z18.s, z26.b, z7.b[0], #0");
  COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 1, 0),
                "cdot z18.s, z26.b, z7.b[1], #0");
  COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 0),
                "cdot z18.s, z26.b, z7.b[2], #0");
  COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 3, 0),
                "cdot z18.s, z26.b, z7.b[3], #0");
  COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 90),
                "cdot z18.s, z26.b, z7.b[2], #90");
  COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 180),
                "cdot z18.s, z26.b, z7.b[2], #180");
  COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 270),
                "cdot z18.s, z26.b, z7.b[2], #270");
  COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 0, 0),
                "cdot z5.d, z7.h, z1.h[0], #0");
  COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 0),
                "cdot z5.d, z7.h, z1.h[1], #0");
  COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 90),
                "cdot z5.d, z7.h, z1.h[1], #90");
  COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 180),
                "cdot z5.d, z7.h, z1.h[1], #180");
  COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 270),
                "cdot z5.d, z7.h, z1.h[1], #270");

  // Indexed form, destination != accumulator. When the destination is also a
  // source, the expected text computes into z31 and then moves z31 to the
  // destination (unlike the vector form above).
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0, 0),
                "movprfx z0, z1\n"
                "cdot z0.s, z2.b, z3.b[0], #0");
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 1, 90),
                "movprfx z31, z1\n"
                "cdot z31.s, z0.b, z3.b[1], #90\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 2, 180),
                "movprfx z31, z1\n"
                "cdot z31.s, z2.b, z0.b[2], #180\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 3, 270),
                "movprfx z31, z1\n"
                "cdot z31.s, z0.b, z0.b[3], #270\n"
                "mov z0.d, z31.d");

  CLEANUP();
}
7853
// Expected-text checks for the Ldnt1b/h/w/d and Ldnt1sb/sh/sw macros when the
// address is an SVEMemOperand built from a Z register and an X register.
// All loads use a zeroing predicate. S-lane cases come first, then D-lane
// cases; Ldnt1d and Ldnt1sw appear only with D lanes.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; it presumably
// compares the disassembly of the generated code with the given string.
TEST(sve2_ldnt1) {
  SETUP();

  // S lanes.
  COMPARE_MACRO(Ldnt1b(z24.VnS(), p4.Zeroing(), SVEMemOperand(z18.VnS(), x13)),
                "ldnt1b {z24.s}, p4/z, [z18.s, x13]");
  COMPARE_MACRO(Ldnt1h(z3.VnS(), p4.Zeroing(), SVEMemOperand(z15.VnS(), x14)),
                "ldnt1h {z3.s}, p4/z, [z15.s, x14]");
  COMPARE_MACRO(Ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11)),
                "ldnt1sb {z7.s}, p3/z, [z18.s, x11]");
  COMPARE_MACRO(Ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19)),
                "ldnt1sh {z17.s}, p5/z, [z31.s, x19]");
  COMPARE_MACRO(Ldnt1w(z18.VnS(), p5.Zeroing(), SVEMemOperand(z9.VnS(), x17)),
                "ldnt1w {z18.s}, p5/z, [z9.s, x17]");

  // D lanes (the first case uses z27 as both destination and base).
  COMPARE_MACRO(Ldnt1b(z27.VnD(), p4.Zeroing(), SVEMemOperand(z27.VnD(), x24)),
                "ldnt1b {z27.d}, p4/z, [z27.d, x24]");
  COMPARE_MACRO(Ldnt1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(z10.VnD(), x0)),
                "ldnt1d {z25.d}, p0/z, [z10.d, x0]");
  COMPARE_MACRO(Ldnt1h(z16.VnD(), p2.Zeroing(), SVEMemOperand(z10.VnD(), x9)),
                "ldnt1h {z16.d}, p2/z, [z10.d, x9]");
  COMPARE_MACRO(Ldnt1sb(z25.VnD(), p0.Zeroing(), SVEMemOperand(z0.VnD(), x3)),
                "ldnt1sb {z25.d}, p0/z, [z0.d, x3]");
  COMPARE_MACRO(Ldnt1sh(z4.VnD(), p1.Zeroing(), SVEMemOperand(z31.VnD(), x4)),
                "ldnt1sh {z4.d}, p1/z, [z31.d, x4]");
  COMPARE_MACRO(Ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10)),
                "ldnt1sw {z3.d}, p7/z, [z1.d, x10]");
  COMPARE_MACRO(Ldnt1w(z17.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), x12)),
                "ldnt1w {z17.d}, p5/z, [z8.d, x12]");

  CLEANUP();
}
7885
// Pairs each Stnt1b/d/h/w macro call, taking an unqualified predicate and an
// SVEMemOperand built from a Z register and an X register, with its expected
// disassembly text. Stnt1h and Stnt1w are listed with both .s and .d lanes;
// Stnt1b and Stnt1d only with .d lanes.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_stnt1)7886 TEST(sve2_stnt1) {
7887 SETUP();
7888
7889 COMPARE_MACRO(Stnt1b(z29.VnD(), p7, SVEMemOperand(z29.VnD(), x21)),
7890 "stnt1b {z29.d}, p7, [z29.d, x21]");
7891 COMPARE_MACRO(Stnt1d(z19.VnD(), p4, SVEMemOperand(z3.VnD(), x16)),
7892 "stnt1d {z19.d}, p4, [z3.d, x16]");
7893 COMPARE_MACRO(Stnt1h(z11.VnS(), p3, SVEMemOperand(z2.VnS(), x16)),
7894 "stnt1h {z11.s}, p3, [z2.s, x16]");
7895 COMPARE_MACRO(Stnt1h(z3.VnD(), p3, SVEMemOperand(z10.VnD(), x16)),
7896 "stnt1h {z3.d}, p3, [z10.d, x16]");
7897 COMPARE_MACRO(Stnt1w(z11.VnS(), p4, SVEMemOperand(z14.VnS(), x15)),
7898 "stnt1w {z11.s}, p4, [z14.s, x15]");
7899 COMPARE_MACRO(Stnt1w(z7.VnD(), p0, SVEMemOperand(z11.VnD(), x10)),
7900 "stnt1w {z7.d}, p0, [z11.d, x10]");
7901
7902 CLEANUP();
7903 }
7904
// Pairs the four-operand Bcax, Bsl, Bsl1n, Bsl2n, Eor3 and Nbsl macro calls
// (all on .d lanes) with their expected disassembly text, then lists the
// expected Nbsl sequences when the destination is not the first source.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_bitwise_ternary)7905 TEST(sve2_bitwise_ternary) {
7906 SETUP();
7907
// Destination equals the first source: a single instruction is expected.
7908 COMPARE_MACRO(Bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()),
7909 "bcax z6.d, z6.d, z12.d, z1.d");
7910 COMPARE_MACRO(Bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()),
7911 "bsl z21.d, z21.d, z2.d, z2.d");
7912 COMPARE_MACRO(Bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()),
7913 "bsl1n z18.d, z18.d, z8.d, z7.d");
7914 COMPARE_MACRO(Bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()),
7915 "bsl2n z7.d, z7.d, z3.d, z19.d");
7916 COMPARE_MACRO(Eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()),
7917 "eor3 z10.d, z10.d, z24.d, z23.d");
7918 COMPARE_MACRO(Nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()),
7919 "nbsl z17.d, z17.d, z21.d, z27.d");
7920
// Destination differs from the first source and from the other operands:
// the expected text is a movprfx into the destination, then the instruction.
7921 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z27.VnD()),
7922 "movprfx z17, z18\n"
7923 "nbsl z17.d, z17.d, z21.d, z27.d");
// Destination also appears as the third and/or fourth operand: the expected
// text works in z31 and then moves z31 into the destination.
7924 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z27.VnD()),
7925 "movprfx z31, z18\n"
7926 "nbsl z31.d, z31.d, z17.d, z27.d\n"
7927 "mov z17.d, z31.d");
7928 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z17.VnD()),
7929 "movprfx z31, z18\n"
7930 "nbsl z31.d, z31.d, z21.d, z17.d\n"
7931 "mov z17.d, z31.d");
7932 COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z17.VnD()),
7933 "movprfx z31, z18\n"
7934 "nbsl z31.d, z31.d, z17.d, z17.d\n"
7935 "mov z17.d, z31.d");
7936
7937 CLEANUP();
7938 }
7939
// Pairs Whilege/Whilegt/Whilehi/Whilehs (W- and X-register operands) and
// Whilerw/Whilewr (X-register operands only in this list) macro calls with
// their expected disassembly text, across .b/.h/.s/.d predicate lane sizes.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_int_compare_scalars)7940 TEST(sve2_int_compare_scalars) {
7941 SETUP();
7942
// Whilege is listed with every lane size for both W and X operands; the other
// three mnemonics use a mix of W and X operands across the lane sizes.
7943 COMPARE_MACRO(Whilege(p0.VnB(), w20, w29), "whilege p0.b, w20, w29");
7944 COMPARE_MACRO(Whilege(p0.VnB(), x20, x29), "whilege p0.b, x20, x29");
7945 COMPARE_MACRO(Whilege(p0.VnD(), w20, w29), "whilege p0.d, w20, w29");
7946 COMPARE_MACRO(Whilege(p0.VnD(), x20, x29), "whilege p0.d, x20, x29");
7947 COMPARE_MACRO(Whilege(p0.VnH(), w20, w29), "whilege p0.h, w20, w29");
7948 COMPARE_MACRO(Whilege(p0.VnH(), x20, x29), "whilege p0.h, x20, x29");
7949 COMPARE_MACRO(Whilege(p0.VnS(), w20, w29), "whilege p0.s, w20, w29");
7950 COMPARE_MACRO(Whilege(p0.VnS(), x20, x29), "whilege p0.s, x20, x29");
7951 COMPARE_MACRO(Whilegt(p11.VnB(), w24, w3), "whilegt p11.b, w24, w3");
7952 COMPARE_MACRO(Whilegt(p11.VnD(), w24, w3), "whilegt p11.d, w24, w3");
7953 COMPARE_MACRO(Whilegt(p11.VnH(), x24, x3), "whilegt p11.h, x24, x3");
7954 COMPARE_MACRO(Whilegt(p11.VnS(), x24, x3), "whilegt p11.s, x24, x3");
7955 COMPARE_MACRO(Whilehi(p2.VnB(), x20, x8), "whilehi p2.b, x20, x8");
7956 COMPARE_MACRO(Whilehi(p2.VnD(), x20, x8), "whilehi p2.d, x20, x8");
7957 COMPARE_MACRO(Whilehi(p2.VnH(), w20, w8), "whilehi p2.h, w20, w8");
7958 COMPARE_MACRO(Whilehi(p2.VnS(), w20, w8), "whilehi p2.s, w20, w8");
7959 COMPARE_MACRO(Whilehs(p4.VnB(), w22, w9), "whilehs p4.b, w22, w9");
7960 COMPARE_MACRO(Whilehs(p4.VnD(), x22, x9), "whilehs p4.d, x22, x9");
7961 COMPARE_MACRO(Whilehs(p4.VnH(), w22, w9), "whilehs p4.h, w22, w9");
7962 COMPARE_MACRO(Whilehs(p4.VnS(), x22, x9), "whilehs p4.s, x22, x9");
7963
// Whilerw/Whilewr: includes x30 and xzr as operands, and (for Whilewr) the
// same register used for both operands.
7964 COMPARE_MACRO(Whilerw(p7.VnB(), x25, x27), "whilerw p7.b, x25, x27");
7965 COMPARE_MACRO(Whilerw(p7.VnD(), x25, x28), "whilerw p7.d, x25, x28");
7966 COMPARE_MACRO(Whilerw(p7.VnH(), x25, x29), "whilerw p7.h, x25, x29");
7967 COMPARE_MACRO(Whilerw(p7.VnS(), x25, x30), "whilerw p7.s, x25, x30");
7968 COMPARE_MACRO(Whilerw(p7.VnS(), x25, xzr), "whilerw p7.s, x25, xzr");
7969 COMPARE_MACRO(Whilewr(p8.VnB(), x14, x14), "whilewr p8.b, x14, x14");
7970 COMPARE_MACRO(Whilewr(p8.VnD(), x14, x13), "whilewr p8.d, x14, x13");
7971 COMPARE_MACRO(Whilewr(p8.VnH(), x14, x12), "whilewr p8.h, x14, x12");
7972 COMPARE_MACRO(Whilewr(p8.VnS(), x14, x11), "whilewr p8.s, x14, x11");
7973 COMPARE_MACRO(Whilewr(p8.VnS(), xzr, x11), "whilewr p8.s, xzr, x11");
7974
7975 CLEANUP();
7976 }
7977
// Pairs Splice macro calls with their expected disassembly text. Two textual
// forms appear in the expectations: one with a braced register pair
// `{zA, zB}` and one with two plain source registers.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_splice)7978 TEST(sve2_splice) {
7979 SETUP();
7980
// Destination differs from the first source: braced-pair form expected.
7981 COMPARE_MACRO(Splice(z31.VnB(), p0, z21.VnB(), z22.VnB()),
7982 "splice z31.b, p0, {z21.b, z22.b}");
7983 COMPARE_MACRO(Splice(z31.VnD(), p0, z21.VnD(), z22.VnD()),
7984 "splice z31.d, p0, {z21.d, z22.d}");
7985 COMPARE_MACRO(Splice(z31.VnH(), p0, z21.VnH(), z22.VnH()),
7986 "splice z31.h, p0, {z21.h, z22.h}");
// Destination equals the first source: the non-braced form is expected.
7987 COMPARE_MACRO(Splice(z31.VnS(), p0, z31.VnS(), z0.VnS()),
7988 "splice z31.s, p0, z31.s, z0.s");
// Same sources (z31, z0) with a different destination: braced-pair form.
7989 COMPARE_MACRO(Splice(z30.VnS(), p0, z31.VnS(), z0.VnS()),
7990 "splice z30.s, p0, {z31.s, z0.s}");
7991
7992 CLEANUP();
7993 }
7994
// Pairs Mul macro calls that take a trailing integer with their expected
// disassembly text; the integer shows up as the `[n]` suffix on the last
// register. The list uses indices 0-7 with .h lanes, 0-3 with .s lanes and
// 0-1 with .d lanes.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_mul_index)7995 TEST(sve2_mul_index) {
7996 SETUP();
7997
7998 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z7.VnH(), 0),
7999 "mul z18.h, z5.h, z7.h[0]");
8000 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 2),
8001 "mul z18.h, z5.h, z2.h[2]");
8002 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 6),
8003 "mul z18.h, z5.h, z2.h[6]");
8004 COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 7),
8005 "mul z18.h, z5.h, z2.h[7]");
8006 COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z7.VnS(), 0),
8007 "mul z8.s, z15.s, z7.s[0]");
8008 COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z0.VnS(), 3),
8009 "mul z8.s, z15.s, z0.s[3]");
8010 COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z15.VnD(), 0),
8011 "mul z8.d, z15.d, z15.d[0]");
8012 COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z0.VnD(), 1),
8013 "mul z8.d, z15.d, z0.d[1]");
8014
8015 CLEANUP();
8016 }
8017
// Pairs Mla and Mls macro calls that take a trailing integer (shown as the
// `[n]` suffix in the expected text) with their expected disassembly. Each
// macro takes a destination plus three source registers; the expected text
// names only the destination and the last two sources.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_mla_mls_index)8018 TEST(sve2_mla_mls_index) {
8019 SETUP();
8020
// Mla with destination equal to the first source: one instruction expected.
8021 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0),
8022 "mla z1.h, z9.h, z0.h[0]");
8023 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2),
8024 "mla z1.h, z9.h, z1.h[2]");
8025 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6),
8026 "mla z1.h, z9.h, z2.h[6]");
8027 COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7),
8028 "mla z1.h, z9.h, z3.h[7]");
8029 COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0),
8030 "mla z10.s, z22.s, z7.s[0]");
8031 COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3),
8032 "mla z10.s, z22.s, z0.s[3]");
8033 COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0),
8034 "mla z4.d, z0.d, z15.d[0]");
8035 COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1),
8036 "mla z4.d, z0.d, z0.d[1]");
8037
// Mla with destination different from the first source: movprfx into the
// destination when it is not otherwise used, else work in z31 and move back.
8038 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z1.VnH(), 0),
8039 "movprfx z4, z5\n"
8040 "mla z4.h, z0.h, z1.h[0]");
8041 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z1.VnH(), 0),
8042 "movprfx z31, z5\n"
8043 "mla z31.h, z4.h, z1.h[0]\n"
8044 "mov z4.d, z31.d");
8045 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z4.VnH(), 0),
8046 "movprfx z31, z5\n"
8047 "mla z31.h, z0.h, z4.h[0]\n"
8048 "mov z4.d, z31.d");
8049 COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z4.VnH(), 0),
8050 "movprfx z31, z5\n"
8051 "mla z31.h, z4.h, z4.h[0]\n"
8052 "mov z4.d, z31.d");
8053
// Mls: same operand combinations as the first Mla group above.
8054 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0),
8055 "mls z1.h, z9.h, z0.h[0]");
8056 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2),
8057 "mls z1.h, z9.h, z1.h[2]");
8058 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6),
8059 "mls z1.h, z9.h, z2.h[6]");
8060 COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7),
8061 "mls z1.h, z9.h, z3.h[7]");
8062 COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0),
8063 "mls z10.s, z22.s, z7.s[0]");
8064 COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3),
8065 "mls z10.s, z22.s, z0.s[3]");
8066 COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0),
8067 "mls z4.d, z0.d, z15.d[0]");
8068 COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1),
8069 "mls z4.d, z0.d, z0.d[1]");
8070
// Mls with destination different from the first source; this group uses .s
// lanes where the matching Mla group uses .h lanes.
8071 COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z1.VnS(), 0),
8072 "movprfx z4, z5\n"
8073 "mls z4.s, z0.s, z1.s[0]");
8074 COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z1.VnS(), 0),
8075 "movprfx z31, z5\n"
8076 "mls z31.s, z4.s, z1.s[0]\n"
8077 "mov z4.d, z31.d");
8078 COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z4.VnS(), 0),
8079 "movprfx z31, z5\n"
8080 "mls z31.s, z0.s, z4.s[0]\n"
8081 "mov z4.d, z31.d");
8082 COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z4.VnS(), 0),
8083 "movprfx z31, z5\n"
8084 "mls z31.s, z4.s, z4.s[0]\n"
8085 "mov z4.d, z31.d");
8086
8087 CLEANUP();
8088 }
8089
// Pairs the Smlalb/Smlalt/Smlslb/Smlslt and Umlalb/Umlalt/Umlslb/Umlslt macro
// calls with their expected disassembly text. In every call the destination
// lane size (.h/.s/.d) is paired with sources of the next smaller lane size
// (.b/.h/.s). The final group lists the expected Smlalt sequences when the
// destination differs from the first source.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_mla_long)8090 TEST(sve2_mla_long) {
8091 SETUP();
8092
// Destination equals the first source: a single instruction is expected.
8093 COMPARE_MACRO(Smlalb(z1.VnD(), z1.VnD(), z3.VnS(), z23.VnS()),
8094 "smlalb z1.d, z3.s, z23.s");
8095 COMPARE_MACRO(Smlalb(z1.VnH(), z1.VnH(), z3.VnB(), z23.VnB()),
8096 "smlalb z1.h, z3.b, z23.b");
8097 COMPARE_MACRO(Smlalb(z1.VnS(), z1.VnS(), z3.VnH(), z23.VnH()),
8098 "smlalb z1.s, z3.h, z23.h");
8099 COMPARE_MACRO(Smlalt(z31.VnD(), z31.VnD(), z24.VnS(), z29.VnS()),
8100 "smlalt z31.d, z24.s, z29.s");
8101 COMPARE_MACRO(Smlalt(z31.VnH(), z31.VnH(), z24.VnB(), z29.VnB()),
8102 "smlalt z31.h, z24.b, z29.b");
8103 COMPARE_MACRO(Smlalt(z31.VnS(), z31.VnS(), z24.VnH(), z29.VnH()),
8104 "smlalt z31.s, z24.h, z29.h");
8105 COMPARE_MACRO(Smlslb(z5.VnD(), z5.VnD(), z26.VnS(), z27.VnS()),
8106 "smlslb z5.d, z26.s, z27.s");
8107 COMPARE_MACRO(Smlslb(z5.VnH(), z5.VnH(), z26.VnB(), z27.VnB()),
8108 "smlslb z5.h, z26.b, z27.b");
8109 COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z26.VnH(), z27.VnH()),
8110 "smlslb z5.s, z26.h, z27.h");
8111 COMPARE_MACRO(Smlslt(z23.VnD(), z23.VnD(), z24.VnS(), z25.VnS()),
8112 "smlslt z23.d, z24.s, z25.s");
8113 COMPARE_MACRO(Smlslt(z23.VnH(), z23.VnH(), z24.VnB(), z25.VnB()),
8114 "smlslt z23.h, z24.b, z25.b");
8115 COMPARE_MACRO(Smlslt(z23.VnS(), z23.VnS(), z24.VnH(), z25.VnH()),
8116 "smlslt z23.s, z24.h, z25.h");
8117 COMPARE_MACRO(Umlalb(z31.VnD(), z31.VnD(), z9.VnS(), z21.VnS()),
8118 "umlalb z31.d, z9.s, z21.s");
8119 COMPARE_MACRO(Umlalb(z31.VnH(), z31.VnH(), z9.VnB(), z21.VnB()),
8120 "umlalb z31.h, z9.b, z21.b");
8121 COMPARE_MACRO(Umlalb(z31.VnS(), z31.VnS(), z9.VnH(), z21.VnH()),
8122 "umlalb z31.s, z9.h, z21.h");
8123 COMPARE_MACRO(Umlalt(z11.VnD(), z11.VnD(), z5.VnS(), z22.VnS()),
8124 "umlalt z11.d, z5.s, z22.s");
8125 COMPARE_MACRO(Umlalt(z11.VnH(), z11.VnH(), z5.VnB(), z22.VnB()),
8126 "umlalt z11.h, z5.b, z22.b");
8127 COMPARE_MACRO(Umlalt(z11.VnS(), z11.VnS(), z5.VnH(), z22.VnH()),
8128 "umlalt z11.s, z5.h, z22.h");
8129 COMPARE_MACRO(Umlslb(z28.VnD(), z28.VnD(), z13.VnS(), z9.VnS()),
8130 "umlslb z28.d, z13.s, z9.s");
8131 COMPARE_MACRO(Umlslb(z28.VnH(), z28.VnH(), z13.VnB(), z9.VnB()),
8132 "umlslb z28.h, z13.b, z9.b");
8133 COMPARE_MACRO(Umlslb(z28.VnS(), z28.VnS(), z13.VnH(), z9.VnH()),
8134 "umlslb z28.s, z13.h, z9.h");
8135 COMPARE_MACRO(Umlslt(z9.VnD(), z9.VnD(), z12.VnS(), z30.VnS()),
8136 "umlslt z9.d, z12.s, z30.s");
8137 COMPARE_MACRO(Umlslt(z9.VnH(), z9.VnH(), z12.VnB(), z30.VnB()),
8138 "umlslt z9.h, z12.b, z30.b");
8139 COMPARE_MACRO(Umlslt(z9.VnS(), z9.VnS(), z12.VnH(), z30.VnH()),
8140 "umlslt z9.s, z12.h, z30.h");
8141
// Destination differs from the first source: movprfx into the destination
// when it is not otherwise used, else work in z31 and move the result back.
8142 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z3.VnS()),
8143 "movprfx z0, z1\n"
8144 "smlalt z0.d, z2.s, z3.s");
8145 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z3.VnS()),
8146 "movprfx z31, z1\n"
8147 "smlalt z31.d, z0.s, z3.s\n"
8148 "mov z0.d, z31.d");
8149 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z0.VnS()),
8150 "movprfx z31, z1\n"
8151 "smlalt z31.d, z2.s, z0.s\n"
8152 "mov z0.d, z31.d");
8153 COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()),
8154 "movprfx z31, z1\n"
8155 "smlalt z31.d, z0.s, z0.s\n"
8156 "mov z0.d, z31.d");
8157
8158 CLEANUP();
8159 }
8160
// Pairs sqrdcmlah/cmla calls with their expected disassembly text. Two call
// styles appear: lower-case names through COMPARE (three registers, no
// separate first-source argument) and capitalised names through COMPARE_MACRO
// (destination plus three sources). The final integer of each call appears as
// the `#<n>` operand; an extra integer before it appears as an `[n]` suffix.
// NOTE(review): COMPARE and COMPARE_MACRO are defined outside this chunk;
// presumably they check the emitted code's disassembly against the string,
// with lower-case names being raw assembler calls - confirm.
TEST(sve2_complex_integer_multiply_add)8161 TEST(sve2_complex_integer_multiply_add) {
8162 SETUP();
8163
// sqrdcmlah, plain vector form, one lane size per `#` value 0/90/180/270.
8164 COMPARE(sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0),
8165 "sqrdcmlah z31.b, z15.b, z20.b, #0");
8166 COMPARE(sqrdcmlah(z31.VnD(), z15.VnD(), z20.VnD(), 90),
8167 "sqrdcmlah z31.d, z15.d, z20.d, #90");
8168 COMPARE(sqrdcmlah(z31.VnH(), z15.VnH(), z20.VnH(), 180),
8169 "sqrdcmlah z31.h, z15.h, z20.h, #180");
8170 COMPARE(sqrdcmlah(z31.VnS(), z15.VnS(), z20.VnS(), 270),
8171 "sqrdcmlah z31.s, z15.s, z20.s, #270");
8172
// sqrdcmlah with an `[n]` suffix on the last register.
8173 COMPARE(sqrdcmlah(z14.VnS(), z11.VnS(), z8.VnS(), 1, 0),
8174 "sqrdcmlah z14.s, z11.s, z8.s[1], #0");
8175 COMPARE(sqrdcmlah(z31.VnH(), z2.VnH(), z3.VnH(), 2, 180),
8176 "sqrdcmlah z31.h, z2.h, z3.h[2], #180");
8177
// Sqrdcmlah macro, vector form, destination reused as a later source: the
// expected text first copies the destination to z31, then movprfx's the
// destination and reads the copied value from z31.
8178 COMPARE_MACRO(Sqrdcmlah(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0),
8179 "mov z31.d, z0.d\n"
8180 "movprfx z0, z1\n"
8181 "sqrdcmlah z0.b, z31.b, z3.b, #0");
8182 COMPARE_MACRO(Sqrdcmlah(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90),
8183 "mov z31.d, z0.d\n"
8184 "movprfx z0, z1\n"
8185 "sqrdcmlah z0.h, z2.h, z31.h, #90");
// Sqrdcmlah macro, `[n]` form, destination reused as a later source: the
// expected text instead works in z31 and moves the result back afterwards.
8186 COMPARE_MACRO(Sqrdcmlah(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 0, 180),
8187 "movprfx z31, z1\n"
8188 "sqrdcmlah z31.s, z0.s, z0.s[0], #180\n"
8189 "mov z0.d, z31.d");
8190 COMPARE_MACRO(Sqrdcmlah(z5.VnH(), z1.VnH(), z2.VnH(), z5.VnH(), 3, 270),
8191 "movprfx z31, z1\n"
8192 "sqrdcmlah z31.h, z2.h, z5.h[3], #270\n"
8193 "mov z5.d, z31.d");
// All four registers identical: a single instruction is expected.
8194 COMPARE_MACRO(Sqrdcmlah(z3.VnH(), z3.VnH(), z3.VnH(), z3.VnH(), 2, 90),
8195 "sqrdcmlah z3.h, z3.h, z3.h[2], #90");
8196
// cmla, plain vector form, one lane size per `#` value 0/90/180/270.
8197 COMPARE(cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0), "cmla z19.b, z7.b, z2.b, #0");
8198 COMPARE(cmla(z19.VnD(), z7.VnD(), z2.VnD(), 90),
8199 "cmla z19.d, z7.d, z2.d, #90");
8200 COMPARE(cmla(z19.VnH(), z7.VnH(), z2.VnH(), 180),
8201 "cmla z19.h, z7.h, z2.h, #180");
8202 COMPARE(cmla(z19.VnS(), z7.VnS(), z2.VnS(), 270),
8203 "cmla z19.s, z7.s, z2.s, #270");
8204
// Cmla macro, vector form, destination reused as a later source: same
// copy-to-z31 sequence as the Sqrdcmlah vector cases above.
8205 COMPARE_MACRO(Cmla(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0),
8206 "mov z31.d, z0.d\n"
8207 "movprfx z0, z1\n"
8208 "cmla z0.b, z31.b, z3.b, #0");
8209 COMPARE_MACRO(Cmla(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90),
8210 "mov z31.d, z0.d\n"
8211 "movprfx z0, z1\n"
8212 "cmla z0.h, z2.h, z31.h, #90");
8213 COMPARE_MACRO(Cmla(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 180),
8214 "mov z31.d, z0.d\n"
8215 "movprfx z0, z1\n"
8216 "cmla z0.s, z31.s, z31.s, #180");
8217 COMPARE_MACRO(Cmla(z0.VnD(), z1.VnD(), z2.VnD(), z0.VnD(), 270),
8218 "mov z31.d, z0.d\n"
8219 "movprfx z0, z1\n"
8220 "cmla z0.d, z2.d, z31.d, #270");
8221
// Cmla macro, `[n]` form, destination equal to the first source: a single
// instruction is expected. .s lanes use indices 0-1, .h lanes indices 0-3.
8222 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z0.VnS(), 1, 0),
8223 "cmla z17.s, z29.s, z0.s[1], #0");
8224 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z1.VnS(), 0, 0),
8225 "cmla z17.s, z29.s, z1.s[0], #0");
8226 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z8.VnS(), 1, 90),
8227 "cmla z17.s, z29.s, z8.s[1], #90");
8228 COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z15.VnS(), 0, 180),
8229 "cmla z17.s, z29.s, z15.s[0], #180");
8230 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z0.VnH(), 3, 0),
8231 "cmla z18.h, z22.h, z0.h[3], #0");
8232 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z1.VnH(), 2, 0),
8233 "cmla z18.h, z22.h, z1.h[2], #0");
8234 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z4.VnH(), 1, 270),
8235 "cmla z18.h, z22.h, z4.h[1], #270");
8236 COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z7.VnH(), 0, 90),
8237 "cmla z18.h, z22.h, z7.h[0], #90");
8238
// Cmla macro, `[n]` form, destination different from the first source:
// movprfx into the destination when it is not otherwise used, else work in
// z31 and move the result back.
8239 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z7.VnH(), 0, 90),
8240 "movprfx z1, z19\n"
8241 "cmla z1.h, z22.h, z7.h[0], #90");
8242 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z7.VnH(), 0, 90),
8243 "movprfx z31, z19\n"
8244 "cmla z31.h, z1.h, z7.h[0], #90\n"
8245 "mov z1.d, z31.d");
8246 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z1.VnH(), 0, 90),
8247 "movprfx z31, z19\n"
8248 "cmla z31.h, z22.h, z1.h[0], #90\n"
8249 "mov z1.d, z31.d");
8250 COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z1.VnH(), 0, 90),
8251 "movprfx z31, z19\n"
8252 "cmla z31.h, z1.h, z1.h[0], #90\n"
8253 "mov z1.d, z31.d");
8254
8255 CLEANUP();
8256 }
8257
// Pairs sqdmlalb/sqdmlalt/sqdmlslb/sqdmlslt calls with their expected
// disassembly text: lower-case three-register calls through COMPARE (with and
// without a trailing integer shown as `[n]`), then capitalised four-register
// macro calls through COMPARE_MACRO where the destination differs from the
// first source.
// NOTE(review): COMPARE and COMPARE_MACRO are defined outside this chunk;
// presumably they check the emitted code's disassembly against the string,
// with lower-case names being raw assembler calls - confirm.
TEST(sve2_saturating_multiply_add_long)8258 TEST(sve2_saturating_multiply_add_long) {
8259 SETUP();
8260
// Plain vector forms: destination lanes .d/.h/.s with sources .s/.b/.h.
8261 COMPARE(sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()),
8262 "sqdmlalb z6.d, z19.s, z25.s");
8263 COMPARE(sqdmlalb(z6.VnH(), z19.VnB(), z25.VnB()),
8264 "sqdmlalb z6.h, z19.b, z25.b");
8265 COMPARE(sqdmlalb(z6.VnS(), z19.VnH(), z25.VnH()),
8266 "sqdmlalb z6.s, z19.h, z25.h");
8267 COMPARE(sqdmlalt(z11.VnD(), z0.VnS(), z10.VnS()),
8268 "sqdmlalt z11.d, z0.s, z10.s");
8269 COMPARE(sqdmlalt(z11.VnH(), z0.VnB(), z10.VnB()),
8270 "sqdmlalt z11.h, z0.b, z10.b");
8271 COMPARE(sqdmlalt(z11.VnS(), z0.VnH(), z10.VnH()),
8272 "sqdmlalt z11.s, z0.h, z10.h");
8273 COMPARE(sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()),
8274 "sqdmlslb z16.d, z26.s, z25.s");
8275 COMPARE(sqdmlslb(z16.VnH(), z26.VnB(), z25.VnB()),
8276 "sqdmlslb z16.h, z26.b, z25.b");
8277 COMPARE(sqdmlslb(z16.VnS(), z26.VnH(), z25.VnH()),
8278 "sqdmlslb z16.s, z26.h, z25.h");
8279 COMPARE(sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()),
8280 "sqdmlslt z21.d, z23.s, z9.s");
8281 COMPARE(sqdmlslt(z21.VnH(), z23.VnB(), z9.VnB()),
8282 "sqdmlslt z21.h, z23.b, z9.b");
8283 COMPARE(sqdmlslt(z21.VnS(), z23.VnH(), z9.VnH()),
8284 "sqdmlslt z21.s, z23.h, z9.h");
8285
// `[n]` forms: .s sources use indices 0-3 and .h sources indices 0-7 here.
8286 COMPARE(sqdmlalb(z1.VnD(), z27.VnS(), z11.VnS(), 0),
8287 "sqdmlalb z1.d, z27.s, z11.s[0]");
8288 COMPARE(sqdmlalb(z30.VnS(), z6.VnH(), z3.VnH(), 0),
8289 "sqdmlalb z30.s, z6.h, z3.h[0]");
8290 COMPARE(sqdmlalt(z30.VnD(), z25.VnS(), z15.VnS(), 1),
8291 "sqdmlalt z30.d, z25.s, z15.s[1]");
8292 COMPARE(sqdmlalt(z10.VnS(), z1.VnH(), z1.VnH(), 3),
8293 "sqdmlalt z10.s, z1.h, z1.h[3]");
8294 COMPARE(sqdmlslb(z15.VnD(), z27.VnS(), z15.VnS(), 2),
8295 "sqdmlslb z15.d, z27.s, z15.s[2]");
8296 COMPARE(sqdmlslb(z5.VnS(), z5.VnH(), z7.VnH(), 6),
8297 "sqdmlslb z5.s, z5.h, z7.h[6]");
8298 COMPARE(sqdmlslt(z21.VnD(), z28.VnS(), z13.VnS(), 3),
8299 "sqdmlslt z21.d, z28.s, z13.s[3]");
8300 COMPARE(sqdmlslt(z5.VnS(), z3.VnH(), z1.VnH(), 7),
8301 "sqdmlslt z5.s, z3.h, z1.h[7]");
8302
// Macro, vector form, destination different from the first source: movprfx
// into the destination when it is not otherwise used, else work in z31 and
// move the result back.
8303 COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z19.VnS(), z25.VnS()),
8304 "movprfx z6, z16\n"
8305 "sqdmlalb z6.d, z19.s, z25.s");
8306 COMPARE_MACRO(Sqdmlalt(z4.VnH(), z26.VnH(), z4.VnB(), z24.VnB()),
8307 "movprfx z31, z26\n"
8308 "sqdmlalt z31.h, z4.b, z24.b\n"
8309 "mov z4.d, z31.d");
8310 COMPARE_MACRO(Sqdmlslb(z2.VnS(), z6.VnS(), z17.VnH(), z2.VnH()),
8311 "movprfx z31, z6\n"
8312 "sqdmlslb z31.s, z17.h, z2.h\n"
8313 "mov z2.d, z31.d");
8314 COMPARE_MACRO(Sqdmlslt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()),
8315 "movprfx z31, z1\n"
8316 "sqdmlslt z31.d, z0.s, z0.s\n"
8317 "mov z0.d, z31.d");
8318
// Macro, `[n]` form, same destination/source combinations as above.
8319 COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z9.VnS(), z15.VnS(), 0),
8320 "movprfx z6, z16\n"
8321 "sqdmlalb z6.d, z9.s, z15.s[0]");
8322 COMPARE_MACRO(Sqdmlalt(z4.VnS(), z6.VnS(), z4.VnH(), z4.VnH(), 3),
8323 "movprfx z31, z6\n"
8324 "sqdmlalt z31.s, z4.h, z4.h[3]\n"
8325 "mov z4.d, z31.d");
8326 COMPARE_MACRO(Sqdmlslb(z2.VnS(), z16.VnS(), z17.VnH(), z2.VnH(), 6),
8327 "movprfx z31, z16\n"
8328 "sqdmlslb z31.s, z17.h, z2.h[6]\n"
8329 "mov z2.d, z31.d");
8330 COMPARE_MACRO(Sqdmlslt(z6.VnD(), z1.VnD(), z6.VnS(), z6.VnS(), 2),
8331 "movprfx z31, z1\n"
8332 "sqdmlslt z31.d, z6.s, z6.s[2]\n"
8333 "mov z6.d, z31.d");
8334
8335 CLEANUP();
8336 }
8337
// Pairs sqdmlalbt/sqdmlslbt calls with their expected disassembly text:
// lower-case three-register calls through COMPARE, then capitalised
// four-register macro calls through COMPARE_MACRO where the destination
// differs from the first source.
// NOTE(review): COMPARE and COMPARE_MACRO are defined outside this chunk;
// presumably they check the emitted code's disassembly against the string,
// with lower-case names being raw assembler calls - confirm.
TEST(sve2_saturating_multiply_add_interleaved_long)8338 TEST(sve2_saturating_multiply_add_interleaved_long) {
8339 SETUP();
8340
8341 COMPARE(sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()),
8342 "sqdmlalbt z23.d, z29.s, z26.s");
8343 COMPARE(sqdmlalbt(z23.VnH(), z29.VnB(), z26.VnB()),
8344 "sqdmlalbt z23.h, z29.b, z26.b");
8345 COMPARE(sqdmlalbt(z23.VnS(), z29.VnH(), z26.VnH()),
8346 "sqdmlalbt z23.s, z29.h, z26.h");
8347 COMPARE(sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()),
8348 "sqdmlslbt z26.d, z23.s, z4.s");
8349 COMPARE(sqdmlslbt(z26.VnH(), z23.VnB(), z4.VnB()),
8350 "sqdmlslbt z26.h, z23.b, z4.b");
8351 COMPARE(sqdmlslbt(z26.VnS(), z23.VnH(), z4.VnH()),
8352 "sqdmlslbt z26.s, z23.h, z4.h");
8353
// Destination reused as a later source: work in z31, then move back.
8354 COMPARE_MACRO(Sqdmlalbt(z29.VnD(), z0.VnD(), z29.VnS(), z26.VnS()),
8355 "movprfx z31, z0\n"
8356 "sqdmlalbt z31.d, z29.s, z26.s\n"
8357 "mov z29.d, z31.d");
8358 COMPARE_MACRO(Sqdmlalbt(z26.VnH(), z0.VnH(), z29.VnB(), z26.VnB()),
8359 "movprfx z31, z0\n"
8360 "sqdmlalbt z31.h, z29.b, z26.b\n"
8361 "mov z26.d, z31.d");
// First source is z31 and the destination is not otherwise used: movprfx
// from z31 into the destination.
8362 COMPARE_MACRO(Sqdmlslbt(z23.VnS(), z31.VnS(), z26.VnH(), z29.VnH()),
8363 "movprfx z23, z31\n"
8364 "sqdmlslbt z23.s, z26.h, z29.h");
// First source is z31 and the destination is reused as a later source: the
// expected text has no movprfx, operates on z31 directly and moves it back.
8365 COMPARE_MACRO(Sqdmlslbt(z4.VnD(), z31.VnD(), z4.VnS(), z4.VnS()),
8366 "sqdmlslbt z31.d, z4.s, z4.s\n"
8367 "mov z4.d, z31.d");
8368
8369 CLEANUP();
8370 }
8371
// Pairs lower-case fmlalb/fmlalt/fmlslb/fmlslt calls (two per mnemonic, each
// with a .s destination and .h sources) with their expected disassembly text.
// NOTE(review): COMPARE is defined outside this chunk; presumably it checks
// the emitted instruction's disassembly against the string - confirm.
TEST(sve2_floating_multiply_add_long_vector)8372 TEST(sve2_floating_multiply_add_long_vector) {
8373 SETUP();
8374
8375 COMPARE(fmlalb(z16.VnS(), z18.VnH(), z29.VnH()),
8376 "fmlalb z16.s, z18.h, z29.h");
8377 COMPARE(fmlalb(z3.VnS(), z8.VnH(), z7.VnH()), "fmlalb z3.s, z8.h, z7.h");
8378 COMPARE(fmlalt(z18.VnS(), z13.VnH(), z5.VnH()), "fmlalt z18.s, z13.h, z5.h");
8379 COMPARE(fmlalt(z18.VnS(), z7.VnH(), z16.VnH()), "fmlalt z18.s, z7.h, z16.h");
8380 COMPARE(fmlslb(z16.VnS(), z10.VnH(), z1.VnH()), "fmlslb z16.s, z10.h, z1.h");
8381 COMPARE(fmlslb(z25.VnS(), z11.VnH(), z0.VnH()), "fmlslb z25.s, z11.h, z0.h");
8382 COMPARE(fmlslt(z3.VnS(), z17.VnH(), z14.VnH()), "fmlslt z3.s, z17.h, z14.h");
8383 COMPARE(fmlslt(z5.VnS(), z1.VnH(), z7.VnH()), "fmlslt z5.s, z1.h, z7.h");
8384
8385 CLEANUP();
8386 }
8387
// Pairs the Smlal{b,t}/Smlsl{b,t}/Umlal{b,t}/Umlsl{b,t} macro calls that take
// a trailing integer (shown as the `[n]` suffix) with their expected
// disassembly text. Each mnemonic is listed once with a .d destination and .s
// sources and once with a .s destination and .h sources. The final group
// lists the expected Umlslt sequences when the destination differs from the
// first source.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_mla_long_index)8388 TEST(sve2_mla_long_index) {
8389 SETUP();
8390
// Destination equals the first source: a single instruction is expected.
8391 COMPARE_MACRO(Smlalb(z11.VnD(), z11.VnD(), z29.VnS(), z0.VnS(), 3),
8392 "smlalb z11.d, z29.s, z0.s[3]");
8393 COMPARE_MACRO(Smlalb(z18.VnS(), z18.VnS(), z17.VnH(), z0.VnH(), 7),
8394 "smlalb z18.s, z17.h, z0.h[7]");
8395 COMPARE_MACRO(Smlalt(z10.VnD(), z10.VnD(), z30.VnS(), z15.VnS(), 0),
8396 "smlalt z10.d, z30.s, z15.s[0]");
8397 COMPARE_MACRO(Smlalt(z23.VnS(), z23.VnS(), z31.VnH(), z7.VnH(), 0),
8398 "smlalt z23.s, z31.h, z7.h[0]");
8399 COMPARE_MACRO(Smlslb(z12.VnD(), z12.VnD(), z23.VnS(), z3.VnS(), 1),
8400 "smlslb z12.d, z23.s, z3.s[1]");
8401 COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z4.VnH(), z4.VnH(), 2),
8402 "smlslb z5.s, z4.h, z4.h[2]");
8403 COMPARE_MACRO(Smlslt(z7.VnD(), z7.VnD(), z9.VnS(), z6.VnS(), 3),
8404 "smlslt z7.d, z9.s, z6.s[3]");
8405 COMPARE_MACRO(Smlslt(z9.VnS(), z9.VnS(), z21.VnH(), z3.VnH(), 4),
8406 "smlslt z9.s, z21.h, z3.h[4]");
8407 COMPARE_MACRO(Umlalb(z9.VnD(), z9.VnD(), z1.VnS(), z11.VnS(), 0),
8408 "umlalb z9.d, z1.s, z11.s[0]");
8409 COMPARE_MACRO(Umlalb(z9.VnS(), z9.VnS(), z5.VnH(), z1.VnH(), 6),
8410 "umlalb z9.s, z5.h, z1.h[6]");
8411 COMPARE_MACRO(Umlalt(z6.VnD(), z6.VnD(), z17.VnS(), z14.VnS(), 1),
8412 "umlalt z6.d, z17.s, z14.s[1]");
8413 COMPARE_MACRO(Umlalt(z9.VnS(), z9.VnS(), z11.VnH(), z3.VnH(), 7),
8414 "umlalt z9.s, z11.h, z3.h[7]");
8415 COMPARE_MACRO(Umlslb(z12.VnD(), z12.VnD(), z15.VnS(), z9.VnS(), 2),
8416 "umlslb z12.d, z15.s, z9.s[2]");
8417 COMPARE_MACRO(Umlslb(z14.VnS(), z14.VnS(), z10.VnH(), z2.VnH(), 0),
8418 "umlslb z14.s, z10.h, z2.h[0]");
8419 COMPARE_MACRO(Umlslt(z12.VnD(), z12.VnD(), z28.VnS(), z8.VnS(), 3),
8420 "umlslt z12.d, z28.s, z8.s[3]");
8421 COMPARE_MACRO(Umlslt(z24.VnS(), z24.VnS(), z12.VnH(), z6.VnH(), 1),
8422 "umlslt z24.s, z12.h, z6.h[1]");
8423
// Destination differs from the first source: movprfx into the destination
// when it is not otherwise used, else work in z31 and move the result back.
8424 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z6.VnH(), 1),
8425 "movprfx z2, z23\n"
8426 "umlslt z2.s, z12.h, z6.h[1]");
8427 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z6.VnH(), 1),
8428 "movprfx z31, z23\n"
8429 "umlslt z31.s, z2.h, z6.h[1]\n"
8430 "mov z2.d, z31.d");
8431 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z2.VnH(), 1),
8432 "movprfx z31, z23\n"
8433 "umlslt z31.s, z12.h, z2.h[1]\n"
8434 "mov z2.d, z31.d");
8435 COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z2.VnH(), 1),
8436 "movprfx z31, z23\n"
8437 "umlslt z31.s, z2.h, z2.h[1]\n"
8438 "mov z2.d, z31.d");
8439
8440 CLEANUP();
8441 }
8442
// Pairs Smullb/Smullt/Umullb/Umullt macro calls that take a trailing integer
// (shown as the `[n]` suffix) with their expected disassembly text. Each
// mnemonic is listed once with a .s destination and .h sources and once with
// a .d destination and .s sources.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_mul_long_index)8443 TEST(sve2_mul_long_index) {
8444 SETUP();
8445
8446 COMPARE_MACRO(Smullb(z13.VnS(), z31.VnH(), z0.VnH(), 0),
8447 "smullb z13.s, z31.h, z0.h[0]");
8448 COMPARE_MACRO(Smullb(z8.VnD(), z22.VnS(), z0.VnS(), 0),
8449 "smullb z8.d, z22.s, z0.s[0]");
8450 COMPARE_MACRO(Smullt(z14.VnS(), z30.VnH(), z7.VnH(), 7),
8451 "smullt z14.s, z30.h, z7.h[7]");
8452 COMPARE_MACRO(Smullt(z22.VnD(), z28.VnS(), z15.VnS(), 3),
8453 "smullt z22.d, z28.s, z15.s[3]");
8454 COMPARE_MACRO(Umullb(z24.VnD(), z20.VnS(), z5.VnS(), 1),
8455 "umullb z24.d, z20.s, z5.s[1]");
8456 COMPARE_MACRO(Umullb(z28.VnS(), z19.VnH(), z3.VnH(), 4),
8457 "umullb z28.s, z19.h, z3.h[4]");
8458 COMPARE_MACRO(Umullt(z0.VnD(), z31.VnS(), z8.VnS(), 2),
8459 "umullt z0.d, z31.s, z8.s[2]");
8460 COMPARE_MACRO(Umullt(z14.VnS(), z20.VnH(), z5.VnH(), 6),
8461 "umullt z14.s, z20.h, z5.h[6]");
8462
8463 CLEANUP();
8464 }
8465
// Pairs three-register Sqdmulh and Sqrdmulh macro calls with their expected
// disassembly text, once per lane size (.b/.d/.h/.s). The Sqdmulh calls use a
// destination distinct from both sources; the Sqrdmulh calls reuse the
// destination as the first source.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_sat_double_mul_high)8466 TEST(sve2_sat_double_mul_high) {
8467 SETUP();
8468
8469 COMPARE_MACRO(Sqdmulh(z18.VnB(), z25.VnB(), z1.VnB()),
8470 "sqdmulh z18.b, z25.b, z1.b");
8471 COMPARE_MACRO(Sqdmulh(z18.VnD(), z25.VnD(), z1.VnD()),
8472 "sqdmulh z18.d, z25.d, z1.d");
8473 COMPARE_MACRO(Sqdmulh(z18.VnH(), z25.VnH(), z1.VnH()),
8474 "sqdmulh z18.h, z25.h, z1.h");
8475 COMPARE_MACRO(Sqdmulh(z18.VnS(), z25.VnS(), z1.VnS()),
8476 "sqdmulh z18.s, z25.s, z1.s");
8477 COMPARE_MACRO(Sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB()),
8478 "sqrdmulh z21.b, z21.b, z27.b");
8479 COMPARE_MACRO(Sqrdmulh(z21.VnD(), z21.VnD(), z27.VnD()),
8480 "sqrdmulh z21.d, z21.d, z27.d");
8481 COMPARE_MACRO(Sqrdmulh(z21.VnH(), z21.VnH(), z27.VnH()),
8482 "sqrdmulh z21.h, z21.h, z27.h");
8483 COMPARE_MACRO(Sqrdmulh(z21.VnS(), z21.VnS(), z27.VnS()),
8484 "sqrdmulh z21.s, z21.s, z27.s");
8485
8486 CLEANUP();
8487 }
8488
// Pairs Flogb macro calls with their expected disassembly text: a merging
// predicate for .h/.s/.d lanes, then one call with a zeroing predicate.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_flogb)8489 TEST(sve2_flogb) {
8490 SETUP();
8491
8492 COMPARE_MACRO(Flogb(z15.VnH(), p0.Merging(), z3.VnH()),
8493 "flogb z15.h, p0/m, z3.h");
8494 COMPARE_MACRO(Flogb(z15.VnS(), p0.Merging(), z3.VnS()),
8495 "flogb z15.s, p0/m, z3.s");
8496 COMPARE_MACRO(Flogb(z15.VnD(), p0.Merging(), z3.VnD()),
8497 "flogb z15.d, p0/m, z3.d");
// Zeroing predicate: the expected text is a `/z` movprfx of the destination
// onto itself followed by the `/m` form of the instruction.
8498 COMPARE_MACRO(Flogb(z15.VnD(), p0.Zeroing(), z3.VnD()),
8499 "movprfx z15.d, p0/z, z15.d\n"
8500 "flogb z15.d, p0/m, z3.d");
8501
8502 CLEANUP();
8503 }
8504
// Pairs Faddp/Fmaxnmp/Fmaxp/Fminnmp/Fminp macro calls (merging predicate,
// .d/.h/.s lanes) with their expected disassembly text, then lists the
// expected sequences when the destination differs from the first source.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_fp_pair)8505 TEST(sve2_fp_pair) {
8506 SETUP();
8507
// Destination equals the first source: a single instruction is expected.
8508 COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()),
8509 "faddp z14.d, p1/m, z14.d, z26.d");
8510 COMPARE_MACRO(Faddp(z14.VnH(), p1.Merging(), z14.VnH(), z26.VnH()),
8511 "faddp z14.h, p1/m, z14.h, z26.h");
8512 COMPARE_MACRO(Faddp(z14.VnS(), p1.Merging(), z14.VnS(), z26.VnS()),
8513 "faddp z14.s, p1/m, z14.s, z26.s");
8514 COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()),
8515 "fmaxnmp z2.d, p1/m, z2.d, z14.d");
8516 COMPARE_MACRO(Fmaxnmp(z2.VnH(), p1.Merging(), z2.VnH(), z14.VnH()),
8517 "fmaxnmp z2.h, p1/m, z2.h, z14.h");
8518 COMPARE_MACRO(Fmaxnmp(z2.VnS(), p1.Merging(), z2.VnS(), z14.VnS()),
8519 "fmaxnmp z2.s, p1/m, z2.s, z14.s");
8520 COMPARE_MACRO(Fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()),
8521 "fmaxp z22.d, p1/m, z22.d, z3.d");
8522 COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z22.VnH(), z3.VnH()),
8523 "fmaxp z22.h, p1/m, z22.h, z3.h");
8524 COMPARE_MACRO(Fmaxp(z22.VnS(), p1.Merging(), z22.VnS(), z3.VnS()),
8525 "fmaxp z22.s, p1/m, z22.s, z3.s");
8526 COMPARE_MACRO(Fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()),
8527 "fminnmp z1.d, p0/m, z1.d, z14.d");
8528 COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z1.VnH(), z14.VnH()),
8529 "fminnmp z1.h, p0/m, z1.h, z14.h");
8530 COMPARE_MACRO(Fminnmp(z1.VnS(), p0.Merging(), z1.VnS(), z14.VnS()),
8531 "fminnmp z1.s, p0/m, z1.s, z14.s");
8532 COMPARE_MACRO(Fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()),
8533 "fminp z16.d, p3/m, z16.d, z11.d");
8534 COMPARE_MACRO(Fminp(z16.VnH(), p3.Merging(), z16.VnH(), z11.VnH()),
8535 "fminp z16.h, p3/m, z16.h, z11.h");
8536 COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z16.VnS(), z11.VnS()),
8537 "fminp z16.s, p3/m, z16.s, z11.s");
8538
// Destination differs from the first source: a predicated (`/m`) movprfx of
// the first source into the destination precedes the instruction.
8539 COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z13.VnD(), z26.VnD()),
8540 "movprfx z14.d, p1/m, z13.d\n"
8541 "faddp z14.d, p1/m, z14.d, z26.d");
// Destination is also the second source: the expected text first copies it to
// z31 and uses z31 as the second source.
8542 COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z3.VnD(), z2.VnD()),
8543 "mov z31.d, z2.d\n"
8544 "movprfx z2.d, p1/m, z3.d\n"
8545 "fmaxnmp z2.d, p1/m, z2.d, z31.d");
8546 COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z23.VnH(), z3.VnH()),
8547 "movprfx z22.h, p1/m, z23.h\n"
8548 "fmaxp z22.h, p1/m, z22.h, z3.h");
8549 COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z4.VnH(), z1.VnH()),
8550 "mov z31.d, z1.d\n"
8551 "movprfx z1.h, p0/m, z4.h\n"
8552 "fminnmp z1.h, p0/m, z1.h, z31.h");
// Both sources are the same register (z11), distinct from the destination.
8553 COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z11.VnS(), z11.VnS()),
8554 "movprfx z16.s, p3/m, z11.s\n"
8555 "fminp z16.s, p3/m, z16.s, z11.s");
8556 CLEANUP();
8557 }
8558
// Pairs Fmlalb/Fmlalt/Fmlslb/Fmlslt macro calls that take a trailing integer
// (shown as the `[n]` suffix) with their expected disassembly text. All calls
// use a .s destination with .h sources; the indices listed cover 0 through 7.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_fmlal_fmlsl_index)8559 TEST(sve2_fmlal_fmlsl_index) {
8560 SETUP();
8561
// Destination equals the first source: a single instruction is expected.
8562 COMPARE_MACRO(Fmlalb(z16.VnS(), z16.VnS(), z18.VnH(), z2.VnH(), 0),
8563 "fmlalb z16.s, z18.h, z2.h[0]");
8564 COMPARE_MACRO(Fmlalb(z3.VnS(), z3.VnS(), z8.VnH(), z7.VnH(), 7),
8565 "fmlalb z3.s, z8.h, z7.h[7]");
8566 COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z13.VnH(), z5.VnH(), 6),
8567 "fmlalt z18.s, z13.h, z5.h[6]");
8568 COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z7.VnH(), z6.VnH(), 5),
8569 "fmlalt z18.s, z7.h, z6.h[5]");
8570 COMPARE_MACRO(Fmlslb(z16.VnS(), z16.VnS(), z10.VnH(), z1.VnH(), 4),
8571 "fmlslb z16.s, z10.h, z1.h[4]");
8572 COMPARE_MACRO(Fmlslb(z25.VnS(), z25.VnS(), z11.VnH(), z0.VnH(), 3),
8573 "fmlslb z25.s, z11.h, z0.h[3]");
8574 COMPARE_MACRO(Fmlslt(z3.VnS(), z3.VnS(), z17.VnH(), z4.VnH(), 2),
8575 "fmlslt z3.s, z17.h, z4.h[2]");
8576 COMPARE_MACRO(Fmlslt(z5.VnS(), z5.VnS(), z1.VnH(), z7.VnH(), 1),
8577 "fmlslt z5.s, z1.h, z7.h[1]");
8578
// Destination differs from the first source: movprfx into the destination
// when it is not otherwise used, else work in z31 and move the result back.
8579 COMPARE_MACRO(Fmlalb(z5.VnS(), z4.VnS(), z1.VnH(), z7.VnH(), 1),
8580 "movprfx z5, z4\n"
8581 "fmlalb z5.s, z1.h, z7.h[1]");
8582 COMPARE_MACRO(Fmlalt(z5.VnS(), z4.VnS(), z5.VnH(), z7.VnH(), 1),
8583 "movprfx z31, z4\n"
8584 "fmlalt z31.s, z5.h, z7.h[1]\n"
8585 "mov z5.d, z31.d");
8586 COMPARE_MACRO(Fmlslb(z5.VnS(), z4.VnS(), z1.VnH(), z5.VnH(), 1),
8587 "movprfx z31, z4\n"
8588 "fmlslb z31.s, z1.h, z5.h[1]\n"
8589 "mov z5.d, z31.d");
8590 COMPARE_MACRO(Fmlslt(z5.VnS(), z4.VnS(), z5.VnH(), z5.VnH(), 1),
8591 "movprfx z31, z4\n"
8592 "fmlslt z31.s, z5.h, z5.h[1]\n"
8593 "mov z5.d, z31.d");
8594 CLEANUP();
8595 }
8596
// Pairs Fcvtx/Fcvtlt/Fcvtnt/Fcvtxnt macro calls with their expected
// disassembly text. Every call uses a merging predicate except one Fcvtx call
// with a zeroing predicate.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_fp_convert)8597 TEST(sve2_fp_convert) {
8598 SETUP();
8599
8600 COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()),
8601 "fcvtx z14.s, p4/m, z0.d");
// Zeroing predicate: the expected text is a `/z` movprfx of the destination
// (written with .d lanes) onto itself, followed by the `/m` instruction.
8602 COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Zeroing(), z0.VnD()),
8603 "movprfx z14.d, p4/z, z14.d\n"
8604 "fcvtx z14.s, p4/m, z0.d");
8605 COMPARE_MACRO(Fcvtlt(z1.VnD(), p1.Merging(), z28.VnS()),
8606 "fcvtlt z1.d, p1/m, z28.s");
8607 COMPARE_MACRO(Fcvtlt(z10.VnS(), p5.Merging(), z0.VnH()),
8608 "fcvtlt z10.s, p5/m, z0.h");
8609 COMPARE_MACRO(Fcvtnt(z4.VnH(), p7.Merging(), z0.VnS()),
8610 "fcvtnt z4.h, p7/m, z0.s");
8611 COMPARE_MACRO(Fcvtnt(z8.VnS(), p0.Merging(), z4.VnD()),
8612 "fcvtnt z8.s, p0/m, z4.d");
// NOTE(review): this check is identical to the first Fcvtx check in this
// test (same call, same expected text); it looks redundant - confirm whether
// it can be dropped.
8613 COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()),
8614 "fcvtx z14.s, p4/m, z0.d");
8615 COMPARE_MACRO(Fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD()),
8616 "fcvtxnt z27.s, p0/m, z17.d");
8617
8618 CLEANUP();
8619 }
8620
// Pairs Sqdmulh and Sqrdmulh macro calls that take a trailing integer (shown
// as the `[n]` suffix) with their expected disassembly text. The list uses
// indices up to 7 with .h lanes, up to 3 with .s lanes and up to 1 with .d
// lanes.
// NOTE(review): COMPARE_MACRO is defined outside this chunk; presumably it
// checks the emitted code's disassembly against the string - confirm.
TEST(sve2_sat_double_mul_high_index)8621 TEST(sve2_sat_double_mul_high_index) {
8622 SETUP();
8623
8624 COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z7.VnH(), 1),
8625 "sqdmulh z11.h, z20.h, z7.h[1]");
8626 COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z2.VnH(), 7),
8627 "sqdmulh z11.h, z20.h, z2.h[7]");
8628 COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z7.VnS(), 1),
8629 "sqdmulh z8.s, z4.s, z7.s[1]");
8630 COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z3.VnS(), 3),
8631 "sqdmulh z8.s, z4.s, z3.s[3]");
8632 COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z0.VnD(), 1),
8633 "sqdmulh z6.d, z13.d, z0.d[1]");
8634 COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z15.VnD(), 0),
8635 "sqdmulh z6.d, z13.d, z15.d[0]");
8636
// Sqrdmulh; the second call reuses the destination (z3) as the last source
// and still expects a single instruction.
8637 COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z7.VnH(), 2),
8638 "sqrdmulh z3.h, z29.h, z7.h[2]");
8639 COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z3.VnH(), 7),
8640 "sqrdmulh z3.h, z29.h, z3.h[7]");
8641 COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z7.VnS(), 0),
8642 "sqrdmulh z19.s, z15.s, z7.s[0]");
8643 COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z2.VnS(), 3),
8644 "sqrdmulh z19.s, z15.s, z2.s[3]");
8645 COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z0.VnD(), 1),
8646 "sqrdmulh z29.d, z13.d, z0.d[1]");
8647 COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z15.VnD(), 0),
8648 "sqrdmulh z29.d, z13.d, z15.d[0]");
8649
8650 CLEANUP();
8651 }
8652
// Checks the disassembly text expected for the Ext macro on B lanes, in both
// the register-list form and the form where the destination is also the first
// source.
TEST(sve2_extract) {
  SETUP();

  // Destination differs from the first source: the expected text shows the
  // two sources as a braced register list. The cases cover the list
  // {z31, z0} and the immediates #0 and #255.
  COMPARE_MACRO(Ext(z0.VnB(), z1.VnB(), z2.VnB(), 2),
                "ext z0.b, {z1.b, z2.b}, #2");
  COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 255),
                "ext z0.b, {z31.b, z0.b}, #255");
  COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 0),
                "ext z0.b, {z31.b, z0.b}, #0");

  // Check destructive form is preferred over constructive.
  // Here the destination and first source are both z0 and the second source
  // is z1; the expected text has no register list.
  COMPARE_MACRO(Ext(z0.VnB(), z0.VnB(), z1.VnB(), 42),
                "ext z0.b, z0.b, z1.b, #42");

  CLEANUP();
}
8669
// Checks the disassembly text expected for the matrix multiply-accumulate
// macros Fmmla, Smmla, Ummla and Usmmla. The macros take four registers; the
// expected text lists the destination and the last two.
TEST(sve_matmul) {
  SETUP();

  // First and second arguments are the same register: the expected text is a
  // single instruction.
  COMPARE_MACRO(Fmmla(z2.VnS(), z2.VnS(), z3.VnS(), z20.VnS()),
                "fmmla z2.s, z3.s, z20.s");
  COMPARE_MACRO(Fmmla(z21.VnD(), z21.VnD(), z30.VnD(), z2.VnD()),
                "fmmla z21.d, z30.d, z2.d");
  COMPARE_MACRO(Smmla(z31.VnS(), z31.VnS(), z7.VnB(), z19.VnB()),
                "smmla z31.s, z7.b, z19.b");
  COMPARE_MACRO(Ummla(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB()),
                "ummla z0.s, z1.b, z2.b");
  COMPARE_MACRO(Usmmla(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()),
                "usmmla z30.s, z31.b, z4.b");

  // First argument differs from all the others: the expected text starts with
  // a movprfx from the second argument into the first.
  COMPARE_MACRO(Fmmla(z0.VnS(), z1.VnS(), z2.VnS(), z3.VnS()),
                "movprfx z0, z1\n"
                "fmmla z0.s, z2.s, z3.s");
  // First argument (z0) is also the third and/or fourth argument: the
  // expected text does the operation on z31 (after a movprfx from the second
  // argument) and ends with a mov from z31 into z0.
  COMPARE_MACRO(Smmla(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB()),
                "movprfx z31, z1\n"
                "smmla z31.s, z0.b, z3.b\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Ummla(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB()),
                "movprfx z31, z1\n"
                "ummla z31.s, z2.b, z0.b\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Usmmla(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB()),
                "movprfx z31, z1\n"
                "usmmla z31.s, z0.b, z0.b\n"
                "mov z0.d, z31.d");

  CLEANUP();
}
8702
// Checks the disassembly text expected for the Usdot macro (with and without
// a trailing lane index) and the Sudot macro (with a lane index only). When a
// lane index is passed, it appears in brackets on the last operand of the
// expected text.
TEST(sve_usdot_sudot) {
  SETUP();

  // First and second arguments are the same register: the expected text is a
  // single instruction.
  COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()),
                "usdot z30.s, z31.b, z4.b");
  COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 0),
                "usdot z30.s, z31.b, z4.b[0]");
  COMPARE_MACRO(Sudot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 3),
                "sudot z30.s, z31.b, z4.b[3]");

  // Vector form, first argument z0 differs from the second (z30). With no
  // other use of z0, the expected text is movprfx z0, z30 plus the
  // instruction. Where z0 is also the third and/or fourth argument, the
  // expected text does the operation on z31 and ends with a mov from z31
  // into z0.
  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z28.VnB()),
                "movprfx z0, z30\n"
                "usdot z0.s, z29.b, z28.b");
  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB()),
                "movprfx z31, z30\n"
                "usdot z31.s, z29.b, z0.b\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z28.VnB()),
                "movprfx z31, z30\n"
                "usdot z31.s, z0.b, z28.b\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB()),
                "movprfx z31, z30\n"
                "usdot z31.s, z0.b, z0.b\n"
                "mov z0.d, z31.d");
  // Indexed form: the same pattern of register overlaps as the vector form
  // above, with lane index 0. The case where only the indexed operand is z0
  // uses Sudot; the other cases use Usdot.
  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z4.VnB(), 0),
                "movprfx z0, z30\n"
                "usdot z0.s, z29.b, z4.b[0]");
  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z4.VnB(), 0),
                "movprfx z31, z30\n"
                "usdot z31.s, z0.b, z4.b[0]\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Sudot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB(), 0),
                "movprfx z31, z30\n"
                "sudot z31.s, z29.b, z0.b[0]\n"
                "mov z0.d, z31.d");
  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB(), 0),
                "movprfx z31, z30\n"
                "usdot z31.s, z0.b, z0.b[0]\n"
                "mov z0.d, z31.d");

  CLEANUP();
}
8746 } // namespace aarch64
8747 } // namespace vixl
8748