1 // Copyright 2020, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <cfloat>
28 #include <cmath>
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cstring>
32 #include <functional>
33 #include <sys/mman.h>
34 #include <unistd.h>
35
36 #include "test-runner.h"
37 #include "test-utils.h"
38
39 #include "aarch64/cpu-aarch64.h"
40 #include "aarch64/disasm-aarch64.h"
41 #include "aarch64/macro-assembler-aarch64.h"
42 #include "aarch64/simulator-aarch64.h"
43 #include "aarch64/test-utils-aarch64.h"
44 #include "test-assembler-aarch64.h"
45
// Declares an SVE test called `name` by forwarding it to TEST_SVE_INNER with
// the fixed string "SIM" as the first argument. TEST_SVE_INNER is defined
// outside this file section; NOTE(review): the "SIM" tag presumably labels
// these as simulator tests - confirm against its definition.
46 #define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
47
48 namespace vixl {
49 namespace aarch64 {
50
// Runs a fixed sequence of 50 raw instruction encodings (emitted with dci())
// whose disassembly, per the trailing comments, is the predicated halving
// add/subtract family: uhadd, urhadd, shadd, srhadd, uhsub, uhsubr, shsub and
// shsubr. After the sequence a hash of the machine state is computed and
// compared against a table of expected values.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one equals
// expected_hashes[0]. Reordering or changing any encoding therefore
// invalidates the expected values.
TEST_SVE(sve2_halving_arithmetic)51 TEST_SVE(sve2_halving_arithmetic) {
52 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
53 CPUFeatures::kSVE2,
54 CPUFeatures::kNEON,
55 CPUFeatures::kCRC32);
56 START();
57
58 SetInitialMachineState(&masm);
59 // state = 0xe2bd2480
60
61 {
// The scope is sized for exactly 50 instructions, one per dci() call below.
62 ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
63 __ dci(0x441182b2); // uhadd z18.b, p0/m, z18.b, z21.b
64 // vl128 state = 0x8ac2942a
65 __ dci(0x441382f3); // uhsub z19.b, p0/m, z19.b, z23.b
66 // vl128 state = 0x0e0db643
67 __ dci(0x449383fb); // uhsub z27.s, p0/m, z27.s, z31.s
68 // vl128 state = 0x6a97fc8c
69 __ dci(0x441283fa); // shsub z26.b, p0/m, z26.b, z31.b
70 // vl128 state = 0x48a5fd5f
71 __ dci(0x44928372); // shsub z18.s, p0/m, z18.s, z27.s
72 // vl128 state = 0x7c670d36
73 __ dci(0x44d2827a); // shsub z26.d, p0/m, z26.d, z19.d
74 // vl128 state = 0x3a15c66f
75 __ dci(0x4492823b); // shsub z27.s, p0/m, z27.s, z17.s
76 // vl128 state = 0xe407c826
77 __ dci(0x44978239); // uhsubr z25.s, p0/m, z25.s, z17.s
78 // vl128 state = 0xf7157dae
79 __ dci(0x4493827d); // uhsub z29.s, p0/m, z29.s, z19.s
80 // vl128 state = 0xcebff22f
81 __ dci(0x449782f9); // uhsubr z25.s, p0/m, z25.s, z23.s
82 // vl128 state = 0xbe691139
83 __ dci(0x44978231); // uhsubr z17.s, p0/m, z17.s, z17.s
84 // vl128 state = 0x59b2af72
85 __ dci(0x44578233); // uhsubr z19.h, p0/m, z19.h, z17.h
86 // vl128 state = 0xd7fad727
87 __ dci(0x44578312); // uhsubr z18.h, p0/m, z18.h, z24.h
88 // vl128 state = 0x87b5d00a
89 __ dci(0x44578610); // uhsubr z16.h, p1/m, z16.h, z16.h
90 // vl128 state = 0xbaae097d
91 __ dci(0x44578618); // uhsubr z24.h, p1/m, z24.h, z16.h
92 // vl128 state = 0x3887509e
93 __ dci(0x44168608); // shsubr z8.b, p1/m, z8.b, z16.b
94 // vl128 state = 0xc16dc63b
95 __ dci(0x44128700); // shsub z0.b, p1/m, z0.b, z24.b
96 // vl128 state = 0x3eddcd6d
97 __ dci(0x44528f02); // shsub z2.h, p3/m, z2.h, z24.h
98 // vl128 state = 0x2e7ffa0d
99 __ dci(0x44538f40); // uhsub z0.h, p3/m, z0.h, z26.h
100 // vl128 state = 0x1f68bee5
101 __ dci(0x44538342); // uhsub z2.h, p0/m, z2.h, z26.h
102 // vl128 state = 0x2a368049
103 __ dci(0x44538040); // uhsub z0.h, p0/m, z0.h, z2.h
104 // vl128 state = 0x0537f844
105 __ dci(0x44568044); // shsubr z4.h, p0/m, z4.h, z2.h
106 // vl128 state = 0x0dfac1b2
107 __ dci(0x445688cc); // shsubr z12.h, p2/m, z12.h, z6.h
108 // vl128 state = 0xbefa909b
109 __ dci(0x44d288dc); // shsub z28.d, p2/m, z28.d, z6.d
110 // vl128 state = 0xbadc14bb
111 __ dci(0x44d288d8); // shsub z24.d, p2/m, z24.d, z6.d
112 // vl128 state = 0x518130c0
113 __ dci(0x44d088f0); // shadd z16.d, p2/m, z16.d, z7.d
114 // vl128 state = 0xb01856bd
115 __ dci(0x44d08cd2); // shadd z18.d, p3/m, z18.d, z6.d
116 // vl128 state = 0xbbcfeaa2
117 __ dci(0x44d484d0); // srhadd z16.d, p1/m, z16.d, z6.d
118 // vl128 state = 0xefe1d416
119 __ dci(0x44d496d1); // srhadd z17.d, p5/m, z17.d, z22.d
120 // vl128 state = 0xceb574b8
121 __ dci(0x44d196d5); // uhadd z21.d, p5/m, z21.d, z22.d
122 // vl128 state = 0x46cdd268
123 __ dci(0x44d496dd); // srhadd z29.d, p5/m, z29.d, z22.d
124 // vl128 state = 0x21a81b6a
125 __ dci(0x4494969c); // srhadd z28.s, p5/m, z28.s, z20.s
126 // vl128 state = 0x2316cb04
127 __ dci(0x4494968c); // srhadd z12.s, p5/m, z12.s, z20.s
128 // vl128 state = 0x6248cc0a
129 __ dci(0x4415968d); // urhadd z13.b, p5/m, z13.b, z20.b
130 // vl128 state = 0x6edd11e0
131 __ dci(0x44119e8c); // uhadd z12.b, p7/m, z12.b, z20.b
132 // vl128 state = 0x81841eb6
133 __ dci(0x4491968d); // uhadd z13.s, p5/m, z13.s, z20.s
134 // vl128 state = 0x02b8b893
135 __ dci(0x44118685); // uhadd z5.b, p1/m, z5.b, z20.b
136 // vl128 state = 0x707db891
137 __ dci(0x44138e8d); // uhsub z13.b, p3/m, z13.b, z20.b
138 // vl128 state = 0x2caa64dd
139 __ dci(0x44139e0c); // uhsub z12.b, p7/m, z12.b, z16.b
140 // vl128 state = 0xe34695ef
141 __ dci(0x44128e0d); // shsub z13.b, p3/m, z13.b, z16.b
142 // vl128 state = 0x477197dd
143 __ dci(0x44129a1d); // shsub z29.b, p6/m, z29.b, z16.b
144 // vl128 state = 0x19cebaa2
145 __ dci(0x44129a19); // shsub z25.b, p6/m, z25.b, z16.b
146 // vl128 state = 0x0d62dca4
147 __ dci(0x44129249); // shsub z9.b, p4/m, z9.b, z18.b
148 // vl128 state = 0x327e81e3
149 __ dci(0x44129248); // shsub z8.b, p4/m, z8.b, z18.b
150 // vl128 state = 0x28ec9bf8
151 __ dci(0x44169269); // shsubr z9.b, p4/m, z9.b, z19.b
152 // vl128 state = 0x652ca8c9
153 __ dci(0x44168661); // shsubr z1.b, p1/m, z1.b, z19.b
154 // vl128 state = 0x46fcb15a
155 __ dci(0x44168420); // shsubr z0.b, p1/m, z0.b, z1.b
156 // vl128 state = 0x7151e02b
157 __ dci(0x44168428); // shsubr z8.b, p1/m, z8.b, z1.b
158 // vl128 state = 0x4c8921f6
159 __ dci(0x44148409); // srhadd z9.b, p1/m, z9.b, z0.b
160 // vl128 state = 0xd0d2fc1c
161 __ dci(0x44148641); // srhadd z1.b, p1/m, z1.b, z18.b
162 // vl128 state = 0xc821f381
163 }
164
// ComputeMachineStateHash is given the address of `state`; the generated code
// then loads a 32-bit word from that address into w0 so the hash can be
// checked via x0 below. NOTE(review): presumably the hash is written to
// `state` when the generated code runs - confirm in ComputeMachineStateHash.
165 uint32_t state;
166 ComputeMachineStateHash(&masm, &state);
167 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
168 __ Ldr(w0, MemOperand(x0));
169
170 END();
171 if (CAN_RUN()) {
172 RUN();
// 16 expected hashes, selected by core.GetSVELaneCount(kQRegSize) - 1.
// NOTE(review): presumably one entry per vector length in 128-bit steps, with
// entry 0 being the 128-bit case (it matches the last `vl128 state` above).
173 uint32_t expected_hashes[] = {
174 0xc821f381,
175 0xc0ad3b7c,
176 0x4eb4ba1b,
177 0xdc8e061a,
178 0x64675a15,
179 0x923703bf,
180 0x6944c0db,
181 0x7ac89bae,
182 0x8fa4c45f,
183 0xf64c8b4c,
184 0x8ba751b7,
185 0x2fe8832e,
186 0xc6b8000d,
187 0x864ba0ff,
188 0xded22c04,
189 0x213cf65e,
190 };
191 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
192 }
193 }
194
// Runs a fixed sequence of 50 raw instruction encodings (emitted with dci())
// whose disassembly, per the trailing comments, is the shift-and-insert pair
// sli and sri across byte, halfword, word and doubleword lanes. After the
// sequence a hash of the machine state is computed and compared against a
// table of expected values.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one equals
// expected_hashes[0]. Reordering or changing any encoding therefore
// invalidates the expected values.
TEST_SVE(sve2_sli_sri)195 TEST_SVE(sve2_sli_sri) {
196 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
197 CPUFeatures::kSVE2,
198 CPUFeatures::kNEON,
199 CPUFeatures::kCRC32);
200 START();
201
202 SetInitialMachineState(&masm);
203 // state = 0xe2bd2480
204
205 {
// The scope is sized for exactly 50 instructions, one per dci() call below.
206 ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
207 __ dci(0x4509f07f); // sri z31.b, z3.b, #7
208 // vl128 state = 0x509a7a2d
209 __ dci(0x454bf07e); // sri z30.s, z3.s, #21
210 // vl128 state = 0xc973a4e8
211 __ dci(0x450bf17a); // sri z26.b, z11.b, #5
212 // vl128 state = 0xa9dcbcf5
213 __ dci(0x450ef17b); // sri z27.b, z11.b, #2
214 // vl128 state = 0xd56761c1
215 __ dci(0x458ef1f9); // sri z25.d, z15.d, #50
216 // vl128 state = 0xdd84a538
217 __ dci(0x459ff1fb); // sri z27.d, z15.d, #33
218 // vl128 state = 0x4e2dbf4a
219 __ dci(0x459ff5df); // sli z31.d, z14.d, #31
220 // vl128 state = 0x46d9563e
221 __ dci(0x45d7f5cf); // sli z15.d, z14.d, #55
222 // vl128 state = 0xf4fcf912
223 __ dci(0x4593f5ce); // sli z14.d, z14.d, #19
224 // vl128 state = 0xcef34d18
225 __ dci(0x4593f1fe); // sri z30.d, z15.d, #45
226 // vl128 state = 0x69509e94
227 __ dci(0x4581f1ff); // sri z31.d, z15.d, #63
228 // vl128 state = 0x09cd0cf7
229 __ dci(0x45c1f1bd); // sri z29.d, z13.d, #31
230 // vl128 state = 0xfc095f8b
231 __ dci(0x45c1f03c); // sri z28.d, z1.d, #31
232 // vl128 state = 0x0ca836f0
233 __ dci(0x45c1f4b4); // sli z20.d, z5.d, #33
234 // vl128 state = 0x678be6b3
235 __ dci(0x45c1f5f0); // sli z16.d, z15.d, #33
236 // vl128 state = 0x7a743b56
237 __ dci(0x45c7f5f2); // sli z18.d, z15.d, #39
238 // vl128 state = 0x0bbc4117
239 __ dci(0x45c7f5e2); // sli z2.d, z15.d, #39
240 // vl128 state = 0x13e1a7ae
241 __ dci(0x45c7f1a0); // sri z0.d, z13.d, #25
242 // vl128 state = 0x8014a497
243 __ dci(0x4597f1b0); // sri z16.d, z13.d, #41
244 // vl128 state = 0x5f7994a8
245 __ dci(0x4593f5b1); // sli z17.d, z13.d, #19
246 // vl128 state = 0x125f37b5
247 __ dci(0x4591f5f0); // sli z16.d, z15.d, #17
248 // vl128 state = 0x26f1fdf2
249 __ dci(0x4581f5d2); // sli z18.d, z14.d, #1
250 // vl128 state = 0x5b0baccc
251 __ dci(0x4541f5d6); // sli z22.s, z14.s, #1
252 // vl128 state = 0x74f04ecb
253 __ dci(0x4551f1d4); // sri z20.s, z14.s, #15
254 // vl128 state = 0xc43d0586
255 __ dci(0x4553f150); // sri z16.s, z10.s, #13
256 // vl128 state = 0xce8c688a
257 __ dci(0x4557f171); // sri z17.s, z11.s, #9
258 // vl128 state = 0x03a5b3b0
259 __ dci(0x4513f175); // sri z21.h, z11.h, #13
260 // vl128 state = 0x392ab48e
261 __ dci(0x4551f177); // sri z23.s, z11.s, #15
262 // vl128 state = 0xa886dbc8
263 __ dci(0x4551f17f); // sri z31.s, z11.s, #15
264 // vl128 state = 0x37c804bc
265 __ dci(0x4551f16f); // sri z15.s, z11.s, #15
266 // vl128 state = 0x17e99d67
267 __ dci(0x4550f067); // sri z7.s, z3.s, #16
268 // vl128 state = 0xb0bd981a
269 __ dci(0x4550f077); // sri z23.s, z3.s, #16
270 // vl128 state = 0x5f643b3e
271 __ dci(0x4551f0f5); // sri z21.s, z7.s, #15
272 // vl128 state = 0xa0b83a32
273 __ dci(0x4551f09d); // sri z29.s, z4.s, #15
274 // vl128 state = 0x890807a1
275 __ dci(0x4552f08d); // sri z13.s, z4.s, #14
276 // vl128 state = 0x81cb8fa4
277 __ dci(0x4512f01d); // sri z29.h, z0.h, #14
278 // vl128 state = 0x62751a54
279 __ dci(0x4552f419); // sli z25.s, z0.s, #18
280 // vl128 state = 0xfd7c0337
281 __ dci(0x4542f49b); // sli z27.s, z4.s, #2
282 // vl128 state = 0x0089e534
283 __ dci(0x454af09a); // sri z26.s, z4.s, #22
284 // vl128 state = 0xea87d159
285 __ dci(0x45caf0d8); // sri z24.d, z6.d, #22
286 // vl128 state = 0x3c44b845
287 __ dci(0x45c2f2dc); // sri z28.d, z22.d, #30
288 // vl128 state = 0x9b8c17a7
289 __ dci(0x45caf25d); // sri z29.d, z18.d, #22
290 // vl128 state = 0x3e2c1797
291 __ dci(0x45caf0dc); // sri z28.d, z6.d, #22
292 // vl128 state = 0xbf933754
293 __ dci(0x458af1cc); // sri z12.d, z14.d, #54
294 // vl128 state = 0x93e91a23
295 __ dci(0x4586f1cd); // sri z13.d, z14.d, #58
296 // vl128 state = 0x0f7c6faa
297 __ dci(0x458ef0cc); // sri z12.d, z6.d, #50
298 // vl128 state = 0x1d771f71
299 __ dci(0x458ef00d); // sri z13.d, z0.d, #50
300 // vl128 state = 0x29a23da7
301 __ dci(0x450ef05d); // sri z29.b, z2.b, #2
302 // vl128 state = 0x74fd2038
303 __ dci(0x450cf00d); // sri z13.b, z0.b, #4
304 // vl128 state = 0x075bc166
305 __ dci(0x450cf00c); // sri z12.b, z0.b, #4
306 // vl128 state = 0xfd3d290f
307 }
308
// ComputeMachineStateHash is given the address of `state`; the generated code
// then loads a 32-bit word from that address into w0 so the hash can be
// checked via x0 below. NOTE(review): presumably the hash is written to
// `state` when the generated code runs - confirm in ComputeMachineStateHash.
309 uint32_t state;
310 ComputeMachineStateHash(&masm, &state);
311 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
312 __ Ldr(w0, MemOperand(x0));
313
314 END();
315 if (CAN_RUN()) {
316 RUN();
// 16 expected hashes, selected by core.GetSVELaneCount(kQRegSize) - 1.
// NOTE(review): presumably one entry per vector length in 128-bit steps, with
// entry 0 being the 128-bit case (it matches the last `vl128 state` above).
317 uint32_t expected_hashes[] = {
318 0xfd3d290f,
319 0x8dd0bdab,
320 0xa25ba843,
321 0x484543ed,
322 0x22df2f4f,
323 0xb62769dc,
324 0x795e30f7,
325 0xe49948e7,
326 0xd4ceb676,
327 0xbf2d359a,
328 0xcf4331a9,
329 0x8cce4eef,
330 0x4fbaec97,
331 0x4fec4d88,
332 0x3efc521d,
333 0xffef31d1,
334 };
335 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
336 }
337 }
338
// Runs a fixed sequence of 50 raw instruction encodings (emitted with dci())
// whose disassembly, per the trailing comments, is the predicated rounding
// shift-right-by-immediate pair srshr and urshr. After the sequence a hash of
// the machine state is computed and compared against a table of expected
// values.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one equals
// expected_hashes[0]. Reordering or changing any encoding therefore
// invalidates the expected values.
TEST_SVE(sve2_srshr_urshr)339 TEST_SVE(sve2_srshr_urshr) {
340 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
341 CPUFeatures::kSVE2,
342 CPUFeatures::kNEON,
343 CPUFeatures::kCRC32);
344 START();
345
346 SetInitialMachineState(&masm);
347 // state = 0xe2bd2480
348
349 {
// The scope is sized for exactly 50 instructions, one per dci() call below.
350 ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
351 __ dci(0x04cc9074); // srshr z20.d, p4/m, z20.d, #29
352 // vl128 state = 0xecefbcaa
353 __ dci(0x04cc9236); // srshr z22.d, p4/m, z22.d, #15
354 // vl128 state = 0x7eef75c3
355 __ dci(0x04cd927e); // urshr z30.d, p4/m, z30.d, #13
356 // vl128 state = 0xf5ab0a43
357 __ dci(0x04cd9e76); // urshr z22.d, p7/m, z22.d, #13
358 // vl128 state = 0x67a9d15a
359 __ dci(0x04cd9a57); // urshr z23.d, p6/m, z23.d, #14
360 // vl128 state = 0xf1591f3f
361 __ dci(0x044d9247); // urshr z7.s, p4/m, z7.s, #14
362 // vl128 state = 0xcb770d03
363 __ dci(0x044d9245); // urshr z5.s, p4/m, z5.s, #14
364 // vl128 state = 0x7a225c92
365 __ dci(0x044d9241); // urshr z1.s, p4/m, z1.s, #14
366 // vl128 state = 0x31e4f59a
367 __ dci(0x044d8200); // urshr z0.s, p0/m, z0.s, #16
368 // vl128 state = 0x7c0c67fa
369 __ dci(0x044d8330); // urshr z16.s, p0/m, z16.s, #7
370 // vl128 state = 0x2aaa996d
371 __ dci(0x044d8340); // urshr z0.s, p0/m, z0.s, #6
372 // vl128 state = 0x1999a541
373 __ dci(0x044d8104); // urshr z4.s, p0/m, z4.s, #24
374 // vl128 state = 0xbebc22f3
375 __ dci(0x044d8526); // urshr z6.s, p1/m, z6.s, #23
376 // vl128 state = 0x5e9c818d
377 __ dci(0x04cd8502); // urshr z2.d, p1/m, z2.d, #24
378 // vl128 state = 0x9cd88e00
379 __ dci(0x048d9506); // urshr z6.d, p5/m, z6.d, #56
380 // vl128 state = 0xff60a16e
381 __ dci(0x048d9504); // urshr z4.d, p5/m, z4.d, #56
382 // vl128 state = 0xfae64bf4
383 __ dci(0x048d8705); // urshr z5.d, p1/m, z5.d, #40
384 // vl128 state = 0xbd7bc8bb
385 __ dci(0x048d9307); // urshr z7.d, p4/m, z7.d, #40
386 // vl128 state = 0x22e58729
387 __ dci(0x048c9323); // srshr z3.d, p4/m, z3.d, #39
388 // vl128 state = 0x1a2b90d1
389 __ dci(0x048c8721); // srshr z1.d, p1/m, z1.d, #39
390 // vl128 state = 0xf31798ea
391 __ dci(0x04cc8f20); // srshr z0.d, p3/m, z0.d, #7
392 // vl128 state = 0x3a159e41
393 __ dci(0x04cc87b0); // srshr z16.d, p1/m, z16.d, #3
394 // vl128 state = 0x461819c6
395 __ dci(0x04cc8778); // srshr z24.d, p1/m, z24.d, #5
396 // vl128 state = 0x52c8c945
397 __ dci(0x048c8730); // srshr z16.d, p1/m, z16.d, #39
398 // vl128 state = 0xa6724c16
399 __ dci(0x040c8534); // srshr z20.b, p1/m, z20.b, #7
400 // vl128 state = 0xfeae5ea1
401 __ dci(0x040c957c); // srshr z28.b, p5/m, z28.b, #5
402 // vl128 state = 0xe55cac9f
403 __ dci(0x048c9554); // srshr z20.d, p5/m, z20.d, #54
404 // vl128 state = 0x41ccbe50
405 __ dci(0x048c8156); // srshr z22.d, p0/m, z22.d, #54
406 // vl128 state = 0xfef5c71e
407 __ dci(0x040c8957); // srshr z23.b, p2/m, z23.b, #6
408 // vl128 state = 0xac8cf177
409 __ dci(0x040c8bd5); // srshr z21.h, p2/m, z21.h, #2
410 // vl128 state = 0xfe7005fe
411 __ dci(0x040c8354); // srshr z20.h, p0/m, z20.h, #6
412 // vl128 state = 0x1daa6598
413 __ dci(0x040c931c); // srshr z28.h, p4/m, z28.h, #8
414 // vl128 state = 0x8c7f2675
415 __ dci(0x040c9798); // srshr z24.h, p5/m, z24.h, #4
416 // vl128 state = 0x2349e927
417 __ dci(0x044c97ba); // srshr z26.s, p5/m, z26.s, #3
418 // vl128 state = 0xf3670053
419 __ dci(0x040c9faa); // srshr z10.h, p7/m, z10.h, #3
420 // vl128 state = 0x61333578
421 __ dci(0x044d9fae); // urshr z14.s, p7/m, z14.s, #3
422 // vl128 state = 0xdb1232a3
423 __ dci(0x044d8f8f); // urshr z15.s, p3/m, z15.s, #4
424 // vl128 state = 0xb1b4bda1
425 __ dci(0x044d8f87); // urshr z7.s, p3/m, z7.s, #4
426 // vl128 state = 0xba636ab8
427 __ dci(0x044d9d97); // urshr z23.s, p7/m, z23.s, #20
428 // vl128 state = 0x8ab01b49
429 __ dci(0x040d9593); // urshr z19.b, p5/m, z19.b, #4
430 // vl128 state = 0x20ee49b4
431 __ dci(0x040d959b); // urshr z27.b, p5/m, z27.b, #4
432 // vl128 state = 0xe34dcf2e
433 __ dci(0x044c959a); // srshr z26.s, p5/m, z26.s, #20
434 // vl128 state = 0x65bafb28
435 __ dci(0x044d9492); // urshr z18.s, p5/m, z18.s, #28
436 // vl128 state = 0xcbed1382
437 __ dci(0x044c8493); // srshr z19.s, p1/m, z19.s, #28
438 // vl128 state = 0xa54fb84c
439 __ dci(0x044c8cc3); // srshr z3.s, p3/m, z3.s, #26
440 // vl128 state = 0x257267ee
441 __ dci(0x044c8c0b); // srshr z11.s, p3/m, z11.s, #32
442 // vl128 state = 0xd494a3e8
443 __ dci(0x044c8c6f); // srshr z15.s, p3/m, z15.s, #29
444 // vl128 state = 0x63621477
445 __ dci(0x044c9c2e); // srshr z14.s, p7/m, z14.s, #31
446 // vl128 state = 0x4cb2e888
447 __ dci(0x04cc943e); // srshr z30.d, p5/m, z30.d, #31
448 // vl128 state = 0x8e580ba2
449 __ dci(0x04cd953f); // urshr z31.d, p5/m, z31.d, #23
450 // vl128 state = 0x7678cc05
451 }
452
// ComputeMachineStateHash is given the address of `state`; the generated code
// then loads a 32-bit word from that address into w0 so the hash can be
// checked via x0 below. NOTE(review): presumably the hash is written to
// `state` when the generated code runs - confirm in ComputeMachineStateHash.
453 uint32_t state;
454 ComputeMachineStateHash(&masm, &state);
455 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
456 __ Ldr(w0, MemOperand(x0));
457
458 END();
459 if (CAN_RUN()) {
460 RUN();
// 16 expected hashes, selected by core.GetSVELaneCount(kQRegSize) - 1.
// NOTE(review): presumably one entry per vector length in 128-bit steps, with
// entry 0 being the 128-bit case (it matches the last `vl128 state` above).
461 uint32_t expected_hashes[] = {
462 0x7678cc05,
463 0x37f2893a,
464 0xce2a105d,
465 0x5a03f5a3,
466 0x81444dfc,
467 0x5581c0c1,
468 0xfee622cc,
469 0x0f6796a5,
470 0xf151a5fd,
471 0x13e9be9c,
472 0x9685f8b5,
473 0xa6827285,
474 0x7ad6d004,
475 0xba7989ae,
476 0x96fe2826,
477 0xd1ddc17e,
478 };
479 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
480 }
481 }
482
// Runs a fixed sequence of 50 raw instruction encodings (emitted with dci())
// whose disassembly, per the trailing comments, is the predicated saturating
// shift-left-by-immediate group: sqshl, uqshl and sqshlu. After the sequence a
// hash of the machine state is computed and compared against a table of
// expected values.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one equals
// expected_hashes[0]. Reordering or changing any encoding therefore
// invalidates the expected values.
TEST_SVE(sve2_sqshl_uqshl)483 TEST_SVE(sve2_sqshl_uqshl) {
484 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
485 CPUFeatures::kSVE2,
486 CPUFeatures::kNEON,
487 CPUFeatures::kCRC32);
488 START();
489
490 SetInitialMachineState(&masm);
491 // state = 0xe2bd2480
492
493 {
// The scope is sized for exactly 50 instructions, one per dci() call below.
494 ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
495 __ dci(0x044f86aa); // sqshlu z10.s, p1/m, z10.s, #21
496 // vl128 state = 0x37777991
497 __ dci(0x044f8482); // sqshlu z2.s, p1/m, z2.s, #4
498 // vl128 state = 0x8119dd5a
499 __ dci(0x048f8480); // sqshlu z0.d, p1/m, z0.d, #4
500 // vl128 state = 0x8966cd23
501 __ dci(0x04cf8c82); // sqshlu z2.d, p3/m, z2.d, #36
502 // vl128 state = 0x71b53135
503 __ dci(0x044f8892); // sqshlu z18.s, p2/m, z18.s, #4
504 // vl128 state = 0x44e0e9a7
505 __ dci(0x04cf8996); // sqshlu z22.d, p2/m, z22.d, #44
506 // vl128 state = 0x4e4b77b9
507 __ dci(0x04cf9194); // sqshlu z20.d, p4/m, z20.d, #44
508 // vl128 state = 0x66d72728
509 __ dci(0x04cf9b9c); // sqshlu z28.d, p6/m, z28.d, #60
510 // vl128 state = 0xa80f62ce
511 __ dci(0x04c79f8c); // uqshl z12.d, p7/m, z12.d, #60
512 // vl128 state = 0x87a3a8c0
513 __ dci(0x04469f88); // sqshl z8.s, p7/m, z8.s, #28
514 // vl128 state = 0x3db302cb
515 __ dci(0x04469f8a); // sqshl z10.s, p7/m, z10.s, #28
516 // vl128 state = 0x2d66bbb2
517 __ dci(0x04469a8e); // sqshl z14.s, p6/m, z14.s, #20
518 // vl128 state = 0x39524732
519 __ dci(0x04c69a1e); // sqshl z30.d, p6/m, z30.d, #48
520 // vl128 state = 0x39d71433
521 __ dci(0x04c68a9a); // sqshl z26.d, p2/m, z26.d, #52
522 // vl128 state = 0x58771cfb
523 __ dci(0x04469a8a); // sqshl z10.s, p6/m, z10.s, #20
524 // vl128 state = 0xa773fcc9
525 __ dci(0x04c68a88); // sqshl z8.d, p2/m, z8.d, #52
526 // vl128 state = 0x9dce801c
527 __ dci(0x04469a89); // sqshl z9.s, p6/m, z9.s, #20
528 // vl128 state = 0x4141302f
529 __ dci(0x04479b81); // uqshl z1.s, p6/m, z1.s, #28
530 // vl128 state = 0x369084f9
531 __ dci(0x044f9f91); // sqshlu z17.s, p7/m, z17.s, #28
532 // vl128 state = 0x1570bb90
533 __ dci(0x04479e90); // uqshl z16.s, p7/m, z16.s, #20
534 // vl128 state = 0x27765662
535 __ dci(0x044f9f94); // sqshlu z20.s, p7/m, z20.s, #28
536 // vl128 state = 0xe99bcbb9
537 __ dci(0x04479795); // uqshl z21.s, p5/m, z21.s, #28
538 // vl128 state = 0xb36c3b9f
539 __ dci(0x04479754); // uqshl z20.s, p5/m, z20.s, #26
540 // vl128 state = 0x435e0256
541 __ dci(0x04479750); // uqshl z16.s, p5/m, z16.s, #26
542 // vl128 state = 0x485471e9
543 __ dci(0x04479740); // uqshl z0.s, p5/m, z0.s, #26
544 // vl128 state = 0x170e10cb
545 __ dci(0x04079544); // uqshl z4.b, p5/m, z4.b, #2
546 // vl128 state = 0x026fe32a
547 __ dci(0x04c79546); // uqshl z6.d, p5/m, z6.d, #42
548 // vl128 state = 0x9a92b063
549 __ dci(0x04c78504); // uqshl z4.d, p1/m, z4.d, #40
550 // vl128 state = 0x4e9a105e
551 __ dci(0x04879500); // uqshl z0.d, p5/m, z0.d, #8
552 // vl128 state = 0x958b4d28
553 __ dci(0x04879908); // uqshl z8.d, p6/m, z8.d, #8
554 // vl128 state = 0x420ff82d
555 __ dci(0x04879318); // uqshl z24.d, p4/m, z24.d, #24
556 // vl128 state = 0x88002097
557 __ dci(0x0487931a); // uqshl z26.d, p4/m, z26.d, #24
558 // vl128 state = 0x3047401c
559 __ dci(0x0486938a); // sqshl z10.d, p4/m, z10.d, #28
560 // vl128 state = 0x5b2b7938
561 __ dci(0x04069188); // sqshl z8.b, p4/m, z8.b, #4
562 // vl128 state = 0xb92dd260
563 __ dci(0x04469389); // sqshl z9.s, p4/m, z9.s, #28
564 // vl128 state = 0xdc6370c3
565 __ dci(0x0447918b); // uqshl z11.s, p4/m, z11.s, #12
566 // vl128 state = 0x5e6198f0
567 __ dci(0x0447913b); // uqshl z27.s, p4/m, z27.s, #9
568 // vl128 state = 0x935ed2a3
569 __ dci(0x0447915f); // uqshl z31.s, p4/m, z31.s, #10
570 // vl128 state = 0x76271654
571 __ dci(0x0406915d); // sqshl z29.b, p4/m, z29.b, #2
572 // vl128 state = 0x46a71ae3
573 __ dci(0x0486911f); // sqshl z31.d, p4/m, z31.d, #8
574 // vl128 state = 0x2c7320a6
575 __ dci(0x0486911d); // sqshl z29.d, p4/m, z29.d, #8
576 // vl128 state = 0x4aa0022d
577 __ dci(0x04869b1f); // sqshl z31.d, p6/m, z31.d, #24
578 // vl128 state = 0x2de081d7
579 __ dci(0x04069317); // sqshl z23.h, p4/m, z23.h, #8
580 // vl128 state = 0x879c9ead
581 __ dci(0x0447931f); // uqshl z31.s, p4/m, z31.s, #24
582 // vl128 state = 0x51070552
583 __ dci(0x04479b9e); // uqshl z30.s, p6/m, z30.s, #28
584 // vl128 state = 0x8cc26b2b
585 __ dci(0x04479adf); // uqshl z31.s, p6/m, z31.s, #22
586 // vl128 state = 0x8f4512d3
587 __ dci(0x04479adb); // uqshl z27.s, p6/m, z27.s, #22
588 // vl128 state = 0x3d44e050
589 __ dci(0x04079a99); // uqshl z25.h, p6/m, z25.h, #4
590 // vl128 state = 0xede0c288
591 __ dci(0x04079a89); // uqshl z9.h, p6/m, z9.h, #4
592 // vl128 state = 0x928beed6
593 __ dci(0x04879acb); // uqshl z11.d, p6/m, z11.d, #22
594 // vl128 state = 0x6945e18a
595 }
596
// ComputeMachineStateHash is given the address of `state`; the generated code
// then loads a 32-bit word from that address into w0 so the hash can be
// checked via x0 below. NOTE(review): presumably the hash is written to
// `state` when the generated code runs - confirm in ComputeMachineStateHash.
597 uint32_t state;
598 ComputeMachineStateHash(&masm, &state);
599 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
600 __ Ldr(w0, MemOperand(x0));
601
602 END();
603 if (CAN_RUN()) {
604 RUN();
// 16 expected hashes, selected by core.GetSVELaneCount(kQRegSize) - 1.
// NOTE(review): presumably one entry per vector length in 128-bit steps, with
// entry 0 being the 128-bit case (it matches the last `vl128 state` above).
605 uint32_t expected_hashes[] = {
606 0x6945e18a,
607 0x0e954f70,
608 0x3d269eb2,
609 0xefeb5acb,
610 0xfb27cb0c,
611 0x651a1aea,
612 0x07011083,
613 0xd425418b,
614 0xa0e026c6,
615 0x407c416e,
616 0x14e25761,
617 0x21eef576,
618 0xc6ad09eb,
619 0x3642006b,
620 0xdebec165,
621 0x24ae8a32,
622 };
623 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
624 }
625 }
626
// Runs a fixed sequence of 100 raw instruction encodings (emitted with dci())
// whose disassembly, per the trailing comments, is the unsigned rounding and
// saturating shift group: urshl, urshlr, uqshl (vector and immediate forms),
// uqshlr, uqrshl and uqrshlr. After the sequence a hash of the machine state
// is computed and compared against a table of expected values.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one equals
// expected_hashes[0]. Reordering or changing any encoding therefore
// invalidates the expected values.
TEST_SVE(sve2_unsigned_sat_round_shift)627 TEST_SVE(sve2_unsigned_sat_round_shift) {
628 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
629 CPUFeatures::kSVE2,
630 CPUFeatures::kNEON,
631 CPUFeatures::kCRC32);
632 START();
633
634 SetInitialMachineState(&masm);
635 // state = 0xe2bd2480
636
637 {
// The scope is sized for exactly 100 instructions, one per dci() call below.
638 ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
639 __ dci(0x44cb84cb); // uqrshl z11.d, p1/m, z11.d, z6.d
640 // vl128 state = 0x9794ef4a
641 __ dci(0x444b85db); // uqrshl z27.h, p1/m, z27.h, z14.h
642 // vl128 state = 0xda137fcc
643 __ dci(0x444b874b); // uqrshl z11.h, p1/m, z11.h, z26.h
644 // vl128 state = 0xafc1533b
645 __ dci(0x444b87fb); // uqrshl z27.h, p1/m, z27.h, z31.h
646 // vl128 state = 0x228890a2
647 __ dci(0x444b87f3); // uqrshl z19.h, p1/m, z19.h, z31.h
648 // vl128 state = 0x5cb0d356
649 __ dci(0x444385f1); // urshl z17.h, p1/m, z17.h, z15.h
650 // vl128 state = 0xbb6b6d1d
651 __ dci(0x444795f3); // urshlr z19.h, p5/m, z19.h, z15.h
652 // vl128 state = 0x98b43358
653 __ dci(0x44479552); // urshlr z18.h, p5/m, z18.h, z10.h
654 // vl128 state = 0x472880b2
655 __ dci(0x44c79502); // urshlr z2.d, p5/m, z2.d, z8.d
656 // vl128 state = 0x0995d86f
657 __ dci(0x44879406); // urshlr z6.s, p5/m, z6.s, z0.s
658 // vl128 state = 0x405211cd
659 __ dci(0x44079436); // urshlr z22.b, p5/m, z22.b, z1.b
660 // vl128 state = 0x563647b0
661 __ dci(0x44078c34); // urshlr z20.b, p3/m, z20.b, z1.b
662 // vl128 state = 0x2eacf2d3
663 __ dci(0x440f843c); // uqrshlr z28.b, p1/m, z28.b, z1.b
664 // vl128 state = 0x56f472ce
665 __ dci(0x440f8cbe); // uqrshlr z30.b, p3/m, z30.b, z5.b
666 // vl128 state = 0x910ce8d0
667 __ dci(0x44078eba); // urshlr z26.b, p3/m, z26.b, z21.b
668 // vl128 state = 0xc47b6482
669 __ dci(0x44078ebe); // urshlr z30.b, p3/m, z30.b, z21.b
670 // vl128 state = 0xff805975
671 __ dci(0x440f86b6); // uqrshlr z22.b, p1/m, z22.b, z21.b
672 // vl128 state = 0x132fe792
673 __ dci(0x444b86b7); // uqrshl z23.h, p1/m, z23.h, z21.h
674 // vl128 state = 0xabd3d85c
675 __ dci(0x440b84a7); // uqrshl z7.b, p1/m, z7.b, z5.b
676 // vl128 state = 0x8f718992
677 __ dci(0x440b8085); // uqrshl z5.b, p0/m, z5.b, z4.b
678 // vl128 state = 0x1b05e694
679 __ dci(0x440b8687); // uqrshl z7.b, p1/m, z7.b, z20.b
680 // vl128 state = 0xd9a0c225
681 __ dci(0x440986cf); // uqshl z15.b, p1/m, z15.b, z22.b
682 // vl128 state = 0x98be170a
683 __ dci(0x440b87ce); // uqrshl z14.b, p1/m, z14.b, z30.b
684 // vl128 state = 0x0993d862
685 __ dci(0x440b838c); // uqrshl z12.b, p0/m, z12.b, z28.b
686 // vl128 state = 0xbc95a037
687 __ dci(0x440b839c); // uqrshl z28.b, p0/m, z28.b, z28.b
688 // vl128 state = 0x558159d9
689 __ dci(0x444b8314); // uqrshl z20.h, p0/m, z20.h, z24.h
690 // vl128 state = 0x53798c6b
691 __ dci(0x44498b1c); // uqshl z28.h, p2/m, z28.h, z24.h
692 // vl128 state = 0x83db6a7c
693 __ dci(0x44498b0c); // uqshl z12.h, p2/m, z12.h, z24.h
694 // vl128 state = 0x62bda6cb
695 __ dci(0x44438b0e); // urshl z14.h, p2/m, z14.h, z24.h
696 // vl128 state = 0xc04356eb
697 __ dci(0x44438986); // urshl z6.h, p2/m, z6.h, z12.h
698 // vl128 state = 0x0e2e6682
699 __ dci(0x444389e4); // urshl z4.h, p2/m, z4.h, z15.h
700 // vl128 state = 0xbb28cacd
701 __ dci(0x444391f4); // urshl z20.h, p4/m, z20.h, z15.h
702 // vl128 state = 0x5349f37a
703 __ dci(0x444391f6); // urshl z22.h, p4/m, z22.h, z15.h
704 // vl128 state = 0x99e66890
705 __ dci(0x44c39177); // urshl z23.d, p4/m, z23.d, z11.d
706 // vl128 state = 0x2d48a891
707 __ dci(0x44c79573); // urshlr z19.d, p5/m, z19.d, z11.d
708 // vl128 state = 0xd26e94f9
709 __ dci(0x04c79d63); // uqshl z3.d, p7/m, z3.d, #43
710 // vl128 state = 0x54801050
711 __ dci(0x04c78c67); // uqshl z7.d, p3/m, z7.d, #35
712 // vl128 state = 0xde9f357a
713 __ dci(0x04878c43); // uqshl z3.d, p3/m, z3.d, #2
714 // vl128 state = 0x59e5d53c
715 __ dci(0x44878c0b); // urshlr z11.s, p3/m, z11.s, z0.s
716 // vl128 state = 0x8cfa7532
717 __ dci(0x44878c03); // urshlr z3.s, p3/m, z3.s, z0.s
718 // vl128 state = 0xdb4e86b6
719 __ dci(0x44878d42); // urshlr z2.s, p3/m, z2.s, z10.s
720 // vl128 state = 0x07467a7c
721 __ dci(0x44878d4a); // urshlr z10.s, p3/m, z10.s, z10.s
722 // vl128 state = 0x6a4ad81c
723 __ dci(0x44879948); // urshlr z8.s, p6/m, z8.s, z10.s
724 // vl128 state = 0x91d7bdc0
725 __ dci(0x44879949); // urshlr z9.s, p6/m, z9.s, z10.s
726 // vl128 state = 0x2fe3b819
727 __ dci(0x44879bcb); // urshlr z11.s, p6/m, z11.s, z30.s
728 // vl128 state = 0x5c121b68
729 __ dci(0x04879b4f); // uqshl z15.d, p6/m, z15.d, #26
730 // vl128 state = 0xe678f4f7
731 __ dci(0x44879bdf); // urshlr z31.s, p6/m, z31.s, z30.s
732 // vl128 state = 0x6593da76
733 __ dci(0x4487935e); // urshlr z30.s, p4/m, z30.s, z26.s
734 // vl128 state = 0xb558ba57
735 __ dci(0x440f9356); // uqrshlr z22.b, p4/m, z22.b, z26.b
736 // vl128 state = 0x45d1775e
737 __ dci(0x440f93f7); // uqrshlr z23.b, p4/m, z23.b, z31.b
738 // vl128 state = 0x20974795
739 __ dci(0x448793f5); // urshlr z21.s, p4/m, z21.s, z31.s
740 // vl128 state = 0xeb0bc2ab
741 __ dci(0x448383fd); // urshl z29.s, p0/m, z29.s, z31.s
742 // vl128 state = 0x74557d81
743 __ dci(0x448b82f9); // uqrshl z25.s, p0/m, z25.s, z23.s
744 // vl128 state = 0x34518418
745 __ dci(0x448f82b8); // uqrshlr z24.s, p0/m, z24.s, z21.s
746 // vl128 state = 0x93e637f3
747 __ dci(0x448f82bc); // uqrshlr z28.s, p0/m, z28.s, z21.s
748 // vl128 state = 0x6e35e56a
749 __ dci(0x448f83fe); // uqrshlr z30.s, p0/m, z30.s, z31.s
750 // vl128 state = 0xf3c59bb1
751 __ dci(0x448d83ae); // uqshlr z14.s, p0/m, z14.s, z29.s
752 // vl128 state = 0x95b401a3
753 __ dci(0x448d83aa); // uqshlr z10.s, p0/m, z10.s, z29.s
754 // vl128 state = 0x56ec65b0
755 __ dci(0x448993ae); // uqshl z14.s, p4/m, z14.s, z29.s
756 // vl128 state = 0x28f6e4c6
757 __ dci(0x448993a6); // uqshl z6.s, p4/m, z6.s, z29.s
758 // vl128 state = 0x9ed5eaf3
759 __ dci(0x44c991a4); // uqshl z4.d, p4/m, z4.d, z13.d
760 // vl128 state = 0xa8512b00
761 __ dci(0x44c991a5); // uqshl z5.d, p4/m, z5.d, z13.d
762 // vl128 state = 0x49a10780
763 __ dci(0x44c991a1); // uqshl z1.d, p4/m, z1.d, z13.d
764 // vl128 state = 0x465a2cb4
765 __ dci(0x444b91a0); // uqrshl z0.h, p4/m, z0.h, z13.h
766 // vl128 state = 0x8f6dad8e
767 __ dci(0x444b91a1); // uqrshl z1.h, p4/m, z1.h, z13.h
768 // vl128 state = 0x50dec3f8
769 __ dci(0x440391a3); // urshl z3.b, p4/m, z3.b, z13.b
770 // vl128 state = 0xab2b5ad7
771 __ dci(0x448393a7); // urshl z7.s, p4/m, z7.s, z29.s
772 // vl128 state = 0x2ffd164f
773 __ dci(0x448393af); // urshl z15.s, p4/m, z15.s, z29.s
774 // vl128 state = 0x43a7959b
775 __ dci(0x448393ab); // urshl z11.s, p4/m, z11.s, z29.s
776 // vl128 state = 0xf9526723
777 __ dci(0x448f93af); // uqrshlr z15.s, p4/m, z15.s, z29.s
778 // vl128 state = 0xf9081b27
779 __ dci(0x448f93ae); // uqrshlr z14.s, p4/m, z14.s, z29.s
780 // vl128 state = 0x3a4f693e
781 __ dci(0x048793aa); // uqshl z10.d, p4/m, z10.d, #29
782 // vl128 state = 0xbba37d9a
783 __ dci(0x04c79388); // uqshl z8.d, p4/m, z8.d, #60
784 // vl128 state = 0x3b3f5fa4
785 __ dci(0x04c79380); // uqshl z0.d, p4/m, z0.d, #60
786 // vl128 state = 0xdac48ac2
787 __ dci(0x04878390); // uqshl z16.d, p0/m, z16.d, #28
788 // vl128 state = 0xe3c8148f
789 __ dci(0x44878794); // urshlr z20.s, p1/m, z20.s, z28.s
790 // vl128 state = 0xee2179ec
791 __ dci(0x04878384); // uqshl z4.d, p0/m, z4.d, #28
792 // vl128 state = 0xc6a3796c
793 __ dci(0x048787ac); // uqshl z12.d, p1/m, z12.d, #29
794 // vl128 state = 0x18e0fd43
795 __ dci(0x04c786ae); // uqshl z14.d, p1/m, z14.d, #53
796 // vl128 state = 0x9292503e
797 __ dci(0x04c786be); // uqshl z30.d, p1/m, z30.d, #53
798 // vl128 state = 0xc1ebe042
799 __ dci(0x44c782b6); // urshlr z22.d, p0/m, z22.d, z21.d
800 // vl128 state = 0x0badc025
801 __ dci(0x44c78a3e); // urshlr z30.d, p2/m, z30.d, z17.d
802 // vl128 state = 0x51b3b5ac
803 __ dci(0x04c78b3a); // uqshl z26.d, p2/m, z26.d, #57
804 // vl128 state = 0x334f52f8
805 __ dci(0x04c78832); // uqshl z18.d, p2/m, z18.d, #33
806 // vl128 state = 0xf95df0b7
807 __ dci(0x44cf8833); // uqrshlr z19.d, p2/m, z19.d, z1.d
808 // vl128 state = 0xda88a00a
809 __ dci(0x44cf9811); // uqrshlr z17.d, p6/m, z17.d, z0.d
810 // vl128 state = 0x1e642a4c
811 __ dci(0x44cf9c41); // uqrshlr z1.d, p7/m, z1.d, z2.d
812 // vl128 state = 0xeb7fe4bd
813 __ dci(0x444f8c45); // uqrshlr z5.h, p3/m, z5.h, z2.h
814 // vl128 state = 0x5a82d833
815 __ dci(0x44cf844d); // uqrshlr z13.d, p1/m, z13.d, z2.d
816 // vl128 state = 0x595d42a4
817 __ dci(0x44c7841d); // urshlr z29.d, p1/m, z29.d, z0.d
818 // vl128 state = 0x0b433688
819 __ dci(0x44c7805f); // urshlr z31.d, p0/m, z31.d, z2.d
820 // vl128 state = 0x14b8c29a
821 __ dci(0x44cf807b); // uqrshlr z27.d, p0/m, z27.d, z3.d
822 // vl128 state = 0x12a76015
823 __ dci(0x44c780eb); // urshlr z11.d, p0/m, z11.d, z7.d
824 // vl128 state = 0x73fa7d24
825 __ dci(0x44c794e3); // urshlr z3.d, p5/m, z3.d, z7.d
826 // vl128 state = 0x0a01c859
827 __ dci(0x04c795eb); // uqshl z11.d, p5/m, z11.d, #47
828 // vl128 state = 0x0e7024fd
829 __ dci(0x04c795e9); // uqshl z9.d, p5/m, z9.d, #47
830 // vl128 state = 0x9ca5cb63
831 __ dci(0x04c795f9); // uqshl z25.d, p5/m, z25.d, #47
832 // vl128 state = 0x4c60da07
833 __ dci(0x04c795fb); // uqshl z27.d, p5/m, z27.d, #47
834 // vl128 state = 0x71114c19
835 __ dci(0x04c799f3); // uqshl z19.d, p6/m, z19.d, #47
836 // vl128 state = 0x32d71e12
837 __ dci(0x04c79997); // uqshl z23.d, p6/m, z23.d, #44
838 // vl128 state = 0xab0c9051
839 }
840
// ComputeMachineStateHash is given the address of `state`; the generated code
// then loads a 32-bit word from that address into w0 so the hash can be
// checked via x0 below. NOTE(review): presumably the hash is written to
// `state` when the generated code runs - confirm in ComputeMachineStateHash.
841 uint32_t state;
842 ComputeMachineStateHash(&masm, &state);
843 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
844 __ Ldr(w0, MemOperand(x0));
845
846 END();
847 if (CAN_RUN()) {
848 RUN();
// 16 expected hashes, selected by core.GetSVELaneCount(kQRegSize) - 1.
// NOTE(review): presumably one entry per vector length in 128-bit steps, with
// entry 0 being the 128-bit case (it matches the last `vl128 state` above).
849 uint32_t expected_hashes[] = {
850 0xab0c9051,
851 0xc2455013,
852 0x6e4b3f1e,
853 0x631ce7ed,
854 0x031e4f7f,
855 0xa2be23bd,
856 0x2f5f74b0,
857 0x9e60f1ea,
858 0xb1080595,
859 0x953020c9,
860 0x7a5bfffb,
861 0xf0a27817,
862 0x83904886,
863 0x04620572,
864 0xbcd5c8c9,
865 0x3d4abe12,
866 };
867 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
868 }
869 }
870
// Regression test for SVE2 signed saturating / rounding shifts.
//
// A fixed sequence of 100 instruction words is emitted verbatim with dci().
// According to the disassembly comments next to each word, the sequence mixes
// sqshl (immediate and vector forms), sqshlr, srshl, srshlr, sqrshl and
// sqrshlr. After the sequence, a hash of the machine state is computed and
// compared against a table of expected values selected by vector length.
//
// Each "vl128 state" comment records a hash value following the instruction
// above it. The last one equals expected_hashes[0], so these are presumably
// the running state hash for a 128-bit vector length; they are comments only
// and are not checked individually.
TEST_SVE(sve2_signed_sat_round_shift) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size must match the number of dci() words below (100).
    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
    __ dci(0x048687c6);  // sqshl z6.d, p1/m, z6.d, #30
    // vl128 state = 0xe81d8487
    __ dci(0x048687c4);  // sqshl z4.d, p1/m, z4.d, #30
    // vl128 state = 0x47cc69b1
    __ dci(0x04868385);  // sqshl z5.d, p0/m, z5.d, #28
    // vl128 state = 0xec4cab7b
    __ dci(0x0486838d);  // sqshl z13.d, p0/m, z13.d, #28
    // vl128 state = 0x23b07ac8
    __ dci(0x048681a9);  // sqshl z9.d, p0/m, z9.d, #13
    // vl128 state = 0xace4253d
    __ dci(0x04068139);  // sqshl z25.b, p0/m, z25.b, #1
    // vl128 state = 0xf8f14a80
    __ dci(0x440681b8);  // srshlr z24.b, p0/m, z24.b, z13.b
    // vl128 state = 0xa79d8fc1
    __ dci(0x4406803a);  // srshlr z26.b, p0/m, z26.b, z1.b
    // vl128 state = 0xed9bb777
    __ dci(0x4406808a);  // srshlr z10.b, p0/m, z10.b, z4.b
    // vl128 state = 0xbd1dfa2f
    __ dci(0x440688da);  // srshlr z26.b, p2/m, z26.b, z6.b
    // vl128 state = 0x8f9b61e6
    __ dci(0x448680db);  // srshlr z27.s, p0/m, z27.s, z6.s
    // vl128 state = 0x0a16f551
    __ dci(0x440684d3);  // srshlr z19.b, p1/m, z19.b, z6.b
    // vl128 state = 0x0a764f12
    __ dci(0x448694c3);  // srshlr z3.s, p5/m, z3.s, z6.s
    // vl128 state = 0x8d6f5613
    __ dci(0x448e9cc7);  // sqrshlr z7.s, p7/m, z7.s, z6.s
    // vl128 state = 0xaf7b559b
    __ dci(0x448e9ef7);  // sqrshlr z23.s, p7/m, z23.s, z23.s
    // vl128 state = 0x086d6430
    __ dci(0x448e9673);  // sqrshlr z19.s, p5/m, z19.s, z19.s
    // vl128 state = 0x4a9a5736
    __ dci(0x448a8663);  // sqrshl z3.s, p1/m, z3.s, z19.s
    // vl128 state = 0x19adf50e
    __ dci(0x440a8e6b);  // sqrshl z11.b, p3/m, z11.b, z19.b
    // vl128 state = 0x4a01719c
    __ dci(0x44028eef);  // srshl z15.b, p3/m, z15.b, z23.b
    // vl128 state = 0x1af6d72e
    __ dci(0x44028e8b);  // srshl z11.b, p3/m, z11.b, z20.b
    // vl128 state = 0xeca2061d
    __ dci(0x44828f8f);  // srshl z15.s, p3/m, z15.s, z28.s
    // vl128 state = 0x61059832
    __ dci(0x44828f87);  // srshl z7.s, p3/m, z7.s, z28.s
    // vl128 state = 0x5e4d94cc
    __ dci(0x44828a97);  // srshl z23.s, p2/m, z23.s, z20.s
    // vl128 state = 0xf5095aa8
    __ dci(0x44828a93);  // srshl z19.s, p2/m, z19.s, z20.s
    // vl128 state = 0x155ff234
    __ dci(0x44868a11);  // srshlr z17.s, p2/m, z17.s, z16.s
    // vl128 state = 0xf2844c7f
    __ dci(0x44c68a90);  // srshlr z16.d, p2/m, z16.d, z20.d
    // vl128 state = 0xcf9f9508
    __ dci(0x44c68a80);  // srshlr z0.d, p2/m, z0.d, z20.d
    // vl128 state = 0xd476915b
    __ dci(0x44868a02);  // srshlr z2.s, p2/m, z2.s, z16.s
    // vl128 state = 0x9acbc986
    __ dci(0x44868a12);  // srshlr z18.s, p2/m, z18.s, z16.s
    // vl128 state = 0xaf9e1114
    __ dci(0x4486921a);  // srshlr z26.s, p4/m, z26.s, z16.s
    // vl128 state = 0x9d188add
    __ dci(0x4486909e);  // srshlr z30.s, p4/m, z30.s, z4.s
    // vl128 state = 0xb41018d5
    __ dci(0x448c9096);  // sqshlr z22.s, p4/m, z22.s, z4.s
    // vl128 state = 0x4ab51dea
    __ dci(0x448890b4);  // sqshl z20.s, p4/m, z20.s, z5.s
    // vl128 state = 0x600dcc36
    __ dci(0x448884bc);  // sqshl z28.s, p1/m, z28.s, z5.s
    // vl128 state = 0x84f37050
    __ dci(0x44c88434);  // sqshl z20.d, p1/m, z20.d, z1.d
    // vl128 state = 0x1f19ce5a
    __ dci(0x44cc8536);  // sqshlr z22.d, p1/m, z22.d, z9.d
    // vl128 state = 0xa51d3f31
    __ dci(0x448c8517);  // sqshlr z23.s, p1/m, z23.s, z8.s
    // vl128 state = 0x8d431292
    __ dci(0x448c8133);  // sqshlr z19.s, p0/m, z19.s, z9.s
    // vl128 state = 0xdd59917f
    __ dci(0x448c8b23);  // sqshlr z3.s, p2/m, z3.s, z25.s
    // vl128 state = 0xfcdae7d4
    __ dci(0x448c8b21);  // sqshlr z1.s, p2/m, z1.s, z25.s
    // vl128 state = 0x0f1239a5
    __ dci(0x448c8b29);  // sqshlr z9.s, p2/m, z9.s, z25.s
    // vl128 state = 0xf6d1f180
    __ dci(0x448c8b2b);  // sqshlr z11.s, p2/m, z11.s, z25.s
    // vl128 state = 0xe7a1af08
    __ dci(0x448c8b89);  // sqshlr z9.s, p2/m, z9.s, z28.s
    // vl128 state = 0xa72666cb
    __ dci(0x448c9bcb);  // sqshlr z11.s, p6/m, z11.s, z30.s
    // vl128 state = 0x9cae5fd7
    __ dci(0x44869bca);  // srshlr z10.s, p6/m, z10.s, z30.s
    // vl128 state = 0xda133b76
    __ dci(0x04869b8e);  // sqshl z14.d, p6/m, z14.d, #28
    // vl128 state = 0xf8eb71c2
    __ dci(0x44869bca);  // srshlr z10.s, p6/m, z10.s, z30.s
    // vl128 state = 0xbe561563
    __ dci(0x44869ae2);  // srshlr z2.s, p6/m, z2.s, z23.s
    // vl128 state = 0x0c286f7e
    __ dci(0x44869a46);  // srshlr z6.s, p6/m, z6.s, z18.s
    // vl128 state = 0x59da6464
    __ dci(0x44869a47);  // srshlr z7.s, p6/m, z7.s, z18.s
    // vl128 state = 0x908e5664
    __ dci(0x4486920f);  // srshlr z15.s, p4/m, z15.s, z16.s
    // vl128 state = 0x213d23db
    __ dci(0x44869a87);  // srshlr z7.s, p6/m, z7.s, z20.s
    // vl128 state = 0xd81ea7fb
    __ dci(0x44469a86);  // srshlr z6.h, p6/m, z6.h, z20.h
    // vl128 state = 0x27d44726
    __ dci(0x44029a82);  // srshl z2.b, p6/m, z2.b, z20.b
    // vl128 state = 0x2187127f
    __ dci(0x44069aa0);  // srshlr z0.b, p6/m, z0.b, z21.b
    // vl128 state = 0x68ba9323
    __ dci(0x444692b0);  // srshlr z16.h, p4/m, z16.h, z21.h
    // vl128 state = 0x148619ff
    __ dci(0x44468ab2);  // srshlr z18.h, p2/m, z18.h, z21.h
    // vl128 state = 0xae93eae6
    __ dci(0x444698b6);  // srshlr z22.h, p6/m, z22.h, z5.h
    // vl128 state = 0x0b875035
    __ dci(0x44469934);  // srshlr z20.h, p6/m, z20.h, z9.h
    // vl128 state = 0x559132ed
    __ dci(0x0406993c);  // sqshl z28.b, p6/m, z28.b, #1
    // vl128 state = 0xec1782e4
    __ dci(0x4406912c);  // srshlr z12.b, p4/m, z12.b, z9.b
    // vl128 state = 0x089d32a4
    __ dci(0x440291ae);  // srshl z14.b, p4/m, z14.b, z13.b
    // vl128 state = 0xde257893
    __ dci(0x44829126);  // srshl z6.s, p4/m, z6.s, z9.s
    // vl128 state = 0x318d27ef
    __ dci(0x448a8127);  // sqrshl z7.s, p0/m, z7.s, z9.s
    // vl128 state = 0x1bc564fc
    __ dci(0x448e8165);  // sqrshlr z5.s, p0/m, z5.s, z11.s
    // vl128 state = 0xa5e5c696
    __ dci(0x44869161);  // srshlr z1.s, p4/m, z1.s, z11.s
    // vl128 state = 0xd64b6830
    __ dci(0x44829120);  // srshl z0.s, p4/m, z0.s, z9.s
    // vl128 state = 0x107ca84d
    __ dci(0x44829124);  // srshl z4.s, p4/m, z4.s, z9.s
    // vl128 state = 0xcd5688f3
    __ dci(0x4482912c);  // srshl z12.s, p4/m, z12.s, z9.s
    // vl128 state = 0x88dee210
    __ dci(0x44829128);  // srshl z8.s, p4/m, z8.s, z9.s
    // vl128 state = 0xfe8611fa
    __ dci(0x44c69120);  // srshlr z0.d, p4/m, z0.d, z9.d
    // vl128 state = 0xe8b8cabd
    __ dci(0x44ce9168);  // sqrshlr z8.d, p4/m, z8.d, z11.d
    // vl128 state = 0x269af804
    __ dci(0x448e9069);  // sqrshlr z9.s, p4/m, z9.s, z3.s
    // vl128 state = 0x7d425704
    __ dci(0x448e8461);  // sqrshlr z1.s, p1/m, z1.s, z3.s
    // vl128 state = 0x1577bd67
    __ dci(0x448e8460);  // sqrshlr z0.s, p1/m, z0.s, z3.s
    // vl128 state = 0x6966617f
    __ dci(0x448a8428);  // sqrshl z8.s, p1/m, z8.s, z1.s
    // vl128 state = 0x6c9cc508
    __ dci(0x44ca8409);  // sqrshl z9.d, p1/m, z9.d, z0.d
    // vl128 state = 0xb3ea2e65
    __ dci(0x44c68408);  // srshlr z8.d, p1/m, z8.d, z0.d
    // vl128 state = 0x1aef7620
    __ dci(0x44c6840a);  // srshlr z10.d, p1/m, z10.d, z0.d
    // vl128 state = 0x63f2c5a3
    __ dci(0x44cc840e);  // sqshlr z14.d, p1/m, z14.d, z0.d
    // vl128 state = 0xb54a8f94
    __ dci(0x44cc8e1e);  // sqshlr z30.d, p3/m, z30.d, z16.d
    // vl128 state = 0xe247e0a3
    __ dci(0x44c68e1a);  // srshlr z26.d, p3/m, z26.d, z16.d
    // vl128 state = 0xfb8bf060
    __ dci(0x44c28a0a);  // srshl z10.d, p2/m, z10.d, z16.d
    // vl128 state = 0x829643e3
    __ dci(0x44c68e0e);  // srshlr z14.d, p3/m, z14.d, z16.d
    // vl128 state = 0x8bd62d7b
    __ dci(0x44c6881e);  // srshlr z30.d, p2/m, z30.d, z0.d
    // vl128 state = 0x4d8caca2
    __ dci(0x44869816);  // srshlr z22.s, p6/m, z22.s, z0.s
    // vl128 state = 0x027f41ac
    __ dci(0x44029817);  // srshl z23.b, p6/m, z23.b, z0.b
    // vl128 state = 0xab9c9627
    __ dci(0x4402993f);  // srshl z31.b, p6/m, z31.b, z9.b
    // vl128 state = 0x42a71056
    __ dci(0x4406991e);  // srshlr z30.b, p6/m, z30.b, z8.b
    // vl128 state = 0xdcdf1396
    __ dci(0x44068d1f);  // srshlr z31.b, p3/m, z31.b, z8.b
    // vl128 state = 0x84fa5cac
    __ dci(0x44068d1d);  // srshlr z29.b, p3/m, z29.b, z8.b
    // vl128 state = 0x1239cdae
    __ dci(0x44468d2d);  // srshlr z13.h, p3/m, z13.h, z9.h
    // vl128 state = 0xae689b2f
    __ dci(0x4446850f);  // srshlr z15.h, p1/m, z15.h, z8.h
    // vl128 state = 0x6330c9c2
    __ dci(0x4446910e);  // srshlr z14.h, p4/m, z14.h, z8.h
    // vl128 state = 0x326ffb9f
    __ dci(0x4446940f);  // srshlr z15.h, p5/m, z15.h, z0.h
    // vl128 state = 0x3f48f466
    __ dci(0x44468487);  // srshlr z7.h, p1/m, z7.h, z4.h
    // vl128 state = 0x0d3b6c65
    __ dci(0x444694b7);  // srshlr z23.h, p5/m, z23.h, z5.h
    // vl128 state = 0x5ef21cd8
    __ dci(0x44469c93);  // srshlr z19.h, p7/m, z19.h, z4.h
    // vl128 state = 0x413d5573
    __ dci(0x44069e92);  // srshlr z18.b, p7/m, z18.b, z20.b
    // vl128 state = 0xac59d0c3
    __ dci(0x44469693);  // srshlr z19.h, p5/m, z19.h, z20.h
    // vl128 state = 0xb3969968
  }

  // Hash the final machine state into `state` (presumably written by the
  // generated code at run time), then load that 32-bit value into w0 so it
  // can be checked after RUN().
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per vector length, indexed by the number of
    // Q-register-sized lanes minus one (16 entries: 1 to 16 lanes).
    uint32_t expected_hashes[] = {
        0xb3969968,
        0x8ba60941,
        0x53937d52,
        0xe6737b5d,
        0x8649cf1f,
        0xb7ee12ca,
        0x6fd03bd4,
        0x4a82eb52,
        0xc0d52997,
        0xb52a263f,
        0x70599fa2,
        0x68cd2ef1,
        0x57b84410,
        0x1072dde9,
        0xe39a23c8,
        0xeded9f88,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
1114
// Regression test for SVE2 unsigned shift-right-and-accumulate.
//
// A fixed sequence of 50 instruction words is emitted verbatim with dci().
// According to the disassembly comments next to each word, the sequence mixes
// usra and ursra across the b/h/s/d element sizes. After the sequence, a hash
// of the machine state is computed and compared against a table of expected
// values selected by vector length.
//
// Each "vl128 state" comment records a hash value following the instruction
// above it. The last one equals expected_hashes[0], so these are presumably
// the running state hash for a 128-bit vector length; they are comments only
// and are not checked individually.
TEST_SVE(sve2_usra) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size must match the number of dci() words below (50).
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x450ce41d);  // usra z29.b, z0.b, #4
    // vl128 state = 0x57e84943
    __ dci(0x450ce635);  // usra z21.b, z17.b, #4
    // vl128 state = 0xc2696a7c
    __ dci(0x45cce637);  // usra z23.d, z17.d, #20
    // vl128 state = 0x97aec47c
    __ dci(0x458cee35);  // ursra z21.d, z17.d, #52
    // vl128 state = 0xab24864c
    __ dci(0x450eee25);  // ursra z5.b, z17.b, #2
    // vl128 state = 0x8aab49c9
    __ dci(0x458eef21);  // ursra z1.d, z25.d, #50
    // vl128 state = 0x3db09e7f
    __ dci(0x458fef65);  // ursra z5.d, z27.d, #49
    // vl128 state = 0xa9905ae3
    __ dci(0x459fef41);  // ursra z1.d, z26.d, #33
    // vl128 state = 0x624c2e4d
    __ dci(0x459fe549);  // usra z9.d, z10.d, #33
    // vl128 state = 0x5a158f70
    __ dci(0x459de561);  // usra z1.d, z11.d, #35
    // vl128 state = 0xf24ffa83
    __ dci(0x451ce565);  // usra z5.h, z11.h, #4
    // vl128 state = 0x0213f9c7
    __ dci(0x4519e564);  // usra z4.h, z11.h, #7
    // vl128 state = 0x8903ccf3
    __ dci(0x4589e56c);  // usra z12.d, z11.d, #55
    // vl128 state = 0x3c0f6e72
    __ dci(0x4589e56e);  // usra z14.d, z11.d, #55
    // vl128 state = 0x5d9787fc
    __ dci(0x4589e56c);  // usra z12.d, z11.d, #55
    // vl128 state = 0x3bc6fced
    __ dci(0x458bed64);  // ursra z4.d, z11.d, #53
    // vl128 state = 0x966476e2
    __ dci(0x45dbed65);  // ursra z5.d, z11.d, #5
    // vl128 state = 0xf85c4247
    __ dci(0x455bedf5);  // ursra z21.s, z15.s, #5
    // vl128 state = 0xd342f9ae
    __ dci(0x450bedfd);  // ursra z29.b, z15.b, #5
    // vl128 state = 0xc03cb476
    __ dci(0x4549edf9);  // ursra z25.s, z15.s, #23
    // vl128 state = 0x5649b073
    __ dci(0x4549ede9);  // ursra z9.s, z15.s, #23
    // vl128 state = 0xce5a7dbb
    __ dci(0x4549ed59);  // ursra z25.s, z10.s, #23
    // vl128 state = 0x8c98ee08
    __ dci(0x4549ed5d);  // ursra z29.s, z10.s, #23
    // vl128 state = 0xd991a574
    __ dci(0x45cded59);  // ursra z25.d, z10.d, #19
    // vl128 state = 0xebc24746
    __ dci(0x45d9ed58);  // ursra z24.d, z10.d, #7
    // vl128 state = 0x145d5970
    __ dci(0x45d8ec50);  // ursra z16.d, z2.d, #8
    // vl128 state = 0x8f65850c
    __ dci(0x45c8ec60);  // ursra z0.d, z3.d, #24
    // vl128 state = 0xe510a1b4
    __ dci(0x45c0ed61);  // ursra z1.d, z11.d, #32
    // vl128 state = 0xfef468e1
    __ dci(0x45c8ec65);  // ursra z5.d, z3.d, #24
    // vl128 state = 0xa6754589
    __ dci(0x45c0e464);  // usra z4.d, z3.d, #32
    // vl128 state = 0x2b4cd23a
    __ dci(0x45c0e4a5);  // usra z5.d, z5.d, #32
    // vl128 state = 0xfa58fea0
    __ dci(0x45c0e4a1);  // usra z1.d, z5.d, #32
    // vl128 state = 0x015c4435
    __ dci(0x45c0e4b1);  // usra z17.d, z5.d, #32
    // vl128 state = 0x67271050
    __ dci(0x45c2ecb3);  // ursra z19.d, z5.d, #30
    // vl128 state = 0x1d3631c3
    __ dci(0x45c0ece3);  // ursra z3.d, z7.d, #32
    // vl128 state = 0x646e0e43
    __ dci(0x45caece7);  // ursra z7.d, z7.d, #22
    // vl128 state = 0x104bf393
    __ dci(0x458aeee3);  // ursra z3.d, z23.d, #54
    // vl128 state = 0xbac8c54b
    __ dci(0x454aeee1);  // ursra z1.s, z23.s, #22
    // vl128 state = 0x5c2a40db
    __ dci(0x4508eee9);  // ursra z9.b, z23.b, #8
    // vl128 state = 0xe117d81a
    __ dci(0x4518ece1);  // ursra z1.h, z7.h, #8
    // vl128 state = 0xeb43265d
    __ dci(0x451cede0);  // ursra z0.h, z15.h, #4
    // vl128 state = 0xd5c8d09e
    __ dci(0x4598edf0);  // ursra z16.d, z15.d, #40
    // vl128 state = 0x0c060220
    __ dci(0x451cede0);  // ursra z0.h, z15.h, #4
    // vl128 state = 0x0ea52d2d
    __ dci(0x459cefe8);  // ursra z8.d, z31.d, #36
    // vl128 state = 0xa6a7e977
    __ dci(0x459ce5f8);  // usra z24.d, z15.d, #36
    // vl128 state = 0xb0192caf
    __ dci(0x458cedfa);  // ursra z26.d, z15.d, #52
    // vl128 state = 0x154fce29
    __ dci(0x458cedfe);  // ursra z30.d, z15.d, #52
    // vl128 state = 0x369cc3e1
    __ dci(0x450cedb6);  // ursra z22.b, z13.b, #4
    // vl128 state = 0xf613cb4b
    __ dci(0x450cedb4);  // ursra z20.b, z13.b, #4
    // vl128 state = 0xd075c8a9
    __ dci(0x458eeda4);  // ursra z4.d, z13.d, #50
    // vl128 state = 0xc9366682
  }

  // Hash the final machine state into `state` (presumably written by the
  // generated code at run time), then load that 32-bit value into w0 so it
  // can be checked after RUN().
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per vector length, indexed by the number of
    // Q-register-sized lanes minus one (16 entries: 1 to 16 lanes).
    uint32_t expected_hashes[] = {
        0xc9366682,
        0xaf202cff,
        0x0e90a7c4,
        0xa8c89f40,
        0xc7bb56ad,
        0xa203dd34,
        0xf3b3a749,
        0xf16c9d5f,
        0x9929dea8,
        0xd652c693,
        0xe76f701b,
        0xe2fe20a3,
        0x07182afb,
        0x816b928f,
        0x52baf33f,
        0x9ef46875,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
1258
// Regression test for SVE2 signed shift-right-and-accumulate.
//
// A fixed sequence of 50 instruction words is emitted verbatim with dci().
// According to the disassembly comments next to each word, the sequence mixes
// ssra and srsra across the b/h/s/d element sizes. After the sequence, a hash
// of the machine state is computed and compared against a table of expected
// values selected by vector length.
//
// Each "vl128 state" comment records a hash value following the instruction
// above it. The last one equals expected_hashes[0], so these are presumably
// the running state hash for a 128-bit vector length; they are comments only
// and are not checked individually.
TEST_SVE(sve2_ssra) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size must match the number of dci() words below (50).
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x450ce01d);  // ssra z29.b, z0.b, #4
    // vl128 state = 0xdf461c2b
    __ dci(0x450ce235);  // ssra z21.b, z17.b, #4
    // vl128 state = 0xd28868a9
    __ dci(0x45cce237);  // ssra z23.d, z17.d, #20
    // vl128 state = 0x874fc6a9
    __ dci(0x458cea35);  // srsra z21.d, z17.d, #52
    // vl128 state = 0xb848785b
    __ dci(0x450eea25);  // srsra z5.b, z17.b, #2
    // vl128 state = 0x8bca62e4
    __ dci(0x458eeb21);  // srsra z1.d, z25.d, #50
    // vl128 state = 0x3cd1b552
    __ dci(0x458feb65);  // srsra z5.d, z27.d, #49
    // vl128 state = 0xd78844fb
    __ dci(0x459feb41);  // srsra z1.d, z26.d, #33
    // vl128 state = 0xa948dc2f
    __ dci(0x459fe149);  // ssra z9.d, z10.d, #33
    // vl128 state = 0x709a83f1
    __ dci(0x459de161);  // ssra z1.d, z11.d, #35
    // vl128 state = 0x1c21e4f6
    __ dci(0x451ce165);  // ssra z5.h, z11.h, #4
    // vl128 state = 0x72288f41
    __ dci(0x4519e164);  // ssra z4.h, z11.h, #7
    // vl128 state = 0x9a8c4c8c
    __ dci(0x4589e16c);  // ssra z12.d, z11.d, #55
    // vl128 state = 0x872585d4
    __ dci(0x4589e16e);  // ssra z14.d, z11.d, #55
    // vl128 state = 0xd237aaa0
    __ dci(0x4589e16c);  // ssra z12.d, z11.d, #55
    // vl128 state = 0x1c828333
    __ dci(0x458be964);  // srsra z4.d, z11.d, #53
    // vl128 state = 0xc190178f
    __ dci(0x45dbe965);  // srsra z5.d, z11.d, #5
    // vl128 state = 0xe9e81bda
    __ dci(0x455be9f5);  // srsra z21.s, z15.s, #5
    // vl128 state = 0x8e58c7a1
    __ dci(0x450be9fd);  // srsra z29.b, z15.b, #5
    // vl128 state = 0x904b404b
    __ dci(0x4549e9f9);  // srsra z25.s, z15.s, #23
    // vl128 state = 0x35a60481
    __ dci(0x4549e9e9);  // srsra z9.s, z15.s, #23
    // vl128 state = 0x6911448b
    __ dci(0x4549e959);  // srsra z25.s, z10.s, #23
    // vl128 state = 0xdb384324
    __ dci(0x4549e95d);  // srsra z29.s, z10.s, #23
    // vl128 state = 0x16acd8ee
    __ dci(0x45cde959);  // srsra z25.d, z10.d, #19
    // vl128 state = 0x56bf7bda
    __ dci(0x45d9e958);  // srsra z24.d, z10.d, #7
    // vl128 state = 0x6a713fa6
    __ dci(0x45d8e850);  // srsra z16.d, z2.d, #8
    // vl128 state = 0xa6394cf3
    __ dci(0x45c8e860);  // srsra z0.d, z3.d, #24
    // vl128 state = 0x829c3d2a
    __ dci(0x45c0e961);  // srsra z1.d, z11.d, #32
    // vl128 state = 0x006d1904
    __ dci(0x45c8e865);  // srsra z5.d, z3.d, #24
    // vl128 state = 0xcc7dffaf
    __ dci(0x45c0e064);  // ssra z4.d, z3.d, #32
    // vl128 state = 0xc9eaddd0
    __ dci(0x45c0e0a5);  // ssra z5.d, z5.d, #32
    // vl128 state = 0x643145e1
    __ dci(0x45c0e0a1);  // ssra z1.d, z5.d, #32
    // vl128 state = 0x03f4c42e
    __ dci(0x45c0e0b1);  // ssra z17.d, z5.d, #32
    // vl128 state = 0x5a8cff35
    __ dci(0x45c2e8b3);  // srsra z19.d, z5.d, #30
    // vl128 state = 0x3ee63e9f
    __ dci(0x45c0e8e3);  // srsra z3.d, z7.d, #32
    // vl128 state = 0x687d943b
    __ dci(0x45cae8e7);  // srsra z7.d, z7.d, #22
    // vl128 state = 0xf5a19cb2
    __ dci(0x458aeae3);  // srsra z3.d, z23.d, #54
    // vl128 state = 0xd1371248
    __ dci(0x454aeae1);  // srsra z1.s, z23.s, #22
    // vl128 state = 0xdb83ef8b
    __ dci(0x455ae8e9);  // srsra z9.s, z7.s, #6
    // vl128 state = 0xc831a54c
    __ dci(0x455ee9e8);  // srsra z8.s, z15.s, #2
    // vl128 state = 0x4342b823
    __ dci(0x45dae9f8);  // srsra z24.d, z15.d, #6
    // vl128 state = 0x52a7151a
    __ dci(0x455ee9e8);  // srsra z8.s, z15.s, #2
    // vl128 state = 0xde8110e0
    __ dci(0x45deebe0);  // srsra z0.d, z31.d, #2
    // vl128 state = 0xd2b28e81
    __ dci(0x45dee1f0);  // ssra z16.d, z15.d, #2
    // vl128 state = 0x56d1c366
    __ dci(0x45cee9f2);  // srsra z18.d, z15.d, #18
    // vl128 state = 0x53537689
    __ dci(0x45cee9f6);  // srsra z22.d, z15.d, #18
    // vl128 state = 0x5e410508
    __ dci(0x454ee9be);  // srsra z30.s, z13.s, #18
    // vl128 state = 0x06245094
    __ dci(0x454ee9bc);  // srsra z28.s, z13.s, #18
    // vl128 state = 0xb92b3929
    __ dci(0x45cce9ac);  // srsra z12.d, z13.d, #20
    // vl128 state = 0xfe6a2830
    __ dci(0x45cde93c);  // srsra z28.d, z9.d, #19
    // vl128 state = 0x737461a1
  }

  // Hash the final machine state into `state` (presumably written by the
  // generated code at run time), then load that 32-bit value into w0 so it
  // can be checked after RUN().
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per vector length, indexed by the number of
    // Q-register-sized lanes minus one (16 entries: 1 to 16 lanes).
    uint32_t expected_hashes[] = {
        0x737461a1,
        0xe1ef707c,
        0x9760ba4e,
        0x782dd4cd,
        0xe793d0c2,
        0x991e0de7,
        0x34627e21,
        0x76c89433,
        0x96c9f4ce,
        0x38ec4b6f,
        0x7aee3ec7,
        0x665f9b94,
        0x8e166fc3,
        0xb4461fac,
        0x215de9dc,
        0xc23ef1f9,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
1402
// Regression test for SVE2 predicated saturating add/subtract.
//
// A fixed sequence of 50 instruction words is emitted verbatim with dci().
// According to the disassembly comments next to each word, the sequence mixes
// uqadd, sqadd, uqsub, sqsub, uqsubr, sqsubr, usqadd and suqadd. After the
// sequence, a hash of the machine state is computed and compared against a
// table of expected values selected by vector length.
//
// Each "vl128 state" comment records a hash value following the instruction
// above it. The last one equals expected_hashes[0], so these are presumably
// the running state hash for a 128-bit vector length; they are comments only
// and are not checked individually.
TEST_SVE(sve2_sat_arith) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size must match the number of dci() words below (50).
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x44df9df9);  // uqsubr z25.d, p7/m, z25.d, z15.d
    // vl128 state = 0x7670ac87
    __ dci(0x445f9db1);  // uqsubr z17.h, p7/m, z17.h, z13.h
    // vl128 state = 0x3c5b39fe
    __ dci(0x441f99a1);  // uqsubr z1.b, p6/m, z1.b, z13.b
    // vl128 state = 0x5df43635
    __ dci(0x441d9ba0);  // usqadd z0.b, p6/m, z0.b, z29.b
    // vl128 state = 0x737bc7a5
    __ dci(0x441d9ba8);  // usqadd z8.b, p6/m, z8.b, z29.b
    // vl128 state = 0xba69890b
    __ dci(0x441d9bb8);  // usqadd z24.b, p6/m, z24.b, z29.b
    // vl128 state = 0x3f81c19d
    __ dci(0x441d8b30);  // usqadd z16.b, p2/m, z16.b, z25.b
    // vl128 state = 0x076c5fc1
    __ dci(0x441d8a14);  // usqadd z20.b, p2/m, z20.b, z16.b
    // vl128 state = 0x67df29dd
    __ dci(0x449d8215);  // usqadd z21.s, p0/m, z21.s, z16.s
    // vl128 state = 0x663b236f
    __ dci(0x449d8205);  // usqadd z5.s, p0/m, z5.s, z16.s
    // vl128 state = 0xe58d41d0
    __ dci(0x449d8201);  // usqadd z1.s, p0/m, z1.s, z16.s
    // vl128 state = 0x82f89d40
    __ dci(0x449c8a09);  // suqadd z9.s, p2/m, z9.s, z16.s
    // vl128 state = 0xa0218390
    __ dci(0x44dd8a0d);  // usqadd z13.d, p2/m, z13.d, z16.d
    // vl128 state = 0xfab22f04
    __ dci(0x44d98a2c);  // uqadd z12.d, p2/m, z12.d, z17.d
    // vl128 state = 0x70911fc9
    __ dci(0x44598a0d);  // uqadd z13.h, p2/m, z13.h, z16.h
    // vl128 state = 0xcc12ec49
    __ dci(0x44d99a05);  // uqadd z5.d, p6/m, z5.d, z16.d
    // vl128 state = 0x31fef46f
    __ dci(0x44d99004);  // uqadd z4.d, p4/m, z4.d, z0.d
    // vl128 state = 0xf81448db
    __ dci(0x44d98020);  // uqadd z0.d, p0/m, z0.d, z1.d
    // vl128 state = 0xe6fe9d31
    __ dci(0x44d980e1);  // uqadd z1.d, p0/m, z1.d, z7.d
    // vl128 state = 0x76fecfc2
    __ dci(0x44d981c0);  // uqadd z0.d, p0/m, z0.d, z14.d
    // vl128 state = 0x4066a558
    __ dci(0x44d98161);  // uqadd z1.d, p0/m, z1.d, z11.d
    // vl128 state = 0x0d3a1487
    __ dci(0x44d98031);  // uqadd z17.d, p0/m, z17.d, z1.d
    // vl128 state = 0x061b4aed
    __ dci(0x44d98039);  // uqadd z25.d, p0/m, z25.d, z1.d
    // vl128 state = 0x02172a17
    __ dci(0x44d98029);  // uqadd z9.d, p0/m, z9.d, z1.d
    // vl128 state = 0xebe138b3
    __ dci(0x44d8800d);  // sqadd z13.d, p0/m, z13.d, z0.d
    // vl128 state = 0x73f0114b
    __ dci(0x44d8828f);  // sqadd z15.d, p0/m, z15.d, z20.d
    // vl128 state = 0x7a8689e0
    __ dci(0x44d8829f);  // sqadd z31.d, p0/m, z31.d, z20.d
    // vl128 state = 0x0800ae49
    __ dci(0x44d88e8f);  // sqadd z15.d, p3/m, z15.d, z20.d
    // vl128 state = 0x9b733fff
    __ dci(0x44d88e8b);  // sqadd z11.d, p3/m, z11.d, z20.d
    // vl128 state = 0x6d01eb90
    __ dci(0x44d88e8f);  // sqadd z15.d, p3/m, z15.d, z20.d
    // vl128 state = 0x337692b3
    __ dci(0x44d8968e);  // sqadd z14.d, p5/m, z14.d, z20.d
    // vl128 state = 0xcd4478b6
    __ dci(0x44d886ca);  // sqadd z10.d, p1/m, z10.d, z22.d
    // vl128 state = 0x335fd099
    __ dci(0x44dc87ce);  // suqadd z14.d, p1/m, z14.d, z30.d
    // vl128 state = 0x0d3b6403
    __ dci(0x44de8fcf);  // sqsubr z15.d, p3/m, z15.d, z30.d
    // vl128 state = 0x41a1073f
    __ dci(0x449e9fcd);  // sqsubr z13.s, p7/m, z13.s, z30.s
    // vl128 state = 0x5a4b1c22
    __ dci(0x445e9fcf);  // sqsubr z15.h, p7/m, z15.h, z30.h
    // vl128 state = 0x5a08ccf1
    __ dci(0x441e9ece);  // sqsubr z14.b, p7/m, z14.b, z22.b
    // vl128 state = 0x3f3c700c
    __ dci(0x441e8cde);  // sqsubr z30.b, p3/m, z30.b, z6.b
    // vl128 state = 0x3b32b296
    __ dci(0x441e88fa);  // sqsubr z26.b, p2/m, z26.b, z7.b
    // vl128 state = 0x7a6472e3
    __ dci(0x441f98f8);  // uqsubr z24.b, p6/m, z24.b, z7.b
    // vl128 state = 0x1d72f5ea
    __ dci(0x441f98fc);  // uqsubr z28.b, p6/m, z28.b, z7.b
    // vl128 state = 0x0245804b
    __ dci(0x441b9afe);  // uqsub z30.b, p6/m, z30.b, z23.b
    // vl128 state = 0x8c7ac3d7
    __ dci(0x441b9afc);  // uqsub z28.b, p6/m, z28.b, z23.b
    // vl128 state = 0xa96d65cb
    __ dci(0x449b9a74);  // uqsub z20.s, p6/m, z20.s, z19.s
    // vl128 state = 0x261eb58f
    __ dci(0x449a9b75);  // sqsub z21.s, p6/m, z21.s, z27.s
    // vl128 state = 0x3464e3e5
    __ dci(0x449a9b7d);  // sqsub z29.s, p6/m, z29.s, z27.s
    // vl128 state = 0xfe3ab427
    __ dci(0x445a9b79);  // sqsub z25.h, p6/m, z25.h, z27.h
    // vl128 state = 0x609eef3a
    __ dci(0x445a9b7d);  // sqsub z29.h, p6/m, z29.h, z27.h
    // vl128 state = 0x0e6d6940
    __ dci(0x445e9b5f);  // sqsubr z31.h, p6/m, z31.h, z26.h
    // vl128 state = 0x60a375e7
    __ dci(0x441e8b5b);  // sqsubr z27.b, p2/m, z27.b, z26.b
    // vl128 state = 0xea9bd16f
  }

  // Hash the final machine state into `state` (presumably written by the
  // generated code at run time), then load that 32-bit value into w0 so it
  // can be checked after RUN().
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per vector length, indexed by the number of
    // Q-register-sized lanes minus one (16 entries: 1 to 16 lanes).
    uint32_t expected_hashes[] = {
        0xea9bd16f,
        0x1296119e,
        0x00aaf6dc,
        0xb6ce0579,
        0xdb3d0829,
        0x119f52d0,
        0xf697dcd8,
        0x2c46a66c,
        0x7d838497,
        0x6cd68fb3,
        0xf98a5c79,
        0x51685054,
        0xa9494104,
        0x8d012936,
        0x32726258,
        0x091f1956,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
1546
TEST_SVE(sve2_pair_arith)1547 TEST_SVE(sve2_pair_arith) {
1548 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
1549 CPUFeatures::kSVE2,
1550 CPUFeatures::kNEON,
1551 CPUFeatures::kCRC32);
1552 START();
1553
1554 SetInitialMachineState(&masm);
1555 // state = 0xe2bd2480
1556
1557 {
1558 ExactAssemblyScope scope(&masm, 64 * kInstructionSize);
1559 __ dci(0x4414b214); // smaxp z20.b, p4/m, z20.b, z16.b
1560 // vl128 state = 0x90adc6c9
1561 __ dci(0x4414ba5c); // smaxp z28.b, p6/m, z28.b, z18.b
1562 // vl128 state = 0x0e41b2b9
1563 __ dci(0x4454ba0c); // smaxp z12.h, p6/m, z12.h, z16.h
1564 // vl128 state = 0x472160b8
1565 __ dci(0x4454ba64); // smaxp z4.h, p6/m, z4.h, z19.h
1566 // vl128 state = 0x4f485ba3
1567 __ dci(0x44d4bb65); // smaxp z5.d, p6/m, z5.d, z27.d
1568 // vl128 state = 0x432f5185
1569 __ dci(0x4456bb64); // sminp z4.h, p6/m, z4.h, z27.h
1570 // vl128 state = 0x01bd324a
1571 __ dci(0x4455bb74); // umaxp z20.h, p6/m, z20.h, z27.h
1572 // vl128 state = 0xaf795389
1573 __ dci(0x4451bb35); // addp z21.h, p6/m, z21.h, z25.h
1574 // vl128 state = 0x5f4be111
1575 __ dci(0x4451ab71); // addp z17.h, p2/m, z17.h, z27.h
1576 // vl128 state = 0xc16a8d03
1577 __ dci(0x4451ba75); // addp z21.h, p6/m, z21.h, z19.h
1578 // vl128 state = 0x8cd36853
1579 __ dci(0x4451b225); // addp z5.h, p4/m, z5.h, z17.h
1580 // vl128 state = 0xea3d5389
1581 __ dci(0x4455b627); // umaxp z7.h, p5/m, z7.h, z17.h
1582 // vl128 state = 0xbb42a8e1
1583 __ dci(0x4415b426); // umaxp z6.b, p5/m, z6.b, z1.b
1584 // vl128 state = 0x485ca761
1585 __ dci(0x4415b224); // umaxp z4.b, p4/m, z4.b, z17.b
1586 // vl128 state = 0x6bcfd641
1587 __ dci(0x4455b02c); // umaxp z12.h, p4/m, z12.h, z1.h
1588 // vl128 state = 0x84485a9f
1589 __ dci(0x4455a12d); // umaxp z13.h, p0/m, z13.h, z9.h
1590 // vl128 state = 0xed43519f
1591 __ dci(0x4455b33d); // umaxp z29.h, p4/m, z29.h, z25.h
1592 // vl128 state = 0xcc0b7c40
1593 __ dci(0x4455b7b9); // umaxp z25.h, p5/m, z25.h, z29.h
1594 // vl128 state = 0xe1c14517
1595 __ dci(0x4454b6b8); // smaxp z24.h, p5/m, z24.h, z21.h
1596 // vl128 state = 0x4c5e9f3c
1597 __ dci(0x44d4b4bc); // smaxp z28.d, p5/m, z28.d, z5.d
1598 // vl128 state = 0x7530a2f7
1599 __ dci(0x44d4b4bd); // smaxp z29.d, p5/m, z29.d, z5.d
1600 // vl128 state = 0x37e61b68
1601 __ dci(0x44d4b5ed); // smaxp z13.d, p5/m, z13.d, z15.d
1602 // vl128 state = 0xb592b6e9
1603 __ dci(0x4455b5fd); // umaxp z29.h, p5/m, z29.h, z15.h
1604 // vl128 state = 0xe7f9e492
1605 __ dci(0x4415b57f); // umaxp z31.b, p5/m, z31.b, z11.b
1606 // vl128 state = 0xe4e7b644
1607 __ dci(0x4411b5fe); // addp z30.b, p5/m, z30.b, z15.b
1608 // vl128 state = 0x4bfe144d
1609 __ dci(0x4411a576); // addp z22.b, p1/m, z22.b, z11.b
1610 // vl128 state = 0xb1813df8
1611 __ dci(0x4455a566); // umaxp z6.h, p1/m, z6.h, z11.h
1612 // vl128 state = 0x4aa8b50e
1613 __ dci(0x4455adf6); // umaxp z22.h, p3/m, z22.h, z15.h
1614 // vl128 state = 0xfc13568a
1615 __ dci(0x4454acfe); // smaxp z30.h, p3/m, z30.h, z7.h
1616 // vl128 state = 0x3aac7365
1617 __ dci(0x4454acff); // smaxp z31.h, p3/m, z31.h, z7.h
1618 // vl128 state = 0x610991cf
1619 __ dci(0x44d4a8fb); // smaxp z27.d, p2/m, z27.d, z7.d
1620 // vl128 state = 0x36581f26
1621 __ dci(0x4456a8f3); // sminp z19.h, p2/m, z19.h, z7.h
1622 // vl128 state = 0x249bb813
1623 __ dci(0x4457a8b1); // uminp z17.h, p2/m, z17.h, z5.h
1624 // vl128 state = 0xd48d6d88
1625 __ dci(0x4457a8b5); // uminp z21.h, p2/m, z21.h, z5.h
1626 // vl128 state = 0x1628fb6e
1627 __ dci(0x4456a8f7); // sminp z23.h, p2/m, z23.h, z7.h
1628 // vl128 state = 0x0bd3c76b
1629 __ dci(0x4456a89f); // sminp z31.h, p2/m, z31.h, z4.h
1630 // vl128 state = 0xf09d21e4
1631 __ dci(0x4456aa0f); // sminp z15.h, p2/m, z15.h, z16.h
1632 // vl128 state = 0xd2a92168
1633 __ dci(0x4456b807); // sminp z7.h, p6/m, z7.h, z0.h
1634 // vl128 state = 0x009d0ac8
1635 __ dci(0x4456bc26); // sminp z6.h, p7/m, z6.h, z1.h
1636 // vl128 state = 0x716ddc73
1637 __ dci(0x4456beae); // sminp z14.h, p7/m, z14.h, z21.h
1638 // vl128 state = 0x35a4d900
1639 __ dci(0x4416b6ac); // sminp z12.b, p5/m, z12.b, z21.b
1640 // vl128 state = 0x7929e077
1641 __ dci(0x4416b6bc); // sminp z28.b, p5/m, z28.b, z21.b
1642 // vl128 state = 0x259195ca
1643 __ dci(0x4417b694); // uminp z20.b, p5/m, z20.b, z20.b
1644 // vl128 state = 0x5cc3927b
1645 __ dci(0x4417b684); // uminp z4.b, p5/m, z4.b, z20.b
1646 // vl128 state = 0x2e7c4b88
1647 __ dci(0x4415b6a0); // umaxp z0.b, p5/m, z0.b, z21.b
1648 // vl128 state = 0x1478d524
1649 __ dci(0x4415a690); // umaxp z16.b, p1/m, z16.b, z20.b
1650 // vl128 state = 0xc3ac4a89
1651 __ dci(0x4415b614); // umaxp z20.b, p5/m, z20.b, z16.b
1652 // vl128 state = 0xb94a5aeb
1653 __ dci(0x4415b675); // umaxp z21.b, p5/m, z21.b, z19.b
1654 // vl128 state = 0xabeed92b
1655 __ dci(0x4415a63d); // umaxp z29.b, p1/m, z29.b, z17.b
1656 // vl128 state = 0xe36835ea
1657 __ dci(0x4415a63c); // umaxp z28.b, p1/m, z28.b, z17.b
1658 // vl128 state = 0x087002bb
1659 __ dci(0x4455a61d); // umaxp z29.h, p1/m, z29.h, z16.h
1660 // vl128 state = 0x17388ea4
1661 __ dci(0x4451ae1f); // addp z31.h, p3/m, z31.h, z16.h
1662 // vl128 state = 0x86ee7dbe
1663 __ dci(0x4451ae1b); // addp z27.h, p3/m, z27.h, z16.h
1664 // vl128 state = 0x9846169e
1665 __ dci(0x4451bc0b); // addp z11.h, p7/m, z11.h, z0.h
1666 // vl128 state = 0x5dc31eb0
1667 __ dci(0x4455bc4f); // umaxp z15.h, p7/m, z15.h, z2.h
1668 // vl128 state = 0x9ec9086c
1669 __ dci(0x4455bf47); // umaxp z7.h, p7/m, z7.h, z26.h
1670 // vl128 state = 0xf3a2766b
1671 __ dci(0x44d5b743); // umaxp z3.d, p5/m, z3.d, z26.d
1672 // vl128 state = 0x1ce44f7e
1673 __ dci(0x44d5b7e2); // umaxp z2.d, p5/m, z2.d, z31.d
1674 // vl128 state = 0xf121f7c0
1675 __ dci(0x44d5b7e0); // umaxp z0.d, p5/m, z0.d, z31.d
1676 // vl128 state = 0x4ac0d4f3
1677 __ dci(0x44d5b670); // umaxp z16.d, p5/m, z16.d, z19.d
1678 // vl128 state = 0xdb0d62f5
1679 __ dci(0x44d1b272); // addp z18.d, p4/m, z18.d, z19.d
1680 // vl128 state = 0x34b0c018
1681 __ dci(0x44d1be76); // addp z22.d, p7/m, z22.d, z19.d
1682 // vl128 state = 0x1673f380
1683 __ dci(0x44d1b772); // addp z18.d, p5/m, z18.d, z27.d
1684 // vl128 state = 0xe3e67205
1685 __ dci(0x44d1b162); // addp z2.d, p4/m, z2.d, z11.d
1686 // vl128 state = 0x42907adc
1687 }
1688
1689 uint32_t state;
1690 ComputeMachineStateHash(&masm, &state);
1691 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
1692 __ Ldr(w0, MemOperand(x0));
1693
1694 END();
1695 if (CAN_RUN()) {
1696 RUN();
1697 uint32_t expected_hashes[] = {
1698 0x42907adc,
1699 0xee2f21f5,
1700 0xcbfa0af4,
1701 0x42e7c862,
1702 0x10ef537f,
1703 0x83461e96,
1704 0x2dca0c37,
1705 0xf2080504,
1706 0xf615d956,
1707 0x1732775a,
1708 0x491fec07,
1709 0xf9e33ada,
1710 0x324435d7,
1711 0x08a9c2ca,
1712 0x87ce3994,
1713 0x338adb5d,
1714 };
1715 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
1716 }
1717 }
1718
// Exercises the SVE2 saturating extract-narrow instructions (sqxtnb/sqxtnt,
// uqxtnb/uqxtnt, sqxtunb/sqxtunt). A fixed sequence of raw encodings is
// assembled after SetInitialMachineState(), and the 32-bit value produced via
// ComputeMachineStateHash() is compared with a reference table that is indexed
// by the number of Q-register-sized lanes in the vector, minus one.
TEST_SVE(sve2_extract_narrow) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Raw instruction words, emitted in this order. Each entry is annotated
    // with its disassembly and the `vl128 state` value noted after it.
    static const uint32_t kEncodings[] = {
        0x45284000,  // sqxtnb z0.b, z0.h  -> vl128 state = 0x874f147b
        0x45284228,  // sqxtnb z8.b, z17.h  -> vl128 state = 0xf694d31e
        0x45284820,  // uqxtnb z0.b, z1.h  -> vl128 state = 0x5d25df42
        0x45304821,  // uqxtnb z1.h, z1.s  -> vl128 state = 0x87eb933f
        0x45304823,  // uqxtnb z3.h, z1.s  -> vl128 state = 0x137eddc9
        0x45604822,  // uqxtnb z2.s, z1.d  -> vl128 state = 0x26e237a3
        0x45604d26,  // uqxtnt z6.s, z9.d  -> vl128 state = 0x72bcf361
        0x45304d2e,  // uqxtnt z14.h, z9.s  -> vl128 state = 0x5bcdd232
        0x45304d3e,  // uqxtnt z30.h, z9.s  -> vl128 state = 0x9a695f7e
        0x453049bc,  // uqxtnb z28.h, z13.s  -> vl128 state = 0x9c2fa230
        0x453049b8,  // uqxtnb z24.h, z13.s  -> vl128 state = 0xb590179f
        0x45304979,  // uqxtnb z25.h, z11.s  -> vl128 state = 0xc8987735
        0x4530497d,  // uqxtnb z29.h, z11.s  -> vl128 state = 0x380f8730
        0x4530496d,  // uqxtnb z13.h, z11.s  -> vl128 state = 0x45bf22d4
        0x45304565,  // sqxtnt z5.h, z11.s  -> vl128 state = 0xd9237f41
        0x45304f75,  // uqxtnt z21.h, z27.s  -> vl128 state = 0x0726a49b
        0x45304f71,  // uqxtnt z17.h, z27.s  -> vl128 state = 0xcbc547e0
        0x45304f73,  // uqxtnt z19.h, z27.s  -> vl128 state = 0x0b16d843
        0x45284f72,  // uqxtnt z18.b, z27.h  -> vl128 state = 0xea84ff1f
        0x45284f7a,  // uqxtnt z26.b, z27.h  -> vl128 state = 0x4bdb094d
        0x45284fca,  // uqxtnt z10.b, z30.h  -> vl128 state = 0x5986f190
        0x45284b8b,  // uqxtnb z11.b, z28.h  -> vl128 state = 0xb40f0b26
        0x45284bef,  // uqxtnb z15.b, z31.h  -> vl128 state = 0x7abef2b5
        0x45284fae,  // uqxtnt z14.b, z29.h  -> vl128 state = 0x79503b36
        0x45284fac,  // uqxtnt z12.b, z29.h  -> vl128 state = 0x481a6879
        0x45284eed,  // uqxtnt z13.b, z23.h  -> vl128 state = 0x32da844c
        0x45284ee9,  // uqxtnt z9.b, z23.h  -> vl128 state = 0xb8438ca7
        0x45284ef9,  // uqxtnt z25.b, z23.h  -> vl128 state = 0x4aa26674
        0x45284cd1,  // uqxtnt z17.b, z6.h  -> vl128 state = 0xc5411d78
        0x45284cd5,  // uqxtnt z21.b, z6.h  -> vl128 state = 0xee446689
        0x45284ad4,  // uqxtnb z20.b, z22.h  -> vl128 state = 0x66ef53ef
        0x45604adc,  // uqxtnb z28.s, z22.d  -> vl128 state = 0xa894f4d4
        0x45604ade,  // uqxtnb z30.s, z22.d  -> vl128 state = 0x50215eb8
        0x456040dc,  // sqxtnb z28.s, z6.d  -> vl128 state = 0x5ee8464d
        0x456048f4,  // uqxtnb z20.s, z7.d  -> vl128 state = 0xee2ca07b
        0x45604c75,  // uqxtnt z21.s, z3.d  -> vl128 state = 0x0e81e7e0
        0x45604cb1,  // uqxtnt z17.s, z5.d  -> vl128 state = 0x5c448cac
        0x45604e33,  // uqxtnt z19.s, z17.d  -> vl128 state = 0xcd0d561e
        0x45604e23,  // uqxtnt z3.s, z17.d  -> vl128 state = 0x7b8b2204
        0x45604cab,  // uqxtnt z11.s, z5.d  -> vl128 state = 0x418cec7f
        0x45604caa,  // uqxtnt z10.s, z5.d  -> vl128 state = 0x37064bb6
        0x45604efa,  // uqxtnt z26.s, z23.d  -> vl128 state = 0xc83ef05d
        0x456046db,  // sqxtnt z27.s, z22.d  -> vl128 state = 0xe30a1f0f
        0x456046da,  // sqxtnt z26.s, z22.d  -> vl128 state = 0xe10b92fa
        0x4560424a,  // sqxtnb z10.s, z18.d  -> vl128 state = 0x2396410c
        0x45604a08,  // uqxtnb z8.s, z16.d  -> vl128 state = 0xf4ae5ad5
        0x45304a00,  // uqxtnb z0.h, z16.s  -> vl128 state = 0x26bbb3d1
        0x45304828,  // uqxtnb z8.h, z1.s  -> vl128 state = 0x57d91166
        0x4530422c,  // sqxtnb z12.h, z17.s  -> vl128 state = 0x5548e0b4
        0x45305324,  // sqxtunb z4.h, z25.s  -> vl128 state = 0xf7eb8d9c
        0x45305325,  // sqxtunb z5.h, z25.s  -> vl128 state = 0xcf294303
        0x45305321,  // sqxtunb z1.h, z25.s  -> vl128 state = 0x6c7597d6
        0x453057a9,  // sqxtunt z9.h, z29.s  -> vl128 state = 0xe7be4fd5
        0x453043b9,  // sqxtnb z25.h, z29.s  -> vl128 state = 0x376f3f76
        0x453043bb,  // sqxtnb z27.h, z29.s  -> vl128 state = 0xf8389159
        0x4530431a,  // sqxtnb z26.h, z24.s  -> vl128 state = 0x8ca15413
        0x45304312,  // sqxtnb z18.h, z24.s  -> vl128 state = 0x2a6d8b90
        0x4530491a,  // uqxtnb z26.h, z8.s  -> vl128 state = 0x7119ff0d
        0x4530413b,  // sqxtnb z27.h, z9.s  -> vl128 state = 0x884748db
        0x4530482b,  // uqxtnb z11.h, z1.s  -> vl128 state = 0x43296aec
        0x4530483b,  // uqxtnb z27.h, z1.s  -> vl128 state = 0xdb9908f0
        0x45304979,  // uqxtnb z25.h, z11.s  -> vl128 state = 0xef30bfc8
        0x453049d1,  // uqxtnb z17.h, z14.s  -> vl128 state = 0xb46173d8
        0x456049d3,  // uqxtnb z19.s, z14.d  -> vl128 state = 0xcb8c3b83
    };

    // The scope is sized for exactly the 64 instructions in the table above.
    ExactAssemblyScope guard(&masm, 64 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The generated code reads `hash` through its host address, so the variable
  // must stay alive until RUN() has completed. ComputeMachineStateHash()
  // presumably emits the code that fills it in.
  uint32_t hash;
  ComputeMachineStateHash(&masm, &hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference values; entry 0 matches the final `vl128 state` above.
    static const uint32_t kExpectedHashes[] = {
        0xcb8c3b83, 0x92fb7f98, 0xb7ec6385, 0x81de8602,
        0xd970d431, 0x2fe61431, 0x359b1355, 0xdeec900e,
        0xfd0c7d7d, 0x62e89b19, 0x43039424, 0xdd42efc9,
        0x861010f1, 0x82d68f37, 0x3761a1d0, 0xbcf3c5c9,
    };
    const uint32_t expected =
        kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1];
    ASSERT_EQUAL_64(expected, x0);
  }
}
1890
// Exercises the SVE2 interleaving exclusive-OR instructions (eorbt, eortb).
// A fixed sequence of raw encodings is assembled after
// SetInitialMachineState(), and the 32-bit value produced via
// ComputeMachineStateHash() is compared with a reference table that is indexed
// by the number of Q-register-sized lanes in the vector, minus one.
TEST_SVE(sve2_eorbt_eortb) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Raw instruction words, emitted in this order. Each entry is annotated
    // with its disassembly and the `vl128 state` value noted after it.
    static const uint32_t kEncodings[] = {
        0x451892b8,  // eorbt z24.b, z21.b, z24.b  -> vl128 state = 0xc3f2b082
        0x455893ba,  // eorbt z26.h, z29.h, z24.h  -> vl128 state = 0xc7421198
        0x455892f8,  // eorbt z24.h, z23.h, z24.h  -> vl128 state = 0x4e155b96
        0x455092bc,  // eorbt z28.h, z21.h, z16.h  -> vl128 state = 0x09393ad0
        0x455893be,  // eorbt z30.h, z29.h, z24.h  -> vl128 state = 0x6d660844
        0x4558922e,  // eorbt z14.h, z17.h, z24.h  -> vl128 state = 0x84f1ff20
        0x45d892aa,  // eorbt z10.d, z21.d, z24.d  -> vl128 state = 0x568612d4
        0x454892a8,  // eorbt z8.h, z21.h, z8.h  -> vl128 state = 0x699a3e24
        0x45c890ac,  // eorbt z12.d, z5.d, z8.d  -> vl128 state = 0x17bb6d9b
        0x45c990ed,  // eorbt z13.d, z7.d, z9.d  -> vl128 state = 0xee5be73f
        0x45c892fd,  // eorbt z29.d, z23.d, z8.d  -> vl128 state = 0x141c47ed
        0x45c892f9,  // eorbt z25.d, z23.d, z8.d  -> vl128 state = 0xc3259593
        0x45c892f8,  // eorbt z24.d, z23.d, z8.d  -> vl128 state = 0x3bca0bcc
        0x45c892e8,  // eorbt z8.d, z23.d, z8.d  -> vl128 state = 0x4714ab64
        0x454a92ea,  // eorbt z10.h, z23.h, z10.h  -> vl128 state = 0x51360c73
        0x454092e2,  // eorbt z2.h, z23.h, z0.h  -> vl128 state = 0xe33859fe
        0x454092f2,  // eorbt z18.h, z23.h, z0.h  -> vl128 state = 0xa0d81168
        0x4550927a,  // eorbt z26.h, z19.h, z16.h  -> vl128 state = 0xe4983274
        0x4551923b,  // eorbt z27.h, z17.h, z17.h  -> vl128 state = 0x8e89eab7
        0x45d3923f,  // eorbt z31.d, z17.d, z19.d  -> vl128 state = 0x472bd288
        0x4553921d,  // eorbt z29.h, z16.h, z19.h  -> vl128 state = 0x61090ed4
        0x4553932d,  // eorbt z13.h, z25.h, z19.h  -> vl128 state = 0x3ef228eb
        0x4513912c,  // eorbt z12.b, z9.b, z19.b  -> vl128 state = 0x96d4505c
        0x4551912d,  // eorbt z13.h, z9.h, z17.h  -> vl128 state = 0x1c32baef
        0x45119029,  // eorbt z9.b, z1.b, z17.b  -> vl128 state = 0xa138f554
        0x45149028,  // eorbt z8.b, z1.b, z20.b  -> vl128 state = 0xf0681d9a
        0x459490aa,  // eorbt z10.s, z5.s, z20.s  -> vl128 state = 0xbd4b30f5
        0x458590a8,  // eorbt z8.s, z5.s, z5.s  -> vl128 state = 0x45c5b437
        0x4585948c,  // eortb z12.s, z4.s, z5.s  -> vl128 state = 0x22f90a7b
        0x45cd949c,  // eortb z28.d, z4.d, z13.d  -> vl128 state = 0x5e4584ca
        0x4589949d,  // eortb z29.s, z4.s, z9.s  -> vl128 state = 0x65ac913e
        0x458990ad,  // eorbt z13.s, z5.s, z9.s  -> vl128 state = 0x4f13d973
        0x459b90ac,  // eorbt z12.s, z5.s, z27.s  -> vl128 state = 0xd13bb801
        0x45db90ee,  // eorbt z14.d, z7.d, z27.d  -> vl128 state = 0xf24115d0
        0x45db916f,  // eorbt z15.d, z11.d, z27.d  -> vl128 state = 0x04f38375
        0x45db95e7,  // eortb z7.d, z15.d, z27.d  -> vl128 state = 0xe1046ae5
        0x45db94a3,  // eortb z3.d, z5.d, z27.d  -> vl128 state = 0xaaeae67e
        0x45dd94a1,  // eortb z1.d, z5.d, z29.d  -> vl128 state = 0xd67f6823
        0x45dd94b1,  // eortb z17.d, z5.d, z29.d  -> vl128 state = 0xf172245b
        0x45dd90f3,  // eorbt z19.d, z7.d, z29.d  -> vl128 state = 0xc99195b8
        0x458d90e3,  // eorbt z3.s, z7.s, z13.s  -> vl128 state = 0xe1a146cf
        0x458994e2,  // eortb z2.s, z7.s, z9.s  -> vl128 state = 0x8038f273
        0x458b94a3,  // eortb z3.s, z5.s, z11.s  -> vl128 state = 0x50bda372
        0x459b9481,  // eortb z1.s, z4.s, z27.s  -> vl128 state = 0xe8d53012
        0x455b9485,  // eortb z5.h, z4.h, z27.h  -> vl128 state = 0xdba33ea5
        0x454b9087,  // eorbt z7.h, z4.h, z11.h  -> vl128 state = 0xff7f1815
        0x45499003,  // eorbt z3.h, z0.h, z9.h  -> vl128 state = 0x5d6e0104
        0x454d9022,  // eorbt z2.h, z1.h, z13.h  -> vl128 state = 0xe9161cfe
        0x45099026,  // eorbt z6.b, z1.b, z9.b  -> vl128 state = 0x48126fb9
        0x454b9024,  // eorbt z4.h, z1.h, z11.h  -> vl128 state = 0x53cbfc46
    };

    // The scope is sized for exactly the 50 instructions in the table above.
    ExactAssemblyScope guard(&masm, 50 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The generated code reads `hash` through its host address, so the variable
  // must stay alive until RUN() has completed. ComputeMachineStateHash()
  // presumably emits the code that fills it in.
  uint32_t hash;
  ComputeMachineStateHash(&masm, &hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference values; entry 0 matches the final `vl128 state` above.
    static const uint32_t kExpectedHashes[] = {
        0x53cbfc46, 0x0f81a01e, 0xf97c4e96, 0x745e9ed6,
        0x4487a0a1, 0x7ad79509, 0x53577280, 0x1e589717,
        0xaaa96af0, 0x4f2b0884, 0x24d2cd1c, 0x4d89438d,
        0x9b327a12, 0xeabfd558, 0xb63e33f1, 0xebd7d9ca,
    };
    const uint32_t expected =
        kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1];
    ASSERT_EQUAL_64(expected, x0);
  }
}
2034
// Exercises the vector forms of the SVE2 sqrdmlah and sqrdmlsh instructions.
// A fixed sequence of raw encodings is assembled after
// SetInitialMachineState(), and the 32-bit value produced via
// ComputeMachineStateHash() is compared with a reference table that is indexed
// by the number of Q-register-sized lanes in the vector, minus one.
TEST_SVE(sve2_saturating_multiply_add_high_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Raw instruction words, emitted in this order. Each entry is annotated
    // with its disassembly and the `vl128 state` value noted after it.
    static const uint32_t kEncodings[] = {
        0x44d9721a,  // sqrdmlah z26.d, z16.d, z25.d  -> vl128 state = 0xc0474f3f
        0x44dd761b,  // sqrdmlsh z27.d, z16.d, z29.d  -> vl128 state = 0x102712ac
        0x44d4760b,  // sqrdmlsh z11.d, z16.d, z20.d  -> vl128 state = 0xe8666aa6
        0x44947709,  // sqrdmlsh z9.s, z24.s, z20.s  -> vl128 state = 0xdd18f643
        0x4494770b,  // sqrdmlsh z11.s, z24.s, z20.s  -> vl128 state = 0xac4a4d4c
        0x44d4773b,  // sqrdmlsh z27.d, z25.d, z20.d  -> vl128 state = 0x1a5447d4
        0x44dc7639,  // sqrdmlsh z25.d, z17.d, z28.d  -> vl128 state = 0xf547ac30
        0x44dc763b,  // sqrdmlsh z27.d, z17.d, z28.d  -> vl128 state = 0xb42d177a
        0x44d4743f,  // sqrdmlsh z31.d, z1.d, z20.d  -> vl128 state = 0xd0da2c6b
        0x449c742f,  // sqrdmlsh z15.s, z1.s, z28.s  -> vl128 state = 0xb24c8988
        0x449c7487,  // sqrdmlsh z7.s, z4.s, z28.s  -> vl128 state = 0x9e67ddac
        0x449c7485,  // sqrdmlsh z5.s, z4.s, z28.s  -> vl128 state = 0xd96b34e2
        0x448e7481,  // sqrdmlsh z1.s, z4.s, z14.s  -> vl128 state = 0x81d91007
        0x448e7480,  // sqrdmlsh z0.s, z4.s, z14.s  -> vl128 state = 0x901fa692
        0x449c7488,  // sqrdmlsh z8.s, z4.s, z28.s  -> vl128 state = 0xeedceee6
        0x441c758a,  // sqrdmlsh z10.b, z12.b, z28.b  -> vl128 state = 0x8dc4d389
        0x441475ae,  // sqrdmlsh z14.b, z13.b, z20.b  -> vl128 state = 0xb1711932
        0x440075ac,  // sqrdmlsh z12.b, z13.b, z0.b  -> vl128 state = 0x8cacf188
        0x440171bc,  // sqrdmlah z28.b, z13.b, z1.b  -> vl128 state = 0x9c8b9f4f
        0x440171b8,  // sqrdmlah z24.b, z13.b, z1.b  -> vl128 state = 0x562ebefa
        0x441971b9,  // sqrdmlah z25.b, z13.b, z25.b  -> vl128 state = 0x1ef60d31
        0x440970bb,  // sqrdmlah z27.b, z5.b, z9.b  -> vl128 state = 0x69bd18ee
        0x441870ba,  // sqrdmlah z26.b, z5.b, z24.b  -> vl128 state = 0x525b1f84
        0x441270b8,  // sqrdmlah z24.b, z5.b, z18.b  -> vl128 state = 0x3c7dadd8
        0x44927090,  // sqrdmlah z16.s, z4.s, z18.s  -> vl128 state = 0x276f0567
        0x44937292,  // sqrdmlah z18.s, z20.s, z19.s  -> vl128 state = 0x6f0f8bb4
        0x4491721a,  // sqrdmlah z26.s, z16.s, z17.s  -> vl128 state = 0x28eb737a
        0x44d3721b,  // sqrdmlah z27.d, z16.d, z19.d  -> vl128 state = 0xa3bd1133
        0x44d372ab,  // sqrdmlah z11.d, z21.d, z19.d  -> vl128 state = 0x6e81e8fd
        0x44d372a3,  // sqrdmlah z3.d, z21.d, z19.d  -> vl128 state = 0x55730750
        0x445376a1,  // sqrdmlsh z1.h, z21.h, z19.h  -> vl128 state = 0x7c7afd6d
        0x44527685,  // sqrdmlsh z5.h, z20.h, z18.h  -> vl128 state = 0x1c9dc1a1
        0x44127495,  // sqrdmlsh z21.b, z4.b, z18.b  -> vl128 state = 0xf2e07e92
        0x44127794,  // sqrdmlsh z20.b, z28.b, z18.b  -> vl128 state = 0xc5a2e589
        0x44527695,  // sqrdmlsh z21.h, z20.h, z18.h  -> vl128 state = 0x417df395
        0x445274dd,  // sqrdmlsh z29.h, z6.h, z18.h  -> vl128 state = 0x2e223308
        0x445774df,  // sqrdmlsh z31.h, z6.h, z23.h  -> vl128 state = 0x99047839
        0x445775fe,  // sqrdmlsh z30.h, z15.h, z23.h  -> vl128 state = 0x34a4be39
        0x445175ff,  // sqrdmlsh z31.h, z15.h, z17.h  -> vl128 state = 0x714b9d66
        0x44517557,  // sqrdmlsh z23.h, z10.h, z17.h  -> vl128 state = 0x2aa51ff4
    };

    // The scope is sized for exactly the 40 instructions in the table above.
    ExactAssemblyScope guard(&masm, 40 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The generated code reads `hash` through its host address, so the variable
  // must stay alive until RUN() has completed. ComputeMachineStateHash()
  // presumably emits the code that fills it in.
  uint32_t hash;
  ComputeMachineStateHash(&masm, &hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference values; entry 0 matches the final `vl128 state` above.
    static const uint32_t kExpectedHashes[] = {
        0x2aa51ff4, 0xde163ba0, 0x8b237661, 0x30086cf2,
        0xabf248f0, 0xcc183608, 0xa4103141, 0x521ebe39,
        0xd746470e, 0x141a51a4, 0x695a47fd, 0x0a74d701,
        0xd14bae63, 0xf967aadb, 0xdaed8896, 0x7ba556cb,
    };
    const uint32_t expected =
        kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1];
    ASSERT_EQUAL_64(expected, x0);
  }
}
2158
// Exercises the SVE2 predicated pairwise add-and-accumulate-long instructions
// (sadalp, uadalp). A fixed sequence of raw encodings is assembled after
// SetInitialMachineState(), and the 32-bit value produced via
// ComputeMachineStateHash() is compared with a reference table that is indexed
// by the number of Q-register-sized lanes in the vector, minus one.
TEST_SVE(sve2_integer_pairwise_add_accumulate_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Raw instruction words, emitted in this order. Each entry is annotated
    // with its disassembly and the `vl128 state` value noted after it.
    static const uint32_t kEncodings[] = {
        0x4445b4e3,  // uadalp z3.h, p5/m, z7.b  -> vl128 state = 0x3ad015af
        0x4445b4e1,  // uadalp z1.h, p5/m, z7.b  -> vl128 state = 0x3f53978b
        0x4445bc65,  // uadalp z5.h, p7/m, z3.b  -> vl128 state = 0xf3340744
        0x4445be35,  // uadalp z21.h, p7/m, z17.b  -> vl128 state = 0xb6f81377
        0x4445be9d,  // uadalp z29.h, p7/m, z20.b  -> vl128 state = 0xaf772b37
        0x4444bc9c,  // sadalp z28.h, p7/m, z4.b  -> vl128 state = 0x591be304
        0x4444bc9d,  // sadalp z29.h, p7/m, z4.b  -> vl128 state = 0x406d9d34
        0x4444ba99,  // sadalp z25.h, p6/m, z20.b  -> vl128 state = 0xb455880f
        0x44c4ba09,  // sadalp z9.d, p6/m, z16.s  -> vl128 state = 0x5ef8e2ed
        0x44c4ba01,  // sadalp z1.d, p6/m, z16.s  -> vl128 state = 0xca2ccf0d
        0x44c4ba11,  // sadalp z17.d, p6/m, z16.s  -> vl128 state = 0x33bb9903
        0x4484bb15,  // sadalp z21.s, p6/m, z24.h  -> vl128 state = 0x3964a356
        0x4484b957,  // sadalp z23.s, p6/m, z10.h  -> vl128 state = 0x1e1426d2
        0x4484b953,  // sadalp z19.s, p6/m, z10.h  -> vl128 state = 0x83e2e1a6
        0x4484b943,  // sadalp z3.s, p6/m, z10.h  -> vl128 state = 0x24335149
        0x4484b102,  // sadalp z2.s, p4/m, z8.h  -> vl128 state = 0x8bde109a
        0x4484bd06,  // sadalp z6.s, p7/m, z8.h  -> vl128 state = 0x5abf30eb
        0x4484bdc2,  // sadalp z2.s, p7/m, z14.h  -> vl128 state = 0xcb199381
        0x4485b5c6,  // uadalp z6.s, p5/m, z14.h  -> vl128 state = 0x5f3819ad
        0x4485b5c2,  // uadalp z2.s, p5/m, z14.h  -> vl128 state = 0x5f6d69e4
        0x4485b5ca,  // uadalp z10.s, p5/m, z14.h  -> vl128 state = 0x1a0d7053
        0x4485b15a,  // uadalp z26.s, p4/m, z10.h  -> vl128 state = 0x9081b6cd
        0x44c5b95e,  // uadalp z30.d, p6/m, z10.s  -> vl128 state = 0x6b15107e
        0x44c5a14e,  // uadalp z14.d, p0/m, z10.s  -> vl128 state = 0x4a127dc2
        0x4445a1c6,  // uadalp z6.h, p0/m, z14.b  -> vl128 state = 0x06902399
        0x4445a1ce,  // uadalp z14.h, p0/m, z14.b  -> vl128 state = 0x1789be4a
        0x4444a9de,  // sadalp z30.h, p2/m, z14.b  -> vl128 state = 0x86732543
        0x4444adff,  // sadalp z31.h, p3/m, z15.b  -> vl128 state = 0xe326faef
        0x4444bdb7,  // sadalp z23.h, p7/m, z13.b  -> vl128 state = 0x46d5f328
        0x4444bda7,  // sadalp z7.h, p7/m, z13.b  -> vl128 state = 0x5cf7a973
        0x4445bd25,  // uadalp z5.h, p7/m, z9.b  -> vl128 state = 0xdf8cbb97
        0x4485bd35,  // uadalp z21.s, p7/m, z9.h  -> vl128 state = 0x330c3d35
        0x4485bc17,  // uadalp z23.s, p7/m, z0.h  -> vl128 state = 0x6ebfa4fe
        0x4485bc15,  // uadalp z21.s, p7/m, z0.h  -> vl128 state = 0x52f18385
        0x4485be91,  // uadalp z17.s, p7/m, z20.h  -> vl128 state = 0x82fa2d85
        0x4485be53,  // uadalp z19.s, p7/m, z18.h  -> vl128 state = 0xa7d6098b
        0x4485aa52,  // uadalp z18.s, p2/m, z18.h  -> vl128 state = 0xfe8faafa
        0x4485ae13,  // uadalp z19.s, p3/m, z16.h  -> vl128 state = 0xf2465f31
        0x4485b617,  // uadalp z23.s, p5/m, z16.h  -> vl128 state = 0xed6be8ed
        0x4485bc13,  // uadalp z19.s, p7/m, z0.h  -> vl128 state = 0xb2f95c3d
    };

    // The scope is sized for exactly the 40 instructions in the table above.
    ExactAssemblyScope guard(&masm, 40 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The generated code reads `hash` through its host address, so the variable
  // must stay alive until RUN() has completed. ComputeMachineStateHash()
  // presumably emits the code that fills it in.
  uint32_t hash;
  ComputeMachineStateHash(&masm, &hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference values; entry 0 matches the final `vl128 state` above.
    static const uint32_t kExpectedHashes[] = {
        0xb2f95c3d, 0xa4189170, 0xed9e7f9e, 0xfca732cb,
        0x4c94b2d7, 0x92a2fb21, 0xbca62a5c, 0x9aec54d6,
        0x8df82b02, 0x50c18764, 0xd27e5a0e, 0x1a538cc6,
        0x538b673e, 0x37e4b499, 0x7160cbd5, 0x113951bc,
    };
    const uint32_t expected =
        kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1];
    ASSERT_EQUAL_64(expected, x0);
  }
}
2282
// Exercises the unpredicated vector forms of the SVE2 mul and pmul
// instructions. A fixed sequence of raw encodings is assembled after
// SetInitialMachineState(), and the 32-bit value produced via
// ComputeMachineStateHash() is compared with a reference table that is indexed
// by the number of Q-register-sized lanes in the vector, minus one.
TEST_SVE(sve2_pmul_mul_vector_unpredicated) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Raw instruction words, emitted in this order. Each entry is annotated
    // with its disassembly and the `vl128 state` value noted after it.
    static const uint32_t kEncodings[] = {
        0x04a56309,  // mul z9.s, z24.s, z5.s  -> vl128 state = 0x0ef461d5
        0x04a56148,  // mul z8.s, z10.s, z5.s  -> vl128 state = 0xce9f1381
        0x04a161d8,  // mul z24.s, z14.s, z1.s  -> vl128 state = 0x2a14ff8c
        0x04a16179,  // mul z25.s, z11.s, z1.s  -> vl128 state = 0x88a0241b
        0x04b36171,  // mul z17.s, z11.s, z19.s  -> vl128 state = 0x23aea8a6
        0x04fb6170,  // mul z16.d, z11.d, z27.d  -> vl128 state = 0x58eaa46d
        0x04fb6171,  // mul z17.d, z11.d, z27.d  -> vl128 state = 0xc733a399
        0x04fb6350,  // mul z16.d, z26.d, z27.d  -> vl128 state = 0x2806af41
        0x04eb6372,  // mul z18.d, z27.d, z11.d  -> vl128 state = 0x5ec775d1
        0x04eb6376,  // mul z22.d, z27.d, z11.d  -> vl128 state = 0x40d03f0d
        0x04ed637e,  // mul z30.d, z27.d, z13.d  -> vl128 state = 0xe3a61d56
        0x04e8637f,  // mul z31.d, z27.d, z8.d  -> vl128 state = 0x2eb4313f
        0x04a86337,  // mul z23.s, z25.s, z8.s  -> vl128 state = 0xc68e329e
        0x04a86336,  // mul z22.s, z25.s, z8.s  -> vl128 state = 0x177b1a43
        0x04ac63be,  // mul z30.s, z29.s, z12.s  -> vl128 state = 0xaaa415dd
        0x04ac63d6,  // mul z22.s, z30.s, z12.s  -> vl128 state = 0xaeb212b8
        0x042c67d2,  // pmul z18.b, z30.b, z12.b  -> vl128 state = 0xa11be1c8
        0x042c65f3,  // pmul z19.b, z15.b, z12.b  -> vl128 state = 0x8dd03a21
        0x042e65d2,  // pmul z18.b, z14.b, z14.b  -> vl128 state = 0x83ef9a66
        0x042f6550,  // pmul z16.b, z10.b, z15.b  -> vl128 state = 0x6a495368
        0x042e6754,  // pmul z20.b, z26.b, z14.b  -> vl128 state = 0x0b6c3ccf
        0x042e6750,  // pmul z16.b, z26.b, z14.b  -> vl128 state = 0xa745457f
        0x042e6600,  // pmul z0.b, z16.b, z14.b  -> vl128 state = 0x92fe8b9d
        0x042e6602,  // pmul z2.b, z16.b, z14.b  -> vl128 state = 0xda39ebe2
        0x043f6600,  // pmul z0.b, z16.b, z31.b  -> vl128 state = 0xcc36d223
        0x042b6608,  // pmul z8.b, z16.b, z11.b  -> vl128 state = 0x8b94d25a
        0x042a6700,  // pmul z0.b, z24.b, z10.b  -> vl128 state = 0x0118ccba
        0x042a6710,  // pmul z16.b, z24.b, z10.b  -> vl128 state = 0x4b38543b
        0x042a6714,  // pmul z20.b, z24.b, z10.b  -> vl128 state = 0xa54e126f
        0x042a6716,  // pmul z22.b, z24.b, z10.b  -> vl128 state = 0x61ad87c9
    };

    // The scope is sized for exactly the 30 instructions in the table above.
    ExactAssemblyScope guard(&masm, 30 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The generated code reads `hash` through its host address, so the variable
  // must stay alive until RUN() has completed. ComputeMachineStateHash()
  // presumably emits the code that fills it in.
  uint32_t hash;
  ComputeMachineStateHash(&masm, &hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference values; entry 0 matches the final `vl128 state` above.
    static const uint32_t kExpectedHashes[] = {
        0x61ad87c9, 0x82df488f, 0xc0d7c1a4, 0x4f86e761,
        0x8d651d7b, 0x294cf55a, 0x060ab34c, 0x1db0e99c,
        0x4b0b59d7, 0xcee6dfd1, 0x29575669, 0x5c1c7922,
        0x4b1957ed, 0x8bc5712b, 0x6ac59fdc, 0x048ce1b5,
    };
    const uint32_t expected =
        kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1];
    ASSERT_EQUAL_64(expected, x0);
  }
}
2386
// Exercises the unpredicated vector forms of the SVE2 smulh and umulh
// instructions. A fixed sequence of raw encodings is assembled after
// SetInitialMachineState(), and the 32-bit value produced via
// ComputeMachineStateHash() is compared with a reference table that is indexed
// by the number of Q-register-sized lanes in the vector, minus one.
TEST_SVE(sve2_smulh_umulh_vector_unpredicated) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Raw instruction words, emitted in this order. Each entry is annotated
    // with its disassembly and the `vl128 state` value noted after it.
    static const uint32_t kEncodings[] = {
        0x04e46c3b,  // umulh z27.d, z1.d, z4.d  -> vl128 state = 0xfb66ba83
        0x04ac6c3a,  // umulh z26.s, z1.s, z12.s  -> vl128 state = 0x45cdb9a2
        0x04a86e32,  // umulh z18.s, z17.s, z8.s  -> vl128 state = 0x4ad150dc
        0x04a86a7a,  // smulh z26.s, z19.s, z8.s  -> vl128 state = 0xbf08e2cb
        0x04e86b7b,  // smulh z27.d, z27.d, z8.d  -> vl128 state = 0x51ad0655
        0x04ee6b73,  // smulh z19.d, z27.d, z14.d  -> vl128 state = 0xf764bda9
        0x04ec6f7b,  // umulh z27.d, z27.d, z12.d  -> vl128 state = 0xc90f20ef
        0x04ac6f3a,  // umulh z26.s, z25.s, z12.s  -> vl128 state = 0x9ec08333
        0x04ac6f32,  // umulh z18.s, z25.s, z12.s  -> vl128 state = 0x3620406c
        0x042e6f3a,  // umulh z26.b, z25.b, z14.b  -> vl128 state = 0x4e18467a
        0x042a6b2a,  // smulh z10.b, z25.b, z10.b  -> vl128 state = 0x13c7cd6f
        0x042a6b2b,  // smulh z11.b, z25.b, z10.b  -> vl128 state = 0x16a44c1b
        0x043a6b03,  // smulh z3.b, z24.b, z26.b  -> vl128 state = 0x9f8f203b
        0x047a690b,  // smulh z11.h, z8.h, z26.h  -> vl128 state = 0xce0aa45e
        0x047a690a,  // smulh z10.h, z8.h, z26.h  -> vl128 state = 0xb667d59b
        0x0479690e,  // smulh z14.h, z8.h, z25.h  -> vl128 state = 0xd76639b7
        0x046d690c,  // smulh z12.h, z8.h, z13.h  -> vl128 state = 0x736b227e
        0x042f690e,  // smulh z14.b, z8.b, z15.b  -> vl128 state = 0xc0804df9
        0x042f69ac,  // smulh z12.b, z13.b, z15.b  -> vl128 state = 0x8a5509f5
        0x042f696e,  // smulh z14.b, z11.b, z15.b  -> vl128 state = 0x761f9cf8
        0x042e6b6a,  // smulh z10.b, z27.b, z14.b  -> vl128 state = 0x3b5f2705
        0x042e6b6e,  // smulh z14.b, z27.b, z14.b  -> vl128 state = 0x53b23a0a
        0x04366b6f,  // smulh z15.b, z27.b, z22.b  -> vl128 state = 0x5bd53ce9
        0x04766f7f,  // umulh z31.h, z27.h, z22.h  -> vl128 state = 0x701bec8f
        0x04746fef,  // umulh z15.h, z31.h, z20.h  -> vl128 state = 0x29697c8c
        0x04706dee,  // umulh z14.h, z15.h, z16.h  -> vl128 state = 0x2088f1c2
        0x04706c7e,  // umulh z30.h, z3.h, z16.h  -> vl128 state = 0x56224145
        0x04306c2e,  // umulh z14.b, z1.b, z16.b  -> vl128 state = 0x2ba58c9c
        0x04b06e2a,  // umulh z10.s, z17.s, z16.s  -> vl128 state = 0xb933d058
        0x04b56e2e,  // umulh z14.s, z17.s, z21.s  -> vl128 state = 0x184daee9
    };

    // The scope is sized for exactly the 30 instructions in the table above.
    ExactAssemblyScope guard(&masm, 30 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The generated code reads `hash` through its host address, so the variable
  // must stay alive until RUN() has completed. ComputeMachineStateHash()
  // presumably emits the code that fills it in.
  uint32_t hash;
  ComputeMachineStateHash(&masm, &hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference values; entry 0 matches the final `vl128 state` above.
    static const uint32_t kExpectedHashes[] = {
        0x184daee9, 0x19454232, 0xa56823a3, 0xe334897a,
        0xcaa988e1, 0x614cbf4f, 0xfaa384e4, 0x4b45e885,
        0xef930ead, 0x49304b9a, 0x4f1d830e, 0xa41c1a95,
        0xa1ea8d07, 0x62ca97b4, 0x15f52cac, 0xc190cd57,
    };
    const uint32_t expected =
        kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1];
    ASSERT_EQUAL_64(expected, x0);
  }
}
2490
TEST_SVE(sve2_arith_interleaved_long)2491 TEST_SVE(sve2_arith_interleaved_long) {
2492 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
2493 CPUFeatures::kSVE2,
2494 CPUFeatures::kNEON,
2495 CPUFeatures::kCRC32);
2496 START();
2497
2498 SetInitialMachineState(&masm);
2499 // state = 0xe2bd2480
2500
2501 {
2502 ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
2503 __ dci(0x459289bd); // ssublbt z29.s, z13.h, z18.h
2504 // vl128 state = 0xe2e0965a
2505 __ dci(0x459289bf); // ssublbt z31.s, z13.h, z18.h
2506 // vl128 state = 0x64e3e1a3
2507 __ dci(0x45d689be); // ssublbt z30.d, z13.s, z22.s
2508 // vl128 state = 0x02711ec2
2509 __ dci(0x45d68916); // ssublbt z22.d, z8.s, z22.s
2510 // vl128 state = 0x7ff6f63f
2511 __ dci(0x45968957); // ssublbt z23.s, z10.h, z22.h
2512 // vl128 state = 0xa9aace7f
2513 __ dci(0x45968a55); // ssublbt z21.s, z18.h, z22.h
2514 // vl128 state = 0x6007d46c
2515 __ dci(0x45868251); // saddlbt z17.s, z18.h, z6.h
2516 // vl128 state = 0xecea329d
2517 __ dci(0x45868230); // saddlbt z16.s, z17.h, z6.h
2518 // vl128 state = 0xa16880b8
2519 __ dci(0x45868231); // saddlbt z17.s, z17.h, z6.h
2520 // vl128 state = 0xcff73a01
2521 __ dci(0x458c8235); // saddlbt z21.s, z17.h, z12.h
2522 // vl128 state = 0xf6486b24
2523 __ dci(0x458c8231); // saddlbt z17.s, z17.h, z12.h
2524 // vl128 state = 0xa5612e07
2525 __ dci(0x459c8021); // saddlbt z1.s, z1.h, z28.h
2526 // vl128 state = 0xd71ab1e8
2527 __ dci(0x458c8009); // saddlbt z9.s, z0.h, z12.h
2528 // vl128 state = 0xaf74bd16
2529 __ dci(0x459e800b); // saddlbt z11.s, z0.h, z30.h
2530 // vl128 state = 0x96dee616
2531 __ dci(0x45928003); // saddlbt z3.s, z0.h, z18.h
2532 // vl128 state = 0x652e9cca
2533 __ dci(0x45d28207); // saddlbt z7.d, z16.s, z18.s
2534 // vl128 state = 0xc6b07290
2535 __ dci(0x45da8225); // saddlbt z5.d, z17.s, z26.s
2536 // vl128 state = 0x8c74a35d
2537 __ dci(0x45da830d); // saddlbt z13.d, z24.s, z26.s
2538 // vl128 state = 0xff620001
2539 __ dci(0x45cb8309); // saddlbt z9.d, z24.s, z11.s
2540 // vl128 state = 0x2147f374
2541 __ dci(0x45ca8119); // saddlbt z25.d, z8.s, z10.s
2542 // vl128 state = 0x6f961936
2543 __ dci(0x45ce831d); // saddlbt z29.d, z24.s, z14.s
2544 // vl128 state = 0xaa91e68a
2545 __ dci(0x45ce8135); // saddlbt z21.d, z9.s, z14.s
2546 // vl128 state = 0xa5635d0e
2547 __ dci(0x458e8331); // saddlbt z17.s, z25.h, z14.h
2548 // vl128 state = 0xa0705ea7
2549 __ dci(0x458e8030); // saddlbt z16.s, z1.h, z14.h
2550 // vl128 state = 0x397dc4d5
2551 __ dci(0x458e8271); // saddlbt z17.s, z19.h, z14.h
2552 // vl128 state = 0x5e975082
2553 __ dci(0x458a82e1); // saddlbt z1.s, z23.h, z10.h
2554 // vl128 state = 0x048f8dea
2555 __ dci(0x458a8240); // saddlbt z0.s, z18.h, z10.h
2556 // vl128 state = 0xd9104514
2557 __ dci(0x458a8e50); // ssubltb z16.s, z18.h, z10.h
2558 // vl128 state = 0x6afbf8b6
2559 __ dci(0x45988e58); // ssubltb z24.s, z18.h, z24.h
2560 // vl128 state = 0xfe44a2f8
2561 __ dci(0x45d08e59); // ssubltb z25.d, z18.s, z16.s
2562 // vl128 state = 0x050fb0ab
2563 __ dci(0x45d08e58); // ssubltb z24.d, z18.s, z16.s
2564 // vl128 state = 0xc9160f61
2565 __ dci(0x45d08259); // saddlbt z25.d, z18.s, z16.s
2566 // vl128 state = 0x70ae0c4a
2567 __ dci(0x45d08b51); // ssublbt z17.d, z26.s, z16.s
2568 // vl128 state = 0xe627770c
2569 __ dci(0x45d08970); // ssublbt z16.d, z11.s, z16.s
2570 // vl128 state = 0x445fd924
2571 __ dci(0x45d28d74); // ssubltb z20.d, z11.s, z18.s
2572 // vl128 state = 0x8c7dd6c0
2573 __ dci(0x45c28d56); // ssubltb z22.d, z10.s, z2.s
2574 // vl128 state = 0x925de210
2575 __ dci(0x45c28d52); // ssubltb z18.d, z10.s, z2.s
2576 // vl128 state = 0x28b67c05
2577 __ dci(0x45c48d5a); // ssubltb z26.d, z10.s, z4.s
2578 // vl128 state = 0x48e8377c
2579 __ dci(0x45c18d5b); // ssubltb z27.d, z10.s, z1.s
2580 // vl128 state = 0xb46af33e
2581 __ dci(0x45818d13); // ssubltb z19.s, z8.h, z1.h
2582 // vl128 state = 0x12fada0b
2583 __ dci(0x45818d12); // ssubltb z18.s, z8.h, z1.h
2584 // vl128 state = 0xeaeea3cd
2585 __ dci(0x45858d9a); // ssubltb z26.s, z12.h, z5.h
2586 // vl128 state = 0x6d466bd8
2587 __ dci(0x45858df2); // ssubltb z18.s, z15.h, z5.h
2588 // vl128 state = 0x60c67411
2589 __ dci(0x45c58d62); // ssubltb z2.d, z11.s, z5.s
2590 // vl128 state = 0xec3b40ed
2591 __ dci(0x45c58b72); // ssublbt z18.d, z27.s, z5.s
2592 // vl128 state = 0x5b421b0a
2593 __ dci(0x45858a76); // ssublbt z22.s, z19.h, z5.h
2594 // vl128 state = 0x8a0f26e9
2595 __ dci(0x45878877); // ssublbt z23.s, z3.h, z7.h
2596 // vl128 state = 0xc224293b
2597 __ dci(0x458f8073); // saddlbt z19.s, z3.h, z15.h
2598 // vl128 state = 0x9f5c0b50
2599 __ dci(0x45878051); // saddlbt z17.s, z2.h, z7.h
2600 // vl128 state = 0x2ae674c9
2601 __ dci(0x45838841); // ssublbt z1.s, z2.h, z3.h
2602 // vl128 state = 0x1dff4e20
2603 }
2604
2605 uint32_t state;
2606 ComputeMachineStateHash(&masm, &state);
2607 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
2608 __ Ldr(w0, MemOperand(x0));
2609
2610 END();
2611 if (CAN_RUN()) {
2612 RUN();
2613 uint32_t expected_hashes[] = {
2614 0x1dff4e20,
2615 0x3d2c11df,
2616 0x64caeccf,
2617 0x7940c227,
2618 0xf5f59485,
2619 0x7ad48c48,
2620 0xcde4523b,
2621 0xcb5849f0,
2622 0x1e7e9722,
2623 0x8049333f,
2624 0x40d95eb3,
2625 0x628a428d,
2626 0x1cf123f2,
2627 0x8d377510,
2628 0x44a03b91,
2629 0xabe90e98,
2630 };
2631 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
2632 }
2633 }
2634
// Test for the SVE2 SQABS and SQNEG instructions.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// then a hash of the resulting machine state is compared against a table of
// expected values selected by the number of Q-sized lanes in the vector.
TEST_SVE(sve2_sqabs_sqneg) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings, emitted in order. Each entry is annotated with its
  // disassembly and the vl128 state value noted after it.
  static const uint32_t kEncodings[] = {
      0x4448b23a,  // sqabs z26.h, p4/m, z17.h
      // vl128 state = 0x4aadd589
      0x4448b23e,  // sqabs z30.h, p4/m, z17.h
      // vl128 state = 0x86da455e
      0x4448a21c,  // sqabs z28.h, p0/m, z16.h
      // vl128 state = 0x4eecab5c
      0x4408a298,  // sqabs z24.b, p0/m, z20.b
      // vl128 state = 0xf81ee16e
      0x4408a0dc,  // sqabs z28.b, p0/m, z6.b
      // vl128 state = 0x84b94ec5
      0x4408a0de,  // sqabs z30.b, p0/m, z6.b
      // vl128 state = 0x626db033
      0x4408a19c,  // sqabs z28.b, p0/m, z12.b
      // vl128 state = 0x181303a1
      0x4408a3d4,  // sqabs z20.b, p0/m, z30.b
      // vl128 state = 0xf4e93ff3
      0x4489a3dc,  // sqneg z28.s, p0/m, z30.s
      // vl128 state = 0xffe7a865
      0x4409a1d4,  // sqneg z20.b, p0/m, z14.b
      // vl128 state = 0x6a27d8fe
      0x4408a3d0,  // sqabs z16.b, p0/m, z30.b
      // vl128 state = 0x9ffc0414
      0x44c8a3d8,  // sqabs z24.d, p0/m, z30.d
      // vl128 state = 0xd59acd78
      0x44c8b3fa,  // sqabs z26.d, p4/m, z31.d
      // vl128 state = 0x8853f8ac
      0x44c8a2fb,  // sqabs z27.d, p0/m, z23.d
      // vl128 state = 0x439e9079
      0x44c8a2f9,  // sqabs z25.d, p0/m, z23.d
      // vl128 state = 0xbaaa56a6
      0x4488a2db,  // sqabs z27.s, p0/m, z22.s
      // vl128 state = 0x328cbd5a
      0x4488a2df,  // sqabs z31.s, p0/m, z22.s
      // vl128 state = 0x4a74b2da
      0x4488a2cf,  // sqabs z15.s, p0/m, z22.s
      // vl128 state = 0x52af62a6
      0x4488a04b,  // sqabs z11.s, p0/m, z2.s
      // vl128 state = 0xa45aef42
      0x4488a02f,  // sqabs z15.s, p0/m, z1.s
      // vl128 state = 0x0b5444ed
      0x4489a06d,  // sqneg z13.s, p0/m, z3.s
      // vl128 state = 0x6f0912d5
      0x4489a449,  // sqneg z9.s, p1/m, z2.s
      // vl128 state = 0x669ac78a
      0x4489a50b,  // sqneg z11.s, p1/m, z8.s
      // vl128 state = 0x58ae27ee
      0x4488a71b,  // sqabs z27.s, p1/m, z24.s
      // vl128 state = 0xa54925f9
      0x4408a519,  // sqabs z25.b, p1/m, z8.b
      // vl128 state = 0x45c13095
      0x4408a158,  // sqabs z24.b, p0/m, z10.b
      // vl128 state = 0x2d6d547a
      0x4488a168,  // sqabs z8.s, p0/m, z11.s
      // vl128 state = 0xc976b77b
      0x44c9a16c,  // sqneg z12.d, p0/m, z11.d
      // vl128 state = 0x766e750f
      0x44c9a17c,  // sqneg z28.d, p0/m, z11.d
      // vl128 state = 0xbf22858d
      0x44c9a878,  // sqneg z24.d, p2/m, z3.d
      // vl128 state = 0xe563a474
      0x44c9a8d9,  // sqneg z25.d, p2/m, z6.d
      // vl128 state = 0x573c2648
      0x44c9b85b,  // sqneg z27.d, p6/m, z2.d
      // vl128 state = 0x03cdf714
      0x4449b87f,  // sqneg z31.h, p6/m, z3.h
      // vl128 state = 0xff4e2cb1
      0x4449b81d,  // sqneg z29.h, p6/m, z0.h
      // vl128 state = 0xaab7065e
      0x4449a895,  // sqneg z21.h, p2/m, z4.h
      // vl128 state = 0x60d4a6d3
      0x4449a825,  // sqneg z5.h, p2/m, z1.h
      // vl128 state = 0x3bed34e4
      0x4449a821,  // sqneg z1.h, p2/m, z1.h
      // vl128 state = 0xaa750880
      0x4449a820,  // sqneg z0.h, p2/m, z1.h
      // vl128 state = 0xfca9d635
      0x4449a822,  // sqneg z2.h, p2/m, z1.h
      // vl128 state = 0x8a92f3e7
      0x4449ae23,  // sqneg z3.h, p3/m, z17.h
      // vl128 state = 0xc2db1ac5
      0x4449af73,  // sqneg z19.h, p3/m, z27.h
      // vl128 state = 0x386f5f27
      0x4449af77,  // sqneg z23.h, p3/m, z27.h
      // vl128 state = 0xff4fd505
      0x4489af67,  // sqneg z7.s, p3/m, z27.s
      // vl128 state = 0x4c897605
      0x4489ad25,  // sqneg z5.s, p3/m, z9.s
      // vl128 state = 0xcc73333a
      0x4409ad07,  // sqneg z7.b, p3/m, z8.b
      // vl128 state = 0x58d37b50
      0x4489ad85,  // sqneg z5.s, p3/m, z12.s
      // vl128 state = 0x2a142b9d
      0x44c9a984,  // sqneg z4.d, p2/m, z12.d
      // vl128 state = 0x006fd35a
      0x44c9a926,  // sqneg z6.d, p2/m, z9.d
      // vl128 state = 0x06c05c5d
      0x4449ab2e,  // sqneg z14.h, p2/m, z25.h
      // vl128 state = 0xe41a6fc4
      0x4449ab3e,  // sqneg z30.h, p2/m, z25.h
      // vl128 state = 0x6e574bec
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized from the table so it always matches what is emitted.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (const uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Generate code that leaves the machine state hash in w0: the hash helper
  // is handed &final_hash, and that word is then loaded through x0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (Q-sized lane count - 1); entry 0 equals the final vl128
    // state noted in the encoding table.
    static const uint32_t kExpectedHashes[] = {
        0x6e574bec,
        0xec677945,
        0xe7357ba7,
        0xbbf92859,
        0x3f42d943,
        0xe2db0bb1,
        0x704d1161,
        0xc0e1f809,
        0x887dd5e7,
        0x452b8b80,
        0xcf455511,
        0x821ad0bc,
        0xb98b1eac,
        0x49ae6871,
        0x16b2e0a6,
        0xaba4d260,
    };
    const int q_lane_count = core.GetSVELaneCount(kQRegSize);
    ASSERT_EQUAL_64(kExpectedHashes[q_lane_count - 1], x0);
  }
}
2778
// Test for the SVE2 URECPE and URSQRTE instructions.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// then a hash of the resulting machine state is compared against a table of
// expected values selected by the number of Q-sized lanes in the vector.
TEST_SVE(sve2_urecpe_ursqrte) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings, emitted in order. Each entry is annotated with its
  // disassembly and the vl128 state value noted after it.
  static const uint32_t kEncodings[] = {
      0x4481bee8,  // ursqrte z8.s, p7/m, z23.s
      // vl128 state = 0x38c317d5
      0x4480bea9,  // urecpe z9.s, p7/m, z21.s
      // vl128 state = 0x8412e46d
      0x4481bfab,  // ursqrte z11.s, p7/m, z29.s
      // vl128 state = 0xae6c2805
      0x4481b9a3,  // ursqrte z3.s, p6/m, z13.s
      // vl128 state = 0x114331ab
      0x4481aba2,  // ursqrte z2.s, p2/m, z29.s
      // vl128 state = 0x88f2308d
      0x4480abe6,  // urecpe z6.s, p2/m, z31.s
      // vl128 state = 0x328b45b8
      0x4480afa2,  // urecpe z2.s, p3/m, z29.s
      // vl128 state = 0x7b67ded4
      0x4480ae23,  // urecpe z3.s, p3/m, z17.s
      // vl128 state = 0x48d1ac45
      0x4481aa27,  // ursqrte z7.s, p2/m, z17.s
      // vl128 state = 0x475f61b6
      0x4481a325,  // ursqrte z5.s, p0/m, z25.s
      // vl128 state = 0xfbf0b767
      0x4481a321,  // ursqrte z1.s, p0/m, z25.s
      // vl128 state = 0x31481484
      0x4481ab05,  // ursqrte z5.s, p2/m, z24.s
      // vl128 state = 0x5aca5e43
      0x4481a995,  // ursqrte z21.s, p2/m, z12.s
      // vl128 state = 0xe3b96378
      0x4481bb91,  // ursqrte z17.s, p6/m, z28.s
      // vl128 state = 0x9d469964
      0x4481b199,  // ursqrte z25.s, p4/m, z12.s
      // vl128 state = 0xbbabbb9d
      0x4481a989,  // ursqrte z9.s, p2/m, z12.s
      // vl128 state = 0xf83e651c
      0x4481b18b,  // ursqrte z11.s, p4/m, z12.s
      // vl128 state = 0x70a808da
      0x4480b089,  // urecpe z9.s, p4/m, z4.s
      // vl128 state = 0x427916ac
      0x4480b2c1,  // urecpe z1.s, p4/m, z22.s
      // vl128 state = 0xbf35be88
      0x4480aad1,  // urecpe z17.s, p2/m, z22.s
      // vl128 state = 0xaf69727b
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized from the table so it always matches what is emitted.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (const uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Generate code that leaves the machine state hash in w0: the hash helper
  // is handed &final_hash, and that word is then loaded through x0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (Q-sized lane count - 1); entry 0 equals the final vl128
    // state noted in the encoding table.
    static const uint32_t kExpectedHashes[] = {
        0xaf69727b,
        0x7fda1a01,
        0xd299e078,
        0x9a794a84,
        0x47a453c1,
        0xecc67cf0,
        0x04122ec2,
        0x82dd5669,
        0xcb2bb910,
        0xcc73c54c,
        0x4660030f,
        0x7c42b056,
        0x498a73b1,
        0x1de89fad,
        0x5411c616,
        0x9f378bac,
    };
    const int q_lane_count = core.GetSVELaneCount(kQRegSize);
    ASSERT_EQUAL_64(kExpectedHashes[q_lane_count - 1], x0);
  }
}
2862
// Test for the SVE2 long arithmetic instructions exercised below
// (saddl[bt], uaddl[bt], ssubl[bt], usubl[bt], sabdlt and uabdl[bt]).
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// then a hash of the resulting machine state is compared against a table of
// expected values selected by the number of Q-sized lanes in the vector.
TEST_SVE(sve2_arith_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings, emitted in order. Each entry is annotated with its
  // disassembly and the vl128 state value noted after it.
  static const uint32_t kEncodings[] = {
      0x45573eac,  // uabdlt z12.h, z21.b, z23.b
      // vl128 state = 0x2ee2e7d4
      0x45573c84,  // uabdlt z4.h, z4.b, z23.b
      // vl128 state = 0x33413c6f
      0x45571d8c,  // usublt z12.h, z12.b, z23.b
      // vl128 state = 0xb95ffb7e
      0x45971d8e,  // usublt z14.s, z12.h, z23.h
      // vl128 state = 0xac4d0015
      0x45d7158c,  // ssublt z12.d, z12.s, z23.s
      // vl128 state = 0xe5341703
      0x4557119c,  // ssublb z28.h, z12.b, z23.b
      // vl128 state = 0x744f8598
      0x45d5118c,  // ssublb z12.d, z12.s, z21.s
      // vl128 state = 0x120c8bf7
      0x45551088,  // ssublb z8.h, z4.b, z21.b
      // vl128 state = 0xbf53c9ed
      0x455410cc,  // ssublb z12.h, z6.b, z20.b
      // vl128 state = 0x2642a908
      0x454414c8,  // ssublt z8.h, z6.b, z4.b
      // vl128 state = 0x0682c7d0
      0x454510c9,  // ssublb z9.h, z6.b, z5.b
      // vl128 state = 0x1966420e
      0x455510ed,  // ssublb z13.h, z7.b, z21.b
      // vl128 state = 0xdd0ec707
      0x455508ef,  // uaddlb z15.h, z7.b, z21.b
      // vl128 state = 0x0756dbf9
      0x455502e7,  // saddlb z7.h, z23.b, z21.b
      // vl128 state = 0xb991e688
      0x455d06f7,  // saddlt z23.h, z23.b, z29.b
      // vl128 state = 0x55399de0
      0x455f06df,  // saddlt z31.h, z22.b, z31.b
      // vl128 state = 0x3379dce4
      0x45de06db,  // saddlt z27.d, z22.s, z30.s
      // vl128 state = 0xebf6b857
      0x45c606da,  // saddlt z26.d, z22.s, z6.s
      // vl128 state = 0x7625ec15
      0x45c306db,  // saddlt z27.d, z22.s, z3.s
      // vl128 state = 0x549988fd
      0x455306d3,  // saddlt z19.h, z22.b, z19.b
      // vl128 state = 0xb645cb0f
      0x455306d1,  // saddlt z17.h, z22.b, z19.b
      // vl128 state = 0x20a70427
      0x455306d3,  // saddlt z19.h, z22.b, z19.b
      // vl128 state = 0xd263ec78
      0x45510edb,  // uaddlt z27.h, z22.b, z17.b
      // vl128 state = 0xeecd9b44
      0x45510bdf,  // uaddlb z31.h, z30.b, z17.b
      // vl128 state = 0x0577c3d4
      0x45d10b4f,  // uaddlb z15.d, z26.s, z17.s
      // vl128 state = 0xca18b475
      0x45810b47,  // uaddlb z7.s, z26.h, z1.h
      // vl128 state = 0xdfe68417
      0x45811bc3,  // usublb z3.s, z30.h, z1.h
      // vl128 state = 0x96fe0360
      0x45891b82,  // usublb z2.s, z28.h, z9.h
      // vl128 state = 0x7e58a9d5
      0x4589398a,  // uabdlb z10.s, z12.h, z9.h
      // vl128 state = 0xd7612435
      0x458919ab,  // usublb z11.s, z13.h, z9.h
      // vl128 state = 0x8842dbca
      0x45cb19af,  // usublb z15.d, z13.s, z11.s
      // vl128 state = 0xfcac3d0f
      0x45cb19bf,  // usublb z31.d, z13.s, z11.s
      // vl128 state = 0x7b4952d6
      0x45cb190f,  // usublb z15.d, z8.s, z11.s
      // vl128 state = 0xb41cb8a3
      0x45cb1d8d,  // usublt z13.d, z12.s, z11.s
      // vl128 state = 0x9197543e
      0x45cb1d89,  // usublt z9.d, z12.s, z11.s
      // vl128 state = 0x3cc7e16c
      0x454b0d8b,  // uaddlt z11.h, z12.b, z11.b
      // vl128 state = 0x5c52744d
      0x45cb1d8a,  // usublt z10.d, z12.s, z11.s
      // vl128 state = 0x24c91c53
      0x454f1d8e,  // usublt z14.h, z12.b, z15.b
      // vl128 state = 0x0091f2f1
      0x455b1d8f,  // usublt z15.h, z12.b, z27.b
      // vl128 state = 0x521f94f7
      0x455a1c87,  // usublt z7.h, z4.b, z26.b
      // vl128 state = 0xa0631870
      0x454a1cb7,  // usublt z23.h, z5.b, z10.b
      // vl128 state = 0x089384c7
      0x454218a7,  // usublb z7.h, z5.b, z2.b
      // vl128 state = 0xe8c3c063
      0x454a19a6,  // usublb z6.h, z13.b, z10.b
      // vl128 state = 0x7a9f53ab
      0x454a3da2,  // uabdlt z2.h, z13.b, z10.b
      // vl128 state = 0x68d5f375
      0x45423ca6,  // uabdlt z6.h, z5.b, z2.b
      // vl128 state = 0x2c980ff7
      0x454a34a7,  // sabdlt z7.h, z5.b, z10.b
      // vl128 state = 0xe38196aa
      0x454a3466,  // sabdlt z6.h, z3.b, z10.b
      // vl128 state = 0x86c5bcb2
      0x454b146e,  // ssublt z14.h, z3.b, z11.b
      // vl128 state = 0xf8527375
      0x454b146a,  // ssublt z10.h, z3.b, z11.b
      // vl128 state = 0xf4bfb710
      0x454b147a,  // ssublt z26.h, z3.b, z11.b
      // vl128 state = 0xe1000ccf
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized from the table so it always matches what is emitted.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (const uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Generate code that leaves the machine state hash in w0: the hash helper
  // is handed &final_hash, and that word is then loaded through x0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (Q-sized lane count - 1); entry 0 equals the final vl128
    // state noted in the encoding table.
    static const uint32_t kExpectedHashes[] = {
        0xe1000ccf,
        0xd320fd27,
        0x356a62d9,
        0xc6245994,
        0x78aeec8a,
        0xb5d0402b,
        0x06684b9e,
        0x6033f51d,
        0xd174ee86,
        0x80baaecc,
        0x2c9b263c,
        0x3fba551a,
        0x489fb8b7,
        0x862c9b27,
        0xc0549096,
        0xa927d570,
    };
    const int q_lane_count = core.GetSVELaneCount(kQRegSize);
    ASSERT_EQUAL_64(kExpectedHashes[q_lane_count - 1], x0);
  }
}
3006
// Test for the SVE2 wide arithmetic instructions exercised below
// (saddw[bt], uaddw[bt], ssubw[bt] and usubw[bt]).
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// then a hash of the resulting machine state is compared against a table of
// expected values selected by the number of Q-sized lanes in the vector.
TEST_SVE(sve2_arith_wide) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings, emitted in order. Each entry is annotated with its
  // disassembly and the vl128 state value noted after it.
  static const uint32_t kEncodings[] = {
      0x45494683,  // saddwt z3.h, z20.h, z9.b
      // vl128 state = 0x9a3fc71a
      0x45494687,  // saddwt z7.h, z20.h, z9.b
      // vl128 state = 0xb016cb2f
      0x454b46d7,  // saddwt z23.h, z22.h, z11.b
      // vl128 state = 0x5ce3d8a0
      0x455b56d5,  // ssubwt z21.h, z22.h, z27.b
      // vl128 state = 0xbace5453
      0x455b567d,  // ssubwt z29.h, z19.h, z27.b
      // vl128 state = 0x1f510928
      0x455b506d,  // ssubwb z13.h, z3.h, z27.b
      // vl128 state = 0x19ea553e
      0x4559502f,  // ssubwb z15.h, z1.h, z25.b
      // vl128 state = 0x4d88e5db
      0x45d95427,  // ssubwt z7.d, z1.d, z25.s
      // vl128 state = 0x069804b6
      0x45d95426,  // ssubwt z6.d, z1.d, z25.s
      // vl128 state = 0xfe46cf10
      0x45db5c36,  // usubwt z22.d, z1.d, z27.s
      // vl128 state = 0xad3c8120
      0x45d95d37,  // usubwt z23.d, z9.d, z25.s
      // vl128 state = 0x833d76fb
      0x45d55d27,  // usubwt z7.d, z9.d, z21.s
      // vl128 state = 0xc536845d
      0x45d44d25,  // uaddwt z5.d, z9.d, z20.s
      // vl128 state = 0x21f5a29c
      0x45dc4927,  // uaddwb z7.d, z9.d, z28.s
      // vl128 state = 0xfe67da2a
      0x455c490f,  // uaddwb z15.h, z8.h, z28.b
      // vl128 state = 0x5ec5d506
      0x455c490b,  // uaddwb z11.h, z8.h, z28.b
      // vl128 state = 0x74b7d2fc
      0x45584923,  // uaddwb z3.h, z9.h, z24.b
      // vl128 state = 0xa785f3c3
      0x45584922,  // uaddwb z2.h, z9.h, z24.b
      // vl128 state = 0x373049c0
      0x45584940,  // uaddwb z0.h, z10.h, z24.b
      // vl128 state = 0xbf385483
      0x45da4944,  // uaddwb z4.d, z10.d, z26.s
      // vl128 state = 0x94cd3b86
      0x45524945,  // uaddwb z5.h, z10.h, z18.b
      // vl128 state = 0x8535094f
      0x4540494d,  // uaddwb z13.h, z10.h, z0.b
      // vl128 state = 0x328abbdb
      0x45c04909,  // uaddwb z9.d, z8.d, z0.s
      // vl128 state = 0x253064cb
      0x45c8498d,  // uaddwb z13.d, z12.d, z8.s
      // vl128 state = 0xa1b39fe0
      0x45c0418f,  // saddwb z15.d, z12.d, z0.s
      // vl128 state = 0xa72048d9
      0x45d84187,  // saddwb z7.d, z12.d, z24.s
      // vl128 state = 0x4c8a23ac
      0x45dc5197,  // ssubwb z23.d, z12.d, z28.s
      // vl128 state = 0x352a3d60
      0x45dc5d93,  // usubwt z19.d, z12.d, z28.s
      // vl128 state = 0x404b9e8b
      0x45dd5592,  // ssubwt z18.d, z12.d, z29.s
      // vl128 state = 0xf46cc758
      0x45dd5550,  // ssubwt z16.d, z10.d, z29.s
      // vl128 state = 0x171ebd36
      0x45cd55d4,  // ssubwt z20.d, z14.d, z13.s
      // vl128 state = 0x4f2ef46f
      0x45dd5dd5,  // usubwt z21.d, z14.d, z29.s
      // vl128 state = 0x0c9ab301
      0x45dd5dc5,  // usubwt z5.d, z14.d, z29.s
      // vl128 state = 0x67a10e22
      0x454d5dd5,  // usubwt z21.h, z14.h, z13.b
      // vl128 state = 0xb4bd21c0
      0x454d4dfd,  // uaddwt z29.h, z15.h, z13.b
      // vl128 state = 0x8df5f90f
      0x45494fed,  // uaddwt z13.h, z31.h, z9.b
      // vl128 state = 0x913f7aa4
      0x45cb4fef,  // uaddwt z15.d, z31.d, z11.s
      // vl128 state = 0xa23d1307
      0x454b47ff,  // saddwt z31.h, z31.h, z11.b
      // vl128 state = 0x026ff306
      0x454747f7,  // saddwt z23.h, z31.h, z7.b
      // vl128 state = 0x9abf0566
      0x45c743f6,  // saddwb z22.d, z31.d, z7.s
      // vl128 state = 0x27031d0e
      0x45c74b66,  // uaddwb z6.d, z27.d, z7.s
      // vl128 state = 0xc6f3a976
      0x45474be4,  // uaddwb z4.h, z31.h, z7.b
      // vl128 state = 0xededea24
      0x454349e0,  // uaddwb z0.h, z15.h, z3.b
      // vl128 state = 0xf1092d40
      0x454359c1,  // usubwb z1.h, z14.h, z3.b
      // vl128 state = 0x2d96f026
      0x45535983,  // usubwb z3.h, z12.h, z19.b
      // vl128 state = 0x5a9cab0c
      0x45535981,  // usubwb z1.h, z12.h, z19.b
      // vl128 state = 0x7f8d695f
      0x45535a83,  // usubwb z3.h, z20.h, z19.b
      // vl128 state = 0xb0ae0f62
      0x45d35e81,  // usubwt z1.d, z20.d, z19.s
      // vl128 state = 0xfe7e227b
      0x45d25ec9,  // usubwt z9.d, z22.d, z18.s
      // vl128 state = 0xed9dd734
      0x45d35e88,  // usubwt z8.d, z20.d, z19.s
      // vl128 state = 0x943f8d24
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized from the table so it always matches what is emitted.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (const uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Generate code that leaves the machine state hash in w0: the hash helper
  // is handed &final_hash, and that word is then loaded through x0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (Q-sized lane count - 1); entry 0 equals the final vl128
    // state noted in the encoding table.
    static const uint32_t kExpectedHashes[] = {
        0x943f8d24,
        0xfe956248,
        0xfefddb40,
        0x4d92bfb3,
        0x01dcd5b1,
        0x29a23c92,
        0xb7587530,
        0xa56fa28c,
        0xa0f8590d,
        0xa6b883a4,
        0x2e50d1fd,
        0x8e976f55,
        0xb21bd3b1,
        0x0c3586e5,
        0xe3d7e7e6,
        0xb1e0e34f,
    };
    const int q_lane_count = core.GetSVELaneCount(kQRegSize);
    ASSERT_EQUAL_64(kExpectedHashes[q_lane_count - 1], x0);
  }
}
3150
// Test for the SVE2 long shift-left instructions exercised below
// (sshllb, sshllt, ushllb and ushllt).
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// then a hash of the resulting machine state is compared against a table of
// expected values selected by the number of Q-sized lanes in the vector.
TEST_SVE(sve2_shift_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings, emitted in order. Each entry is annotated with its
  // disassembly and the vl128 state value noted after it.
  static const uint32_t kEncodings[] = {
      0x4518aafc,  // ushllb z28.s, z23.h, #8
      // vl128 state = 0x07dfb216
      0x4518afec,  // ushllt z12.s, z31.h, #8
      // vl128 state = 0xe3c5d68c
      0x4518adc4,  // ushllt z4.s, z14.h, #8
      // vl128 state = 0xce8721fc
      0x4518a1c5,  // sshllb z5.s, z14.h, #8
      // vl128 state = 0x71820bae
      0x4508a9cd,  // ushllb z13.h, z14.b, #0
      // vl128 state = 0xfdc3f7b3
      0x4508ad9d,  // ushllt z29.h, z12.b, #0
      // vl128 state = 0x93c1f606
      0x4508a795,  // sshllt z21.h, z28.b, #0
      // vl128 state = 0x15ebcb72
      0x450caf94,  // ushllt z20.h, z28.b, #4
      // vl128 state = 0x76c630f5
      0x4508afd6,  // ushllt z22.h, z30.b, #0
      // vl128 state = 0xa9c6dfbc
      0x4509aed7,  // ushllt z23.h, z22.b, #1
      // vl128 state = 0xa5942073
      0x4508ae55,  // ushllt z21.h, z18.b, #0
      // vl128 state = 0xe4348777
      0x450cac51,  // ushllt z17.h, z2.b, #4
      // vl128 state = 0x91c6e6ea
      0x450ca870,  // ushllb z16.h, z3.b, #4
      // vl128 state = 0x40393ae8
      0x450ca031,  // sshllb z17.h, z1.b, #4
      // vl128 state = 0x8b9526e8
      0x450aa030,  // sshllb z16.h, z1.b, #2
      // vl128 state = 0xd3d0857a
      0x450aa031,  // sshllb z17.h, z1.b, #2
      // vl128 state = 0xbdd18de2
      0x450ba233,  // sshllb z19.h, z17.b, #3
      // vl128 state = 0x5e5f6f2a
      0x4509a263,  // sshllb z3.h, z19.b, #1
      // vl128 state = 0xa3b5427b
      0x450da673,  // sshllt z19.h, z19.b, #5
      // vl128 state = 0x97472b22
      0x451da477,  // sshllt z23.s, z3.h, #13
      // vl128 state = 0xe6da4012
      0x451da5f6,  // sshllt z22.s, z15.h, #13
      // vl128 state = 0x11630552
      0x450da5b4,  // sshllt z20.h, z13.b, #5
      // vl128 state = 0xe9a4cad0
      0x450da5d5,  // sshllt z21.h, z14.b, #5
      // vl128 state = 0x750d4143
      0x450fa4d7,  // sshllt z23.h, z6.b, #7
      // vl128 state = 0xc441984c
      0x451ba4df,  // sshllt z31.s, z6.h, #11
      // vl128 state = 0x9a3899af
      0x451ba4db,  // sshllt z27.s, z6.h, #11
      // vl128 state = 0xbb6684bb
      0x451ba4bf,  // sshllt z31.s, z5.h, #11
      // vl128 state = 0x45a2cf1e
      0x451aa49b,  // sshllt z27.s, z4.h, #10
      // vl128 state = 0xac10df2f
      0x451aa49f,  // sshllt z31.s, z4.h, #10
      // vl128 state = 0x9cecdbd8
      0x451aa89b,  // ushllb z27.s, z4.h, #10
      // vl128 state = 0x73fca806
      0x4518aa9f,  // ushllb z31.s, z20.h, #8
      // vl128 state = 0xf58883fb
      0x451aaab7,  // ushllb z23.s, z21.h, #10
      // vl128 state = 0xf9476b16
      0x4508aaa7,  // ushllb z7.h, z21.b, #0
      // vl128 state = 0x6f65ea0e
      0x4508ae2f,  // ushllt z15.h, z17.b, #0
      // vl128 state = 0x574341e2
      0x4509ac27,  // ushllt z7.h, z1.b, #1
      // vl128 state = 0xe373d23c
      0x450dae25,  // ushllt z5.h, z17.b, #5
      // vl128 state = 0xc6ad882b
      0x4509aea7,  // ushllt z7.h, z21.b, #1
      // vl128 state = 0xfce8617d
      0x4509adb7,  // ushllt z23.h, z13.b, #1
      // vl128 state = 0x30f63baf
      0x4549ade7,  // ushllt z7.d, z15.s, #9
      // vl128 state = 0x20522e02
      0x4549adf7,  // ushllt z23.d, z15.s, #9
      // vl128 state = 0x18c6aade
      0x4548aff6,  // ushllt z22.d, z31.s, #8
      // vl128 state = 0x3ad49ec9
      0x4548affe,  // ushllt z30.d, z31.s, #8
      // vl128 state = 0x828be22f
      0x4548adda,  // ushllt z26.d, z14.s, #8
      // vl128 state = 0xb4997aa9
      0x4544add2,  // ushllt z18.d, z14.s, #4
      // vl128 state = 0x6e7feb55
      0x454cad42,  // ushllt z2.d, z10.s, #12
      // vl128 state = 0xb8ff410d
      0x450dad40,  // ushllt z0.h, z10.b, #5
      // vl128 state = 0x806bb38f
      0x4515ad50,  // ushllt z16.s, z10.h, #5
      // vl128 state = 0x6bd247ad
      0x4557ad51,  // ushllt z17.d, z10.s, #23
      // vl128 state = 0xc0959f27
      0x4557ad41,  // ushllt z1.d, z10.s, #23
      // vl128 state = 0xf0176482
      0x4557ad40,  // ushllt z0.d, z10.s, #23
      // vl128 state = 0xd5c958bf
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized from the table so it always matches what is emitted.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (const uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Generate code that leaves the machine state hash in w0: the hash helper
  // is handed &final_hash, and that word is then loaded through x0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (Q-sized lane count - 1); entry 0 equals the final vl128
    // state noted in the encoding table.
    static const uint32_t kExpectedHashes[] = {
        0xd5c958bf,
        0xb7546431,
        0xee4f6b9f,
        0x74f31aeb,
        0x98282a7a,
        0xf2423509,
        0xe3ae7c5c,
        0xe544e7ba,
        0x7d52fba5,
        0x1520b68d,
        0xee539501,
        0x1a65ba45,
        0x0d4c2383,
        0x9f4a30c5,
        0xca6662a2,
        0x64dc5f23,
    };
    const int q_lane_count = core.GetSVELaneCount(kQRegSize);
    ASSERT_EQUAL_64(kExpectedHashes[q_lane_count - 1], x0);
  }
}
3294
// Test for the SVE2 narrowing shift-right instructions exercised below
// (shrnb, shrnt, rshrnb and rshrnt).
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// then a hash of the resulting machine state is compared against a table of
// expected values selected by the number of Q-sized lanes in the vector.
TEST_SVE(sve2_shift_narrow) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings, emitted in order. Each entry is annotated with its
  // disassembly and the vl128 state value noted after it.
  static const uint32_t kEncodings[] = {
      0x456b1458,  // shrnt z24.s, z2.d, #21
      // vl128 state = 0x70323182
      0x456b145c,  // shrnt z28.s, z2.d, #21
      // vl128 state = 0x1d620da3
      0x45291454,  // shrnt z20.b, z2.h, #7
      // vl128 state = 0x8e6d3a55
      0x4539141c,  // shrnt z28.h, z0.s, #7
      // vl128 state = 0xbc19c1cc
      0x453914b8,  // shrnt z24.h, z5.s, #7
      // vl128 state = 0x0bd4d1e8
      0x453b14f9,  // shrnt z25.h, z7.s, #5
      // vl128 state = 0x15622295
      0x453315fd,  // shrnt z29.h, z15.s, #13
      // vl128 state = 0x45bf3b94
      0x45331d75,  // rshrnt z21.h, z11.s, #13
      // vl128 state = 0xbb3574e6
      0x45331945,  // rshrnb z5.h, z10.s, #13
      // vl128 state = 0x7b72be5f
      0x45331941,  // rshrnb z1.h, z10.s, #13
      // vl128 state = 0x073cdf1a
      0x45331949,  // rshrnb z9.h, z10.s, #13
      // vl128 state = 0x3ecd1bf9
      0x453b1979,  // rshrnb z25.h, z11.s, #5
      // vl128 state = 0x19f7734e
      0x453b11f1,  // shrnb z17.h, z15.s, #5
      // vl128 state = 0x47a3f036
      0x453711f9,  // shrnb z25.h, z15.s, #9
      // vl128 state = 0xff283fe4
      0x453315f8,  // shrnt z24.h, z15.s, #13
      // vl128 state = 0x1c19f8fb
      0x453319f0,  // rshrnb z16.h, z15.s, #13
      // vl128 state = 0x3be08052
      0x453b1972,  // rshrnb z18.h, z11.s, #5
      // vl128 state = 0xc5ae76a0
      0x453b1962,  // rshrnb z2.h, z11.s, #5
      // vl128 state = 0x75ec3872
      0x453b1c60,  // rshrnt z0.h, z3.s, #5
      // vl128 state = 0x9b372229
      0x45331c44,  // rshrnt z4.h, z2.s, #13
      // vl128 state = 0xe4e22904
      0x45371c0c,  // rshrnt z12.h, z0.s, #9
      // vl128 state = 0x12bc6f4b
      0x45331d08,  // rshrnt z8.h, z8.s, #13
      // vl128 state = 0x3ef95245
      0x45331c98,  // rshrnt z24.h, z4.s, #13
      // vl128 state = 0x0a4a0d68
      0x45731e99,  // rshrnt z25.s, z20.d, #13
      // vl128 state = 0xa01ca6c8
      0x457b1a98,  // rshrnb z24.s, z20.d, #5
      // vl128 state = 0x73a50e30
      0x452b1a9c,  // rshrnb z28.b, z20.h, #5
      // vl128 state = 0xbad3deda
      0x452b1818,  // rshrnb z24.b, z0.h, #5
      // vl128 state = 0x579b3c8f
      0x452b181a,  // rshrnb z26.b, z0.h, #5
      // vl128 state = 0xa2b0bf7c
      0x452b181b,  // rshrnb z27.b, z0.h, #5
      // vl128 state = 0x7bebdf9e
      0x45291a1a,  // rshrnb z26.b, z16.h, #7
      // vl128 state = 0x3f90e1b7
      0x45681a12,  // rshrnb z18.s, z16.d, #24
      // vl128 state = 0x57e6295e
      0x45681290,  // shrnb z16.s, z20.d, #24
      // vl128 state = 0xa53f48b5
      0x45281091,  // shrnb z17.b, z4.h, #8
      // vl128 state = 0x65179ab4
      0x45281401,  // shrnt z1.b, z0.h, #8
      // vl128 state = 0x3cc490ba
      0x45281c83,  // rshrnt z3.b, z4.h, #8
      // vl128 state = 0x3bc34e69
      0x45281c93,  // rshrnt z19.b, z4.h, #8
      // vl128 state = 0x6dded0bb
      0x45681cb7,  // rshrnt z23.s, z5.d, #24
      // vl128 state = 0x378f83c0
      0x45291cb6,  // rshrnt z22.b, z5.h, #7
      // vl128 state = 0x7e4d1c44
      0x45391eb2,  // rshrnt z18.h, z21.s, #7
      // vl128 state = 0x66c0b784
      0x45281ea2,  // rshrnt z2.b, z21.h, #8
      // vl128 state = 0x62df2c82
      0x452c1fa0,  // rshrnt z0.b, z29.h, #4
      // vl128 state = 0xd79ee307
      0x456c1ba2,  // rshrnb z2.s, z29.d, #20
      // vl128 state = 0x8ebb2251
      0x45641ab2,  // rshrnb z18.s, z21.d, #28
      // vl128 state = 0x77ec053a
      0x456c12ba,  // shrnb z26.s, z21.d, #20
      // vl128 state = 0xcf94b608
      0x452812b8,  // shrnb z24.b, z21.h, #8
      // vl128 state = 0x3e067a62
      0x4568123a,  // shrnb z26.s, z17.d, #24
      // vl128 state = 0xe451de0f
      0x456c1338,  // shrnb z24.s, z25.d, #20
      // vl128 state = 0x4042d707
      0x456813b9,  // shrnb z25.s, z29.d, #24
      // vl128 state = 0x5184a2aa
      0x456812e9,  // shrnb z9.s, z23.d, #24
      // vl128 state = 0x246344b8
      0x456812e1,  // shrnb z1.s, z23.d, #24
      // vl128 state = 0x76866e79
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized from the table so it always matches what is emitted.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (const uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Generate code that leaves the machine state hash in w0: the hash helper
  // is handed &final_hash, and that word is then loaded through x0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (Q-sized lane count - 1); entry 0 equals the final vl128
    // state noted in the encoding table.
    static const uint32_t kExpectedHashes[] = {
        0x76866e79,
        0x42b52927,
        0x84a0bfcc,
        0xf8226fc2,
        0x444f6df5,
        0x2f8dcd68,
        0x5a48278a,
        0x1cdd7f2f,
        0x7816d36c,
        0xebae972f,
        0xa02adfbe,
        0xc93cde0f,
        0xce43287b,
        0x777d6ce0,
        0x9d3be904,
        0x3e059dd2,
    };
    const int q_lane_count = core.GetSVELaneCount(kQRegSize);
    ASSERT_EQUAL_64(kExpectedHashes[q_lane_count - 1], x0);
  }
}
3438
// State-hash test for the SVE2 unsigned saturating shift-right-narrow
// encodings (uqshrnb, uqshrnt, uqrshrnb, uqrshrnt).
//
// A fixed sequence of 50 raw encodings is emitted with dci() on top of the
// state installed by SetInitialMachineState(). The machine state is then
// hashed and compared with the expected hash for the current vector length.
// The mnemonic after each encoding is its recorded disassembly. The
// `vl128 state` comments appear to be the running hash after each instruction
// for a 128-bit vector length (the last one equals expected_hashes[0]).
TEST_SVE(sve2_shift_narrow_usat) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instructions emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x457a3207);  // uqshrnb z7.s, z16.d, #6
    // vl128 state = 0x4b40d14e
    __ dci(0x457a3206);  // uqshrnb z6.s, z16.d, #6
    // vl128 state = 0x4dbc0377
    __ dci(0x457a3204);  // uqshrnb z4.s, z16.d, #6
    // vl128 state = 0xa6fbc7f9
    __ dci(0x457e3a14);  // uqrshrnb z20.s, z16.d, #2
    // vl128 state = 0x9e9414a9
    __ dci(0x457b3a15);  // uqrshrnb z21.s, z16.d, #5
    // vl128 state = 0xe8824afd
    __ dci(0x457b3ab7);  // uqrshrnb z23.s, z21.d, #5
    // vl128 state = 0x81ce1be6
    __ dci(0x457b3ab6);  // uqrshrnb z22.s, z21.d, #5
    // vl128 state = 0x5e343a1e
    __ dci(0x457f3af7);  // uqrshrnb z23.s, z23.d, #1
    // vl128 state = 0x09a5c3a0
    __ dci(0x457b38ff);  // uqrshrnb z31.s, z7.d, #5
    // vl128 state = 0xb50710bf
    __ dci(0x453338fe);  // uqrshrnb z30.h, z7.s, #13
    // vl128 state = 0xfc719c85
    __ dci(0x453338ee);  // uqrshrnb z14.h, z7.s, #13
    // vl128 state = 0x157d826a
    __ dci(0x453b386a);  // uqrshrnb z10.h, z3.s, #5
    // vl128 state = 0x9c735771
    __ dci(0x452f386e);  // uqrshrnb z14.b, z3.h, #1
    // vl128 state = 0xe03bb4a4
    __ dci(0x452f3aea);  // uqrshrnb z10.b, z23.h, #1
    // vl128 state = 0xa841b415
    __ dci(0x452f38ba);  // uqrshrnb z26.b, z5.h, #1
    // vl128 state = 0x55302a6d
    __ dci(0x452f3878);  // uqrshrnb z24.b, z3.h, #1
    // vl128 state = 0x73bee182
    __ dci(0x453f385c);  // uqrshrnb z28.h, z2.s, #1
    // vl128 state = 0x75f81ccc
    __ dci(0x453f397d);  // uqrshrnb z29.h, z11.s, #1
    // vl128 state = 0x856fecc9
    __ dci(0x457d397c);  // uqrshrnb z28.s, z11.d, #3
    // vl128 state = 0x4b144bf2
    __ dci(0x457f3878);  // uqrshrnb z24.s, z3.d, #1
    // vl128 state = 0x7ea5dad3
    __ dci(0x457b3c7a);  // uqrshrnt z26.s, z3.d, #5
    // vl128 state = 0xa7d48543
    __ dci(0x45633c72);  // uqrshrnt z18.s, z3.d, #29
    // vl128 state = 0x18f647a7
    __ dci(0x45613d76);  // uqrshrnt z22.s, z11.d, #31
    // vl128 state = 0x96d4081b
    __ dci(0x45693972);  // uqrshrnb z18.s, z11.d, #23
    // vl128 state = 0xa8369e83
    __ dci(0x45693d53);  // uqrshrnt z19.s, z10.d, #23
    // vl128 state = 0x7553ff55
    __ dci(0x45713d51);  // uqrshrnt z17.s, z10.d, #15
    // vl128 state = 0x52a52ecc
    __ dci(0x45713d99);  // uqrshrnt z25.s, z12.d, #15
    // vl128 state = 0x4de78f7b
    __ dci(0x45753f9d);  // uqrshrnt z29.s, z28.d, #11
    // vl128 state = 0x0f8948cd
    __ dci(0x45753f8d);  // uqrshrnt z13.s, z28.d, #11
    // vl128 state = 0x7f2c1b05
    __ dci(0x45753685);  // uqshrnt z5.s, z20.d, #11
    // vl128 state = 0xbe6f6ea9
    __ dci(0x457d3784);  // uqshrnt z4.s, z28.d, #3
    // vl128 state = 0x716e1acd
    __ dci(0x453c3785);  // uqshrnt z5.h, z28.s, #4
    // vl128 state = 0x828a3cbb
    __ dci(0x453837a4);  // uqshrnt z4.h, z29.s, #8
    // vl128 state = 0x125ddc3c
    __ dci(0x457a37a6);  // uqshrnt z6.s, z29.d, #6
    // vl128 state = 0x8c5c5d4c
    __ dci(0x453a37e4);  // uqshrnt z4.h, z31.s, #6
    // vl128 state = 0xdea9801f
    __ dci(0x453f37ec);  // uqshrnt z12.h, z31.s, #1
    // vl128 state = 0x6caa6537
    __ dci(0x457f37dc);  // uqshrnt z28.s, z30.d, #1
    // vl128 state = 0x66c0c05d
    __ dci(0x45773fde);  // uqrshrnt z30.s, z30.d, #9
    // vl128 state = 0xf8d495e2
    __ dci(0x45653fda);  // uqrshrnt z26.s, z30.d, #27
    // vl128 state = 0xb543c017
    __ dci(0x45613ffb);  // uqrshrnt z27.s, z31.d, #31
    // vl128 state = 0x58a69fb4
    __ dci(0x45613feb);  // uqrshrnt z11.s, z31.d, #31
    // vl128 state = 0xb5a04d48
    __ dci(0x45653fca);  // uqrshrnt z10.s, z30.d, #27
    // vl128 state = 0xd2d445e0
    __ dci(0x45753fe8);  // uqrshrnt z8.s, z31.d, #11
    // vl128 state = 0x67d89d28
    __ dci(0x457537ca);  // uqshrnt z10.s, z30.d, #11
    // vl128 state = 0xcaa2b6dc
    __ dci(0x457d35ce);  // uqshrnt z14.s, z14.d, #3
    // vl128 state = 0x9da6b10f
    __ dci(0x452d35de);  // uqshrnt z30.b, z14.h, #3
    // vl128 state = 0xda8663db
    __ dci(0x452d314e);  // uqshrnb z14.b, z10.h, #3
    // vl128 state = 0x761992a9
    __ dci(0x453d304f);  // uqshrnb z15.h, z2.s, #3
    // vl128 state = 0x71587e6a
    __ dci(0x453d386e);  // uqrshrnb z14.h, z3.s, #3
    // vl128 state = 0xc6118398
    __ dci(0x453538ec);  // uqrshrnb z12.h, z7.s, #11
    // vl128 state = 0x5e542c3a
  }

  // ComputeMachineStateHash() is given the address of `state`; the generated
  // code then loads that word into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by GetSVELaneCount(kQRegSize) - 1, so entry 0 should be the
    // single-Q-lane (VL128) case; it matches the last `vl128 state` above.
    uint32_t expected_hashes[] = {
        0x5e542c3a,
        0xd9128c5a,
        0x73f430ed,
        0x160c07da,
        0x7bff9561,
        0x4b2d6335,
        0x3738197c,
        0x2b624a48,
        0xbb257999,
        0x0d5d8614,
        0xb031d1fc,
        0x60f2fce2,
        0x92770ad6,
        0x6e33aa78,
        0x8752089b,
        0x37b56a40,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
3582
// State-hash test for the SVE2 signed saturating shift-right-narrow encodings
// (sqshrnb, sqshrnt, sqrshrnb, sqrshrnt, sqshrunb, sqshrunt, sqrshrunb,
// sqrshrunt).
//
// A fixed sequence of 50 raw encodings is emitted with dci() on top of the
// state installed by SetInitialMachineState(). The machine state is then
// hashed and compared with the expected hash for the current vector length.
// The mnemonic after each encoding is its recorded disassembly. The
// `vl128 state` comments appear to be the running hash after each instruction
// for a 128-bit vector length (the last one equals expected_hashes[0]).
TEST_SVE(sve2_shift_narrow_ssat) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instructions emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x456c0875);  // sqrshrunb z21.s, z3.d, #20
    // vl128 state = 0x1446427d
    __ dci(0x456c0877);  // sqrshrunb z23.s, z3.d, #20
    // vl128 state = 0xd839ea94
    __ dci(0x456c0876);  // sqrshrunb z22.s, z3.d, #20
    // vl128 state = 0xe4dd3104
    __ dci(0x456e0c77);  // sqrshrunt z23.s, z3.d, #18
    // vl128 state = 0xd86dd8aa
    __ dci(0x456e0a73);  // sqrshrunb z19.s, z19.d, #18
    // vl128 state = 0x7aacf973
    __ dci(0x456c0e72);  // sqrshrunt z18.s, z19.d, #20
    // vl128 state = 0x6e7b28b8
    __ dci(0x456c2c62);  // sqrshrnt z2.s, z3.d, #20
    // vl128 state = 0x242e0a5e
    __ dci(0x456c24f2);  // sqshrnt z18.s, z7.d, #20
    // vl128 state = 0xf9c993ec
    __ dci(0x456c2570);  // sqshrnt z16.s, z11.d, #20
    // vl128 state = 0x087c4fc1
    __ dci(0x456e2478);  // sqshrnt z24.s, z3.d, #18
    // vl128 state = 0x33fdae0c
    __ dci(0x456e2c30);  // sqrshrnt z16.s, z1.d, #18
    // vl128 state = 0x0c957ea2
    __ dci(0x456e2d78);  // sqrshrnt z24.s, z11.d, #18
    // vl128 state = 0x0792e58a
    __ dci(0x456f2970);  // sqrshrnb z16.s, z11.d, #17
    // vl128 state = 0xe7169693
    __ dci(0x456b2938);  // sqrshrnb z24.s, z9.d, #21
    // vl128 state = 0x1372a92d
    __ dci(0x45692979);  // sqrshrnb z25.s, z11.d, #23
    // vl128 state = 0xc1c31387
    __ dci(0x4563297d);  // sqrshrnb z29.s, z11.d, #29
    // vl128 state = 0x50a08538
    __ dci(0x45632975);  // sqrshrnb z21.s, z11.d, #29
    // vl128 state = 0xda962f25
    __ dci(0x456309f1);  // sqrshrunb z17.s, z15.d, #29
    // vl128 state = 0xe149814e
    __ dci(0x457308f3);  // sqrshrunb z19.s, z7.d, #13
    // vl128 state = 0x6d5ea38b
    __ dci(0x457329fb);  // sqrshrnb z27.s, z15.d, #13
    // vl128 state = 0xee932acb
    __ dci(0x457721f3);  // sqshrnb z19.s, z15.d, #9
    // vl128 state = 0x7e05914b
    __ dci(0x45732171);  // sqshrnb z17.s, z11.d, #13
    // vl128 state = 0xe4bf82a4
    __ dci(0x45722070);  // sqshrnb z16.s, z3.d, #14
    // vl128 state = 0xdfc01530
    __ dci(0x456a2078);  // sqshrnb z24.s, z3.d, #22
    // vl128 state = 0x6b48fc15
    __ dci(0x452a287c);  // sqrshrnb z28.b, z3.h, #6
    // vl128 state = 0x45e86048
    __ dci(0x45282c78);  // sqrshrnt z24.b, z3.h, #8
    // vl128 state = 0xb8dc83dd
    __ dci(0x45602c68);  // sqrshrnt z8.s, z3.d, #32
    // vl128 state = 0xda536cf8
    __ dci(0x45602678);  // sqshrnt z24.s, z19.d, #32
    // vl128 state = 0xb548f79b
    __ dci(0x45682e70);  // sqrshrnt z16.s, z19.d, #24
    // vl128 state = 0xd564dd2d
    __ dci(0x45682260);  // sqshrnb z0.s, z19.d, #24
    // vl128 state = 0x7b901f9b
    __ dci(0x45682642);  // sqshrnt z2.s, z18.d, #24
    // vl128 state = 0x1d4fe6f4
    __ dci(0x45680606);  // sqshrunt z6.s, z16.d, #24
    // vl128 state = 0xe82d65a2
    __ dci(0x45680282);  // sqshrunb z2.s, z20.d, #24
    // vl128 state = 0x8a1ae6f6
    __ dci(0x45680283);  // sqshrunb z3.s, z20.d, #24
    // vl128 state = 0x5e345dcf
    __ dci(0x4568238b);  // sqshrnb z11.s, z28.d, #24
    // vl128 state = 0x31f54470
    __ dci(0x45682383);  // sqshrnb z3.s, z28.d, #24
    // vl128 state = 0x6b48975d
    __ dci(0x45682682);  // sqshrnt z2.s, z20.d, #24
    // vl128 state = 0xa9fba153
    __ dci(0x45782e8a);  // sqrshrnt z10.s, z20.d, #8
    // vl128 state = 0x0fe3100f
    __ dci(0x45780eba);  // sqrshrunt z26.s, z21.d, #8
    // vl128 state = 0x1a392151
    __ dci(0x45700e32);  // sqrshrunt z18.s, z17.d, #16
    // vl128 state = 0x08cea935
    __ dci(0x45700e42);  // sqrshrunt z2.s, z18.d, #16
    // vl128 state = 0x353f24b1
    __ dci(0x45782e52);  // sqrshrnt z18.s, z18.d, #8
    // vl128 state = 0xe06219d0
    __ dci(0x45782e42);  // sqrshrnt z2.s, z18.d, #8
    // vl128 state = 0xbb4c6d3b
    __ dci(0x45742e46);  // sqrshrnt z6.s, z18.d, #12
    // vl128 state = 0x77e7393c
    __ dci(0x45642ec7);  // sqrshrnt z7.s, z22.d, #28
    // vl128 state = 0x5201634c
    __ dci(0x45642a97);  // sqrshrnb z23.s, z20.d, #28
    // vl128 state = 0x49c32fc1
    __ dci(0x45640b87);  // sqrshrunb z7.s, z28.d, #28
    // vl128 state = 0xdd09d56d
    __ dci(0x45640f0f);  // sqrshrunt z15.s, z24.d, #28
    // vl128 state = 0x50f7d144
    __ dci(0x45600e0e);  // sqrshrunt z14.s, z16.d, #32
    // vl128 state = 0xd6bbd38a
    __ dci(0x45620a0f);  // sqrshrunb z15.s, z16.d, #30
    // vl128 state = 0x141e2991
  }

  // ComputeMachineStateHash() is given the address of `state`; the generated
  // code then loads that word into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by GetSVELaneCount(kQRegSize) - 1, so entry 0 should be the
    // single-Q-lane (VL128) case; it matches the last `vl128 state` above.
    uint32_t expected_hashes[] = {
        0x141e2991,
        0x8cb951d0,
        0x74337526,
        0x515534c6,
        0xe3789189,
        0xfee7d505,
        0xfaae7ee8,
        0x71a110a3,
        0x6469dcda,
        0xe61425fc,
        0x6840f618,
        0xbc1b116d,
        0xaad97378,
        0x5d91b661,
        0x9eb84163,
        0xf8ca1e37,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
3726
// State-hash test for the SVE2 widening absolute-difference-accumulate
// encodings (sabalb, sabalt, uabalb, uabalt).
//
// A fixed sequence of 50 raw encodings is emitted with dci() on top of the
// state installed by SetInitialMachineState(). The machine state is then
// hashed and compared with the expected hash for the current vector length.
// The mnemonic after each encoding is its recorded disassembly. The
// `vl128 state` comments appear to be the running hash after each instruction
// for a 128-bit vector length (the last one equals expected_hashes[0]).
TEST_SVE(sve2_aba_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instructions emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45c2ca3e);  // uabalb z30.d, z17.s, z2.s
    // vl128 state = 0xac47a81c
    __ dci(0x45caca7f);  // uabalb z31.d, z19.s, z10.s
    // vl128 state = 0x10cd4e69
    __ dci(0x455aca7e);  // uabalb z30.h, z19.b, z26.b
    // vl128 state = 0x8fba3755
    __ dci(0x45daca5f);  // uabalb z31.d, z18.s, z26.s
    // vl128 state = 0x8c18257c
    __ dci(0x45d8ca1d);  // uabalb z29.d, z16.s, z24.s
    // vl128 state = 0xe6eef5ec
    __ dci(0x45d8ce95);  // uabalt z21.d, z20.s, z24.s
    // vl128 state = 0x2368baee
    __ dci(0x4598ce14);  // uabalt z20.s, z16.h, z24.h
    // vl128 state = 0xc9281174
    __ dci(0x4598ce04);  // uabalt z4.s, z16.h, z24.h
    // vl128 state = 0xa0b5fc24
    __ dci(0x45d8ce40);  // uabalt z0.d, z18.s, z24.s
    // vl128 state = 0xb3ef6f1d
    __ dci(0x45daca44);  // uabalb z4.d, z18.s, z26.s
    // vl128 state = 0xcfa3666b
    __ dci(0x45dace00);  // uabalt z0.d, z16.s, z26.s
    // vl128 state = 0x27bb4ba9
    __ dci(0x459ece04);  // uabalt z4.s, z16.h, z30.h
    // vl128 state = 0xb6628d3e
    __ dci(0x458ece80);  // uabalt z0.s, z20.h, z14.h
    // vl128 state = 0xe8db526e
    __ dci(0x458ec482);  // sabalt z2.s, z4.h, z14.h
    // vl128 state = 0x73cd8386
    __ dci(0x45cec4a3);  // sabalt z3.d, z5.s, z14.s
    // vl128 state = 0xba1c4507
    __ dci(0x45cec8a1);  // uabalb z1.d, z5.s, z14.s
    // vl128 state = 0x851cd798
    __ dci(0x458ec0a9);  // sabalb z9.s, z5.h, z14.h
    // vl128 state = 0xc85973b8
    __ dci(0x45c6c0ab);  // sabalb z11.d, z5.s, z6.s
    // vl128 state = 0x84072419
    __ dci(0x4544c0a9);  // sabalb z9.h, z5.b, z4.b
    // vl128 state = 0x533a377a
    __ dci(0x4550c0a1);  // sabalb z1.h, z5.b, z16.b
    // vl128 state = 0x5a216f3a
    __ dci(0x4550c0b1);  // sabalb z17.h, z5.b, z16.b
    // vl128 state = 0x9957b992
    __ dci(0x4552c095);  // sabalb z21.h, z4.b, z18.b
    // vl128 state = 0x666bd8db
    __ dci(0x4543c094);  // sabalb z20.h, z4.b, z3.b
    // vl128 state = 0xd66d3d52
    __ dci(0x4543c095);  // sabalb z21.h, z4.b, z3.b
    // vl128 state = 0x5d47b643
    __ dci(0x4543c385);  // sabalb z5.h, z28.b, z3.b
    // vl128 state = 0x55fc0a65
    __ dci(0x4543c38d);  // sabalb z13.h, z28.b, z3.b
    // vl128 state = 0xbb5ccc0f
    __ dci(0x45c3c19d);  // sabalb z29.d, z12.s, z3.s
    // vl128 state = 0xb3dedffd
    __ dci(0x45d3c595);  // sabalt z21.d, z12.s, z19.s
    // vl128 state = 0xd80597a1
    __ dci(0x45d2c185);  // sabalb z5.d, z12.s, z18.s
    // vl128 state = 0x29a9fafc
    __ dci(0x45d2c0b5);  // sabalb z21.d, z5.s, z18.s
    // vl128 state = 0x85dc16cb
    __ dci(0x45d2c0bd);  // sabalb z29.d, z5.s, z18.s
    // vl128 state = 0xc38b621d
    __ dci(0x45d2cab9);  // uabalb z25.d, z21.s, z18.s
    // vl128 state = 0x3801ad51
    __ dci(0x45d0ca9b);  // uabalb z27.d, z20.s, z16.s
    // vl128 state = 0xd5cc0a31
    __ dci(0x45d0ca39);  // uabalb z25.d, z17.s, z16.s
    // vl128 state = 0x272488a9
    __ dci(0x45d0ca3d);  // uabalb z29.d, z17.s, z16.s
    // vl128 state = 0xea109c4b
    __ dci(0x4550ce3c);  // uabalt z28.h, z17.b, z16.b
    // vl128 state = 0x5a9bdb39
    __ dci(0x4559ce38);  // uabalt z24.h, z17.b, z25.b
    // vl128 state = 0xd90984c9
    __ dci(0x455bcf39);  // uabalt z25.h, z25.b, z27.b
    // vl128 state = 0x6c0884ed
    __ dci(0x455bceb1);  // uabalt z17.h, z21.b, z27.b
    // vl128 state = 0x2f01a6ad
    __ dci(0x455bceb3);  // uabalt z19.h, z21.b, z27.b
    // vl128 state = 0x72a428e1
    __ dci(0x455bceb1);  // uabalt z17.h, z21.b, z27.b
    // vl128 state = 0x27adcf54
    __ dci(0x4559ce21);  // uabalt z1.h, z17.b, z25.b
    // vl128 state = 0xf1899dea
    __ dci(0x45d9ce05);  // uabalt z5.d, z16.s, z25.s
    // vl128 state = 0x41e92a5c
    __ dci(0x45dbc604);  // sabalt z4.d, z16.s, z27.s
    // vl128 state = 0x96021962
    __ dci(0x45d3c634);  // sabalt z20.d, z17.s, z19.s
    // vl128 state = 0x4795c9e2
    __ dci(0x45dbc235);  // sabalb z21.d, z17.s, z27.s
    // vl128 state = 0x6e2eccdb
    __ dci(0x45dbc07d);  // sabalb z29.d, z3.s, z27.s
    // vl128 state = 0x2c2e3625
    __ dci(0x459bc87c);  // uabalb z28.s, z3.h, z27.h
    // vl128 state = 0x618669ad
    __ dci(0x459bc878);  // uabalb z24.s, z3.h, z27.h
    // vl128 state = 0x2d1a9a08
    __ dci(0x4593cc79);  // uabalt z25.s, z3.h, z19.h
    // vl128 state = 0xdb6575df
  }

  // ComputeMachineStateHash() is given the address of `state`; the generated
  // code then loads that word into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by GetSVELaneCount(kQRegSize) - 1, so entry 0 should be the
    // single-Q-lane (VL128) case; it matches the last `vl128 state` above.
    uint32_t expected_hashes[] = {
        0xdb6575df,
        0x691c09fc,
        0x6d969d30,
        0x83db67a7,
        0x8ca1109d,
        0x5175b8ff,
        0xade3cb1b,
        0x1c7b0422,
        0x1199a415,
        0xd1c715e8,
        0x2053b361,
        0x577c4450,
        0x1557204a,
        0xe994b21a,
        0xec34be56,
        0x1c9e0136,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
3870
// State-hash test for the SVE2 add/subtract-with-carry-long encodings
// (adclb, adclt, sbclb, sbclt).
//
// A fixed sequence of 50 raw encodings is emitted with dci() on top of the
// state installed by SetInitialMachineState(). The machine state is then
// hashed and compared with the expected hash for the current vector length.
// The mnemonic after each encoding is its recorded disassembly. The
// `vl128 state` comments appear to be the running hash after each instruction
// for a 128-bit vector length (the last one equals expected_hashes[0]).
TEST_SVE(sve2_add_sub_carry) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instructions emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x4548d4a1);  // adclt z1.d, z5.d, z8.d
    // vl128 state = 0xde78ceb3
    __ dci(0x4588d4a5);  // sbclt z5.s, z5.s, z8.s
    // vl128 state = 0x35dc8534
    __ dci(0x4589d421);  // sbclt z1.s, z1.s, z9.s
    // vl128 state = 0xa72d158b
    __ dci(0x45d9d423);  // sbclt z3.d, z1.d, z25.d
    // vl128 state = 0x197181b9
    __ dci(0x45dfd433);  // sbclt z19.d, z1.d, z31.d
    // vl128 state = 0xaad0d32d
    __ dci(0x4597d437);  // sbclt z23.s, z1.s, z23.s
    // vl128 state = 0xb1c42b7d
    __ dci(0x4597d436);  // sbclt z22.s, z1.s, z23.s
    // vl128 state = 0x6c51a28c
    __ dci(0x4587d537);  // sbclt z23.s, z9.s, z7.s
    // vl128 state = 0x525b5cf8
    __ dci(0x4586d727);  // sbclt z7.s, z25.s, z6.s
    // vl128 state = 0x33942ff9
    __ dci(0x45c6d625);  // sbclt z5.d, z17.d, z6.d
    // vl128 state = 0x24de09b4
    __ dci(0x45c2d6b5);  // sbclt z21.d, z21.d, z2.d
    // vl128 state = 0xabc0063f
    __ dci(0x4546d6b7);  // adclt z23.d, z21.d, z6.d
    // vl128 state = 0x52765e95
    __ dci(0x45c7d6a7);  // sbclt z7.d, z21.d, z7.d
    // vl128 state = 0x7045d250
    __ dci(0x4547d4a5);  // adclt z5.d, z5.d, z7.d
    // vl128 state = 0xb20f5c2a
    __ dci(0x4517d4a1);  // adclt z1.s, z5.s, z23.s
    // vl128 state = 0x5c2c9c29
    __ dci(0x4507d5a5);  // adclt z5.s, z13.s, z7.s
    // vl128 state = 0x788b25f0
    __ dci(0x4507d5ad);  // adclt z13.s, z13.s, z7.s
    // vl128 state = 0xf27eff1e
    __ dci(0x4507d0ac);  // adclb z12.s, z5.s, z7.s
    // vl128 state = 0xc0b629de
    __ dci(0x450ed0ad);  // adclb z13.s, z5.s, z14.s
    // vl128 state = 0x3e15df94
    __ dci(0x458ad0a9);  // sbclb z9.s, z5.s, z10.s
    // vl128 state = 0x68f64c82
    __ dci(0x4582d2ad);  // sbclb z13.s, z21.s, z2.s
    // vl128 state = 0x882379e1
    __ dci(0x4502d3af);  // adclb z15.s, z29.s, z2.s
    // vl128 state = 0x6901994e
    __ dci(0x450ad32b);  // adclb z11.s, z25.s, z10.s
    // vl128 state = 0xa67e9382
    __ dci(0x4582d329);  // sbclb z9.s, z25.s, z2.s
    // vl128 state = 0x9451d0c4
    __ dci(0x4592d22b);  // sbclb z11.s, z17.s, z18.s
    // vl128 state = 0xc19da52e
    __ dci(0x459ad2a3);  // sbclb z3.s, z21.s, z26.s
    // vl128 state = 0x91065b69
    __ dci(0x451ad233);  // adclb z19.s, z17.s, z26.s
    // vl128 state = 0xe3fdc4a5
    __ dci(0x450bd232);  // adclb z18.s, z17.s, z11.s
    // vl128 state = 0x168abbff
    __ dci(0x450ad2b6);  // adclb z22.s, z21.s, z10.s
    // vl128 state = 0x64d0c940
    __ dci(0x4582d2b4);  // sbclb z20.s, z21.s, z2.s
    // vl128 state = 0x37307824
    __ dci(0x4582d6e4);  // sbclt z4.s, z23.s, z2.s
    // vl128 state = 0xd35e02f7
    __ dci(0x4500d6f4);  // adclt z20.s, z23.s, z0.s
    // vl128 state = 0x017ed1b0
    __ dci(0x4501d2e4);  // adclb z4.s, z23.s, z1.s
    // vl128 state = 0x327242bc
    __ dci(0x4501d1f4);  // adclb z20.s, z15.s, z1.s
    // vl128 state = 0x208174e8
    __ dci(0x4503d1b0);  // adclb z16.s, z13.s, z3.s
    // vl128 state = 0xa5a9f61d
    __ dci(0x4501d198);  // adclb z24.s, z12.s, z1.s
    // vl128 state = 0x97e22c2b
    __ dci(0x4501d3da);  // adclb z26.s, z30.s, z1.s
    // vl128 state = 0xd3ac35d5
    __ dci(0x4501d6de);  // adclt z30.s, z22.s, z1.s
    // vl128 state = 0xab835df9
    __ dci(0x4503d2dc);  // adclb z28.s, z22.s, z3.s
    // vl128 state = 0xa048599b
    __ dci(0x4502d6d8);  // adclt z24.s, z22.s, z2.s
    // vl128 state = 0x4c245fee
    __ dci(0x4502d6d0);  // adclt z16.s, z22.s, z2.s
    // vl128 state = 0x0222f3cc
    __ dci(0x4502d280);  // adclb z0.s, z20.s, z2.s
    // vl128 state = 0x16bd7f6a
    __ dci(0x458ad284);  // sbclb z4.s, z20.s, z10.s
    // vl128 state = 0x7ef7d0a2
    __ dci(0x458ad6d4);  // sbclt z20.s, z22.s, z10.s
    // vl128 state = 0x303d8262
    __ dci(0x458ad6dc);  // sbclt z28.s, z22.s, z10.s
    // vl128 state = 0x86b8b0e9
    __ dci(0x458bd7cc);  // sbclt z12.s, z30.s, z11.s
    // vl128 state = 0x068cc5cd
    __ dci(0x45dbd7ce);  // sbclt z14.d, z30.d, z27.d
    // vl128 state = 0x30acfa7f
    __ dci(0x45dfd75e);  // sbclt z30.d, z26.d, z31.d
    // vl128 state = 0xdbd8b32a
    __ dci(0x45ddd7ce);  // sbclt z14.d, z30.d, z29.d
    // vl128 state = 0x59c3c1a9
    __ dci(0x45ddd7cf);  // sbclt z15.d, z30.d, z29.d
    // vl128 state = 0x5c953a50
  }

  // ComputeMachineStateHash() is given the address of `state`; the generated
  // code then loads that word into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by GetSVELaneCount(kQRegSize) - 1, so entry 0 should be the
    // single-Q-lane (VL128) case; it matches the last `vl128 state` above.
    uint32_t expected_hashes[] = {
        0x5c953a50,
        0x22fea196,
        0x084c11a8,
        0x6e7e24d1,
        0x70965ff7,
        0x8c7cb797,
        0xdb846b66,
        0x512f049d,
        0x5c45d25c,
        0xa349606f,
        0x68a853e5,
        0xd92fbeff,
        0x52e59a6b,
        0xf77ee8ce,
        0x6c79623b,
        0x7efed6cc,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4014
// State-hash test for the SVE2 add/subtract-narrow-high-part encodings
// (addhnb, raddhnb, raddhnt, subhnb, subhnt, rsubhnb, rsubhnt).
//
// A fixed sequence of 50 raw encodings is emitted with dci() on top of the
// state installed by SetInitialMachineState(). The machine state is then
// hashed and compared with the expected hash for the current vector length.
// The mnemonic after each encoding is its recorded disassembly. The
// `vl128 state` comments appear to be the running hash after each instruction
// for a 128-bit vector length (the last one equals expected_hashes[0]).
TEST_SVE(sve2_add_sub_high) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instructions emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45fd7464);  // subhnt z4.s, z3.d, z29.d
    // vl128 state = 0x0eea0f4a
    __ dci(0x45fc7c66);  // rsubhnt z6.s, z3.d, z28.d
    // vl128 state = 0x4dc0d938
    __ dci(0x45fc7c6e);  // rsubhnt z14.s, z3.d, z28.d
    // vl128 state = 0x33de615e
    __ dci(0x45f46c7e);  // raddhnt z30.s, z3.d, z20.d
    // vl128 state = 0xa24af7ae
    __ dci(0x45f06e7c);  // raddhnt z28.s, z19.d, z16.d
    // vl128 state = 0x13883aa2
    __ dci(0x45b06a6c);  // raddhnb z12.h, z19.s, z16.s
    // vl128 state = 0x5bf75f05
    __ dci(0x45b96a64);  // raddhnb z4.h, z19.s, z25.s
    // vl128 state = 0x0e489878
    __ dci(0x45b96820);  // raddhnb z0.h, z1.s, z25.s
    // vl128 state = 0x86df8f5f
    __ dci(0x45b96a01);  // raddhnb z1.h, z16.s, z25.s
    // vl128 state = 0x0d1563f2
    __ dci(0x45b96900);  // raddhnb z0.h, z8.s, z25.s
    // vl128 state = 0xd66de87e
    __ dci(0x45a97904);  // rsubhnb z4.h, z8.s, z9.s
    // vl128 state = 0x0c34bd33
    __ dci(0x45a9790c);  // rsubhnb z12.h, z8.s, z9.s
    // vl128 state = 0x7892f2c5
    __ dci(0x45e97988);  // rsubhnb z8.s, z12.d, z9.d
    // vl128 state = 0x9709efbd
    __ dci(0x45f97909);  // rsubhnb z9.s, z8.d, z25.d
    // vl128 state = 0x029a3116
    __ dci(0x45ff790d);  // rsubhnb z13.s, z8.d, z31.d
    // vl128 state = 0x48cf21c1
    __ dci(0x45ff6d05);  // raddhnt z5.s, z8.d, z31.d
    // vl128 state = 0x44c94a11
    __ dci(0x45ff6dc1);  // raddhnt z1.s, z14.d, z31.d
    // vl128 state = 0x12fab619
    __ dci(0x45ff79d1);  // rsubhnb z17.s, z14.d, z31.d
    // vl128 state = 0x6f749933
    __ dci(0x457f7dd0);  // rsubhnt z16.b, z14.h, z31.h
    // vl128 state = 0x404889de
    __ dci(0x457f75f1);  // subhnt z17.b, z15.h, z31.h
    // vl128 state = 0x1dae2a16
    __ dci(0x457f75f3);  // subhnt z19.b, z15.h, z31.h
    // vl128 state = 0xc441a9f0
    __ dci(0x456d75fb);  // subhnt z27.b, z15.h, z13.h
    // vl128 state = 0xdd79f567
    __ dci(0x45ed7dff);  // rsubhnt z31.s, z15.d, z13.d
    // vl128 state = 0x49b27a1f
    __ dci(0x45e17dfe);  // rsubhnt z30.s, z15.d, z1.d
    // vl128 state = 0x19cddb35
    __ dci(0x45e17df6);  // rsubhnt z22.s, z15.d, z1.d
    // vl128 state = 0xea722faa
    __ dci(0x45e37d72);  // rsubhnt z18.s, z11.d, z3.d
    // vl128 state = 0x907267b3
    __ dci(0x45737d62);  // rsubhnt z2.b, z11.h, z19.h
    // vl128 state = 0x1e5409d8
    __ dci(0x45726d6a);  // raddhnt z10.b, z11.h, z18.h
    // vl128 state = 0xce3b87ca
    __ dci(0x45726f5a);  // raddhnt z26.b, z26.h, z18.h
    // vl128 state = 0x2f330789
    __ dci(0x45706f18);  // raddhnt z24.b, z24.h, z16.h
    // vl128 state = 0xff09606a
    __ dci(0x45706f08);  // raddhnt z8.b, z24.h, z16.h
    // vl128 state = 0x062ac37b
    __ dci(0x45706f09);  // raddhnt z9.b, z24.h, z16.h
    // vl128 state = 0xb12c9142
    __ dci(0x45786b08);  // raddhnb z8.b, z24.h, z24.h
    // vl128 state = 0x77e41545
    __ dci(0x45786b0c);  // raddhnb z12.b, z24.h, z24.h
    // vl128 state = 0x1f3a202d
    __ dci(0x457a6308);  // addhnb z8.b, z24.h, z26.h
    // vl128 state = 0xea51f4b9
    __ dci(0x45fb6318);  // addhnb z24.s, z24.d, z27.d
    // vl128 state = 0x5b98747e
    __ dci(0x45b96319);  // addhnb z25.h, z24.s, z25.s
    // vl128 state = 0xdcebf700
    __ dci(0x45bb621d);  // addhnb z29.h, z16.s, z27.s
    // vl128 state = 0x55a216b1
    __ dci(0x45b3625f);  // addhnb z31.h, z18.s, z19.s
    // vl128 state = 0x3e86d641
    __ dci(0x45b3631b);  // addhnb z27.h, z24.s, z19.s
    // vl128 state = 0x36d052e3
    __ dci(0x45bb6213);  // addhnb z19.h, z16.s, z27.s
    // vl128 state = 0xba012cb8
    __ dci(0x45bf7217);  // subhnb z23.h, z16.s, z31.s
    // vl128 state = 0xdef826a7
    __ dci(0x45b67213);  // subhnb z19.h, z16.s, z22.s
    // vl128 state = 0x5cd11781
    __ dci(0x45b66223);  // addhnb z3.h, z17.s, z22.s
    // vl128 state = 0x2f04c440
    __ dci(0x45f66a27);  // raddhnb z7.s, z17.d, z22.d
    // vl128 state = 0x486d0d03
    __ dci(0x45f76825);  // raddhnb z5.s, z1.d, z23.d
    // vl128 state = 0x8a94d5c9
    __ dci(0x45f668a1);  // raddhnb z1.s, z5.d, z22.d
    // vl128 state = 0x14e8e0e7
    __ dci(0x45f469b1);  // raddhnb z17.s, z13.d, z20.d
    // vl128 state = 0x19b96fb3
    __ dci(0x45f469b3);  // raddhnb z19.s, z13.d, z20.d
    // vl128 state = 0xc98e7d4e
    __ dci(0x45f169b7);  // raddhnb z23.s, z13.d, z17.d
    // vl128 state = 0x7ff24d47
  }

  // ComputeMachineStateHash() is given the address of `state`; the generated
  // code then loads that word into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by GetSVELaneCount(kQRegSize) - 1, so entry 0 should be the
    // single-Q-lane (VL128) case; it matches the last `vl128 state` above.
    uint32_t expected_hashes[] = {
        0x7ff24d47,
        0xc639a9b3,
        0x0a1df4a5,
        0x30db6e18,
        0xf3e2f795,
        0x36ff477d,
        0x162f1ca5,
        0x36da990b,
        0x110b2c35,
        0xaf1580f5,
        0x14e39873,
        0x7f5eb52c,
        0x2ececb6f,
        0x4e4d71f0,
        0x800769d1,
        0x1bcbe3a3,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4158
// State-hash test for the SVE2 complex-addition encodings (cadd, sqcadd) with
// the #90 and #270 rotations.
//
// A fixed sequence of 50 raw encodings is emitted with dci() on top of the
// state installed by SetInitialMachineState(). The machine state is then
// hashed and compared with the expected hash for the current vector length.
// The mnemonic after each encoding is its recorded disassembly. The
// `vl128 state` comments appear to be the running hash after each instruction
// for a 128-bit vector length (the last one equals expected_hashes[0]).
TEST_SVE(sve2_complex_addition) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instructions emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x4500dc43);  // cadd z3.b, z3.b, z2.b, #270
    // vl128 state = 0x998365c2
    __ dci(0x4540dc13);  // cadd z19.h, z19.h, z0.h, #270
    // vl128 state = 0xcc866131
    __ dci(0x4541d81b);  // sqcadd z27.h, z27.h, z0.h, #90
    // vl128 state = 0x2ae23a6a
    __ dci(0x45c1d853);  // sqcadd z19.d, z19.d, z2.d, #90
    // vl128 state = 0x1f8de2d3
    __ dci(0x4541d8c3);  // sqcadd z3.h, z3.h, z6.h, #90
    // vl128 state = 0x3655c07c
    __ dci(0x4541d8d3);  // sqcadd z19.h, z19.h, z6.h, #90
    // vl128 state = 0x3a8fe2d9
    __ dci(0x4541d811);  // sqcadd z17.h, z17.h, z0.h, #90
    // vl128 state = 0x003c88ea
    __ dci(0x4540da10);  // cadd z16.h, z16.h, z16.h, #90
    // vl128 state = 0xe20c1375
    __ dci(0x4540da18);  // cadd z24.h, z24.h, z16.h, #90
    // vl128 state = 0x67bb0270
    __ dci(0x4540de5a);  // cadd z26.h, z26.h, z18.h, #270
    // vl128 state = 0x7abb4f8f
    __ dci(0x4540de4a);  // cadd z10.h, z10.h, z18.h, #270
    // vl128 state = 0x42850f11
    __ dci(0x4500decb);  // cadd z11.b, z11.b, z22.b, #270
    // vl128 state = 0xda605f59
    __ dci(0x4500da83);  // cadd z3.b, z3.b, z20.b, #90
    // vl128 state = 0x99e63476
    __ dci(0x4500dc8b);  // cadd z11.b, z11.b, z4.b, #270
    // vl128 state = 0xd444a939
    __ dci(0x4500dc8f);  // cadd z15.b, z15.b, z4.b, #270
    // vl128 state = 0xde3ad968
    __ dci(0x4500d99f);  // cadd z31.b, z31.b, z12.b, #90
    // vl128 state = 0xd7cdb177
    __ dci(0x4540d91e);  // cadd z30.h, z30.h, z8.h, #90
    // vl128 state = 0x74575b36
    __ dci(0x4541d81a);  // sqcadd z26.h, z26.h, z0.h, #90
    // vl128 state = 0x3d347b0b
    __ dci(0x4501d83b);  // sqcadd z27.b, z27.b, z1.b, #90
    // vl128 state = 0x03df7859
    __ dci(0x45c1d83f);  // sqcadd z31.d, z31.d, z1.d, #90
    // vl128 state = 0xf0cdbf68
    __ dci(0x45c1d83e);  // sqcadd z30.d, z30.d, z1.d, #90
    // vl128 state = 0x0931dda4
    __ dci(0x45c1d83c);  // sqcadd z28.d, z28.d, z1.d, #90
    // vl128 state = 0x460b5369
    __ dci(0x4581da3e);  // sqcadd z30.s, z30.s, z17.s, #90
    // vl128 state = 0x71af9203
    __ dci(0x45c1d83f);  // sqcadd z31.d, z31.d, z1.d, #90
    // vl128 state = 0xd6babc53
    __ dci(0x4581da3e);  // sqcadd z30.s, z30.s, z17.s, #90
    // vl128 state = 0xd3e4f42f
    __ dci(0x4501d83f);  // sqcadd z31.b, z31.b, z1.b, #90
    // vl128 state = 0x7a594239
    __ dci(0x4501dcbb);  // sqcadd z27.b, z27.b, z5.b, #270
    // vl128 state = 0x24a5a8c9
    __ dci(0x4501dfba);  // sqcadd z26.b, z26.b, z29.b, #270
    // vl128 state = 0x0c3df842
    __ dci(0x4581dfea);  // sqcadd z10.s, z10.s, z31.s, #270
    // vl128 state = 0x6173c97f
    __ dci(0x4581db7a);  // sqcadd z26.s, z26.s, z27.s, #90
    // vl128 state = 0x55090d5f
    __ dci(0x4581db1b);  // sqcadd z27.s, z27.s, z24.s, #90
    // vl128 state = 0x63477385
    __ dci(0x4581da93);  // sqcadd z19.s, z19.s, z20.s, #90
    // vl128 state = 0xc996545e
    __ dci(0x45c1db92);  // sqcadd z18.d, z18.d, z28.d, #90
    // vl128 state = 0xa48bf827
    __ dci(0x45c1db93);  // sqcadd z19.d, z19.d, z28.d, #90
    // vl128 state = 0xf5a3b641
    __ dci(0x45c1daa3);  // sqcadd z3.d, z3.d, z21.d, #90
    // vl128 state = 0x20ad4c28
    __ dci(0x4581dba7);  // sqcadd z7.s, z7.s, z29.s, #90
    // vl128 state = 0xc9e36e96
    __ dci(0x45c1daaf);  // sqcadd z15.d, z15.d, z21.d, #90
    // vl128 state = 0x6eb23fd2
    __ dci(0x45c1daae);  // sqcadd z14.d, z14.d, z21.d, #90
    // vl128 state = 0x585d4d63
    __ dci(0x4541dae6);  // sqcadd z6.h, z6.h, z23.h, #90
    // vl128 state = 0x827cc0a8
    __ dci(0x4541daee);  // sqcadd z14.h, z14.h, z23.h, #90
    // vl128 state = 0xe00543a0
    __ dci(0x4501dabe);  // sqcadd z30.b, z30.b, z21.b, #90
    // vl128 state = 0x2313db47
    __ dci(0x4501deff);  // sqcadd z31.b, z31.b, z23.b, #270
    // vl128 state = 0xe30d4e83
    __ dci(0x4501defd);  // sqcadd z29.b, z29.b, z23.b, #270
    // vl128 state = 0xb95d6d94
    __ dci(0x4501def5);  // sqcadd z21.b, z21.b, z23.b, #270
    // vl128 state = 0x4f18b02e
    __ dci(0x4501def4);  // sqcadd z20.b, z20.b, z23.b, #270
    // vl128 state = 0x20ae9a78
    __ dci(0x4501dee4);  // sqcadd z4.b, z4.b, z23.b, #270
    // vl128 state = 0x4eef87a9
    __ dci(0x4501dee6);  // sqcadd z6.b, z6.b, z23.b, #270
    // vl128 state = 0x1b041a7b
    __ dci(0x4501dfc2);  // sqcadd z2.b, z2.b, z30.b, #270
    // vl128 state = 0xeaf5e18f
    __ dci(0x4500df92);  // cadd z18.b, z18.b, z28.b, #270
    // vl128 state = 0xc47ee5e7
    __ dci(0x4500de13);  // cadd z19.b, z19.b, z16.b, #270
    // vl128 state = 0x6482d75c
  }

  // ComputeMachineStateHash() is given the address of `state`; the generated
  // code then loads that word into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by GetSVELaneCount(kQRegSize) - 1, so entry 0 should be the
    // single-Q-lane (VL128) case; it matches the last `vl128 state` above.
    uint32_t expected_hashes[] = {
        0x6482d75c,
        0x48d9bd2f,
        0xd6bd52ae,
        0x56be94f0,
        0x620cfb69,
        0xb646e0fe,
        0x6034718f,
        0xd8187657,
        0x211218bb,
        0xc973a707,
        0x6020dcc9,
        0x8fadad0c,
        0x0132ecbc,
        0x3a07eb63,
        0x5c20eb82,
        0xc92d6cb2,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4302
// Regression test for the SVE2 bit-permute instructions (BGRP, BEXT, BDEP).
//
// Starting from the state installed by SetInitialMachineState(), 50 raw
// instruction encodings are emitted with dci(). A hash of the resulting
// machine state is then computed, loaded into w0 and compared against the
// expected value for the vector length in use. The `vl128 state` annotations
// record the running hash after each instruction for a 128-bit vector length;
// the final annotation equals expected_hashes[0].
TEST_SVE(sve2_bit_permute) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kSVEBitPerm,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x455fbb1a);  // bgrp z26.h, z24.h, z31.h
    // vl128 state = 0x39fb8e5b
    __ dci(0x451fbb58);  // bgrp z24.b, z26.b, z31.b
    // vl128 state = 0x7fbccdbd
    __ dci(0x4517bb19);  // bgrp z25.b, z24.b, z23.b
    // vl128 state = 0x67caf176
    __ dci(0x4517bb18);  // bgrp z24.b, z24.b, z23.b
    // vl128 state = 0x665fd977
    __ dci(0x4517ba5c);  // bgrp z28.b, z18.b, z23.b
    // vl128 state = 0x0f2c1473
    __ dci(0x4517ba38);  // bgrp z24.b, z17.b, z23.b
    // vl128 state = 0x253789a0
    __ dci(0x4517ba3c);  // bgrp z28.b, z17.b, z23.b
    // vl128 state = 0xd3b26fd2
    __ dci(0x4515ba6c);  // bgrp z12.b, z19.b, z21.b
    // vl128 state = 0x4bad6941
    __ dci(0x4515bac4);  // bgrp z4.b, z22.b, z21.b
    // vl128 state = 0x7c70d2d2
    __ dci(0x4517ba86);  // bgrp z6.b, z20.b, z23.b
    // vl128 state = 0x5794816b
    __ dci(0x4517ba87);  // bgrp z7.b, z20.b, z23.b
    // vl128 state = 0xe67993b1
    __ dci(0x4515b297);  // bext z23.b, z20.b, z21.b
    // vl128 state = 0x3041b7ee
    __ dci(0x4517b396);  // bext z22.b, z28.b, z23.b
    // vl128 state = 0xb571d524
    __ dci(0x451bb386);  // bext z6.b, z28.b, z27.b
    // vl128 state = 0x73ce1823
    __ dci(0x4513b784);  // bdep z4.b, z28.b, z19.b
    // vl128 state = 0x4264f0f2
    __ dci(0x4593b7ac);  // bdep z12.s, z29.s, z19.s
    // vl128 state = 0xf9cb9d26
    __ dci(0x4593b7a8);  // bdep z8.s, z29.s, z19.s
    // vl128 state = 0xa2b310a0
    __ dci(0x4597b780);  // bdep z0.s, z28.s, z23.s
    // vl128 state = 0xee25c82f
    __ dci(0x4597b781);  // bdep z1.s, z28.s, z23.s
    // vl128 state = 0xdca7577f
    __ dci(0x4597b7e3);  // bdep z3.s, z31.s, z23.s
    // vl128 state = 0x32294429
    __ dci(0x45dfb7e1);  // bdep z1.d, z31.d, z31.d
    // vl128 state = 0xc147e511
    __ dci(0x455db7e5);  // bdep z5.h, z31.h, z29.h
    // vl128 state = 0x7a51d422
    __ dci(0x45d5b7e4);  // bdep z4.d, z31.d, z21.d
    // vl128 state = 0x512ad92a
    __ dci(0x45c7b7ec);  // bdep z12.d, z31.d, z7.d
    // vl128 state = 0xe59fbf5c
    __ dci(0x4547b7a8);  // bdep z8.h, z29.h, z7.h
    // vl128 state = 0xb85fd3b1
    __ dci(0x454fb72c);  // bdep z12.h, z25.h, z15.h
    // vl128 state = 0xc820e9d0
    __ dci(0x4557b724);  // bdep z4.h, z25.h, z23.h
    // vl128 state = 0x814ff3f4
    __ dci(0x4557bb20);  // bgrp z0.h, z25.h, z23.h
    // vl128 state = 0xc58dee50
    __ dci(0x4556b321);  // bext z1.h, z25.h, z22.h
    // vl128 state = 0xf19c0956
    __ dci(0x4556b3e3);  // bext z3.h, z31.h, z22.h
    // vl128 state = 0x2a256808
    __ dci(0x4546b367);  // bext z7.h, z27.h, z6.h
    // vl128 state = 0x1c6696f4
    __ dci(0x4556bb66);  // bgrp z6.h, z27.h, z22.h
    // vl128 state = 0x32522ca2
    __ dci(0x4556bb76);  // bgrp z22.h, z27.h, z22.h
    // vl128 state = 0x33fe6590
    __ dci(0x45c6bb66);  // bgrp z6.d, z27.d, z6.d
    // vl128 state = 0x45d26723
    __ dci(0x45c2b976);  // bgrp z22.d, z11.d, z2.d
    // vl128 state = 0x364d9885
    __ dci(0x4540b974);  // bgrp z20.h, z11.h, z0.h
    // vl128 state = 0x36a0bd94
    __ dci(0x45c0b164);  // bext z4.d, z11.d, z0.d
    // vl128 state = 0x4ee9a90c
    __ dci(0x45ccb16c);  // bext z12.d, z11.d, z12.d
    // vl128 state = 0x30c32d69
    __ dci(0x458cb368);  // bext z8.s, z27.s, z12.s
    // vl128 state = 0xfc2c912f
    __ dci(0x450cb769);  // bdep z9.b, z27.b, z12.b
    // vl128 state = 0xef976b44
    __ dci(0x458cb7eb);  // bdep z11.s, z31.s, z12.s
    // vl128 state = 0x6f9e21b8
    __ dci(0x4588b5ef);  // bdep z15.s, z15.s, z8.s
    // vl128 state = 0xa1f212e2
    __ dci(0x4598b5ad);  // bdep z13.s, z13.s, z24.s
    // vl128 state = 0xe4286a40
    __ dci(0x4598b5af);  // bdep z15.s, z13.s, z24.s
    // vl128 state = 0x7d6622e5
    __ dci(0x4598b6ad);  // bdep z13.s, z21.s, z24.s
    // vl128 state = 0xcd00829c
    __ dci(0x4518b2af);  // bext z15.b, z21.b, z24.b
    // vl128 state = 0xa8d58b2d
    __ dci(0x4519b2e7);  // bext z7.b, z23.b, z25.b
    // vl128 state = 0x2b7b7c44
    __ dci(0x4518b2a6);  // bext z6.b, z21.b, z24.b
    // vl128 state = 0x09c81b7e
    __ dci(0x4518b2a7);  // bext z7.b, z21.b, z24.b
    // vl128 state = 0xab1b2b22
    __ dci(0x4519b6a5);  // bdep z5.b, z21.b, z25.b
    // vl128 state = 0x03476e4c
  }

  // Hash the final machine state into `state`, then load it into w0 so that
  // it can be checked once the generated code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per supported vector length, selected below by
    // GetSVELaneCount(kQRegSize) - 1.
    uint32_t expected_hashes[] = {
        0x03476e4c,
        0xcc54e76f,
        0x08324d66,
        0xcc289ee1,
        0xacd3ba43,
        0xe961aeda,
        0x60a204b1,
        0xde020904,
        0x0652d1e5,
        0x7982dc25,
        0x02a2c1cb,
        0x4dd9e71b,
        0xb57f587f,
        0xb75e0d62,
        0x78330809,
        0xbc7046ae,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4447
// Regression test for the SVE2 widening multiplies SMULLB, SMULLT, UMULLB and
// UMULLT (vector forms).
//
// Starting from the state installed by SetInitialMachineState(), 50 raw
// instruction encodings are emitted with dci(). A hash of the resulting
// machine state is then computed, loaded into w0 and compared against the
// expected value for the vector length in use. The `vl128 state` annotations
// record the running hash after each instruction for a 128-bit vector length;
// the final annotation equals expected_hashes[0].
TEST_SVE(sve2_smullb_smullt_umullb_umullt_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x455a7bc2);  // umullb z2.h, z30.b, z26.b
    // vl128 state = 0xe2a2b611
    __ dci(0x454a7b92);  // umullb z18.h, z28.b, z10.b
    // vl128 state = 0x12b3b0c6
    __ dci(0x45427bda);  // umullb z26.h, z30.b, z2.b
    // vl128 state = 0x74f4a891
    __ dci(0x45c67bde);  // umullb z30.d, z30.s, z6.s
    // vl128 state = 0x20402d9f
    __ dci(0x45467b56);  // umullb z22.h, z26.b, z6.b
    // vl128 state = 0x75e15413
    __ dci(0x45427f54);  // umullt z20.h, z26.b, z2.b
    // vl128 state = 0x51478ee1
    __ dci(0x45427fe4);  // umullt z4.h, z31.b, z2.b
    // vl128 state = 0x63381b63
    __ dci(0x45567fe5);  // umullt z5.h, z31.b, z22.b
    // vl128 state = 0x0967f882
    __ dci(0x45467df5);  // umullt z21.h, z15.b, z6.b
    // vl128 state = 0x753e96b9
    __ dci(0x454279f1);  // umullb z17.h, z15.b, z2.b
    // vl128 state = 0xcff906e6
    __ dci(0x454078f5);  // umullb z21.h, z7.b, z0.b
    // vl128 state = 0x5609bd14
    __ dci(0x454070d4);  // smullb z20.h, z6.b, z0.b
    // vl128 state = 0xf284d300
    __ dci(0x45407016);  // smullb z22.h, z0.b, z0.b
    // vl128 state = 0xbb549bf7
    __ dci(0x45487086);  // smullb z6.h, z4.b, z8.b
    // vl128 state = 0x6ef99ff1
    __ dci(0x454070c7);  // smullb z7.h, z6.b, z0.b
    // vl128 state = 0x90177a84
    __ dci(0x45407846);  // umullb z6.h, z2.b, z0.b
    // vl128 state = 0xd3dbb2fe
    __ dci(0x45417a56);  // umullb z22.h, z18.b, z1.b
    // vl128 state = 0x7d30cf73
    __ dci(0x45417877);  // umullb z23.h, z3.b, z1.b
    // vl128 state = 0x0623e678
    __ dci(0x45417807);  // umullb z7.h, z0.b, z1.b
    // vl128 state = 0xe849cf35
    __ dci(0x454178a3);  // umullb z3.h, z5.b, z1.b
    // vl128 state = 0xcad236a9
    __ dci(0x45437cab);  // umullt z11.h, z5.b, z3.b
    // vl128 state = 0xc8dfcb1d
    __ dci(0x454b7c3b);  // umullt z27.h, z1.b, z11.b
    // vl128 state = 0x6136e2d6
    __ dci(0x454b7a3a);  // umullb z26.h, z17.b, z11.b
    // vl128 state = 0x091beb5a
    __ dci(0x454b72b2);  // smullb z18.h, z21.b, z11.b
    // vl128 state = 0x932b30ec
    __ dci(0x454b7622);  // smullt z2.h, z17.b, z11.b
    // vl128 state = 0xee51239c
    __ dci(0x454b76ea);  // smullt z10.h, z23.b, z11.b
    // vl128 state = 0xf4fcc577
    __ dci(0x454b74ab);  // smullt z11.h, z5.b, z11.b
    // vl128 state = 0xcf0c8028
    __ dci(0x454d74bb);  // smullt z27.h, z5.b, z13.b
    // vl128 state = 0x0f8523c8
    __ dci(0x454d740b);  // smullt z11.h, z0.b, z13.b
    // vl128 state = 0xc02b2f52
    __ dci(0x454d7403);  // smullt z3.h, z0.b, z13.b
    // vl128 state = 0x11b4180c
    __ dci(0x45557413);  // smullt z19.h, z0.b, z21.b
    // vl128 state = 0x26eef57a
    __ dci(0x45557531);  // smullt z17.h, z9.b, z21.b
    // vl128 state = 0x6f3fce98
    __ dci(0x455574b9);  // smullt z25.h, z5.b, z21.b
    // vl128 state = 0x0d4ac272
    __ dci(0x455571b1);  // smullb z17.h, z13.b, z21.b
    // vl128 state = 0x7c866a41
    __ dci(0x455573e1);  // smullb z1.h, z31.b, z21.b
    // vl128 state = 0x9c724758
    __ dci(0x455473c9);  // smullb z9.h, z30.b, z20.b
    // vl128 state = 0xa9a8d0aa
    __ dci(0x455473cb);  // smullb z11.h, z30.b, z20.b
    // vl128 state = 0xd7eec117
    __ dci(0x455473a9);  // smullb z9.h, z29.b, z20.b
    // vl128 state = 0x35caaa62
    __ dci(0x455473a8);  // smullb z8.h, z29.b, z20.b
    // vl128 state = 0x97a1d399
    __ dci(0x455473b8);  // smullb z24.h, z29.b, z20.b
    // vl128 state = 0x3adce4ee
    __ dci(0x455673fa);  // smullb z26.h, z31.b, z22.b
    // vl128 state = 0xd17120ea
    __ dci(0x455e77ea);  // smullt z10.h, z31.b, z30.b
    // vl128 state = 0x1e238a9e
    __ dci(0x455677da);  // smullt z26.h, z30.b, z22.b
    // vl128 state = 0xfbccf6c2
    __ dci(0x454673d8);  // smullb z24.h, z30.b, z6.b
    // vl128 state = 0xa47583be
    __ dci(0x45c67359);  // smullb z25.d, z26.s, z6.s
    // vl128 state = 0x4e8a9b37
    __ dci(0x45c47751);  // smullt z17.d, z26.s, z4.s
    // vl128 state = 0xe3c06571
    __ dci(0x45d67741);  // smullt z1.d, z26.s, z22.s
    // vl128 state = 0x6629e034
    __ dci(0x45d67b45);  // umullb z5.d, z26.s, z22.s
    // vl128 state = 0x66a99e85
    __ dci(0x45867b47);  // umullb z7.s, z26.h, z6.h
    // vl128 state = 0xf1cc3339
    __ dci(0x45867b45);  // umullb z5.s, z26.h, z6.h
    // vl128 state = 0x8bf658d7
  }

  // Hash the final machine state into `state`, then load it into w0 so that
  // it can be checked once the generated code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per supported vector length, selected below by
    // GetSVELaneCount(kQRegSize) - 1.
    uint32_t expected_hashes[] = {
        0x8bf658d7,
        0x82fac555,
        0x07c3d434,
        0x25d2ee2b,
        0xe70f4394,
        0x79223404,
        0x368ed35f,
        0x6565d842,
        0xead08c30,
        0xae35e083,
        0xe1959b85,
        0x94ad31e7,
        0x9caeda4d,
        0x7611d6dc,
        0x22977911,
        0xcf3754ec,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4591
// Regression test for the SVE2 vector-form SQDMULLB, SQDMULLT, PMULLB and
// PMULLT instructions. (The test name says `pmullb` twice, but the encodings
// below cover both PMULLB and PMULLT.)
//
// Starting from the state installed by SetInitialMachineState(), 50 raw
// instruction encodings are emitted with dci(). A hash of the resulting
// machine state is then computed, loaded into w0 and compared against the
// expected value for the vector length in use. The `vl128 state` annotations
// record the running hash after each instruction for a 128-bit vector length;
// the final annotation equals expected_hashes[0].
TEST_SVE(sve2_sqdmullb_sqdmullt_pmullb_pmullb_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45936164);  // sqdmullb z4.s, z11.h, z19.h
    // vl128 state = 0xacc89592
    __ dci(0x459161f4);  // sqdmullb z20.s, z15.h, z17.h
    // vl128 state = 0x142c66e5
    __ dci(0x459563f5);  // sqdmullb z21.s, z31.h, z21.h
    // vl128 state = 0x5cfcb839
    __ dci(0x45956265);  // sqdmullb z5.s, z19.h, z21.h
    // vl128 state = 0x33616223
    __ dci(0x45d56235);  // sqdmullb z21.d, z17.s, z21.s
    // vl128 state = 0x987a4a0d
    __ dci(0x45556031);  // sqdmullb z17.h, z1.b, z21.b
    // vl128 state = 0xf7dd9b01
    __ dci(0x45506035);  // sqdmullb z21.h, z1.b, z16.b
    // vl128 state = 0x6fa54cf3
    __ dci(0x45506334);  // sqdmullb z20.h, z25.b, z16.b
    // vl128 state = 0x04398c6e
    __ dci(0x45486336);  // sqdmullb z22.h, z25.b, z8.b
    // vl128 state = 0x4cda753c
    __ dci(0x45486334);  // sqdmullb z20.h, z25.b, z8.b
    // vl128 state = 0x53993d4a
    __ dci(0x45496b35);  // pmullb z21.h, z25.b, z9.b
    // vl128 state = 0xa591f97c
    __ dci(0x45496b37);  // pmullb z23.h, z25.b, z9.b
    // vl128 state = 0x5cb91e99
    __ dci(0x45496fb3);  // pmullt z19.h, z29.b, z9.b
    // vl128 state = 0x5031ac4d
    __ dci(0x45596f3b);  // pmullt z27.h, z25.b, z25.b
    // vl128 state = 0xb0a76e75
    __ dci(0x455d6f13);  // pmullt z19.h, z24.b, z29.b
    // vl128 state = 0xe84ca196
    __ dci(0x455d6fb2);  // pmullt z18.h, z29.b, z29.b
    // vl128 state = 0xd294ce54
    __ dci(0x455c6bb0);  // pmullb z16.h, z29.b, z28.b
    // vl128 state = 0x90f01471
    __ dci(0x45546bf8);  // pmullb z24.h, z31.b, z20.b
    // vl128 state = 0xd15f23fa
    __ dci(0x45546bf9);  // pmullb z25.h, z31.b, z20.b
    // vl128 state = 0x62ca83ea
    __ dci(0x45546bfb);  // pmullb z27.h, z31.b, z20.b
    // vl128 state = 0xf786c1e4
    __ dci(0x454469eb);  // pmullb z11.h, z15.b, z4.b
    // vl128 state = 0x3cc8c789
    __ dci(0x455069fb);  // pmullb z27.h, z15.b, z16.b
    // vl128 state = 0xb14709ca
    __ dci(0x45546dfa);  // pmullt z26.h, z15.b, z20.b
    // vl128 state = 0x38257820
    __ dci(0x45546df8);  // pmullt z24.h, z15.b, z20.b
    // vl128 state = 0x9cc5cd3a
    __ dci(0x45576dfc);  // pmullt z28.h, z15.b, z23.b
    // vl128 state = 0x704543ec
    __ dci(0x45d76d6c);  // pmullt z12.d, z11.s, z23.s
    // vl128 state = 0x15ec8e77
    __ dci(0x455f6d68);  // pmullt z8.h, z11.b, z31.b
    // vl128 state = 0xfa379a67
    __ dci(0x45596d6a);  // pmullt z10.h, z11.b, z25.b
    // vl128 state = 0x27fcfa49
    __ dci(0x45596d7a);  // pmullt z26.h, z11.b, z25.b
    // vl128 state = 0x13883ef0
    __ dci(0x45596532);  // sqdmullt z18.h, z9.b, z25.b
    // vl128 state = 0x667f8699
    __ dci(0x45596536);  // sqdmullt z22.h, z9.b, z25.b
    // vl128 state = 0x477ded37
    __ dci(0x45d16537);  // sqdmullt z23.d, z9.s, z17.s
    // vl128 state = 0x3323eb48
    __ dci(0x45c16515);  // sqdmullt z21.d, z8.s, z1.s
    // vl128 state = 0x3f581e83
    __ dci(0x45456517);  // sqdmullt z23.h, z8.b, z5.b
    // vl128 state = 0xd844e48b
    __ dci(0x45556555);  // sqdmullt z21.h, z10.b, z21.b
    // vl128 state = 0x95e6094e
    __ dci(0x45c56554);  // sqdmullt z20.d, z10.s, z5.s
    // vl128 state = 0x198a6f75
    __ dci(0x45cd6456);  // sqdmullt z22.d, z2.s, z13.s
    // vl128 state = 0x4d6b7178
    __ dci(0x45c96406);  // sqdmullt z6.d, z0.s, z9.s
    // vl128 state = 0xd989cd0f
    __ dci(0x45d96482);  // sqdmullt z2.d, z4.s, z25.s
    // vl128 state = 0xa80fdf92
    __ dci(0x45dd6406);  // sqdmullt z6.d, z0.s, z29.s
    // vl128 state = 0x9876a20d
    __ dci(0x45596404);  // sqdmullt z4.h, z0.b, z25.b
    // vl128 state = 0x5ad5787c
    __ dci(0x454b6414);  // sqdmullt z20.h, z0.b, z11.b
    // vl128 state = 0x86c077d7
    __ dci(0x454a601c);  // sqdmullb z28.h, z0.b, z10.b
    // vl128 state = 0xfe867841
    __ dci(0x4542641d);  // sqdmullt z29.h, z0.b, z2.b
    // vl128 state = 0x7bf363f1
    __ dci(0x4552643c);  // sqdmullt z28.h, z1.b, z18.b
    // vl128 state = 0x7cf26ed3
    __ dci(0x4552673d);  // sqdmullt z29.h, z25.b, z18.b
    // vl128 state = 0x748f1a99
    __ dci(0x45d6673f);  // sqdmullt z31.d, z25.s, z22.s
    // vl128 state = 0xbb15fd07
    __ dci(0x45d2633d);  // sqdmullb z29.d, z25.s, z18.s
    // vl128 state = 0x28e0985a
    __ dci(0x455a6339);  // sqdmullb z25.h, z25.b, z26.b
    // vl128 state = 0x9c0da0fd
    __ dci(0x45526738);  // sqdmullt z24.h, z25.b, z18.b
    // vl128 state = 0xa970ebb8
  }

  // Hash the final machine state into `state`, then load it into w0 so that
  // it can be checked once the generated code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per supported vector length, selected below by
    // GetSVELaneCount(kQRegSize) - 1.
    uint32_t expected_hashes[] = {
        0xa970ebb8,
        0xc665eff5,
        0x8cc21595,
        0x0ea984f6,
        0x1dbce326,
        0x0845e911,
        0xa6fb6cf4,
        0x8544239a,
        0x2412d23d,
        0xbce6f5e0,
        0x780ff264,
        0xcf6cf172,
        0xef93a3b4,
        0x94080541,
        0xa0aedeba,
        0x8e8bddaa,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4735
// Regression test for the indexed (by-element) forms of the SVE2 SQDMULLB and
// SQDMULLT instructions.
//
// Starting from the state installed by SetInitialMachineState(), 30 raw
// instruction encodings are emitted with dci(). A hash of the resulting
// machine state is then computed, loaded into w0 and compared against the
// expected value for the vector length in use. The `vl128 state` annotations
// record the running hash after each instruction for a 128-bit vector length;
// the final annotation equals expected_hashes[0].
TEST_SVE(sve2_sqdmullt_sqdmullb_z_zzi) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 30 encodings emitted below.
    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
    __ dci(0x44eae5a9);  // sqdmullt z9.d, z13.s, z10.s[#0]
    // vl128 state = 0x311dfe35
    __ dci(0x44eae9a1);  // sqdmullb z1.d, z13.s, z10.s[#1]
    // vl128 state = 0x559243c3
    __ dci(0x44eae9a5);  // sqdmullb z5.d, z13.s, z10.s[#1]
    // vl128 state = 0x44d6824c
    __ dci(0x44e2edad);  // sqdmullt z13.d, z13.s, z2.s[#1]
    // vl128 state = 0xb5539592
    __ dci(0x44e6e9ac);  // sqdmullb z12.d, z13.s, z6.s[#1]
    // vl128 state = 0x5e66b9f8
    __ dci(0x44e4ebae);  // sqdmullb z14.d, z29.s, z4.s[#1]
    // vl128 state = 0x4347620a
    __ dci(0x44e4ebaf);  // sqdmullb z15.d, z29.s, z4.s[#1]
    // vl128 state = 0xe7cfe898
    __ dci(0x44a5ebad);  // sqdmullb z13.s, z29.h, z5.h[#1]
    // vl128 state = 0x0ca455c7
    __ dci(0x44a5e9fd);  // sqdmullb z29.s, z15.h, z5.h[#1]
    // vl128 state = 0xcac072a9
    __ dci(0x44e5e8fc);  // sqdmullb z28.d, z7.s, z5.s[#1]
    // vl128 state = 0xe18e8c66
    __ dci(0x44ede9ec);  // sqdmullb z12.d, z15.s, z13.s[#1]
    // vl128 state = 0x32f642cb
    __ dci(0x44ede9fc);  // sqdmullb z28.d, z15.s, z13.s[#1]
    // vl128 state = 0xa0467c8a
    __ dci(0x44fce9f4);  // sqdmullb z20.d, z15.s, z12.s[#3]
    // vl128 state = 0x7ada4130
    __ dci(0x44e4e9f6);  // sqdmullb z22.d, z15.s, z4.s[#1]
    // vl128 state = 0xc87deb44
    __ dci(0x44f4e9d2);  // sqdmullb z18.d, z14.s, z4.s[#3]
    // vl128 state = 0x6dc052ca
    __ dci(0x44f5e9e2);  // sqdmullb z2.d, z15.s, z5.s[#3]
    // vl128 state = 0xe05110d4
    __ dci(0x44f5ebb2);  // sqdmullb z18.d, z29.s, z5.s[#3]
    // vl128 state = 0x7ed21594
    __ dci(0x44b5efba);  // sqdmullt z26.s, z29.h, z5.h[#5]
    // vl128 state = 0x7d5dad40
    __ dci(0x44b5ef78);  // sqdmullt z24.s, z27.h, z5.h[#5]
    // vl128 state = 0x418f84bc
    __ dci(0x44f5eb70);  // sqdmullb z16.d, z27.s, z5.s[#3]
    // vl128 state = 0x72d78d32
    __ dci(0x44e5ebf4);  // sqdmullb z20.d, z31.s, z5.s[#1]
    // vl128 state = 0x391fad35
    __ dci(0x44e5efbc);  // sqdmullt z28.d, z29.s, z5.s[#1]
    // vl128 state = 0xb2143633
    __ dci(0x44e1ebbd);  // sqdmullb z29.d, z29.s, z1.s[#1]
    // vl128 state = 0x468dac6e
    __ dci(0x44f1ebed);  // sqdmullb z13.d, z31.s, z1.s[#3]
    // vl128 state = 0x9ab292bd
    __ dci(0x44f5efe5);  // sqdmullt z5.d, z31.s, z5.s[#3]
    // vl128 state = 0x4f2bd5d1
    __ dci(0x44fdeee7);  // sqdmullt z7.d, z23.s, z13.s[#3]
    // vl128 state = 0x7a810779
    __ dci(0x44fdee25);  // sqdmullt z5.d, z17.s, z13.s[#3]
    // vl128 state = 0x05d23734
    __ dci(0x44f5ea27);  // sqdmullb z7.d, z17.s, z5.s[#3]
    // vl128 state = 0x878580f5
    __ dci(0x44f1e225);  // sqdmullb z5.d, z17.s, z1.s[#2]
    // vl128 state = 0x5fa56f94
    __ dci(0x44e1ea21);  // sqdmullb z1.d, z17.s, z1.s[#1]
    // vl128 state = 0x05f1cdf0
  }

  // Hash the final machine state into `state`, then load it into w0 so that
  // it can be checked once the generated code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per supported vector length, selected below by
    // GetSVELaneCount(kQRegSize) - 1.
    uint32_t expected_hashes[] = {
        0x05f1cdf0,
        0x6b88d4f2,
        0x83bf279d,
        0x12f21868,
        0x6c68a5ce,
        0x5710343f,
        0xa4d0d0ee,
        0x335b20c5,
        0x0dd491c5,
        0x98966292,
        0xb68cdacd,
        0xa26f9914,
        0x6dd60ced,
        0x5cd0d62c,
        0xebe3fb25,
        0xb264d998,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4839
// Regression test for the SVE2 XAR instruction across B, H, S and D lane
// sizes and a range of immediates.
//
// Starting from the state installed by SetInitialMachineState(), 20 raw
// instruction encodings are emitted with dci(). A hash of the resulting
// machine state is then computed, loaded into w0 and compared against the
// expected value for the vector length in use. The `vl128 state` annotations
// record the running hash after each instruction for a 128-bit vector length;
// the final annotation equals expected_hashes[0].
TEST_SVE(sve2_xar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 20 encodings emitted below.
    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
    __ dci(0x04293719);  // xar z25.b, z25.b, z24.b, #7
    // vl128 state = 0x596046c4
    __ dci(0x04293531);  // xar z17.b, z17.b, z9.b, #7
    // vl128 state = 0x38332d55
    __ dci(0x04e93533);  // xar z19.d, z19.d, z9.d, #23
    // vl128 state = 0x535c8af7
    __ dci(0x046b3523);  // xar z3.s, z3.s, z9.s, #21
    // vl128 state = 0x879a489f
    __ dci(0x04eb3427);  // xar z7.d, z7.d, z1.d, #21
    // vl128 state = 0xfbac317f
    __ dci(0x04ea3463);  // xar z3.d, z3.d, z3.d, #22
    // vl128 state = 0xfb44482e
    __ dci(0x04fa3447);  // xar z7.d, z7.d, z2.d, #6
    // vl128 state = 0xa59e324c
    __ dci(0x04f8346f);  // xar z15.d, z15.d, z3.d, #8
    // vl128 state = 0x7f064300
    __ dci(0x0479346b);  // xar z11.s, z11.s, z3.s, #7
    // vl128 state = 0x0c0d3573
    __ dci(0x0461346a);  // xar z10.s, z10.s, z3.s, #31
    // vl128 state = 0x3c61530d
    __ dci(0x0464346b);  // xar z11.s, z11.s, z3.s, #28
    // vl128 state = 0x137c1433
    __ dci(0x04643469);  // xar z9.s, z9.s, z3.s, #28
    // vl128 state = 0x81d55bb1
    __ dci(0x0464346b);  // xar z11.s, z11.s, z3.s, #28
    // vl128 state = 0xad2ac5c0
    __ dci(0x0434346a);  // xar z10.h, z10.h, z3.h, #12
    // vl128 state = 0x2997a1d9
    __ dci(0x04b434fa);  // xar z26.d, z26.d, z7.d, #44
    // vl128 state = 0x715f758d
    __ dci(0x04e434f2);  // xar z18.d, z18.d, z7.d, #28
    // vl128 state = 0x8bfa19ef
    __ dci(0x04ec34b3);  // xar z19.d, z19.d, z5.d, #20
    // vl128 state = 0xa8d646a5
    __ dci(0x04ae34b7);  // xar z23.d, z23.d, z5.d, #50
    // vl128 state = 0xf590c489
    __ dci(0x04ae34a7);  // xar z7.d, z7.d, z5.d, #50
    // vl128 state = 0xd6aafb5e
    __ dci(0x04ae3417);  // xar z23.d, z23.d, z0.d, #50
    // vl128 state = 0xd40a8d1a
  }

  // Hash the final machine state into `state`, then load it into w0 so that
  // it can be checked once the generated code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per supported vector length, selected below by
    // GetSVELaneCount(kQRegSize) - 1.
    uint32_t expected_hashes[] = {
        0xd40a8d1a,
        0x834982b0,
        0x6fd8c07b,
        0x2654e6f3,
        0x79fa44fb,
        0xc8a60223,
        0xd12f35f0,
        0x1e0a3315,
        0x6970dcd2,
        0x62305aed,
        0xb9846a55,
        0x1147e436,
        0x97a8ceaa,
        0xe8f80c0e,
        0xea3ab3e7,
        0xb2abd654,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
4923
// Regression test for the SVE2 HISTCNT instruction (S and D lane sizes, with
// a variety of governing predicates).
//
// Starting from the state installed by SetInitialMachineState(), 100 raw
// instruction encodings are emitted with dci(). A hash of the resulting
// machine state is then computed, loaded into w0 and compared against the
// expected value for the vector length in use. The `vl128 state` annotations
// record the running hash after each instruction for a 128-bit vector length;
// the final annotation equals expected_hashes[0].
TEST_SVE(sve2_histcnt) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 100 encodings emitted below.
    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
    __ dci(0x45e8c2f9);  // histcnt z25.d, p0/z, z23.d, z8.d
    // vl128 state = 0x892c6962
    __ dci(0x45e8c1f1);  // histcnt z17.d, p0/z, z15.d, z8.d
    // vl128 state = 0x6ef7d729
    __ dci(0x45e8c3a1);  // histcnt z1.d, p0/z, z29.d, z8.d
    // vl128 state = 0x17654f81
    __ dci(0x45e8c3a9);  // histcnt z9.d, p0/z, z29.d, z8.d
    // vl128 state = 0xe1a0067e
    __ dci(0x45e8c0a8);  // histcnt z8.d, p0/z, z5.d, z8.d
    // vl128 state = 0xd41f511b
    __ dci(0x45e8d0f8);  // histcnt z24.d, p4/z, z7.d, z8.d
    // vl128 state = 0x8b73945a
    __ dci(0x45e8d0fa);  // histcnt z26.d, p4/z, z7.d, z8.d
    // vl128 state = 0xc175acec
    __ dci(0x45aad0fb);  // histcnt z27.s, p4/z, z7.s, z10.s
    // vl128 state = 0x44f8385b
    __ dci(0x45aad2df);  // histcnt z31.s, p4/z, z22.s, z10.s
    // vl128 state = 0x52cd5d17
    __ dci(0x45aad2dd);  // histcnt z29.s, p4/z, z22.s, z10.s
    // vl128 state = 0x9f8d9611
    __ dci(0x45abd2f5);  // histcnt z21.s, p4/z, z23.s, z11.s
    // vl128 state = 0x5cc45fb0
    __ dci(0x45aad0f7);  // histcnt z23.s, p4/z, z7.s, z10.s
    // vl128 state = 0x5096a07f
    __ dci(0x45aad1b3);  // histcnt z19.s, p4/z, z13.s, z10.s
    // vl128 state = 0xf25781a6
    __ dci(0x45a8d1f2);  // histcnt z18.s, p4/z, z15.s, z8.s
    // vl128 state = 0xc7025934
    __ dci(0x45a0d0f6);  // histcnt z22.s, p4/z, z7.s, z0.s
    // vl128 state = 0xcda9c72a
    __ dci(0x45a0d87e);  // histcnt z30.s, p6/z, z3.s, z0.s
    // vl128 state = 0x75f6bbcc
    __ dci(0x45a0dc4e);  // histcnt z14.s, p7/z, z2.s, z0.s
    // vl128 state = 0x5e4e9fe0
    __ dci(0x45a0dc4a);  // histcnt z10.s, p7/z, z2.s, z0.s
    // vl128 state = 0x0ec8d2b8
    __ dci(0x45b0cc4b);  // histcnt z11.s, p3/z, z2.s, z16.s
    // vl128 state = 0x1228c442
    __ dci(0x45b0cc43);  // histcnt z3.s, p3/z, z2.s, z16.s
    // vl128 state = 0xc6067f7b
    __ dci(0x45b8cc73);  // histcnt z19.s, p3/z, z3.s, z24.s
    // vl128 state = 0xf04f9753
    __ dci(0x45b8d877);  // histcnt z23.s, p6/z, z3.s, z24.s
    // vl128 state = 0xdeb83b41
    __ dci(0x45b8d47f);  // histcnt z31.s, p5/z, z3.s, z24.s
    // vl128 state = 0x8ab3905f
    __ dci(0x45b8d46f);  // histcnt z15.s, p5/z, z3.s, z24.s
    // vl128 state = 0x762bf277
    __ dci(0x45b8d16d);  // histcnt z13.s, p4/z, z11.s, z24.s
    // vl128 state = 0x9a670783
    __ dci(0x45bcd125);  // histcnt z5.s, p4/z, z9.s, z28.s
    // vl128 state = 0x3e399489
    __ dci(0x45b8d021);  // histcnt z1.s, p4/z, z1.s, z24.s
    // vl128 state = 0x7fc8f1e7
    __ dci(0x45f8d220);  // histcnt z0.d, p4/z, z17.d, z24.d
    // vl128 state = 0x9cb004db
    __ dci(0x45f0d621);  // histcnt z1.d, p5/z, z17.d, z16.d
    // vl128 state = 0xdd4161b5
    __ dci(0x45a0d625);  // histcnt z5.s, p5/z, z17.s, z0.s
    // vl128 state = 0xb5cb70bb
    __ dci(0x45a0d4a1);  // histcnt z1.s, p5/z, z5.s, z0.s
    // vl128 state = 0x4452182b
    __ dci(0x45a0d4a3);  // histcnt z3.s, p5/z, z5.s, z0.s
    // vl128 state = 0x71298d3c
    __ dci(0x45a0d4a2);  // histcnt z2.s, p5/z, z5.s, z0.s
    // vl128 state = 0xa22914e1
    __ dci(0x45a2d6a3);  // histcnt z3.s, p5/z, z21.s, z2.s
    // vl128 state = 0x6183bfbc
    __ dci(0x45a2de21);  // histcnt z1.s, p7/z, z17.s, z2.s
    // vl128 state = 0xd1ebb242
    __ dci(0x45e2dc20);  // histcnt z0.d, p7/z, z1.d, z2.d
    // vl128 state = 0x297a432d
    __ dci(0x45e2d8b0);  // histcnt z16.d, p6/z, z5.d, z2.d
    // vl128 state = 0x1d2557c0
    __ dci(0x45eed8b8);  // histcnt z24.d, p6/z, z5.d, z14.d
    // vl128 state = 0xe6ef07fa
    __ dci(0x45eed8a8);  // histcnt z8.d, p6/z, z5.d, z14.d
    // vl128 state = 0xaf3665bb
    __ dci(0x45aed88c);  // histcnt z12.s, p6/z, z4.s, z14.s
    // vl128 state = 0x5c2b38bc
    __ dci(0x45efd88d);  // histcnt z13.d, p6/z, z4.d, z15.d
    // vl128 state = 0x8d5527d8
    __ dci(0x45ffc88f);  // histcnt z15.d, p2/z, z4.d, z31.d
    // vl128 state = 0x1d2e08d2
    __ dci(0x45fbc98d);  // histcnt z13.d, p2/z, z12.d, z27.d
    // vl128 state = 0x007388b0
    __ dci(0x45bbcd8f);  // histcnt z15.s, p3/z, z12.s, z27.s
    // vl128 state = 0x9008a7ba
    __ dci(0x45b3cc9f);  // histcnt z31.s, p3/z, z4.s, z19.s
    // vl128 state = 0xc4030ca4
    __ dci(0x45bbc497);  // histcnt z23.s, p1/z, z4.s, z27.s
    // vl128 state = 0xeaf4a0b6
    __ dci(0x45fbc415);  // histcnt z21.d, p1/z, z0.d, z27.d
    // vl128 state = 0x03d85428
    __ dci(0x45ffc517);  // histcnt z23.d, p1/z, z8.d, z31.d
    // vl128 state = 0xa836a751
    __ dci(0x45fbc596);  // histcnt z22.d, p1/z, z12.d, z27.d
    // vl128 state = 0x77e33f69
    __ dci(0x45fbc4c6);  // histcnt z6.d, p1/z, z6.d, z27.d
    // vl128 state = 0xf47bb379
    __ dci(0x45fbc4ce);  // histcnt z14.d, p1/z, z6.d, z27.d
    // vl128 state = 0x6dbfff33
    __ dci(0x45fad4ca);  // histcnt z10.d, p5/z, z6.d, z26.d
    // vl128 state = 0xbc04915a
    __ dci(0x45ead45a);  // histcnt z26.d, p5/z, z2.d, z10.d
    // vl128 state = 0x8969b1c5
    __ dci(0x45aad4ca);  // histcnt z10.s, p5/z, z6.s, z10.s
    // vl128 state = 0x58d2dfac
    __ dci(0x45aed0ce);  // histcnt z14.s, p4/z, z6.s, z14.s
    // vl128 state = 0xfa793cc7
    __ dci(0x45aec4c6);  // histcnt z6.s, p1/z, z6.s, z14.s
    // vl128 state = 0xff4c99d8
    __ dci(0x45abc4c7);  // histcnt z7.s, p1/z, z6.s, z11.s
    // vl128 state = 0x2b44a4ae
    __ dci(0x45abc4cf);  // histcnt z15.s, p1/z, z6.s, z11.s
    // vl128 state = 0xbb3f8ba4
    __ dci(0x45a9c44e);  // histcnt z14.s, p1/z, z2.s, z9.s
    // vl128 state = 0x5a3a40a6
    __ dci(0x45b9c46f);  // histcnt z15.s, p1/z, z3.s, z25.s
    // vl128 state = 0x72e31c5f
    __ dci(0x45b9c46e);  // histcnt z14.s, p1/z, z3.s, z25.s
    // vl128 state = 0xde56263e
    __ dci(0x45b1c67e);  // histcnt z30.s, p1/z, z19.s, z17.s
    // vl128 state = 0xc570f0b9
    __ dci(0x45b5c63a);  // histcnt z26.s, p1/z, z17.s, z21.s
    // vl128 state = 0x72ab1716
    __ dci(0x45a5c72a);  // histcnt z10.s, p1/z, z25.s, z5.s
    // vl128 state = 0xe8848b2d
    __ dci(0x45a1c77a);  // histcnt z26.s, p1/z, z27.s, z1.s
    // vl128 state = 0x2975ac38
    __ dci(0x45a1c77b);  // histcnt z27.s, p1/z, z27.s, z1.s
    // vl128 state = 0xb0638363
    __ dci(0x45a1c773);  // histcnt z19.s, p1/z, z27.s, z1.s
    // vl128 state = 0xc9620a45
    __ dci(0x45e9c777);  // histcnt z23.d, p1/z, z27.d, z9.d
    // vl128 state = 0x0414c679
    __ dci(0x45ebc67f);  // histcnt z31.d, p1/z, z19.d, z11.d
    // vl128 state = 0xc1d4410e
    __ dci(0x45ebc37b);  // histcnt z27.d, p0/z, z27.d, z11.d
    // vl128 state = 0x3ae32e36
    __ dci(0x45abd373);  // histcnt z19.s, p4/z, z27.s, z11.s
    // vl128 state = 0x75ffe12c
    __ dci(0x45fbd363);  // histcnt z3.d, p4/z, z27.d, z27.d
    // vl128 state = 0x4084743b
    __ dci(0x45ffc36b);  // histcnt z11.d, p0/z, z27.d, z31.d
    // vl128 state = 0xfade136b
    __ dci(0x45ffc3ca);  // histcnt z10.d, p0/z, z30.d, z31.d
    // vl128 state = 0x60f18f50
    __ dci(0x45efc2ce);  // histcnt z14.d, p0/z, z22.d, z15.d
    // vl128 state = 0x162ed112
    __ dci(0x45adc2c6);  // histcnt z6.s, p0/z, z22.s, z13.s
    // vl128 state = 0x4f84cb96
    __ dci(0x45adc2c4);  // histcnt z4.s, p0/z, z22.s, z13.s
    // vl128 state = 0x5d04ccb6
    __ dci(0x45a7c2d4);  // histcnt z20.s, p0/z, z22.s, z7.s
    // vl128 state = 0x38efdab7
    __ dci(0x45a6c0c4);  // histcnt z4.s, p0/z, z6.s, z6.s
    // vl128 state = 0xff7a0a24
    __ dci(0x45a7c2c0);  // histcnt z0.s, p0/z, z22.s, z7.s
    // vl128 state = 0x5f7b0a31
    __ dci(0x45a7d6c1);  // histcnt z1.s, p5/z, z22.s, z7.s
    // vl128 state = 0x1e8a6f5f
    __ dci(0x45afd7c5);  // histcnt z5.s, p5/z, z30.s, z15.s
    // vl128 state = 0x655ed237
    __ dci(0x45add3d5);  // histcnt z21.s, p4/z, z30.s, z13.s
    // vl128 state = 0x8c7226a9
    __ dci(0x45add3d4);  // histcnt z20.s, p4/z, z30.s, z13.s
    // vl128 state = 0x727304ad
    __ dci(0x45bcd3dc);  // histcnt z28.s, p4/z, z30.s, z28.s
    // vl128 state = 0xce4e49d0
    __ dci(0x45bcd3cc);  // histcnt z12.s, p4/z, z30.s, z28.s
    // vl128 state = 0x5c252d7d
    __ dci(0x45bcd15c);  // histcnt z28.s, p4/z, z10.s, z28.s
    // vl128 state = 0x5e1163f7
    __ dci(0x45b5d154);  // histcnt z20.s, p4/z, z10.s, z21.s
    // vl128 state = 0xf77c50ee
    __ dci(0x45b5d156);  // histcnt z22.s, p4/z, z10.s, z21.s
    // vl128 state = 0xe35c8438
    __ dci(0x45b3d157);  // histcnt z23.s, p4/z, z10.s, z19.s
    // vl128 state = 0xf6926673
    __ dci(0x45b3d156);  // histcnt z22.s, p4/z, z10.s, z19.s
    // vl128 state = 0xf9022ad2
    __ dci(0x45b3c554);  // histcnt z20.s, p1/z, z10.s, z19.s
    // vl128 state = 0xb90dfe28
    __ dci(0x45bbd55c);  // histcnt z28.s, p5/z, z10.s, z27.s
    // vl128 state = 0x9a939b84
    __ dci(0x45abd57e);  // histcnt z30.s, p5/z, z11.s, z11.s
    // vl128 state = 0xd9ad8be7
    __ dci(0x45abcd7a);  // histcnt z26.s, p3/z, z11.s, z11.s
    // vl128 state = 0x14869e4f
    __ dci(0x45bbc57b);  // histcnt z27.s, p1/z, z11.s, z27.s
    // vl128 state = 0x25130793
    __ dci(0x45bfcd73);  // histcnt z19.s, p3/z, z11.s, z31.s
    // vl128 state = 0x53adf455
    __ dci(0x45bfc863);  // histcnt z3.s, p2/z, z3.s, z31.s
    // vl128 state = 0x82fa6c44
    __ dci(0x45b7cc62);  // histcnt z2.s, p3/z, z3.s, z23.s
    // vl128 state = 0xfaefda71
    __ dci(0x45b6cce3);  // histcnt z3.s, p3/z, z7.s, z22.s
    // vl128 state = 0xdd697c2a
  }

  // Hash the final machine state into `state`, then load it into w0 so that
  // it can be checked once the generated code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per supported vector length, selected below by
    // GetSVELaneCount(kQRegSize) - 1.
    uint32_t expected_hashes[] = {
        0xdd697c2a,
        0x1415ff61,
        0xb9e154c8,
        0x566a2af5,
        0xef7574b4,
        0x6da83471,
        0x356d5c4d,
        0x798a2403,
        0x2c16e862,
        0x6fa84021,
        0x6e09e8ff,
        0xc13a0eb6,
        0x88c92928,
        0xe51672fe,
        0x229b8ed5,
        0x9e662757,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
5167
// Emits a fixed stream of raw HISTSEG encodings after resetting the machine
// state, then compares the resulting machine-state hash with the value recorded
// for the SVE lane count reported by the register dump.
TEST_SVE(sve2_histseg) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw instruction words, emitted strictly in table order. Every entry keeps
  // its disassembly and the `vl128 state` annotation recorded for it; the last
  // annotation equals the first entry of the expected-hash table below.
  static const uint32_t kEncodings[] = {
      0x4524a228,  // histseg z8.b, z17.b, z4.b; vl128 state = 0x21ed28a1
      0x452ca20c,  // histseg z12.b, z16.b, z12.b; vl128 state = 0xc135d593
      0x453ca288,  // histseg z8.b, z20.b, z28.b; vl128 state = 0xb86cd6e7
      0x4538a380,  // histseg z0.b, z28.b, z24.b; vl128 state = 0xd28ddd71
      0x452aa388,  // histseg z8.b, z28.b, z10.b; vl128 state = 0x322d3aa8
      0x452aa38c,  // histseg z12.b, z28.b, z10.b; vl128 state = 0x67d668fc
      0x4532a384,  // histseg z4.b, z28.b, z18.b; vl128 state = 0xc57505d4
      0x4537a380,  // histseg z0.b, z28.b, z23.b; vl128 state = 0xb47d0a11
      0x4535a3a8,  // histseg z8.b, z29.b, z21.b; vl128 state = 0x347adf6f
      0x4535a3ac,  // histseg z12.b, z29.b, z21.b; vl128 state = 0xb763510c
      0x4535a3ae,  // histseg z14.b, z29.b, z21.b; vl128 state = 0xb28319d5
      0x4525a39e,  // histseg z30.b, z28.b, z5.b; vl128 state = 0x0adc6533
      0x4525a38e,  // histseg z14.b, z28.b, z5.b; vl128 state = 0x248409c6
      0x452da3c6,  // histseg z6.b, z30.b, z13.b; vl128 state = 0xa71c85d6
      0x452da187,  // histseg z7.b, z12.b, z13.b; vl128 state = 0x7314b8a0
      0x4525a1a6,  // histseg z6.b, z13.b, z5.b; vl128 state = 0x129013d5
      0x4527a18e,  // histseg z14.b, z12.b, z7.b; vl128 state = 0xc6b207b7
      0x4521a18c,  // histseg z12.b, z12.b, z1.b; vl128 state = 0x03957bb5
      0x4524a18d,  // histseg z13.b, z12.b, z4.b; vl128 state = 0x379af1c6
      0x4524a125,  // histseg z5.b, z9.b, z4.b; vl128 state = 0x93c462cc
      0x4522a127,  // histseg z7.b, z9.b, z2.b; vl128 state = 0xc95cb1a9
      0x4532a117,  // histseg z23.b, z8.b, z18.b; vl128 state = 0xc50e4e66
      0x4533a15f,  // histseg z31.b, z10.b, z19.b; vl128 state = 0x76663e3e
      0x4533a14f,  // histseg z15.b, z10.b, z19.b; vl128 state = 0x84f5ca5f
      0x4533a0ce,  // histseg z14.b, z6.b, z19.b; vl128 state = 0x50d7de3d
      0x453ba1cc,  // histseg z12.b, z14.b, z27.b; vl128 state = 0x32e3b53f
      0x453ba0fc,  // histseg z28.b, z7.b, z27.b; vl128 state = 0x0a5d4180
      0x452ba2f4,  // histseg z20.b, z23.b, z11.b; vl128 state = 0x91b77585
      0x453ba2c4,  // histseg z4.b, z22.b, z27.b; vl128 state = 0x5cd0c690
      0x453ba2cc,  // histseg z12.b, z22.b, z27.b; vl128 state = 0xa6a5f749
      0x453ba1c8,  // histseg z8.b, z14.b, z27.b; vl128 state = 0xe5036937
      0x4529a1c9,  // histseg z9.b, z14.b, z9.b; vl128 state = 0x13c620c8
      0x4529a1a8,  // histseg z8.b, z13.b, z9.b; vl128 state = 0xbf71d421
      0x4521a198,  // histseg z24.b, z12.b, z1.b; vl128 state = 0xe01d1160
      0x4529a1ba,  // histseg z26.b, z13.b, z9.b; vl128 state = 0xaa1b29d6
      0x452fa1bb,  // histseg z27.b, z13.b, z15.b; vl128 state = 0x2f96bd61
      0x452fa0ff,  // histseg z31.b, z7.b, z15.b; vl128 state = 0x5aeb6bec
      0x4527a0de,  // histseg z30.b, z6.b, z7.b; vl128 state = 0xbcb1b299
      0x4525a1d6,  // histseg z22.b, z14.b, z5.b; vl128 state = 0x0f89ea9b
      0x4525a1d7,  // histseg z23.b, z14.b, z5.b; vl128 state = 0xe40f30a2
      0x4521a3df,  // histseg z31.b, z30.b, z1.b; vl128 state = 0x342ff33b
      0x4521a197,  // histseg z23.b, z12.b, z1.b; vl128 state = 0xdfa92902
      0x4521a187,  // histseg z7.b, z12.b, z1.b; vl128 state = 0x8531fa67
      0x4535a186,  // histseg z6.b, z12.b, z21.b; vl128 state = 0xe4b55112
      0x4535a196,  // histseg z22.b, z12.b, z21.b; vl128 state = 0x5d26970e
      0x4525a097,  // histseg z23.b, z4.b, z5.b; vl128 state = 0x7dcb1d13
      0x4525a095,  // histseg z21.b, z4.b, z5.b; vl128 state = 0x5fb0789c
      0x452da017,  // histseg z23.b, z0.b, z13.b; vl128 state = 0x7f5df281
      0x452da295,  // histseg z21.b, z20.b, z13.b; vl128 state = 0x9e6f5eaf
      0x453da39d,  // histseg z29.b, z28.b, z29.b; vl128 state = 0x532f95a9
      0x453da39c,  // histseg z28.b, z28.b, z29.b; vl128 state = 0x64202514
      0x4535a29e,  // histseg z30.b, z20.b, z21.b; vl128 state = 0x44bda972
      0x4535a0bf,  // histseg z31.b, z5.b, z21.b; vl128 state = 0x258125d6
      0x4535a0bb,  // histseg z27.b, z5.b, z21.b; vl128 state = 0xec63caaf
      0x4537a2b3,  // histseg z19.b, z21.b, z23.b; vl128 state = 0xb937b6e8
      0x4525a2b1,  // histseg z17.b, z21.b, z5.b; vl128 state = 0x1515ee94
      0x4525a2b5,  // histseg z21.b, z21.b, z5.b; vl128 state = 0x4bb06873
      0x4525a0fd,  // histseg z29.b, z7.b, z5.b; vl128 state = 0x23446114
      0x4524a079,  // histseg z25.b, z3.b, z4.b; vl128 state = 0x48d52cf6
      0x4524a0d8,  // histseg z24.b, z6.b, z4.b; vl128 state = 0x0deef019
      0x452ca09c,  // histseg z28.b, z4.b, z12.b; vl128 state = 0xaba6e202
      0x453ca018,  // histseg z24.b, z0.b, z28.b; vl128 state = 0xee9d3eed
      0x4539a008,  // histseg z8.b, z0.b, z25.b; vl128 state = 0x254c57f3
      0x4539a00c,  // histseg z12.b, z0.b, z25.b; vl128 state = 0x28fea24d
      0x4531a048,  // histseg z8.b, z2.b, z17.b; vl128 state = 0xe32fcb53
      0x4530a0ca,  // histseg z10.b, z6.b, z16.b; vl128 state = 0xb3a9860b
      0x4520a0ee,  // histseg z14.b, z7.b, z0.b; vl128 state = 0xef9e57fa
      0x4520a1de,  // histseg z30.b, z14.b, z0.b; vl128 state = 0x295902e9
      0x4520a38e,  // histseg z14.b, z28.b, z0.b; vl128 state = 0x756ed318
      0x4528a30f,  // histseg z15.b, z24.b, z8.b; vl128 state = 0x8591dff9
      0x4538a39f,  // histseg z31.b, z28.b, z24.b; vl128 state = 0xe4ad535d
      0x4538a39b,  // histseg z27.b, z28.b, z24.b; vl128 state = 0x2d4fbc24
      0x4538a093,  // histseg z19.b, z4.b, z24.b; vl128 state = 0xd8ee932a
      0x453aa0a3,  // histseg z3.b, z5.b, z26.b; vl128 state = 0x768b71a6
      0x453aa0ab,  // histseg z11.b, z5.b, z26.b; vl128 state = 0xa78673d7
      0x452ea0bb,  // histseg z27.b, z5.b, z14.b; vl128 state = 0x6e649cae
      0x452fa1bf,  // histseg z31.b, z13.b, z15.b; vl128 state = 0x0f58100a
      0x452fa1be,  // histseg z30.b, z13.b, z15.b; vl128 state = 0xc99f4519
      0x452fa3f6,  // histseg z22.b, z31.b, z15.b; vl128 state = 0x700c8305
      0x452fa3f4,  // histseg z20.b, z31.b, z15.b; vl128 state = 0xbdecfddc
      0x453fa3b0,  // histseg z16.b, z29.b, z31.b; vl128 state = 0x3f5b7578
      0x453fa3b8,  // histseg z24.b, z29.b, z31.b; vl128 state = 0xf0076715
      0x453fa228,  // histseg z8.b, z17.b, z31.b; vl128 state = 0x3bd60e0b
      0x4536a22a,  // histseg z10.b, z17.b, z22.b; vl128 state = 0x1171f63c
      0x4530a23a,  // histseg z26.b, z17.b, z16.b; vl128 state = 0x3fef270c
      0x4522a23e,  // histseg z30.b, z17.b, z2.b; vl128 state = 0xf928721f
      0x4524a23c,  // histseg z28.b, z17.b, z4.b; vl128 state = 0xecec697b
      0x4527a238,  // histseg z24.b, z17.b, z7.b; vl128 state = 0x23b07b16
      0x4525a210,  // histseg z16.b, z16.b, z5.b; vl128 state = 0x9c1c2ac5
      0x4525a200,  // histseg z0.b, z16.b, z5.b; vl128 state = 0xc446f89b
      0x4520a202,  // histseg z2.b, z16.b, z0.b; vl128 state = 0x8afba046
      0x4521a303,  // histseg z3.b, z24.b, z1.b; vl128 state = 0xf0b0f9f3
      0x4520a201,  // histseg z1.b, z16.b, z0.b; vl128 state = 0x8922615b
      0x4528a223,  // histseg z3.b, z17.b, z8.b; vl128 state = 0xf36938ee
      0x4528a367,  // histseg z7.b, z27.b, z8.b; vl128 state = 0xc2d96c41
      0x452ca3e6,  // histseg z6.b, z31.b, z12.b; vl128 state = 0xf15e835f
      0x452ea3c4,  // histseg z4.b, z30.b, z14.b; vl128 state = 0xb3964bd8
      0x452da3c6,  // histseg z6.b, z30.b, z13.b; vl128 state = 0x8011a4c6
      0x452da0c4,  // histseg z4.b, z6.b, z13.b; vl128 state = 0x0fbedf54
      0x4529a0ec,  // histseg z12.b, z7.b, z9.b; vl128 state = 0x9a4d7031
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly one instruction per table entry.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The hash helper is handed the address of `final_hash`; the Mov/Ldr pair
  // then loads the 32-bit value stored at that address into w0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized SVE lanes - 1).
    uint32_t hashes_by_lane_count[] = {
        0x9a4d7031,
        0xebaa80ad,
        0x702155a3,
        0x181fff8d,
        0x7b071373,
        0x1bf0af96,
        0x9ca15297,
        0x615d2f4a,
        0x7658b554,
        0xd2bf7319,
        0xddf8d492,
        0xf5938d08,
        0xbe354cb1,
        0xfe2d5d63,
        0x29818684,
        0x2c862ef9,
    };
    const auto hash_index = core.GetSVELaneCount(kQRegSize) - 1;
    ASSERT_EQUAL_64(hashes_by_lane_count[hash_index], x0);
  }
}
5411
// Emits a fixed stream of raw TBL/TBX encodings after resetting the machine
// state, then compares the resulting machine-state hash with the value recorded
// for the SVE lane count reported by the register dump.
TEST_SVE(sve2_table) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw instruction words, emitted strictly in table order. Every entry keeps
  // its disassembly and the `vl128 state` annotation recorded for it; the last
  // annotation equals the first entry of the expected-hash table below.
  static const uint32_t kEncodings[] = {
      0x05212a38,  // tbl z24.b, {z17.b, z18.b}, z1.b; vl128 state = 0xbdd1e1c1
      0x05212810,  // tbl z16.b, {z0.b, z1.b}, z1.b; vl128 state = 0x80ca38b6
      0x05e12812,  // tbl z18.d, {z0.d, z1.d}, z1.d; vl128 state = 0xb59fe024
      0x05632802,  // tbl z2.h, {z0.h, z1.h}, z3.h; vl128 state = 0xfb22b8f9
      0x05e32906,  // tbl z6.d, {z8.d, z9.d}, z3.d; vl128 state = 0x78ba34e9
      0x05e22942,  // tbl z2.d, {z10.d, z11.d}, z2.d; vl128 state = 0x000b006f
      0x05f22d46,  // tbx z6.d, z10.d, z18.d; vl128 state = 0x28b746e5
      0x05f32947,  // tbl z7.d, {z10.d, z11.d}, z19.d; vl128 state = 0xfcbf7b93
      0x05e32963,  // tbl z3.d, {z11.d, z12.d}, z3.d; vl128 state = 0x2891c0aa
      0x05e33161,  // tbl z1.d, {z11.d}, z3.d; vl128 state = 0x3468b9d4
      0x05e13149,  // tbl z9.d, {z10.d}, z1.d; vl128 state = 0xc2adf02b
      0x0560314d,  // tbl z13.h, {z10.h}, z0.h; vl128 state = 0xff9f1abb
      0x0578314c,  // tbl z12.h, {z10.h}, z24.h; vl128 state = 0x2cffcd38
      0x05e83144,  // tbl z4.d, {z10.d}, z8.d; vl128 state = 0x8e5ca010
      0x05e83146,  // tbl z6.d, {z10.d}, z8.d; vl128 state = 0xa6e0e69a
      0x05b83147,  // tbl z7.s, {z10.s}, z24.s; vl128 state = 0x513e6328
      0x053831d7,  // tbl z23.b, {z14.b}, z24.b; vl128 state = 0xe2bd7bdf
      0x056831df,  // tbl z31.h, {z14.h}, z8.h; vl128 state = 0xf4881e93
      0x0560319e,  // tbl z30.h, {z12.h}, z0.h; vl128 state = 0x4cd76275
      0x0522319a,  // tbl z26.b, {z12.b}, z2.b; vl128 state = 0x06d15ac3
      0x0522318a,  // tbl z10.b, {z12.b}, z2.b; vl128 state = 0x5657179b
      0x0522318e,  // tbl z14.b, {z12.b}, z2.b; vl128 state = 0x7def33b7
      0x05a6318a,  // tbl z10.s, {z12.s}, z6.s; vl128 state = 0x38ee6756
      0x05b2318b,  // tbl z11.s, {z12.s}, z18.s; vl128 state = 0x6ba1d599
      0x05a231bb,  // tbl z27.s, {z13.s}, z2.s; vl128 state = 0xee2c412e
      0x05a231ab,  // tbl z11.s, {z13.s}, z2.s; vl128 state = 0xa183e51b
      0x05a831af,  // tbl z15.s, {z13.s}, z8.s; vl128 state = 0xcd60a839
      0x05ea31a7,  // tbl z7.d, {z13.d}, z10.d; vl128 state = 0x3abe2d8b
      0x05fa33af,  // tbl z15.d, {z29.d}, z26.d; vl128 state = 0xf596f00c
      0x05fe32ae,  // tbl z14.d, {z21.d}, z30.d; vl128 state = 0x3e791a5a
      0x057a32be,  // tbl z30.h, {z21.h}, z26.h; vl128 state = 0x27f4086e
      0x05fe32ae,  // tbl z14.d, {z21.d}, z30.d; vl128 state = 0xec1be238
      0x05fe32aa,  // tbl z10.d, {z21.d}, z30.d; vl128 state = 0xa91ab6d9
      0x057e32e2,  // tbl z2.h, {z23.h}, z30.h; vl128 state = 0xd1ab825f
      0x057e32e0,  // tbl z0.h, {z23.h}, z30.h; vl128 state = 0xca42860c
      0x057f3270,  // tbl z16.h, {z19.h}, z31.h; vl128 state = 0xff27daa0
      0x05673271,  // tbl z17.h, {z19.h}, z7.h; vl128 state = 0x9b358bbf
      0x05e73379,  // tbl z25.d, {z27.d}, z7.d; vl128 state = 0xf0a4c65d
      0x05e3333d,  // tbl z29.d, {z25.d}, z3.d; vl128 state = 0x3de40d5b
      0x05e33335,  // tbl z21.d, {z25.d}, z3.d; vl128 state = 0xfeadc4fa
      0x05f33137,  // tbl z23.d, {z9.d}, z19.d; vl128 state = 0x417c23c2
      0x05b33336,  // tbl z22.s, {z25.s}, z19.s; vl128 state = 0x4bd7bddc
      0x05b1323e,  // tbl z30.s, {z17.s}, z17.s; vl128 state = 0x525aafe8
      0x05b0303c,  // tbl z28.s, {z1.s}, z16.s; vl128 state = 0xee67e295
      0x05b0308c,  // tbl z12.s, {z4.s}, z16.s; vl128 state = 0xce1a6811
      0x05b030e8,  // tbl z8.s, {z7.s}, z16.s; vl128 state = 0xfba53f74
      0x05a030b8,  // tbl z24.s, {z5.s}, z0.s; vl128 state = 0x56a69350
      0x05e830b0,  // tbl z16.d, {z5.d}, z8.d; vl128 state = 0xe0665941
      0x05e830b2,  // tbl z18.d, {z5.d}, z8.d; vl128 state = 0xc6680470
      0x05e931b3,  // tbl z19.d, {z13.d}, z9.d; vl128 state = 0x64a925a9
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly one instruction per table entry.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The hash helper is handed the address of `final_hash`; the Mov/Ldr pair
  // then loads the 32-bit value stored at that address into w0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized SVE lanes - 1).
    uint32_t hashes_by_lane_count[] = {
        0x64a925a9,
        0x89750b9d,
        0xb803659e,
        0xa21efc63,
        0x67f967b8,
        0x4e52e209,
        0x42c1692f,
        0x4d8539c7,
        0x6828f0f4,
        0x3c75d27a,
        0x2e3341c9,
        0xfe4a8f4f,
        0xd27b47ae,
        0x665d8f8b,
        0x3230c584,
        0xcf1d6e82,
    };
    const auto hash_index = core.GetSVELaneCount(kQRegSize) - 1;
    ASSERT_EQUAL_64(hashes_by_lane_count[hash_index], x0);
  }
}
5555
// Emits a fixed stream of raw (vector-form) CDOT encodings after resetting the
// machine state, then compares the resulting machine-state hash with the value
// recorded for the SVE lane count reported by the register dump.
TEST_SVE(sve2_cdot) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw instruction words, emitted strictly in table order. Every entry keeps
  // its disassembly and the `vl128 state` annotation recorded for it; the last
  // annotation equals the first entry of the expected-hash table below.
  static const uint32_t kEncodings[] = {
      0x4488104f,  // cdot z15.s, z2.b, z8.b, #0; vl128 state = 0x25fd51d1
      0x448a106e,  // cdot z14.s, z3.b, z10.b, #0; vl128 state = 0x490576d5
      0x448a1246,  // cdot z6.s, z18.b, z10.b, #0; vl128 state = 0x25a6fe4b
      0x448e12ce,  // cdot z14.s, z22.b, z14.b, #0; vl128 state = 0xc378b2df
      0x448412cf,  // cdot z15.s, z22.b, z4.b, #0; vl128 state = 0xe92a358d
      0x448412c7,  // cdot z7.s, z22.b, z4.b, #0; vl128 state = 0x7408b292
      0x44c41257,  // cdot z23.d, z18.h, z4.h, #0; vl128 state = 0xebc02289
      0x448412d5,  // cdot z21.s, z22.b, z4.b, #0; vl128 state = 0x9a7c2f1a
      0x448712d7,  // cdot z23.s, z22.b, z7.b, #0; vl128 state = 0xed91e0b4
      0x44831295,  // cdot z21.s, z20.b, z3.b, #0; vl128 state = 0x3dae4184
      0x44821385,  // cdot z5.s, z28.b, z2.b, #0; vl128 state = 0x213fb541
      0x44c213c1,  // cdot z1.d, z30.h, z2.h, #0; vl128 state = 0xcba3207a
      0x44c61340,  // cdot z0.d, z26.h, z6.h, #0; vl128 state = 0x9d6041f3
      0x44c413d0,  // cdot z16.d, z30.h, z4.h, #0; vl128 state = 0x4b931738
      0x44cc12d8,  // cdot z24.d, z22.h, z12.h, #0; vl128 state = 0x2503fbcc
      0x448c1ac8,  // cdot z8.s, z22.b, z12.b, #180; vl128 state = 0x53bc5303
      0x448c12ec,  // cdot z12.s, z23.b, z12.b, #0; vl128 state = 0xb3bf45c7
      0x448812ad,  // cdot z13.s, z21.b, z8.b, #0; vl128 state = 0x938b4e4f
      0x44881689,  // cdot z9.s, z20.b, z8.b, #90; vl128 state = 0x70106ddd
      0x4498128b,  // cdot z11.s, z20.b, z24.b, #0; vl128 state = 0x92108bb2
      0x4498129b,  // cdot z27.s, z20.b, z24.b, #0; vl128 state = 0x545230eb
      0x449a12bf,  // cdot z31.s, z21.b, z26.b, #0; vl128 state = 0x5cd2fb12
      0x44da10af,  // cdot z15.d, z5.h, z26.h, #0; vl128 state = 0xc03d9146
      0x44da10ae,  // cdot z14.d, z5.h, z26.h, #0; vl128 state = 0xbc2712f7
      0x44db12be,  // cdot z30.d, z21.h, z27.h, #0; vl128 state = 0xccf9d667
      0x449b12ee,  // cdot z14.s, z23.b, z27.b, #0; vl128 state = 0x2c1e08f1
      0x449b12ef,  // cdot z15.s, z23.b, z27.b, #0; vl128 state = 0x159d17d7
      0x449b14ee,  // cdot z14.s, z7.b, z27.b, #90; vl128 state = 0x892c97d3
      0x449b1cac,  // cdot z12.s, z5.b, z27.b, #270; vl128 state = 0x3841ce24
      0x449b1aae,  // cdot z14.s, z21.b, z27.b, #180; vl128 state = 0x30a24868
      0x449a1aec,  // cdot z12.s, z23.b, z26.b, #180; vl128 state = 0x2b836c8a
      0x44981ace,  // cdot z14.s, z22.b, z24.b, #180; vl128 state = 0x16a81963
      0x44901a86,  // cdot z6.s, z20.b, z16.b, #180; vl128 state = 0x924ac9ee
      0x44981b8e,  // cdot z14.s, z28.b, z24.b, #180; vl128 state = 0x3953da61
      0x44891b8a,  // cdot z10.s, z28.b, z9.b, #180; vl128 state = 0xad72b6d5
      0x4499138b,  // cdot z11.s, z28.b, z25.b, #0; vl128 state = 0x569b1b2c
      0x4498119b,  // cdot z27.s, z12.b, z24.b, #0; vl128 state = 0xdbb36925
      0x449c199a,  // cdot z26.s, z12.b, z28.b, #180; vl128 state = 0x4be861d1
      0x44901992,  // cdot z18.s, z12.b, z16.b, #180; vl128 state = 0x1e83ddb5
      0x44901a90,  // cdot z16.s, z20.b, z16.b, #180; vl128 state = 0x180556e0
      0x44911ac0,  // cdot z0.s, z22.b, z17.b, #180; vl128 state = 0x2cbf5db5
      0x44951bc1,  // cdot z1.s, z30.b, z21.b, #180; vl128 state = 0x428f97bd
      0x44851b40,  // cdot z0.s, z26.b, z5.b, #180; vl128 state = 0xe0f0659f
      0x44851a70,  // cdot z16.s, z19.b, z5.b, #180; vl128 state = 0x4142d23c
      0x44861a74,  // cdot z20.s, z19.b, z6.b, #180; vl128 state = 0x74f7d373
      0x44921a76,  // cdot z22.s, z19.b, z18.b, #180; vl128 state = 0x5b4ef670
      0x44921246,  // cdot z6.s, z18.b, z18.b, #0; vl128 state = 0x1fe5d31d
      0x44981247,  // cdot z7.s, z18.b, z24.b, #0; vl128 state = 0x782a0559
      0x44981746,  // cdot z6.s, z26.b, z24.b, #90; vl128 state = 0x84cbc61d
      0x449816c4,  // cdot z4.s, z22.b, z24.b, #90; vl128 state = 0x078aa009
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly one instruction per table entry.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The hash helper is handed the address of `final_hash`; the Mov/Ldr pair
  // then loads the 32-bit value stored at that address into w0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized SVE lanes - 1).
    uint32_t hashes_by_lane_count[] = {
        0x078aa009,
        0x3c4026df,
        0x3ae8e644,
        0x514dfdcd,
        0x2649444a,
        0x74a87bbe,
        0x14b8e9b3,
        0x92c65f4d,
        0xa3015fc1,
        0xab48b8fa,
        0x9e80ef05,
        0xb59b0dde,
        0xbcf04e6f,
        0xa7fa54a1,
        0xaed81dfc,
        0xdc7ffb07,
    };
    const auto hash_index = core.GetSVELaneCount(kQRegSize) - 1;
    ASSERT_EQUAL_64(hashes_by_lane_count[hash_index], x0);
  }
}
5699
// Emits a fixed stream of raw three-operand bitwise encodings (BSL, BSL1N,
// BSL2N, NBSL, BCAX, EOR3) after resetting the machine state, then compares the
// resulting machine-state hash with the value recorded for the SVE lane count
// reported by the register dump.
TEST_SVE(sve2_bitwise_ternary) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw instruction words, emitted strictly in table order. Every entry keeps
  // its disassembly and the `vl128 state` annotation recorded for it; the last
  // annotation equals the first entry of the expected-hash table below.
  static const uint32_t kEncodings[] = {
      0x04793f99,  // bsl1n z25.d, z25.d, z25.d, z28.d; vl128 state = 0x70294e62
      0x04b93f9b,  // bsl2n z27.d, z27.d, z25.d, z28.d; vl128 state = 0x0a3f0dc1
      0x04b93f93,  // bsl2n z19.d, z19.d, z25.d, z28.d; vl128 state = 0x46500e35
      0x04b93dbb,  // bsl2n z27.d, z27.d, z25.d, z13.d; vl128 state = 0x25bdcc83
      0x04b53db9,  // bsl2n z25.d, z25.d, z21.d, z13.d; vl128 state = 0x6d33b943
      0x04bd3d29,  // bsl2n z9.d, z9.d, z29.d, z9.d; vl128 state = 0xa218e11a
      0x04ad3d0d,  // bsl2n z13.d, z13.d, z13.d, z8.d; vl128 state = 0xc5e2f5a2
      0x04a53d4f,  // bsl2n z15.d, z15.d, z5.d, z10.d; vl128 state = 0x519e4735
      0x04653d47,  // bsl1n z7.d, z7.d, z5.d, z10.d; vl128 state = 0x132f7ce6
      0x04613dc6,  // bsl1n z6.d, z6.d, z1.d, z14.d; vl128 state = 0x91bcf19b
      0x04673dc7,  // bsl1n z7.d, z7.d, z7.d, z14.d; vl128 state = 0x3bd0ba20
      0x04673dc5,  // bsl1n z5.d, z5.d, z7.d, z14.d; vl128 state = 0xbf3b39fa
      0x04e73cc1,  // nbsl z1.d, z1.d, z7.d, z6.d; vl128 state = 0xd304b643
      0x04773cc5,  // bsl1n z5.d, z5.d, z23.d, z6.d; vl128 state = 0xdd6cd3ce
      0x04773ac1,  // bcax z1.d, z1.d, z23.d, z22.d; vl128 state = 0x3f456acf
      0x04773ac3,  // bcax z3.d, z3.d, z23.d, z22.d; vl128 state = 0xbe117f80
      0x047739c7,  // bcax z7.d, z7.d, z23.d, z14.d; vl128 state = 0xd3cd3dcd
      0x047439c5,  // bcax z5.d, z5.d, z20.d, z14.d; vl128 state = 0xee4f636d
      0x04743841,  // bcax z1.d, z1.d, z20.d, z2.d; vl128 state = 0xf21b00a1
      0x04753811,  // bcax z17.d, z17.d, z21.d, z0.d; vl128 state = 0x597ab14d
      0x04753815,  // bcax z21.d, z21.d, z21.d, z0.d; vl128 state = 0xf5d56322
      0x04713917,  // bcax z23.d, z23.d, z17.d, z8.d; vl128 state = 0x17f3cedf
      0x04793987,  // bcax z7.d, z7.d, z25.d, z12.d; vl128 state = 0x7492c4e5
      0x04693885,  // bcax z5.d, z5.d, z9.d, z4.d; vl128 state = 0xb796548c
      0x046838d5,  // bcax z21.d, z21.d, z8.d, z6.d; vl128 state = 0xf4e12422
      0x046838d4,  // bcax z20.d, z20.d, z8.d, z6.d; vl128 state = 0x16187a4c
      0x043838d6,  // eor3 z22.d, z22.d, z24.d, z6.d; vl128 state = 0xd95e6713
      0x043c39de,  // eor3 z30.d, z30.d, z28.d, z14.d; vl128 state = 0xb8322807
      0x047c38ce,  // bcax z14.d, z14.d, z28.d, z6.d; vl128 state = 0x6871619d
      0x047c38cf,  // bcax z15.d, z15.d, z28.d, z6.d; vl128 state = 0x57c5a4af
      0x043c384e,  // eor3 z14.d, z14.d, z28.d, z2.d; vl128 state = 0x1a62efdf
      0x0474385e,  // bcax z30.d, z30.d, z20.d, z2.d; vl128 state = 0xc9d1ea1e
      0x047c3a4e,  // bcax z14.d, z14.d, z28.d, z18.d; vl128 state = 0xd5ced43e
      0x047c3c4f,  // bsl1n z15.d, z15.d, z28.d, z2.d; vl128 state = 0x79f22e16
      0x047d3d4b,  // bsl1n z11.d, z11.d, z29.d, z10.d; vl128 state = 0xc4ee5d6e
      0x04793c49,  // bsl1n z9.d, z9.d, z25.d, z2.d; vl128 state = 0xea11e840
      0x04793c99,  // bsl1n z25.d, z25.d, z25.d, z4.d; vl128 state = 0x95221bc2
      0x04613c91,  // bsl1n z17.d, z17.d, z1.d, z4.d; vl128 state = 0xa40acfbe
      0x04233c90,  // bsl z16.d, z16.d, z3.d, z4.d; vl128 state = 0x8d3ef22f
      0x04233c80,  // bsl z0.d, z0.d, z3.d, z4.d; vl128 state = 0xd07d1bb2
      0x04223ca4,  // bsl z4.d, z4.d, z2.d, z5.d; vl128 state = 0xa2c4169c
      0x04223ca5,  // bsl z5.d, z5.d, z2.d, z5.d; vl128 state = 0x3c6415e5
      0x04a03ca1,  // bsl2n z1.d, z1.d, z0.d, z5.d; vl128 state = 0x55b93add
      0x04a03cb1,  // bsl2n z17.d, z17.d, z0.d, z5.d; vl128 state = 0x9b86e5b3
      0x04a13cf9,  // bsl2n z25.d, z25.d, z1.d, z7.d; vl128 state = 0xdd310e8f
      0x04a13cfd,  // bsl2n z29.d, z29.d, z1.d, z7.d; vl128 state = 0xae66fb44
      0x04a13ced,  // bsl2n z13.d, z13.d, z1.d, z7.d; vl128 state = 0xc69dd926
      0x04b93ce9,  // bsl2n z9.d, z9.d, z25.d, z7.d; vl128 state = 0x15592b37
      0x04b93dcb,  // bsl2n z11.d, z11.d, z25.d, z14.d; vl128 state = 0xbfcda4d3
      0x04b83d4f,  // bsl2n z15.d, z15.d, z24.d, z10.d; vl128 state = 0xaef1e0b6
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly one instruction per table entry.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The hash helper is handed the address of `final_hash`; the Mov/Ldr pair
  // then loads the 32-bit value stored at that address into w0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized SVE lanes - 1).
    uint32_t hashes_by_lane_count[] = {
        0xaef1e0b6,
        0xc9b3303f,
        0xc547c948,
        0x0fc817f7,
        0x22d2eab3,
        0x225b3ecd,
        0xf7a34a06,
        0xa07e68ed,
        0xdba0f9fa,
        0x64199691,
        0xa650bfa3,
        0xc6bfeab9,
        0x7efe63c4,
        0x66e4139c,
        0xc580dcf5,
        0x95687693,
    };
    const auto hash_index = core.GetSVELaneCount(kQRegSize) - 1;
    ASSERT_EQUAL_64(hashes_by_lane_count[hash_index], x0);
  }
}
5843
// Emits a fixed stream of raw WHILEGE/WHILEGT/WHILEHS/WHILEHI encodings after
// resetting the machine state, then compares the resulting machine-state hash
// with the value recorded for the SVE lane count reported by the register dump.
TEST_SVE(sve2_while) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw instruction words, emitted strictly in table order. Every entry keeps
  // its disassembly and the `vl128 state` annotation recorded for it; the last
  // annotation equals the first entry of the expected-hash table below.
  static const uint32_t kEncodings[] = {
      0x257109e3,  // whilehs p3.h, w15, w17; vl128 state = 0x4568cc4c
      0x257709f3,  // whilehi p3.h, w15, w23; vl128 state = 0xf148a8ac
      0x25f509f7,  // whilehi p7.d, w15, w21; vl128 state = 0x2fe3dcb9
      0x257508f5,  // whilehi p5.h, w7, w21; vl128 state = 0x88429dee
      0x257100f1,  // whilegt p1.h, w7, w17; vl128 state = 0x5a3b89ec
      0x253108f0,  // whilehi p0.b, w7, w17; vl128 state = 0x73276c52
      0x253108f1,  // whilehi p1.b, w7, w17; vl128 state = 0xa278d7f0
      0x257508f9,  // whilehi p9.h, w7, w21; vl128 state = 0xa438aefc
      0x25750858,  // whilehi p8.h, w2, w21; vl128 state = 0x33e13c17
      0x25770a50,  // whilehi p0.h, w18, w23; vl128 state = 0x01947abe
      0x25751a52,  // whilehi p2.h, x18, x21; vl128 state = 0x2cf410f2
      0x25711a7a,  // whilehi p10.h, x19, x17; vl128 state = 0x4bb6efc1
      0x25391a78,  // whilehi p8.b, x19, x25; vl128 state = 0xec1afdd6
      0x25290a70,  // whilehi p0.b, w19, w9; vl128 state = 0xde6fbb7f
      0x25290a78,  // whilehi p8.b, w19, w9; vl128 state = 0x79c3a968
      0x25a90b68,  // whilehs p8.s, w27, w9; vl128 state = 0x4b32e81a
      0x25a903e9,  // whilege p9.s, wzr, w9; vl128 state = 0x994bfc18
      0x25a909ed,  // whilehs p13.s, w15, w9; vl128 state = 0x6d6e231f
      0x25a909ef,  // whilehs p15.s, w15, w9; vl128 state = 0x41945298
      0x25a909eb,  // whilehs p11.s, w15, w9; vl128 state = 0x659ccb75
      0x25b909c9,  // whilehs p9.s, w14, w25; vl128 state = 0xd078a7ed
      0x25bd098d,  // whilehs p13.s, w12, w29; vl128 state = 0xf6f2d8ae
      0x25b90909,  // whilehs p9.s, w8, w25; vl128 state = 0x248bccac
      0x25fb090b,  // whilehs p11.d, w8, w27; vl128 state = 0x09b0b9cc
      0x25fb090a,  // whilehs p10.d, w8, w27; vl128 state = 0xfa811fef
      0x25eb0b02,  // whilehs p2.d, w24, w11; vl128 state = 0xdcb96f30
      0x25eb0bc3,  // whilehs p3.d, w30, w11; vl128 state = 0xbae01fd2
      0x25e30acb,  // whilehs p11.d, w22, w3; vl128 state = 0xbcfdc2b8
      0x25eb08c9,  // whilehs p9.d, w6, w11; vl128 state = 0xdb60ba22
      0x25a308c1,  // whilehs p1.s, w6, w3; vl128 state = 0xe895df80
      0x25a108e5,  // whilehs p5.s, w7, w1; vl128 state = 0x3aeccb82
      0x25a009e4,  // whilehs p4.s, w15, w0; vl128 state = 0xe6b1b3b3
      0x25a009ec,  // whilehs p12.s, w15, w0; vl128 state = 0xd2e10d82
      0x25a019ae,  // whilehs p14.s, x13, x0; vl128 state = 0x4bf596b8
      0x25e018af,  // whilehs p15.d, x5, x0; vl128 state = 0xb8d27541
      0x25e918ad,  // whilehs p13.d, x5, x9; vl128 state = 0x01b6f92f
      0x25eb188c,  // whilehs p12.d, x4, x11; vl128 state = 0xd3cfed2d
      0x25eb188e,  // whilehs p14.d, x4, x11; vl128 state = 0x9947e07e
      0x25e21886,  // whilehs p6.d, x4, x2; vl128 state = 0xd9995e11
      0x25a21084,  // whilege p4.s, x4, x2; vl128 state = 0xd45d81ed
      0x25b31085,  // whilege p5.s, x4, x19; vl128 state = 0x4d67b543
      0x25a3100d,  // whilege p13.s, x0, x3; vl128 state = 0x00f0526c
      0x252b101d,  // whilegt p13.b, x0, x11; vl128 state = 0x9d176025
      0x253b1095,  // whilegt p5.b, x4, x27; vl128 state = 0xd6544089
      0x253b1091,  // whilegt p1.b, x4, x27; vl128 state = 0x37d83129
      0x253f10d5,  // whilegt p5.b, x6, xzr; vl128 state = 0x8e121615
      0x252f11d4,  // whilegt p4.b, x14, x15; vl128 state = 0x83d6c9e9
      0x25af01d5,  // whilegt p5.s, w14, w15; vl128 state = 0xe865fad7
      0x25eb01c5,  // whilege p5.d, w14, w11; vl128 state = 0x5eaf208e
      0x25fb0144,  // whilege p4.d, w10, w27; vl128 state = 0x8cd6348c
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly one instruction per table entry.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // The hash helper is handed the address of `final_hash`; the Mov/Ldr pair
  // then loads the 32-bit value stored at that address into w0.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized SVE lanes - 1).
    uint32_t hashes_by_lane_count[] = {
        0x8cd6348c,
        0x42a1f9b4,
        0x13fc2001,
        0x492cb2ac,
        0xa67cfb65,
        0x80d4639f,
        0xfa388a09,
        0x8c7ad8d9,
        0x299c5bfe,
        0x9183808a,
        0x3fc14d86,
        0x7cc08a05,
        0x9c85cd48,
        0xd06e8299,
        0x6a107152,
        0x81d99d7c,
    };
    const auto hash_index = core.GetSVELaneCount(kQRegSize) - 1;
    ASSERT_EQUAL_64(hashes_by_lane_count[hash_index], x0);
  }
}
5987
// Hash-based regression test for the SVE2 indexed CDOT encodings.
//
// Starting from the state set by SetInitialMachineState(), the test emits a
// fixed sequence of 50 raw encodings with dci(). The trailing comment on each
// dci() is the intended disassembly, and the "vl128 state" comment under it is
// the hash recorded for that point in the sequence (presumably at a 128-bit
// vector length - confirm against the generator). The hash produced by
// ComputeMachineStateHash() is then loaded into w0 and compared with the
// expected_hashes entry selected by core.GetSVELaneCount(kQRegSize) - 1.
//
// The order of the dci() calls must not change: each recorded state depends on
// every instruction before it.
TEST_SVE(sve2_cdot_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x44bb4ef6);  // cdot z22.s, z23.b, z3.b[3], #270
    // vl128 state = 0x452d1d6e
    __ dci(0x44b94ff7);  // cdot z23.s, z31.b, z1.b[3], #270
    // vl128 state = 0x546c9569
    __ dci(0x44b94dd5);  // cdot z21.s, z14.b, z1.b[3], #270
    // vl128 state = 0xa2abf834
    __ dci(0x44bd45d7);  // cdot z23.s, z14.b, z5.b[3], #90
    // vl128 state = 0xba77ed64
    __ dci(0x44fc45df);  // cdot z31.d, z14.h, z12.h[1], #90
    // vl128 state = 0xe78163f2
    __ dci(0x44f441db);  // cdot z27.d, z14.h, z4.h[1], #0
    // vl128 state = 0xca3b116d
    __ dci(0x44f44dd3);  // cdot z19.d, z14.h, z4.h[1], #270
    // vl128 state = 0x57ba3771
    __ dci(0x44b44d83);  // cdot z3.s, z12.b, z4.b[2], #270
    // vl128 state = 0x4edccb88
    __ dci(0x44ac4d82);  // cdot z2.s, z12.b, z4.b[1], #270
    // vl128 state = 0xc9543499
    __ dci(0x44a84f8a);  // cdot z10.s, z28.b, z0.b[1], #270
    // vl128 state = 0x9d8fe439
    __ dci(0x44a84d08);  // cdot z8.s, z8.b, z0.b[1], #270
    // vl128 state = 0x3c1bf0cc
    __ dci(0x44ba4d09);  // cdot z9.s, z8.b, z2.b[3], #270
    // vl128 state = 0x983716f1
    __ dci(0x44ea4d0d);  // cdot z13.d, z8.h, z10.h[0], #270
    // vl128 state = 0x2df96300
    __ dci(0x44eb491d);  // cdot z29.d, z8.h, z11.h[0], #180
    // vl128 state = 0xc23edde3
    __ dci(0x44e9499f);  // cdot z31.d, z12.h, z9.h[0], #180
    // vl128 state = 0xef0ace9d
    __ dci(0x44e84b9d);  // cdot z29.d, z28.h, z8.h[0], #180
    // vl128 state = 0x2cce8002
    __ dci(0x44e84b99);  // cdot z25.d, z28.h, z8.h[0], #180
    // vl128 state = 0xd07f46a1
    __ dci(0x44f84a9d);  // cdot z29.d, z20.h, z8.h[1], #180
    // vl128 state = 0x239831e8
    __ dci(0x44f84a99);  // cdot z25.d, z20.h, z8.h[1], #180
    // vl128 state = 0xa110988d
    __ dci(0x44e84a09);  // cdot z9.d, z16.h, z8.h[0], #180
    // vl128 state = 0x2b9ef292
    __ dci(0x44e84a19);  // cdot z25.d, z16.h, z8.h[0], #180
    // vl128 state = 0x50eeb818
    __ dci(0x44e04b1b);  // cdot z27.d, z24.h, z0.h[0], #180
    // vl128 state = 0xc33ce03b
    __ dci(0x44e04a2b);  // cdot z11.d, z17.h, z0.h[0], #180
    // vl128 state = 0xe163b5c9
    __ dci(0x44e04b0f);  // cdot z15.d, z24.h, z0.h[0], #180
    // vl128 state = 0x052a34eb
    __ dci(0x44e04b1f);  // cdot z31.d, z24.h, z0.h[0], #180
    // vl128 state = 0x0660afb4
    __ dci(0x44e84b4f);  // cdot z15.d, z26.h, z8.h[0], #180
    // vl128 state = 0x0ae01233
    __ dci(0x44ee4b4e);  // cdot z14.d, z26.h, z14.h[0], #180
    // vl128 state = 0xde7bdd15
    __ dci(0x44ae4b7e);  // cdot z30.s, z27.b, z6.b[1], #180
    // vl128 state = 0x758973a1
    __ dci(0x44a6497f);  // cdot z31.s, z11.b, z6.b[0], #180
    // vl128 state = 0xb3c5df37
    __ dci(0x44a64df7);  // cdot z23.s, z15.b, z6.b[0], #270
    // vl128 state = 0xe652f054
    __ dci(0x44a64c73);  // cdot z19.s, z3.b, z6.b[0], #270
    // vl128 state = 0xc4b58041
    __ dci(0x44a64de3);  // cdot z3.s, z15.b, z6.b[0], #270
    // vl128 state = 0x1239ca90
    __ dci(0x44a749e2);  // cdot z2.s, z15.b, z7.b[0], #180
    // vl128 state = 0x4a01cdcb
    __ dci(0x44a740e0);  // cdot z0.s, z7.b, z7.b[0], #0
    // vl128 state = 0x604e45cf
    __ dci(0x44a344e2);  // cdot z2.s, z7.b, z3.b[0], #90
    // vl128 state = 0x12fe2972
    __ dci(0x44a34ca3);  // cdot z3.s, z5.b, z3.b[0], #270
    // vl128 state = 0x78e0bb2e
    __ dci(0x44e14cb3);  // cdot z19.d, z5.h, z1.h[0], #270
    // vl128 state = 0xe3a69b46
    __ dci(0x44e14d31);  // cdot z17.d, z9.h, z1.h[0], #270
    // vl128 state = 0xe6b58aa4
    __ dci(0x44f14d01);  // cdot z1.d, z8.h, z1.h[1], #270
    // vl128 state = 0xffcfb597
    __ dci(0x44f14551);  // cdot z17.d, z10.h, z1.h[1], #90
    // vl128 state = 0x2745934b
    __ dci(0x44f345d5);  // cdot z21.d, z14.h, z3.h[1], #90
    // vl128 state = 0xa38b5571
    __ dci(0x44f34574);  // cdot z20.d, z11.h, z3.h[1], #90
    // vl128 state = 0x978afd92
    __ dci(0x44f34576);  // cdot z22.d, z11.h, z3.h[1], #90
    // vl128 state = 0x9f1b19c9
    __ dci(0x44f34f77);  // cdot z23.d, z27.h, z3.h[1], #270
    // vl128 state = 0x61a31d64
    __ dci(0x44f24f5f);  // cdot z31.d, z26.h, z2.h[1], #270
    // vl128 state = 0x1e71023e
    __ dci(0x44fa4fcf);  // cdot z15.d, z30.h, z10.h[1], #270
    // vl128 state = 0xdbe5ffb3
    __ dci(0x44ba4f4e);  // cdot z14.s, z26.b, z2.b[3], #270
    // vl128 state = 0x51390e81
    __ dci(0x44ba470c);  // cdot z12.s, z24.b, z2.b[3], #90
    // vl128 state = 0x59ad5198
    __ dci(0x44b2479c);  // cdot z28.s, z28.b, z2.b[2], #90
    // vl128 state = 0xe997de49
    __ dci(0x44b24fbd);  // cdot z29.s, z29.b, z2.b[2], #270
    // vl128 state = 0x5533cefa
  }

  // Fetch the computed hash through the address of `state` so that it ends up
  // in w0 for the comparison below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by GetSVELaneCount(kQRegSize) - 1. The
    // first entry equals the final "vl128 state" value recorded above.
    uint32_t expected_hashes[] = {
        0x5533cefa,
        0x1462a298,
        0x1acb4ead,
        0xeb05ddf0,
        0x23fe8c86,
        0xbb1e9f8c,
        0x4a933f43,
        0x4cd64b55,
        0x84a4b8b7,
        0x52019619,
        0x4442432b,
        0x9b353ce8,
        0x333c9eef,
        0x291eac87,
        0x110f7371,
        0x009b25cb,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
6131
// Hash-based regression test for the SVE2 constructive SPLICE encodings.
//
// Starting from the state set by SetInitialMachineState(), the test emits a
// fixed sequence of 50 raw encodings with dci(). The trailing comment on each
// dci() is the intended disassembly, and the "vl128 state" comment under it is
// the hash recorded for that point in the sequence (presumably at a 128-bit
// vector length - confirm against the generator). The hash produced by
// ComputeMachineStateHash() is then loaded into w0 and compared with the
// expected_hashes entry selected by core.GetSVELaneCount(kQRegSize) - 1.
//
// The order of the dci() calls must not change: each recorded state depends on
// every instruction before it.
TEST_SVE(sve2_splice) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x05ed89a7);  // splice z7.d, p2, {z13.d, z14.d}
    // vl128 state = 0x6acff994
    __ dci(0x05ed81e5);  // splice z5.d, p0, {z15.d, z16.d}
    // vl128 state = 0x2c8b3e5d
    __ dci(0x05ed8375);  // splice z21.d, p0, {z27.d, z28.d}
    // vl128 state = 0x2588e208
    __ dci(0x05ed9174);  // splice z20.d, p4, {z11.d, z12.d}
    // vl128 state = 0x4d6fa6b3
    __ dci(0x056d91f6);  // splice z22.h, p4, {z15.h, z16.h}
    // vl128 state = 0x9f00a308
    __ dci(0x056d92f2);  // splice z18.h, p4, {z23.h, z24.h}
    // vl128 state = 0x5479cc74
    __ dci(0x056d96a2);  // splice z2.h, p5, {z21.h, z22.h}
    // vl128 state = 0xca7a6a63
    __ dci(0x056d9fa6);  // splice z6.h, p7, {z29.h, z30.h}
    // vl128 state = 0x007fc934
    __ dci(0x056d9be4);  // splice z4.h, p6, {z31.h, z0.h}
    // vl128 state = 0x8186741b
    __ dci(0x056d97ec);  // splice z12.h, p5, {z31.h, z0.h}
    // vl128 state = 0x26ab76b9
    __ dci(0x056d979c);  // splice z28.h, p5, {z28.h, z29.h}
    // vl128 state = 0x933201f4
    __ dci(0x056d9794);  // splice z20.h, p5, {z28.h, z29.h}
    // vl128 state = 0x42cf6784
    __ dci(0x052d9f96);  // splice z22.b, p7, {z28.b, z29.b}
    // vl128 state = 0x0838e776
    __ dci(0x056d8f9e);  // splice z30.h, p3, {z28.h, z29.h}
    // vl128 state = 0x89637e78
    __ dci(0x056d9fd6);  // splice z22.h, p7, {z30.h, z31.h}
    // vl128 state = 0xb94dbb49
    __ dci(0x056d8dd7);  // splice z23.h, p3, {z14.h, z15.h}
    // vl128 state = 0x260f8127
    __ dci(0x05ad8ddf);  // splice z31.s, p3, {z14.s, z15.s}
    // vl128 state = 0x16257a12
    __ dci(0x05ad8ddd);  // splice z29.s, p3, {z14.s, z15.s}
    // vl128 state = 0x803d0766
    __ dci(0x05ad8d7c);  // splice z28.s, p3, {z11.s, z12.s}
    // vl128 state = 0xcc405331
    __ dci(0x05ad8d74);  // splice z20.s, p3, {z11.s, z12.s}
    // vl128 state = 0x0ed25e4c
    __ dci(0x05ad8d64);  // splice z4.s, p3, {z11.s, z12.s}
    // vl128 state = 0x167daf8b
    __ dci(0x05ed8c6c);  // splice z12.d, p3, {z3.d, z4.d}
    // vl128 state = 0x435f3bb9
    __ dci(0x05ed8cad);  // splice z13.d, p3, {z5.d, z6.d}
    // vl128 state = 0xe49df619
    __ dci(0x056d8dbd);  // splice z29.h, p3, {z13.h, z14.h}
    // vl128 state = 0x1f54e928
    __ dci(0x056d8f2d);  // splice z13.h, p3, {z25.h, z26.h}
    // vl128 state = 0x24adbe77
    __ dci(0x056d8f9d);  // splice z29.h, p3, {z28.h, z29.h}
    // vl128 state = 0xcc2ec3e6
    __ dci(0x056d8f95);  // splice z21.h, p3, {z28.h, z29.h}
    // vl128 state = 0xb71c64f7
    __ dci(0x056d8f34);  // splice z20.h, p3, {z25.h, z26.h}
    // vl128 state = 0xb32756f0
    __ dci(0x05ed8f64);  // splice z4.d, p3, {z27.d, z28.d}
    // vl128 state = 0x3f7d1f13
    __ dci(0x05ad8e60);  // splice z0.s, p3, {z19.s, z20.s}
    // vl128 state = 0x9a7ffbde
    __ dci(0x052d8e50);  // splice z16.b, p3, {z18.b, z19.b}
    // vl128 state = 0x5c82ed17
    __ dci(0x052d9652);  // splice z18.b, p5, {z18.b, z19.b}
    // vl128 state = 0x28b9cd60
    __ dci(0x052d9ed0);  // splice z16.b, p7, {z22.b, z23.b}
    // vl128 state = 0xab0238ba
    __ dci(0x052d9ed4);  // splice z20.b, p7, {z22.b, z23.b}
    // vl128 state = 0x9f0e0ef9
    __ dci(0x056d9cc4);  // splice z4.h, p7, {z6.h, z7.h}
    // vl128 state = 0xec31d5e7
    __ dci(0x056d98e6);  // splice z6.h, p6, {z7.h, z8.h}
    // vl128 state = 0xbc9c0048
    __ dci(0x056d9ee4);  // splice z4.h, p7, {z23.h, z24.h}
    // vl128 state = 0xe2e9c9a3
    __ dci(0x056d9ef4);  // splice z20.h, p7, {z23.h, z24.h}
    // vl128 state = 0x60ffa98a
    __ dci(0x056d9ab6);  // splice z22.h, p6, {z21.h, z22.h}
    // vl128 state = 0xae70ed0f
    __ dci(0x056d9294);  // splice z20.h, p4, {z20.h, z21.h}
    // vl128 state = 0x5736c563
    __ dci(0x056d9284);  // splice z4.h, p4, {z20.h, z21.h}
    // vl128 state = 0xf31dd2d9
    __ dci(0x052d920c);  // splice z12.b, p4, {z16.b, z17.b}
    // vl128 state = 0x04502fea
    __ dci(0x052d921c);  // splice z28.b, p4, {z16.b, z17.b}
    // vl128 state = 0x852f98b1
    __ dci(0x052d9094);  // splice z20.b, p4, {z4.b, z5.b}
    // vl128 state = 0xb40c5931
    __ dci(0x052d90f6);  // splice z22.b, p4, {z7.b, z8.b}
    // vl128 state = 0x64d6138d
    __ dci(0x052d88e6);  // splice z6.b, p2, {z7.b, z8.b}
    // vl128 state = 0x51bb6564
    __ dci(0x052d88e4);  // splice z4.b, p2, {z7.b, z8.b}
    // vl128 state = 0x7ed599b0
    __ dci(0x05ad8865);  // splice z5.s, p2, {z3.s, z4.s}
    // vl128 state = 0xa201547d
    __ dci(0x05ad9961);  // splice z1.s, p6, {z11.s, z12.s}
    // vl128 state = 0x9508f19c
    __ dci(0x05ed9945);  // splice z5.d, p6, {z10.d, z11.d}
    // vl128 state = 0x95399cfd
  }

  // Fetch the computed hash through the address of `state` so that it ends up
  // in w0 for the comparison below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by GetSVELaneCount(kQRegSize) - 1. The
    // first entry equals the final "vl128 state" value recorded above.
    uint32_t expected_hashes[] = {
        0x95399cfd,
        0xa960b01e,
        0x1fedaa18,
        0xe2fd3ec3,
        0x3edc353b,
        0xd809efd8,
        0x2a04f527,
        0xe4b9bb4a,
        0x72e5ed3e,
        0x63d6fe93,
        0xd2ad18fa,
        0x522fe057,
        0xc7ba2f7d,
        0x2dd44bd3,
        0x68b62ae6,
        0x06ea6854,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
6275
// Hash-based regression test for the SVE2 WHILERW / WHILEWR encodings.
//
// Starting from the state set by SetInitialMachineState(), the test emits a
// fixed sequence of 50 raw encodings with dci(). The trailing comment on each
// dci() is the intended disassembly, and the "vl128 state" comment under it is
// the hash recorded for that point in the sequence (presumably at a 128-bit
// vector length - confirm against the generator). The hash produced by
// ComputeMachineStateHash() is then loaded into w0 and compared with the
// expected_hashes entry selected by core.GetSVELaneCount(kQRegSize) - 1.
//
// The order of the dci() calls must not change: each recorded state depends on
// every instruction before it.
TEST_SVE(sve2_whilerw_whilewr) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x25ac3026);  // whilewr p6.s, x1, x12
    // vl128 state = 0x91e301ae
    __ dci(0x25ac3024);  // whilewr p4.s, x1, x12
    // vl128 state = 0x9203b261
    __ dci(0x25af3020);  // whilewr p0.s, x1, x15
    // vl128 state = 0x87505080
    __ dci(0x25ef3222);  // whilewr p2.d, x17, x15
    // vl128 state = 0x4ba695cb
    __ dci(0x25eb320a);  // whilewr p10.d, x16, x11
    // vl128 state = 0x5909d726
    __ dci(0x25e33308);  // whilewr p8.d, x24, x3
    // vl128 state = 0x52766071
    __ dci(0x25ea3309);  // whilewr p9.d, x24, x10
    // vl128 state = 0xe906a65a
    __ dci(0x25aa3101);  // whilewr p1.s, x8, x10
    // vl128 state = 0xd9d56c58
    __ dci(0x252b3100);  // whilewr p0.b, x8, x11
    // vl128 state = 0xcc868eb9
    __ dci(0x252a3008);  // whilewr p8.b, x0, x10
    // vl128 state = 0xf78cb912
    __ dci(0x2528304c);  // whilewr p12.b, x2, x8
    // vl128 state = 0x5493a6c4
    __ dci(0x25203004);  // whilewr p4.b, x0, x0
    // vl128 state = 0xb3d754b6
    __ dci(0x25303105);  // whilewr p5.b, x8, x16
    // vl128 state = 0x7fc526df
    __ dci(0x25b4310d);  // whilewr p13.s, x8, x20
    // vl128 state = 0x5999edda
    __ dci(0x25ac310c);  // whilewr p12.s, x8, x12
    // vl128 state = 0x46a86248
    __ dci(0x25ac310e);  // whilewr p14.s, x8, x12
    // vl128 state = 0x0dc5ed70
    __ dci(0x252c330a);  // whilewr p10.b, x24, x12
    // vl128 state = 0x453a1aa9
    __ dci(0x252f330b);  // whilewr p11.b, x24, x15
    // vl128 state = 0x98fbdcdf
    __ dci(0x256e330f);  // whilewr p15.h, x24, x14
    // vl128 state = 0x84699750
    __ dci(0x252e334d);  // whilewr p13.b, x26, x14
    // vl128 state = 0x198ea519
    __ dci(0x252e3349);  // whilewr p9.b, x26, x14
    // vl128 state = 0xb4956673
    __ dci(0x253e33c1);  // whilewr p1.b, x30, x30
    // vl128 state = 0xfd88dd74
    __ dci(0x252e33e3);  // whilewr p3.b, xzr, x14
    // vl128 state = 0x68cda9df
    __ dci(0x25ae33cb);  // whilewr p11.s, x30, x14
    // vl128 state = 0x9104f644
    __ dci(0x25ae33ca);  // whilewr p10.s, x30, x14
    // vl128 state = 0xd9079300
    __ dci(0x25ea33da);  // whilerw p10.d, x30, x10
    // vl128 state = 0xd9fb019d
    __ dci(0x25ae33d8);  // whilerw p8.s, x30, x14
    // vl128 state = 0x9edf46fa
    __ dci(0x25ae32f9);  // whilerw p9.s, x23, x14
    // vl128 state = 0x3b10562f
    __ dci(0x25ee32d8);  // whilerw p8.d, x22, x14
    // vl128 state = 0x473e26e3
    __ dci(0x25ec3299);  // whilerw p9.d, x20, x12
    // vl128 state = 0x4feaf55c
    __ dci(0x25ec329d);  // whilerw p13.d, x20, x12
    // vl128 state = 0x9f9a203a
    __ dci(0x25e8321c);  // whilerw p12.d, x16, x8
    // vl128 state = 0xd8f32d11
    __ dci(0x2568301d);  // whilerw p13.h, x0, x8
    // vl128 state = 0xf04b6bb8
    __ dci(0x2528320d);  // whilewr p13.b, x16, x8
    // vl128 state = 0x0883f877
    __ dci(0x25a8323d);  // whilerw p13.s, x17, x8
    // vl128 state = 0x9564ca3e
    __ dci(0x25a8323f);  // whilerw p15.s, x17, x8
    // vl128 state = 0xa50cf036
    __ dci(0x25e8303d);  // whilerw p13.d, x1, x8
    // vl128 state = 0xe89b1719
    __ dci(0x25e83175);  // whilerw p5.d, x11, x8
    // vl128 state = 0xe79bea7c
    __ dci(0x256a3174);  // whilerw p4.h, x11, x10
    // vl128 state = 0xc8ca3b74
    __ dci(0x256a317c);  // whilerw p12.h, x11, x10
    // vl128 state = 0xc3c88548
    __ dci(0x256a33f8);  // whilerw p8.h, xzr, x10
    // vl128 state = 0x8b25acc6
    __ dci(0x256a33f0);  // whilerw p0.h, xzr, x10
    // vl128 state = 0x904c0fd1
    __ dci(0x25e833e0);  // whilewr p0.d, xzr, x8
    // vl128 state = 0xc893f4c8
    __ dci(0x25ec32e8);  // whilewr p8.d, x23, x12
    // vl128 state = 0x807edd46
    __ dci(0x25ed326c);  // whilewr p12.d, x19, x13
    // vl128 state = 0x8b7c637a
    __ dci(0x256d32ed);  // whilewr p13.h, x23, x13
    // vl128 state = 0xa3c425d3
    __ dci(0x252d30e9);  // whilewr p9.b, x7, x13
    // vl128 state = 0x0edfe6b9
    __ dci(0x252531eb);  // whilewr p11.b, x15, x5
    // vl128 state = 0xf716b922
    __ dci(0x252733ef);  // whilewr p15.b, xzr, x7
    // vl128 state = 0xbf9aea3e
    __ dci(0x25253367);  // whilewr p7.b, x27, x5
    // vl128 state = 0x357fc408
  }

  // Fetch the computed hash through the address of `state` so that it ends up
  // in w0 for the comparison below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by GetSVELaneCount(kQRegSize) - 1. The
    // first entry equals the final "vl128 state" value recorded above.
    uint32_t expected_hashes[] = {
        0x357fc408,
        0x8d6fc283,
        0x5f73c1df,
        0x2963d995,
        0x80713760,
        0x4638fc82,
        0x23955ead,
        0x52e4c002,
        0xd56ab65c,
        0x0e5bb2f2,
        0x8c78ec14,
        0xd9b634d2,
        0x83adc3a2,
        0x3b664eea,
        0x3d1f5422,
        0x7cdcd310,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
6419
// Hash-based regression test for the SVE2 indexed MUL encodings.
//
// Starting from the state set by SetInitialMachineState(), the test emits a
// fixed sequence of 50 raw encodings with dci(). The trailing comment on each
// dci() is the intended disassembly, and the "vl128 state" comment under it is
// the hash recorded for that point in the sequence (presumably at a 128-bit
// vector length - confirm against the generator). The hash produced by
// ComputeMachineStateHash() is then loaded into w0 and compared with the
// expected_hashes entry selected by core.GetSVELaneCount(kQRegSize) - 1.
//
// The order of the dci() calls must not change: each recorded state depends on
// every instruction before it.
TEST_SVE(sve2_mul_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x4468fb6e);  // mul z14.h, z27.h, z0.h[5]
    // vl128 state = 0xcbe81b96
    __ dci(0x4468f93e);  // mul z30.h, z9.h, z0.h[5]
    // vl128 state = 0x8a75362d
    __ dci(0x4428f976);  // mul z22.h, z11.h, z0.h[1]
    // vl128 state = 0x1e3c5184
    __ dci(0x4428fa77);  // mul z23.h, z19.h, z0.h[1]
    // vl128 state = 0x173f58b5
    __ dci(0x4429fb67);  // mul z7.h, z27.h, z1.h[1]
    // vl128 state = 0x15686c87
    __ dci(0x4429fb63);  // mul z3.h, z27.h, z1.h[1]
    // vl128 state = 0x41068a87
    __ dci(0x4428fb53);  // mul z19.h, z26.h, z0.h[1]
    // vl128 state = 0xcfd6e02c
    __ dci(0x4429fbd1);  // mul z17.h, z30.h, z1.h[1]
    // vl128 state = 0xfd3e0e3c
    __ dci(0x442afbd9);  // mul z25.h, z30.h, z2.h[1]
    // vl128 state = 0x1e660bf7
    __ dci(0x442afa5b);  // mul z27.h, z18.h, z2.h[1]
    // vl128 state = 0xb5378f4e
    __ dci(0x44abfa4b);  // mul z11.s, z18.s, z3.s[1]
    // vl128 state = 0xf34416fe
    __ dci(0x44abfa4f);  // mul z15.s, z18.s, z3.s[1]
    // vl128 state = 0xc80d6ad9
    __ dci(0x44a9f84e);  // mul z14.s, z2.s, z1.s[1]
    // vl128 state = 0xa4fe2be7
    __ dci(0x44e9fa46);  // mul z6.d, z18.d, z9.d[0]
    // vl128 state = 0xaf461ebb
    __ dci(0x44e9fa8e);  // mul z14.d, z20.d, z9.d[0]
    // vl128 state = 0x9f7acd20
    __ dci(0x44f1fa8f);  // mul z15.d, z20.d, z1.d[1]
    // vl128 state = 0x1b710469
    __ dci(0x4471fa07);  // mul z7.h, z16.h, z1.h[6]
    // vl128 state = 0xa2120b4c
    __ dci(0x4470fa43);  // mul z3.h, z18.h, z0.h[6]
    // vl128 state = 0xb6d6ce4c
    __ dci(0x4474fb47);  // mul z7.h, z26.h, z4.h[6]
    // vl128 state = 0xeec634bf
    __ dci(0x4476fa57);  // mul z23.h, z18.h, z6.h[6]
    // vl128 state = 0x893bbe37
    __ dci(0x447cfa53);  // mul z19.h, z18.h, z4.h[7]
    // vl128 state = 0x8373940b
    __ dci(0x447dfb52);  // mul z18.h, z26.h, z5.h[7]
    // vl128 state = 0xd1c86434
    __ dci(0x4477fb56);  // mul z22.h, z26.h, z7.h[6]
    // vl128 state = 0xb247cf9e
    __ dci(0x4476fb77);  // mul z23.h, z27.h, z6.h[6]
    // vl128 state = 0x6106a868
    __ dci(0x4467fb7f);  // mul z31.h, z27.h, z7.h[4]
    // vl128 state = 0xc0a11edf
    __ dci(0x446ffa77);  // mul z23.h, z19.h, z7.h[5]
    // vl128 state = 0xe1879a44
    __ dci(0x442bfa76);  // mul z22.h, z19.h, z3.h[1]
    // vl128 state = 0xc773115b
    __ dci(0x442bfa7e);  // mul z30.h, z19.h, z3.h[1]
    // vl128 state = 0x5f5b4793
    __ dci(0x442afa2e);  // mul z14.h, z17.h, z2.h[1]
    // vl128 state = 0x144b30b2
    __ dci(0x442afa26);  // mul z6.h, z17.h, z2.h[1]
    // vl128 state = 0x905f8608
    __ dci(0x442afb6e);  // mul z14.h, z27.h, z2.h[1]
    // vl128 state = 0x0f826c19
    __ dci(0x44aefb66);  // mul z6.s, z27.s, z6.s[1]
    // vl128 state = 0x7043c090
    __ dci(0x44aefba4);  // mul z4.s, z29.s, z6.s[1]
    // vl128 state = 0xab3921a9
    __ dci(0x44aefbb4);  // mul z20.s, z29.s, z6.s[1]
    // vl128 state = 0x7d420495
    __ dci(0x44acfbf0);  // mul z16.s, z31.s, z4.s[1]
    // vl128 state = 0xceb17a45
    __ dci(0x44a4fb60);  // mul z0.s, z27.s, z4.s[0]
    // vl128 state = 0x97ed0929
    __ dci(0x44a5fb30);  // mul z16.s, z25.s, z5.s[0]
    // vl128 state = 0xb7fa54a5
    __ dci(0x4425f938);  // mul z24.h, z9.h, z5.h[0]
    // vl128 state = 0xfcc1c192
    __ dci(0x442df830);  // mul z16.h, z1.h, z5.h[1]
    // vl128 state = 0x933ed51d
    __ dci(0x4427f832);  // mul z18.h, z1.h, z7.h[0]
    // vl128 state = 0x2129d4f0
    __ dci(0x442ef822);  // mul z2.h, z1.h, z6.h[1]
    // vl128 state = 0x76f6854c
    __ dci(0x442af803);  // mul z3.h, z0.h, z2.h[1]
    // vl128 state = 0xe763df2d
    __ dci(0x442af801);  // mul z1.h, z0.h, z2.h[1]
    // vl128 state = 0x61db5a87
    __ dci(0x442bf900);  // mul z0.h, z8.h, z3.h[1]
    // vl128 state = 0x90883cfb
    __ dci(0x442bf881);  // mul z1.h, z4.h, z3.h[1]
    // vl128 state = 0xb4afb9b2
    __ dci(0x4427f885);  // mul z5.h, z4.h, z7.h[0]
    // vl128 state = 0xe512adca
    __ dci(0x4425f8ad);  // mul z13.h, z5.h, z5.h[0]
    // vl128 state = 0xd820475a
    __ dci(0x4420f8a5);  // mul z5.h, z5.h, z0.h[0]
    // vl128 state = 0xea9a6f50
    __ dci(0x4431f8a4);  // mul z4.h, z5.h, z1.h[2]
    // vl128 state = 0x9343e341
    __ dci(0x4425f8a0);  // mul z0.h, z5.h, z5.h[0]
    // vl128 state = 0x20a5f202
  }

  // Fetch the computed hash through the address of `state` so that it ends up
  // in w0 for the comparison below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by GetSVELaneCount(kQRegSize) - 1. The
    // first entry equals the final "vl128 state" value recorded above.
    uint32_t expected_hashes[] = {
        0x20a5f202,
        0xdb7b10ee,
        0x0607441b,
        0x4966f0ff,
        0x5f750338,
        0x9be09ff4,
        0x8805a320,
        0x52cf70b0,
        0x5f4c6d92,
        0xf8009f1f,
        0x56cd1ff6,
        0x345f063d,
        0x3807ccf3,
        0xf7eb85a8,
        0x1600c143,
        0x97be6c01,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
6563
// Hash-based regression test for the SVE2 indexed MLA / MLS encodings.
//
// Starting from the state set by SetInitialMachineState(), the test emits a
// fixed sequence of 50 raw encodings with dci(). The trailing comment on each
// dci() is the intended disassembly, and the "vl128 state" comment under it is
// the hash recorded for that point in the sequence (presumably at a 128-bit
// vector length - confirm against the generator). The hash produced by
// ComputeMachineStateHash() is then loaded into w0 and compared with the
// expected_hashes entry selected by core.GetSVELaneCount(kQRegSize) - 1.
//
// The order of the dci() calls must not change: each recorded state depends on
// every instruction before it.
TEST_SVE(sve2_mla_mls_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x44200800);  // mla z0.h, z0.h, z0.h[0]
    // vl128 state = 0x06aac22e
    __ dci(0x44200a28);  // mla z8.h, z17.h, z0.h[0]
    // vl128 state = 0xde2255a4
    __ dci(0x44e00a2a);  // mla z10.d, z17.d, z0.d[0]
    // vl128 state = 0x9bf1bae6
    __ dci(0x44600e3a);  // mls z26.h, z17.h, z0.h[4]
    // vl128 state = 0x28b58feb
    __ dci(0x44e20e2a);  // mls z10.d, z17.d, z2.d[0]
    // vl128 state = 0x0ac8fcc8
    __ dci(0x44620f2e);  // mls z14.h, z25.h, z2.h[4]
    // vl128 state = 0x955da860
    __ dci(0x44630f6a);  // mls z10.h, z27.h, z3.h[4]
    // vl128 state = 0x654ee915
    __ dci(0x44730b6e);  // mla z14.h, z27.h, z3.h[6]
    // vl128 state = 0x3fd3e02c
    __ dci(0x44720f6f);  // mls z15.h, z27.h, z2.h[6]
    // vl128 state = 0x46031098
    __ dci(0x44620f4b);  // mls z11.h, z26.h, z2.h[4]
    // vl128 state = 0xd49183cf
    __ dci(0x446a0b5b);  // mla z27.h, z26.h, z2.h[5]
    // vl128 state = 0x4fe290c1
    __ dci(0x44680b73);  // mla z19.h, z27.h, z0.h[5]
    // vl128 state = 0xf6fccd86
    __ dci(0x44e90b77);  // mla z23.d, z27.d, z9.d[0]
    // vl128 state = 0x57b2090d
    __ dci(0x44f10b76);  // mla z22.d, z27.d, z1.d[1]
    // vl128 state = 0x5a6932eb
    __ dci(0x44f40b77);  // mla z23.d, z27.d, z4.d[1]
    // vl128 state = 0x8e33d7d5
    __ dci(0x44640b7f);  // mla z31.h, z27.h, z4.h[4]
    // vl128 state = 0xaa01885d
    __ dci(0x44640b7d);  // mla z29.h, z27.h, z4.h[4]
    // vl128 state = 0x2ef00e60
    __ dci(0x44640b7f);  // mla z31.h, z27.h, z4.h[4]
    // vl128 state = 0x94ac10d3
    __ dci(0x44340b7e);  // mla z30.h, z27.h, z4.h[2]
    // vl128 state = 0x48211118
    __ dci(0x44340e7a);  // mls z26.h, z19.h, z4.h[2]
    // vl128 state = 0x72cc2767
    __ dci(0x44b40eea);  // mls z10.s, z23.s, z4.s[2]
    // vl128 state = 0x3855f70f
    __ dci(0x44e40ee2);  // mls z2.d, z23.d, z4.d[0]
    // vl128 state = 0xf9225160
    __ dci(0x44ec0ea3);  // mls z3.d, z21.d, z12.d[0]
    // vl128 state = 0xf9b94fd0
    __ dci(0x44ae0ea7);  // mls z7.s, z21.s, z6.s[1]
    // vl128 state = 0x06070917
    __ dci(0x44ae0eb7);  // mls z23.s, z21.s, z6.s[1]
    // vl128 state = 0x26ecdd18
    __ dci(0x44ae0e07);  // mls z7.s, z16.s, z6.s[1]
    // vl128 state = 0xaa8e3a32
    __ dci(0x44ae0a85);  // mla z5.s, z20.s, z6.s[1]
    // vl128 state = 0x2379cba0
    __ dci(0x44ae0a81);  // mla z1.s, z20.s, z6.s[1]
    // vl128 state = 0x3cc8a61c
    __ dci(0x442a0a85);  // mla z5.h, z20.h, z2.h[1]
    // vl128 state = 0x96f118ef
    __ dci(0x443e0a84);  // mla z4.h, z20.h, z6.h[3]
    // vl128 state = 0xa3f8cb41
    __ dci(0x443f0b8c);  // mla z12.h, z28.h, z7.h[3]
    // vl128 state = 0x97fcb1da
    __ dci(0x442f0bbc);  // mla z28.h, z29.h, z7.h[1]
    // vl128 state = 0x761e9499
    __ dci(0x44270fac);  // mls z12.h, z29.h, z7.h[0]
    // vl128 state = 0xfb28f943
    __ dci(0x442f0ead);  // mls z13.h, z21.h, z7.h[1]
    // vl128 state = 0x387a2623
    __ dci(0x44270fa9);  // mls z9.h, z29.h, z7.h[0]
    // vl128 state = 0x22f03847
    __ dci(0x44270f68);  // mls z8.h, z27.h, z7.h[0]
    // vl128 state = 0xada4998b
    __ dci(0x44270f6c);  // mls z12.h, z27.h, z7.h[0]
    // vl128 state = 0xdf80a034
    __ dci(0x44270f7c);  // mls z28.h, z27.h, z7.h[0]
    // vl128 state = 0x3ccddaa6
    __ dci(0x44250f2c);  // mls z12.h, z25.h, z5.h[0]
    // vl128 state = 0x588502cb
    __ dci(0x442f0f28);  // mls z8.h, z25.h, z7.h[1]
    // vl128 state = 0x79c90307
    __ dci(0x446f0d2c);  // mls z12.h, z9.h, z7.h[5]
    // vl128 state = 0xaa0b21a9
    __ dci(0x44af0d2e);  // mls z14.s, z9.s, z7.s[1]
    // vl128 state = 0xd5ccc60c
    __ dci(0x44ed0d26);  // mls z6.d, z9.d, z13.d[0]
    // vl128 state = 0x15037cbe
    __ dci(0x44fd0f2e);  // mls z14.d, z25.d, z13.d[1]
    // vl128 state = 0x9f481fdf
    __ dci(0x44f90e2f);  // mls z15.d, z17.d, z9.d[1]
    // vl128 state = 0x93fe8537
    __ dci(0x447d0e3f);  // mls z31.h, z17.h, z5.h[7]
    // vl128 state = 0x14b9edf2
    __ dci(0x44f90e2f);  // mls z15.d, z17.d, z9.d[1]
    // vl128 state = 0xde1c0d1c
    __ dci(0x44790c27);  // mls z7.h, z1.h, z1.h[7]
    // vl128 state = 0x563d614a
    __ dci(0x44790c23);  // mls z3.h, z1.h, z1.h[7]
    // vl128 state = 0x8c6d9baf
    __ dci(0x44f90c6b);  // mls z11.d, z3.d, z9.d[1]
    // vl128 state = 0x1a25c073
  }

  // Fetch the computed hash through the address of `state` so that it ends up
  // in w0 for the comparison below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by GetSVELaneCount(kQRegSize) - 1. The
    // first entry equals the final "vl128 state" value recorded above.
    uint32_t expected_hashes[] = {
        0x1a25c073,
        0xfbb2c945,
        0x932b8ab7,
        0x99370bee,
        0x44a15f80,
        0xae898f1d,
        0x97382827,
        0xafec059e,
        0xf11bc007,
        0x34c49b30,
        0x73b95606,
        0x77324772,
        0x9ad7d21b,
        0x0d0958a7,
        0xee4accc3,
        0x31d34df8,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
6707
// Hash-based regression test for the SVE2 widening multiply-accumulate
// encodings (SMLAL/UMLAL/SMLSL/UMLSL, bottom and top forms).
//
// Starting from the state set by SetInitialMachineState(), the test emits a
// fixed sequence of 50 raw encodings with dci(). The trailing comment on each
// dci() is the intended disassembly, and the "vl128 state" comment under it is
// the hash recorded for that point in the sequence (presumably at a 128-bit
// vector length - confirm against the generator). The hash produced by
// ComputeMachineStateHash() is then loaded into w0 and compared with the
// expected_hashes entry selected by core.GetSVELaneCount(kQRegSize) - 1.
//
// The order of the dci() calls must not change: each recorded state depends on
// every instruction before it.
TEST_SVE(sve2_mla_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x44935abe);  // umlslb z30.s, z21.h, z19.h
    // vl128 state = 0x4fac8e49
    __ dci(0x449358fa);  // umlslb z26.s, z7.h, z19.h
    // vl128 state = 0xca971f04
    __ dci(0x44935adb);  // umlslb z27.s, z22.h, z19.h
    // vl128 state = 0x5652564b
    __ dci(0x449359da);  // umlslb z26.s, z14.h, z19.h
    // vl128 state = 0xf2d81244
    __ dci(0x448349de);  // umlalb z30.s, z14.h, z3.h
    // vl128 state = 0x7cbaa548
    __ dci(0x448349d6);  // umlalb z22.s, z14.h, z3.h
    // vl128 state = 0x9e7b4915
    __ dci(0x44c34952);  // umlalb z18.d, z10.s, z3.s
    // vl128 state = 0x550af70e
    __ dci(0x44d349d3);  // umlalb z19.d, z14.s, z19.s
    // vl128 state = 0x676743b2
    __ dci(0x44d549d7);  // umlalb z23.d, z14.s, z21.s
    // vl128 state = 0x602e09e4
    __ dci(0x44d55ddf);  // umlslt z31.d, z14.s, z21.s
    // vl128 state = 0xd4c245de
    __ dci(0x44d55d1b);  // umlslt z27.d, z8.s, z21.s
    // vl128 state = 0x9c2c1cb4
    __ dci(0x44d5490b);  // umlalb z11.d, z8.s, z21.s
    // vl128 state = 0x8a702002
    __ dci(0x44554d0a);  // umlalt z10.h, z8.b, z21.b
    // vl128 state = 0x6758ce3c
    __ dci(0x4455452b);  // smlalt z11.h, z9.b, z21.b
    // vl128 state = 0x967e596e
    __ dci(0x44554529);  // smlalt z9.h, z9.b, z21.b
    // vl128 state = 0x1300909a
    __ dci(0x44474521);  // smlalt z1.h, z9.b, z7.b
    // vl128 state = 0x01ca26c1
    __ dci(0x44c74d25);  // umlalt z5.d, z9.s, z7.s
    // vl128 state = 0x8e6313b9
    __ dci(0x44cb4d24);  // umlalt z4.d, z9.s, z11.s
    // vl128 state = 0xdb41e004
    __ dci(0x44cb4d2c);  // umlalt z12.d, z9.s, z11.s
    // vl128 state = 0x941401ca
    __ dci(0x44c94da8);  // umlalt z8.d, z13.s, z9.s
    // vl128 state = 0x8a57334b
    __ dci(0x44594db8);  // umlalt z24.h, z13.b, z25.b
    // vl128 state = 0x94333fae
    __ dci(0x44585db0);  // umlslt z16.h, z13.b, z24.b
    // vl128 state = 0xf4fbe251
    __ dci(0x44585f80);  // umlslt z0.h, z28.b, z24.b
    // vl128 state = 0x1f5aeef3
    __ dci(0x445a5fc2);  // umlslt z2.h, z30.b, z26.b
    // vl128 state = 0x4b153d20
    __ dci(0x445a5fd2);  // umlslt z18.h, z30.b, z26.b
    // vl128 state = 0xbd82f0a2
    __ dci(0x445a5fd3);  // umlslt z19.h, z30.b, z26.b
    // vl128 state = 0x72d7083d
    __ dci(0x44525bd2);  // umlslb z18.h, z30.b, z18.b
    // vl128 state = 0x5018a138
    __ dci(0x44525bd6);  // umlslb z22.h, z30.b, z18.b
    // vl128 state = 0xcaf48a01
    __ dci(0x445053d2);  // smlslb z18.h, z30.b, z16.b
    // vl128 state = 0x76e2d850
    __ dci(0x44d153c2);  // smlslb z2.d, z30.s, z17.s
    // vl128 state = 0x8594d6c9
    __ dci(0x449353c3);  // smlslb z3.s, z30.h, z19.h
    // vl128 state = 0x8e0da89d
    __ dci(0x449152c7);  // smlslb z7.s, z22.h, z17.h
    // vl128 state = 0xe7d08864
    __ dci(0x44995285);  // smlslb z5.s, z20.h, z25.h
    // vl128 state = 0xd7c49fca
    __ dci(0x449953c1);  // smlslb z1.s, z30.h, z25.h
    // vl128 state = 0x3b648b39
    __ dci(0x449152c9);  // smlslb z9.s, z22.h, z17.h
    // vl128 state = 0x5b5bab94
    __ dci(0x449542cd);  // smlalb z13.s, z22.h, z21.h
    // vl128 state = 0x65282d76
    __ dci(0x449c42c9);  // smlalb z9.s, z22.h, z28.h
    // vl128 state = 0x94a92486
    __ dci(0x449c52f9);  // smlslb z25.s, z23.h, z28.h
    // vl128 state = 0xd4f62835
    __ dci(0x44dc5afd);  // umlslb z29.d, z23.s, z28.s
    // vl128 state = 0xf124c6a1
    __ dci(0x44dd58ff);  // umlslb z31.d, z7.s, z29.s
    // vl128 state = 0xbc694f1c
    __ dci(0x44dc587b);  // umlslb z27.d, z3.s, z28.s
    // vl128 state = 0xf1621eb2
    __ dci(0x44de596b);  // umlslb z11.d, z11.s, z30.s
    // vl128 state = 0x944b4b75
    __ dci(0x44de5969);  // umlslb z9.d, z11.s, z30.s
    // vl128 state = 0xa98a2c38
    __ dci(0x44db596d);  // umlslb z13.d, z11.s, z27.s
    // vl128 state = 0x6bd60807
    __ dci(0x44db5d5d);  // umlslt z29.d, z10.s, z27.s
    // vl128 state = 0x9c377b51
    __ dci(0x449b555f);  // smlslt z31.s, z10.h, z27.h
    // vl128 state = 0x7c81f1d5
    __ dci(0x449b555d);  // smlslt z29.s, z10.h, z27.h
    // vl128 state = 0xdaab1edb
    __ dci(0x44d35559);  // smlslt z25.d, z10.s, z19.s
    // vl128 state = 0xdc3f25f1
    __ dci(0x44d355f8);  // smlslt z24.d, z15.s, z19.s
    // vl128 state = 0x9c75a3cf
    __ dci(0x44d356f9);  // smlslt z25.d, z23.s, z19.s
    // vl128 state = 0x5b999178
  }

  // Fetch the computed hash through the address of `state` so that it ends up
  // in w0 for the comparison below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by GetSVELaneCount(kQRegSize) - 1. The
    // first entry equals the final "vl128 state" value recorded above.
    uint32_t expected_hashes[] = {
        0x5b999178,
        0xd6191e64,
        0x1f3bd2a1,
        0x1e0ac282,
        0x8d13f5d3,
        0x97157e8f,
        0x5d6e4134,
        0x8d2186b4,
        0x88078c65,
        0x6dd92db3,
        0xfcd02d21,
        0x81738dc2,
        0x644e3c06,
        0x9c9d2ac8,
        0xaaa43548,
        0x871e9b08,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
6851
// State-hash test for the vector forms of CMLA and SQRDCMLAH.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// ComputeMachineStateHash() folds the resulting machine state into a 32-bit
// value, and that value (loaded into w0) is compared with the entry of
// `expected_hashes` selected by the number of Q-sized lanes in an SVE vector.
TEST_SVE(sve2_complex_integer_multiply_add_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings in emission order. Each comment gives the disassembly and,
  // after `->`, the `vl128 state` value annotated for that instruction.
  const uint32_t kEncodings[] = {
      0x44dd2f34,  // cmla z20.d, z25.d, z29.d, #270 -> 0x12e9bd68
      0x44dd2f3c,  // cmla z28.d, z25.d, z29.d, #270 -> 0x4fd8ba3e
      0x44dc2734,  // cmla z20.d, z25.d, z28.d, #90 -> 0x9b11d64f
      0x44dc2e36,  // cmla z22.d, z17.d, z28.d, #270 -> 0x4658e6ae
      0x44dd2f34,  // cmla z20.d, z25.d, z29.d, #270 -> 0x5151ea16
      0x44dc2fb5,  // cmla z21.d, z29.d, z28.d, #270 -> 0x21c497cc
      0x44dc2fbd,  // cmla z29.d, z29.d, z28.d, #270 -> 0xe823fd46
      0x44dc2e3c,  // cmla z28.d, z17.d, z28.d, #270 -> 0xcc35cda6
      0x44dc2e34,  // cmla z20.d, z17.d, z28.d, #270 -> 0x963047c0
      0x44d42c30,  // cmla z16.d, z1.d, z20.d, #270 -> 0x5d2c5643
      0x44c42c60,  // cmla z0.d, z3.d, z4.d, #270 -> 0xfd400169
      0x44842464,  // cmla z4.s, z3.s, z4.s, #90 -> 0x00116098
      0x44842d60,  // cmla z0.s, z11.s, z4.s, #270 -> 0x582d46e3
      0x44042562,  // cmla z2.b, z11.b, z4.b, #90 -> 0x1bd70bf0
      0x44042420,  // cmla z0.b, z1.b, z4.b, #90 -> 0x7682807d
      0x44062401,  // cmla z1.b, z0.b, z6.b, #90 -> 0xaa3e2c64
      0x44042449,  // cmla z9.b, z2.b, z4.b, #90 -> 0xd81638f9
      0x44052059,  // cmla z25.b, z2.b, z5.b, #0 -> 0x38cb5d96
      0x4415305d,  // sqrdcmlah z29.b, z2.b, z21.b, #0 -> 0x4c6b85e0
      0x44153819,  // sqrdcmlah z25.b, z0.b, z21.b, #180 -> 0x229b5be9
      0x4405391b,  // sqrdcmlah z27.b, z8.b, z5.b, #180 -> 0x82611aec
      0x4405314b,  // sqrdcmlah z11.b, z10.b, z5.b, #0 -> 0xe58c48e0
      0x4407316a,  // sqrdcmlah z10.b, z11.b, z7.b, #0 -> 0x5282838a
      0x4407347a,  // sqrdcmlah z26.b, z3.b, z7.b, #90 -> 0x134a0891
      0x4413347e,  // sqrdcmlah z30.b, z3.b, z19.b, #90 -> 0x455ab9e0
      0x4443347f,  // sqrdcmlah z31.h, z3.h, z3.h, #90 -> 0x030d9d2c
      0x444b307e,  // sqrdcmlah z30.h, z3.h, z11.h, #0 -> 0x91a95a2c
      0x444b301f,  // sqrdcmlah z31.h, z0.h, z11.h, #0 -> 0x0f1c8468
      0x4409300f,  // sqrdcmlah z15.b, z0.b, z9.b, #0 -> 0x95f802b7
      0x440c300e,  // sqrdcmlah z14.b, z0.b, z12.b, #0 -> 0x5fa6d2c6
      0x4404310c,  // sqrdcmlah z12.b, z8.b, z4.b, #0 -> 0x192b05a4
      0x4415310d,  // sqrdcmlah z13.b, z8.b, z21.b, #0 -> 0xa8a8d37f
      0x4414350f,  // sqrdcmlah z15.b, z8.b, z20.b, #90 -> 0xcd890d8c
      0x4454354d,  // sqrdcmlah z13.h, z10.h, z20.h, #90 -> 0x91ab863e
      0x444435c5,  // sqrdcmlah z5.h, z14.h, z4.h, #90 -> 0x41bbc90c
      0x444c34c7,  // sqrdcmlah z7.h, z6.h, z12.h, #90 -> 0xb6329344
      0x444836c6,  // sqrdcmlah z6.h, z22.h, z8.h, #90 -> 0xdf5f443c
      0x444836d6,  // sqrdcmlah z22.h, z22.h, z8.h, #90 -> 0x719a2e70
      0x44403694,  // sqrdcmlah z20.h, z20.h, z0.h, #90 -> 0x28a64934
      0x4449369c,  // sqrdcmlah z28.h, z20.h, z9.h, #90 -> 0x5d41ba84
  };

  {
    // The scope is sized for exactly the 40 table entries emitted below.
    ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Hash the machine state into `state_hash`, then load the result into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized lanes per vector) - 1; the first entry is
    // the final `vl128 state` value from the table above.
    uint32_t expected_hashes[] = {
        0x5d41ba84,
        0xd5e52f4d,
        0x9f627c0d,
        0x111f21a7,
        0x5d7b356e,
        0x1f345c0e,
        0xd881296e,
        0x819f9091,
        0x59823550,
        0xbe2162c7,
        0x5f5dca40,
        0xad7e429e,
        0x4f66661f,
        0x7c5fbca0,
        0x819ff997,
        0x68ebdb56,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
6975
// State-hash test for the indexed forms of SQRDCMLAH.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// ComputeMachineStateHash() folds the resulting machine state into a 32-bit
// value, and that value (loaded into w0) is compared with the entry of
// `expected_hashes` selected by the number of Q-sized lanes in an SVE vector.
TEST_SVE(sve2_complex_integer_multiply_add_indexed) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings in emission order. Each comment gives the disassembly and,
  // after `->`, the `vl128 state` value annotated for that instruction.
  const uint32_t kEncodings[] = {
      0x44fd7d52,  // sqrdcmlah z18.s, z10.s, z13.s[1], #270 -> 0x5c66baad
      0x44fd7c13,  // sqrdcmlah z19.s, z0.s, z13.s[1], #270 -> 0xac8c451b
      0x44f97e11,  // sqrdcmlah z17.s, z16.s, z9.s[1], #270 -> 0x02ebccdb
      0x44e97615,  // sqrdcmlah z21.s, z16.s, z9.s[0], #90 -> 0xe43b1032
      0x44e97614,  // sqrdcmlah z20.s, z16.s, z9.s[0], #90 -> 0xa28d9898
      0x44e17635,  // sqrdcmlah z21.s, z17.s, z1.s[0], #90 -> 0x021764c6
      0x44e17634,  // sqrdcmlah z20.s, z17.s, z1.s[0], #90 -> 0x812dbf22
      0x44f07635,  // sqrdcmlah z21.s, z17.s, z0.s[1], #90 -> 0x5e87a59e
      0x44f07465,  // sqrdcmlah z5.s, z3.s, z0.s[1], #90 -> 0xd1a78d9d
      0x44f87675,  // sqrdcmlah z21.s, z19.s, z8.s[1], #90 -> 0xd4500975
      0x44b87e7d,  // sqrdcmlah z29.h, z19.h, z0.h[3], #270 -> 0x765230ab
      0x44b876f9,  // sqrdcmlah z25.h, z23.h, z0.h[3], #90 -> 0xca9c5bb4
      0x44f874fb,  // sqrdcmlah z27.s, z7.s, z8.s[1], #90 -> 0xa4bc044a
      0x44f070fa,  // sqrdcmlah z26.s, z7.s, z0.s[1], #0 -> 0xd0eaa1df
      0x44f07038,  // sqrdcmlah z24.s, z1.s, z0.s[1], #0 -> 0x80836f9f
      0x44b17030,  // sqrdcmlah z16.h, z1.h, z1.h[2], #0 -> 0x59ffa1ce
      0x44b17032,  // sqrdcmlah z18.h, z1.h, z1.h[2], #0 -> 0xdb8beca5
      0x44b07430,  // sqrdcmlah z16.h, z1.h, z0.h[2], #90 -> 0xe5b6a0e3
      0x44b07438,  // sqrdcmlah z24.h, z1.h, z0.h[2], #90 -> 0x19cc8c20
      0x44b0743a,  // sqrdcmlah z26.h, z1.h, z0.h[2], #90 -> 0x19c819af
  };

  {
    // The scope is sized for exactly the 20 table entries emitted below.
    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Hash the machine state into `state_hash`, then load the result into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized lanes per vector) - 1; the first entry is
    // the final `vl128 state` value from the table above.
    uint32_t expected_hashes[] = {
        0x19c819af,
        0xbb2225f2,
        0x7e54f513,
        0xdcbf6f0f,
        0x2bfdc97d,
        0x48890c54,
        0x65542c02,
        0xaef6b224,
        0x993b14fd,
        0x244d27c5,
        0xe8767ba8,
        0x4397a148,
        0xb3efcd2e,
        0xb5894aba,
        0x2a0f6f7a,
        0xbe45142c,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7059
// State-hash test for the vector forms of SQDMLALB, SQDMLALT, SQDMLSLB and
// SQDMLSLT.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// ComputeMachineStateHash() folds the resulting machine state into a 32-bit
// value, and that value (loaded into w0) is compared with the entry of
// `expected_hashes` selected by the number of Q-sized lanes in an SVE vector.
TEST_SVE(sve2_saturating_multiply_add_long_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings in emission order. Each comment gives the disassembly and,
  // after `->`, the `vl128 state` value annotated for that instruction.
  const uint32_t kEncodings[] = {
      0x44db629b,  // sqdmlalb z27.d, z20.s, z27.s -> 0x61e408e4
      0x44db631f,  // sqdmlalb z31.d, z24.s, z27.s -> 0xf146813f
      0x44da6b1d,  // sqdmlslb z29.d, z24.s, z26.s -> 0xb8d07371
      0x44da6a35,  // sqdmlslb z21.d, z17.s, z26.s -> 0xaf43cc88
      0x444a6a3d,  // sqdmlslb z29.h, z17.b, z10.b -> 0xba4c5067
      0x444a6a39,  // sqdmlslb z25.h, z17.b, z10.b -> 0x396202c3
      0x445a6829,  // sqdmlslb z9.h, z1.b, z26.b -> 0x22095f7f
      0x445a6b28,  // sqdmlslb z8.h, z25.b, z26.b -> 0xa9516b4b
      0x44da6b69,  // sqdmlslb z9.d, z27.s, z26.s -> 0x1f048226
      0x44da616d,  // sqdmlalb z13.d, z11.s, z26.s -> 0x0fdd982f
      0x4458616f,  // sqdmlalb z15.h, z11.b, z24.b -> 0x461ba137
      0x4449617f,  // sqdmlalb z31.h, z11.b, z9.b -> 0xd1071b0c
      0x4459614f,  // sqdmlalb z15.h, z10.b, z25.b -> 0x0fa6bae7
      0x4458654d,  // sqdmlalt z13.h, z10.b, z24.b -> 0xebd08a80
      0x44586d05,  // sqdmlslt z5.h, z8.b, z24.b -> 0xd4c41665
      0x44506d84,  // sqdmlslt z4.h, z12.b, z16.b -> 0x80f619f9
      0x44506fc6,  // sqdmlslt z6.h, z30.b, z16.b -> 0xb588af21
      0x44566fc4,  // sqdmlslt z4.h, z30.b, z22.b -> 0x4dd8437a
      0x44566f0c,  // sqdmlslt z12.h, z24.b, z22.b -> 0x48ca6e5c
      0x44566f0e,  // sqdmlslt z14.h, z24.b, z22.b -> 0x02d6f977
      0x44566746,  // sqdmlalt z6.h, z26.b, z22.b -> 0x179f59f4
      0x445767c4,  // sqdmlalt z4.h, z30.b, z23.b -> 0xf2d2823c
      0x44d667c0,  // sqdmlalt z0.d, z30.s, z22.s -> 0x404c277e
      0x44566742,  // sqdmlalt z2.h, z26.b, z22.b -> 0x986a72c1
      0x44c6674a,  // sqdmlalt z10.d, z26.s, z6.s -> 0xbb8044ab
      0x44c66742,  // sqdmlalt z2.d, z26.s, z6.s -> 0x9f5b244b
      0x44ce6706,  // sqdmlalt z6.d, z24.s, z14.s -> 0xc6ce6266
      0x44ce670e,  // sqdmlalt z14.d, z24.s, z14.s -> 0xc9e1a461
      0x44de6746,  // sqdmlalt z6.d, z26.s, z30.s -> 0x9f133504
      0x44dc6342,  // sqdmlalb z2.d, z26.s, z28.s -> 0x42deb468
      0x44d46366,  // sqdmlalb z6.d, z27.s, z20.s -> 0xb3436cd4
      0x44d5626e,  // sqdmlalb z14.d, z19.s, z21.s -> 0x0e0533ac
      0x44d5646f,  // sqdmlalt z15.d, z3.s, z21.s -> 0x92d04e7b
      0x44d36467,  // sqdmlalt z7.d, z3.s, z19.s -> 0xd9fa8b4d
      0x44d360ef,  // sqdmlalb z15.d, z7.s, z19.s -> 0x9c9a5778
      0x44d3646b,  // sqdmlalt z11.d, z3.s, z19.s -> 0x40d7c923
      0x4492646f,  // sqdmlalt z15.s, z3.h, z18.h -> 0x0b5b2334
      0x4492647f,  // sqdmlalt z31.s, z3.h, z18.h -> 0xfe6302c1
      0x4494647d,  // sqdmlalt z29.s, z3.h, z20.h -> 0xe3c05a37
      0x4484666d,  // sqdmlalt z13.s, z19.h, z4.h -> 0x15169e94
  };

  {
    // The scope is sized for exactly the 40 table entries emitted below.
    ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Hash the machine state into `state_hash`, then load the result into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized lanes per vector) - 1; the first entry is
    // the final `vl128 state` value from the table above.
    uint32_t expected_hashes[] = {
        0x15169e94,
        0x6101102c,
        0xa5586d26,
        0x3fbf4f9f,
        0x8e62994d,
        0x4d77a9e5,
        0x4ceadc9e,
        0x8247db61,
        0x4aa10859,
        0x0b3280b3,
        0x015d75ea,
        0x1cf4825e,
        0xda7d3fea,
        0xc24bd624,
        0x60ee565a,
        0x7ac92c39,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7183
// State-hash test for SQDMLALBT and SQDMLSLBT.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// ComputeMachineStateHash() folds the resulting machine state into a 32-bit
// value, and that value (loaded into w0) is compared with the entry of
// `expected_hashes` selected by the number of Q-sized lanes in an SVE vector.
TEST_SVE(sve2_saturating_multiply_add_interleaved_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings in emission order. Each comment gives the disassembly and,
  // after `->`, the `vl128 state` value annotated for that instruction.
  const uint32_t kEncodings[] = {
      0x449e0ac6,  // sqdmlalbt z6.s, z22.h, z30.h -> 0x1f0ef37c
      0x449c0ae4,  // sqdmlalbt z4.s, z23.h, z28.h -> 0xa80bf2c8
      0x449c0ae6,  // sqdmlalbt z6.s, z23.h, z28.h -> 0x4c5b0e8f
      0x449e0aae,  // sqdmlalbt z14.s, z21.h, z30.h -> 0xa6482041
      0x449e0aaf,  // sqdmlalbt z15.s, z21.h, z30.h -> 0x6ef82b7a
      0x449c0a2b,  // sqdmlalbt z11.s, z17.h, z28.h -> 0x0070a7fa
      0x449e0829,  // sqdmlalbt z9.s, z1.h, z30.h -> 0x08b9efc6
      0x449e0c61,  // sqdmlslbt z1.s, z3.h, z30.h -> 0xebd25c16
      0x449e0c60,  // sqdmlslbt z0.s, z3.h, z30.h -> 0x0926abbe
      0x449e0c70,  // sqdmlslbt z16.s, z3.h, z30.h -> 0xe9d3e5a7
      0x449f0cf4,  // sqdmlslbt z20.s, z7.h, z31.h -> 0xf062523d
      0x449f08b5,  // sqdmlalbt z21.s, z5.h, z31.h -> 0x6034c14e
      0x449f08a5,  // sqdmlalbt z5.s, z5.h, z31.h -> 0x0a73c74b
      0x448e08b5,  // sqdmlalbt z21.s, z5.h, z14.h -> 0xa4af2700
      0x448c08e5,  // sqdmlalbt z5.s, z7.h, z12.h -> 0x7499c587
      0x448c08e1,  // sqdmlalbt z1.s, z7.h, z12.h -> 0x968bca0e
      0x448c0971,  // sqdmlalbt z17.s, z11.h, z12.h -> 0xd7890449
      0x448f0975,  // sqdmlalbt z21.s, z11.h, z15.h -> 0xa2393863
      0x448f0977,  // sqdmlalbt z23.s, z11.h, z15.h -> 0x0f7d9688
      0x449f093f,  // sqdmlalbt z31.s, z9.h, z31.h -> 0xeb16ca99
      0x449f09f7,  // sqdmlalbt z23.s, z15.h, z31.h -> 0x5eca8b00
      0x449f0987,  // sqdmlalbt z7.s, z12.h, z31.h -> 0xf8f22744
      0x449f0a83,  // sqdmlalbt z3.s, z20.h, z31.h -> 0xc20d54f5
      0x449b0ac1,  // sqdmlalbt z1.s, z22.h, z27.h -> 0xf371a13b
      0x449b0aa9,  // sqdmlalbt z9.s, z21.h, z27.h -> 0xffae55ce
      0x449b0ab9,  // sqdmlalbt z25.s, z21.h, z27.h -> 0x0c5ab866
      0x44d30aa9,  // sqdmlalbt z9.d, z21.s, z19.s -> 0x388bfe27
      0x44d30aab,  // sqdmlalbt z11.d, z21.s, z19.s -> 0x6dc15ec8
      0x44d70baf,  // sqdmlalbt z15.d, z29.s, z23.s -> 0x6a858021
      0x44d70ba7,  // sqdmlalbt z7.d, z29.s, z23.s -> 0x52416517
  };

  {
    // The scope is sized for exactly the 30 table entries emitted below.
    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Hash the machine state into `state_hash`, then load the result into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized lanes per vector) - 1; the first entry is
    // the final `vl128 state` value from the table above.
    uint32_t expected_hashes[] = {
        0x52416517,
        0x1a625e10,
        0x3eaaa30f,
        0x0eefe820,
        0x9e2f7744,
        0x3dbc3206,
        0xca85b926,
        0x9428c809,
        0x7c35818c,
        0xb8bc3648,
        0x5b215c50,
        0xbdb56ba5,
        0xe4e4bc54,
        0x69ba132f,
        0xa498b17a,
        0xf482b2a6,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7287
// State-hash test for the indexed forms of SQDMLALB, SQDMLALT, SQDMLSLB and
// SQDMLSLT.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// ComputeMachineStateHash() folds the resulting machine state into a 32-bit
// value, and that value (loaded into w0) is compared with the entry of
// `expected_hashes` selected by the number of Q-sized lanes in an SVE vector.
TEST_SVE(sve2_saturating_multiply_add_long_indexed) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings in emission order. Each comment gives the disassembly and,
  // after `->`, the `vl128 state` value annotated for that instruction.
  const uint32_t kEncodings[] = {
      0x44f52e3d,  // sqdmlalt z29.d, z17.s, z5.s[3] -> 0x2a284ede
      0x44f52e3c,  // sqdmlalt z28.d, z17.s, z5.s[3] -> 0x48a615e9
      0x44f72c3d,  // sqdmlalt z29.d, z1.s, z7.s[3] -> 0x1bbe9cc5
      0x44b62c35,  // sqdmlalt z21.s, z1.h, z6.h[5] -> 0x99966225
      0x44b624b7,  // sqdmlalt z23.s, z5.h, z6.h[4] -> 0x36da4a3a
      0x44f626b6,  // sqdmlalt z22.d, z21.s, z6.s[2] -> 0xc009e514
      0x44f62226,  // sqdmlalb z6.d, z17.s, z6.s[2] -> 0x2140ee4b
      0x44fa222e,  // sqdmlalb z14.d, z17.s, z10.s[2] -> 0xf78c8bec
      0x44fa2aac,  // sqdmlalb z12.d, z21.s, z10.s[3] -> 0x329238c6
      0x44fa2abc,  // sqdmlalb z28.d, z21.s, z10.s[3] -> 0xadc9f9db
      0x44fa2aac,  // sqdmlalb z12.d, z21.s, z10.s[3] -> 0x877f64cf
      0x44ba2a88,  // sqdmlalb z8.s, z20.h, z2.h[7] -> 0x4e4a3117
      0x44fb2a89,  // sqdmlalb z9.d, z20.s, z11.s[3] -> 0xe26b041b
      0x44f32ab9,  // sqdmlalb z25.d, z21.s, z3.s[3] -> 0xbcf4e0b2
      0x44e328bd,  // sqdmlalb z29.d, z5.s, z3.s[1] -> 0x31391cc2
      0x44f228ad,  // sqdmlalb z13.d, z5.s, z2.s[3] -> 0xf4c6c098
      0x44e238af,  // sqdmlslb z15.d, z5.s, z2.s[1] -> 0x6e7cb20c
      0x44e639ad,  // sqdmlslb z13.d, z13.s, z6.s[1] -> 0xed16e292
      0x44a63daf,  // sqdmlslt z15.s, z13.h, z6.h[1] -> 0x7c0c3a9a
      0x44ae3cbf,  // sqdmlslt z31.s, z5.h, z6.h[3] -> 0x0e2dce8d
      0x44a634b7,  // sqdmlslt z23.s, z5.h, z6.h[0] -> 0xf3eeab27
      0x44e234b5,  // sqdmlslt z21.d, z5.s, z2.s[0] -> 0x55193209
      0x44a23437,  // sqdmlslt z23.s, z1.h, z2.h[0] -> 0x7652b538
      0x44a63535,  // sqdmlslt z21.s, z9.h, z6.h[0] -> 0x76046ab4
      0x44a235b4,  // sqdmlslt z20.s, z13.h, z2.h[0] -> 0x2f23fd0d
      0x44a234e4,  // sqdmlslt z4.s, z7.h, z2.h[0] -> 0x2a50774c
      0x44a234ec,  // sqdmlslt z12.s, z7.h, z2.h[0] -> 0x01ea8843
      0x44a324e8,  // sqdmlalt z8.s, z7.h, z3.h[0] -> 0xed54a157
      0x44a334c9,  // sqdmlslt z9.s, z6.h, z3.h[0] -> 0x39e0227b
      0x44a324f9,  // sqdmlalt z25.s, z7.h, z3.h[0] -> 0xf163fa0b
      0x44a224d8,  // sqdmlalt z24.s, z6.h, z2.h[0] -> 0xbb4e0d24
      0x44b22448,  // sqdmlalt z8.s, z2.h, z2.h[4] -> 0x26c102cc
      0x44f224d8,  // sqdmlalt z24.d, z6.s, z2.s[2] -> 0x40f79dde
      0x44f220f9,  // sqdmlalb z25.d, z7.s, z2.s[2] -> 0xf9d62034
      0x44f020a9,  // sqdmlalb z9.d, z5.s, z0.s[2] -> 0x2b78be2f
      0x44f424ad,  // sqdmlalt z13.d, z5.s, z4.s[2] -> 0xf0701e23
      0x44f430a5,  // sqdmlslb z5.d, z5.s, z4.s[2] -> 0x992b12d6
      0x44f130a4,  // sqdmlslb z4.d, z5.s, z1.s[2] -> 0x50292759
      0x44f130ac,  // sqdmlslb z12.d, z5.s, z1.s[2] -> 0x795462f2
      0x44f3302d,  // sqdmlslb z13.d, z1.s, z3.s[2] -> 0x8ac29815
      0x44e3300c,  // sqdmlslb z12.d, z0.s, z3.s[0] -> 0x842471eb
      0x44e3300d,  // sqdmlslb z13.d, z0.s, z3.s[0] -> 0x28762af1
      0x44eb321d,  // sqdmlslb z29.d, z16.s, z11.s[0] -> 0x352de071
      0x44ef3259,  // sqdmlslb z25.d, z18.s, z15.s[0] -> 0x90a4cf15
      0x44ff3349,  // sqdmlslb z9.d, z26.s, z15.s[2] -> 0x6be7e76a
      0x44fb3319,  // sqdmlslb z25.d, z24.s, z11.s[2] -> 0x7023e2de
      0x44bb3b18,  // sqdmlslb z24.s, z24.h, z3.h[7] -> 0xad48664c
      0x44bb3b19,  // sqdmlslb z25.s, z24.h, z3.h[7] -> 0xc7d8239b
      0x44bb3b11,  // sqdmlslb z17.s, z24.h, z3.h[7] -> 0x0d9b2b9b
      0x44f33b15,  // sqdmlslb z21.d, z24.s, z3.s[3] -> 0xbdb9c559
  };

  {
    // The scope is sized for exactly the 50 table entries emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Hash the machine state into `state_hash`, then load the result into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized lanes per vector) - 1; the first entry is
    // the final `vl128 state` value from the table above.
    uint32_t expected_hashes[] = {
        0xbdb9c559,
        0x0c2f83d5,
        0x3e1f2607,
        0x2db954ea,
        0xff33857d,
        0xd567c205,
        0x8b5ced4c,
        0x19ecc4d9,
        0x8581949e,
        0x30f1a921,
        0x8c94071b,
        0xb9ad4919,
        0x32dbb108,
        0x634f9cd4,
        0x2a122429,
        0xdae127f1,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7431
// State-hash test for the vector forms of FMLALB, FMLALT and FMLSLT.
//
// A fixed sequence of raw encodings is emitted after
// SetInitialMachineState(&masm, kFpInputSet), ComputeMachineStateHash() folds
// the resulting machine state into a 32-bit value, and that value (loaded into
// w0) is compared with the entry of `expected_hashes` selected by the number
// of Q-sized lanes in an SVE vector.
TEST_SVE(sve2_floating_multiply_add_long_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm, kFpInputSet);
  // state = 0x1e5cbcac

  // Raw encodings in emission order. Each comment gives the disassembly and,
  // after `->`, the `vl128 state` value annotated for that instruction.
  const uint32_t kEncodings[] = {
      0x64bfa635,  // fmlslt z21.s, z17.h, z31.h -> 0x48383595
      0x64bf867d,  // fmlalt z29.s, z19.h, z31.h -> 0xf2812c0e
      0x64af877c,  // fmlalt z28.s, z27.h, z15.h -> 0x161daf06
      0x64af8774,  // fmlalt z20.s, z27.h, z15.h -> 0x8146f2bf
      0x64be877c,  // fmlalt z28.s, z27.h, z30.h -> 0x90bcd864
      0x64bd876c,  // fmlalt z12.s, z27.h, z29.h -> 0x22b60b78
      0x64bf8728,  // fmlalt z8.s, z25.h, z31.h -> 0x2c9ce51a
      0x64bf836a,  // fmlalb z10.s, z27.h, z31.h -> 0x40e6b398
      0x64bf87eb,  // fmlalt z11.s, z31.h, z31.h -> 0x479c4a98
      0x64bf87e9,  // fmlalt z9.s, z31.h, z31.h -> 0x25c987ad
      0x64b78779,  // fmlalt z25.s, z27.h, z23.h -> 0xb4fbc429
      0x64b1877b,  // fmlalt z27.s, z27.h, z17.h -> 0x390616d8
      0x64b1871f,  // fmlalt z31.s, z24.h, z17.h -> 0x7f24d2bf
      0x64b5878f,  // fmlalt z15.s, z28.h, z21.h -> 0x01a90318
      0x64b4870d,  // fmlalt z13.s, z24.h, z20.h -> 0x08789c2c
      0x64b48709,  // fmlalt z9.s, z24.h, z20.h -> 0x169f9b57
      0x64b48779,  // fmlalt z25.s, z27.h, z20.h -> 0xad4f23d7
      0x64bc8671,  // fmlalt z17.s, z19.h, z28.h -> 0xf86b0a64
      0x64b98673,  // fmlalt z19.s, z19.h, z25.h -> 0x78a848b2
      0x64b18623,  // fmlalt z3.s, z17.h, z17.h -> 0xcac211c9
      0x64b18642,  // fmlalt z2.s, z18.h, z17.h -> 0x9afcbe3f
      0x64b1a6c0,  // fmlslt z0.s, z22.h, z17.h -> 0x0047e4b2
      0x64b086c4,  // fmlalt z4.s, z22.h, z16.h -> 0x203324b5
      0x64b28645,  // fmlalt z5.s, z18.h, z18.h -> 0x7340c432
      0x64b28264,  // fmlalb z4.s, z19.h, z18.h -> 0x6dc657a9
      0x64b28765,  // fmlalt z5.s, z27.h, z18.h -> 0xa5d3889b
      0x64ba8561,  // fmlalt z1.s, z11.h, z26.h -> 0x5bbd2dd9
      0x64aa8543,  // fmlalt z3.s, z10.h, z10.h -> 0xa65ec305
      0x64ae8141,  // fmlalb z1.s, z10.h, z14.h -> 0xd23d588c
      0x64ae80c3,  // fmlalb z3.s, z6.h, z14.h -> 0x5a082bbc
  };

  {
    // The scope is sized for exactly the 30 table entries emitted below.
    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Hash the machine state into `state_hash`, then load the result into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized lanes per vector) - 1; the first entry is
    // the final `vl128 state` value from the table above.
    uint32_t expected_hashes[] = {
        0x5a082bbc,
        0x23c41852,
        0xf462f328,
        0x6fa4d12b,
        0x5e5f3e79,
        0x9939c7e6,
        0x0ed39313,
        0x2911107c,
        0x18f77b9a,
        0x7226d5b3,
        0x05df3c07,
        0x1653749c,
        0xcb4f6acf,
        0x4c5f0755,
        0xc4eed654,
        0x47893eeb,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7535
// State-hash test for the indexed forms of SMLALB/T, UMLALB/T, SMLSLB/T and
// UMLSLB/T.
//
// A fixed sequence of raw encodings is emitted after SetInitialMachineState(),
// ComputeMachineStateHash() folds the resulting machine state into a 32-bit
// value, and that value (loaded into w0) is compared with the entry of
// `expected_hashes` selected by the number of Q-sized lanes in an SVE vector.
TEST_SVE(sve2_mla_long_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Raw encodings in emission order. Each comment gives the disassembly and,
  // after `->`, the `vl128 state` value annotated for that instruction.
  const uint32_t kEncodings[] = {
      0x44ea8d67,  // smlalt z7.d, z11.s, z10.s[1] -> 0xd08dbe24
      0x44ea9d2f,  // umlalt z15.d, z9.s, z10.s[1] -> 0x56f6f237
      0x44ea9d2d,  // umlalt z13.d, z9.s, z10.s[1] -> 0x00f89e4d
      0x44eb992f,  // umlalb z15.d, z9.s, z11.s[1] -> 0xca4e469e
      0x44ab99ae,  // umlalb z14.s, z13.h, z3.h[3] -> 0xd4b18276
      0x44ad99be,  // umlalb z30.s, z13.h, z5.h[3] -> 0x8650a79e
      0x44ad99ba,  // umlalb z26.s, z13.h, z5.h[3] -> 0x6fa1a501
      0x44adb9f2,  // umlslb z18.s, z15.h, z5.h[3] -> 0x1a56a5d4
      0x44bda9f3,  // smlslb z19.s, z15.h, z5.h[7] -> 0xfdb18057
      0x44b9a1fb,  // smlslb z27.s, z15.h, z1.h[6] -> 0xb46b6c28
      0x44b8a1b3,  // smlslb z19.s, z13.h, z0.h[6] -> 0x623c62c3
      0x44bc81b1,  // smlalb z17.s, z13.h, z4.h[6] -> 0x2abab4d3
      0x44bc82b0,  // smlalb z16.s, z21.h, z4.h[6] -> 0x7a028731
      0x44ac92b8,  // umlalb z24.s, z21.h, z4.h[2] -> 0xf48f6936
      0x44a4923a,  // umlalb z26.s, z17.h, z4.h[0] -> 0xbcdf888d
      0x44b49a3e,  // umlalb z30.s, z17.h, z4.h[5] -> 0x5060778e
      0x44b69a1c,  // umlalb z28.s, z16.h, z6.h[5] -> 0x16da3835
      0x44b6b218,  // umlslb z24.s, z16.h, z6.h[4] -> 0xac7fb4d0
      0x44b2b25a,  // umlslb z26.s, z18.h, z2.h[4] -> 0x8d05433b
      0x44b2ba0a,  // umlslb z10.s, z16.h, z2.h[5] -> 0x62630101
      0x44b29b08,  // umlalb z8.s, z24.h, z2.h[5] -> 0x31ae445b
      0x44b29b00,  // umlalb z0.s, z24.h, z2.h[5] -> 0x539a5875
      0x44b29e08,  // umlalt z8.s, z16.h, z2.h[5] -> 0x07d4bf73
      0x44b29eaa,  // umlalt z10.s, z21.h, z2.h[5] -> 0x314f48a8
      0x44b2be2e,  // umlslt z14.s, z17.h, z2.h[5] -> 0x91bd2c17
      0x44b2be3e,  // umlslt z30.s, z17.h, z2.h[5] -> 0x4cbf4360
      0x44f2be7a,  // umlslt z26.d, z19.s, z2.s[3] -> 0xe94e76a9
      0x44f2ae4a,  // smlslt z10.d, z18.s, z2.s[3] -> 0xd0c2c4cc
      0x44faae6e,  // smlslt z14.d, z19.s, z10.s[3] -> 0xc64d6839
      0x44faae6f,  // smlslt z15.d, z19.s, z10.s[3] -> 0xa74358aa
      0x44faae67,  // smlslt z7.d, z19.s, z10.s[3] -> 0xb8d9664b
      0x44fa8e57,  // smlalt z23.d, z18.s, z10.s[3] -> 0xf1032ab4
      0x44fa8c67,  // smlalt z7.d, z3.s, z10.s[3] -> 0x763732f4
      0x44eaac66,  // smlslt z6.d, z3.s, z10.s[1] -> 0xdcf39367
      0x44eaa456,  // smlslt z22.d, z2.s, z10.s[0] -> 0x5ea67d82
      0x44aea45e,  // smlslt z30.s, z2.h, z6.h[2] -> 0x55da0908
      0x44aaa64e,  // smlslt z14.s, z18.h, z2.h[2] -> 0x69d105f5
      0x44baa75e,  // smlslt z30.s, z26.h, z2.h[6] -> 0x191bc065
      0x44baa75a,  // smlslt z26.s, z26.h, z2.h[6] -> 0xbf62d2a0
      0x44eaa75b,  // smlslt z27.d, z26.s, z10.s[0] -> 0x43803a21
      0x44eabf5f,  // umlslt z31.d, z26.s, z10.s[1] -> 0x0b33725c
      0x44ebbd57,  // umlslt z23.d, z10.s, z11.s[1] -> 0x0059a0f5
      0x44abbf55,  // umlslt z21.s, z26.h, z3.h[3] -> 0xb587057f
      0x44abab5d,  // smlslb z29.s, z26.h, z3.h[3] -> 0x0bfa30c6
      0x44abab5c,  // smlslb z28.s, z26.h, z3.h[3] -> 0x151045b4
      0x44abaf78,  // smlslt z24.s, z27.h, z3.h[3] -> 0xedb7fca9
      0x44aaa77c,  // smlslt z28.s, z27.h, z2.h[2] -> 0xb68216f9
      0x44aaa178,  // smlslb z24.s, z11.h, z2.h[2] -> 0x35447b11
      0x44aa81fa,  // smlalb z26.s, z15.h, z2.h[2] -> 0xf532285f
      0x44aa8198,  // smlalb z24.s, z12.h, z2.h[2] -> 0xd414889b
  };

  {
    // The scope is sized for exactly the 50 table entries emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    for (uint32_t encoding : kEncodings) {
      __ dci(encoding);
    }
  }

  // Hash the machine state into `state_hash`, then load the result into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Indexed by (number of Q-sized lanes per vector) - 1; the first entry is
    // the final `vl128 state` value from the table above.
    uint32_t expected_hashes[] = {
        0xd414889b,
        0x79d8f659,
        0xe2c8f06b,
        0x91aadf3d,
        0xffb92c3e,
        0xc2d3138e,
        0xdd9f4396,
        0xce39a88e,
        0xfe68a5ca,
        0xdcb072b2,
        0x3756ede6,
        0x5c2eef22,
        0x01fd02a4,
        0xdd8d4890,
        0x87500dc9,
        0x8c895325,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7679
// Runs a fixed sequence of raw SVE2 encodings which, per their disassembly
// annotations, are the indexed widening multiplies smullb, smullt, umullb and
// umullt. The machine state is then hashed and, when the test can run, the
// hash is compared against a per-vector-length reference value.
TEST_SVE(sve2_mul_long_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Encodings in emission order. Each entry is annotated with its disassembly;
  // the value after "->" is the "vl128 state" annotation recorded for the
  // point just after that instruction.
  static const uint32_t kEncodings[] = {
      0x44f1d492,  // umullt z18.d, z4.s, z1.s[2] -> 0x4377a821
      0x44fdd490,  // umullt z16.d, z4.s, z13.s[2] -> 0x5879cb00
      0x44fdc080,  // smullb z0.d, z4.s, z13.s[2] -> 0xbe0f85f8
      0x44fdc081,  // smullb z1.d, z4.s, z13.s[2] -> 0xa0eb0d63
      0x44fcc000,  // smullb z0.d, z0.s, z12.s[2] -> 0xf023feb2
      0x44ffc001,  // smullb z1.d, z0.s, z15.s[2] -> 0xcc0dcc10
      0x44ffc0c9,  // smullb z9.d, z6.s, z15.s[2] -> 0x8e0d2525
      0x44f7d0c8,  // umullb z8.d, z6.s, z7.s[2] -> 0xaf711253
      0x44b7d080,  // umullb z0.s, z4.h, z7.h[4] -> 0x8cea3501
      0x44f7d290,  // umullb z16.d, z20.s, z7.s[2] -> 0x09be9a84
      0x44f6da92,  // umullb z18.d, z20.s, z6.s[3] -> 0x3906715f
      0x44fed296,  // umullb z22.d, z20.s, z14.s[2] -> 0xf399bb76
      0x44f6c292,  // smullb z18.d, z20.s, z6.s[2] -> 0x33ceff98
      0x44e6c2a2,  // smullb z2.d, z21.s, z6.s[0] -> 0x00765739
      0x44e6c323,  // smullb z3.d, z25.s, z6.s[0] -> 0x3dad5b1f
      0x44e6c333,  // smullb z19.d, z25.s, z6.s[0] -> 0xc5b39601
      0x44e7c377,  // smullb z23.d, z27.s, z7.s[0] -> 0x134b3d1f
      0x44e7d3ff,  // umullb z31.d, z31.s, z7.s[0] -> 0xc4be3961
      0x44e7d3fe,  // umullb z30.d, z31.s, z7.s[0] -> 0x195e406b
      0x44e7c3da,  // smullb z26.d, z30.s, z7.s[0] -> 0xae2522f9
      0x44e7c2fe,  // smullb z30.d, z23.s, z7.s[0] -> 0xed267bfb
      0x44e3c3f6,  // smullb z22.d, z31.s, z3.s[0] -> 0x6f6eeec4
      0x44f3c2f2,  // smullb z18.d, z23.s, z3.s[2] -> 0x1689afdf
      0x44f3c2e2,  // smullb z2.d, z23.s, z3.s[2] -> 0x24999374
      0x44f3c06a,  // smullb z10.d, z3.s, z3.s[2] -> 0x046126eb
      0x44f3c06b,  // smullb z11.d, z3.s, z3.s[2] -> 0x6b39941f
      0x44f3c449,  // smullt z9.d, z2.s, z3.s[2] -> 0xf161bcc6
      0x44f3ccc8,  // smullt z8.d, z6.s, z3.s[3] -> 0xbdc67c89
      0x44f9ccd8,  // smullt z24.d, z6.s, z9.s[3] -> 0xfed59871
      0x44ffccdc,  // smullt z28.d, z6.s, z15.s[3] -> 0x72746ff6
      0x44fecc58,  // smullt z24.d, z2.s, z14.s[3] -> 0xa15ee8f2
      0x44bfcc48,  // smullt z8.s, z2.h, z7.h[7] -> 0x3dccd2d6
      0x44b7c84a,  // smullb z10.s, z2.h, z7.h[5] -> 0x4537f0b2
      0x44a5c84e,  // smullb z14.s, z2.h, z5.h[1] -> 0x60e30690
      0x44adca46,  // smullb z6.s, z18.h, z5.h[3] -> 0xaef15cb5
      0x44add847,  // umullb z7.s, z2.h, z5.h[3] -> 0xe7df553d
      0x44bdd04f,  // umullb z15.s, z2.h, z5.h[6] -> 0xa713f809
      0x44bdc007,  // smullb z7.s, z0.h, z5.h[6] -> 0x4907c6b7
      0x44bdc005,  // smullb z5.s, z0.h, z5.h[6] -> 0x98a83fd0
      0x44bdc0b5,  // smullb z21.s, z5.h, z5.h[6] -> 0x3e6cb588
      0x44bcc094,  // smullb z20.s, z4.h, z4.h[6] -> 0x37e5a4ce
      0x44bcc09c,  // smullb z28.s, z4.h, z4.h[6] -> 0x719de631
      0x44acc88c,  // smullb z12.s, z4.h, z4.h[3] -> 0xf0f3dffe
      0x44aac884,  // smullb z4.s, z4.h, z2.h[3] -> 0x61a714ff
      0x44a8c8ac,  // smullb z12.s, z5.h, z0.h[3] -> 0xc47542ea
      0x44a8cea4,  // smullt z4.s, z21.h, z0.h[3] -> 0x37865031
      0x44a8daa5,  // umullb z5.s, z21.h, z0.h[3] -> 0x28cf4dc6
      0x44b8dae4,  // umullb z4.s, z23.h, z0.h[7] -> 0x6fe181d0
      0x44b9da6c,  // umullb z12.s, z19.h, z1.h[7] -> 0xde65c7e3
      0x44b9da64,  // umullb z4.s, z19.h, z1.h[7] -> 0x040a7e45
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly the instructions in the table.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // Hash the machine state into `final_hash` (presumably written by the
  // generated code at run time), then load that value into w0 so that it can
  // be checked through x0 below.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference hashes, selected by the number of Q-sized lanes in a vector
    // minus one; entry 0 equals the last vl128 annotation in the table above.
    const uint32_t kExpectedHashes[] = {
        0x040a7e45,
        0x48fc4c2b,
        0x9a1c67d1,
        0xcb88ffdd,
        0xcda205bc,
        0x7a47b6fb,
        0x68ae16c8,
        0x483353c9,
        0x91d91835,
        0x17a9ca4a,
        0x4f3d394f,
        0x5182776c,
        0xc03c1d3b,
        0xe52799db,
        0x1ddd328e,
        0xe33903de,
    };
    ASSERT_EQUAL_64(kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7823
// Runs a fixed sequence of raw SVE2 encodings which, per their disassembly
// annotations, are the vector forms of sqdmulh and sqrdmulh. The machine
// state is then hashed and, when the test can run, the hash is compared
// against a per-vector-length reference value.
TEST_SVE(sve2_sat_double_mul_high) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Encodings in emission order. Each entry is annotated with its disassembly;
  // the value after "->" is the "vl128 state" annotation recorded for the
  // point just after that instruction.
  static const uint32_t kEncodings[] = {
      0x046c711a,  // sqdmulh z26.h, z8.h, z12.h -> 0xe962209c
      0x047c7138,  // sqdmulh z24.h, z9.h, z28.h -> 0x06a43320
      0x04fc7539,  // sqrdmulh z25.d, z9.d, z28.d -> 0x8ce1cad6
      0x04fc7029,  // sqdmulh z9.d, z1.d, z28.d -> 0x6f3d1b22
      0x04ac702d,  // sqdmulh z13.s, z1.s, z12.s -> 0x14b0451c
      0x04a4742c,  // sqrdmulh z12.s, z1.s, z4.s -> 0x60206a6a
      0x04a574ad,  // sqrdmulh z13.s, z5.s, z5.s -> 0x388a9786
      0x04a574a9,  // sqrdmulh z9.s, z5.s, z5.s -> 0xee590c43
      0x04e574e8,  // sqrdmulh z8.d, z7.d, z5.d -> 0x8d16295c
      0x04e570ca,  // sqdmulh z10.d, z6.d, z5.d -> 0x2a5c234c
      0x04e670cb,  // sqdmulh z11.d, z6.d, z6.d -> 0xfacc9e06
      0x04f6708f,  // sqdmulh z15.d, z4.d, z22.d -> 0x2167ca56
      0x04f67087,  // sqdmulh z7.d, z4.d, z22.d -> 0xc7d7af1d
      0x04f77185,  // sqdmulh z5.d, z12.d, z23.d -> 0x15f82ac2
      0x04f67104,  // sqdmulh z4.d, z8.d, z22.d -> 0xb2484707
      0x04f6710c,  // sqdmulh z12.d, z8.d, z22.d -> 0x5a53b8e7
      0x04f6708d,  // sqdmulh z13.d, z4.d, z22.d -> 0xa9affac2
      0x04f67085,  // sqdmulh z5.d, z4.d, z22.d -> 0xa425052d
      0x04fe7281,  // sqdmulh z1.d, z20.d, z30.d -> 0x1c0f565c
      0x04ee72d1,  // sqdmulh z17.d, z22.d, z14.d -> 0xff12c401
      0x04ee7393,  // sqdmulh z19.d, z28.d, z14.d -> 0xcd1d9d3a
      0x04ec73b2,  // sqdmulh z18.d, z29.d, z12.d -> 0x2aa94767
      0x04ee73fa,  // sqdmulh z26.d, z31.d, z14.d -> 0x5ca68e9c
      0x04ef77ea,  // sqrdmulh z10.d, z31.d, z15.d -> 0xe5b65473
      0x04ff76e8,  // sqrdmulh z8.d, z23.d, z31.d -> 0xcc4e8803
      0x04fd76c9,  // sqrdmulh z9.d, z22.d, z29.d -> 0x19fff884
      0x04fd73d9,  // sqdmulh z25.d, z30.d, z29.d -> 0xb99d6147
      0x04e973dd,  // sqdmulh z29.d, z30.d, z9.d -> 0xe8f11301
      0x04b973dc,  // sqdmulh z28.s, z30.s, z25.s -> 0x24af5ffe
      0x04b177dd,  // sqrdmulh z29.s, z30.s, z17.s -> 0x5c32a08e
      0x04b177bc,  // sqrdmulh z28.s, z29.s, z17.s -> 0x12c8c1c4
      0x04f377ac,  // sqrdmulh z12.d, z29.d, z19.d -> 0x7bc1f2e6
      0x04f677ad,  // sqrdmulh z13.d, z29.d, z22.d -> 0x67d2640f
      0x04fe76af,  // sqrdmulh z15.d, z21.d, z30.d -> 0x98035fbd
      0x04ef76ae,  // sqrdmulh z14.d, z21.d, z15.d -> 0x5e561fd3
      0x04ee72ac,  // sqdmulh z12.d, z21.d, z14.d -> 0xb56c3914
      0x04ae72ee,  // sqdmulh z14.s, z23.s, z14.s -> 0x6bb1c4b1
      0x04be7266,  // sqdmulh z6.s, z19.s, z30.s -> 0x5a5bdda6
      0x04b67364,  // sqdmulh z4.s, z27.s, z22.s -> 0x09a447ea
      0x04b27165,  // sqdmulh z5.s, z11.s, z18.s -> 0xee84be35
      0x04b27175,  // sqdmulh z21.s, z11.s, z18.s -> 0x84146d85
      0x04ba7137,  // sqdmulh z23.s, z9.s, z26.s -> 0x92c2e5f6
      0x04b3713f,  // sqdmulh z31.s, z9.s, z19.s -> 0xe3836fb8
      0x04b37017,  // sqdmulh z23.s, z0.s, z19.s -> 0xb5225206
      0x04b37615,  // sqrdmulh z21.s, z16.s, z19.s -> 0x157484c7
      0x04b37491,  // sqrdmulh z17.s, z4.s, z19.s -> 0x586c4bbf
      0x04b37481,  // sqrdmulh z1.s, z4.s, z19.s -> 0xf5dc07cb
      0x04b37489,  // sqrdmulh z9.s, z4.s, z19.s -> 0x591875a8
      0x04b5748d,  // sqrdmulh z13.s, z4.s, z21.s -> 0xb01f8fd5
      0x043d748f,  // sqrdmulh z15.b, z4.b, z29.b -> 0xd466a58c
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly the instructions in the table.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // Hash the machine state into `final_hash` (presumably written by the
  // generated code at run time), then load that value into w0 so that it can
  // be checked through x0 below.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference hashes, selected by the number of Q-sized lanes in a vector
    // minus one; entry 0 equals the last vl128 annotation in the table above.
    const uint32_t kExpectedHashes[] = {
        0xd466a58c,
        0xe2ec7fba,
        0x1644e93a,
        0x7c3ecb2e,
        0xed4ecd78,
        0xfd5b5783,
        0xa7094efe,
        0x92bd623f,
        0x6da5e423,
        0x1648b588,
        0x63ce5947,
        0xba9c7d90,
        0x756ae20d,
        0x6d4032ba,
        0x87ae8b8f,
        0x722b2f6f,
    };
    ASSERT_EQUAL_64(kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
7967
// Runs a fixed sequence of raw SVE2 encodings which, per their disassembly
// annotations, are indexed cmla instructions on .h and .s lanes with
// rotations #0, #90, #180 and #270. The machine state is then hashed and,
// when the test can run, the hash is compared against a per-vector-length
// reference value.
TEST_SVE(sve2_cmla_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Encodings in emission order. Each entry is annotated with its disassembly;
  // the value after "->" is the "vl128 state" annotation recorded for the
  // point just after that instruction.
  static const uint32_t kEncodings[] = {
      0x44e867e6,  // cmla z6.s, z31.s, z8.s[0], #90 -> 0xee56e69b
      0x44e86de4,  // cmla z4.s, z15.s, z8.s[0], #270 -> 0x0ed2e9f5
      0x44e86be5,  // cmla z5.s, z31.s, z8.s[0], #180 -> 0x9074e2a6
      0x44eb6bf5,  // cmla z21.s, z31.s, z11.s[0], #180 -> 0x8f43b8a8
      0x44eb6b31,  // cmla z17.s, z25.s, z11.s[0], #180 -> 0xb6c51b97
      0x44eb6135,  // cmla z21.s, z9.s, z11.s[0], #0 -> 0x4236beed
      0x44e9633d,  // cmla z29.s, z25.s, z9.s[0], #0 -> 0x21879fe6
      0x44f96379,  // cmla z25.s, z27.s, z9.s[1], #0 -> 0x78172805
      0x44fd6349,  // cmla z9.s, z26.s, z13.s[1], #0 -> 0x242a3ae5
      0x44f76341,  // cmla z1.s, z26.s, z7.s[1], #0 -> 0xa734ef3b
      0x44f36305,  // cmla z5.s, z24.s, z3.s[1], #0 -> 0x00a035b1
      0x44f76381,  // cmla z1.s, z28.s, z7.s[1], #0 -> 0xbdfda3d4
      0x44f763e3,  // cmla z3.s, z31.s, z7.s[1], #0 -> 0xe1ed6ed9
      0x44b763cb,  // cmla z11.h, z30.h, z7.h[2], #0 -> 0xae645ea8
      0x44a763e9,  // cmla z9.h, z31.h, z7.h[0], #0 -> 0x392b3511
      0x44a762ab,  // cmla z11.h, z21.h, z7.h[0], #0 -> 0x3a05f729
      0x44a66aaf,  // cmla z15.h, z21.h, z6.h[0], #180 -> 0x7cfa0c08
      0x44a66aa7,  // cmla z7.h, z21.h, z6.h[0], #180 -> 0x91749f43
      0x44a663a5,  // cmla z5.h, z29.h, z6.h[0], #0 -> 0x438479ab
      0x44a66bed,  // cmla z13.h, z31.h, z6.h[0], #180 -> 0xc25ce86d
      0x44f66be9,  // cmla z9.s, z31.s, z6.s[1], #180 -> 0x6e8bdeca
      0x44b66bd9,  // cmla z25.h, z30.h, z6.h[2], #180 -> 0x04745a63
      0x44b66bd8,  // cmla z24.h, z30.h, z6.h[2], #180 -> 0xbfc59a82
      0x44b66b7c,  // cmla z28.h, z27.h, z6.h[2], #180 -> 0x12d70fc2
      0x44b6617e,  // cmla z30.h, z11.h, z6.h[2], #0 -> 0x53f4b9a1
      0x44b7697c,  // cmla z28.h, z11.h, z7.h[2], #180 -> 0x74e99c24
      0x44b3692c,  // cmla z12.h, z9.h, z3.h[2], #180 -> 0xdc80a875
      0x44a1692e,  // cmla z14.h, z9.h, z1.h[0], #180 -> 0x307af313
      0x44b169af,  // cmla z15.h, z13.h, z1.h[2], #180 -> 0xc92b23fe
      0x44b165a7,  // cmla z7.h, z13.h, z1.h[2], #90 -> 0x33a52d1c
      0x44b165a5,  // cmla z5.h, z13.h, z1.h[2], #90 -> 0xbc53ebfc
      0x44f161a1,  // cmla z1.s, z13.s, z1.s[1], #0 -> 0x7ba34076
      0x44f261a0,  // cmla z0.s, z13.s, z2.s[1], #0 -> 0x6fa2bab8
      0x44b361b0,  // cmla z16.h, z13.h, z3.h[2], #0 -> 0xaae67807
      0x44b36092,  // cmla z18.h, z4.h, z3.h[2], #0 -> 0xf1b05dff
      0x44b36202,  // cmla z2.h, z16.h, z3.h[2], #0 -> 0xd226bf15
      0x44b36a20,  // cmla z0.h, z17.h, z3.h[2], #180 -> 0x6a8ade58
      0x44b26a10,  // cmla z16.h, z16.h, z2.h[2], #180 -> 0x075e00e4
      0x44b26a18,  // cmla z24.h, z16.h, z2.h[2], #180 -> 0x9bcef7bd
      0x44b06a28,  // cmla z8.h, z17.h, z0.h[2], #180 -> 0x8ac6d4b3
      0x44b06a2a,  // cmla z10.h, z17.h, z0.h[2], #180 -> 0x51993d51
      0x44b0620b,  // cmla z11.h, z16.h, z0.h[2], #0 -> 0x6d134734
      0x44b06209,  // cmla z9.h, z16.h, z0.h[2], #0 -> 0x0ee4031f
      0x44f06a0d,  // cmla z13.s, z16.s, z0.s[1], #180 -> 0x08ea247b
      0x44f06b2c,  // cmla z12.s, z25.s, z0.s[1], #180 -> 0x6acbb19a
      0x44f1692d,  // cmla z13.s, z9.s, z1.s[1], #180 -> 0x3ea2d161
      0x44b36925,  // cmla z5.h, z9.h, z3.h[2], #180 -> 0x5b962e9b
      0x44b36921,  // cmla z1.h, z9.h, z3.h[2], #180 -> 0x029f0eca
      0x44b36d69,  // cmla z9.h, z11.h, z3.h[2], #270 -> 0x39a63c65
      0x44bb6d28,  // cmla z8.h, z9.h, z3.h[3], #270 -> 0x6d58c136
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly the instructions in the table.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // Hash the machine state into `final_hash` (presumably written by the
  // generated code at run time), then load that value into w0 so that it can
  // be checked through x0 below.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference hashes, selected by the number of Q-sized lanes in a vector
    // minus one; entry 0 equals the last vl128 annotation in the table above.
    const uint32_t kExpectedHashes[] = {
        0x6d58c136,
        0xfbdbae97,
        0x85c3cf1a,
        0xe4b53177,
        0x2f714586,
        0xde1afee8,
        0xd9613d2e,
        0x842c85a6,
        0xdc285523,
        0xccba7ba9,
        0x79e1e6f7,
        0xb19427f4,
        0x20d08a3a,
        0xfb7f4c43,
        0x0721ed60,
        0x4ee795ab,
    };
    ASSERT_EQUAL_64(kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
8111
// Runs a fixed sequence of raw SVE2 encodings which, per their disassembly
// annotations, are merging-predicated flogb instructions on .h, .s and .d
// lanes. The machine state is then hashed and, when the test can run, the
// hash is compared against a per-vector-length reference value.
TEST_SVE(sve2_flogb) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Encodings in emission order. Each entry is annotated with its disassembly;
  // the value after "->" is the "vl128 state" annotation recorded for the
  // point just after that instruction.
  static const uint32_t kEncodings[] = {
      0x651cb31e,  // flogb z30.s, p4/m, z24.s -> 0x161f1855
      0x651cb3ae,  // flogb z14.s, p4/m, z29.s -> 0xf9e5ce4d
      0x651cb3be,  // flogb z30.s, p4/m, z29.s -> 0xa06176bc
      0x651ea3bc,  // flogb z28.d, p0/m, z29.d -> 0xf793f7bb
      0x651ea3cc,  // flogb z12.d, p0/m, z30.d -> 0xe5d71081
      0x651ea3dc,  // flogb z28.d, p0/m, z30.d -> 0x33ffc09f
      0x651ea3d4,  // flogb z20.d, p0/m, z30.d -> 0xd908a72e
      0x651ea3d5,  // flogb z21.d, p0/m, z30.d -> 0x9528251a
      0x651ca394,  // flogb z20.s, p0/m, z28.s -> 0xb1ac4188
      0x651ca396,  // flogb z22.s, p0/m, z28.s -> 0xdc328726
      0x651ca1d7,  // flogb z23.s, p0/m, z14.s -> 0xfc232eb7
      0x651ca947,  // flogb z7.s, p2/m, z10.s -> 0xa9c53a1a
      0x651ca805,  // flogb z5.s, p2/m, z0.s -> 0x9e4a47e9
      0x651ea841,  // flogb z1.d, p2/m, z2.d -> 0x7a2aeaf6
      0x651ea843,  // flogb z3.d, p2/m, z2.d -> 0xedd4aa97
      0x651caa4b,  // flogb z11.s, p2/m, z18.s -> 0x7bfefefb
      0x651cab6f,  // flogb z15.s, p2/m, z27.s -> 0x91b5a183
      0x651ca86b,  // flogb z11.s, p2/m, z3.s -> 0x7b2776c2
      0x651ca47b,  // flogb z27.s, p1/m, z3.s -> 0x46ea46c7
      0x651ca47f,  // flogb z31.s, p1/m, z3.s -> 0x6e1d4e89
      0x651ca477,  // flogb z23.s, p1/m, z3.s -> 0x5ea1220c
      0x651ca035,  // flogb z21.s, p0/m, z1.s -> 0xb06e32be
      0x651ca2a5,  // flogb z5.s, p0/m, z21.s -> 0xb856d206
      0x651caa2d,  // flogb z13.s, p2/m, z17.s -> 0xebfd587f
      0x651caa3d,  // flogb z29.s, p2/m, z17.s -> 0xb029ba8d
      0x651eaa7f,  // flogb z31.d, p2/m, z19.d -> 0x07fd3f42
      0x651ebb7e,  // flogb z30.d, p6/m, z27.d -> 0x79761d7a
      0x651ebb76,  // flogb z22.d, p6/m, z27.d -> 0xdf56dd22
      0x651ebb72,  // flogb z18.d, p6/m, z27.d -> 0xce798ad7
      0x651eb276,  // flogb z22.d, p4/m, z19.d -> 0x84dd46d6
      0x651eb652,  // flogb z18.d, p5/m, z18.d -> 0x2ea4a0df
      0x651cbe42,  // flogb z2.s, p7/m, z18.s -> 0x8cdd1250
      0x651cb852,  // flogb z18.s, p6/m, z2.s -> 0x5f5b051d
      0x651eb956,  // flogb z22.d, p6/m, z10.d -> 0x7a17cdd1
      0x651eb11e,  // flogb z30.d, p4/m, z8.d -> 0x7367f8ec
      0x651ab016,  // flogb z22.h, p4/m, z0.h -> 0x8e1bfb06
      0x651ab014,  // flogb z20.h, p4/m, z0.h -> 0x2bcfa0f0
      0x651aa81c,  // flogb z28.h, p2/m, z0.h -> 0xeb9615e8
      0x651aa80c,  // flogb z12.h, p2/m, z0.h -> 0x5b55f5cd
      0x651aa808,  // flogb z8.h, p2/m, z0.h -> 0xdd1718f2
      0x651aa20a,  // flogb z10.h, p0/m, z16.h -> 0x205e88ed
      0x651ab24e,  // flogb z14.h, p4/m, z18.h -> 0x1c9f2035
      0x651ab36f,  // flogb z15.h, p4/m, z27.h -> 0xea22efaf
      0x651ab36b,  // flogb z11.h, p4/m, z27.h -> 0x0cd0b8cd
      0x651abb29,  // flogb z9.h, p6/m, z25.h -> 0xa1a017d1
      0x651abb2d,  // flogb z13.h, p6/m, z25.h -> 0x37d033d2
      0x651aba0c,  // flogb z12.h, p6/m, z16.h -> 0x971bde83
      0x651cba1c,  // flogb z28.s, p6/m, z16.s -> 0xb6b23bc2
      0x651cba1d,  // flogb z29.s, p6/m, z16.s -> 0x1af298e0
      0x651cba15,  // flogb z21.s, p6/m, z16.s -> 0x077a2869
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly the instructions in the table.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // Hash the machine state into `final_hash` (presumably written by the
  // generated code at run time), then load that value into w0 so that it can
  // be checked through x0 below.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference hashes, selected by the number of Q-sized lanes in a vector
    // minus one; entry 0 equals the last vl128 annotation in the table above.
    const uint32_t kExpectedHashes[] = {
        0x077a2869,
        0xde5bc452,
        0xe80f0bc6,
        0x1c078cf2,
        0x66064034,
        0xa9f5264d,
        0xb19b24c1,
        0xb394864c,
        0x42991ea7,
        0xcf33094e,
        0xc4656d85,
        0x4cfa5b7e,
        0xbb7c121f,
        0xd2e8c839,
        0x028134cf,
        0x2f3e9779,
    };
    ASSERT_EQUAL_64(kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
8255
// Runs a fixed sequence of raw SVE2 encodings which, per their disassembly
// annotations, are the predicated pairwise floating-point instructions faddp,
// fmaxnmp, fminnmp, fmaxp and fminp. Unlike the tests that call
// SetInitialMachineState(&masm) with no second argument, this one starts from
// the kFpInputSet initial state. The machine state is then hashed and, when
// the test can run, the hash is compared against a per-vector-length
// reference value.
TEST_SVE(sve2_fp_pair) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm, kFpInputSet);
  // state = 0x1e5cbcac

  // Encodings in emission order. Each entry is annotated with its disassembly;
  // the value after "->" is the "vl128 state" annotation recorded for the
  // point just after that instruction.
  static const uint32_t kEncodings[] = {
      0x64d591aa,  // fminnmp z10.d, p4/m, z10.d, z13.d -> 0x02a0f18c
      0x64d59dab,  // fminnmp z11.d, p7/m, z11.d, z13.d -> 0xd6d0a87f
      0x64d59d7b,  // fminnmp z27.d, p7/m, z27.d, z11.d -> 0x364f93b4
      0x64d59c2b,  // fminnmp z11.d, p7/m, z11.d, z1.d -> 0xc7ed7476
      0x64d59f23,  // fminnmp z3.d, p7/m, z3.d, z25.d -> 0x7a1ec868
      0x64d59f22,  // fminnmp z2.d, p7/m, z2.d, z25.d -> 0x862a3f3d
      0x64d49fa0,  // fmaxnmp z0.d, p7/m, z0.d, z29.d -> 0x11f71743
      0x64d49fa8,  // fmaxnmp z8.d, p7/m, z8.d, z29.d -> 0x302e45cd
      0x64d49fa9,  // fmaxnmp z9.d, p7/m, z9.d, z29.d -> 0x11cca180
      0x64d68fb9,  // fmaxp z25.d, p3/m, z25.d, z29.d -> 0xee6b2d42
      0x64d68fb8,  // fmaxp z24.d, p3/m, z24.d, z29.d -> 0x060efb2c
      0x64d49fba,  // fmaxnmp z26.d, p7/m, z26.d, z29.d -> 0x4f4232ac
      0x649497b2,  // fmaxnmp z18.s, p5/m, z18.s, z29.s -> 0xe3e04479
      0x649096b6,  // faddp z22.s, p5/m, z22.s, z21.s -> 0x2a407146
      0x64909237,  // faddp z23.s, p4/m, z23.s, z17.s -> 0x6d0b2bb8
      0x64d09027,  // faddp z7.d, p4/m, z7.d, z1.d -> 0x5e7d175f
      0x64509006,  // faddp z6.h, p4/m, z6.h, z0.h -> 0xa0a4cd20
      0x64d0940e,  // faddp z14.d, p5/m, z14.d, z0.d -> 0xf66b9cde
      0x64d09c4f,  // faddp z15.d, p7/m, z15.d, z2.d -> 0x5a2d08c9
      0x64d09c5f,  // faddp z31.d, p7/m, z31.d, z2.d -> 0x2e390409
      0x64d09c57,  // faddp z23.d, p7/m, z23.d, z2.d -> 0xfb4af476
      0x64d09c56,  // faddp z22.d, p7/m, z22.d, z2.d -> 0x8d8c621b
      0x64d08e5e,  // faddp z30.d, p3/m, z30.d, z18.d -> 0xba8962e6
      0x64d0845c,  // faddp z28.d, p1/m, z28.d, z2.d -> 0x224654c6
      0x64d0845d,  // faddp z29.d, p1/m, z29.d, z2.d -> 0xef608134
      0x64d08e4d,  // faddp z13.d, p3/m, z13.d, z18.d -> 0x5adedbf3
      0x64908645,  // faddp z5.s, p1/m, z5.s, z18.s -> 0x04b4f366
      0x64908a4d,  // faddp z13.s, p2/m, z13.s, z18.s -> 0xf0a7482a
      0x64d08245,  // faddp z5.d, p0/m, z5.d, z18.d -> 0x0f2ccd61
      0x64909255,  // faddp z21.s, p4/m, z21.s, z18.s -> 0x7665491f
      0x649096c5,  // faddp z5.s, p5/m, z5.s, z22.s -> 0xc3b53fd3
      0x649492c1,  // fmaxnmp z1.s, p4/m, z1.s, z22.s -> 0x589fd64a
      0x649096d1,  // faddp z17.s, p5/m, z17.s, z22.s -> 0x5a0d0d52
      0x649096d5,  // faddp z21.s, p5/m, z21.s, z22.s -> 0xba57cd51
      0x649096d4,  // faddp z20.s, p5/m, z20.s, z22.s -> 0xa5d7b29d
      0x649093d0,  // faddp z16.s, p4/m, z16.s, z30.s -> 0xa62cce9e
      0x64909318,  // faddp z24.s, p4/m, z24.s, z24.s -> 0x8cc209c7
      0x64909008,  // faddp z8.s, p4/m, z8.s, z0.s -> 0x56a9af04
      0x64969000,  // fmaxp z0.s, p4/m, z0.s, z0.s -> 0xc45f824a
      0x64569004,  // fmaxp z4.h, p4/m, z4.h, z0.h -> 0x82da5cb7
      0x64569000,  // fmaxp z0.h, p4/m, z0.h, z0.h -> 0xa9fff0bf
      0x64569001,  // fmaxp z1.h, p4/m, z1.h, z0.h -> 0x71c2e09a
      0x64569605,  // fmaxp z5.h, p5/m, z5.h, z16.h -> 0xe50c8b49
      0x64579624,  // fminp z4.h, p5/m, z4.h, z17.h -> 0x4f3817cb
      0x6457962c,  // fminp z12.h, p5/m, z12.h, z17.h -> 0x5a773e57
      0x64d5963c,  // fminnmp z28.d, p5/m, z28.d, z17.d -> 0xa5c5e37c
      0x64d7943e,  // fminp z30.d, p5/m, z30.d, z1.d -> 0xc778f8a3
      0x6457953a,  // fminp z26.h, p5/m, z26.h, z9.h -> 0x01abc4af
      0x6457952a,  // fminp z10.h, p5/m, z10.h, z9.h -> 0x45483a17
      0x64579d7a,  // fminp z26.h, p7/m, z26.h, z11.h -> 0x355b08b3
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly the instructions in the table.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // Hash the machine state into `final_hash` (presumably written by the
  // generated code at run time), then load that value into w0 so that it can
  // be checked through x0 below.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference hashes, selected by the number of Q-sized lanes in a vector
    // minus one; entry 0 equals the last vl128 annotation in the table above.
    const uint32_t kExpectedHashes[] = {
        0x355b08b3,
        0x8f7890cd,
        0x5dddb069,
        0x030a5f52,
        0xc569c150,
        0x060423ba,
        0x5d729bd0,
        0x079b4f8b,
        0x06e75e58,
        0x6f631884,
        0xddc735f0,
        0x7213b8e2,
        0x8cbf507c,
        0x40654268,
        0x3cd7ad6c,
        0xfba0ee9e,
    };
    ASSERT_EQUAL_64(kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
8399
// Runs a fixed sequence of raw SVE2 encodings which, per their disassembly
// annotations, are the indexed widening floating-point multiply-accumulates
// fmlalb, fmlalt, fmlslb and fmlslt. The machine state is then hashed and,
// when the test can run, the hash is compared against a per-vector-length
// reference value.
TEST_SVE(sve2_fmlal_fmlsl_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  // Encodings in emission order. Each entry is annotated with its disassembly;
  // the value after "->" is the "vl128 state" annotation recorded for the
  // point just after that instruction.
  static const uint32_t kEncodings[] = {
      0x64a94f15,  // fmlalt z21.s, z24.h, z1.h[3] -> 0x0895849b
      0x64ab4f9d,  // fmlalt z29.s, z28.h, z3.h[3] -> 0x6e0cf3fe
      0x64a74f9c,  // fmlalt z28.s, z28.h, z7.h[1] -> 0x482b4f57
      0x64a74dde,  // fmlalt z30.s, z14.h, z7.h[1] -> 0xf047791e
      0x64a74cee,  // fmlalt z14.s, z7.h, z7.h[1] -> 0xde33332c
      0x64a648ef,  // fmlalb z15.s, z7.h, z6.h[1] -> 0xf7148941
      0x64a648ee,  // fmlalb z14.s, z7.h, z6.h[1] -> 0x69f23fcb
      0x64b649ea,  // fmlalb z10.s, z15.h, z6.h[5] -> 0x979eea1a
      0x64b649ee,  // fmlalb z14.s, z15.h, z6.h[5] -> 0x522917a9
      0x64b649e6,  // fmlalb z6.s, z15.h, z6.h[5] -> 0x7d773525
      0x64b64ba2,  // fmlalb z2.s, z29.h, z6.h[5] -> 0x220960c6
      0x64b46baa,  // fmlslb z10.s, z29.h, z4.h[5] -> 0x2c8e384a
      0x64b46dab,  // fmlslt z11.s, z13.h, z4.h[5] -> 0xa592cde1
      0x64b467bb,  // fmlslt z27.s, z29.h, z4.h[4] -> 0xba31bd61
      0x64b665b3,  // fmlslt z19.s, z13.h, z6.h[4] -> 0x75dade04
      0x64b663bb,  // fmlslb z27.s, z29.h, z6.h[4] -> 0xa7358466
      0x64a662bf,  // fmlslb z31.s, z21.h, z6.h[0] -> 0x6125ca9d
      0x64a7623e,  // fmlslb z30.s, z17.h, z7.h[0] -> 0x4b1cda83
      0x64a7462e,  // fmlalt z14.s, z17.h, z7.h[0] -> 0x00d73a44
      0x64a6662f,  // fmlslt z15.s, z17.h, z6.h[0] -> 0xc5ea9f30
      0x64a666ed,  // fmlslt z13.s, z23.h, z6.h[0] -> 0xe17ba118
      0x64a26eec,  // fmlslt z12.s, z23.h, z2.h[1] -> 0xd1962c7a
      0x64a26cbc,  // fmlslt z28.s, z5.h, z2.h[1] -> 0xde6f1ace
      0x64a26cb4,  // fmlslt z20.s, z5.h, z2.h[1] -> 0x10d69920
      0x64a26cbc,  // fmlslt z28.s, z5.h, z2.h[1] -> 0x8d190aec
      0x64a26cd8,  // fmlslt z24.s, z6.h, z2.h[1] -> 0x432fdda3
      0x64a26c1a,  // fmlslt z26.s, z0.h, z2.h[1] -> 0x9ababf0a
      0x64a24d1e,  // fmlalt z30.s, z8.h, z2.h[1] -> 0x609040ae
      0x64a24d1c,  // fmlalt z28.s, z8.h, z2.h[1] -> 0x0a047710
      0x64a24d1e,  // fmlalt z30.s, z8.h, z2.h[1] -> 0xf273945a
      0x64a0490e,  // fmlalb z14.s, z8.h, z0.h[1] -> 0x3a5456f1
      0x64a0490c,  // fmlalb z12.s, z8.h, z0.h[1] -> 0xdb948daf
      0x64b04b04,  // fmlalb z4.s, z24.h, z0.h[5] -> 0xd2eae2af
      0x64b04b06,  // fmlalb z6.s, z24.h, z0.h[5] -> 0x26627a2c
      0x64b04b07,  // fmlalb z7.s, z24.h, z0.h[5] -> 0x2841173d
      0x64b84b26,  // fmlalb z6.s, z25.h, z0.h[7] -> 0x9b52bcc6
      0x64ba4f27,  // fmlalt z7.s, z25.h, z2.h[7] -> 0x813bbabe
      0x64ba4923,  // fmlalb z3.s, z9.h, z2.h[7] -> 0xbb608dad
      0x64b84d22,  // fmlalt z2.s, z9.h, z0.h[7] -> 0xf4d84ed6
      0x64b84d23,  // fmlalt z3.s, z9.h, z0.h[7] -> 0x1cc0784e
      0x64bc4527,  // fmlalt z7.s, z9.h, z4.h[6] -> 0x4eece4b7
      0x64bc6737,  // fmlslt z23.s, z25.h, z4.h[6] -> 0x00dacf34
      0x64bc6fa7,  // fmlslt z7.s, z29.h, z4.h[7] -> 0x597e23d4
      0x64bc6e25,  // fmlslt z5.s, z17.h, z4.h[7] -> 0xa66b843c
      0x64be6f2d,  // fmlslt z13.s, z25.h, z6.h[7] -> 0xb595ec08
      0x64be6765,  // fmlslt z5.s, z27.h, z6.h[6] -> 0xd6c3af0a
      0x64be662d,  // fmlslt z13.s, z17.h, z6.h[6] -> 0x864f26a8
      0x64bf6225,  // fmlslb z5.s, z17.h, z7.h[6] -> 0xb969be4d
      0x64bb626d,  // fmlslb z13.s, z19.h, z3.h[6] -> 0x73329b58
      0x64b9622c,  // fmlslb z12.s, z17.h, z1.h[6] -> 0xfb7e2da2
  };
  const size_t kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  {
    // The scope is sized for exactly the instructions in the table.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (size_t i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // Hash the machine state into `final_hash` (presumably written by the
  // generated code at run time), then load that value into w0 so that it can
  // be checked through x0 below.
  uint32_t final_hash;
  ComputeMachineStateHash(&masm, &final_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&final_hash));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Reference hashes, selected by the number of Q-sized lanes in a vector
    // minus one; entry 0 equals the last vl128 annotation in the table above.
    const uint32_t kExpectedHashes[] = {
        0xfb7e2da2,
        0x34ad546c,
        0xd914c0d4,
        0xc173287c,
        0x07db96b2,
        0xab5ece8c,
        0xcda13318,
        0x6e62dc3f,
        0x0268d9b4,
        0x15118567,
        0xf55fb24f,
        0xc4ab4b56,
        0x5911f225,
        0x6d9c320c,
        0xc69bdedf,
        0x1635a43f,
    };
    ASSERT_EQUAL_64(kExpectedHashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
8543
// Hash-based regression test for the SVE2 floating-point conversion
// instructions (fcvtxnt, fcvtnt, fcvtx and fcvtlt, per the disassembly
// comments below).
//
// The test emits 50 raw instruction encodings after SetInitialMachineState(),
// then uses ComputeMachineStateHash() and compares the 32-bit value read back
// from `state` with the expected hash for the current vector length.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one matches
// expected_hashes[0]. The instruction words and their order must not be
// changed without regenerating every expected hash.
TEST_SVE(sve2_fp_convert) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instruction words emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x640ab3ee);  // fcvtxnt z14.s, p4/m, z31.d
    // vl128 state = 0x3ea71f7a
    __ dci(0x64caa9e0);  // fcvtnt z0.s, p2/m, z15.d
    // vl128 state = 0xe9d750a1
    __ dci(0x64cab83d);  // fcvtnt z29.s, p6/m, z1.d
    // vl128 state = 0x9ce43257
    __ dci(0x650aad62);  // fcvtx z2.s, p3/m, z11.d
    // vl128 state = 0x60283e22
    __ dci(0x64cbb42b);  // fcvtlt z11.d, p5/m, z1.s
    // vl128 state = 0xfbecbe4a
    __ dci(0x6488ba54);  // fcvtnt z20.h, p6/m, z18.s
    // vl128 state = 0xbb81cc05
    __ dci(0x64cbb730);  // fcvtlt z16.d, p5/m, z25.s
    // vl128 state = 0xd9cebdf5
    __ dci(0x640aa5e4);  // fcvtxnt z4.s, p1/m, z15.d
    // vl128 state = 0x9dba64db
    __ dci(0x650aa715);  // fcvtx z21.s, p1/m, z24.d
    // vl128 state = 0x0e68fab9
    __ dci(0x64cabe86);  // fcvtnt z6.s, p7/m, z20.d
    // vl128 state = 0x5936ac32
    __ dci(0x64cba075);  // fcvtlt z21.d, p0/m, z3.s
    // vl128 state = 0x2eb8a37b
    __ dci(0x6488b3c5);  // fcvtnt z5.h, p4/m, z30.s
    // vl128 state = 0x9f471340
    __ dci(0x6489b24a);  // fcvtlt z10.s, p4/m, z18.h
    // vl128 state = 0xcf5e5808
    __ dci(0x64cbb514);  // fcvtlt z20.d, p5/m, z8.s
    // vl128 state = 0x870c5b85
    __ dci(0x650ab090);  // fcvtx z16.s, p4/m, z4.d
    // vl128 state = 0x305da0a0
    __ dci(0x64cbb2d3);  // fcvtlt z19.d, p4/m, z22.s
    // vl128 state = 0x8eb1b5fc
    __ dci(0x64cbb093);  // fcvtlt z19.d, p4/m, z4.s
    // vl128 state = 0x3c070332
    __ dci(0x6488b9b8);  // fcvtnt z24.h, p6/m, z13.s
    // vl128 state = 0xe0fc3455
    __ dci(0x650aa64d);  // fcvtx z13.s, p1/m, z18.d
    // vl128 state = 0x65556c34
    __ dci(0x6488b2d7);  // fcvtnt z23.h, p4/m, z22.s
    // vl128 state = 0xc9ccae47
    __ dci(0x650ab36d);  // fcvtx z13.s, p4/m, z27.d
    // vl128 state = 0x31d942a1
    __ dci(0x650aba2c);  // fcvtx z12.s, p6/m, z17.d
    // vl128 state = 0x27497e26
    __ dci(0x650aa377);  // fcvtx z23.s, p0/m, z27.d
    // vl128 state = 0xbe0a7446
    __ dci(0x6489a3a5);  // fcvtlt z5.s, p0/m, z29.h
    // vl128 state = 0x454c62cc
    __ dci(0x64cabeb9);  // fcvtnt z25.s, p7/m, z21.d
    // vl128 state = 0x808a014f
    __ dci(0x6489b4c2);  // fcvtlt z2.s, p5/m, z6.h
    // vl128 state = 0x55ae2250
    __ dci(0x64cba246);  // fcvtlt z6.d, p0/m, z18.s
    // vl128 state = 0x7ce05c24
    __ dci(0x650ab2a6);  // fcvtx z6.s, p4/m, z21.d
    // vl128 state = 0xa26121f5
    __ dci(0x64cbb239);  // fcvtlt z25.d, p4/m, z17.s
    // vl128 state = 0xb40c58e1
    __ dci(0x64cabdd9);  // fcvtnt z25.s, p7/m, z14.d
    // vl128 state = 0xf5077a54
    __ dci(0x650ab75a);  // fcvtx z26.s, p5/m, z26.d
    // vl128 state = 0x95b006de
    __ dci(0x650aa08b);  // fcvtx z11.s, p0/m, z4.d
    // vl128 state = 0x9ca5060c
    __ dci(0x640aafd3);  // fcvtxnt z19.s, p3/m, z30.d
    // vl128 state = 0x85c89705
    __ dci(0x64caaf3a);  // fcvtnt z26.s, p3/m, z25.d
    // vl128 state = 0x6b6aa4f9
    __ dci(0x640abda1);  // fcvtxnt z1.s, p7/m, z13.d
    // vl128 state = 0x769cf76e
    __ dci(0x6489a6f9);  // fcvtlt z25.s, p1/m, z23.h
    // vl128 state = 0x0a291b3b
    __ dci(0x6489b38d);  // fcvtlt z13.s, p4/m, z28.h
    // vl128 state = 0x6b72e558
    __ dci(0x650aaf63);  // fcvtx z3.s, p3/m, z27.d
    // vl128 state = 0xf4a004e0
    __ dci(0x6488bfa4);  // fcvtnt z4.h, p7/m, z29.s
    // vl128 state = 0xe01c349e
    __ dci(0x6489a6ee);  // fcvtlt z14.s, p1/m, z23.h
    // vl128 state = 0x3b06da53
    __ dci(0x64cabbf8);  // fcvtnt z24.s, p6/m, z31.d
    // vl128 state = 0xc60fbbf0
    __ dci(0x6489bc7f);  // fcvtlt z31.s, p7/m, z3.h
    // vl128 state = 0x8b281c78
    __ dci(0x64caaf1f);  // fcvtnt z31.s, p3/m, z24.d
    // vl128 state = 0x0f17afbb
    __ dci(0x650aac71);  // fcvtx z17.s, p3/m, z3.d
    // vl128 state = 0xce0ac3e1
    __ dci(0x650aa1df);  // fcvtx z31.s, p0/m, z14.d
    // vl128 state = 0x71ba2085
    __ dci(0x650aaf9f);  // fcvtx z31.s, p3/m, z28.d
    // vl128 state = 0xe42caea0
    __ dci(0x640abff9);  // fcvtxnt z25.s, p7/m, z31.d
    // vl128 state = 0xec3c032c
    __ dci(0x6489b8e5);  // fcvtlt z5.s, p6/m, z7.h
    // vl128 state = 0xe41850f7
    __ dci(0x640aa1a1);  // fcvtxnt z1.s, p0/m, z13.d
    // vl128 state = 0xaf3944b4
    __ dci(0x6488bf41);  // fcvtnt z1.h, p7/m, z26.s
    // vl128 state = 0xdffd02bd
  }

  // Load the 32-bit value stored at `state` into w0 so it can be checked
  // after the run. Presumably ComputeMachineStateHash() emits code that
  // writes the hash there.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by the number of Q-register-sized
    // lanes in the vector minus one. Entry 0 equals the final vl128 state.
    uint32_t expected_hashes[] = {
        0xdffd02bd,
        0x03d1f711,
        0x41cf3358,
        0xa351d0f6,
        0xffba25ff,
        0x14092947,
        0x26b194fe,
        0x42acd8a3,
        0xc0498960,
        0xcccf1171,
        0x8dca76ed,
        0xefbda194,
        0xcf04a23d,
        0x91e2629f,
        0xf05e8f52,
        0x4994ad4a,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
8687
// Hash-based regression test for the indexed forms of the SVE2 saturating
// rounding doubling multiply-add/subtract high instructions (sqrdmlah and
// sqrdmlsh with .h, .s and .d lanes, per the disassembly comments below).
//
// The test emits 40 raw instruction encodings after SetInitialMachineState(),
// then uses ComputeMachineStateHash() and compares the 32-bit value read back
// from `state` with the expected hash for the current vector length.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one matches
// expected_hashes[0]. The instruction words and their order must not be
// changed without regenerating every expected hash.
TEST_SVE(sve2_saturating_multiply_add_high_indexed) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 40 instruction words emitted below.
    ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
    __ dci(0x442b1100);  // sqrdmlah z0.h, z8.h, z3.h[1]
    // vl128 state = 0xb012d377
    __ dci(0x44211108);  // sqrdmlah z8.h, z8.h, z1.h[0]
    // vl128 state = 0xae399e50
    __ dci(0x4421110c);  // sqrdmlah z12.h, z8.h, z1.h[0]
    // vl128 state = 0x1a46b700
    __ dci(0x44291188);  // sqrdmlah z8.h, z12.h, z1.h[1]
    // vl128 state = 0x7525090a
    __ dci(0x442811a9);  // sqrdmlah z9.h, z13.h, z0.h[1]
    // vl128 state = 0xf2907eb8
    __ dci(0x442c11eb);  // sqrdmlah z11.h, z15.h, z4.h[1]
    // vl128 state = 0x65a71d51
    __ dci(0x442c11e3);  // sqrdmlah z3.h, z15.h, z4.h[1]
    // vl128 state = 0x8b30e19b
    __ dci(0x442413e1);  // sqrdmlah z1.h, z31.h, z4.h[0]
    // vl128 state = 0x448e4c0f
    __ dci(0x44a413a0);  // sqrdmlah z0.s, z29.s, z4.s[0]
    // vl128 state = 0x1745e0db
    __ dci(0x44241321);  // sqrdmlah z1.h, z25.h, z4.h[0]
    // vl128 state = 0xe07b491b
    __ dci(0x44a413a5);  // sqrdmlah z5.s, z29.s, z4.s[0]
    // vl128 state = 0xad39c91c
    __ dci(0x44e41327);  // sqrdmlah z7.d, z25.d, z4.d[0]
    // vl128 state = 0xd327dc1c
    __ dci(0x44e4132f);  // sqrdmlah z15.d, z25.d, z4.d[0]
    // vl128 state = 0x8da341ca
    __ dci(0x44e5130b);  // sqrdmlah z11.d, z24.d, z5.d[0]
    // vl128 state = 0x4dbd3ee1
    __ dci(0x44e3130a);  // sqrdmlah z10.d, z24.d, z3.d[0]
    // vl128 state = 0x71452896
    __ dci(0x44e3131a);  // sqrdmlah z26.d, z24.d, z3.d[0]
    // vl128 state = 0x4d6d8b90
    __ dci(0x4463135e);  // sqrdmlah z30.h, z26.h, z3.h[4]
    // vl128 state = 0x0b53f7b4
    __ dci(0x44e7135c);  // sqrdmlah z28.d, z26.d, z7.d[0]
    // vl128 state = 0x78ab2bb9
    __ dci(0x44e7134c);  // sqrdmlah z12.d, z26.d, z7.d[0]
    // vl128 state = 0x3773b9e2
    __ dci(0x44e51144);  // sqrdmlah z4.d, z10.d, z5.d[0]
    // vl128 state = 0x8f8883da
    __ dci(0x44e411c0);  // sqrdmlah z0.d, z14.d, z4.d[0]
    // vl128 state = 0xa27ef92f
    __ dci(0x44ec15c4);  // sqrdmlsh z4.d, z14.d, z12.d[0]
    // vl128 state = 0x6cea3cee
    __ dci(0x44ec14e0);  // sqrdmlsh z0.d, z7.d, z12.d[0]
    // vl128 state = 0xb5e40d5f
    __ dci(0x44ee16f0);  // sqrdmlsh z16.d, z23.d, z14.d[0]
    // vl128 state = 0xacf903eb
    __ dci(0x44ea16d4);  // sqrdmlsh z20.d, z22.d, z10.d[0]
    // vl128 state = 0x698246a6
    __ dci(0x44ea16d0);  // sqrdmlsh z16.d, z22.d, z10.d[0]
    // vl128 state = 0x58015eeb
    __ dci(0x44ea16d1);  // sqrdmlsh z17.d, z22.d, z10.d[0]
    // vl128 state = 0xdbf1d9a6
    __ dci(0x44ab16d3);  // sqrdmlsh z19.s, z22.s, z3.s[1]
    // vl128 state = 0xbde312bb
    __ dci(0x44aa17d1);  // sqrdmlsh z17.s, z30.s, z2.s[1]
    // vl128 state = 0xc033b9a1
    __ dci(0x44aa1650);  // sqrdmlsh z16.s, z18.s, z2.s[1]
    // vl128 state = 0x0e3b4c59
    __ dci(0x44aa1632);  // sqrdmlsh z18.s, z17.s, z2.s[1]
    // vl128 state = 0x6f849e01
    __ dci(0x44aa1710);  // sqrdmlsh z16.s, z24.s, z2.s[1]
    // vl128 state = 0x701e7316
    __ dci(0x44aa1711);  // sqrdmlsh z17.s, z24.s, z2.s[1]
    // vl128 state = 0xbfbc7895
    __ dci(0x44a91715);  // sqrdmlsh z21.s, z24.s, z1.s[1]
    // vl128 state = 0x2307c6f3
    __ dci(0x44a91697);  // sqrdmlsh z23.s, z20.s, z1.s[1]
    // vl128 state = 0x78db6627
    __ dci(0x44a91696);  // sqrdmlsh z22.s, z20.s, z1.s[1]
    // vl128 state = 0x37d25a35
    __ dci(0x44a816de);  // sqrdmlsh z30.s, z22.s, z0.s[1]
    // vl128 state = 0xf611db46
    __ dci(0x44ab16dc);  // sqrdmlsh z28.s, z22.s, z3.s[1]
    // vl128 state = 0x699a840f
    __ dci(0x44af165d);  // sqrdmlsh z29.s, z18.s, z7.s[1]
    // vl128 state = 0x0b5d451f
    __ dci(0x44af16f5);  // sqrdmlsh z21.s, z23.s, z7.s[1]
    // vl128 state = 0xe49e3b59
  }

  // Load the 32-bit value stored at `state` into w0 so it can be checked
  // after the run. Presumably ComputeMachineStateHash() emits code that
  // writes the hash there.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by the number of Q-register-sized
    // lanes in the vector minus one. Entry 0 equals the final vl128 state.
    uint32_t expected_hashes[] = {
        0xe49e3b59,
        0xce0062c7,
        0xf796ec27,
        0x1f952649,
        0x4e4354e6,
        0x90cb0c51,
        0xf0688aee,
        0xae9de352,
        0x652f0c0d,
        0x0000db74,
        0xdc23fff7,
        0x228c116c,
        0x8477dd7c,
        0x08377c46,
        0x6e05a40f,
        0x874126fb,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
8811
// Hash-based regression test for the indexed forms of the SVE2 saturating
// doubling multiply high instructions (sqdmulh and its rounding variant
// sqrdmulh with .h, .s and .d lanes, per the disassembly comments below).
//
// The test emits 50 raw instruction encodings after SetInitialMachineState(),
// then uses ComputeMachineStateHash() and compares the 32-bit value read back
// from `state` with the expected hash for the current vector length.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one matches
// expected_hashes[0]. The instruction words and their order must not be
// changed without regenerating every expected hash.
TEST_SVE(sve2_sat_double_mul_high_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 instruction words emitted below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x447bf609);  // sqrdmulh z9.h, z16.h, z3.h[7]
    // vl128 state = 0xacad7d7c
    __ dci(0x447bf601);  // sqrdmulh z1.h, z16.h, z3.h[7]
    // vl128 state = 0xd6a976fe
    __ dci(0x447bf600);  // sqrdmulh z0.h, z16.h, z3.h[7]
    // vl128 state = 0x959d4287
    __ dci(0x446bf710);  // sqrdmulh z16.h, z24.h, z3.h[5]
    // vl128 state = 0x88b70b0e
    __ dci(0x446af612);  // sqrdmulh z18.h, z16.h, z2.h[5]
    // vl128 state = 0xea48068a
    __ dci(0x442af636);  // sqrdmulh z22.h, z17.h, z2.h[1]
    // vl128 state = 0x22135bae
    __ dci(0x442af626);  // sqrdmulh z6.h, z17.h, z2.h[1]
    // vl128 state = 0x1ed137a8
    __ dci(0x442af624);  // sqrdmulh z4.h, z17.h, z2.h[1]
    // vl128 state = 0x37aa44d4
    __ dci(0x4420f625);  // sqrdmulh z5.h, z17.h, z0.h[0]
    // vl128 state = 0x9747863a
    __ dci(0x4460f604);  // sqrdmulh z4.h, z16.h, z0.h[4]
    // vl128 state = 0xf6487f4b
    __ dci(0x4460f605);  // sqrdmulh z5.h, z16.h, z0.h[4]
    // vl128 state = 0xb85302a6
    __ dci(0x4420f641);  // sqrdmulh z1.h, z18.h, z0.h[0]
    // vl128 state = 0xfc85ce98
    __ dci(0x4424f669);  // sqrdmulh z9.h, z19.h, z4.h[0]
    // vl128 state = 0xf0b36dd3
    __ dci(0x4460f668);  // sqrdmulh z8.h, z19.h, z0.h[4]
    // vl128 state = 0x227fe9fe
    __ dci(0x4462f6f8);  // sqrdmulh z24.h, z23.h, z2.h[4]
    // vl128 state = 0x7f4d89ab
    __ dci(0x4462f6f0);  // sqrdmulh z16.h, z23.h, z2.h[4]
    // vl128 state = 0x61520386
    __ dci(0x4472f6d1);  // sqrdmulh z17.h, z22.h, z2.h[6]
    // vl128 state = 0x34d07c81
    __ dci(0x4472f250);  // sqdmulh z16.h, z18.h, z2.h[6]
    // vl128 state = 0x74313b89
    __ dci(0x44b2f254);  // sqdmulh z20.s, z18.s, z2.s[2]
    // vl128 state = 0x7acc9692
    __ dci(0x44e2f250);  // sqdmulh z16.d, z18.d, z2.d[0]
    // vl128 state = 0x3a1f908e
    __ dci(0x44e4f251);  // sqdmulh z17.d, z18.d, z4.d[0]
    // vl128 state = 0xd2ae3642
    __ dci(0x44e0f650);  // sqrdmulh z16.d, z18.d, z0.d[0]
    // vl128 state = 0x74da2dcc
    __ dci(0x44f8f640);  // sqrdmulh z0.d, z18.d, z8.d[1]
    // vl128 state = 0x0273639a
    __ dci(0x44f9f742);  // sqrdmulh z2.d, z26.d, z9.d[1]
    // vl128 state = 0x9c5062c9
    __ dci(0x44f9f7e6);  // sqrdmulh z6.d, z31.d, z9.d[1]
    // vl128 state = 0x095e8fd7
    __ dci(0x44fdf7ae);  // sqrdmulh z14.d, z29.d, z13.d[1]
    // vl128 state = 0x4ab7c261
    __ dci(0x44fdf7af);  // sqrdmulh z15.d, z29.d, z13.d[1]
    // vl128 state = 0x7913f02e
    __ dci(0x44f9f7ed);  // sqrdmulh z13.d, z31.d, z9.d[1]
    // vl128 state = 0xbbffd120
    __ dci(0x44f9f7e5);  // sqrdmulh z5.d, z31.d, z9.d[1]
    // vl128 state = 0xc9cc793f
    __ dci(0x44f5f7e4);  // sqrdmulh z4.d, z31.d, z5.d[1]
    // vl128 state = 0xc7cc2e4b
    __ dci(0x44e5f3e0);  // sqdmulh z0.d, z31.d, z5.d[0]
    // vl128 state = 0x8a4efda7
    __ dci(0x44e4f364);  // sqdmulh z4.d, z27.d, z4.d[0]
    // vl128 state = 0xfa30239a
    __ dci(0x44edf366);  // sqdmulh z6.d, z27.d, z13.d[0]
    // vl128 state = 0x9c538671
    __ dci(0x44adf322);  // sqdmulh z2.s, z25.s, z5.s[1]
    // vl128 state = 0xafb03157
    __ dci(0x44adf263);  // sqdmulh z3.s, z19.s, z5.s[1]
    // vl128 state = 0x6ea1e1ff
    __ dci(0x44bdf22b);  // sqdmulh z11.s, z17.s, z5.s[3]
    // vl128 state = 0x0040a3a0
    __ dci(0x44adf62a);  // sqrdmulh z10.s, z17.s, z5.s[1]
    // vl128 state = 0x8b3e6419
    __ dci(0x44adf622);  // sqrdmulh z2.s, z17.s, z5.s[1]
    // vl128 state = 0x579bf738
    __ dci(0x44abf632);  // sqrdmulh z18.s, z17.s, z3.s[1]
    // vl128 state = 0x2678c680
    __ dci(0x44a9f6ba);  // sqrdmulh z26.s, z21.s, z1.s[1]
    // vl128 state = 0xee25a322
    __ dci(0x44a9f6aa);  // sqrdmulh z10.s, z21.s, z1.s[1]
    // vl128 state = 0x99cfcf9f
    __ dci(0x44b1f6ab);  // sqrdmulh z11.s, z21.s, z1.s[2]
    // vl128 state = 0xa6785a38
    __ dci(0x44b1f0bb);  // sqdmulh z27.s, z5.s, z1.s[2]
    // vl128 state = 0xfc822233
    __ dci(0x4439f0bf);  // sqdmulh z31.h, z5.h, z1.h[3]
    // vl128 state = 0x322d49df
    __ dci(0x4433f0be);  // sqdmulh z30.h, z5.h, z3.h[2]
    // vl128 state = 0xbf6733d2
    __ dci(0x4433f0d6);  // sqdmulh z22.h, z6.h, z3.h[2]
    // vl128 state = 0x99f11483
    __ dci(0x4437f2d7);  // sqdmulh z23.h, z22.h, z7.h[2]
    // vl128 state = 0x9c146ede
    __ dci(0x4426f2d6);  // sqdmulh z22.h, z22.h, z6.h[0]
    // vl128 state = 0xc089284f
    __ dci(0x44a6f0de);  // sqdmulh z30.s, z6.s, z6.s[0]
    // vl128 state = 0xe962a269
    __ dci(0x44a4f04e);  // sqdmulh z14.s, z2.s, z4.s[0]
    // vl128 state = 0xaea2f35e
  }

  // Load the 32-bit value stored at `state` into w0 so it can be checked
  // after the run. Presumably ComputeMachineStateHash() emits code that
  // writes the hash there.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by the number of Q-register-sized
    // lanes in the vector minus one. Entry 0 equals the final vl128 state.
    uint32_t expected_hashes[] = {
        0xaea2f35e,
        0xb4e17c50,
        0x97dfb966,
        0x070d3c78,
        0x5b2f880d,
        0x8e643be0,
        0x4d7f006b,
        0xfbd08185,
        0x4960a97d,
        0x1e85903f,
        0x443b62e4,
        0xf196453a,
        0x50dae6ef,
        0x0e4bb245,
        0x69d661ab,
        0x7d6fb839,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
8955
// Hash-based regression test for the SVE2 form of `ext` that takes a pair of
// consecutive source registers (e.g. {z0.b, z1.b}) and a byte offset
// immediate, per the disassembly comments below. The register list wraps
// around in one case ({z31.b, z0.b}).
//
// The test emits 60 raw instruction encodings after SetInitialMachineState(),
// then uses ComputeMachineStateHash() and compares the 32-bit value read back
// from `state` with the expected hash for the current vector length.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one matches
// expected_hashes[0]. The instruction words and their order must not be
// changed without regenerating every expected hash.
TEST_SVE(sve2_extract) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 60 instruction words emitted below.
    ExactAssemblyScope scope(&masm, 60 * kInstructionSize);
    __ dci(0x056a1008);  // ext z8.b, {z0.b, z1.b}, #84
    // vl128 state = 0x06ae6d5d
    __ dci(0x05601418);  // ext z24.b, {z0.b, z1.b}, #5
    // vl128 state = 0x3b73c922
    __ dci(0x05601708);  // ext z8.b, {z24.b, z25.b}, #5
    // vl128 state = 0xc3526a3d
    __ dci(0x05601d0c);  // ext z12.b, {z8.b, z9.b}, #7
    // vl128 state = 0xbde17731
    __ dci(0x05600c1c);  // ext z28.b, {z0.b, z1.b}, #3
    // vl128 state = 0x9ac72141
    __ dci(0x05600c58);  // ext z24.b, {z2.b, z3.b}, #3
    // vl128 state = 0xccecefc0
    __ dci(0x05600410);  // ext z16.b, {z0.b, z1.b}, #1
    // vl128 state = 0xe49d5f89
    __ dci(0x05600438);  // ext z24.b, {z1.b, z2.b}, #1
    // vl128 state = 0x9967df9d
    __ dci(0x0560067a);  // ext z26.b, {z19.b, z20.b}, #1
    // vl128 state = 0x110a8b46
    __ dci(0x05601478);  // ext z24.b, {z3.b, z4.b}, #5
    // vl128 state = 0x558f95f2
    __ dci(0x0560117c);  // ext z28.b, {z11.b, z12.b}, #4
    // vl128 state = 0x18d0f048
    __ dci(0x0560157e);  // ext z30.b, {z11.b, z12.b}, #5
    // vl128 state = 0x1719547f
    __ dci(0x05601c7a);  // ext z26.b, {z3.b, z4.b}, #7
    // vl128 state = 0x600cfa8a
    __ dci(0x0560187e);  // ext z30.b, {z3.b, z4.b}, #6
    // vl128 state = 0xc93e431e
    __ dci(0x05601876);  // ext z22.b, {z3.b, z4.b}, #6
    // vl128 state = 0x5be7af00
    __ dci(0x05601c26);  // ext z6.b, {z1.b, z2.b}, #7
    // vl128 state = 0xd3d69d02
    __ dci(0x05601c2e);  // ext z14.b, {z1.b, z2.b}, #7
    // vl128 state = 0x1d88c27b
    __ dci(0x05601d3e);  // ext z30.b, {z9.b, z10.b}, #7
    // vl128 state = 0x56f91523
    __ dci(0x05601dae);  // ext z14.b, {z13.b, z14.b}, #7
    // vl128 state = 0xbc175582
    __ dci(0x056015ef);  // ext z15.b, {z15.b, z16.b}, #5
    // vl128 state = 0x9289a9ba
    __ dci(0x0560157f);  // ext z31.b, {z11.b, z12.b}, #5
    // vl128 state = 0x46be3725
    __ dci(0x0560157e);  // ext z30.b, {z11.b, z12.b}, #5
    // vl128 state = 0xa4fd59e9
    __ dci(0x0560156e);  // ext z14.b, {z11.b, z12.b}, #5
    // vl128 state = 0x88b9ba85
    __ dci(0x05601566);  // ext z6.b, {z11.b, z12.b}, #5
    // vl128 state = 0x7f3b2a36
    __ dci(0x056017e4);  // ext z4.b, {z31.b, z0.b}, #5
    // vl128 state = 0xa71b8fa9
    __ dci(0x05601f74);  // ext z20.b, {z27.b, z28.b}, #7
    // vl128 state = 0x89dcdeac
    __ dci(0x05601f44);  // ext z4.b, {z26.b, z27.b}, #7
    // vl128 state = 0xa877313f
    __ dci(0x05601e45);  // ext z5.b, {z18.b, z19.b}, #7
    // vl128 state = 0x6181834a
    __ dci(0x05601255);  // ext z21.b, {z18.b, z19.b}, #4
    // vl128 state = 0x7c3595cd
    __ dci(0x05701a51);  // ext z17.b, {z18.b, z19.b}, #134
    // vl128 state = 0x10fdfe4d
    __ dci(0x05701ad3);  // ext z19.b, {z22.b, z23.b}, #134
    // vl128 state = 0x08e923c5
    __ dci(0x05701ad1);  // ext z17.b, {z22.b, z23.b}, #134
    // vl128 state = 0xefb2c9e9
    __ dci(0x05701b41);  // ext z1.b, {z26.b, z27.b}, #134
    // vl128 state = 0xd5dccda9
    __ dci(0x05701b40);  // ext z0.b, {z26.b, z27.b}, #134
    // vl128 state = 0xd424c039
    __ dci(0x05701bd0);  // ext z16.b, {z30.b, z31.b}, #134
    // vl128 state = 0xd914c077
    __ dci(0x057013d8);  // ext z24.b, {z30.b, z31.b}, #132
    // vl128 state = 0x32459b3a
    __ dci(0x05701259);  // ext z25.b, {z18.b, z19.b}, #132
    // vl128 state = 0x422ed7bf
    __ dci(0x0570125d);  // ext z29.b, {z18.b, z19.b}, #132
    // vl128 state = 0x6bfc46ef
    __ dci(0x05700215);  // ext z21.b, {z16.b, z17.b}, #128
    // vl128 state = 0xc53b85ed
    __ dci(0x0560021d);  // ext z29.b, {z16.b, z17.b}, #0
    // vl128 state = 0xd391e5ec
    __ dci(0x0570121c);  // ext z28.b, {z16.b, z17.b}, #132
    // vl128 state = 0x7990c1d7
    __ dci(0x0570030c);  // ext z12.b, {z24.b, z25.b}, #128
    // vl128 state = 0xca0d3db8
    __ dci(0x05700b88);  // ext z8.b, {z28.b, z29.b}, #130
    // vl128 state = 0xe5c71442
    __ dci(0x05600b0c);  // ext z12.b, {z24.b, z25.b}, #2
    // vl128 state = 0x68510d62
    __ dci(0x05600f1c);  // ext z28.b, {z24.b, z25.b}, #3
    // vl128 state = 0x77f9f046
    __ dci(0x05600e14);  // ext z20.b, {z16.b, z17.b}, #3
    // vl128 state = 0x7068dedf
    __ dci(0x05600604);  // ext z4.b, {z16.b, z17.b}, #1
    // vl128 state = 0x8b70c406
    __ dci(0x05600406);  // ext z6.b, {z0.b, z1.b}, #1
    // vl128 state = 0x10e6b48c
    __ dci(0x05600056);  // ext z22.b, {z2.b, z3.b}, #0
    // vl128 state = 0xe1294d7a
    __ dci(0x05600052);  // ext z18.b, {z2.b, z3.b}, #0
    // vl128 state = 0x0762bbb0
    __ dci(0x056000d6);  // ext z22.b, {z6.b, z7.b}, #0
    // vl128 state = 0x58be0ba4
    __ dci(0x057008de);  // ext z30.b, {z6.b, z7.b}, #130
    // vl128 state = 0x8a2018e9
    __ dci(0x0570085a);  // ext z26.b, {z2.b, z3.b}, #130
    // vl128 state = 0xb019b7e0
    __ dci(0x057009d2);  // ext z18.b, {z14.b, z15.b}, #130
    // vl128 state = 0x9e6e14ed
    __ dci(0x057008fa);  // ext z26.b, {z7.b, z8.b}, #130
    // vl128 state = 0x4cf64d22
    __ dci(0x057008f2);  // ext z18.b, {z7.b, z8.b}, #130
    // vl128 state = 0x048c30f9
    __ dci(0x057002f3);  // ext z19.b, {z23.b, z24.b}, #128
    // vl128 state = 0x2d7eb43b
    __ dci(0x057006a3);  // ext z3.b, {z21.b, z22.b}, #129
    // vl128 state = 0xa37aeb5e
    __ dci(0x05700687);  // ext z7.b, {z20.b, z21.b}, #129
    // vl128 state = 0xd8d7cdc7
    __ dci(0x056006b7);  // ext z23.b, {z21.b, z22.b}, #1
    // vl128 state = 0x2480e1d4
  }

  // Load the 32-bit value stored at `state` into w0 so it can be checked
  // after the run. Presumably ComputeMachineStateHash() emits code that
  // writes the hash there.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen expected hashes, indexed by the number of Q-register-sized
    // lanes in the vector minus one. Entry 0 equals the final vl128 state.
    uint32_t expected_hashes[] = {
        0x2480e1d4,
        0x4dc42cc5,
        0x7ac24121,
        0x9eaf5c98,
        0x1b7b35dc,
        0x1b1035fc,
        0xe15f6899,
        0xaad14717,
        0x3327c3fc,
        0x7f349408,
        0x2d865b00,
        0x9819cd29,
        0x7f64cace,
        0x3751e2c1,
        0x7e60fc24,
        0xc6b308fc,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
9119
9120 } // namespace aarch64
9121 } // namespace vixl
9122