1// Copyright 2020, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <sys/mman.h>
#include <unistd.h>

#include "test-runner.h"
#include "test-utils.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "aarch64/test-utils-aarch64.h"
#include "test-assembler-aarch64.h"

// Every test in this file is declared through TEST_SVE, which forwards the
// test name to TEST_SVE_INNER together with the "SIM" tag.
#define TEST_SVE(name) TEST_SVE_INNER("SIM", name)

namespace vixl {
namespace aarch64 {

// All tests below share the same shape:
//  1. SetInitialMachineState() seeds the machine state.
//  2. A fixed sequence of raw instruction encodings is emitted with dci()
//     inside an ExactAssemblyScope sized for exactly that many instructions.
//     Each encoding carries its disassembly as a trailing comment, followed by
//     a "vl128 state" comment; the last such value in every test equals
//     expected_hashes[0].
//  3. ComputeMachineStateHash() generates code that stores a hash in `state`,
//     which is then loaded into w0.
//  4. After RUN(), x0 is compared with the expected_hashes entry selected by
//     core.GetSVELaneCount(kQRegSize) - 1 (16 entries per test).

// Hash-based test of 50 SVE2 halving add/subtract encodings: uhadd, urhadd,
// shadd, srhadd, uhsub, uhsubr, shsub and shsubr.
TEST_SVE(sve2_halving_arithmetic) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x441182b2);  // uhadd z18.b, p0/m, z18.b, z21.b
    // vl128 state = 0x8ac2942a
    __ dci(0x441382f3);  // uhsub z19.b, p0/m, z19.b, z23.b
    // vl128 state = 0x0e0db643
    __ dci(0x449383fb);  // uhsub z27.s, p0/m, z27.s, z31.s
    // vl128 state = 0x6a97fc8c
    __ dci(0x441283fa);  // shsub z26.b, p0/m, z26.b, z31.b
    // vl128 state = 0x48a5fd5f
    __ dci(0x44928372);  // shsub z18.s, p0/m, z18.s, z27.s
    // vl128 state = 0x7c670d36
    __ dci(0x44d2827a);  // shsub z26.d, p0/m, z26.d, z19.d
    // vl128 state = 0x3a15c66f
    __ dci(0x4492823b);  // shsub z27.s, p0/m, z27.s, z17.s
    // vl128 state = 0xe407c826
    __ dci(0x44978239);  // uhsubr z25.s, p0/m, z25.s, z17.s
    // vl128 state = 0xf7157dae
    __ dci(0x4493827d);  // uhsub z29.s, p0/m, z29.s, z19.s
    // vl128 state = 0xcebff22f
    __ dci(0x449782f9);  // uhsubr z25.s, p0/m, z25.s, z23.s
    // vl128 state = 0xbe691139
    __ dci(0x44978231);  // uhsubr z17.s, p0/m, z17.s, z17.s
    // vl128 state = 0x59b2af72
    __ dci(0x44578233);  // uhsubr z19.h, p0/m, z19.h, z17.h
    // vl128 state = 0xd7fad727
    __ dci(0x44578312);  // uhsubr z18.h, p0/m, z18.h, z24.h
    // vl128 state = 0x87b5d00a
    __ dci(0x44578610);  // uhsubr z16.h, p1/m, z16.h, z16.h
    // vl128 state = 0xbaae097d
    __ dci(0x44578618);  // uhsubr z24.h, p1/m, z24.h, z16.h
    // vl128 state = 0x3887509e
    __ dci(0x44168608);  // shsubr z8.b, p1/m, z8.b, z16.b
    // vl128 state = 0xc16dc63b
    __ dci(0x44128700);  // shsub z0.b, p1/m, z0.b, z24.b
    // vl128 state = 0x3eddcd6d
    __ dci(0x44528f02);  // shsub z2.h, p3/m, z2.h, z24.h
    // vl128 state = 0x2e7ffa0d
    __ dci(0x44538f40);  // uhsub z0.h, p3/m, z0.h, z26.h
    // vl128 state = 0x1f68bee5
    __ dci(0x44538342);  // uhsub z2.h, p0/m, z2.h, z26.h
    // vl128 state = 0x2a368049
    __ dci(0x44538040);  // uhsub z0.h, p0/m, z0.h, z2.h
    // vl128 state = 0x0537f844
    __ dci(0x44568044);  // shsubr z4.h, p0/m, z4.h, z2.h
    // vl128 state = 0x0dfac1b2
    __ dci(0x445688cc);  // shsubr z12.h, p2/m, z12.h, z6.h
    // vl128 state = 0xbefa909b
    __ dci(0x44d288dc);  // shsub z28.d, p2/m, z28.d, z6.d
    // vl128 state = 0xbadc14bb
    __ dci(0x44d288d8);  // shsub z24.d, p2/m, z24.d, z6.d
    // vl128 state = 0x518130c0
    __ dci(0x44d088f0);  // shadd z16.d, p2/m, z16.d, z7.d
    // vl128 state = 0xb01856bd
    __ dci(0x44d08cd2);  // shadd z18.d, p3/m, z18.d, z6.d
    // vl128 state = 0xbbcfeaa2
    __ dci(0x44d484d0);  // srhadd z16.d, p1/m, z16.d, z6.d
    // vl128 state = 0xefe1d416
    __ dci(0x44d496d1);  // srhadd z17.d, p5/m, z17.d, z22.d
    // vl128 state = 0xceb574b8
    __ dci(0x44d196d5);  // uhadd z21.d, p5/m, z21.d, z22.d
    // vl128 state = 0x46cdd268
    __ dci(0x44d496dd);  // srhadd z29.d, p5/m, z29.d, z22.d
    // vl128 state = 0x21a81b6a
    __ dci(0x4494969c);  // srhadd z28.s, p5/m, z28.s, z20.s
    // vl128 state = 0x2316cb04
    __ dci(0x4494968c);  // srhadd z12.s, p5/m, z12.s, z20.s
    // vl128 state = 0x6248cc0a
    __ dci(0x4415968d);  // urhadd z13.b, p5/m, z13.b, z20.b
    // vl128 state = 0x6edd11e0
    __ dci(0x44119e8c);  // uhadd z12.b, p7/m, z12.b, z20.b
    // vl128 state = 0x81841eb6
    __ dci(0x4491968d);  // uhadd z13.s, p5/m, z13.s, z20.s
    // vl128 state = 0x02b8b893
    __ dci(0x44118685);  // uhadd z5.b, p1/m, z5.b, z20.b
    // vl128 state = 0x707db891
    __ dci(0x44138e8d);  // uhsub z13.b, p3/m, z13.b, z20.b
    // vl128 state = 0x2caa64dd
    __ dci(0x44139e0c);  // uhsub z12.b, p7/m, z12.b, z16.b
    // vl128 state = 0xe34695ef
    __ dci(0x44128e0d);  // shsub z13.b, p3/m, z13.b, z16.b
    // vl128 state = 0x477197dd
    __ dci(0x44129a1d);  // shsub z29.b, p6/m, z29.b, z16.b
    // vl128 state = 0x19cebaa2
    __ dci(0x44129a19);  // shsub z25.b, p6/m, z25.b, z16.b
    // vl128 state = 0x0d62dca4
    __ dci(0x44129249);  // shsub z9.b, p4/m, z9.b, z18.b
    // vl128 state = 0x327e81e3
    __ dci(0x44129248);  // shsub z8.b, p4/m, z8.b, z18.b
    // vl128 state = 0x28ec9bf8
    __ dci(0x44169269);  // shsubr z9.b, p4/m, z9.b, z19.b
    // vl128 state = 0x652ca8c9
    __ dci(0x44168661);  // shsubr z1.b, p1/m, z1.b, z19.b
    // vl128 state = 0x46fcb15a
    __ dci(0x44168420);  // shsubr z0.b, p1/m, z0.b, z1.b
    // vl128 state = 0x7151e02b
    __ dci(0x44168428);  // shsubr z8.b, p1/m, z8.b, z1.b
    // vl128 state = 0x4c8921f6
    __ dci(0x44148409);  // srhadd z9.b, p1/m, z9.b, z0.b
    // vl128 state = 0xd0d2fc1c
    __ dci(0x44148641);  // srhadd z1.b, p1/m, z1.b, z18.b
    // vl128 state = 0xc821f381
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0xc821f381,
        0xc0ad3b7c,
        0x4eb4ba1b,
        0xdc8e061a,
        0x64675a15,
        0x923703bf,
        0x6944c0db,
        0x7ac89bae,
        0x8fa4c45f,
        0xf64c8b4c,
        0x8ba751b7,
        0x2fe8832e,
        0xc6b8000d,
        0x864ba0ff,
        0xded22c04,
        0x213cf65e,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of 50 SVE2 shift-and-insert encodings: sri and sli.
TEST_SVE(sve2_sli_sri) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x4509f07f);  // sri z31.b, z3.b, #7
    // vl128 state = 0x509a7a2d
    __ dci(0x454bf07e);  // sri z30.s, z3.s, #21
    // vl128 state = 0xc973a4e8
    __ dci(0x450bf17a);  // sri z26.b, z11.b, #5
    // vl128 state = 0xa9dcbcf5
    __ dci(0x450ef17b);  // sri z27.b, z11.b, #2
    // vl128 state = 0xd56761c1
    __ dci(0x458ef1f9);  // sri z25.d, z15.d, #50
    // vl128 state = 0xdd84a538
    __ dci(0x459ff1fb);  // sri z27.d, z15.d, #33
    // vl128 state = 0x4e2dbf4a
    __ dci(0x459ff5df);  // sli z31.d, z14.d, #31
    // vl128 state = 0x46d9563e
    __ dci(0x45d7f5cf);  // sli z15.d, z14.d, #55
    // vl128 state = 0xf4fcf912
    __ dci(0x4593f5ce);  // sli z14.d, z14.d, #19
    // vl128 state = 0xcef34d18
    __ dci(0x4593f1fe);  // sri z30.d, z15.d, #45
    // vl128 state = 0x69509e94
    __ dci(0x4581f1ff);  // sri z31.d, z15.d, #63
    // vl128 state = 0x09cd0cf7
    __ dci(0x45c1f1bd);  // sri z29.d, z13.d, #31
    // vl128 state = 0xfc095f8b
    __ dci(0x45c1f03c);  // sri z28.d, z1.d, #31
    // vl128 state = 0x0ca836f0
    __ dci(0x45c1f4b4);  // sli z20.d, z5.d, #33
    // vl128 state = 0x678be6b3
    __ dci(0x45c1f5f0);  // sli z16.d, z15.d, #33
    // vl128 state = 0x7a743b56
    __ dci(0x45c7f5f2);  // sli z18.d, z15.d, #39
    // vl128 state = 0x0bbc4117
    __ dci(0x45c7f5e2);  // sli z2.d, z15.d, #39
    // vl128 state = 0x13e1a7ae
    __ dci(0x45c7f1a0);  // sri z0.d, z13.d, #25
    // vl128 state = 0x8014a497
    __ dci(0x4597f1b0);  // sri z16.d, z13.d, #41
    // vl128 state = 0x5f7994a8
    __ dci(0x4593f5b1);  // sli z17.d, z13.d, #19
    // vl128 state = 0x125f37b5
    __ dci(0x4591f5f0);  // sli z16.d, z15.d, #17
    // vl128 state = 0x26f1fdf2
    __ dci(0x4581f5d2);  // sli z18.d, z14.d, #1
    // vl128 state = 0x5b0baccc
    __ dci(0x4541f5d6);  // sli z22.s, z14.s, #1
    // vl128 state = 0x74f04ecb
    __ dci(0x4551f1d4);  // sri z20.s, z14.s, #15
    // vl128 state = 0xc43d0586
    __ dci(0x4553f150);  // sri z16.s, z10.s, #13
    // vl128 state = 0xce8c688a
    __ dci(0x4557f171);  // sri z17.s, z11.s, #9
    // vl128 state = 0x03a5b3b0
    __ dci(0x4513f175);  // sri z21.h, z11.h, #13
    // vl128 state = 0x392ab48e
    __ dci(0x4551f177);  // sri z23.s, z11.s, #15
    // vl128 state = 0xa886dbc8
    __ dci(0x4551f17f);  // sri z31.s, z11.s, #15
    // vl128 state = 0x37c804bc
    __ dci(0x4551f16f);  // sri z15.s, z11.s, #15
    // vl128 state = 0x17e99d67
    __ dci(0x4550f067);  // sri z7.s, z3.s, #16
    // vl128 state = 0xb0bd981a
    __ dci(0x4550f077);  // sri z23.s, z3.s, #16
    // vl128 state = 0x5f643b3e
    __ dci(0x4551f0f5);  // sri z21.s, z7.s, #15
    // vl128 state = 0xa0b83a32
    __ dci(0x4551f09d);  // sri z29.s, z4.s, #15
    // vl128 state = 0x890807a1
    __ dci(0x4552f08d);  // sri z13.s, z4.s, #14
    // vl128 state = 0x81cb8fa4
    __ dci(0x4512f01d);  // sri z29.h, z0.h, #14
    // vl128 state = 0x62751a54
    __ dci(0x4552f419);  // sli z25.s, z0.s, #18
    // vl128 state = 0xfd7c0337
    __ dci(0x4542f49b);  // sli z27.s, z4.s, #2
    // vl128 state = 0x0089e534
    __ dci(0x454af09a);  // sri z26.s, z4.s, #22
    // vl128 state = 0xea87d159
    __ dci(0x45caf0d8);  // sri z24.d, z6.d, #22
    // vl128 state = 0x3c44b845
    __ dci(0x45c2f2dc);  // sri z28.d, z22.d, #30
    // vl128 state = 0x9b8c17a7
    __ dci(0x45caf25d);  // sri z29.d, z18.d, #22
    // vl128 state = 0x3e2c1797
    __ dci(0x45caf0dc);  // sri z28.d, z6.d, #22
    // vl128 state = 0xbf933754
    __ dci(0x458af1cc);  // sri z12.d, z14.d, #54
    // vl128 state = 0x93e91a23
    __ dci(0x4586f1cd);  // sri z13.d, z14.d, #58
    // vl128 state = 0x0f7c6faa
    __ dci(0x458ef0cc);  // sri z12.d, z6.d, #50
    // vl128 state = 0x1d771f71
    __ dci(0x458ef00d);  // sri z13.d, z0.d, #50
    // vl128 state = 0x29a23da7
    __ dci(0x450ef05d);  // sri z29.b, z2.b, #2
    // vl128 state = 0x74fd2038
    __ dci(0x450cf00d);  // sri z13.b, z0.b, #4
    // vl128 state = 0x075bc166
    __ dci(0x450cf00c);  // sri z12.b, z0.b, #4
    // vl128 state = 0xfd3d290f
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0xfd3d290f,
        0x8dd0bdab,
        0xa25ba843,
        0x484543ed,
        0x22df2f4f,
        0xb62769dc,
        0x795e30f7,
        0xe49948e7,
        0xd4ceb676,
        0xbf2d359a,
        0xcf4331a9,
        0x8cce4eef,
        0x4fbaec97,
        0x4fec4d88,
        0x3efc521d,
        0xffef31d1,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of 50 SVE2 predicated rounding shift-right-by-immediate
// encodings: srshr and urshr.
TEST_SVE(sve2_srshr_urshr) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x04cc9074);  // srshr z20.d, p4/m, z20.d, #29
    // vl128 state = 0xecefbcaa
    __ dci(0x04cc9236);  // srshr z22.d, p4/m, z22.d, #15
    // vl128 state = 0x7eef75c3
    __ dci(0x04cd927e);  // urshr z30.d, p4/m, z30.d, #13
    // vl128 state = 0xf5ab0a43
    __ dci(0x04cd9e76);  // urshr z22.d, p7/m, z22.d, #13
    // vl128 state = 0x67a9d15a
    __ dci(0x04cd9a57);  // urshr z23.d, p6/m, z23.d, #14
    // vl128 state = 0xf1591f3f
    __ dci(0x044d9247);  // urshr z7.s, p4/m, z7.s, #14
    // vl128 state = 0xcb770d03
    __ dci(0x044d9245);  // urshr z5.s, p4/m, z5.s, #14
    // vl128 state = 0x7a225c92
    __ dci(0x044d9241);  // urshr z1.s, p4/m, z1.s, #14
    // vl128 state = 0x31e4f59a
    __ dci(0x044d8200);  // urshr z0.s, p0/m, z0.s, #16
    // vl128 state = 0x7c0c67fa
    __ dci(0x044d8330);  // urshr z16.s, p0/m, z16.s, #7
    // vl128 state = 0x2aaa996d
    __ dci(0x044d8340);  // urshr z0.s, p0/m, z0.s, #6
    // vl128 state = 0x1999a541
    __ dci(0x044d8104);  // urshr z4.s, p0/m, z4.s, #24
    // vl128 state = 0xbebc22f3
    __ dci(0x044d8526);  // urshr z6.s, p1/m, z6.s, #23
    // vl128 state = 0x5e9c818d
    __ dci(0x04cd8502);  // urshr z2.d, p1/m, z2.d, #24
    // vl128 state = 0x9cd88e00
    __ dci(0x048d9506);  // urshr z6.d, p5/m, z6.d, #56
    // vl128 state = 0xff60a16e
    __ dci(0x048d9504);  // urshr z4.d, p5/m, z4.d, #56
    // vl128 state = 0xfae64bf4
    __ dci(0x048d8705);  // urshr z5.d, p1/m, z5.d, #40
    // vl128 state = 0xbd7bc8bb
    __ dci(0x048d9307);  // urshr z7.d, p4/m, z7.d, #40
    // vl128 state = 0x22e58729
    __ dci(0x048c9323);  // srshr z3.d, p4/m, z3.d, #39
    // vl128 state = 0x1a2b90d1
    __ dci(0x048c8721);  // srshr z1.d, p1/m, z1.d, #39
    // vl128 state = 0xf31798ea
    __ dci(0x04cc8f20);  // srshr z0.d, p3/m, z0.d, #7
    // vl128 state = 0x3a159e41
    __ dci(0x04cc87b0);  // srshr z16.d, p1/m, z16.d, #3
    // vl128 state = 0x461819c6
    __ dci(0x04cc8778);  // srshr z24.d, p1/m, z24.d, #5
    // vl128 state = 0x52c8c945
    __ dci(0x048c8730);  // srshr z16.d, p1/m, z16.d, #39
    // vl128 state = 0xa6724c16
    __ dci(0x040c8534);  // srshr z20.b, p1/m, z20.b, #7
    // vl128 state = 0xfeae5ea1
    __ dci(0x040c957c);  // srshr z28.b, p5/m, z28.b, #5
    // vl128 state = 0xe55cac9f
    __ dci(0x048c9554);  // srshr z20.d, p5/m, z20.d, #54
    // vl128 state = 0x41ccbe50
    __ dci(0x048c8156);  // srshr z22.d, p0/m, z22.d, #54
    // vl128 state = 0xfef5c71e
    __ dci(0x040c8957);  // srshr z23.b, p2/m, z23.b, #6
    // vl128 state = 0xac8cf177
    __ dci(0x040c8bd5);  // srshr z21.h, p2/m, z21.h, #2
    // vl128 state = 0xfe7005fe
    __ dci(0x040c8354);  // srshr z20.h, p0/m, z20.h, #6
    // vl128 state = 0x1daa6598
    __ dci(0x040c931c);  // srshr z28.h, p4/m, z28.h, #8
    // vl128 state = 0x8c7f2675
    __ dci(0x040c9798);  // srshr z24.h, p5/m, z24.h, #4
    // vl128 state = 0x2349e927
    __ dci(0x044c97ba);  // srshr z26.s, p5/m, z26.s, #3
    // vl128 state = 0xf3670053
    __ dci(0x040c9faa);  // srshr z10.h, p7/m, z10.h, #3
    // vl128 state = 0x61333578
    __ dci(0x044d9fae);  // urshr z14.s, p7/m, z14.s, #3
    // vl128 state = 0xdb1232a3
    __ dci(0x044d8f8f);  // urshr z15.s, p3/m, z15.s, #4
    // vl128 state = 0xb1b4bda1
    __ dci(0x044d8f87);  // urshr z7.s, p3/m, z7.s, #4
    // vl128 state = 0xba636ab8
    __ dci(0x044d9d97);  // urshr z23.s, p7/m, z23.s, #20
    // vl128 state = 0x8ab01b49
    __ dci(0x040d9593);  // urshr z19.b, p5/m, z19.b, #4
    // vl128 state = 0x20ee49b4
    __ dci(0x040d959b);  // urshr z27.b, p5/m, z27.b, #4
    // vl128 state = 0xe34dcf2e
    __ dci(0x044c959a);  // srshr z26.s, p5/m, z26.s, #20
    // vl128 state = 0x65bafb28
    __ dci(0x044d9492);  // urshr z18.s, p5/m, z18.s, #28
    // vl128 state = 0xcbed1382
    __ dci(0x044c8493);  // srshr z19.s, p1/m, z19.s, #28
    // vl128 state = 0xa54fb84c
    __ dci(0x044c8cc3);  // srshr z3.s, p3/m, z3.s, #26
    // vl128 state = 0x257267ee
    __ dci(0x044c8c0b);  // srshr z11.s, p3/m, z11.s, #32
    // vl128 state = 0xd494a3e8
    __ dci(0x044c8c6f);  // srshr z15.s, p3/m, z15.s, #29
    // vl128 state = 0x63621477
    __ dci(0x044c9c2e);  // srshr z14.s, p7/m, z14.s, #31
    // vl128 state = 0x4cb2e888
    __ dci(0x04cc943e);  // srshr z30.d, p5/m, z30.d, #31
    // vl128 state = 0x8e580ba2
    __ dci(0x04cd953f);  // urshr z31.d, p5/m, z31.d, #23
    // vl128 state = 0x7678cc05
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0x7678cc05,
        0x37f2893a,
        0xce2a105d,
        0x5a03f5a3,
        0x81444dfc,
        0x5581c0c1,
        0xfee622cc,
        0x0f6796a5,
        0xf151a5fd,
        0x13e9be9c,
        0x9685f8b5,
        0xa6827285,
        0x7ad6d004,
        0xba7989ae,
        0x96fe2826,
        0xd1ddc17e,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of 50 SVE2 predicated saturating shift-left-by-immediate
// encodings: sqshl, uqshl and sqshlu.
TEST_SVE(sve2_sqshl_uqshl) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x044f86aa);  // sqshlu z10.s, p1/m, z10.s, #21
    // vl128 state = 0x37777991
    __ dci(0x044f8482);  // sqshlu z2.s, p1/m, z2.s, #4
    // vl128 state = 0x8119dd5a
    __ dci(0x048f8480);  // sqshlu z0.d, p1/m, z0.d, #4
    // vl128 state = 0x8966cd23
    __ dci(0x04cf8c82);  // sqshlu z2.d, p3/m, z2.d, #36
    // vl128 state = 0x71b53135
    __ dci(0x044f8892);  // sqshlu z18.s, p2/m, z18.s, #4
    // vl128 state = 0x44e0e9a7
    __ dci(0x04cf8996);  // sqshlu z22.d, p2/m, z22.d, #44
    // vl128 state = 0x4e4b77b9
    __ dci(0x04cf9194);  // sqshlu z20.d, p4/m, z20.d, #44
    // vl128 state = 0x66d72728
    __ dci(0x04cf9b9c);  // sqshlu z28.d, p6/m, z28.d, #60
    // vl128 state = 0xa80f62ce
    __ dci(0x04c79f8c);  // uqshl z12.d, p7/m, z12.d, #60
    // vl128 state = 0x87a3a8c0
    __ dci(0x04469f88);  // sqshl z8.s, p7/m, z8.s, #28
    // vl128 state = 0x3db302cb
    __ dci(0x04469f8a);  // sqshl z10.s, p7/m, z10.s, #28
    // vl128 state = 0x2d66bbb2
    __ dci(0x04469a8e);  // sqshl z14.s, p6/m, z14.s, #20
    // vl128 state = 0x39524732
    __ dci(0x04c69a1e);  // sqshl z30.d, p6/m, z30.d, #48
    // vl128 state = 0x39d71433
    __ dci(0x04c68a9a);  // sqshl z26.d, p2/m, z26.d, #52
    // vl128 state = 0x58771cfb
    __ dci(0x04469a8a);  // sqshl z10.s, p6/m, z10.s, #20
    // vl128 state = 0xa773fcc9
    __ dci(0x04c68a88);  // sqshl z8.d, p2/m, z8.d, #52
    // vl128 state = 0x9dce801c
    __ dci(0x04469a89);  // sqshl z9.s, p6/m, z9.s, #20
    // vl128 state = 0x4141302f
    __ dci(0x04479b81);  // uqshl z1.s, p6/m, z1.s, #28
    // vl128 state = 0x369084f9
    __ dci(0x044f9f91);  // sqshlu z17.s, p7/m, z17.s, #28
    // vl128 state = 0x1570bb90
    __ dci(0x04479e90);  // uqshl z16.s, p7/m, z16.s, #20
    // vl128 state = 0x27765662
    __ dci(0x044f9f94);  // sqshlu z20.s, p7/m, z20.s, #28
    // vl128 state = 0xe99bcbb9
    __ dci(0x04479795);  // uqshl z21.s, p5/m, z21.s, #28
    // vl128 state = 0xb36c3b9f
    __ dci(0x04479754);  // uqshl z20.s, p5/m, z20.s, #26
    // vl128 state = 0x435e0256
    __ dci(0x04479750);  // uqshl z16.s, p5/m, z16.s, #26
    // vl128 state = 0x485471e9
    __ dci(0x04479740);  // uqshl z0.s, p5/m, z0.s, #26
    // vl128 state = 0x170e10cb
    __ dci(0x04079544);  // uqshl z4.b, p5/m, z4.b, #2
    // vl128 state = 0x026fe32a
    __ dci(0x04c79546);  // uqshl z6.d, p5/m, z6.d, #42
    // vl128 state = 0x9a92b063
    __ dci(0x04c78504);  // uqshl z4.d, p1/m, z4.d, #40
    // vl128 state = 0x4e9a105e
    __ dci(0x04879500);  // uqshl z0.d, p5/m, z0.d, #8
    // vl128 state = 0x958b4d28
    __ dci(0x04879908);  // uqshl z8.d, p6/m, z8.d, #8
    // vl128 state = 0x420ff82d
    __ dci(0x04879318);  // uqshl z24.d, p4/m, z24.d, #24
    // vl128 state = 0x88002097
    __ dci(0x0487931a);  // uqshl z26.d, p4/m, z26.d, #24
    // vl128 state = 0x3047401c
    __ dci(0x0486938a);  // sqshl z10.d, p4/m, z10.d, #28
    // vl128 state = 0x5b2b7938
    __ dci(0x04069188);  // sqshl z8.b, p4/m, z8.b, #4
    // vl128 state = 0xb92dd260
    __ dci(0x04469389);  // sqshl z9.s, p4/m, z9.s, #28
    // vl128 state = 0xdc6370c3
    __ dci(0x0447918b);  // uqshl z11.s, p4/m, z11.s, #12
    // vl128 state = 0x5e6198f0
    __ dci(0x0447913b);  // uqshl z27.s, p4/m, z27.s, #9
    // vl128 state = 0x935ed2a3
    __ dci(0x0447915f);  // uqshl z31.s, p4/m, z31.s, #10
    // vl128 state = 0x76271654
    __ dci(0x0406915d);  // sqshl z29.b, p4/m, z29.b, #2
    // vl128 state = 0x46a71ae3
    __ dci(0x0486911f);  // sqshl z31.d, p4/m, z31.d, #8
    // vl128 state = 0x2c7320a6
    __ dci(0x0486911d);  // sqshl z29.d, p4/m, z29.d, #8
    // vl128 state = 0x4aa0022d
    __ dci(0x04869b1f);  // sqshl z31.d, p6/m, z31.d, #24
    // vl128 state = 0x2de081d7
    __ dci(0x04069317);  // sqshl z23.h, p4/m, z23.h, #8
    // vl128 state = 0x879c9ead
    __ dci(0x0447931f);  // uqshl z31.s, p4/m, z31.s, #24
    // vl128 state = 0x51070552
    __ dci(0x04479b9e);  // uqshl z30.s, p6/m, z30.s, #28
    // vl128 state = 0x8cc26b2b
    __ dci(0x04479adf);  // uqshl z31.s, p6/m, z31.s, #22
    // vl128 state = 0x8f4512d3
    __ dci(0x04479adb);  // uqshl z27.s, p6/m, z27.s, #22
    // vl128 state = 0x3d44e050
    __ dci(0x04079a99);  // uqshl z25.h, p6/m, z25.h, #4
    // vl128 state = 0xede0c288
    __ dci(0x04079a89);  // uqshl z9.h, p6/m, z9.h, #4
    // vl128 state = 0x928beed6
    __ dci(0x04879acb);  // uqshl z11.d, p6/m, z11.d, #22
    // vl128 state = 0x6945e18a
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0x6945e18a,
        0x0e954f70,
        0x3d269eb2,
        0xefeb5acb,
        0xfb27cb0c,
        0x651a1aea,
        0x07011083,
        0xd425418b,
        0xa0e026c6,
        0x407c416e,
        0x14e25761,
        0x21eef576,
        0xc6ad09eb,
        0x3642006b,
        0xdebec165,
        0x24ae8a32,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of 100 SVE2 unsigned shift encodings: the register forms
// urshl, urshlr, uqshl, uqshlr, uqrshl and uqrshlr, interleaved with the
// immediate form of uqshl.
TEST_SVE(sve2_unsigned_sat_round_shift) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
    __ dci(0x44cb84cb);  // uqrshl z11.d, p1/m, z11.d, z6.d
    // vl128 state = 0x9794ef4a
    __ dci(0x444b85db);  // uqrshl z27.h, p1/m, z27.h, z14.h
    // vl128 state = 0xda137fcc
    __ dci(0x444b874b);  // uqrshl z11.h, p1/m, z11.h, z26.h
    // vl128 state = 0xafc1533b
    __ dci(0x444b87fb);  // uqrshl z27.h, p1/m, z27.h, z31.h
    // vl128 state = 0x228890a2
    __ dci(0x444b87f3);  // uqrshl z19.h, p1/m, z19.h, z31.h
    // vl128 state = 0x5cb0d356
    __ dci(0x444385f1);  // urshl z17.h, p1/m, z17.h, z15.h
    // vl128 state = 0xbb6b6d1d
    __ dci(0x444795f3);  // urshlr z19.h, p5/m, z19.h, z15.h
    // vl128 state = 0x98b43358
    __ dci(0x44479552);  // urshlr z18.h, p5/m, z18.h, z10.h
    // vl128 state = 0x472880b2
    __ dci(0x44c79502);  // urshlr z2.d, p5/m, z2.d, z8.d
    // vl128 state = 0x0995d86f
    __ dci(0x44879406);  // urshlr z6.s, p5/m, z6.s, z0.s
    // vl128 state = 0x405211cd
    __ dci(0x44079436);  // urshlr z22.b, p5/m, z22.b, z1.b
    // vl128 state = 0x563647b0
    __ dci(0x44078c34);  // urshlr z20.b, p3/m, z20.b, z1.b
    // vl128 state = 0x2eacf2d3
    __ dci(0x440f843c);  // uqrshlr z28.b, p1/m, z28.b, z1.b
    // vl128 state = 0x56f472ce
    __ dci(0x440f8cbe);  // uqrshlr z30.b, p3/m, z30.b, z5.b
    // vl128 state = 0x910ce8d0
    __ dci(0x44078eba);  // urshlr z26.b, p3/m, z26.b, z21.b
    // vl128 state = 0xc47b6482
    __ dci(0x44078ebe);  // urshlr z30.b, p3/m, z30.b, z21.b
    // vl128 state = 0xff805975
    __ dci(0x440f86b6);  // uqrshlr z22.b, p1/m, z22.b, z21.b
    // vl128 state = 0x132fe792
    __ dci(0x444b86b7);  // uqrshl z23.h, p1/m, z23.h, z21.h
    // vl128 state = 0xabd3d85c
    __ dci(0x440b84a7);  // uqrshl z7.b, p1/m, z7.b, z5.b
    // vl128 state = 0x8f718992
    __ dci(0x440b8085);  // uqrshl z5.b, p0/m, z5.b, z4.b
    // vl128 state = 0x1b05e694
    __ dci(0x440b8687);  // uqrshl z7.b, p1/m, z7.b, z20.b
    // vl128 state = 0xd9a0c225
    __ dci(0x440986cf);  // uqshl z15.b, p1/m, z15.b, z22.b
    // vl128 state = 0x98be170a
    __ dci(0x440b87ce);  // uqrshl z14.b, p1/m, z14.b, z30.b
    // vl128 state = 0x0993d862
    __ dci(0x440b838c);  // uqrshl z12.b, p0/m, z12.b, z28.b
    // vl128 state = 0xbc95a037
    __ dci(0x440b839c);  // uqrshl z28.b, p0/m, z28.b, z28.b
    // vl128 state = 0x558159d9
    __ dci(0x444b8314);  // uqrshl z20.h, p0/m, z20.h, z24.h
    // vl128 state = 0x53798c6b
    __ dci(0x44498b1c);  // uqshl z28.h, p2/m, z28.h, z24.h
    // vl128 state = 0x83db6a7c
    __ dci(0x44498b0c);  // uqshl z12.h, p2/m, z12.h, z24.h
    // vl128 state = 0x62bda6cb
    __ dci(0x44438b0e);  // urshl z14.h, p2/m, z14.h, z24.h
    // vl128 state = 0xc04356eb
    __ dci(0x44438986);  // urshl z6.h, p2/m, z6.h, z12.h
    // vl128 state = 0x0e2e6682
    __ dci(0x444389e4);  // urshl z4.h, p2/m, z4.h, z15.h
    // vl128 state = 0xbb28cacd
    __ dci(0x444391f4);  // urshl z20.h, p4/m, z20.h, z15.h
    // vl128 state = 0x5349f37a
    __ dci(0x444391f6);  // urshl z22.h, p4/m, z22.h, z15.h
    // vl128 state = 0x99e66890
    __ dci(0x44c39177);  // urshl z23.d, p4/m, z23.d, z11.d
    // vl128 state = 0x2d48a891
    __ dci(0x44c79573);  // urshlr z19.d, p5/m, z19.d, z11.d
    // vl128 state = 0xd26e94f9
    __ dci(0x04c79d63);  // uqshl z3.d, p7/m, z3.d, #43
    // vl128 state = 0x54801050
    __ dci(0x04c78c67);  // uqshl z7.d, p3/m, z7.d, #35
    // vl128 state = 0xde9f357a
    __ dci(0x04878c43);  // uqshl z3.d, p3/m, z3.d, #2
    // vl128 state = 0x59e5d53c
    __ dci(0x44878c0b);  // urshlr z11.s, p3/m, z11.s, z0.s
    // vl128 state = 0x8cfa7532
    __ dci(0x44878c03);  // urshlr z3.s, p3/m, z3.s, z0.s
    // vl128 state = 0xdb4e86b6
    __ dci(0x44878d42);  // urshlr z2.s, p3/m, z2.s, z10.s
    // vl128 state = 0x07467a7c
    __ dci(0x44878d4a);  // urshlr z10.s, p3/m, z10.s, z10.s
    // vl128 state = 0x6a4ad81c
    __ dci(0x44879948);  // urshlr z8.s, p6/m, z8.s, z10.s
    // vl128 state = 0x91d7bdc0
    __ dci(0x44879949);  // urshlr z9.s, p6/m, z9.s, z10.s
    // vl128 state = 0x2fe3b819
    __ dci(0x44879bcb);  // urshlr z11.s, p6/m, z11.s, z30.s
    // vl128 state = 0x5c121b68
    __ dci(0x04879b4f);  // uqshl z15.d, p6/m, z15.d, #26
    // vl128 state = 0xe678f4f7
    __ dci(0x44879bdf);  // urshlr z31.s, p6/m, z31.s, z30.s
    // vl128 state = 0x6593da76
    __ dci(0x4487935e);  // urshlr z30.s, p4/m, z30.s, z26.s
    // vl128 state = 0xb558ba57
    __ dci(0x440f9356);  // uqrshlr z22.b, p4/m, z22.b, z26.b
    // vl128 state = 0x45d1775e
    __ dci(0x440f93f7);  // uqrshlr z23.b, p4/m, z23.b, z31.b
    // vl128 state = 0x20974795
    __ dci(0x448793f5);  // urshlr z21.s, p4/m, z21.s, z31.s
    // vl128 state = 0xeb0bc2ab
    __ dci(0x448383fd);  // urshl z29.s, p0/m, z29.s, z31.s
    // vl128 state = 0x74557d81
    __ dci(0x448b82f9);  // uqrshl z25.s, p0/m, z25.s, z23.s
    // vl128 state = 0x34518418
    __ dci(0x448f82b8);  // uqrshlr z24.s, p0/m, z24.s, z21.s
    // vl128 state = 0x93e637f3
    __ dci(0x448f82bc);  // uqrshlr z28.s, p0/m, z28.s, z21.s
    // vl128 state = 0x6e35e56a
    __ dci(0x448f83fe);  // uqrshlr z30.s, p0/m, z30.s, z31.s
    // vl128 state = 0xf3c59bb1
    __ dci(0x448d83ae);  // uqshlr z14.s, p0/m, z14.s, z29.s
    // vl128 state = 0x95b401a3
    __ dci(0x448d83aa);  // uqshlr z10.s, p0/m, z10.s, z29.s
    // vl128 state = 0x56ec65b0
    __ dci(0x448993ae);  // uqshl z14.s, p4/m, z14.s, z29.s
    // vl128 state = 0x28f6e4c6
    __ dci(0x448993a6);  // uqshl z6.s, p4/m, z6.s, z29.s
    // vl128 state = 0x9ed5eaf3
    __ dci(0x44c991a4);  // uqshl z4.d, p4/m, z4.d, z13.d
    // vl128 state = 0xa8512b00
    __ dci(0x44c991a5);  // uqshl z5.d, p4/m, z5.d, z13.d
    // vl128 state = 0x49a10780
    __ dci(0x44c991a1);  // uqshl z1.d, p4/m, z1.d, z13.d
    // vl128 state = 0x465a2cb4
    __ dci(0x444b91a0);  // uqrshl z0.h, p4/m, z0.h, z13.h
    // vl128 state = 0x8f6dad8e
    __ dci(0x444b91a1);  // uqrshl z1.h, p4/m, z1.h, z13.h
    // vl128 state = 0x50dec3f8
    __ dci(0x440391a3);  // urshl z3.b, p4/m, z3.b, z13.b
    // vl128 state = 0xab2b5ad7
    __ dci(0x448393a7);  // urshl z7.s, p4/m, z7.s, z29.s
    // vl128 state = 0x2ffd164f
    __ dci(0x448393af);  // urshl z15.s, p4/m, z15.s, z29.s
    // vl128 state = 0x43a7959b
    __ dci(0x448393ab);  // urshl z11.s, p4/m, z11.s, z29.s
    // vl128 state = 0xf9526723
    __ dci(0x448f93af);  // uqrshlr z15.s, p4/m, z15.s, z29.s
    // vl128 state = 0xf9081b27
    __ dci(0x448f93ae);  // uqrshlr z14.s, p4/m, z14.s, z29.s
    // vl128 state = 0x3a4f693e
    __ dci(0x048793aa);  // uqshl z10.d, p4/m, z10.d, #29
    // vl128 state = 0xbba37d9a
    __ dci(0x04c79388);  // uqshl z8.d, p4/m, z8.d, #60
    // vl128 state = 0x3b3f5fa4
    __ dci(0x04c79380);  // uqshl z0.d, p4/m, z0.d, #60
    // vl128 state = 0xdac48ac2
    __ dci(0x04878390);  // uqshl z16.d, p0/m, z16.d, #28
    // vl128 state = 0xe3c8148f
    __ dci(0x44878794);  // urshlr z20.s, p1/m, z20.s, z28.s
    // vl128 state = 0xee2179ec
    __ dci(0x04878384);  // uqshl z4.d, p0/m, z4.d, #28
    // vl128 state = 0xc6a3796c
    __ dci(0x048787ac);  // uqshl z12.d, p1/m, z12.d, #29
    // vl128 state = 0x18e0fd43
    __ dci(0x04c786ae);  // uqshl z14.d, p1/m, z14.d, #53
    // vl128 state = 0x9292503e
    __ dci(0x04c786be);  // uqshl z30.d, p1/m, z30.d, #53
    // vl128 state = 0xc1ebe042
    __ dci(0x44c782b6);  // urshlr z22.d, p0/m, z22.d, z21.d
    // vl128 state = 0x0badc025
    __ dci(0x44c78a3e);  // urshlr z30.d, p2/m, z30.d, z17.d
    // vl128 state = 0x51b3b5ac
    __ dci(0x04c78b3a);  // uqshl z26.d, p2/m, z26.d, #57
    // vl128 state = 0x334f52f8
    __ dci(0x04c78832);  // uqshl z18.d, p2/m, z18.d, #33
    // vl128 state = 0xf95df0b7
    __ dci(0x44cf8833);  // uqrshlr z19.d, p2/m, z19.d, z1.d
    // vl128 state = 0xda88a00a
    __ dci(0x44cf9811);  // uqrshlr z17.d, p6/m, z17.d, z0.d
    // vl128 state = 0x1e642a4c
    __ dci(0x44cf9c41);  // uqrshlr z1.d, p7/m, z1.d, z2.d
    // vl128 state = 0xeb7fe4bd
    __ dci(0x444f8c45);  // uqrshlr z5.h, p3/m, z5.h, z2.h
    // vl128 state = 0x5a82d833
    __ dci(0x44cf844d);  // uqrshlr z13.d, p1/m, z13.d, z2.d
    // vl128 state = 0x595d42a4
    __ dci(0x44c7841d);  // urshlr z29.d, p1/m, z29.d, z0.d
    // vl128 state = 0x0b433688
    __ dci(0x44c7805f);  // urshlr z31.d, p0/m, z31.d, z2.d
    // vl128 state = 0x14b8c29a
    __ dci(0x44cf807b);  // uqrshlr z27.d, p0/m, z27.d, z3.d
    // vl128 state = 0x12a76015
    __ dci(0x44c780eb);  // urshlr z11.d, p0/m, z11.d, z7.d
    // vl128 state = 0x73fa7d24
    __ dci(0x44c794e3);  // urshlr z3.d, p5/m, z3.d, z7.d
    // vl128 state = 0x0a01c859
    __ dci(0x04c795eb);  // uqshl z11.d, p5/m, z11.d, #47
    // vl128 state = 0x0e7024fd
    __ dci(0x04c795e9);  // uqshl z9.d, p5/m, z9.d, #47
    // vl128 state = 0x9ca5cb63
    __ dci(0x04c795f9);  // uqshl z25.d, p5/m, z25.d, #47
    // vl128 state = 0x4c60da07
    __ dci(0x04c795fb);  // uqshl z27.d, p5/m, z27.d, #47
    // vl128 state = 0x71114c19
    __ dci(0x04c799f3);  // uqshl z19.d, p6/m, z19.d, #47
    // vl128 state = 0x32d71e12
    __ dci(0x04c79997);  // uqshl z23.d, p6/m, z23.d, #44
    // vl128 state = 0xab0c9051
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0xab0c9051,
        0xc2455013,
        0x6e4b3f1e,
        0x631ce7ed,
        0x031e4f7f,
        0xa2be23bd,
        0x2f5f74b0,
        0x9e60f1ea,
        0xb1080595,
        0x953020c9,
        0x7a5bfffb,
        0xf0a27817,
        0x83904886,
        0x04620572,
        0xbcd5c8c9,
        0x3d4abe12,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

TEST_SVE(sve2_signed_sat_round_shift) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
    __ dci(0x048687c6);  // sqshl z6.d, p1/m, z6.d, #30
    // vl128 state = 0xe81d8487
    __ dci(0x048687c4);  // sqshl z4.d, p1/m, z4.d, #30
    // vl128 state = 0x47cc69b1
887 __ dci(0x04868385); // sqshl z5.d, p0/m, z5.d, #28 888 // vl128 state = 0xec4cab7b 889 __ dci(0x0486838d); // sqshl z13.d, p0/m, z13.d, #28 890 // vl128 state = 0x23b07ac8 891 __ dci(0x048681a9); // sqshl z9.d, p0/m, z9.d, #13 892 // vl128 state = 0xace4253d 893 __ dci(0x04068139); // sqshl z25.b, p0/m, z25.b, #1 894 // vl128 state = 0xf8f14a80 895 __ dci(0x440681b8); // srshlr z24.b, p0/m, z24.b, z13.b 896 // vl128 state = 0xa79d8fc1 897 __ dci(0x4406803a); // srshlr z26.b, p0/m, z26.b, z1.b 898 // vl128 state = 0xed9bb777 899 __ dci(0x4406808a); // srshlr z10.b, p0/m, z10.b, z4.b 900 // vl128 state = 0xbd1dfa2f 901 __ dci(0x440688da); // srshlr z26.b, p2/m, z26.b, z6.b 902 // vl128 state = 0x8f9b61e6 903 __ dci(0x448680db); // srshlr z27.s, p0/m, z27.s, z6.s 904 // vl128 state = 0x0a16f551 905 __ dci(0x440684d3); // srshlr z19.b, p1/m, z19.b, z6.b 906 // vl128 state = 0x0a764f12 907 __ dci(0x448694c3); // srshlr z3.s, p5/m, z3.s, z6.s 908 // vl128 state = 0x8d6f5613 909 __ dci(0x448e9cc7); // sqrshlr z7.s, p7/m, z7.s, z6.s 910 // vl128 state = 0xaf7b559b 911 __ dci(0x448e9ef7); // sqrshlr z23.s, p7/m, z23.s, z23.s 912 // vl128 state = 0x086d6430 913 __ dci(0x448e9673); // sqrshlr z19.s, p5/m, z19.s, z19.s 914 // vl128 state = 0x4a9a5736 915 __ dci(0x448a8663); // sqrshl z3.s, p1/m, z3.s, z19.s 916 // vl128 state = 0x19adf50e 917 __ dci(0x440a8e6b); // sqrshl z11.b, p3/m, z11.b, z19.b 918 // vl128 state = 0x4a01719c 919 __ dci(0x44028eef); // srshl z15.b, p3/m, z15.b, z23.b 920 // vl128 state = 0x1af6d72e 921 __ dci(0x44028e8b); // srshl z11.b, p3/m, z11.b, z20.b 922 // vl128 state = 0xeca2061d 923 __ dci(0x44828f8f); // srshl z15.s, p3/m, z15.s, z28.s 924 // vl128 state = 0x61059832 925 __ dci(0x44828f87); // srshl z7.s, p3/m, z7.s, z28.s 926 // vl128 state = 0x5e4d94cc 927 __ dci(0x44828a97); // srshl z23.s, p2/m, z23.s, z20.s 928 // vl128 state = 0xf5095aa8 929 __ dci(0x44828a93); // srshl z19.s, p2/m, z19.s, z20.s 930 // vl128 state = 0x155ff234 931 __ 
dci(0x44868a11); // srshlr z17.s, p2/m, z17.s, z16.s 932 // vl128 state = 0xf2844c7f 933 __ dci(0x44c68a90); // srshlr z16.d, p2/m, z16.d, z20.d 934 // vl128 state = 0xcf9f9508 935 __ dci(0x44c68a80); // srshlr z0.d, p2/m, z0.d, z20.d 936 // vl128 state = 0xd476915b 937 __ dci(0x44868a02); // srshlr z2.s, p2/m, z2.s, z16.s 938 // vl128 state = 0x9acbc986 939 __ dci(0x44868a12); // srshlr z18.s, p2/m, z18.s, z16.s 940 // vl128 state = 0xaf9e1114 941 __ dci(0x4486921a); // srshlr z26.s, p4/m, z26.s, z16.s 942 // vl128 state = 0x9d188add 943 __ dci(0x4486909e); // srshlr z30.s, p4/m, z30.s, z4.s 944 // vl128 state = 0xb41018d5 945 __ dci(0x448c9096); // sqshlr z22.s, p4/m, z22.s, z4.s 946 // vl128 state = 0x4ab51dea 947 __ dci(0x448890b4); // sqshl z20.s, p4/m, z20.s, z5.s 948 // vl128 state = 0x600dcc36 949 __ dci(0x448884bc); // sqshl z28.s, p1/m, z28.s, z5.s 950 // vl128 state = 0x84f37050 951 __ dci(0x44c88434); // sqshl z20.d, p1/m, z20.d, z1.d 952 // vl128 state = 0x1f19ce5a 953 __ dci(0x44cc8536); // sqshlr z22.d, p1/m, z22.d, z9.d 954 // vl128 state = 0xa51d3f31 955 __ dci(0x448c8517); // sqshlr z23.s, p1/m, z23.s, z8.s 956 // vl128 state = 0x8d431292 957 __ dci(0x448c8133); // sqshlr z19.s, p0/m, z19.s, z9.s 958 // vl128 state = 0xdd59917f 959 __ dci(0x448c8b23); // sqshlr z3.s, p2/m, z3.s, z25.s 960 // vl128 state = 0xfcdae7d4 961 __ dci(0x448c8b21); // sqshlr z1.s, p2/m, z1.s, z25.s 962 // vl128 state = 0x0f1239a5 963 __ dci(0x448c8b29); // sqshlr z9.s, p2/m, z9.s, z25.s 964 // vl128 state = 0xf6d1f180 965 __ dci(0x448c8b2b); // sqshlr z11.s, p2/m, z11.s, z25.s 966 // vl128 state = 0xe7a1af08 967 __ dci(0x448c8b89); // sqshlr z9.s, p2/m, z9.s, z28.s 968 // vl128 state = 0xa72666cb 969 __ dci(0x448c9bcb); // sqshlr z11.s, p6/m, z11.s, z30.s 970 // vl128 state = 0x9cae5fd7 971 __ dci(0x44869bca); // srshlr z10.s, p6/m, z10.s, z30.s 972 // vl128 state = 0xda133b76 973 __ dci(0x04869b8e); // sqshl z14.d, p6/m, z14.d, #28 974 // vl128 state = 0xf8eb71c2 975 __ 
dci(0x44869bca); // srshlr z10.s, p6/m, z10.s, z30.s 976 // vl128 state = 0xbe561563 977 __ dci(0x44869ae2); // srshlr z2.s, p6/m, z2.s, z23.s 978 // vl128 state = 0x0c286f7e 979 __ dci(0x44869a46); // srshlr z6.s, p6/m, z6.s, z18.s 980 // vl128 state = 0x59da6464 981 __ dci(0x44869a47); // srshlr z7.s, p6/m, z7.s, z18.s 982 // vl128 state = 0x908e5664 983 __ dci(0x4486920f); // srshlr z15.s, p4/m, z15.s, z16.s 984 // vl128 state = 0x213d23db 985 __ dci(0x44869a87); // srshlr z7.s, p6/m, z7.s, z20.s 986 // vl128 state = 0xd81ea7fb 987 __ dci(0x44469a86); // srshlr z6.h, p6/m, z6.h, z20.h 988 // vl128 state = 0x27d44726 989 __ dci(0x44029a82); // srshl z2.b, p6/m, z2.b, z20.b 990 // vl128 state = 0x2187127f 991 __ dci(0x44069aa0); // srshlr z0.b, p6/m, z0.b, z21.b 992 // vl128 state = 0x68ba9323 993 __ dci(0x444692b0); // srshlr z16.h, p4/m, z16.h, z21.h 994 // vl128 state = 0x148619ff 995 __ dci(0x44468ab2); // srshlr z18.h, p2/m, z18.h, z21.h 996 // vl128 state = 0xae93eae6 997 __ dci(0x444698b6); // srshlr z22.h, p6/m, z22.h, z5.h 998 // vl128 state = 0x0b875035 999 __ dci(0x44469934); // srshlr z20.h, p6/m, z20.h, z9.h 1000 // vl128 state = 0x559132ed 1001 __ dci(0x0406993c); // sqshl z28.b, p6/m, z28.b, #1 1002 // vl128 state = 0xec1782e4 1003 __ dci(0x4406912c); // srshlr z12.b, p4/m, z12.b, z9.b 1004 // vl128 state = 0x089d32a4 1005 __ dci(0x440291ae); // srshl z14.b, p4/m, z14.b, z13.b 1006 // vl128 state = 0xde257893 1007 __ dci(0x44829126); // srshl z6.s, p4/m, z6.s, z9.s 1008 // vl128 state = 0x318d27ef 1009 __ dci(0x448a8127); // sqrshl z7.s, p0/m, z7.s, z9.s 1010 // vl128 state = 0x1bc564fc 1011 __ dci(0x448e8165); // sqrshlr z5.s, p0/m, z5.s, z11.s 1012 // vl128 state = 0xa5e5c696 1013 __ dci(0x44869161); // srshlr z1.s, p4/m, z1.s, z11.s 1014 // vl128 state = 0xd64b6830 1015 __ dci(0x44829120); // srshl z0.s, p4/m, z0.s, z9.s 1016 // vl128 state = 0x107ca84d 1017 __ dci(0x44829124); // srshl z4.s, p4/m, z4.s, z9.s 1018 // vl128 state = 0xcd5688f3 1019 
__ dci(0x4482912c); // srshl z12.s, p4/m, z12.s, z9.s 1020 // vl128 state = 0x88dee210 1021 __ dci(0x44829128); // srshl z8.s, p4/m, z8.s, z9.s 1022 // vl128 state = 0xfe8611fa 1023 __ dci(0x44c69120); // srshlr z0.d, p4/m, z0.d, z9.d 1024 // vl128 state = 0xe8b8cabd 1025 __ dci(0x44ce9168); // sqrshlr z8.d, p4/m, z8.d, z11.d 1026 // vl128 state = 0x269af804 1027 __ dci(0x448e9069); // sqrshlr z9.s, p4/m, z9.s, z3.s 1028 // vl128 state = 0x7d425704 1029 __ dci(0x448e8461); // sqrshlr z1.s, p1/m, z1.s, z3.s 1030 // vl128 state = 0x1577bd67 1031 __ dci(0x448e8460); // sqrshlr z0.s, p1/m, z0.s, z3.s 1032 // vl128 state = 0x6966617f 1033 __ dci(0x448a8428); // sqrshl z8.s, p1/m, z8.s, z1.s 1034 // vl128 state = 0x6c9cc508 1035 __ dci(0x44ca8409); // sqrshl z9.d, p1/m, z9.d, z0.d 1036 // vl128 state = 0xb3ea2e65 1037 __ dci(0x44c68408); // srshlr z8.d, p1/m, z8.d, z0.d 1038 // vl128 state = 0x1aef7620 1039 __ dci(0x44c6840a); // srshlr z10.d, p1/m, z10.d, z0.d 1040 // vl128 state = 0x63f2c5a3 1041 __ dci(0x44cc840e); // sqshlr z14.d, p1/m, z14.d, z0.d 1042 // vl128 state = 0xb54a8f94 1043 __ dci(0x44cc8e1e); // sqshlr z30.d, p3/m, z30.d, z16.d 1044 // vl128 state = 0xe247e0a3 1045 __ dci(0x44c68e1a); // srshlr z26.d, p3/m, z26.d, z16.d 1046 // vl128 state = 0xfb8bf060 1047 __ dci(0x44c28a0a); // srshl z10.d, p2/m, z10.d, z16.d 1048 // vl128 state = 0x829643e3 1049 __ dci(0x44c68e0e); // srshlr z14.d, p3/m, z14.d, z16.d 1050 // vl128 state = 0x8bd62d7b 1051 __ dci(0x44c6881e); // srshlr z30.d, p2/m, z30.d, z0.d 1052 // vl128 state = 0x4d8caca2 1053 __ dci(0x44869816); // srshlr z22.s, p6/m, z22.s, z0.s 1054 // vl128 state = 0x027f41ac 1055 __ dci(0x44029817); // srshl z23.b, p6/m, z23.b, z0.b 1056 // vl128 state = 0xab9c9627 1057 __ dci(0x4402993f); // srshl z31.b, p6/m, z31.b, z9.b 1058 // vl128 state = 0x42a71056 1059 __ dci(0x4406991e); // srshlr z30.b, p6/m, z30.b, z8.b 1060 // vl128 state = 0xdcdf1396 1061 __ dci(0x44068d1f); // srshlr z31.b, p3/m, z31.b, z8.b 1062 
// vl128 state = 0x84fa5cac 1063 __ dci(0x44068d1d); // srshlr z29.b, p3/m, z29.b, z8.b 1064 // vl128 state = 0x1239cdae 1065 __ dci(0x44468d2d); // srshlr z13.h, p3/m, z13.h, z9.h 1066 // vl128 state = 0xae689b2f 1067 __ dci(0x4446850f); // srshlr z15.h, p1/m, z15.h, z8.h 1068 // vl128 state = 0x6330c9c2 1069 __ dci(0x4446910e); // srshlr z14.h, p4/m, z14.h, z8.h 1070 // vl128 state = 0x326ffb9f 1071 __ dci(0x4446940f); // srshlr z15.h, p5/m, z15.h, z0.h 1072 // vl128 state = 0x3f48f466 1073 __ dci(0x44468487); // srshlr z7.h, p1/m, z7.h, z4.h 1074 // vl128 state = 0x0d3b6c65 1075 __ dci(0x444694b7); // srshlr z23.h, p5/m, z23.h, z5.h 1076 // vl128 state = 0x5ef21cd8 1077 __ dci(0x44469c93); // srshlr z19.h, p7/m, z19.h, z4.h 1078 // vl128 state = 0x413d5573 1079 __ dci(0x44069e92); // srshlr z18.b, p7/m, z18.b, z20.b 1080 // vl128 state = 0xac59d0c3 1081 __ dci(0x44469693); // srshlr z19.h, p5/m, z19.h, z20.h 1082 // vl128 state = 0xb3969968 1083 } 1084 1085 uint32_t state; 1086 ComputeMachineStateHash(&masm, &state); 1087 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 1088 __ Ldr(w0, MemOperand(x0)); 1089 1090 END(); 1091 if (CAN_RUN()) { 1092 RUN(); 1093 uint32_t expected_hashes[] = { 1094 0xb3969968, 1095 0x8ba60941, 1096 0x53937d52, 1097 0xe6737b5d, 1098 0x8649cf1f, 1099 0xb7ee12ca, 1100 0x6fd03bd4, 1101 0x4a82eb52, 1102 0xc0d52997, 1103 0xb52a263f, 1104 0x70599fa2, 1105 0x68cd2ef1, 1106 0x57b84410, 1107 0x1072dde9, 1108 0xe39a23c8, 1109 0xeded9f88, 1110 }; 1111 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 1112 } 1113} 1114 1115TEST_SVE(sve2_usra) { 1116 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 1117 CPUFeatures::kSVE2, 1118 CPUFeatures::kNEON, 1119 CPUFeatures::kCRC32); 1120 START(); 1121 1122 SetInitialMachineState(&masm); 1123 // state = 0xe2bd2480 1124 1125 { 1126 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 1127 __ dci(0x450ce41d); // usra z29.b, z0.b, #4 1128 // vl128 state = 0x57e84943 1129 __ dci(0x450ce635); 
// usra z21.b, z17.b, #4 1130 // vl128 state = 0xc2696a7c 1131 __ dci(0x45cce637); // usra z23.d, z17.d, #20 1132 // vl128 state = 0x97aec47c 1133 __ dci(0x458cee35); // ursra z21.d, z17.d, #52 1134 // vl128 state = 0xab24864c 1135 __ dci(0x450eee25); // ursra z5.b, z17.b, #2 1136 // vl128 state = 0x8aab49c9 1137 __ dci(0x458eef21); // ursra z1.d, z25.d, #50 1138 // vl128 state = 0x3db09e7f 1139 __ dci(0x458fef65); // ursra z5.d, z27.d, #49 1140 // vl128 state = 0xa9905ae3 1141 __ dci(0x459fef41); // ursra z1.d, z26.d, #33 1142 // vl128 state = 0x624c2e4d 1143 __ dci(0x459fe549); // usra z9.d, z10.d, #33 1144 // vl128 state = 0x5a158f70 1145 __ dci(0x459de561); // usra z1.d, z11.d, #35 1146 // vl128 state = 0xf24ffa83 1147 __ dci(0x451ce565); // usra z5.h, z11.h, #4 1148 // vl128 state = 0x0213f9c7 1149 __ dci(0x4519e564); // usra z4.h, z11.h, #7 1150 // vl128 state = 0x8903ccf3 1151 __ dci(0x4589e56c); // usra z12.d, z11.d, #55 1152 // vl128 state = 0x3c0f6e72 1153 __ dci(0x4589e56e); // usra z14.d, z11.d, #55 1154 // vl128 state = 0x5d9787fc 1155 __ dci(0x4589e56c); // usra z12.d, z11.d, #55 1156 // vl128 state = 0x3bc6fced 1157 __ dci(0x458bed64); // ursra z4.d, z11.d, #53 1158 // vl128 state = 0x966476e2 1159 __ dci(0x45dbed65); // ursra z5.d, z11.d, #5 1160 // vl128 state = 0xf85c4247 1161 __ dci(0x455bedf5); // ursra z21.s, z15.s, #5 1162 // vl128 state = 0xd342f9ae 1163 __ dci(0x450bedfd); // ursra z29.b, z15.b, #5 1164 // vl128 state = 0xc03cb476 1165 __ dci(0x4549edf9); // ursra z25.s, z15.s, #23 1166 // vl128 state = 0x5649b073 1167 __ dci(0x4549ede9); // ursra z9.s, z15.s, #23 1168 // vl128 state = 0xce5a7dbb 1169 __ dci(0x4549ed59); // ursra z25.s, z10.s, #23 1170 // vl128 state = 0x8c98ee08 1171 __ dci(0x4549ed5d); // ursra z29.s, z10.s, #23 1172 // vl128 state = 0xd991a574 1173 __ dci(0x45cded59); // ursra z25.d, z10.d, #19 1174 // vl128 state = 0xebc24746 1175 __ dci(0x45d9ed58); // ursra z24.d, z10.d, #7 1176 // vl128 state = 0x145d5970 1177 __ 
dci(0x45d8ec50); // ursra z16.d, z2.d, #8 1178 // vl128 state = 0x8f65850c 1179 __ dci(0x45c8ec60); // ursra z0.d, z3.d, #24 1180 // vl128 state = 0xe510a1b4 1181 __ dci(0x45c0ed61); // ursra z1.d, z11.d, #32 1182 // vl128 state = 0xfef468e1 1183 __ dci(0x45c8ec65); // ursra z5.d, z3.d, #24 1184 // vl128 state = 0xa6754589 1185 __ dci(0x45c0e464); // usra z4.d, z3.d, #32 1186 // vl128 state = 0x2b4cd23a 1187 __ dci(0x45c0e4a5); // usra z5.d, z5.d, #32 1188 // vl128 state = 0xfa58fea0 1189 __ dci(0x45c0e4a1); // usra z1.d, z5.d, #32 1190 // vl128 state = 0x015c4435 1191 __ dci(0x45c0e4b1); // usra z17.d, z5.d, #32 1192 // vl128 state = 0x67271050 1193 __ dci(0x45c2ecb3); // ursra z19.d, z5.d, #30 1194 // vl128 state = 0x1d3631c3 1195 __ dci(0x45c0ece3); // ursra z3.d, z7.d, #32 1196 // vl128 state = 0x646e0e43 1197 __ dci(0x45caece7); // ursra z7.d, z7.d, #22 1198 // vl128 state = 0x104bf393 1199 __ dci(0x458aeee3); // ursra z3.d, z23.d, #54 1200 // vl128 state = 0xbac8c54b 1201 __ dci(0x454aeee1); // ursra z1.s, z23.s, #22 1202 // vl128 state = 0x5c2a40db 1203 __ dci(0x4508eee9); // ursra z9.b, z23.b, #8 1204 // vl128 state = 0xe117d81a 1205 __ dci(0x4518ece1); // ursra z1.h, z7.h, #8 1206 // vl128 state = 0xeb43265d 1207 __ dci(0x451cede0); // ursra z0.h, z15.h, #4 1208 // vl128 state = 0xd5c8d09e 1209 __ dci(0x4598edf0); // ursra z16.d, z15.d, #40 1210 // vl128 state = 0x0c060220 1211 __ dci(0x451cede0); // ursra z0.h, z15.h, #4 1212 // vl128 state = 0x0ea52d2d 1213 __ dci(0x459cefe8); // ursra z8.d, z31.d, #36 1214 // vl128 state = 0xa6a7e977 1215 __ dci(0x459ce5f8); // usra z24.d, z15.d, #36 1216 // vl128 state = 0xb0192caf 1217 __ dci(0x458cedfa); // ursra z26.d, z15.d, #52 1218 // vl128 state = 0x154fce29 1219 __ dci(0x458cedfe); // ursra z30.d, z15.d, #52 1220 // vl128 state = 0x369cc3e1 1221 __ dci(0x450cedb6); // ursra z22.b, z13.b, #4 1222 // vl128 state = 0xf613cb4b 1223 __ dci(0x450cedb4); // ursra z20.b, z13.b, #4 1224 // vl128 state = 0xd075c8a9 1225 
__ dci(0x458eeda4); // ursra z4.d, z13.d, #50 1226 // vl128 state = 0xc9366682 1227 } 1228 1229 uint32_t state; 1230 ComputeMachineStateHash(&masm, &state); 1231 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 1232 __ Ldr(w0, MemOperand(x0)); 1233 1234 END(); 1235 if (CAN_RUN()) { 1236 RUN(); 1237 uint32_t expected_hashes[] = { 1238 0xc9366682, 1239 0xaf202cff, 1240 0x0e90a7c4, 1241 0xa8c89f40, 1242 0xc7bb56ad, 1243 0xa203dd34, 1244 0xf3b3a749, 1245 0xf16c9d5f, 1246 0x9929dea8, 1247 0xd652c693, 1248 0xe76f701b, 1249 0xe2fe20a3, 1250 0x07182afb, 1251 0x816b928f, 1252 0x52baf33f, 1253 0x9ef46875, 1254 }; 1255 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 1256 } 1257} 1258 1259TEST_SVE(sve2_ssra) { 1260 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 1261 CPUFeatures::kSVE2, 1262 CPUFeatures::kNEON, 1263 CPUFeatures::kCRC32); 1264 START(); 1265 1266 SetInitialMachineState(&masm); 1267 // state = 0xe2bd2480 1268 1269 { 1270 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 1271 __ dci(0x450ce01d); // ssra z29.b, z0.b, #4 1272 // vl128 state = 0xdf461c2b 1273 __ dci(0x450ce235); // ssra z21.b, z17.b, #4 1274 // vl128 state = 0xd28868a9 1275 __ dci(0x45cce237); // ssra z23.d, z17.d, #20 1276 // vl128 state = 0x874fc6a9 1277 __ dci(0x458cea35); // srsra z21.d, z17.d, #52 1278 // vl128 state = 0xb848785b 1279 __ dci(0x450eea25); // srsra z5.b, z17.b, #2 1280 // vl128 state = 0x8bca62e4 1281 __ dci(0x458eeb21); // srsra z1.d, z25.d, #50 1282 // vl128 state = 0x3cd1b552 1283 __ dci(0x458feb65); // srsra z5.d, z27.d, #49 1284 // vl128 state = 0xd78844fb 1285 __ dci(0x459feb41); // srsra z1.d, z26.d, #33 1286 // vl128 state = 0xa948dc2f 1287 __ dci(0x459fe149); // ssra z9.d, z10.d, #33 1288 // vl128 state = 0x709a83f1 1289 __ dci(0x459de161); // ssra z1.d, z11.d, #35 1290 // vl128 state = 0x1c21e4f6 1291 __ dci(0x451ce165); // ssra z5.h, z11.h, #4 1292 // vl128 state = 0x72288f41 1293 __ dci(0x4519e164); // ssra z4.h, z11.h, #7 1294 // vl128 state 
= 0x9a8c4c8c 1295 __ dci(0x4589e16c); // ssra z12.d, z11.d, #55 1296 // vl128 state = 0x872585d4 1297 __ dci(0x4589e16e); // ssra z14.d, z11.d, #55 1298 // vl128 state = 0xd237aaa0 1299 __ dci(0x4589e16c); // ssra z12.d, z11.d, #55 1300 // vl128 state = 0x1c828333 1301 __ dci(0x458be964); // srsra z4.d, z11.d, #53 1302 // vl128 state = 0xc190178f 1303 __ dci(0x45dbe965); // srsra z5.d, z11.d, #5 1304 // vl128 state = 0xe9e81bda 1305 __ dci(0x455be9f5); // srsra z21.s, z15.s, #5 1306 // vl128 state = 0x8e58c7a1 1307 __ dci(0x450be9fd); // srsra z29.b, z15.b, #5 1308 // vl128 state = 0x904b404b 1309 __ dci(0x4549e9f9); // srsra z25.s, z15.s, #23 1310 // vl128 state = 0x35a60481 1311 __ dci(0x4549e9e9); // srsra z9.s, z15.s, #23 1312 // vl128 state = 0x6911448b 1313 __ dci(0x4549e959); // srsra z25.s, z10.s, #23 1314 // vl128 state = 0xdb384324 1315 __ dci(0x4549e95d); // srsra z29.s, z10.s, #23 1316 // vl128 state = 0x16acd8ee 1317 __ dci(0x45cde959); // srsra z25.d, z10.d, #19 1318 // vl128 state = 0x56bf7bda 1319 __ dci(0x45d9e958); // srsra z24.d, z10.d, #7 1320 // vl128 state = 0x6a713fa6 1321 __ dci(0x45d8e850); // srsra z16.d, z2.d, #8 1322 // vl128 state = 0xa6394cf3 1323 __ dci(0x45c8e860); // srsra z0.d, z3.d, #24 1324 // vl128 state = 0x829c3d2a 1325 __ dci(0x45c0e961); // srsra z1.d, z11.d, #32 1326 // vl128 state = 0x006d1904 1327 __ dci(0x45c8e865); // srsra z5.d, z3.d, #24 1328 // vl128 state = 0xcc7dffaf 1329 __ dci(0x45c0e064); // ssra z4.d, z3.d, #32 1330 // vl128 state = 0xc9eaddd0 1331 __ dci(0x45c0e0a5); // ssra z5.d, z5.d, #32 1332 // vl128 state = 0x643145e1 1333 __ dci(0x45c0e0a1); // ssra z1.d, z5.d, #32 1334 // vl128 state = 0x03f4c42e 1335 __ dci(0x45c0e0b1); // ssra z17.d, z5.d, #32 1336 // vl128 state = 0x5a8cff35 1337 __ dci(0x45c2e8b3); // srsra z19.d, z5.d, #30 1338 // vl128 state = 0x3ee63e9f 1339 __ dci(0x45c0e8e3); // srsra z3.d, z7.d, #32 1340 // vl128 state = 0x687d943b 1341 __ dci(0x45cae8e7); // srsra z7.d, z7.d, #22 1342 // 
vl128 state = 0xf5a19cb2 1343 __ dci(0x458aeae3); // srsra z3.d, z23.d, #54 1344 // vl128 state = 0xd1371248 1345 __ dci(0x454aeae1); // srsra z1.s, z23.s, #22 1346 // vl128 state = 0xdb83ef8b 1347 __ dci(0x455ae8e9); // srsra z9.s, z7.s, #6 1348 // vl128 state = 0xc831a54c 1349 __ dci(0x455ee9e8); // srsra z8.s, z15.s, #2 1350 // vl128 state = 0x4342b823 1351 __ dci(0x45dae9f8); // srsra z24.d, z15.d, #6 1352 // vl128 state = 0x52a7151a 1353 __ dci(0x455ee9e8); // srsra z8.s, z15.s, #2 1354 // vl128 state = 0xde8110e0 1355 __ dci(0x45deebe0); // srsra z0.d, z31.d, #2 1356 // vl128 state = 0xd2b28e81 1357 __ dci(0x45dee1f0); // ssra z16.d, z15.d, #2 1358 // vl128 state = 0x56d1c366 1359 __ dci(0x45cee9f2); // srsra z18.d, z15.d, #18 1360 // vl128 state = 0x53537689 1361 __ dci(0x45cee9f6); // srsra z22.d, z15.d, #18 1362 // vl128 state = 0x5e410508 1363 __ dci(0x454ee9be); // srsra z30.s, z13.s, #18 1364 // vl128 state = 0x06245094 1365 __ dci(0x454ee9bc); // srsra z28.s, z13.s, #18 1366 // vl128 state = 0xb92b3929 1367 __ dci(0x45cce9ac); // srsra z12.d, z13.d, #20 1368 // vl128 state = 0xfe6a2830 1369 __ dci(0x45cde93c); // srsra z28.d, z9.d, #19 1370 // vl128 state = 0x737461a1 1371 } 1372 1373 uint32_t state; 1374 ComputeMachineStateHash(&masm, &state); 1375 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 1376 __ Ldr(w0, MemOperand(x0)); 1377 1378 END(); 1379 if (CAN_RUN()) { 1380 RUN(); 1381 uint32_t expected_hashes[] = { 1382 0x737461a1, 1383 0xe1ef707c, 1384 0x9760ba4e, 1385 0x782dd4cd, 1386 0xe793d0c2, 1387 0x991e0de7, 1388 0x34627e21, 1389 0x76c89433, 1390 0x96c9f4ce, 1391 0x38ec4b6f, 1392 0x7aee3ec7, 1393 0x665f9b94, 1394 0x8e166fc3, 1395 0xb4461fac, 1396 0x215de9dc, 1397 0xc23ef1f9, 1398 }; 1399 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 1400 } 1401} 1402 1403TEST_SVE(sve2_sat_arith) { 1404 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 1405 CPUFeatures::kSVE2, 1406 CPUFeatures::kNEON, 1407 CPUFeatures::kCRC32); 1408 START(); 
1409 1410 SetInitialMachineState(&masm); 1411 // state = 0xe2bd2480 1412 1413 { 1414 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 1415 __ dci(0x44df9df9); // uqsubr z25.d, p7/m, z25.d, z15.d 1416 // vl128 state = 0x7670ac87 1417 __ dci(0x445f9db1); // uqsubr z17.h, p7/m, z17.h, z13.h 1418 // vl128 state = 0x3c5b39fe 1419 __ dci(0x441f99a1); // uqsubr z1.b, p6/m, z1.b, z13.b 1420 // vl128 state = 0x5df43635 1421 __ dci(0x441d9ba0); // usqadd z0.b, p6/m, z0.b, z29.b 1422 // vl128 state = 0x737bc7a5 1423 __ dci(0x441d9ba8); // usqadd z8.b, p6/m, z8.b, z29.b 1424 // vl128 state = 0xba69890b 1425 __ dci(0x441d9bb8); // usqadd z24.b, p6/m, z24.b, z29.b 1426 // vl128 state = 0x3f81c19d 1427 __ dci(0x441d8b30); // usqadd z16.b, p2/m, z16.b, z25.b 1428 // vl128 state = 0x076c5fc1 1429 __ dci(0x441d8a14); // usqadd z20.b, p2/m, z20.b, z16.b 1430 // vl128 state = 0x67df29dd 1431 __ dci(0x449d8215); // usqadd z21.s, p0/m, z21.s, z16.s 1432 // vl128 state = 0x663b236f 1433 __ dci(0x449d8205); // usqadd z5.s, p0/m, z5.s, z16.s 1434 // vl128 state = 0xe58d41d0 1435 __ dci(0x449d8201); // usqadd z1.s, p0/m, z1.s, z16.s 1436 // vl128 state = 0x82f89d40 1437 __ dci(0x449c8a09); // suqadd z9.s, p2/m, z9.s, z16.s 1438 // vl128 state = 0xa0218390 1439 __ dci(0x44dd8a0d); // usqadd z13.d, p2/m, z13.d, z16.d 1440 // vl128 state = 0xfab22f04 1441 __ dci(0x44d98a2c); // uqadd z12.d, p2/m, z12.d, z17.d 1442 // vl128 state = 0x70911fc9 1443 __ dci(0x44598a0d); // uqadd z13.h, p2/m, z13.h, z16.h 1444 // vl128 state = 0xcc12ec49 1445 __ dci(0x44d99a05); // uqadd z5.d, p6/m, z5.d, z16.d 1446 // vl128 state = 0x31fef46f 1447 __ dci(0x44d99004); // uqadd z4.d, p4/m, z4.d, z0.d 1448 // vl128 state = 0xf81448db 1449 __ dci(0x44d98020); // uqadd z0.d, p0/m, z0.d, z1.d 1450 // vl128 state = 0xe6fe9d31 1451 __ dci(0x44d980e1); // uqadd z1.d, p0/m, z1.d, z7.d 1452 // vl128 state = 0x76fecfc2 1453 __ dci(0x44d981c0); // uqadd z0.d, p0/m, z0.d, z14.d 1454 // vl128 state = 0x4066a558 1455 __ 
dci(0x44d98161); // uqadd z1.d, p0/m, z1.d, z11.d 1456 // vl128 state = 0x0d3a1487 1457 __ dci(0x44d98031); // uqadd z17.d, p0/m, z17.d, z1.d 1458 // vl128 state = 0x061b4aed 1459 __ dci(0x44d98039); // uqadd z25.d, p0/m, z25.d, z1.d 1460 // vl128 state = 0x02172a17 1461 __ dci(0x44d98029); // uqadd z9.d, p0/m, z9.d, z1.d 1462 // vl128 state = 0xebe138b3 1463 __ dci(0x44d8800d); // sqadd z13.d, p0/m, z13.d, z0.d 1464 // vl128 state = 0x73f0114b 1465 __ dci(0x44d8828f); // sqadd z15.d, p0/m, z15.d, z20.d 1466 // vl128 state = 0x7a8689e0 1467 __ dci(0x44d8829f); // sqadd z31.d, p0/m, z31.d, z20.d 1468 // vl128 state = 0x0800ae49 1469 __ dci(0x44d88e8f); // sqadd z15.d, p3/m, z15.d, z20.d 1470 // vl128 state = 0x9b733fff 1471 __ dci(0x44d88e8b); // sqadd z11.d, p3/m, z11.d, z20.d 1472 // vl128 state = 0x6d01eb90 1473 __ dci(0x44d88e8f); // sqadd z15.d, p3/m, z15.d, z20.d 1474 // vl128 state = 0x337692b3 1475 __ dci(0x44d8968e); // sqadd z14.d, p5/m, z14.d, z20.d 1476 // vl128 state = 0xcd4478b6 1477 __ dci(0x44d886ca); // sqadd z10.d, p1/m, z10.d, z22.d 1478 // vl128 state = 0x335fd099 1479 __ dci(0x44dc87ce); // suqadd z14.d, p1/m, z14.d, z30.d 1480 // vl128 state = 0x0d3b6403 1481 __ dci(0x44de8fcf); // sqsubr z15.d, p3/m, z15.d, z30.d 1482 // vl128 state = 0x41a1073f 1483 __ dci(0x449e9fcd); // sqsubr z13.s, p7/m, z13.s, z30.s 1484 // vl128 state = 0x5a4b1c22 1485 __ dci(0x445e9fcf); // sqsubr z15.h, p7/m, z15.h, z30.h 1486 // vl128 state = 0x5a08ccf1 1487 __ dci(0x441e9ece); // sqsubr z14.b, p7/m, z14.b, z22.b 1488 // vl128 state = 0x3f3c700c 1489 __ dci(0x441e8cde); // sqsubr z30.b, p3/m, z30.b, z6.b 1490 // vl128 state = 0x3b32b296 1491 __ dci(0x441e88fa); // sqsubr z26.b, p2/m, z26.b, z7.b 1492 // vl128 state = 0x7a6472e3 1493 __ dci(0x441f98f8); // uqsubr z24.b, p6/m, z24.b, z7.b 1494 // vl128 state = 0x1d72f5ea 1495 __ dci(0x441f98fc); // uqsubr z28.b, p6/m, z28.b, z7.b 1496 // vl128 state = 0x0245804b 1497 __ dci(0x441b9afe); // uqsub z30.b, p6/m, z30.b, 
z23.b 1498 // vl128 state = 0x8c7ac3d7 1499 __ dci(0x441b9afc); // uqsub z28.b, p6/m, z28.b, z23.b 1500 // vl128 state = 0xa96d65cb 1501 __ dci(0x449b9a74); // uqsub z20.s, p6/m, z20.s, z19.s 1502 // vl128 state = 0x261eb58f 1503 __ dci(0x449a9b75); // sqsub z21.s, p6/m, z21.s, z27.s 1504 // vl128 state = 0x3464e3e5 1505 __ dci(0x449a9b7d); // sqsub z29.s, p6/m, z29.s, z27.s 1506 // vl128 state = 0xfe3ab427 1507 __ dci(0x445a9b79); // sqsub z25.h, p6/m, z25.h, z27.h 1508 // vl128 state = 0x609eef3a 1509 __ dci(0x445a9b7d); // sqsub z29.h, p6/m, z29.h, z27.h 1510 // vl128 state = 0x0e6d6940 1511 __ dci(0x445e9b5f); // sqsubr z31.h, p6/m, z31.h, z26.h 1512 // vl128 state = 0x60a375e7 1513 __ dci(0x441e8b5b); // sqsubr z27.b, p2/m, z27.b, z26.b 1514 // vl128 state = 0xea9bd16f 1515 } 1516 1517 uint32_t state; 1518 ComputeMachineStateHash(&masm, &state); 1519 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 1520 __ Ldr(w0, MemOperand(x0)); 1521 1522 END(); 1523 if (CAN_RUN()) { 1524 RUN(); 1525 uint32_t expected_hashes[] = { 1526 0xea9bd16f, 1527 0x1296119e, 1528 0x00aaf6dc, 1529 0xb6ce0579, 1530 0xdb3d0829, 1531 0x119f52d0, 1532 0xf697dcd8, 1533 0x2c46a66c, 1534 0x7d838497, 1535 0x6cd68fb3, 1536 0xf98a5c79, 1537 0x51685054, 1538 0xa9494104, 1539 0x8d012936, 1540 0x32726258, 1541 0x091f1956, 1542 }; 1543 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 1544 } 1545} 1546 1547TEST_SVE(sve2_pair_arith) { 1548 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 1549 CPUFeatures::kSVE2, 1550 CPUFeatures::kNEON, 1551 CPUFeatures::kCRC32); 1552 START(); 1553 1554 SetInitialMachineState(&masm); 1555 // state = 0xe2bd2480 1556 1557 { 1558 ExactAssemblyScope scope(&masm, 64 * kInstructionSize); 1559 __ dci(0x4414b214); // smaxp z20.b, p4/m, z20.b, z16.b 1560 // vl128 state = 0x90adc6c9 1561 __ dci(0x4414ba5c); // smaxp z28.b, p6/m, z28.b, z18.b 1562 // vl128 state = 0x0e41b2b9 1563 __ dci(0x4454ba0c); // smaxp z12.h, p6/m, z12.h, z16.h 1564 // vl128 state = 
0x472160b8 1565 __ dci(0x4454ba64); // smaxp z4.h, p6/m, z4.h, z19.h 1566 // vl128 state = 0x4f485ba3 1567 __ dci(0x44d4bb65); // smaxp z5.d, p6/m, z5.d, z27.d 1568 // vl128 state = 0x432f5185 1569 __ dci(0x4456bb64); // sminp z4.h, p6/m, z4.h, z27.h 1570 // vl128 state = 0x01bd324a 1571 __ dci(0x4455bb74); // umaxp z20.h, p6/m, z20.h, z27.h 1572 // vl128 state = 0xaf795389 1573 __ dci(0x4451bb35); // addp z21.h, p6/m, z21.h, z25.h 1574 // vl128 state = 0x5f4be111 1575 __ dci(0x4451ab71); // addp z17.h, p2/m, z17.h, z27.h 1576 // vl128 state = 0xc16a8d03 1577 __ dci(0x4451ba75); // addp z21.h, p6/m, z21.h, z19.h 1578 // vl128 state = 0x8cd36853 1579 __ dci(0x4451b225); // addp z5.h, p4/m, z5.h, z17.h 1580 // vl128 state = 0xea3d5389 1581 __ dci(0x4455b627); // umaxp z7.h, p5/m, z7.h, z17.h 1582 // vl128 state = 0xbb42a8e1 1583 __ dci(0x4415b426); // umaxp z6.b, p5/m, z6.b, z1.b 1584 // vl128 state = 0x485ca761 1585 __ dci(0x4415b224); // umaxp z4.b, p4/m, z4.b, z17.b 1586 // vl128 state = 0x6bcfd641 1587 __ dci(0x4455b02c); // umaxp z12.h, p4/m, z12.h, z1.h 1588 // vl128 state = 0x84485a9f 1589 __ dci(0x4455a12d); // umaxp z13.h, p0/m, z13.h, z9.h 1590 // vl128 state = 0xed43519f 1591 __ dci(0x4455b33d); // umaxp z29.h, p4/m, z29.h, z25.h 1592 // vl128 state = 0xcc0b7c40 1593 __ dci(0x4455b7b9); // umaxp z25.h, p5/m, z25.h, z29.h 1594 // vl128 state = 0xe1c14517 1595 __ dci(0x4454b6b8); // smaxp z24.h, p5/m, z24.h, z21.h 1596 // vl128 state = 0x4c5e9f3c 1597 __ dci(0x44d4b4bc); // smaxp z28.d, p5/m, z28.d, z5.d 1598 // vl128 state = 0x7530a2f7 1599 __ dci(0x44d4b4bd); // smaxp z29.d, p5/m, z29.d, z5.d 1600 // vl128 state = 0x37e61b68 1601 __ dci(0x44d4b5ed); // smaxp z13.d, p5/m, z13.d, z15.d 1602 // vl128 state = 0xb592b6e9 1603 __ dci(0x4455b5fd); // umaxp z29.h, p5/m, z29.h, z15.h 1604 // vl128 state = 0xe7f9e492 1605 __ dci(0x4415b57f); // umaxp z31.b, p5/m, z31.b, z11.b 1606 // vl128 state = 0xe4e7b644 1607 __ dci(0x4411b5fe); // addp z30.b, p5/m, z30.b, z15.b 
1608 // vl128 state = 0x4bfe144d 1609 __ dci(0x4411a576); // addp z22.b, p1/m, z22.b, z11.b 1610 // vl128 state = 0xb1813df8 1611 __ dci(0x4455a566); // umaxp z6.h, p1/m, z6.h, z11.h 1612 // vl128 state = 0x4aa8b50e 1613 __ dci(0x4455adf6); // umaxp z22.h, p3/m, z22.h, z15.h 1614 // vl128 state = 0xfc13568a 1615 __ dci(0x4454acfe); // smaxp z30.h, p3/m, z30.h, z7.h 1616 // vl128 state = 0x3aac7365 1617 __ dci(0x4454acff); // smaxp z31.h, p3/m, z31.h, z7.h 1618 // vl128 state = 0x610991cf 1619 __ dci(0x44d4a8fb); // smaxp z27.d, p2/m, z27.d, z7.d 1620 // vl128 state = 0x36581f26 1621 __ dci(0x4456a8f3); // sminp z19.h, p2/m, z19.h, z7.h 1622 // vl128 state = 0x249bb813 1623 __ dci(0x4457a8b1); // uminp z17.h, p2/m, z17.h, z5.h 1624 // vl128 state = 0xd48d6d88 1625 __ dci(0x4457a8b5); // uminp z21.h, p2/m, z21.h, z5.h 1626 // vl128 state = 0x1628fb6e 1627 __ dci(0x4456a8f7); // sminp z23.h, p2/m, z23.h, z7.h 1628 // vl128 state = 0x0bd3c76b 1629 __ dci(0x4456a89f); // sminp z31.h, p2/m, z31.h, z4.h 1630 // vl128 state = 0xf09d21e4 1631 __ dci(0x4456aa0f); // sminp z15.h, p2/m, z15.h, z16.h 1632 // vl128 state = 0xd2a92168 1633 __ dci(0x4456b807); // sminp z7.h, p6/m, z7.h, z0.h 1634 // vl128 state = 0x009d0ac8 1635 __ dci(0x4456bc26); // sminp z6.h, p7/m, z6.h, z1.h 1636 // vl128 state = 0x716ddc73 1637 __ dci(0x4456beae); // sminp z14.h, p7/m, z14.h, z21.h 1638 // vl128 state = 0x35a4d900 1639 __ dci(0x4416b6ac); // sminp z12.b, p5/m, z12.b, z21.b 1640 // vl128 state = 0x7929e077 1641 __ dci(0x4416b6bc); // sminp z28.b, p5/m, z28.b, z21.b 1642 // vl128 state = 0x259195ca 1643 __ dci(0x4417b694); // uminp z20.b, p5/m, z20.b, z20.b 1644 // vl128 state = 0x5cc3927b 1645 __ dci(0x4417b684); // uminp z4.b, p5/m, z4.b, z20.b 1646 // vl128 state = 0x2e7c4b88 1647 __ dci(0x4415b6a0); // umaxp z0.b, p5/m, z0.b, z21.b 1648 // vl128 state = 0x1478d524 1649 __ dci(0x4415a690); // umaxp z16.b, p1/m, z16.b, z20.b 1650 // vl128 state = 0xc3ac4a89 1651 __ dci(0x4415b614); // umaxp 
z20.b, p5/m, z20.b, z16.b 1652 // vl128 state = 0xb94a5aeb 1653 __ dci(0x4415b675); // umaxp z21.b, p5/m, z21.b, z19.b 1654 // vl128 state = 0xabeed92b 1655 __ dci(0x4415a63d); // umaxp z29.b, p1/m, z29.b, z17.b 1656 // vl128 state = 0xe36835ea 1657 __ dci(0x4415a63c); // umaxp z28.b, p1/m, z28.b, z17.b 1658 // vl128 state = 0x087002bb 1659 __ dci(0x4455a61d); // umaxp z29.h, p1/m, z29.h, z16.h 1660 // vl128 state = 0x17388ea4 1661 __ dci(0x4451ae1f); // addp z31.h, p3/m, z31.h, z16.h 1662 // vl128 state = 0x86ee7dbe 1663 __ dci(0x4451ae1b); // addp z27.h, p3/m, z27.h, z16.h 1664 // vl128 state = 0x9846169e 1665 __ dci(0x4451bc0b); // addp z11.h, p7/m, z11.h, z0.h 1666 // vl128 state = 0x5dc31eb0 1667 __ dci(0x4455bc4f); // umaxp z15.h, p7/m, z15.h, z2.h 1668 // vl128 state = 0x9ec9086c 1669 __ dci(0x4455bf47); // umaxp z7.h, p7/m, z7.h, z26.h 1670 // vl128 state = 0xf3a2766b 1671 __ dci(0x44d5b743); // umaxp z3.d, p5/m, z3.d, z26.d 1672 // vl128 state = 0x1ce44f7e 1673 __ dci(0x44d5b7e2); // umaxp z2.d, p5/m, z2.d, z31.d 1674 // vl128 state = 0xf121f7c0 1675 __ dci(0x44d5b7e0); // umaxp z0.d, p5/m, z0.d, z31.d 1676 // vl128 state = 0x4ac0d4f3 1677 __ dci(0x44d5b670); // umaxp z16.d, p5/m, z16.d, z19.d 1678 // vl128 state = 0xdb0d62f5 1679 __ dci(0x44d1b272); // addp z18.d, p4/m, z18.d, z19.d 1680 // vl128 state = 0x34b0c018 1681 __ dci(0x44d1be76); // addp z22.d, p7/m, z22.d, z19.d 1682 // vl128 state = 0x1673f380 1683 __ dci(0x44d1b772); // addp z18.d, p5/m, z18.d, z27.d 1684 // vl128 state = 0xe3e67205 1685 __ dci(0x44d1b162); // addp z2.d, p4/m, z2.d, z11.d 1686 // vl128 state = 0x42907adc 1687 } 1688 1689 uint32_t state; 1690 ComputeMachineStateHash(&masm, &state); 1691 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 1692 __ Ldr(w0, MemOperand(x0)); 1693 1694 END(); 1695 if (CAN_RUN()) { 1696 RUN(); 1697 uint32_t expected_hashes[] = { 1698 0x42907adc, 1699 0xee2f21f5, 1700 0xcbfa0af4, 1701 0x42e7c862, 1702 0x10ef537f, 1703 0x83461e96, 1704 0x2dca0c37, 1705 
0xf2080504, 1706 0xf615d956, 1707 0x1732775a, 1708 0x491fec07, 1709 0xf9e33ada, 1710 0x324435d7, 1711 0x08a9c2ca, 1712 0x87ce3994, 1713 0x338adb5d, 1714 }; 1715 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 1716 } 1717} 1718 1719TEST_SVE(sve2_extract_narrow) { 1720 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 1721 CPUFeatures::kSVE2, 1722 CPUFeatures::kNEON, 1723 CPUFeatures::kCRC32); 1724 START(); 1725 1726 SetInitialMachineState(&masm); 1727 // state = 0xe2bd2480 1728 1729 { 1730 ExactAssemblyScope scope(&masm, 64 * kInstructionSize); 1731 __ dci(0x45284000); // sqxtnb z0.b, z0.h 1732 // vl128 state = 0x874f147b 1733 __ dci(0x45284228); // sqxtnb z8.b, z17.h 1734 // vl128 state = 0xf694d31e 1735 __ dci(0x45284820); // uqxtnb z0.b, z1.h 1736 // vl128 state = 0x5d25df42 1737 __ dci(0x45304821); // uqxtnb z1.h, z1.s 1738 // vl128 state = 0x87eb933f 1739 __ dci(0x45304823); // uqxtnb z3.h, z1.s 1740 // vl128 state = 0x137eddc9 1741 __ dci(0x45604822); // uqxtnb z2.s, z1.d 1742 // vl128 state = 0x26e237a3 1743 __ dci(0x45604d26); // uqxtnt z6.s, z9.d 1744 // vl128 state = 0x72bcf361 1745 __ dci(0x45304d2e); // uqxtnt z14.h, z9.s 1746 // vl128 state = 0x5bcdd232 1747 __ dci(0x45304d3e); // uqxtnt z30.h, z9.s 1748 // vl128 state = 0x9a695f7e 1749 __ dci(0x453049bc); // uqxtnb z28.h, z13.s 1750 // vl128 state = 0x9c2fa230 1751 __ dci(0x453049b8); // uqxtnb z24.h, z13.s 1752 // vl128 state = 0xb590179f 1753 __ dci(0x45304979); // uqxtnb z25.h, z11.s 1754 // vl128 state = 0xc8987735 1755 __ dci(0x4530497d); // uqxtnb z29.h, z11.s 1756 // vl128 state = 0x380f8730 1757 __ dci(0x4530496d); // uqxtnb z13.h, z11.s 1758 // vl128 state = 0x45bf22d4 1759 __ dci(0x45304565); // sqxtnt z5.h, z11.s 1760 // vl128 state = 0xd9237f41 1761 __ dci(0x45304f75); // uqxtnt z21.h, z27.s 1762 // vl128 state = 0x0726a49b 1763 __ dci(0x45304f71); // uqxtnt z17.h, z27.s 1764 // vl128 state = 0xcbc547e0 1765 __ dci(0x45304f73); // uqxtnt z19.h, z27.s 1766 // vl128 state 
= 0x0b16d843 1767 __ dci(0x45284f72); // uqxtnt z18.b, z27.h 1768 // vl128 state = 0xea84ff1f 1769 __ dci(0x45284f7a); // uqxtnt z26.b, z27.h 1770 // vl128 state = 0x4bdb094d 1771 __ dci(0x45284fca); // uqxtnt z10.b, z30.h 1772 // vl128 state = 0x5986f190 1773 __ dci(0x45284b8b); // uqxtnb z11.b, z28.h 1774 // vl128 state = 0xb40f0b26 1775 __ dci(0x45284bef); // uqxtnb z15.b, z31.h 1776 // vl128 state = 0x7abef2b5 1777 __ dci(0x45284fae); // uqxtnt z14.b, z29.h 1778 // vl128 state = 0x79503b36 1779 __ dci(0x45284fac); // uqxtnt z12.b, z29.h 1780 // vl128 state = 0x481a6879 1781 __ dci(0x45284eed); // uqxtnt z13.b, z23.h 1782 // vl128 state = 0x32da844c 1783 __ dci(0x45284ee9); // uqxtnt z9.b, z23.h 1784 // vl128 state = 0xb8438ca7 1785 __ dci(0x45284ef9); // uqxtnt z25.b, z23.h 1786 // vl128 state = 0x4aa26674 1787 __ dci(0x45284cd1); // uqxtnt z17.b, z6.h 1788 // vl128 state = 0xc5411d78 1789 __ dci(0x45284cd5); // uqxtnt z21.b, z6.h 1790 // vl128 state = 0xee446689 1791 __ dci(0x45284ad4); // uqxtnb z20.b, z22.h 1792 // vl128 state = 0x66ef53ef 1793 __ dci(0x45604adc); // uqxtnb z28.s, z22.d 1794 // vl128 state = 0xa894f4d4 1795 __ dci(0x45604ade); // uqxtnb z30.s, z22.d 1796 // vl128 state = 0x50215eb8 1797 __ dci(0x456040dc); // sqxtnb z28.s, z6.d 1798 // vl128 state = 0x5ee8464d 1799 __ dci(0x456048f4); // uqxtnb z20.s, z7.d 1800 // vl128 state = 0xee2ca07b 1801 __ dci(0x45604c75); // uqxtnt z21.s, z3.d 1802 // vl128 state = 0x0e81e7e0 1803 __ dci(0x45604cb1); // uqxtnt z17.s, z5.d 1804 // vl128 state = 0x5c448cac 1805 __ dci(0x45604e33); // uqxtnt z19.s, z17.d 1806 // vl128 state = 0xcd0d561e 1807 __ dci(0x45604e23); // uqxtnt z3.s, z17.d 1808 // vl128 state = 0x7b8b2204 1809 __ dci(0x45604cab); // uqxtnt z11.s, z5.d 1810 // vl128 state = 0x418cec7f 1811 __ dci(0x45604caa); // uqxtnt z10.s, z5.d 1812 // vl128 state = 0x37064bb6 1813 __ dci(0x45604efa); // uqxtnt z26.s, z23.d 1814 // vl128 state = 0xc83ef05d 1815 __ dci(0x456046db); // sqxtnt z27.s, z22.d 1816 
// vl128 state = 0xe30a1f0f 1817 __ dci(0x456046da); // sqxtnt z26.s, z22.d 1818 // vl128 state = 0xe10b92fa 1819 __ dci(0x4560424a); // sqxtnb z10.s, z18.d 1820 // vl128 state = 0x2396410c 1821 __ dci(0x45604a08); // uqxtnb z8.s, z16.d 1822 // vl128 state = 0xf4ae5ad5 1823 __ dci(0x45304a00); // uqxtnb z0.h, z16.s 1824 // vl128 state = 0x26bbb3d1 1825 __ dci(0x45304828); // uqxtnb z8.h, z1.s 1826 // vl128 state = 0x57d91166 1827 __ dci(0x4530422c); // sqxtnb z12.h, z17.s 1828 // vl128 state = 0x5548e0b4 1829 __ dci(0x45305324); // sqxtunb z4.h, z25.s 1830 // vl128 state = 0xf7eb8d9c 1831 __ dci(0x45305325); // sqxtunb z5.h, z25.s 1832 // vl128 state = 0xcf294303 1833 __ dci(0x45305321); // sqxtunb z1.h, z25.s 1834 // vl128 state = 0x6c7597d6 1835 __ dci(0x453057a9); // sqxtunt z9.h, z29.s 1836 // vl128 state = 0xe7be4fd5 1837 __ dci(0x453043b9); // sqxtnb z25.h, z29.s 1838 // vl128 state = 0x376f3f76 1839 __ dci(0x453043bb); // sqxtnb z27.h, z29.s 1840 // vl128 state = 0xf8389159 1841 __ dci(0x4530431a); // sqxtnb z26.h, z24.s 1842 // vl128 state = 0x8ca15413 1843 __ dci(0x45304312); // sqxtnb z18.h, z24.s 1844 // vl128 state = 0x2a6d8b90 1845 __ dci(0x4530491a); // uqxtnb z26.h, z8.s 1846 // vl128 state = 0x7119ff0d 1847 __ dci(0x4530413b); // sqxtnb z27.h, z9.s 1848 // vl128 state = 0x884748db 1849 __ dci(0x4530482b); // uqxtnb z11.h, z1.s 1850 // vl128 state = 0x43296aec 1851 __ dci(0x4530483b); // uqxtnb z27.h, z1.s 1852 // vl128 state = 0xdb9908f0 1853 __ dci(0x45304979); // uqxtnb z25.h, z11.s 1854 // vl128 state = 0xef30bfc8 1855 __ dci(0x453049d1); // uqxtnb z17.h, z14.s 1856 // vl128 state = 0xb46173d8 1857 __ dci(0x456049d3); // uqxtnb z19.s, z14.d 1858 // vl128 state = 0xcb8c3b83 1859 } 1860 1861 uint32_t state; 1862 ComputeMachineStateHash(&masm, &state); 1863 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 1864 __ Ldr(w0, MemOperand(x0)); 1865 1866 END(); 1867 if (CAN_RUN()) { 1868 RUN(); 1869 uint32_t expected_hashes[] = { 1870 0xcb8c3b83, 1871 
0x92fb7f98,
        0xb7ec6385,
        0x81de8602,
        0xd970d431,
        0x2fe61431,
        0x359b1355,
        0xdeec900e,
        0xfd0c7d7d,
        0x62e89b19,
        0x43039424,
        0xdd42efc9,
        0x861010f1,
        0x82d68f37,
        0x3761a1d0,
        0xbcf3c5c9,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// State-hash test for the SVE2 EORBT and EORTB encodings.
//
// The test resets the machine to a fixed initial state, emits 50 raw
// instruction encodings, hashes the resulting machine state and compares the
// hash with a table entry selected by the SVE lane count.
TEST_SVE(sve2_eorbt_eortb) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings of this test. Each
    // dci() carries its disassembly as a trailing comment and is followed by
    // a "vl128 state" note recording the hash after that instruction.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x451892b8);  // eorbt z24.b, z21.b, z24.b
    // vl128 state = 0xc3f2b082
    __ dci(0x455893ba);  // eorbt z26.h, z29.h, z24.h
    // vl128 state = 0xc7421198
    __ dci(0x455892f8);  // eorbt z24.h, z23.h, z24.h
    // vl128 state = 0x4e155b96
    __ dci(0x455092bc);  // eorbt z28.h, z21.h, z16.h
    // vl128 state = 0x09393ad0
    __ dci(0x455893be);  // eorbt z30.h, z29.h, z24.h
    // vl128 state = 0x6d660844
    __ dci(0x4558922e);  // eorbt z14.h, z17.h, z24.h
    // vl128 state = 0x84f1ff20
    __ dci(0x45d892aa);  // eorbt z10.d, z21.d, z24.d
    // vl128 state = 0x568612d4
    __ dci(0x454892a8);  // eorbt z8.h, z21.h, z8.h
    // vl128 state = 0x699a3e24
    __ dci(0x45c890ac);  // eorbt z12.d, z5.d, z8.d
    // vl128 state = 0x17bb6d9b
    __ dci(0x45c990ed);  // eorbt z13.d, z7.d, z9.d
    // vl128 state = 0xee5be73f
    __ dci(0x45c892fd);  // eorbt z29.d, z23.d, z8.d
    // vl128 state = 0x141c47ed
    __ dci(0x45c892f9);  // eorbt z25.d, z23.d, z8.d
    // vl128 state = 0xc3259593
    __ dci(0x45c892f8);  // eorbt z24.d, z23.d, z8.d
    // vl128 state = 0x3bca0bcc
    __ dci(0x45c892e8);  // eorbt z8.d, z23.d, z8.d
    // vl128 state = 0x4714ab64
    __ dci(0x454a92ea);  // eorbt z10.h, z23.h, z10.h
    // vl128 state = 0x51360c73
    __ dci(0x454092e2);  // eorbt z2.h, z23.h, z0.h
    // vl128 state = 0xe33859fe
    __ dci(0x454092f2);  // eorbt z18.h, z23.h, z0.h
    // vl128 state = 0xa0d81168
    __ dci(0x4550927a);  // eorbt z26.h, z19.h, z16.h
    // vl128 state = 0xe4983274
    __ dci(0x4551923b);  // eorbt z27.h, z17.h, z17.h
    // vl128 state = 0x8e89eab7
    __ dci(0x45d3923f);  // eorbt z31.d, z17.d, z19.d
    // vl128 state = 0x472bd288
    __ dci(0x4553921d);  // eorbt z29.h, z16.h, z19.h
    // vl128 state = 0x61090ed4
    __ dci(0x4553932d);  // eorbt z13.h, z25.h, z19.h
    // vl128 state = 0x3ef228eb
    __ dci(0x4513912c);  // eorbt z12.b, z9.b, z19.b
    // vl128 state = 0x96d4505c
    __ dci(0x4551912d);  // eorbt z13.h, z9.h, z17.h
    // vl128 state = 0x1c32baef
    __ dci(0x45119029);  // eorbt z9.b, z1.b, z17.b
    // vl128 state = 0xa138f554
    __ dci(0x45149028);  // eorbt z8.b, z1.b, z20.b
    // vl128 state = 0xf0681d9a
    __ dci(0x459490aa);  // eorbt z10.s, z5.s, z20.s
    // vl128 state = 0xbd4b30f5
    __ dci(0x458590a8);  // eorbt z8.s, z5.s, z5.s
    // vl128 state = 0x45c5b437
    __ dci(0x4585948c);  // eortb z12.s, z4.s, z5.s
    // vl128 state = 0x22f90a7b
    __ dci(0x45cd949c);  // eortb z28.d, z4.d, z13.d
    // vl128 state = 0x5e4584ca
    __ dci(0x4589949d);  // eortb z29.s, z4.s, z9.s
    // vl128 state = 0x65ac913e
    __ dci(0x458990ad);  // eorbt z13.s, z5.s, z9.s
    // vl128 state = 0x4f13d973
    __ dci(0x459b90ac);  // eorbt z12.s, z5.s, z27.s
    // vl128 state = 0xd13bb801
    __ dci(0x45db90ee);  // eorbt z14.d, z7.d, z27.d
    // vl128 state = 0xf24115d0
    __ dci(0x45db916f);  // eorbt z15.d, z11.d, z27.d
    // vl128 state = 0x04f38375
    __ dci(0x45db95e7);  // eortb z7.d, z15.d, z27.d
    // vl128 state = 0xe1046ae5
    __ dci(0x45db94a3);  // eortb z3.d, z5.d, z27.d
    // vl128 state = 0xaaeae67e
    __ dci(0x45dd94a1);  // eortb z1.d, z5.d, z29.d
    // vl128 state = 0xd67f6823
    __ dci(0x45dd94b1);  // eortb z17.d, z5.d, z29.d
// vl128 state = 0xf172245b
    __ dci(0x45dd90f3);  // eorbt z19.d, z7.d, z29.d
    // vl128 state = 0xc99195b8
    __ dci(0x458d90e3);  // eorbt z3.s, z7.s, z13.s
    // vl128 state = 0xe1a146cf
    __ dci(0x458994e2);  // eortb z2.s, z7.s, z9.s
    // vl128 state = 0x8038f273
    __ dci(0x458b94a3);  // eortb z3.s, z5.s, z11.s
    // vl128 state = 0x50bda372
    __ dci(0x459b9481);  // eortb z1.s, z4.s, z27.s
    // vl128 state = 0xe8d53012
    __ dci(0x455b9485);  // eortb z5.h, z4.h, z27.h
    // vl128 state = 0xdba33ea5
    __ dci(0x454b9087);  // eorbt z7.h, z4.h, z11.h
    // vl128 state = 0xff7f1815
    __ dci(0x45499003);  // eorbt z3.h, z0.h, z9.h
    // vl128 state = 0x5d6e0104
    __ dci(0x454d9022);  // eorbt z2.h, z1.h, z13.h
    // vl128 state = 0xe9161cfe
    __ dci(0x45099026);  // eorbt z6.b, z1.b, z9.b
    // vl128 state = 0x48126fb9
    __ dci(0x454b9024);  // eorbt z4.h, z1.h, z11.h
    // vl128 state = 0x53cbfc46
  }

  // `state` receives the machine-state hash; its address is materialised in
  // x0 and the 32-bit value is loaded into w0 for the check below.
  // NOTE(review): ComputeMachineStateHash is defined elsewhere; presumably it
  // emits code that stores the hash through the pointer at run time - confirm.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, indexed by core.GetSVELaneCount(kQRegSize) - 1.
    // Entry 0 equals the final "vl128 state" note above.
    uint32_t expected_hashes[] = {
        0x53cbfc46,
        0x0f81a01e,
        0xf97c4e96,
        0x745e9ed6,
        0x4487a0a1,
        0x7ad79509,
        0x53577280,
        0x1e589717,
        0xaaa96af0,
        0x4f2b0884,
        0x24d2cd1c,
        0x4d89438d,
        0x9b327a12,
        0xeabfd558,
        0xb63e33f1,
        0xebd7d9ca,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// State-hash test for the SVE2 SQRDMLAH and SQRDMLSH (vector) encodings.
//
// The test resets the machine to a fixed initial state, emits 40 raw
// instruction encodings, hashes the resulting machine state and compares the
// hash with a table entry selected by the SVE lane count.
TEST_SVE(sve2_saturating_multiply_add_high_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 40 encodings below. Each dci()
    // carries its disassembly as a trailing comment and is followed by a
    // "vl128 state" note recording the hash after that instruction.
    ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
    __ dci(0x44d9721a);  // sqrdmlah z26.d, z16.d, z25.d
    // vl128 state = 0xc0474f3f
    __ dci(0x44dd761b);  // sqrdmlsh z27.d, z16.d, z29.d
    // vl128 state = 0x102712ac
    __ dci(0x44d4760b);  // sqrdmlsh z11.d, z16.d, z20.d
    // vl128 state = 0xe8666aa6
    __ dci(0x44947709);  // sqrdmlsh z9.s, z24.s, z20.s
    // vl128 state = 0xdd18f643
    __ dci(0x4494770b);  // sqrdmlsh z11.s, z24.s, z20.s
    // vl128 state = 0xac4a4d4c
    __ dci(0x44d4773b);  // sqrdmlsh z27.d, z25.d, z20.d
    // vl128 state = 0x1a5447d4
    __ dci(0x44dc7639);  // sqrdmlsh z25.d, z17.d, z28.d
    // vl128 state = 0xf547ac30
    __ dci(0x44dc763b);  // sqrdmlsh z27.d, z17.d, z28.d
    // vl128 state = 0xb42d177a
    __ dci(0x44d4743f);  // sqrdmlsh z31.d, z1.d, z20.d
    // vl128 state = 0xd0da2c6b
    __ dci(0x449c742f);  // sqrdmlsh z15.s, z1.s, z28.s
    // vl128 state = 0xb24c8988
    __ dci(0x449c7487);  // sqrdmlsh z7.s, z4.s, z28.s
    // vl128 state = 0x9e67ddac
    __ dci(0x449c7485);  // sqrdmlsh z5.s, z4.s, z28.s
    // vl128 state = 0xd96b34e2
    __ dci(0x448e7481);  // sqrdmlsh z1.s, z4.s, z14.s
    // vl128 state = 0x81d91007
    __ dci(0x448e7480);  // sqrdmlsh z0.s, z4.s, z14.s
    // vl128 state = 0x901fa692
    __ dci(0x449c7488);  // sqrdmlsh z8.s, z4.s, z28.s
    // vl128 state = 0xeedceee6
    __ dci(0x441c758a);  // sqrdmlsh z10.b, z12.b, z28.b
    // vl128 state = 0x8dc4d389
    __ dci(0x441475ae);  // sqrdmlsh z14.b, z13.b, z20.b
    // vl128 state = 0xb1711932
    __ dci(0x440075ac);  // sqrdmlsh z12.b, z13.b, z0.b
    // vl128 state = 0x8cacf188
    __ dci(0x440171bc);  // sqrdmlah z28.b, z13.b, z1.b
    // vl128 state = 0x9c8b9f4f
    __ dci(0x440171b8);  // sqrdmlah z24.b, z13.b, z1.b
    // vl128 state = 0x562ebefa
    __ dci(0x441971b9);  // sqrdmlah z25.b, z13.b, z25.b
    // vl128 state = 0x1ef60d31
    __ dci(0x440970bb);  // sqrdmlah z27.b, z5.b, z9.b
    // vl128 state = 0x69bd18ee
    __ dci(0x441870ba);  // sqrdmlah z26.b, z5.b, z24.b
    // vl128 state = 0x525b1f84
    __ dci(0x441270b8);  // sqrdmlah z24.b, z5.b, z18.b
    // vl128 state = 0x3c7dadd8
    __ dci(0x44927090);  // sqrdmlah z16.s, z4.s, z18.s
    // vl128 state = 0x276f0567
    __ dci(0x44937292);  // sqrdmlah z18.s, z20.s, z19.s
    // vl128 state = 0x6f0f8bb4
    __ dci(0x4491721a);  // sqrdmlah z26.s, z16.s, z17.s
    // vl128 state = 0x28eb737a
    __ dci(0x44d3721b);  // sqrdmlah z27.d, z16.d, z19.d
    // vl128 state = 0xa3bd1133
    __ dci(0x44d372ab);  // sqrdmlah z11.d, z21.d, z19.d
    // vl128 state = 0x6e81e8fd
    __ dci(0x44d372a3);  // sqrdmlah z3.d, z21.d, z19.d
    // vl128 state = 0x55730750
    __ dci(0x445376a1);  // sqrdmlsh z1.h, z21.h, z19.h
    // vl128 state = 0x7c7afd6d
    __ dci(0x44527685);  // sqrdmlsh z5.h, z20.h, z18.h
    // vl128 state = 0x1c9dc1a1
    __ dci(0x44127495);  // sqrdmlsh z21.b, z4.b, z18.b
    // vl128 state = 0xf2e07e92
    __ dci(0x44127794);  // sqrdmlsh z20.b, z28.b, z18.b
    // vl128 state = 0xc5a2e589
    __ dci(0x44527695);  // sqrdmlsh z21.h, z20.h, z18.h
    // vl128 state = 0x417df395
    __ dci(0x445274dd);  // sqrdmlsh z29.h, z6.h, z18.h
    // vl128 state = 0x2e223308
    __ dci(0x445774df);  // sqrdmlsh z31.h, z6.h, z23.h
    // vl128 state = 0x99047839
    __ dci(0x445775fe);  // sqrdmlsh z30.h, z15.h, z23.h
    // vl128 state = 0x34a4be39
    __ dci(0x445175ff);  // sqrdmlsh z31.h, z15.h, z17.h
    // vl128 state = 0x714b9d66
    __ dci(0x44517557);  // sqrdmlsh z23.h, z10.h, z17.h
    // vl128 state = 0x2aa51ff4
  }

  // `state` receives the machine-state hash; its address is materialised in
  // x0 and the 32-bit value is loaded into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Expected hashes; the first entry equals the final "vl128 state" note
    // above.
    uint32_t expected_hashes[] = {
        0x2aa51ff4,
        0xde163ba0,
        0x8b237661,
        0x30086cf2,
        0xabf248f0,
        0xcc183608,
        0xa4103141,
        0x521ebe39,
        0xd746470e,
        0x141a51a4,
        0x695a47fd,
        0x0a74d701,
        0xd14bae63,
0xf967aadb,
        0xdaed8896,
        0x7ba556cb,
    };
    // The table is indexed by core.GetSVELaneCount(kQRegSize) - 1; w0 was
    // loaded with the computed hash, so the comparison reads x0.
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// State-hash test for the SVE2 SADALP and UADALP encodings.
//
// The test resets the machine to a fixed initial state, emits 40 raw
// instruction encodings, hashes the resulting machine state and compares the
// hash with a table entry selected by the SVE lane count.
TEST_SVE(sve2_integer_pairwise_add_accumulate_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 40 encodings below. Each dci()
    // carries its disassembly as a trailing comment and is followed by a
    // "vl128 state" note recording the hash after that instruction.
    ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
    __ dci(0x4445b4e3);  // uadalp z3.h, p5/m, z7.b
    // vl128 state = 0x3ad015af
    __ dci(0x4445b4e1);  // uadalp z1.h, p5/m, z7.b
    // vl128 state = 0x3f53978b
    __ dci(0x4445bc65);  // uadalp z5.h, p7/m, z3.b
    // vl128 state = 0xf3340744
    __ dci(0x4445be35);  // uadalp z21.h, p7/m, z17.b
    // vl128 state = 0xb6f81377
    __ dci(0x4445be9d);  // uadalp z29.h, p7/m, z20.b
    // vl128 state = 0xaf772b37
    __ dci(0x4444bc9c);  // sadalp z28.h, p7/m, z4.b
    // vl128 state = 0x591be304
    __ dci(0x4444bc9d);  // sadalp z29.h, p7/m, z4.b
    // vl128 state = 0x406d9d34
    __ dci(0x4444ba99);  // sadalp z25.h, p6/m, z20.b
    // vl128 state = 0xb455880f
    __ dci(0x44c4ba09);  // sadalp z9.d, p6/m, z16.s
    // vl128 state = 0x5ef8e2ed
    __ dci(0x44c4ba01);  // sadalp z1.d, p6/m, z16.s
    // vl128 state = 0xca2ccf0d
    __ dci(0x44c4ba11);  // sadalp z17.d, p6/m, z16.s
    // vl128 state = 0x33bb9903
    __ dci(0x4484bb15);  // sadalp z21.s, p6/m, z24.h
    // vl128 state = 0x3964a356
    __ dci(0x4484b957);  // sadalp z23.s, p6/m, z10.h
    // vl128 state = 0x1e1426d2
    __ dci(0x4484b953);  // sadalp z19.s, p6/m, z10.h
    // vl128 state = 0x83e2e1a6
    __ dci(0x4484b943);  // sadalp z3.s, p6/m, z10.h
    // vl128 state = 0x24335149
    __ dci(0x4484b102);  // sadalp z2.s, p4/m, z8.h
    // vl128 state = 0x8bde109a
    __ dci(0x4484bd06);  // sadalp z6.s, p7/m, z8.h
    // vl128 state = 0x5abf30eb
    __ dci(0x4484bdc2);  // sadalp z2.s, p7/m, z14.h
    // vl128 state = 0xcb199381
    __ dci(0x4485b5c6);  // uadalp z6.s, p5/m, z14.h
    // vl128 state = 0x5f3819ad
    __ dci(0x4485b5c2);  // uadalp z2.s, p5/m, z14.h
    // vl128 state = 0x5f6d69e4
    __ dci(0x4485b5ca);  // uadalp z10.s, p5/m, z14.h
    // vl128 state = 0x1a0d7053
    __ dci(0x4485b15a);  // uadalp z26.s, p4/m, z10.h
    // vl128 state = 0x9081b6cd
    __ dci(0x44c5b95e);  // uadalp z30.d, p6/m, z10.s
    // vl128 state = 0x6b15107e
    __ dci(0x44c5a14e);  // uadalp z14.d, p0/m, z10.s
    // vl128 state = 0x4a127dc2
    __ dci(0x4445a1c6);  // uadalp z6.h, p0/m, z14.b
    // vl128 state = 0x06902399
    __ dci(0x4445a1ce);  // uadalp z14.h, p0/m, z14.b
    // vl128 state = 0x1789be4a
    __ dci(0x4444a9de);  // sadalp z30.h, p2/m, z14.b
    // vl128 state = 0x86732543
    __ dci(0x4444adff);  // sadalp z31.h, p3/m, z15.b
    // vl128 state = 0xe326faef
    __ dci(0x4444bdb7);  // sadalp z23.h, p7/m, z13.b
    // vl128 state = 0x46d5f328
    __ dci(0x4444bda7);  // sadalp z7.h, p7/m, z13.b
    // vl128 state = 0x5cf7a973
    __ dci(0x4445bd25);  // uadalp z5.h, p7/m, z9.b
    // vl128 state = 0xdf8cbb97
    __ dci(0x4485bd35);  // uadalp z21.s, p7/m, z9.h
    // vl128 state = 0x330c3d35
    __ dci(0x4485bc17);  // uadalp z23.s, p7/m, z0.h
    // vl128 state = 0x6ebfa4fe
    __ dci(0x4485bc15);  // uadalp z21.s, p7/m, z0.h
    // vl128 state = 0x52f18385
    __ dci(0x4485be91);  // uadalp z17.s, p7/m, z20.h
    // vl128 state = 0x82fa2d85
    __ dci(0x4485be53);  // uadalp z19.s, p7/m, z18.h
    // vl128 state = 0xa7d6098b
    __ dci(0x4485aa52);  // uadalp z18.s, p2/m, z18.h
    // vl128 state = 0xfe8faafa
    __ dci(0x4485ae13);  // uadalp z19.s, p3/m, z16.h
    // vl128 state = 0xf2465f31
    __ dci(0x4485b617);  // uadalp z23.s, p5/m, z16.h
    // vl128 state = 0xed6be8ed
    __ dci(0x4485bc13);  // uadalp z19.s, p7/m, z0.h
    // vl128 state = 0xb2f95c3d
  }

  // Host-side slot whose address is handed to ComputeMachineStateHash.
  uint32_t state;
ComputeMachineStateHash(&masm, &state);
  // Materialise the address of `state` in x0 and load its 32-bit value into
  // w0, which is what the assertion below inspects.
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, indexed by core.GetSVELaneCount(kQRegSize) - 1.
    uint32_t expected_hashes[] = {
        0xb2f95c3d,
        0xa4189170,
        0xed9e7f9e,
        0xfca732cb,
        0x4c94b2d7,
        0x92a2fb21,
        0xbca62a5c,
        0x9aec54d6,
        0x8df82b02,
        0x50c18764,
        0xd27e5a0e,
        0x1a538cc6,
        0x538b673e,
        0x37e4b499,
        0x7160cbd5,
        0x113951bc,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// State-hash test for the unpredicated vector MUL and PMUL encodings.
//
// The test resets the machine to a fixed initial state, emits 30 raw
// instruction encodings, hashes the resulting machine state and compares the
// hash with a table entry selected by the SVE lane count.
TEST_SVE(sve2_pmul_mul_vector_unpredicated) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 30 encodings below. Each dci()
    // carries its disassembly as a trailing comment and is followed by a
    // "vl128 state" note recording the hash after that instruction.
    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
    __ dci(0x04a56309);  // mul z9.s, z24.s, z5.s
    // vl128 state = 0x0ef461d5
    __ dci(0x04a56148);  // mul z8.s, z10.s, z5.s
    // vl128 state = 0xce9f1381
    __ dci(0x04a161d8);  // mul z24.s, z14.s, z1.s
    // vl128 state = 0x2a14ff8c
    __ dci(0x04a16179);  // mul z25.s, z11.s, z1.s
    // vl128 state = 0x88a0241b
    __ dci(0x04b36171);  // mul z17.s, z11.s, z19.s
    // vl128 state = 0x23aea8a6
    __ dci(0x04fb6170);  // mul z16.d, z11.d, z27.d
    // vl128 state = 0x58eaa46d
    __ dci(0x04fb6171);  // mul z17.d, z11.d, z27.d
    // vl128 state = 0xc733a399
    __ dci(0x04fb6350);  // mul z16.d, z26.d, z27.d
    // vl128 state = 0x2806af41
    __ dci(0x04eb6372);  // mul z18.d, z27.d, z11.d
    // vl128 state = 0x5ec775d1
    __ dci(0x04eb6376);  // mul z22.d, z27.d, z11.d
    // vl128 state = 0x40d03f0d
    __ dci(0x04ed637e);  // mul z30.d, z27.d, z13.d
    // vl128 state = 0xe3a61d56
    __ dci(0x04e8637f);  // mul z31.d, z27.d, z8.d
    // vl128 state = 0x2eb4313f
    __ dci(0x04a86337);  // mul z23.s, z25.s, z8.s
    // vl128 state = 0xc68e329e
    __ dci(0x04a86336);  // mul z22.s, z25.s, z8.s
    // vl128 state = 0x177b1a43
    __ dci(0x04ac63be);  // mul z30.s, z29.s, z12.s
    // vl128 state = 0xaaa415dd
    __ dci(0x04ac63d6);  // mul z22.s, z30.s, z12.s
    // vl128 state = 0xaeb212b8
    __ dci(0x042c67d2);  // pmul z18.b, z30.b, z12.b
    // vl128 state = 0xa11be1c8
    __ dci(0x042c65f3);  // pmul z19.b, z15.b, z12.b
    // vl128 state = 0x8dd03a21
    __ dci(0x042e65d2);  // pmul z18.b, z14.b, z14.b
    // vl128 state = 0x83ef9a66
    __ dci(0x042f6550);  // pmul z16.b, z10.b, z15.b
    // vl128 state = 0x6a495368
    __ dci(0x042e6754);  // pmul z20.b, z26.b, z14.b
    // vl128 state = 0x0b6c3ccf
    __ dci(0x042e6750);  // pmul z16.b, z26.b, z14.b
    // vl128 state = 0xa745457f
    __ dci(0x042e6600);  // pmul z0.b, z16.b, z14.b
    // vl128 state = 0x92fe8b9d
    __ dci(0x042e6602);  // pmul z2.b, z16.b, z14.b
    // vl128 state = 0xda39ebe2
    __ dci(0x043f6600);  // pmul z0.b, z16.b, z31.b
    // vl128 state = 0xcc36d223
    __ dci(0x042b6608);  // pmul z8.b, z16.b, z11.b
    // vl128 state = 0x8b94d25a
    __ dci(0x042a6700);  // pmul z0.b, z24.b, z10.b
    // vl128 state = 0x0118ccba
    __ dci(0x042a6710);  // pmul z16.b, z24.b, z10.b
    // vl128 state = 0x4b38543b
    __ dci(0x042a6714);  // pmul z20.b, z24.b, z10.b
    // vl128 state = 0xa54e126f
    __ dci(0x042a6716);  // pmul z22.b, z24.b, z10.b
    // vl128 state = 0x61ad87c9
  }

  // `state` receives the machine-state hash; its address is materialised in
  // x0 and the 32-bit value is loaded into w0 for the check below.
  // NOTE(review): ComputeMachineStateHash is defined elsewhere; presumably it
  // emits code that stores the hash through the pointer at run time - confirm.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, indexed by core.GetSVELaneCount(kQRegSize) - 1.
    // Entry 0 equals the final "vl128 state" note above.
    uint32_t expected_hashes[] = {
        0x61ad87c9,
        0x82df488f,
        0xc0d7c1a4,
        0x4f86e761,
        0x8d651d7b,
        0x294cf55a,
        0x060ab34c,
        0x1db0e99c,
        0x4b0b59d7,
        0xcee6dfd1,
        0x29575669,
        0x5c1c7922,
        0x4b1957ed,
        0x8bc5712b,
        0x6ac59fdc,
        0x048ce1b5,
};
    // The table is indexed by core.GetSVELaneCount(kQRegSize) - 1; w0 was
    // loaded with the computed hash, so the comparison reads x0.
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// State-hash test for the unpredicated vector SMULH and UMULH encodings.
//
// The test resets the machine to a fixed initial state, emits 30 raw
// instruction encodings, hashes the resulting machine state and compares the
// hash with a table entry selected by the SVE lane count.
TEST_SVE(sve2_smulh_umulh_vector_unpredicated) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 30 encodings below. Each dci()
    // carries its disassembly as a trailing comment and is followed by a
    // "vl128 state" note recording the hash after that instruction.
    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
    __ dci(0x04e46c3b);  // umulh z27.d, z1.d, z4.d
    // vl128 state = 0xfb66ba83
    __ dci(0x04ac6c3a);  // umulh z26.s, z1.s, z12.s
    // vl128 state = 0x45cdb9a2
    __ dci(0x04a86e32);  // umulh z18.s, z17.s, z8.s
    // vl128 state = 0x4ad150dc
    __ dci(0x04a86a7a);  // smulh z26.s, z19.s, z8.s
    // vl128 state = 0xbf08e2cb
    __ dci(0x04e86b7b);  // smulh z27.d, z27.d, z8.d
    // vl128 state = 0x51ad0655
    __ dci(0x04ee6b73);  // smulh z19.d, z27.d, z14.d
    // vl128 state = 0xf764bda9
    __ dci(0x04ec6f7b);  // umulh z27.d, z27.d, z12.d
    // vl128 state = 0xc90f20ef
    __ dci(0x04ac6f3a);  // umulh z26.s, z25.s, z12.s
    // vl128 state = 0x9ec08333
    __ dci(0x04ac6f32);  // umulh z18.s, z25.s, z12.s
    // vl128 state = 0x3620406c
    __ dci(0x042e6f3a);  // umulh z26.b, z25.b, z14.b
    // vl128 state = 0x4e18467a
    __ dci(0x042a6b2a);  // smulh z10.b, z25.b, z10.b
    // vl128 state = 0x13c7cd6f
    __ dci(0x042a6b2b);  // smulh z11.b, z25.b, z10.b
    // vl128 state = 0x16a44c1b
    __ dci(0x043a6b03);  // smulh z3.b, z24.b, z26.b
    // vl128 state = 0x9f8f203b
    __ dci(0x047a690b);  // smulh z11.h, z8.h, z26.h
    // vl128 state = 0xce0aa45e
    __ dci(0x047a690a);  // smulh z10.h, z8.h, z26.h
    // vl128 state = 0xb667d59b
    __ dci(0x0479690e);  // smulh z14.h, z8.h, z25.h
    // vl128 state = 0xd76639b7
    __ dci(0x046d690c);  // smulh z12.h, z8.h, z13.h
    // vl128 state = 0x736b227e
    __ dci(0x042f690e);  // smulh z14.b, z8.b, z15.b
    // vl128 state = 0xc0804df9
    __ dci(0x042f69ac);  // smulh z12.b, z13.b, z15.b
    // vl128 state = 0x8a5509f5
    __ dci(0x042f696e);  // smulh z14.b, z11.b, z15.b
    // vl128 state = 0x761f9cf8
    __ dci(0x042e6b6a);  // smulh z10.b, z27.b, z14.b
    // vl128 state = 0x3b5f2705
    __ dci(0x042e6b6e);  // smulh z14.b, z27.b, z14.b
    // vl128 state = 0x53b23a0a
    __ dci(0x04366b6f);  // smulh z15.b, z27.b, z22.b
    // vl128 state = 0x5bd53ce9
    __ dci(0x04766f7f);  // umulh z31.h, z27.h, z22.h
    // vl128 state = 0x701bec8f
    __ dci(0x04746fef);  // umulh z15.h, z31.h, z20.h
    // vl128 state = 0x29697c8c
    __ dci(0x04706dee);  // umulh z14.h, z15.h, z16.h
    // vl128 state = 0x2088f1c2
    __ dci(0x04706c7e);  // umulh z30.h, z3.h, z16.h
    // vl128 state = 0x56224145
    __ dci(0x04306c2e);  // umulh z14.b, z1.b, z16.b
    // vl128 state = 0x2ba58c9c
    __ dci(0x04b06e2a);  // umulh z10.s, z17.s, z16.s
    // vl128 state = 0xb933d058
    __ dci(0x04b56e2e);  // umulh z14.s, z17.s, z21.s
    // vl128 state = 0x184daee9
  }

  // `state` receives the machine-state hash; its address is materialised in
  // x0 and the 32-bit value is loaded into w0 for the check below.
  // NOTE(review): ComputeMachineStateHash is defined elsewhere; presumably it
  // emits code that stores the hash through the pointer at run time - confirm.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, indexed by core.GetSVELaneCount(kQRegSize) - 1.
    // Entry 0 equals the final "vl128 state" note above.
    uint32_t expected_hashes[] = {
        0x184daee9,
        0x19454232,
        0xa56823a3,
        0xe334897a,
        0xcaa988e1,
        0x614cbf4f,
        0xfaa384e4,
        0x4b45e885,
        0xef930ead,
        0x49304b9a,
        0x4f1d830e,
        0xa41c1a95,
        0xa1ea8d07,
        0x62ca97b4,
        0x15f52cac,
        0xc190cd57,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// State-hash test for the SVE2 SADDLBT, SSUBLBT and SSUBLTB encodings.
//
// The test resets the machine to a fixed initial state, emits 50 raw
// instruction encodings, hashes the resulting machine state and compares the
// hash with a table entry selected by the SVE lane count.
TEST_SVE(sve2_arith_interleaved_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x459289bd);  // ssublbt z29.s, z13.h, z18.h
    // vl128 state = 0xe2e0965a
    __ dci(0x459289bf);  // ssublbt z31.s, z13.h, z18.h
    // vl128 state = 0x64e3e1a3
    __ dci(0x45d689be);  // ssublbt z30.d, z13.s, z22.s
    // vl128 state = 0x02711ec2
    __ dci(0x45d68916);  // ssublbt z22.d, z8.s, z22.s
    // vl128 state = 0x7ff6f63f
    __ dci(0x45968957);  // ssublbt z23.s, z10.h, z22.h
    // vl128 state = 0xa9aace7f
    __ dci(0x45968a55);  // ssublbt z21.s, z18.h, z22.h
    // vl128 state = 0x6007d46c
    __ dci(0x45868251);  // saddlbt z17.s, z18.h, z6.h
    // vl128 state = 0xecea329d
    __ dci(0x45868230);  // saddlbt z16.s, z17.h, z6.h
    // vl128 state = 0xa16880b8
    __ dci(0x45868231);  // saddlbt z17.s, z17.h, z6.h
    // vl128 state = 0xcff73a01
    __ dci(0x458c8235);  // saddlbt z21.s, z17.h, z12.h
    // vl128 state = 0xf6486b24
    __ dci(0x458c8231);  // saddlbt z17.s, z17.h, z12.h
    // vl128 state = 0xa5612e07
    __ dci(0x459c8021);  // saddlbt z1.s, z1.h, z28.h
    // vl128 state = 0xd71ab1e8
    __ dci(0x458c8009);  // saddlbt z9.s, z0.h, z12.h
    // vl128 state = 0xaf74bd16
    __ dci(0x459e800b);  // saddlbt z11.s, z0.h, z30.h
    // vl128 state = 0x96dee616
    __ dci(0x45928003);  // saddlbt z3.s, z0.h, z18.h
    // vl128 state = 0x652e9cca
    __ dci(0x45d28207);  // saddlbt z7.d, z16.s, z18.s
    // vl128 state = 0xc6b07290
    __ dci(0x45da8225);  // saddlbt z5.d, z17.s, z26.s
    // vl128 state = 0x8c74a35d
    __ dci(0x45da830d);  // saddlbt z13.d, z24.s, z26.s
    // vl128 state = 0xff620001
    __ dci(0x45cb8309);  // saddlbt z9.d, z24.s, z11.s
    // vl128 state = 0x2147f374
    __ dci(0x45ca8119);  // saddlbt z25.d, z8.s, z10.s
    // vl128 state = 0x6f961936
    __ dci(0x45ce831d);  // saddlbt z29.d, z24.s, z14.s
    // vl128 state = 0xaa91e68a
    __ dci(0x45ce8135);  // saddlbt z21.d, z9.s, z14.s
    // vl128 state = 0xa5635d0e
    __ dci(0x458e8331);  // saddlbt z17.s, z25.h, z14.h
    // vl128 state = 0xa0705ea7
    __ dci(0x458e8030);  // saddlbt z16.s, z1.h, z14.h
    // vl128 state = 0x397dc4d5
    __ dci(0x458e8271);  // saddlbt z17.s, z19.h, z14.h
    // vl128 state = 0x5e975082
    __ dci(0x458a82e1);  // saddlbt z1.s, z23.h, z10.h
    // vl128 state = 0x048f8dea
    __ dci(0x458a8240);  // saddlbt z0.s, z18.h, z10.h
    // vl128 state = 0xd9104514
    __ dci(0x458a8e50);  // ssubltb z16.s, z18.h, z10.h
    // vl128 state = 0x6afbf8b6
    __ dci(0x45988e58);  // ssubltb z24.s, z18.h, z24.h
    // vl128 state = 0xfe44a2f8
    __ dci(0x45d08e59);  // ssubltb z25.d, z18.s, z16.s
    // vl128 state = 0x050fb0ab
    __ dci(0x45d08e58);  // ssubltb z24.d, z18.s, z16.s
    // vl128 state = 0xc9160f61
    __ dci(0x45d08259);  // saddlbt z25.d, z18.s, z16.s
    // vl128 state = 0x70ae0c4a
    __ dci(0x45d08b51);  // ssublbt z17.d, z26.s, z16.s
    // vl128 state = 0xe627770c
    __ dci(0x45d08970);  // ssublbt z16.d, z11.s, z16.s
    // vl128 state = 0x445fd924
    __ dci(0x45d28d74);  // ssubltb z20.d, z11.s, z18.s
    // vl128 state = 0x8c7dd6c0
    __ dci(0x45c28d56);  // ssubltb z22.d, z10.s, z2.s
    // vl128 state = 0x925de210
    __ dci(0x45c28d52);  // ssubltb z18.d, z10.s, z2.s
    // vl128 state = 0x28b67c05
    __ dci(0x45c48d5a);  // ssubltb z26.d, z10.s, z4.s
    // vl128 state = 0x48e8377c
    __ dci(0x45c18d5b);  // ssubltb z27.d, z10.s, z1.s
    // vl128 state = 0xb46af33e
    __ dci(0x45818d13);  // ssubltb z19.s, z8.h, z1.h
    // vl128 state = 0x12fada0b
    __ dci(0x45818d12);  // ssubltb z18.s, z8.h, z1.h
    // vl128 state = 0xeaeea3cd
    __ dci(0x45858d9a);  // ssubltb z26.s, z12.h, z5.h
    // vl128 state = 0x6d466bd8
    __ dci(0x45858df2);  // ssubltb z18.s, z15.h, z5.h
    // vl128 state = 0x60c67411
    __ dci(0x45c58d62);  // ssubltb z2.d, z11.s, z5.s
    // vl128 state = 0xec3b40ed
    __ dci(0x45c58b72);  // ssublbt z18.d, z27.s, z5.s
    // vl128 state = 0x5b421b0a
    __ dci(0x45858a76);  // ssublbt z22.s, z19.h, z5.h
    // vl128 state = 0x8a0f26e9
    __ dci(0x45878877);  // ssublbt z23.s, z3.h, z7.h
    // vl128 state = 0xc224293b
    __ dci(0x458f8073);  // saddlbt z19.s, z3.h, z15.h
    // vl128 state = 0x9f5c0b50
    __ dci(0x45878051);  // saddlbt z17.s, z2.h, z7.h
    // vl128 state = 0x2ae674c9
    __ dci(0x45838841);  // ssublbt z1.s, z2.h, z3.h
    // vl128 state = 0x1dff4e20
  }

  // `state` receives the machine-state hash; its address is materialised in
  // x0 and the 32-bit value is loaded into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, indexed by core.GetSVELaneCount(kQRegSize) - 1.
    // Entry 0 equals the final "vl128 state" note above.
    uint32_t expected_hashes[] = {
        0x1dff4e20,
        0x3d2c11df,
        0x64caeccf,
        0x7940c227,
        0xf5f59485,
        0x7ad48c48,
        0xcde4523b,
        0xcb5849f0,
        0x1e7e9722,
        0x8049333f,
        0x40d95eb3,
        0x628a428d,
        0x1cf123f2,
        0x8d377510,
        0x44a03b91,
        0xabe90e98,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// State-hash test for the SVE2 SQABS and SQNEG encodings.
//
// The test resets the machine to a fixed initial state, emits 50 raw
// instruction encodings, hashes the resulting machine state and compares the
// hash with a table entry selected by the SVE lane count.
TEST_SVE(sve2_sqabs_sqneg) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 encodings below.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x4448b23a);  // sqabs z26.h, p4/m, z17.h
    // vl128 state = 0x4aadd589
    __ dci(0x4448b23e);  // sqabs z30.h, p4/m, z17.h
    // vl128 state = 0x86da455e
    __ dci(0x4448a21c);  // sqabs z28.h, p0/m, z16.h
    // vl128 state = 0x4eecab5c
    __ dci(0x4408a298);  // sqabs z24.b, p0/m, z20.b
    // vl128 state = 0xf81ee16e
    __ dci(0x4408a0dc);  // sqabs z28.b, p0/m, z6.b
    // vl128 state = 0x84b94ec5
    __ dci(0x4408a0de);  // sqabs z30.b, p0/m, z6.b
    // vl128 state = 0x626db033
    __ dci(0x4408a19c);  // sqabs z28.b, p0/m, z12.b
    // vl128 state = 0x181303a1
    __ dci(0x4408a3d4);  // sqabs z20.b, p0/m, z30.b
    // vl128 state = 0xf4e93ff3
    __ dci(0x4489a3dc);  // sqneg z28.s, p0/m, z30.s
    // vl128 state = 0xffe7a865
    __ dci(0x4409a1d4);  // sqneg z20.b, p0/m, z14.b
    // vl128 state = 0x6a27d8fe
    __ dci(0x4408a3d0);  // sqabs z16.b, p0/m, z30.b
    // vl128 state = 0x9ffc0414
    __ dci(0x44c8a3d8);  // sqabs z24.d, p0/m, z30.d
    // vl128 state = 0xd59acd78
    __ dci(0x44c8b3fa);  // sqabs z26.d, p4/m, z31.d
    // vl128 state = 0x8853f8ac
    __ dci(0x44c8a2fb);  // sqabs z27.d, p0/m, z23.d
    // vl128 state = 0x439e9079
    __ dci(0x44c8a2f9);  // sqabs z25.d, p0/m, z23.d
    // vl128 state = 0xbaaa56a6
    __ dci(0x4488a2db);  // sqabs z27.s, p0/m, z22.s
    // vl128 state = 0x328cbd5a
    __ dci(0x4488a2df);  // sqabs z31.s, p0/m, z22.s
    // vl128 state = 0x4a74b2da
    __ dci(0x4488a2cf);  // sqabs z15.s, p0/m, z22.s
    // vl128 state = 0x52af62a6
    __ dci(0x4488a04b);  // sqabs z11.s, p0/m, z2.s
    // vl128 state = 0xa45aef42
    __ dci(0x4488a02f);  // sqabs z15.s, p0/m, z1.s
    // vl128 state = 0x0b5444ed
    __ dci(0x4489a06d);  // sqneg z13.s, p0/m, z3.s
    // vl128 state = 0x6f0912d5
    __ dci(0x4489a449);  // sqneg z9.s, p1/m, z2.s
    // vl128 state = 0x669ac78a
    __ dci(0x4489a50b);  // sqneg z11.s, p1/m, z8.s
    // vl128 state = 0x58ae27ee
    __ dci(0x4488a71b);  // sqabs z27.s, p1/m, z24.s
    // vl128 state = 0xa54925f9
    __ dci(0x4408a519);  // sqabs z25.b, p1/m, z8.b
    // vl128 state = 0x45c13095
    __ dci(0x4408a158);  // sqabs z24.b, p0/m, z10.b
    // vl128 state = 0x2d6d547a
    __ dci(0x4488a168);  // sqabs z8.s, p0/m, z11.s
    // vl128 state = 0xc976b77b
    __ dci(0x44c9a16c);  // sqneg z12.d, p0/m, z11.d
    // vl128 state = 0x766e750f
    __ dci(0x44c9a17c);  // sqneg z28.d, p0/m, z11.d
    // vl128 state = 0xbf22858d
    __ dci(0x44c9a878);  // sqneg z24.d, p2/m, z3.d
    // vl128 state = 0xe563a474
    __ dci(0x44c9a8d9);  // sqneg z25.d, p2/m, z6.d
    // vl128 state = 0x573c2648
    __ dci(0x44c9b85b);  // sqneg z27.d, p6/m, z2.d
    // vl128 state = 0x03cdf714
    __ dci(0x4449b87f);  // sqneg z31.h, p6/m, z3.h
    // vl128 state = 0xff4e2cb1
    __ dci(0x4449b81d);  // sqneg z29.h, p6/m, z0.h
    // vl128 state = 0xaab7065e
    __ dci(0x4449a895);  // sqneg z21.h, p2/m, z4.h
    // vl128 state = 0x60d4a6d3
    __ dci(0x4449a825);  // sqneg z5.h, p2/m, z1.h
    // vl128 state = 0x3bed34e4
    __ dci(0x4449a821);  // sqneg z1.h, p2/m, z1.h
    // vl128 state = 0xaa750880
    __ dci(0x4449a820);  // sqneg z0.h, p2/m, z1.h
    // vl128 state = 0xfca9d635
    __ dci(0x4449a822);  // sqneg z2.h, p2/m, z1.h
    // vl128 state = 0x8a92f3e7
    __ dci(0x4449ae23);  // sqneg z3.h, p3/m, z17.h
    // vl128 state = 0xc2db1ac5
    __ dci(0x4449af73);  // sqneg z19.h, p3/m, z27.h
    // vl128 state = 0x386f5f27
    __ dci(0x4449af77);  // sqneg z23.h, p3/m, z27.h
    // vl128 state = 0xff4fd505
    __ dci(0x4489af67);  // sqneg z7.s, p3/m, z27.s
    // vl128 state = 0x4c897605
    __ dci(0x4489ad25);  // sqneg z5.s, p3/m, z9.s
    // vl128 state = 0xcc73333a
    __ dci(0x4409ad07);  // sqneg z7.b, p3/m, z8.b
    // vl128 state = 0x58d37b50
    __ dci(0x4489ad85);  // sqneg z5.s, p3/m, z12.s
    // vl128 state = 0x2a142b9d
    __ dci(0x44c9a984);  // sqneg z4.d, p2/m, z12.d
    // vl128 state = 0x006fd35a
    __ dci(0x44c9a926);  // sqneg z6.d, p2/m, z9.d
    // vl128 state = 0x06c05c5d
    __ dci(0x4449ab2e);  // sqneg z14.h, p2/m, z25.h
    // vl128 state = 0xe41a6fc4
    __ dci(0x4449ab3e);  // sqneg z30.h, p2/m, z25.h
    // vl128 state = 0x6e574bec
  }

  // `state` receives the machine-state hash; its address is materialised in
  // x0 and the 32-bit value is loaded into w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Expected hashes; the first entry equals the final "vl128 state" note
    // above.
    uint32_t expected_hashes[] = {
        0x6e574bec,
        0xec677945,
        0xe7357ba7,
        0xbbf92859,
        0x3f42d943,
        0xe2db0bb1,
        0x704d1161,
0xc0e1f809,
        0x887dd5e7,
        0x452b8b80,
        0xcf455511,
        0x821ad0bc,
        0xb98b1eac,
        0x49ae6871,
        0x16b2e0a6,
        0xaba4d260,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of the SVE2 URECPE and URSQRTE encodings, registered through
// TEST_SVE.
//
// Flow, as written below:
//   1. SetInitialMachineState() is applied (noted as state 0xe2bd2480).
//   2. Twenty raw encodings are emitted with dci(). The trailing comment on
//      each line is its disassembly; each `vl128 state` comment appears to be
//      the reference hash after that instruction for a 128-bit vector length.
//   3. ComputeMachineStateHash() is called with the assembler and &state, and
//      the word stored at &state is then loaded into w0.
//   4. After RUN(), x0 must equal the expected_hashes[] entry selected by
//      core.GetSVELaneCount(kQRegSize) - 1.
TEST_SVE(sve2_urecpe_ursqrte) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size matches the 20 dci() calls emitted inside it.
    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
    __ dci(0x4481bee8);  // ursqrte z8.s, p7/m, z23.s
    // vl128 state = 0x38c317d5
    __ dci(0x4480bea9);  // urecpe z9.s, p7/m, z21.s
    // vl128 state = 0x8412e46d
    __ dci(0x4481bfab);  // ursqrte z11.s, p7/m, z29.s
    // vl128 state = 0xae6c2805
    __ dci(0x4481b9a3);  // ursqrte z3.s, p6/m, z13.s
    // vl128 state = 0x114331ab
    __ dci(0x4481aba2);  // ursqrte z2.s, p2/m, z29.s
    // vl128 state = 0x88f2308d
    __ dci(0x4480abe6);  // urecpe z6.s, p2/m, z31.s
    // vl128 state = 0x328b45b8
    __ dci(0x4480afa2);  // urecpe z2.s, p3/m, z29.s
    // vl128 state = 0x7b67ded4
    __ dci(0x4480ae23);  // urecpe z3.s, p3/m, z17.s
    // vl128 state = 0x48d1ac45
    __ dci(0x4481aa27);  // ursqrte z7.s, p2/m, z17.s
    // vl128 state = 0x475f61b6
    __ dci(0x4481a325);  // ursqrte z5.s, p0/m, z25.s
    // vl128 state = 0xfbf0b767
    __ dci(0x4481a321);  // ursqrte z1.s, p0/m, z25.s
    // vl128 state = 0x31481484
    __ dci(0x4481ab05);  // ursqrte z5.s, p2/m, z24.s
    // vl128 state = 0x5aca5e43
    __ dci(0x4481a995);  // ursqrte z21.s, p2/m, z12.s
    // vl128 state = 0xe3b96378
    __ dci(0x4481bb91);  // ursqrte z17.s, p6/m, z28.s
    // vl128 state = 0x9d469964
    __ dci(0x4481b199);  // ursqrte z25.s, p4/m, z12.s
    // vl128 state = 0xbbabbb9d
    __ dci(0x4481a989);  // ursqrte z9.s, p2/m, z12.s
    // vl128 state = 0xf83e651c
    __ dci(0x4481b18b);  // ursqrte z11.s, p4/m, z12.s
    // vl128 state = 0x70a808da
    __ dci(0x4480b089);  // urecpe z9.s, p4/m, z4.s
    // vl128 state = 0x427916ac
    __ dci(0x4480b2c1);  // urecpe z1.s, p4/m, z22.s
    // vl128 state = 0xbf35be88
    __ dci(0x4480aad1);  // urecpe z17.s, p2/m, z22.s
    // vl128 state = 0xaf69727b
  }

  // Destination for the state hash. It is not initialised here; presumably it
  // is written at run time by the code ComputeMachineStateHash() generates
  // (TODO confirm against the helper's definition).
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  // Load the 32-bit value stored at &state into w0; the check below reads it
  // through x0.
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen reference hashes; entry 0 equals the final `vl128 state` noted
    // above. Presumably one entry per vector length in 128-bit steps - confirm
    // against GetSVELaneCount().
    uint32_t expected_hashes[] = {
        0xaf69727b,
        0x7fda1a01,
        0xd299e078,
        0x9a794a84,
        0x47a453c1,
        0xecc67cf0,
        0x04122ec2,
        0x82dd5669,
        0xcb2bb910,
        0xcc73c54c,
        0x4660030f,
        0x7c42b056,
        0x498a73b1,
        0x1de89fad,
        0x5411c616,
        0x9f378bac,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of SVE2 long arithmetic encodings (UABDLT, USUBLT, SSUBLB,
// UADDLB, SADDLT and related forms, per the disassembly comments). Fifty raw
// encodings are emitted with dci() after SetInitialMachineState().
TEST_SVE(sve2_arith_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45573eac);  // uabdlt z12.h, z21.b, z23.b
    // vl128 state = 0x2ee2e7d4
    __ dci(0x45573c84);  // uabdlt z4.h, z4.b, z23.b
    // vl128 state = 0x33413c6f
    __ dci(0x45571d8c);  // usublt z12.h, z12.b, z23.b
    // vl128 state = 0xb95ffb7e
    __ dci(0x45971d8e);  // usublt z14.s, z12.h, z23.h
    // vl128 state = 0xac4d0015
    __ dci(0x45d7158c);  // ssublt z12.d, z12.s, z23.s
    // vl128 state = 0xe5341703
    __ dci(0x4557119c);  // ssublb z28.h, z12.b, z23.b
    // vl128 state = 0x744f8598
    __ dci(0x45d5118c);  // ssublb z12.d, z12.s, z21.s
    // vl128 state = 0x120c8bf7
    __ dci(0x45551088);  // ssublb z8.h, z4.b, z21.b
    // vl128 state = 0xbf53c9ed
    __ dci(0x455410cc);  // ssublb z12.h, z6.b,
z20.b 2892 // vl128 state = 0x2642a908 2893 __ dci(0x454414c8); // ssublt z8.h, z6.b, z4.b 2894 // vl128 state = 0x0682c7d0 2895 __ dci(0x454510c9); // ssublb z9.h, z6.b, z5.b 2896 // vl128 state = 0x1966420e 2897 __ dci(0x455510ed); // ssublb z13.h, z7.b, z21.b 2898 // vl128 state = 0xdd0ec707 2899 __ dci(0x455508ef); // uaddlb z15.h, z7.b, z21.b 2900 // vl128 state = 0x0756dbf9 2901 __ dci(0x455502e7); // saddlb z7.h, z23.b, z21.b 2902 // vl128 state = 0xb991e688 2903 __ dci(0x455d06f7); // saddlt z23.h, z23.b, z29.b 2904 // vl128 state = 0x55399de0 2905 __ dci(0x455f06df); // saddlt z31.h, z22.b, z31.b 2906 // vl128 state = 0x3379dce4 2907 __ dci(0x45de06db); // saddlt z27.d, z22.s, z30.s 2908 // vl128 state = 0xebf6b857 2909 __ dci(0x45c606da); // saddlt z26.d, z22.s, z6.s 2910 // vl128 state = 0x7625ec15 2911 __ dci(0x45c306db); // saddlt z27.d, z22.s, z3.s 2912 // vl128 state = 0x549988fd 2913 __ dci(0x455306d3); // saddlt z19.h, z22.b, z19.b 2914 // vl128 state = 0xb645cb0f 2915 __ dci(0x455306d1); // saddlt z17.h, z22.b, z19.b 2916 // vl128 state = 0x20a70427 2917 __ dci(0x455306d3); // saddlt z19.h, z22.b, z19.b 2918 // vl128 state = 0xd263ec78 2919 __ dci(0x45510edb); // uaddlt z27.h, z22.b, z17.b 2920 // vl128 state = 0xeecd9b44 2921 __ dci(0x45510bdf); // uaddlb z31.h, z30.b, z17.b 2922 // vl128 state = 0x0577c3d4 2923 __ dci(0x45d10b4f); // uaddlb z15.d, z26.s, z17.s 2924 // vl128 state = 0xca18b475 2925 __ dci(0x45810b47); // uaddlb z7.s, z26.h, z1.h 2926 // vl128 state = 0xdfe68417 2927 __ dci(0x45811bc3); // usublb z3.s, z30.h, z1.h 2928 // vl128 state = 0x96fe0360 2929 __ dci(0x45891b82); // usublb z2.s, z28.h, z9.h 2930 // vl128 state = 0x7e58a9d5 2931 __ dci(0x4589398a); // uabdlb z10.s, z12.h, z9.h 2932 // vl128 state = 0xd7612435 2933 __ dci(0x458919ab); // usublb z11.s, z13.h, z9.h 2934 // vl128 state = 0x8842dbca 2935 __ dci(0x45cb19af); // usublb z15.d, z13.s, z11.s 2936 // vl128 state = 0xfcac3d0f 2937 __ dci(0x45cb19bf); // usublb z31.d, 
z13.s, z11.s 2938 // vl128 state = 0x7b4952d6 2939 __ dci(0x45cb190f); // usublb z15.d, z8.s, z11.s 2940 // vl128 state = 0xb41cb8a3 2941 __ dci(0x45cb1d8d); // usublt z13.d, z12.s, z11.s 2942 // vl128 state = 0x9197543e 2943 __ dci(0x45cb1d89); // usublt z9.d, z12.s, z11.s 2944 // vl128 state = 0x3cc7e16c 2945 __ dci(0x454b0d8b); // uaddlt z11.h, z12.b, z11.b 2946 // vl128 state = 0x5c52744d 2947 __ dci(0x45cb1d8a); // usublt z10.d, z12.s, z11.s 2948 // vl128 state = 0x24c91c53 2949 __ dci(0x454f1d8e); // usublt z14.h, z12.b, z15.b 2950 // vl128 state = 0x0091f2f1 2951 __ dci(0x455b1d8f); // usublt z15.h, z12.b, z27.b 2952 // vl128 state = 0x521f94f7 2953 __ dci(0x455a1c87); // usublt z7.h, z4.b, z26.b 2954 // vl128 state = 0xa0631870 2955 __ dci(0x454a1cb7); // usublt z23.h, z5.b, z10.b 2956 // vl128 state = 0x089384c7 2957 __ dci(0x454218a7); // usublb z7.h, z5.b, z2.b 2958 // vl128 state = 0xe8c3c063 2959 __ dci(0x454a19a6); // usublb z6.h, z13.b, z10.b 2960 // vl128 state = 0x7a9f53ab 2961 __ dci(0x454a3da2); // uabdlt z2.h, z13.b, z10.b 2962 // vl128 state = 0x68d5f375 2963 __ dci(0x45423ca6); // uabdlt z6.h, z5.b, z2.b 2964 // vl128 state = 0x2c980ff7 2965 __ dci(0x454a34a7); // sabdlt z7.h, z5.b, z10.b 2966 // vl128 state = 0xe38196aa 2967 __ dci(0x454a3466); // sabdlt z6.h, z3.b, z10.b 2968 // vl128 state = 0x86c5bcb2 2969 __ dci(0x454b146e); // ssublt z14.h, z3.b, z11.b 2970 // vl128 state = 0xf8527375 2971 __ dci(0x454b146a); // ssublt z10.h, z3.b, z11.b 2972 // vl128 state = 0xf4bfb710 2973 __ dci(0x454b147a); // ssublt z26.h, z3.b, z11.b 2974 // vl128 state = 0xe1000ccf 2975 } 2976 2977 uint32_t state; 2978 ComputeMachineStateHash(&masm, &state); 2979 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 2980 __ Ldr(w0, MemOperand(x0)); 2981 2982 END(); 2983 if (CAN_RUN()) { 2984 RUN(); 2985 uint32_t expected_hashes[] = { 2986 0xe1000ccf, 2987 0xd320fd27, 2988 0x356a62d9, 2989 0xc6245994, 2990 0x78aeec8a, 2991 0xb5d0402b, 2992 0x06684b9e, 2993 0x6033f51d, 
0xd174ee86,
        0x80baaecc,
        0x2c9b263c,
        0x3fba551a,
        0x489fb8b7,
        0x862c9b27,
        0xc0549096,
        0xa927d570,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of the SVE2 wide add/subtract encodings (SADDWB/T, SSUBWB/T,
// UADDWB/T, USUBWB/T per the disassembly comments), registered through
// TEST_SVE.
//
// Flow, as written below:
//   1. SetInitialMachineState() is applied (noted as state 0xe2bd2480).
//   2. Fifty raw encodings are emitted with dci(). The trailing comment on
//      each line is its disassembly; each `vl128 state` comment appears to be
//      the reference hash after that instruction for a 128-bit vector length.
//   3. ComputeMachineStateHash() is called with the assembler and &state, and
//      the word stored at &state is then loaded into w0.
//   4. After RUN(), x0 must equal the expected_hashes[] entry selected by
//      core.GetSVELaneCount(kQRegSize) - 1.
TEST_SVE(sve2_arith_wide) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size matches the 50 dci() calls emitted inside it.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45494683);  // saddwt z3.h, z20.h, z9.b
    // vl128 state = 0x9a3fc71a
    __ dci(0x45494687);  // saddwt z7.h, z20.h, z9.b
    // vl128 state = 0xb016cb2f
    __ dci(0x454b46d7);  // saddwt z23.h, z22.h, z11.b
    // vl128 state = 0x5ce3d8a0
    __ dci(0x455b56d5);  // ssubwt z21.h, z22.h, z27.b
    // vl128 state = 0xbace5453
    __ dci(0x455b567d);  // ssubwt z29.h, z19.h, z27.b
    // vl128 state = 0x1f510928
    __ dci(0x455b506d);  // ssubwb z13.h, z3.h, z27.b
    // vl128 state = 0x19ea553e
    __ dci(0x4559502f);  // ssubwb z15.h, z1.h, z25.b
    // vl128 state = 0x4d88e5db
    __ dci(0x45d95427);  // ssubwt z7.d, z1.d, z25.s
    // vl128 state = 0x069804b6
    __ dci(0x45d95426);  // ssubwt z6.d, z1.d, z25.s
    // vl128 state = 0xfe46cf10
    __ dci(0x45db5c36);  // usubwt z22.d, z1.d, z27.s
    // vl128 state = 0xad3c8120
    __ dci(0x45d95d37);  // usubwt z23.d, z9.d, z25.s
    // vl128 state = 0x833d76fb
    __ dci(0x45d55d27);  // usubwt z7.d, z9.d, z21.s
    // vl128 state = 0xc536845d
    __ dci(0x45d44d25);  // uaddwt z5.d, z9.d, z20.s
    // vl128 state = 0x21f5a29c
    __ dci(0x45dc4927);  // uaddwb z7.d, z9.d, z28.s
    // vl128 state = 0xfe67da2a
    __ dci(0x455c490f);  // uaddwb z15.h, z8.h, z28.b
    // vl128 state = 0x5ec5d506
    __ dci(0x455c490b);  // uaddwb z11.h, z8.h, z28.b
    // vl128 state = 0x74b7d2fc
    __ dci(0x45584923);  // uaddwb z3.h, z9.h, z24.b
    // vl128 state = 0xa785f3c3
    __ dci(0x45584922);  // uaddwb z2.h, z9.h, z24.b
    // vl128 state = 0x373049c0
    __ dci(0x45584940);  // uaddwb z0.h, z10.h, z24.b
    // vl128 state = 0xbf385483
    __ dci(0x45da4944);  // uaddwb z4.d, z10.d, z26.s
    // vl128 state = 0x94cd3b86
    __ dci(0x45524945);  // uaddwb z5.h, z10.h, z18.b
    // vl128 state = 0x8535094f
    __ dci(0x4540494d);  // uaddwb z13.h, z10.h, z0.b
    // vl128 state = 0x328abbdb
    __ dci(0x45c04909);  // uaddwb z9.d, z8.d, z0.s
    // vl128 state = 0x253064cb
    __ dci(0x45c8498d);  // uaddwb z13.d, z12.d, z8.s
    // vl128 state = 0xa1b39fe0
    __ dci(0x45c0418f);  // saddwb z15.d, z12.d, z0.s
    // vl128 state = 0xa72048d9
    __ dci(0x45d84187);  // saddwb z7.d, z12.d, z24.s
    // vl128 state = 0x4c8a23ac
    __ dci(0x45dc5197);  // ssubwb z23.d, z12.d, z28.s
    // vl128 state = 0x352a3d60
    __ dci(0x45dc5d93);  // usubwt z19.d, z12.d, z28.s
    // vl128 state = 0x404b9e8b
    __ dci(0x45dd5592);  // ssubwt z18.d, z12.d, z29.s
    // vl128 state = 0xf46cc758
    __ dci(0x45dd5550);  // ssubwt z16.d, z10.d, z29.s
    // vl128 state = 0x171ebd36
    __ dci(0x45cd55d4);  // ssubwt z20.d, z14.d, z13.s
    // vl128 state = 0x4f2ef46f
    __ dci(0x45dd5dd5);  // usubwt z21.d, z14.d, z29.s
    // vl128 state = 0x0c9ab301
    __ dci(0x45dd5dc5);  // usubwt z5.d, z14.d, z29.s
    // vl128 state = 0x67a10e22
    __ dci(0x454d5dd5);  // usubwt z21.h, z14.h, z13.b
    // vl128 state = 0xb4bd21c0
    __ dci(0x454d4dfd);  // uaddwt z29.h, z15.h, z13.b
    // vl128 state = 0x8df5f90f
    __ dci(0x45494fed);  // uaddwt z13.h, z31.h, z9.b
    // vl128 state = 0x913f7aa4
    __ dci(0x45cb4fef);  // uaddwt z15.d, z31.d, z11.s
    // vl128 state = 0xa23d1307
    __ dci(0x454b47ff);  // saddwt z31.h, z31.h, z11.b
    // vl128 state = 0x026ff306
    __ dci(0x454747f7);  // saddwt z23.h, z31.h, z7.b
    // vl128 state = 0x9abf0566
    __ dci(0x45c743f6);  // saddwb z22.d, z31.d, z7.s
    // vl128 state = 0x27031d0e
    __ dci(0x45c74b66);  // uaddwb z6.d, z27.d, z7.s
    // vl128 state = 0xc6f3a976
    __ dci(0x45474be4);  // uaddwb z4.h, z31.h, z7.b
    // vl128 state = 0xededea24
    __ dci(0x454349e0);  // uaddwb z0.h, z15.h, z3.b
    // vl128 state = 0xf1092d40
    __ dci(0x454359c1);  // usubwb z1.h, z14.h, z3.b
    // vl128 state = 0x2d96f026
    __ dci(0x45535983);  // usubwb z3.h, z12.h, z19.b
    // vl128 state = 0x5a9cab0c
    __ dci(0x45535981);  // usubwb z1.h, z12.h, z19.b
    // vl128 state = 0x7f8d695f
    __ dci(0x45535a83);  // usubwb z3.h, z20.h, z19.b
    // vl128 state = 0xb0ae0f62
    __ dci(0x45d35e81);  // usubwt z1.d, z20.d, z19.s
    // vl128 state = 0xfe7e227b
    __ dci(0x45d25ec9);  // usubwt z9.d, z22.d, z18.s
    // vl128 state = 0xed9dd734
    __ dci(0x45d35e88);  // usubwt z8.d, z20.d, z19.s
    // vl128 state = 0x943f8d24
  }

  // Destination for the state hash. It is not initialised here; presumably it
  // is written at run time by the code ComputeMachineStateHash() generates
  // (TODO confirm against the helper's definition).
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  // Load the 32-bit value stored at &state into w0; the check below reads it
  // through x0.
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen reference hashes; entry 0 equals the final `vl128 state` noted
    // above. Presumably one entry per vector length in 128-bit steps - confirm
    // against GetSVELaneCount().
    uint32_t expected_hashes[] = {
        0x943f8d24,
        0xfe956248,
        0xfefddb40,
        0x4d92bfb3,
        0x01dcd5b1,
        0x29a23c92,
        0xb7587530,
        0xa56fa28c,
        0xa0f8590d,
        0xa6b883a4,
        0x2e50d1fd,
        0x8e976f55,
        0xb21bd3b1,
        0x0c3586e5,
        0xe3d7e7e6,
        0xb1e0e34f,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of the SVE2 long shift-left encodings (USHLLB/T and
// SSHLLB/T per the disassembly comments). Fifty raw encodings are emitted with
// dci() after SetInitialMachineState().
TEST_SVE(sve2_shift_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x4518aafc);  // ushllb z28.s, z23.h, #8
    // vl128 state = 0x07dfb216
    __ dci(0x4518afec);  // ushllt z12.s, z31.h, #8
    // vl128
state = 0xe3c5d68c 3167 __ dci(0x4518adc4); // ushllt z4.s, z14.h, #8 3168 // vl128 state = 0xce8721fc 3169 __ dci(0x4518a1c5); // sshllb z5.s, z14.h, #8 3170 // vl128 state = 0x71820bae 3171 __ dci(0x4508a9cd); // ushllb z13.h, z14.b, #0 3172 // vl128 state = 0xfdc3f7b3 3173 __ dci(0x4508ad9d); // ushllt z29.h, z12.b, #0 3174 // vl128 state = 0x93c1f606 3175 __ dci(0x4508a795); // sshllt z21.h, z28.b, #0 3176 // vl128 state = 0x15ebcb72 3177 __ dci(0x450caf94); // ushllt z20.h, z28.b, #4 3178 // vl128 state = 0x76c630f5 3179 __ dci(0x4508afd6); // ushllt z22.h, z30.b, #0 3180 // vl128 state = 0xa9c6dfbc 3181 __ dci(0x4509aed7); // ushllt z23.h, z22.b, #1 3182 // vl128 state = 0xa5942073 3183 __ dci(0x4508ae55); // ushllt z21.h, z18.b, #0 3184 // vl128 state = 0xe4348777 3185 __ dci(0x450cac51); // ushllt z17.h, z2.b, #4 3186 // vl128 state = 0x91c6e6ea 3187 __ dci(0x450ca870); // ushllb z16.h, z3.b, #4 3188 // vl128 state = 0x40393ae8 3189 __ dci(0x450ca031); // sshllb z17.h, z1.b, #4 3190 // vl128 state = 0x8b9526e8 3191 __ dci(0x450aa030); // sshllb z16.h, z1.b, #2 3192 // vl128 state = 0xd3d0857a 3193 __ dci(0x450aa031); // sshllb z17.h, z1.b, #2 3194 // vl128 state = 0xbdd18de2 3195 __ dci(0x450ba233); // sshllb z19.h, z17.b, #3 3196 // vl128 state = 0x5e5f6f2a 3197 __ dci(0x4509a263); // sshllb z3.h, z19.b, #1 3198 // vl128 state = 0xa3b5427b 3199 __ dci(0x450da673); // sshllt z19.h, z19.b, #5 3200 // vl128 state = 0x97472b22 3201 __ dci(0x451da477); // sshllt z23.s, z3.h, #13 3202 // vl128 state = 0xe6da4012 3203 __ dci(0x451da5f6); // sshllt z22.s, z15.h, #13 3204 // vl128 state = 0x11630552 3205 __ dci(0x450da5b4); // sshllt z20.h, z13.b, #5 3206 // vl128 state = 0xe9a4cad0 3207 __ dci(0x450da5d5); // sshllt z21.h, z14.b, #5 3208 // vl128 state = 0x750d4143 3209 __ dci(0x450fa4d7); // sshllt z23.h, z6.b, #7 3210 // vl128 state = 0xc441984c 3211 __ dci(0x451ba4df); // sshllt z31.s, z6.h, #11 3212 // vl128 state = 0x9a3899af 3213 __ dci(0x451ba4db); // 
sshllt z27.s, z6.h, #11 3214 // vl128 state = 0xbb6684bb 3215 __ dci(0x451ba4bf); // sshllt z31.s, z5.h, #11 3216 // vl128 state = 0x45a2cf1e 3217 __ dci(0x451aa49b); // sshllt z27.s, z4.h, #10 3218 // vl128 state = 0xac10df2f 3219 __ dci(0x451aa49f); // sshllt z31.s, z4.h, #10 3220 // vl128 state = 0x9cecdbd8 3221 __ dci(0x451aa89b); // ushllb z27.s, z4.h, #10 3222 // vl128 state = 0x73fca806 3223 __ dci(0x4518aa9f); // ushllb z31.s, z20.h, #8 3224 // vl128 state = 0xf58883fb 3225 __ dci(0x451aaab7); // ushllb z23.s, z21.h, #10 3226 // vl128 state = 0xf9476b16 3227 __ dci(0x4508aaa7); // ushllb z7.h, z21.b, #0 3228 // vl128 state = 0x6f65ea0e 3229 __ dci(0x4508ae2f); // ushllt z15.h, z17.b, #0 3230 // vl128 state = 0x574341e2 3231 __ dci(0x4509ac27); // ushllt z7.h, z1.b, #1 3232 // vl128 state = 0xe373d23c 3233 __ dci(0x450dae25); // ushllt z5.h, z17.b, #5 3234 // vl128 state = 0xc6ad882b 3235 __ dci(0x4509aea7); // ushllt z7.h, z21.b, #1 3236 // vl128 state = 0xfce8617d 3237 __ dci(0x4509adb7); // ushllt z23.h, z13.b, #1 3238 // vl128 state = 0x30f63baf 3239 __ dci(0x4549ade7); // ushllt z7.d, z15.s, #9 3240 // vl128 state = 0x20522e02 3241 __ dci(0x4549adf7); // ushllt z23.d, z15.s, #9 3242 // vl128 state = 0x18c6aade 3243 __ dci(0x4548aff6); // ushllt z22.d, z31.s, #8 3244 // vl128 state = 0x3ad49ec9 3245 __ dci(0x4548affe); // ushllt z30.d, z31.s, #8 3246 // vl128 state = 0x828be22f 3247 __ dci(0x4548adda); // ushllt z26.d, z14.s, #8 3248 // vl128 state = 0xb4997aa9 3249 __ dci(0x4544add2); // ushllt z18.d, z14.s, #4 3250 // vl128 state = 0x6e7feb55 3251 __ dci(0x454cad42); // ushllt z2.d, z10.s, #12 3252 // vl128 state = 0xb8ff410d 3253 __ dci(0x450dad40); // ushllt z0.h, z10.b, #5 3254 // vl128 state = 0x806bb38f 3255 __ dci(0x4515ad50); // ushllt z16.s, z10.h, #5 3256 // vl128 state = 0x6bd247ad 3257 __ dci(0x4557ad51); // ushllt z17.d, z10.s, #23 3258 // vl128 state = 0xc0959f27 3259 __ dci(0x4557ad41); // ushllt z1.d, z10.s, #23 3260 // vl128 state = 
0xf0176482
    __ dci(0x4557ad40);  // ushllt z0.d, z10.s, #23
    // vl128 state = 0xd5c958bf
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0xd5c958bf,
        0xb7546431,
        0xee4f6b9f,
        0x74f31aeb,
        0x98282a7a,
        0xf2423509,
        0xe3ae7c5c,
        0xe544e7ba,
        0x7d52fba5,
        0x1520b68d,
        0xee539501,
        0x1a65ba45,
        0x0d4c2383,
        0x9f4a30c5,
        0xca6662a2,
        0x64dc5f23,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of the SVE2 narrowing right-shift encodings (SHRNB/T and
// RSHRNB/T per the disassembly comments), registered through TEST_SVE.
//
// Flow, as written below:
//   1. SetInitialMachineState() is applied (noted as state 0xe2bd2480).
//   2. Fifty raw encodings are emitted with dci(). The trailing comment on
//      each line is its disassembly; each `vl128 state` comment appears to be
//      the reference hash after that instruction for a 128-bit vector length.
//   3. ComputeMachineStateHash() is called with the assembler and &state, and
//      the word stored at &state is then loaded into w0.
//   4. After RUN(), x0 must equal the expected_hashes[] entry selected by
//      core.GetSVELaneCount(kQRegSize) - 1.
TEST_SVE(sve2_shift_narrow) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size matches the 50 dci() calls emitted inside it.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x456b1458);  // shrnt z24.s, z2.d, #21
    // vl128 state = 0x70323182
    __ dci(0x456b145c);  // shrnt z28.s, z2.d, #21
    // vl128 state = 0x1d620da3
    __ dci(0x45291454);  // shrnt z20.b, z2.h, #7
    // vl128 state = 0x8e6d3a55
    __ dci(0x4539141c);  // shrnt z28.h, z0.s, #7
    // vl128 state = 0xbc19c1cc
    __ dci(0x453914b8);  // shrnt z24.h, z5.s, #7
    // vl128 state = 0x0bd4d1e8
    __ dci(0x453b14f9);  // shrnt z25.h, z7.s, #5
    // vl128 state = 0x15622295
    __ dci(0x453315fd);  // shrnt z29.h, z15.s, #13
    // vl128 state = 0x45bf3b94
    __ dci(0x45331d75);  // rshrnt z21.h, z11.s, #13
    // vl128 state = 0xbb3574e6
    __ dci(0x45331945);  // rshrnb z5.h, z10.s, #13
    // vl128 state = 0x7b72be5f
    __ dci(0x45331941);  // rshrnb z1.h, z10.s, #13
    // vl128 state = 0x073cdf1a
    __ dci(0x45331949);  // rshrnb z9.h, z10.s, #13
    // vl128 state = 0x3ecd1bf9
    __ dci(0x453b1979);  // rshrnb z25.h, z11.s, #5
    // vl128 state = 0x19f7734e
    __ dci(0x453b11f1);  // shrnb z17.h, z15.s, #5
    // vl128 state = 0x47a3f036
    __ dci(0x453711f9);  // shrnb z25.h, z15.s, #9
    // vl128 state = 0xff283fe4
    __ dci(0x453315f8);  // shrnt z24.h, z15.s, #13
    // vl128 state = 0x1c19f8fb
    __ dci(0x453319f0);  // rshrnb z16.h, z15.s, #13
    // vl128 state = 0x3be08052
    __ dci(0x453b1972);  // rshrnb z18.h, z11.s, #5
    // vl128 state = 0xc5ae76a0
    __ dci(0x453b1962);  // rshrnb z2.h, z11.s, #5
    // vl128 state = 0x75ec3872
    __ dci(0x453b1c60);  // rshrnt z0.h, z3.s, #5
    // vl128 state = 0x9b372229
    __ dci(0x45331c44);  // rshrnt z4.h, z2.s, #13
    // vl128 state = 0xe4e22904
    __ dci(0x45371c0c);  // rshrnt z12.h, z0.s, #9
    // vl128 state = 0x12bc6f4b
    __ dci(0x45331d08);  // rshrnt z8.h, z8.s, #13
    // vl128 state = 0x3ef95245
    __ dci(0x45331c98);  // rshrnt z24.h, z4.s, #13
    // vl128 state = 0x0a4a0d68
    __ dci(0x45731e99);  // rshrnt z25.s, z20.d, #13
    // vl128 state = 0xa01ca6c8
    __ dci(0x457b1a98);  // rshrnb z24.s, z20.d, #5
    // vl128 state = 0x73a50e30
    __ dci(0x452b1a9c);  // rshrnb z28.b, z20.h, #5
    // vl128 state = 0xbad3deda
    __ dci(0x452b1818);  // rshrnb z24.b, z0.h, #5
    // vl128 state = 0x579b3c8f
    __ dci(0x452b181a);  // rshrnb z26.b, z0.h, #5
    // vl128 state = 0xa2b0bf7c
    __ dci(0x452b181b);  // rshrnb z27.b, z0.h, #5
    // vl128 state = 0x7bebdf9e
    __ dci(0x45291a1a);  // rshrnb z26.b, z16.h, #7
    // vl128 state = 0x3f90e1b7
    __ dci(0x45681a12);  // rshrnb z18.s, z16.d, #24
    // vl128 state = 0x57e6295e
    __ dci(0x45681290);  // shrnb z16.s, z20.d, #24
    // vl128 state = 0xa53f48b5
    __ dci(0x45281091);  // shrnb z17.b, z4.h, #8
    // vl128 state = 0x65179ab4
    __ dci(0x45281401);  // shrnt z1.b, z0.h, #8
    // vl128 state = 0x3cc490ba
    __ dci(0x45281c83);  // rshrnt z3.b, z4.h, #8
    // vl128 state = 0x3bc34e69
    __ dci(0x45281c93);  // rshrnt z19.b, z4.h, #8
    // vl128 state = 0x6dded0bb
    __ dci(0x45681cb7);  // rshrnt z23.s, z5.d, #24
    // vl128 state = 0x378f83c0
    __ dci(0x45291cb6);  // rshrnt z22.b, z5.h, #7
    // vl128 state = 0x7e4d1c44
    __ dci(0x45391eb2);  // rshrnt z18.h, z21.s, #7
    // vl128 state = 0x66c0b784
    __ dci(0x45281ea2);  // rshrnt z2.b, z21.h, #8
    // vl128 state = 0x62df2c82
    __ dci(0x452c1fa0);  // rshrnt z0.b, z29.h, #4
    // vl128 state = 0xd79ee307
    __ dci(0x456c1ba2);  // rshrnb z2.s, z29.d, #20
    // vl128 state = 0x8ebb2251
    __ dci(0x45641ab2);  // rshrnb z18.s, z21.d, #28
    // vl128 state = 0x77ec053a
    __ dci(0x456c12ba);  // shrnb z26.s, z21.d, #20
    // vl128 state = 0xcf94b608
    __ dci(0x452812b8);  // shrnb z24.b, z21.h, #8
    // vl128 state = 0x3e067a62
    __ dci(0x4568123a);  // shrnb z26.s, z17.d, #24
    // vl128 state = 0xe451de0f
    __ dci(0x456c1338);  // shrnb z24.s, z25.d, #20
    // vl128 state = 0x4042d707
    __ dci(0x456813b9);  // shrnb z25.s, z29.d, #24
    // vl128 state = 0x5184a2aa
    __ dci(0x456812e9);  // shrnb z9.s, z23.d, #24
    // vl128 state = 0x246344b8
    __ dci(0x456812e1);  // shrnb z1.s, z23.d, #24
    // vl128 state = 0x76866e79
  }

  // Destination for the state hash. It is not initialised here; presumably it
  // is written at run time by the code ComputeMachineStateHash() generates
  // (TODO confirm against the helper's definition).
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  // Load the 32-bit value stored at &state into w0; the check below reads it
  // through x0.
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen reference hashes; entry 0 equals the final `vl128 state` noted
    // above. Presumably one entry per vector length in 128-bit steps - confirm
    // against GetSVELaneCount().
    uint32_t expected_hashes[] = {
        0x76866e79,
        0x42b52927,
        0x84a0bfcc,
        0xf8226fc2,
        0x444f6df5,
        0x2f8dcd68,
        0x5a48278a,
        0x1cdd7f2f,
        0x7816d36c,
        0xebae972f,
        0xa02adfbe,
        0xc93cde0f,
        0xce43287b,
        0x777d6ce0,
        0x9d3be904,
        0x3e059dd2,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of the SVE2 UQSHRNB/T and UQRSHRNB/T narrowing right-shift
// encodings (mnemonics per the disassembly comments). Fifty raw encodings are
// emitted with dci() after SetInitialMachineState().
TEST_SVE(sve2_shift_narrow_usat) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kSVE2, 3442 CPUFeatures::kNEON, 3443 CPUFeatures::kCRC32); 3444 START(); 3445 3446 SetInitialMachineState(&masm); 3447 // state = 0xe2bd2480 3448 3449 { 3450 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 3451 __ dci(0x457a3207); // uqshrnb z7.s, z16.d, #6 3452 // vl128 state = 0x4b40d14e 3453 __ dci(0x457a3206); // uqshrnb z6.s, z16.d, #6 3454 // vl128 state = 0x4dbc0377 3455 __ dci(0x457a3204); // uqshrnb z4.s, z16.d, #6 3456 // vl128 state = 0xa6fbc7f9 3457 __ dci(0x457e3a14); // uqrshrnb z20.s, z16.d, #2 3458 // vl128 state = 0x9e9414a9 3459 __ dci(0x457b3a15); // uqrshrnb z21.s, z16.d, #5 3460 // vl128 state = 0xe8824afd 3461 __ dci(0x457b3ab7); // uqrshrnb z23.s, z21.d, #5 3462 // vl128 state = 0x81ce1be6 3463 __ dci(0x457b3ab6); // uqrshrnb z22.s, z21.d, #5 3464 // vl128 state = 0x5e343a1e 3465 __ dci(0x457f3af7); // uqrshrnb z23.s, z23.d, #1 3466 // vl128 state = 0x09a5c3a0 3467 __ dci(0x457b38ff); // uqrshrnb z31.s, z7.d, #5 3468 // vl128 state = 0xb50710bf 3469 __ dci(0x453338fe); // uqrshrnb z30.h, z7.s, #13 3470 // vl128 state = 0xfc719c85 3471 __ dci(0x453338ee); // uqrshrnb z14.h, z7.s, #13 3472 // vl128 state = 0x157d826a 3473 __ dci(0x453b386a); // uqrshrnb z10.h, z3.s, #5 3474 // vl128 state = 0x9c735771 3475 __ dci(0x452f386e); // uqrshrnb z14.b, z3.h, #1 3476 // vl128 state = 0xe03bb4a4 3477 __ dci(0x452f3aea); // uqrshrnb z10.b, z23.h, #1 3478 // vl128 state = 0xa841b415 3479 __ dci(0x452f38ba); // uqrshrnb z26.b, z5.h, #1 3480 // vl128 state = 0x55302a6d 3481 __ dci(0x452f3878); // uqrshrnb z24.b, z3.h, #1 3482 // vl128 state = 0x73bee182 3483 __ dci(0x453f385c); // uqrshrnb z28.h, z2.s, #1 3484 // vl128 state = 0x75f81ccc 3485 __ dci(0x453f397d); // uqrshrnb z29.h, z11.s, #1 3486 // vl128 state = 0x856fecc9 3487 __ dci(0x457d397c); // uqrshrnb z28.s, z11.d, #3 3488 // vl128 state = 0x4b144bf2 3489 __ dci(0x457f3878); // uqrshrnb z24.s, z3.d, #1 3490 // vl128 state = 0x7ea5dad3 3491 __ dci(0x457b3c7a); // uqrshrnt z26.s, 
z3.d, #5 3492 // vl128 state = 0xa7d48543 3493 __ dci(0x45633c72); // uqrshrnt z18.s, z3.d, #29 3494 // vl128 state = 0x18f647a7 3495 __ dci(0x45613d76); // uqrshrnt z22.s, z11.d, #31 3496 // vl128 state = 0x96d4081b 3497 __ dci(0x45693972); // uqrshrnb z18.s, z11.d, #23 3498 // vl128 state = 0xa8369e83 3499 __ dci(0x45693d53); // uqrshrnt z19.s, z10.d, #23 3500 // vl128 state = 0x7553ff55 3501 __ dci(0x45713d51); // uqrshrnt z17.s, z10.d, #15 3502 // vl128 state = 0x52a52ecc 3503 __ dci(0x45713d99); // uqrshrnt z25.s, z12.d, #15 3504 // vl128 state = 0x4de78f7b 3505 __ dci(0x45753f9d); // uqrshrnt z29.s, z28.d, #11 3506 // vl128 state = 0x0f8948cd 3507 __ dci(0x45753f8d); // uqrshrnt z13.s, z28.d, #11 3508 // vl128 state = 0x7f2c1b05 3509 __ dci(0x45753685); // uqshrnt z5.s, z20.d, #11 3510 // vl128 state = 0xbe6f6ea9 3511 __ dci(0x457d3784); // uqshrnt z4.s, z28.d, #3 3512 // vl128 state = 0x716e1acd 3513 __ dci(0x453c3785); // uqshrnt z5.h, z28.s, #4 3514 // vl128 state = 0x828a3cbb 3515 __ dci(0x453837a4); // uqshrnt z4.h, z29.s, #8 3516 // vl128 state = 0x125ddc3c 3517 __ dci(0x457a37a6); // uqshrnt z6.s, z29.d, #6 3518 // vl128 state = 0x8c5c5d4c 3519 __ dci(0x453a37e4); // uqshrnt z4.h, z31.s, #6 3520 // vl128 state = 0xdea9801f 3521 __ dci(0x453f37ec); // uqshrnt z12.h, z31.s, #1 3522 // vl128 state = 0x6caa6537 3523 __ dci(0x457f37dc); // uqshrnt z28.s, z30.d, #1 3524 // vl128 state = 0x66c0c05d 3525 __ dci(0x45773fde); // uqrshrnt z30.s, z30.d, #9 3526 // vl128 state = 0xf8d495e2 3527 __ dci(0x45653fda); // uqrshrnt z26.s, z30.d, #27 3528 // vl128 state = 0xb543c017 3529 __ dci(0x45613ffb); // uqrshrnt z27.s, z31.d, #31 3530 // vl128 state = 0x58a69fb4 3531 __ dci(0x45613feb); // uqrshrnt z11.s, z31.d, #31 3532 // vl128 state = 0xb5a04d48 3533 __ dci(0x45653fca); // uqrshrnt z10.s, z30.d, #27 3534 // vl128 state = 0xd2d445e0 3535 __ dci(0x45753fe8); // uqrshrnt z8.s, z31.d, #11 3536 // vl128 state = 0x67d89d28 3537 __ dci(0x457537ca); // uqshrnt z10.s, 
z30.d, #11
    // vl128 state = 0xcaa2b6dc
    __ dci(0x457d35ce);  // uqshrnt z14.s, z14.d, #3
    // vl128 state = 0x9da6b10f
    __ dci(0x452d35de);  // uqshrnt z30.b, z14.h, #3
    // vl128 state = 0xda8663db
    __ dci(0x452d314e);  // uqshrnb z14.b, z10.h, #3
    // vl128 state = 0x761992a9
    __ dci(0x453d304f);  // uqshrnb z15.h, z2.s, #3
    // vl128 state = 0x71587e6a
    __ dci(0x453d386e);  // uqrshrnb z14.h, z3.s, #3
    // vl128 state = 0xc6118398
    __ dci(0x453538ec);  // uqrshrnb z12.h, z7.s, #11
    // vl128 state = 0x5e542c3a
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0x5e542c3a,
        0xd9128c5a,
        0x73f430ed,
        0x160c07da,
        0x7bff9561,
        0x4b2d6335,
        0x3738197c,
        0x2b624a48,
        0xbb257999,
        0x0d5d8614,
        0xb031d1fc,
        0x60f2fce2,
        0x92770ad6,
        0x6e33aa78,
        0x8752089b,
        0x37b56a40,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test of the SVE2 SQSHRNB/T, SQRSHRNB/T, SQSHRUNB/T and
// SQRSHRUNB/T narrowing right-shift encodings (mnemonics per the disassembly
// comments), registered through TEST_SVE.
//
// Flow, as written below:
//   1. SetInitialMachineState() is applied (noted as state 0xe2bd2480).
//   2. Fifty raw encodings are emitted with dci(). The trailing comment on
//      each line is its disassembly; each `vl128 state` comment appears to be
//      the reference hash after that instruction for a 128-bit vector length.
//   3. ComputeMachineStateHash() is called with the assembler and &state, and
//      the word stored at &state is then loaded into w0.
//   4. After RUN(), x0 must equal the expected_hashes[] entry selected by
//      core.GetSVELaneCount(kQRegSize) - 1.
TEST_SVE(sve2_shift_narrow_ssat) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size matches the 50 dci() calls emitted inside it.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x456c0875);  // sqrshrunb z21.s, z3.d, #20
    // vl128 state = 0x1446427d
    __ dci(0x456c0877);  // sqrshrunb z23.s, z3.d, #20
    // vl128 state = 0xd839ea94
    __ dci(0x456c0876);  // sqrshrunb z22.s, z3.d, #20
    // vl128 state = 0xe4dd3104
    __ dci(0x456e0c77);  // sqrshrunt z23.s, z3.d, #18
    // vl128 state = 0xd86dd8aa
    __ dci(0x456e0a73);  // sqrshrunb z19.s, z19.d, #18
    // vl128 state = 0x7aacf973
    __ dci(0x456c0e72);  // sqrshrunt z18.s, z19.d, #20
    // vl128 state = 0x6e7b28b8
    __ dci(0x456c2c62);  // sqrshrnt z2.s, z3.d, #20
    // vl128 state = 0x242e0a5e
    __ dci(0x456c24f2);  // sqshrnt z18.s, z7.d, #20
    // vl128 state = 0xf9c993ec
    __ dci(0x456c2570);  // sqshrnt z16.s, z11.d, #20
    // vl128 state = 0x087c4fc1
    __ dci(0x456e2478);  // sqshrnt z24.s, z3.d, #18
    // vl128 state = 0x33fdae0c
    __ dci(0x456e2c30);  // sqrshrnt z16.s, z1.d, #18
    // vl128 state = 0x0c957ea2
    __ dci(0x456e2d78);  // sqrshrnt z24.s, z11.d, #18
    // vl128 state = 0x0792e58a
    __ dci(0x456f2970);  // sqrshrnb z16.s, z11.d, #17
    // vl128 state = 0xe7169693
    __ dci(0x456b2938);  // sqrshrnb z24.s, z9.d, #21
    // vl128 state = 0x1372a92d
    __ dci(0x45692979);  // sqrshrnb z25.s, z11.d, #23
    // vl128 state = 0xc1c31387
    __ dci(0x4563297d);  // sqrshrnb z29.s, z11.d, #29
    // vl128 state = 0x50a08538
    __ dci(0x45632975);  // sqrshrnb z21.s, z11.d, #29
    // vl128 state = 0xda962f25
    __ dci(0x456309f1);  // sqrshrunb z17.s, z15.d, #29
    // vl128 state = 0xe149814e
    __ dci(0x457308f3);  // sqrshrunb z19.s, z7.d, #13
    // vl128 state = 0x6d5ea38b
    __ dci(0x457329fb);  // sqrshrnb z27.s, z15.d, #13
    // vl128 state = 0xee932acb
    __ dci(0x457721f3);  // sqshrnb z19.s, z15.d, #9
    // vl128 state = 0x7e05914b
    __ dci(0x45732171);  // sqshrnb z17.s, z11.d, #13
    // vl128 state = 0xe4bf82a4
    __ dci(0x45722070);  // sqshrnb z16.s, z3.d, #14
    // vl128 state = 0xdfc01530
    __ dci(0x456a2078);  // sqshrnb z24.s, z3.d, #22
    // vl128 state = 0x6b48fc15
    __ dci(0x452a287c);  // sqrshrnb z28.b, z3.h, #6
    // vl128 state = 0x45e86048
    __ dci(0x45282c78);  // sqrshrnt z24.b, z3.h, #8
    // vl128 state = 0xb8dc83dd
    __ dci(0x45602c68);  // sqrshrnt z8.s, z3.d, #32
    // vl128 state = 0xda536cf8
    __ dci(0x45602678);  // sqshrnt z24.s, z19.d, #32
    // vl128 state = 0xb548f79b
    __ dci(0x45682e70);  // sqrshrnt z16.s, z19.d, #24
    // vl128 state = 0xd564dd2d
    __ dci(0x45682260);  // sqshrnb z0.s, z19.d, #24
    // vl128 state = 0x7b901f9b
    __ dci(0x45682642);  // sqshrnt z2.s, z18.d, #24
    // vl128 state = 0x1d4fe6f4
    __ dci(0x45680606);  // sqshrunt z6.s, z16.d, #24
    // vl128 state = 0xe82d65a2
    __ dci(0x45680282);  // sqshrunb z2.s, z20.d, #24
    // vl128 state = 0x8a1ae6f6
    __ dci(0x45680283);  // sqshrunb z3.s, z20.d, #24
    // vl128 state = 0x5e345dcf
    __ dci(0x4568238b);  // sqshrnb z11.s, z28.d, #24
    // vl128 state = 0x31f54470
    __ dci(0x45682383);  // sqshrnb z3.s, z28.d, #24
    // vl128 state = 0x6b48975d
    __ dci(0x45682682);  // sqshrnt z2.s, z20.d, #24
    // vl128 state = 0xa9fba153
    __ dci(0x45782e8a);  // sqrshrnt z10.s, z20.d, #8
    // vl128 state = 0x0fe3100f
    __ dci(0x45780eba);  // sqrshrunt z26.s, z21.d, #8
    // vl128 state = 0x1a392151
    __ dci(0x45700e32);  // sqrshrunt z18.s, z17.d, #16
    // vl128 state = 0x08cea935
    __ dci(0x45700e42);  // sqrshrunt z2.s, z18.d, #16
    // vl128 state = 0x353f24b1
    __ dci(0x45782e52);  // sqrshrnt z18.s, z18.d, #8
    // vl128 state = 0xe06219d0
    __ dci(0x45782e42);  // sqrshrnt z2.s, z18.d, #8
    // vl128 state = 0xbb4c6d3b
    __ dci(0x45742e46);  // sqrshrnt z6.s, z18.d, #12
    // vl128 state = 0x77e7393c
    __ dci(0x45642ec7);  // sqrshrnt z7.s, z22.d, #28
    // vl128 state = 0x5201634c
    __ dci(0x45642a97);  // sqrshrnb z23.s, z20.d, #28
    // vl128 state = 0x49c32fc1
    __ dci(0x45640b87);  // sqrshrunb z7.s, z28.d, #28
    // vl128 state = 0xdd09d56d
    __ dci(0x45640f0f);  // sqrshrunt z15.s, z24.d, #28
    // vl128 state = 0x50f7d144
    __ dci(0x45600e0e);  // sqrshrunt z14.s, z16.d, #32
    // vl128 state = 0xd6bbd38a
    __ dci(0x45620a0f);  // sqrshrunb z15.s, z16.d, #30
    // vl128 state = 0x141e2991
  }

  // Destination for the state hash. It is not initialised here; presumably it
  // is written at run time by the code ComputeMachineStateHash() generates
  // (TODO confirm against the helper's definition).
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  // Load the 32-bit value stored at &state into w0; the check below reads it
  // through x0.
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen reference hashes; entry 0 equals the final `vl128 state` noted
    // above. Presumably one entry per vector length in 128-bit steps - confirm
    // against GetSVELaneCount().
    uint32_t expected_hashes[] = {
        0x141e2991,
        0x8cb951d0,
        0x74337526,
        0x515534c6,
        0xe3789189,
        0xfee7d505,
        0xfaae7ee8,
        0x71a110a3,
        0x6469dcda,
        0xe61425fc,
        0x6840f618,
        0xbc1b116d,
        0xaad97378,
        0x5d91b661,
        0x9eb84163,
        0xf8ca1e37,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

TEST_SVE(sve2_aba_long) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45c2ca3e);  // uabalb z30.d, z17.s, z2.s
    // vl128 state = 0xac47a81c
    __ dci(0x45caca7f);  // uabalb z31.d, z19.s, z10.s
    // vl128 state = 0x10cd4e69
    __ dci(0x455aca7e);  // uabalb z30.h, z19.b, z26.b
    // vl128 state = 0x8fba3755
    __ dci(0x45daca5f);  // uabalb z31.d, z18.s, z26.s
    // vl128 state = 0x8c18257c
    __ dci(0x45d8ca1d);  // uabalb z29.d, z16.s, z24.s
    // vl128 state = 0xe6eef5ec
    __ dci(0x45d8ce95);  // uabalt z21.d, z20.s, z24.s
    // vl128 state = 0x2368baee
    __ dci(0x4598ce14);  // uabalt z20.s, z16.h, z24.h
    // vl128 state = 0xc9281174
    __ dci(0x4598ce04);  // uabalt z4.s, z16.h, z24.h
    // vl128 state = 0xa0b5fc24
    __ dci(0x45d8ce40);  // uabalt z0.d, z18.s, z24.s
    // vl128 state = 0xb3ef6f1d
    __ dci(0x45daca44);  // uabalb z4.d, z18.s, z26.s
    // vl128 state = 0xcfa3666b
    __ dci(0x45dace00);  // uabalt z0.d, z16.s, z26.s
    // vl128 state = 0x27bb4ba9
    __ dci(0x459ece04);  // uabalt z4.s, z16.h, z30.h
    // vl128 state = 0xb6628d3e
    __ dci(0x458ece80);  // uabalt z0.s, z20.h, z14.h
    // vl128 state = 0xe8db526e
    __
dci(0x458ec482); // sabalt z2.s, z4.h, z14.h 3766 // vl128 state = 0x73cd8386 3767 __ dci(0x45cec4a3); // sabalt z3.d, z5.s, z14.s 3768 // vl128 state = 0xba1c4507 3769 __ dci(0x45cec8a1); // uabalb z1.d, z5.s, z14.s 3770 // vl128 state = 0x851cd798 3771 __ dci(0x458ec0a9); // sabalb z9.s, z5.h, z14.h 3772 // vl128 state = 0xc85973b8 3773 __ dci(0x45c6c0ab); // sabalb z11.d, z5.s, z6.s 3774 // vl128 state = 0x84072419 3775 __ dci(0x4544c0a9); // sabalb z9.h, z5.b, z4.b 3776 // vl128 state = 0x533a377a 3777 __ dci(0x4550c0a1); // sabalb z1.h, z5.b, z16.b 3778 // vl128 state = 0x5a216f3a 3779 __ dci(0x4550c0b1); // sabalb z17.h, z5.b, z16.b 3780 // vl128 state = 0x9957b992 3781 __ dci(0x4552c095); // sabalb z21.h, z4.b, z18.b 3782 // vl128 state = 0x666bd8db 3783 __ dci(0x4543c094); // sabalb z20.h, z4.b, z3.b 3784 // vl128 state = 0xd66d3d52 3785 __ dci(0x4543c095); // sabalb z21.h, z4.b, z3.b 3786 // vl128 state = 0x5d47b643 3787 __ dci(0x4543c385); // sabalb z5.h, z28.b, z3.b 3788 // vl128 state = 0x55fc0a65 3789 __ dci(0x4543c38d); // sabalb z13.h, z28.b, z3.b 3790 // vl128 state = 0xbb5ccc0f 3791 __ dci(0x45c3c19d); // sabalb z29.d, z12.s, z3.s 3792 // vl128 state = 0xb3dedffd 3793 __ dci(0x45d3c595); // sabalt z21.d, z12.s, z19.s 3794 // vl128 state = 0xd80597a1 3795 __ dci(0x45d2c185); // sabalb z5.d, z12.s, z18.s 3796 // vl128 state = 0x29a9fafc 3797 __ dci(0x45d2c0b5); // sabalb z21.d, z5.s, z18.s 3798 // vl128 state = 0x85dc16cb 3799 __ dci(0x45d2c0bd); // sabalb z29.d, z5.s, z18.s 3800 // vl128 state = 0xc38b621d 3801 __ dci(0x45d2cab9); // uabalb z25.d, z21.s, z18.s 3802 // vl128 state = 0x3801ad51 3803 __ dci(0x45d0ca9b); // uabalb z27.d, z20.s, z16.s 3804 // vl128 state = 0xd5cc0a31 3805 __ dci(0x45d0ca39); // uabalb z25.d, z17.s, z16.s 3806 // vl128 state = 0x272488a9 3807 __ dci(0x45d0ca3d); // uabalb z29.d, z17.s, z16.s 3808 // vl128 state = 0xea109c4b 3809 __ dci(0x4550ce3c); // uabalt z28.h, z17.b, z16.b 3810 // vl128 state = 0x5a9bdb39 3811 __ 
dci(0x4559ce38); // uabalt z24.h, z17.b, z25.b 3812 // vl128 state = 0xd90984c9 3813 __ dci(0x455bcf39); // uabalt z25.h, z25.b, z27.b 3814 // vl128 state = 0x6c0884ed 3815 __ dci(0x455bceb1); // uabalt z17.h, z21.b, z27.b 3816 // vl128 state = 0x2f01a6ad 3817 __ dci(0x455bceb3); // uabalt z19.h, z21.b, z27.b 3818 // vl128 state = 0x72a428e1 3819 __ dci(0x455bceb1); // uabalt z17.h, z21.b, z27.b 3820 // vl128 state = 0x27adcf54 3821 __ dci(0x4559ce21); // uabalt z1.h, z17.b, z25.b 3822 // vl128 state = 0xf1899dea 3823 __ dci(0x45d9ce05); // uabalt z5.d, z16.s, z25.s 3824 // vl128 state = 0x41e92a5c 3825 __ dci(0x45dbc604); // sabalt z4.d, z16.s, z27.s 3826 // vl128 state = 0x96021962 3827 __ dci(0x45d3c634); // sabalt z20.d, z17.s, z19.s 3828 // vl128 state = 0x4795c9e2 3829 __ dci(0x45dbc235); // sabalb z21.d, z17.s, z27.s 3830 // vl128 state = 0x6e2eccdb 3831 __ dci(0x45dbc07d); // sabalb z29.d, z3.s, z27.s 3832 // vl128 state = 0x2c2e3625 3833 __ dci(0x459bc87c); // uabalb z28.s, z3.h, z27.h 3834 // vl128 state = 0x618669ad 3835 __ dci(0x459bc878); // uabalb z24.s, z3.h, z27.h 3836 // vl128 state = 0x2d1a9a08 3837 __ dci(0x4593cc79); // uabalt z25.s, z3.h, z19.h 3838 // vl128 state = 0xdb6575df 3839 } 3840 3841 uint32_t state; 3842 ComputeMachineStateHash(&masm, &state); 3843 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 3844 __ Ldr(w0, MemOperand(x0)); 3845 3846 END(); 3847 if (CAN_RUN()) { 3848 RUN(); 3849 uint32_t expected_hashes[] = { 3850 0xdb6575df, 3851 0x691c09fc, 3852 0x6d969d30, 3853 0x83db67a7, 3854 0x8ca1109d, 3855 0x5175b8ff, 3856 0xade3cb1b, 3857 0x1c7b0422, 3858 0x1199a415, 3859 0xd1c715e8, 3860 0x2053b361, 3861 0x577c4450, 3862 0x1557204a, 3863 0xe994b21a, 3864 0xec34be56, 3865 0x1c9e0136, 3866 }; 3867 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 3868 } 3869} 3870 3871TEST_SVE(sve2_add_sub_carry) { 3872 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 3873 CPUFeatures::kSVE2, 3874 CPUFeatures::kNEON, 3875 
CPUFeatures::kCRC32); 3876 START(); 3877 3878 SetInitialMachineState(&masm); 3879 // state = 0xe2bd2480 3880 3881 { 3882 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 3883 __ dci(0x4548d4a1); // adclt z1.d, z5.d, z8.d 3884 // vl128 state = 0xde78ceb3 3885 __ dci(0x4588d4a5); // sbclt z5.s, z5.s, z8.s 3886 // vl128 state = 0x35dc8534 3887 __ dci(0x4589d421); // sbclt z1.s, z1.s, z9.s 3888 // vl128 state = 0xa72d158b 3889 __ dci(0x45d9d423); // sbclt z3.d, z1.d, z25.d 3890 // vl128 state = 0x197181b9 3891 __ dci(0x45dfd433); // sbclt z19.d, z1.d, z31.d 3892 // vl128 state = 0xaad0d32d 3893 __ dci(0x4597d437); // sbclt z23.s, z1.s, z23.s 3894 // vl128 state = 0xb1c42b7d 3895 __ dci(0x4597d436); // sbclt z22.s, z1.s, z23.s 3896 // vl128 state = 0x6c51a28c 3897 __ dci(0x4587d537); // sbclt z23.s, z9.s, z7.s 3898 // vl128 state = 0x525b5cf8 3899 __ dci(0x4586d727); // sbclt z7.s, z25.s, z6.s 3900 // vl128 state = 0x33942ff9 3901 __ dci(0x45c6d625); // sbclt z5.d, z17.d, z6.d 3902 // vl128 state = 0x24de09b4 3903 __ dci(0x45c2d6b5); // sbclt z21.d, z21.d, z2.d 3904 // vl128 state = 0xabc0063f 3905 __ dci(0x4546d6b7); // adclt z23.d, z21.d, z6.d 3906 // vl128 state = 0x52765e95 3907 __ dci(0x45c7d6a7); // sbclt z7.d, z21.d, z7.d 3908 // vl128 state = 0x7045d250 3909 __ dci(0x4547d4a5); // adclt z5.d, z5.d, z7.d 3910 // vl128 state = 0xb20f5c2a 3911 __ dci(0x4517d4a1); // adclt z1.s, z5.s, z23.s 3912 // vl128 state = 0x5c2c9c29 3913 __ dci(0x4507d5a5); // adclt z5.s, z13.s, z7.s 3914 // vl128 state = 0x788b25f0 3915 __ dci(0x4507d5ad); // adclt z13.s, z13.s, z7.s 3916 // vl128 state = 0xf27eff1e 3917 __ dci(0x4507d0ac); // adclb z12.s, z5.s, z7.s 3918 // vl128 state = 0xc0b629de 3919 __ dci(0x450ed0ad); // adclb z13.s, z5.s, z14.s 3920 // vl128 state = 0x3e15df94 3921 __ dci(0x458ad0a9); // sbclb z9.s, z5.s, z10.s 3922 // vl128 state = 0x68f64c82 3923 __ dci(0x4582d2ad); // sbclb z13.s, z21.s, z2.s 3924 // vl128 state = 0x882379e1 3925 __ dci(0x4502d3af); // adclb 
z15.s, z29.s, z2.s 3926 // vl128 state = 0x6901994e 3927 __ dci(0x450ad32b); // adclb z11.s, z25.s, z10.s 3928 // vl128 state = 0xa67e9382 3929 __ dci(0x4582d329); // sbclb z9.s, z25.s, z2.s 3930 // vl128 state = 0x9451d0c4 3931 __ dci(0x4592d22b); // sbclb z11.s, z17.s, z18.s 3932 // vl128 state = 0xc19da52e 3933 __ dci(0x459ad2a3); // sbclb z3.s, z21.s, z26.s 3934 // vl128 state = 0x91065b69 3935 __ dci(0x451ad233); // adclb z19.s, z17.s, z26.s 3936 // vl128 state = 0xe3fdc4a5 3937 __ dci(0x450bd232); // adclb z18.s, z17.s, z11.s 3938 // vl128 state = 0x168abbff 3939 __ dci(0x450ad2b6); // adclb z22.s, z21.s, z10.s 3940 // vl128 state = 0x64d0c940 3941 __ dci(0x4582d2b4); // sbclb z20.s, z21.s, z2.s 3942 // vl128 state = 0x37307824 3943 __ dci(0x4582d6e4); // sbclt z4.s, z23.s, z2.s 3944 // vl128 state = 0xd35e02f7 3945 __ dci(0x4500d6f4); // adclt z20.s, z23.s, z0.s 3946 // vl128 state = 0x017ed1b0 3947 __ dci(0x4501d2e4); // adclb z4.s, z23.s, z1.s 3948 // vl128 state = 0x327242bc 3949 __ dci(0x4501d1f4); // adclb z20.s, z15.s, z1.s 3950 // vl128 state = 0x208174e8 3951 __ dci(0x4503d1b0); // adclb z16.s, z13.s, z3.s 3952 // vl128 state = 0xa5a9f61d 3953 __ dci(0x4501d198); // adclb z24.s, z12.s, z1.s 3954 // vl128 state = 0x97e22c2b 3955 __ dci(0x4501d3da); // adclb z26.s, z30.s, z1.s 3956 // vl128 state = 0xd3ac35d5 3957 __ dci(0x4501d6de); // adclt z30.s, z22.s, z1.s 3958 // vl128 state = 0xab835df9 3959 __ dci(0x4503d2dc); // adclb z28.s, z22.s, z3.s 3960 // vl128 state = 0xa048599b 3961 __ dci(0x4502d6d8); // adclt z24.s, z22.s, z2.s 3962 // vl128 state = 0x4c245fee 3963 __ dci(0x4502d6d0); // adclt z16.s, z22.s, z2.s 3964 // vl128 state = 0x0222f3cc 3965 __ dci(0x4502d280); // adclb z0.s, z20.s, z2.s 3966 // vl128 state = 0x16bd7f6a 3967 __ dci(0x458ad284); // sbclb z4.s, z20.s, z10.s 3968 // vl128 state = 0x7ef7d0a2 3969 __ dci(0x458ad6d4); // sbclt z20.s, z22.s, z10.s 3970 // vl128 state = 0x303d8262 3971 __ dci(0x458ad6dc); // sbclt z28.s, z22.s, z10.s 
3972 // vl128 state = 0x86b8b0e9 3973 __ dci(0x458bd7cc); // sbclt z12.s, z30.s, z11.s 3974 // vl128 state = 0x068cc5cd 3975 __ dci(0x45dbd7ce); // sbclt z14.d, z30.d, z27.d 3976 // vl128 state = 0x30acfa7f 3977 __ dci(0x45dfd75e); // sbclt z30.d, z26.d, z31.d 3978 // vl128 state = 0xdbd8b32a 3979 __ dci(0x45ddd7ce); // sbclt z14.d, z30.d, z29.d 3980 // vl128 state = 0x59c3c1a9 3981 __ dci(0x45ddd7cf); // sbclt z15.d, z30.d, z29.d 3982 // vl128 state = 0x5c953a50 3983 } 3984 3985 uint32_t state; 3986 ComputeMachineStateHash(&masm, &state); 3987 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 3988 __ Ldr(w0, MemOperand(x0)); 3989 3990 END(); 3991 if (CAN_RUN()) { 3992 RUN(); 3993 uint32_t expected_hashes[] = { 3994 0x5c953a50, 3995 0x22fea196, 3996 0x084c11a8, 3997 0x6e7e24d1, 3998 0x70965ff7, 3999 0x8c7cb797, 4000 0xdb846b66, 4001 0x512f049d, 4002 0x5c45d25c, 4003 0xa349606f, 4004 0x68a853e5, 4005 0xd92fbeff, 4006 0x52e59a6b, 4007 0xf77ee8ce, 4008 0x6c79623b, 4009 0x7efed6cc, 4010 }; 4011 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 4012 } 4013} 4014 4015TEST_SVE(sve2_add_sub_high) { 4016 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 4017 CPUFeatures::kSVE2, 4018 CPUFeatures::kNEON, 4019 CPUFeatures::kCRC32); 4020 START(); 4021 4022 SetInitialMachineState(&masm); 4023 // state = 0xe2bd2480 4024 4025 { 4026 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 4027 __ dci(0x45fd7464); // subhnt z4.s, z3.d, z29.d 4028 // vl128 state = 0x0eea0f4a 4029 __ dci(0x45fc7c66); // rsubhnt z6.s, z3.d, z28.d 4030 // vl128 state = 0x4dc0d938 4031 __ dci(0x45fc7c6e); // rsubhnt z14.s, z3.d, z28.d 4032 // vl128 state = 0x33de615e 4033 __ dci(0x45f46c7e); // raddhnt z30.s, z3.d, z20.d 4034 // vl128 state = 0xa24af7ae 4035 __ dci(0x45f06e7c); // raddhnt z28.s, z19.d, z16.d 4036 // vl128 state = 0x13883aa2 4037 __ dci(0x45b06a6c); // raddhnb z12.h, z19.s, z16.s 4038 // vl128 state = 0x5bf75f05 4039 __ dci(0x45b96a64); // raddhnb z4.h, z19.s, z25.s 4040 
// vl128 state = 0x0e489878 4041 __ dci(0x45b96820); // raddhnb z0.h, z1.s, z25.s 4042 // vl128 state = 0x86df8f5f 4043 __ dci(0x45b96a01); // raddhnb z1.h, z16.s, z25.s 4044 // vl128 state = 0x0d1563f2 4045 __ dci(0x45b96900); // raddhnb z0.h, z8.s, z25.s 4046 // vl128 state = 0xd66de87e 4047 __ dci(0x45a97904); // rsubhnb z4.h, z8.s, z9.s 4048 // vl128 state = 0x0c34bd33 4049 __ dci(0x45a9790c); // rsubhnb z12.h, z8.s, z9.s 4050 // vl128 state = 0x7892f2c5 4051 __ dci(0x45e97988); // rsubhnb z8.s, z12.d, z9.d 4052 // vl128 state = 0x9709efbd 4053 __ dci(0x45f97909); // rsubhnb z9.s, z8.d, z25.d 4054 // vl128 state = 0x029a3116 4055 __ dci(0x45ff790d); // rsubhnb z13.s, z8.d, z31.d 4056 // vl128 state = 0x48cf21c1 4057 __ dci(0x45ff6d05); // raddhnt z5.s, z8.d, z31.d 4058 // vl128 state = 0x44c94a11 4059 __ dci(0x45ff6dc1); // raddhnt z1.s, z14.d, z31.d 4060 // vl128 state = 0x12fab619 4061 __ dci(0x45ff79d1); // rsubhnb z17.s, z14.d, z31.d 4062 // vl128 state = 0x6f749933 4063 __ dci(0x457f7dd0); // rsubhnt z16.b, z14.h, z31.h 4064 // vl128 state = 0x404889de 4065 __ dci(0x457f75f1); // subhnt z17.b, z15.h, z31.h 4066 // vl128 state = 0x1dae2a16 4067 __ dci(0x457f75f3); // subhnt z19.b, z15.h, z31.h 4068 // vl128 state = 0xc441a9f0 4069 __ dci(0x456d75fb); // subhnt z27.b, z15.h, z13.h 4070 // vl128 state = 0xdd79f567 4071 __ dci(0x45ed7dff); // rsubhnt z31.s, z15.d, z13.d 4072 // vl128 state = 0x49b27a1f 4073 __ dci(0x45e17dfe); // rsubhnt z30.s, z15.d, z1.d 4074 // vl128 state = 0x19cddb35 4075 __ dci(0x45e17df6); // rsubhnt z22.s, z15.d, z1.d 4076 // vl128 state = 0xea722faa 4077 __ dci(0x45e37d72); // rsubhnt z18.s, z11.d, z3.d 4078 // vl128 state = 0x907267b3 4079 __ dci(0x45737d62); // rsubhnt z2.b, z11.h, z19.h 4080 // vl128 state = 0x1e5409d8 4081 __ dci(0x45726d6a); // raddhnt z10.b, z11.h, z18.h 4082 // vl128 state = 0xce3b87ca 4083 __ dci(0x45726f5a); // raddhnt z26.b, z26.h, z18.h 4084 // vl128 state = 0x2f330789 4085 __ dci(0x45706f18); // raddhnt 
z24.b, z24.h, z16.h 4086 // vl128 state = 0xff09606a 4087 __ dci(0x45706f08); // raddhnt z8.b, z24.h, z16.h 4088 // vl128 state = 0x062ac37b 4089 __ dci(0x45706f09); // raddhnt z9.b, z24.h, z16.h 4090 // vl128 state = 0xb12c9142 4091 __ dci(0x45786b08); // raddhnb z8.b, z24.h, z24.h 4092 // vl128 state = 0x77e41545 4093 __ dci(0x45786b0c); // raddhnb z12.b, z24.h, z24.h 4094 // vl128 state = 0x1f3a202d 4095 __ dci(0x457a6308); // addhnb z8.b, z24.h, z26.h 4096 // vl128 state = 0xea51f4b9 4097 __ dci(0x45fb6318); // addhnb z24.s, z24.d, z27.d 4098 // vl128 state = 0x5b98747e 4099 __ dci(0x45b96319); // addhnb z25.h, z24.s, z25.s 4100 // vl128 state = 0xdcebf700 4101 __ dci(0x45bb621d); // addhnb z29.h, z16.s, z27.s 4102 // vl128 state = 0x55a216b1 4103 __ dci(0x45b3625f); // addhnb z31.h, z18.s, z19.s 4104 // vl128 state = 0x3e86d641 4105 __ dci(0x45b3631b); // addhnb z27.h, z24.s, z19.s 4106 // vl128 state = 0x36d052e3 4107 __ dci(0x45bb6213); // addhnb z19.h, z16.s, z27.s 4108 // vl128 state = 0xba012cb8 4109 __ dci(0x45bf7217); // subhnb z23.h, z16.s, z31.s 4110 // vl128 state = 0xdef826a7 4111 __ dci(0x45b67213); // subhnb z19.h, z16.s, z22.s 4112 // vl128 state = 0x5cd11781 4113 __ dci(0x45b66223); // addhnb z3.h, z17.s, z22.s 4114 // vl128 state = 0x2f04c440 4115 __ dci(0x45f66a27); // raddhnb z7.s, z17.d, z22.d 4116 // vl128 state = 0x486d0d03 4117 __ dci(0x45f76825); // raddhnb z5.s, z1.d, z23.d 4118 // vl128 state = 0x8a94d5c9 4119 __ dci(0x45f668a1); // raddhnb z1.s, z5.d, z22.d 4120 // vl128 state = 0x14e8e0e7 4121 __ dci(0x45f469b1); // raddhnb z17.s, z13.d, z20.d 4122 // vl128 state = 0x19b96fb3 4123 __ dci(0x45f469b3); // raddhnb z19.s, z13.d, z20.d 4124 // vl128 state = 0xc98e7d4e 4125 __ dci(0x45f169b7); // raddhnb z23.s, z13.d, z17.d 4126 // vl128 state = 0x7ff24d47 4127 } 4128 4129 uint32_t state; 4130 ComputeMachineStateHash(&masm, &state); 4131 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 4132 __ Ldr(w0, MemOperand(x0)); 4133 4134 END(); 4135 
if (CAN_RUN()) { 4136 RUN(); 4137 uint32_t expected_hashes[] = { 4138 0x7ff24d47, 4139 0xc639a9b3, 4140 0x0a1df4a5, 4141 0x30db6e18, 4142 0xf3e2f795, 4143 0x36ff477d, 4144 0x162f1ca5, 4145 0x36da990b, 4146 0x110b2c35, 4147 0xaf1580f5, 4148 0x14e39873, 4149 0x7f5eb52c, 4150 0x2ececb6f, 4151 0x4e4d71f0, 4152 0x800769d1, 4153 0x1bcbe3a3, 4154 }; 4155 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 4156 } 4157} 4158 4159TEST_SVE(sve2_complex_addition) { 4160 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 4161 CPUFeatures::kSVE2, 4162 CPUFeatures::kNEON, 4163 CPUFeatures::kCRC32); 4164 START(); 4165 4166 SetInitialMachineState(&masm); 4167 // state = 0xe2bd2480 4168 4169 { 4170 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 4171 __ dci(0x4500dc43); // cadd z3.b, z3.b, z2.b, #270 4172 // vl128 state = 0x998365c2 4173 __ dci(0x4540dc13); // cadd z19.h, z19.h, z0.h, #270 4174 // vl128 state = 0xcc866131 4175 __ dci(0x4541d81b); // sqcadd z27.h, z27.h, z0.h, #90 4176 // vl128 state = 0x2ae23a6a 4177 __ dci(0x45c1d853); // sqcadd z19.d, z19.d, z2.d, #90 4178 // vl128 state = 0x1f8de2d3 4179 __ dci(0x4541d8c3); // sqcadd z3.h, z3.h, z6.h, #90 4180 // vl128 state = 0x3655c07c 4181 __ dci(0x4541d8d3); // sqcadd z19.h, z19.h, z6.h, #90 4182 // vl128 state = 0x3a8fe2d9 4183 __ dci(0x4541d811); // sqcadd z17.h, z17.h, z0.h, #90 4184 // vl128 state = 0x003c88ea 4185 __ dci(0x4540da10); // cadd z16.h, z16.h, z16.h, #90 4186 // vl128 state = 0xe20c1375 4187 __ dci(0x4540da18); // cadd z24.h, z24.h, z16.h, #90 4188 // vl128 state = 0x67bb0270 4189 __ dci(0x4540de5a); // cadd z26.h, z26.h, z18.h, #270 4190 // vl128 state = 0x7abb4f8f 4191 __ dci(0x4540de4a); // cadd z10.h, z10.h, z18.h, #270 4192 // vl128 state = 0x42850f11 4193 __ dci(0x4500decb); // cadd z11.b, z11.b, z22.b, #270 4194 // vl128 state = 0xda605f59 4195 __ dci(0x4500da83); // cadd z3.b, z3.b, z20.b, #90 4196 // vl128 state = 0x99e63476 4197 __ dci(0x4500dc8b); // cadd z11.b, z11.b, z4.b, 
#270 4198 // vl128 state = 0xd444a939 4199 __ dci(0x4500dc8f); // cadd z15.b, z15.b, z4.b, #270 4200 // vl128 state = 0xde3ad968 4201 __ dci(0x4500d99f); // cadd z31.b, z31.b, z12.b, #90 4202 // vl128 state = 0xd7cdb177 4203 __ dci(0x4540d91e); // cadd z30.h, z30.h, z8.h, #90 4204 // vl128 state = 0x74575b36 4205 __ dci(0x4541d81a); // sqcadd z26.h, z26.h, z0.h, #90 4206 // vl128 state = 0x3d347b0b 4207 __ dci(0x4501d83b); // sqcadd z27.b, z27.b, z1.b, #90 4208 // vl128 state = 0x03df7859 4209 __ dci(0x45c1d83f); // sqcadd z31.d, z31.d, z1.d, #90 4210 // vl128 state = 0xf0cdbf68 4211 __ dci(0x45c1d83e); // sqcadd z30.d, z30.d, z1.d, #90 4212 // vl128 state = 0x0931dda4 4213 __ dci(0x45c1d83c); // sqcadd z28.d, z28.d, z1.d, #90 4214 // vl128 state = 0x460b5369 4215 __ dci(0x4581da3e); // sqcadd z30.s, z30.s, z17.s, #90 4216 // vl128 state = 0x71af9203 4217 __ dci(0x45c1d83f); // sqcadd z31.d, z31.d, z1.d, #90 4218 // vl128 state = 0xd6babc53 4219 __ dci(0x4581da3e); // sqcadd z30.s, z30.s, z17.s, #90 4220 // vl128 state = 0xd3e4f42f 4221 __ dci(0x4501d83f); // sqcadd z31.b, z31.b, z1.b, #90 4222 // vl128 state = 0x7a594239 4223 __ dci(0x4501dcbb); // sqcadd z27.b, z27.b, z5.b, #270 4224 // vl128 state = 0x24a5a8c9 4225 __ dci(0x4501dfba); // sqcadd z26.b, z26.b, z29.b, #270 4226 // vl128 state = 0x0c3df842 4227 __ dci(0x4581dfea); // sqcadd z10.s, z10.s, z31.s, #270 4228 // vl128 state = 0x6173c97f 4229 __ dci(0x4581db7a); // sqcadd z26.s, z26.s, z27.s, #90 4230 // vl128 state = 0x55090d5f 4231 __ dci(0x4581db1b); // sqcadd z27.s, z27.s, z24.s, #90 4232 // vl128 state = 0x63477385 4233 __ dci(0x4581da93); // sqcadd z19.s, z19.s, z20.s, #90 4234 // vl128 state = 0xc996545e 4235 __ dci(0x45c1db92); // sqcadd z18.d, z18.d, z28.d, #90 4236 // vl128 state = 0xa48bf827 4237 __ dci(0x45c1db93); // sqcadd z19.d, z19.d, z28.d, #90 4238 // vl128 state = 0xf5a3b641 4239 __ dci(0x45c1daa3); // sqcadd z3.d, z3.d, z21.d, #90 4240 // vl128 state = 0x20ad4c28 4241 __ 
dci(0x4581dba7); // sqcadd z7.s, z7.s, z29.s, #90 4242 // vl128 state = 0xc9e36e96 4243 __ dci(0x45c1daaf); // sqcadd z15.d, z15.d, z21.d, #90 4244 // vl128 state = 0x6eb23fd2 4245 __ dci(0x45c1daae); // sqcadd z14.d, z14.d, z21.d, #90 4246 // vl128 state = 0x585d4d63 4247 __ dci(0x4541dae6); // sqcadd z6.h, z6.h, z23.h, #90 4248 // vl128 state = 0x827cc0a8 4249 __ dci(0x4541daee); // sqcadd z14.h, z14.h, z23.h, #90 4250 // vl128 state = 0xe00543a0 4251 __ dci(0x4501dabe); // sqcadd z30.b, z30.b, z21.b, #90 4252 // vl128 state = 0x2313db47 4253 __ dci(0x4501deff); // sqcadd z31.b, z31.b, z23.b, #270 4254 // vl128 state = 0xe30d4e83 4255 __ dci(0x4501defd); // sqcadd z29.b, z29.b, z23.b, #270 4256 // vl128 state = 0xb95d6d94 4257 __ dci(0x4501def5); // sqcadd z21.b, z21.b, z23.b, #270 4258 // vl128 state = 0x4f18b02e 4259 __ dci(0x4501def4); // sqcadd z20.b, z20.b, z23.b, #270 4260 // vl128 state = 0x20ae9a78 4261 __ dci(0x4501dee4); // sqcadd z4.b, z4.b, z23.b, #270 4262 // vl128 state = 0x4eef87a9 4263 __ dci(0x4501dee6); // sqcadd z6.b, z6.b, z23.b, #270 4264 // vl128 state = 0x1b041a7b 4265 __ dci(0x4501dfc2); // sqcadd z2.b, z2.b, z30.b, #270 4266 // vl128 state = 0xeaf5e18f 4267 __ dci(0x4500df92); // cadd z18.b, z18.b, z28.b, #270 4268 // vl128 state = 0xc47ee5e7 4269 __ dci(0x4500de13); // cadd z19.b, z19.b, z16.b, #270 4270 // vl128 state = 0x6482d75c 4271 } 4272 4273 uint32_t state; 4274 ComputeMachineStateHash(&masm, &state); 4275 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 4276 __ Ldr(w0, MemOperand(x0)); 4277 4278 END(); 4279 if (CAN_RUN()) { 4280 RUN(); 4281 uint32_t expected_hashes[] = { 4282 0x6482d75c, 4283 0x48d9bd2f, 4284 0xd6bd52ae, 4285 0x56be94f0, 4286 0x620cfb69, 4287 0xb646e0fe, 4288 0x6034718f, 4289 0xd8187657, 4290 0x211218bb, 4291 0xc973a707, 4292 0x6020dcc9, 4293 0x8fadad0c, 4294 0x0132ecbc, 4295 0x3a07eb63, 4296 0x5c20eb82, 4297 0xc92d6cb2, 4298 }; 4299 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 4300 } 
4301} 4302 4303TEST_SVE(sve2_bit_permute) { 4304 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 4305 CPUFeatures::kSVE2, 4306 CPUFeatures::kSVEBitPerm, 4307 CPUFeatures::kNEON, 4308 CPUFeatures::kCRC32); 4309 START(); 4310 4311 SetInitialMachineState(&masm); 4312 // state = 0xe2bd2480 4313 4314 { 4315 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 4316 __ dci(0x455fbb1a); // bgrp z26.h, z24.h, z31.h 4317 // vl128 state = 0x39fb8e5b 4318 __ dci(0x451fbb58); // bgrp z24.b, z26.b, z31.b 4319 // vl128 state = 0x7fbccdbd 4320 __ dci(0x4517bb19); // bgrp z25.b, z24.b, z23.b 4321 // vl128 state = 0x67caf176 4322 __ dci(0x4517bb18); // bgrp z24.b, z24.b, z23.b 4323 // vl128 state = 0x665fd977 4324 __ dci(0x4517ba5c); // bgrp z28.b, z18.b, z23.b 4325 // vl128 state = 0x0f2c1473 4326 __ dci(0x4517ba38); // bgrp z24.b, z17.b, z23.b 4327 // vl128 state = 0x253789a0 4328 __ dci(0x4517ba3c); // bgrp z28.b, z17.b, z23.b 4329 // vl128 state = 0xd3b26fd2 4330 __ dci(0x4515ba6c); // bgrp z12.b, z19.b, z21.b 4331 // vl128 state = 0x4bad6941 4332 __ dci(0x4515bac4); // bgrp z4.b, z22.b, z21.b 4333 // vl128 state = 0x7c70d2d2 4334 __ dci(0x4517ba86); // bgrp z6.b, z20.b, z23.b 4335 // vl128 state = 0x5794816b 4336 __ dci(0x4517ba87); // bgrp z7.b, z20.b, z23.b 4337 // vl128 state = 0xe67993b1 4338 __ dci(0x4515b297); // bext z23.b, z20.b, z21.b 4339 // vl128 state = 0x3041b7ee 4340 __ dci(0x4517b396); // bext z22.b, z28.b, z23.b 4341 // vl128 state = 0xb571d524 4342 __ dci(0x451bb386); // bext z6.b, z28.b, z27.b 4343 // vl128 state = 0x73ce1823 4344 __ dci(0x4513b784); // bdep z4.b, z28.b, z19.b 4345 // vl128 state = 0x4264f0f2 4346 __ dci(0x4593b7ac); // bdep z12.s, z29.s, z19.s 4347 // vl128 state = 0xf9cb9d26 4348 __ dci(0x4593b7a8); // bdep z8.s, z29.s, z19.s 4349 // vl128 state = 0xa2b310a0 4350 __ dci(0x4597b780); // bdep z0.s, z28.s, z23.s 4351 // vl128 state = 0xee25c82f 4352 __ dci(0x4597b781); // bdep z1.s, z28.s, z23.s 4353 // vl128 state = 0xdca7577f 4354 __ 
dci(0x4597b7e3); // bdep z3.s, z31.s, z23.s 4355 // vl128 state = 0x32294429 4356 __ dci(0x45dfb7e1); // bdep z1.d, z31.d, z31.d 4357 // vl128 state = 0xc147e511 4358 __ dci(0x455db7e5); // bdep z5.h, z31.h, z29.h 4359 // vl128 state = 0x7a51d422 4360 __ dci(0x45d5b7e4); // bdep z4.d, z31.d, z21.d 4361 // vl128 state = 0x512ad92a 4362 __ dci(0x45c7b7ec); // bdep z12.d, z31.d, z7.d 4363 // vl128 state = 0xe59fbf5c 4364 __ dci(0x4547b7a8); // bdep z8.h, z29.h, z7.h 4365 // vl128 state = 0xb85fd3b1 4366 __ dci(0x454fb72c); // bdep z12.h, z25.h, z15.h 4367 // vl128 state = 0xc820e9d0 4368 __ dci(0x4557b724); // bdep z4.h, z25.h, z23.h 4369 // vl128 state = 0x814ff3f4 4370 __ dci(0x4557bb20); // bgrp z0.h, z25.h, z23.h 4371 // vl128 state = 0xc58dee50 4372 __ dci(0x4556b321); // bext z1.h, z25.h, z22.h 4373 // vl128 state = 0xf19c0956 4374 __ dci(0x4556b3e3); // bext z3.h, z31.h, z22.h 4375 // vl128 state = 0x2a256808 4376 __ dci(0x4546b367); // bext z7.h, z27.h, z6.h 4377 // vl128 state = 0x1c6696f4 4378 __ dci(0x4556bb66); // bgrp z6.h, z27.h, z22.h 4379 // vl128 state = 0x32522ca2 4380 __ dci(0x4556bb76); // bgrp z22.h, z27.h, z22.h 4381 // vl128 state = 0x33fe6590 4382 __ dci(0x45c6bb66); // bgrp z6.d, z27.d, z6.d 4383 // vl128 state = 0x45d26723 4384 __ dci(0x45c2b976); // bgrp z22.d, z11.d, z2.d 4385 // vl128 state = 0x364d9885 4386 __ dci(0x4540b974); // bgrp z20.h, z11.h, z0.h 4387 // vl128 state = 0x36a0bd94 4388 __ dci(0x45c0b164); // bext z4.d, z11.d, z0.d 4389 // vl128 state = 0x4ee9a90c 4390 __ dci(0x45ccb16c); // bext z12.d, z11.d, z12.d 4391 // vl128 state = 0x30c32d69 4392 __ dci(0x458cb368); // bext z8.s, z27.s, z12.s 4393 // vl128 state = 0xfc2c912f 4394 __ dci(0x450cb769); // bdep z9.b, z27.b, z12.b 4395 // vl128 state = 0xef976b44 4396 __ dci(0x458cb7eb); // bdep z11.s, z31.s, z12.s 4397 // vl128 state = 0x6f9e21b8 4398 __ dci(0x4588b5ef); // bdep z15.s, z15.s, z8.s 4399 // vl128 state = 0xa1f212e2 4400 __ dci(0x4598b5ad); // bdep z13.s, z13.s, z24.s 
4401 // vl128 state = 0xe4286a40 4402 __ dci(0x4598b5af); // bdep z15.s, z13.s, z24.s 4403 // vl128 state = 0x7d6622e5 4404 __ dci(0x4598b6ad); // bdep z13.s, z21.s, z24.s 4405 // vl128 state = 0xcd00829c 4406 __ dci(0x4518b2af); // bext z15.b, z21.b, z24.b 4407 // vl128 state = 0xa8d58b2d 4408 __ dci(0x4519b2e7); // bext z7.b, z23.b, z25.b 4409 // vl128 state = 0x2b7b7c44 4410 __ dci(0x4518b2a6); // bext z6.b, z21.b, z24.b 4411 // vl128 state = 0x09c81b7e 4412 __ dci(0x4518b2a7); // bext z7.b, z21.b, z24.b 4413 // vl128 state = 0xab1b2b22 4414 __ dci(0x4519b6a5); // bdep z5.b, z21.b, z25.b 4415 // vl128 state = 0x03476e4c 4416 } 4417 4418 uint32_t state; 4419 ComputeMachineStateHash(&masm, &state); 4420 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 4421 __ Ldr(w0, MemOperand(x0)); 4422 4423 END(); 4424 if (CAN_RUN()) { 4425 RUN(); 4426 uint32_t expected_hashes[] = { 4427 0x03476e4c, 4428 0xcc54e76f, 4429 0x08324d66, 4430 0xcc289ee1, 4431 0xacd3ba43, 4432 0xe961aeda, 4433 0x60a204b1, 4434 0xde020904, 4435 0x0652d1e5, 4436 0x7982dc25, 4437 0x02a2c1cb, 4438 0x4dd9e71b, 4439 0xb57f587f, 4440 0xb75e0d62, 4441 0x78330809, 4442 0xbc7046ae, 4443 }; 4444 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 4445 } 4446} 4447 4448TEST_SVE(sve2_smullb_smullt_umullb_umullt_vector) { 4449 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 4450 CPUFeatures::kSVE2, 4451 CPUFeatures::kNEON, 4452 CPUFeatures::kCRC32); 4453 START(); 4454 4455 SetInitialMachineState(&masm); 4456 // state = 0xe2bd2480 4457 4458 { 4459 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 4460 __ dci(0x455a7bc2); // umullb z2.h, z30.b, z26.b 4461 // vl128 state = 0xe2a2b611 4462 __ dci(0x454a7b92); // umullb z18.h, z28.b, z10.b 4463 // vl128 state = 0x12b3b0c6 4464 __ dci(0x45427bda); // umullb z26.h, z30.b, z2.b 4465 // vl128 state = 0x74f4a891 4466 __ dci(0x45c67bde); // umullb z30.d, z30.s, z6.s 4467 // vl128 state = 0x20402d9f 4468 __ dci(0x45467b56); // umullb z22.h, z26.b, z6.b 
4469 // vl128 state = 0x75e15413 4470 __ dci(0x45427f54); // umullt z20.h, z26.b, z2.b 4471 // vl128 state = 0x51478ee1 4472 __ dci(0x45427fe4); // umullt z4.h, z31.b, z2.b 4473 // vl128 state = 0x63381b63 4474 __ dci(0x45567fe5); // umullt z5.h, z31.b, z22.b 4475 // vl128 state = 0x0967f882 4476 __ dci(0x45467df5); // umullt z21.h, z15.b, z6.b 4477 // vl128 state = 0x753e96b9 4478 __ dci(0x454279f1); // umullb z17.h, z15.b, z2.b 4479 // vl128 state = 0xcff906e6 4480 __ dci(0x454078f5); // umullb z21.h, z7.b, z0.b 4481 // vl128 state = 0x5609bd14 4482 __ dci(0x454070d4); // smullb z20.h, z6.b, z0.b 4483 // vl128 state = 0xf284d300 4484 __ dci(0x45407016); // smullb z22.h, z0.b, z0.b 4485 // vl128 state = 0xbb549bf7 4486 __ dci(0x45487086); // smullb z6.h, z4.b, z8.b 4487 // vl128 state = 0x6ef99ff1 4488 __ dci(0x454070c7); // smullb z7.h, z6.b, z0.b 4489 // vl128 state = 0x90177a84 4490 __ dci(0x45407846); // umullb z6.h, z2.b, z0.b 4491 // vl128 state = 0xd3dbb2fe 4492 __ dci(0x45417a56); // umullb z22.h, z18.b, z1.b 4493 // vl128 state = 0x7d30cf73 4494 __ dci(0x45417877); // umullb z23.h, z3.b, z1.b 4495 // vl128 state = 0x0623e678 4496 __ dci(0x45417807); // umullb z7.h, z0.b, z1.b 4497 // vl128 state = 0xe849cf35 4498 __ dci(0x454178a3); // umullb z3.h, z5.b, z1.b 4499 // vl128 state = 0xcad236a9 4500 __ dci(0x45437cab); // umullt z11.h, z5.b, z3.b 4501 // vl128 state = 0xc8dfcb1d 4502 __ dci(0x454b7c3b); // umullt z27.h, z1.b, z11.b 4503 // vl128 state = 0x6136e2d6 4504 __ dci(0x454b7a3a); // umullb z26.h, z17.b, z11.b 4505 // vl128 state = 0x091beb5a 4506 __ dci(0x454b72b2); // smullb z18.h, z21.b, z11.b 4507 // vl128 state = 0x932b30ec 4508 __ dci(0x454b7622); // smullt z2.h, z17.b, z11.b 4509 // vl128 state = 0xee51239c 4510 __ dci(0x454b76ea); // smullt z10.h, z23.b, z11.b 4511 // vl128 state = 0xf4fcc577 4512 __ dci(0x454b74ab); // smullt z11.h, z5.b, z11.b 4513 // vl128 state = 0xcf0c8028 4514 __ dci(0x454d74bb); // smullt z27.h, z5.b, z13.b 4515 // 
vl128 state = 0x0f8523c8 4516 __ dci(0x454d740b); // smullt z11.h, z0.b, z13.b 4517 // vl128 state = 0xc02b2f52 4518 __ dci(0x454d7403); // smullt z3.h, z0.b, z13.b 4519 // vl128 state = 0x11b4180c 4520 __ dci(0x45557413); // smullt z19.h, z0.b, z21.b 4521 // vl128 state = 0x26eef57a 4522 __ dci(0x45557531); // smullt z17.h, z9.b, z21.b 4523 // vl128 state = 0x6f3fce98 4524 __ dci(0x455574b9); // smullt z25.h, z5.b, z21.b 4525 // vl128 state = 0x0d4ac272 4526 __ dci(0x455571b1); // smullb z17.h, z13.b, z21.b 4527 // vl128 state = 0x7c866a41 4528 __ dci(0x455573e1); // smullb z1.h, z31.b, z21.b 4529 // vl128 state = 0x9c724758 4530 __ dci(0x455473c9); // smullb z9.h, z30.b, z20.b 4531 // vl128 state = 0xa9a8d0aa 4532 __ dci(0x455473cb); // smullb z11.h, z30.b, z20.b 4533 // vl128 state = 0xd7eec117 4534 __ dci(0x455473a9); // smullb z9.h, z29.b, z20.b 4535 // vl128 state = 0x35caaa62 4536 __ dci(0x455473a8); // smullb z8.h, z29.b, z20.b 4537 // vl128 state = 0x97a1d399 4538 __ dci(0x455473b8); // smullb z24.h, z29.b, z20.b 4539 // vl128 state = 0x3adce4ee 4540 __ dci(0x455673fa); // smullb z26.h, z31.b, z22.b 4541 // vl128 state = 0xd17120ea 4542 __ dci(0x455e77ea); // smullt z10.h, z31.b, z30.b 4543 // vl128 state = 0x1e238a9e 4544 __ dci(0x455677da); // smullt z26.h, z30.b, z22.b 4545 // vl128 state = 0xfbccf6c2 4546 __ dci(0x454673d8); // smullb z24.h, z30.b, z6.b 4547 // vl128 state = 0xa47583be 4548 __ dci(0x45c67359); // smullb z25.d, z26.s, z6.s 4549 // vl128 state = 0x4e8a9b37 4550 __ dci(0x45c47751); // smullt z17.d, z26.s, z4.s 4551 // vl128 state = 0xe3c06571 4552 __ dci(0x45d67741); // smullt z1.d, z26.s, z22.s 4553 // vl128 state = 0x6629e034 4554 __ dci(0x45d67b45); // umullb z5.d, z26.s, z22.s 4555 // vl128 state = 0x66a99e85 4556 __ dci(0x45867b47); // umullb z7.s, z26.h, z6.h 4557 // vl128 state = 0xf1cc3339 4558 __ dci(0x45867b45); // umullb z5.s, z26.h, z6.h 4559 // vl128 state = 0x8bf658d7 4560 } 4561 4562 uint32_t state; 4563 
ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0x8bf658d7,
        0x82fac555,
        0x07c3d434,
        0x25d2ee2b,
        0xe70f4394,
        0x79223404,
        0x368ed35f,
        0x6565d842,
        0xead08c30,
        0xae35e083,
        0xe1959b85,
        0x94ad31e7,
        0x9caeda4d,
        0x7611d6dc,
        0x22977911,
        0xcf3754ec,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test for the vector forms of the SVE2 widening multiplies
// SQDMULLB, SQDMULLT, PMULLB and PMULLT.
//
// After SetInitialMachineState(), 50 raw encodings are emitted with dci(), the
// machine state is hashed with ComputeMachineStateHash(), and the 32-bit result
// is loaded into w0 and compared against expected_hashes[].
//
// The `vl128 state` comments appear to record the running hash after each
// instruction for a 128-bit vector length; the last one equals
// expected_hashes[0].
//
// NOTE(review): the test name repeats `pmullb`, but the body exercises both
// pmullb and pmullt, so `..._pmullb_pmullt_...` was presumably intended. The
// name is left as-is because it is the test's registered identifier.
TEST_SVE(sve2_sqdmullb_sqdmullt_pmullb_pmullb_vector) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Sized for exactly the 50 dci() words emitted below; keep the two in sync.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45936164);  // sqdmullb z4.s, z11.h, z19.h
    // vl128 state = 0xacc89592
    __ dci(0x459161f4);  // sqdmullb z20.s, z15.h, z17.h
    // vl128 state = 0x142c66e5
    __ dci(0x459563f5);  // sqdmullb z21.s, z31.h, z21.h
    // vl128 state = 0x5cfcb839
    __ dci(0x45956265);  // sqdmullb z5.s, z19.h, z21.h
    // vl128 state = 0x33616223
    __ dci(0x45d56235);  // sqdmullb z21.d, z17.s, z21.s
    // vl128 state = 0x987a4a0d
    __ dci(0x45556031);  // sqdmullb z17.h, z1.b, z21.b
    // vl128 state = 0xf7dd9b01
    __ dci(0x45506035);  // sqdmullb z21.h, z1.b, z16.b
    // vl128 state = 0x6fa54cf3
    __ dci(0x45506334);  // sqdmullb z20.h, z25.b, z16.b
    // vl128 state = 0x04398c6e
    __ dci(0x45486336);  // sqdmullb z22.h, z25.b, z8.b
    // vl128 state = 0x4cda753c
    __ dci(0x45486334);  // sqdmullb z20.h, z25.b, z8.b
    // vl128 state = 0x53993d4a
    __ dci(0x45496b35);  // pmullb z21.h, z25.b, z9.b
    // vl128 state = 0xa591f97c
    __ dci(0x45496b37);  // pmullb z23.h, z25.b, z9.b
    // vl128 state = 0x5cb91e99
    __ dci(0x45496fb3);  // pmullt z19.h, z29.b, z9.b
    // vl128 state = 0x5031ac4d
    __ dci(0x45596f3b);  // pmullt z27.h, z25.b, z25.b
    // vl128 state = 0xb0a76e75
    __ dci(0x455d6f13);  // pmullt z19.h, z24.b, z29.b
    // vl128 state = 0xe84ca196
    __ dci(0x455d6fb2);  // pmullt z18.h, z29.b, z29.b
    // vl128 state = 0xd294ce54
    __ dci(0x455c6bb0);  // pmullb z16.h, z29.b, z28.b
    // vl128 state = 0x90f01471
    __ dci(0x45546bf8);  // pmullb z24.h, z31.b, z20.b
    // vl128 state = 0xd15f23fa
    __ dci(0x45546bf9);  // pmullb z25.h, z31.b, z20.b
    // vl128 state = 0x62ca83ea
    __ dci(0x45546bfb);  // pmullb z27.h, z31.b, z20.b
    // vl128 state = 0xf786c1e4
    __ dci(0x454469eb);  // pmullb z11.h, z15.b, z4.b
    // vl128 state = 0x3cc8c789
    __ dci(0x455069fb);  // pmullb z27.h, z15.b, z16.b
    // vl128 state = 0xb14709ca
    __ dci(0x45546dfa);  // pmullt z26.h, z15.b, z20.b
    // vl128 state = 0x38257820
    __ dci(0x45546df8);  // pmullt z24.h, z15.b, z20.b
    // vl128 state = 0x9cc5cd3a
    __ dci(0x45576dfc);  // pmullt z28.h, z15.b, z23.b
    // vl128 state = 0x704543ec
    __ dci(0x45d76d6c);  // pmullt z12.d, z11.s, z23.s
    // vl128 state = 0x15ec8e77
    __ dci(0x455f6d68);  // pmullt z8.h, z11.b, z31.b
    // vl128 state = 0xfa379a67
    __ dci(0x45596d6a);  // pmullt z10.h, z11.b, z25.b
    // vl128 state = 0x27fcfa49
    __ dci(0x45596d7a);  // pmullt z26.h, z11.b, z25.b
    // vl128 state = 0x13883ef0
    __ dci(0x45596532);  // sqdmullt z18.h, z9.b, z25.b
    // vl128 state = 0x667f8699
    __ dci(0x45596536);  // sqdmullt z22.h, z9.b, z25.b
    // vl128 state = 0x477ded37
    __ dci(0x45d16537);  // sqdmullt z23.d, z9.s, z17.s
    // vl128 state = 0x3323eb48
    __ dci(0x45c16515);  // sqdmullt z21.d, z8.s, z1.s
    // vl128 state = 0x3f581e83
    __ dci(0x45456517);  // sqdmullt z23.h, z8.b, z5.b
    // vl128 state = 0xd844e48b
    __ dci(0x45556555);  // sqdmullt z21.h, z10.b, z21.b
    // vl128 state = 0x95e6094e
    __ dci(0x45c56554);  // sqdmullt z20.d, z10.s, z5.s
    // vl128 state = 0x198a6f75
    __ dci(0x45cd6456);  // sqdmullt z22.d, z2.s, z13.s
    // vl128 state = 0x4d6b7178
    __ dci(0x45c96406);  // sqdmullt z6.d, z0.s, z9.s
    // vl128 state = 0xd989cd0f
    __ dci(0x45d96482);  // sqdmullt z2.d, z4.s, z25.s
    // vl128 state = 0xa80fdf92
    __ dci(0x45dd6406);  // sqdmullt z6.d, z0.s, z29.s
    // vl128 state = 0x9876a20d
    __ dci(0x45596404);  // sqdmullt z4.h, z0.b, z25.b
    // vl128 state = 0x5ad5787c
    __ dci(0x454b6414);  // sqdmullt z20.h, z0.b, z11.b
    // vl128 state = 0x86c077d7
    __ dci(0x454a601c);  // sqdmullb z28.h, z0.b, z10.b
    // vl128 state = 0xfe867841
    __ dci(0x4542641d);  // sqdmullt z29.h, z0.b, z2.b
    // vl128 state = 0x7bf363f1
    __ dci(0x4552643c);  // sqdmullt z28.h, z1.b, z18.b
    // vl128 state = 0x7cf26ed3
    __ dci(0x4552673d);  // sqdmullt z29.h, z25.b, z18.b
    // vl128 state = 0x748f1a99
    __ dci(0x45d6673f);  // sqdmullt z31.d, z25.s, z22.s
    // vl128 state = 0xbb15fd07
    __ dci(0x45d2633d);  // sqdmullb z29.d, z25.s, z18.s
    // vl128 state = 0x28e0985a
    __ dci(0x455a6339);  // sqdmullb z25.h, z25.b, z26.b
    // vl128 state = 0x9c0da0fd
    __ dci(0x45526738);  // sqdmullt z24.h, z25.b, z18.b
    // vl128 state = 0xa970ebb8
  }

  // Hash the resulting machine state into `state`, then emit a load of that
  // 32-bit word into w0 so it can be checked once the code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // 16 reference hashes, indexed by (number of kQRegSize lanes - 1); entry 0
    // matches the final `vl128 state` above.
    uint32_t expected_hashes[] = {
        0xa970ebb8,
        0xc665eff5,
        0x8cc21595,
        0x0ea984f6,
        0x1dbce326,
        0x0845e911,
        0xa6fb6cf4,
        0x8544239a,
        0x2412d23d,
        0xbce6f5e0,
        0x780ff264,
        0xcf6cf172,
        0xef93a3b4,
        0x94080541,
        0xa0aedeba,
        0x8e8bddaa,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test for the indexed (by-element) forms of SQDMULLB and SQDMULLT,
// covering .s <- .h and .d <- .s widenings.
//
// After SetInitialMachineState(), 30 raw encodings are emitted with dci(), the
// machine state is hashed with ComputeMachineStateHash(), and the 32-bit result
// is loaded into w0 and compared against expected_hashes[]. The `vl128 state`
// comments appear to record the running hash for a 128-bit vector length; the
// last one equals expected_hashes[0].
TEST_SVE(sve2_sqdmullt_sqdmullb_z_zzi) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Sized for exactly the 30 dci() words emitted below; keep the two in sync.
    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
    __ dci(0x44eae5a9);  // sqdmullt z9.d, z13.s, z10.s[#0]
    // vl128 state = 0x311dfe35
    __ dci(0x44eae9a1);  // sqdmullb z1.d, z13.s, z10.s[#1]
    // vl128 state = 0x559243c3
    __ dci(0x44eae9a5);  // sqdmullb z5.d, z13.s, z10.s[#1]
    // vl128 state = 0x44d6824c
    __ dci(0x44e2edad);  // sqdmullt z13.d, z13.s, z2.s[#1]
    // vl128 state = 0xb5539592
    __ dci(0x44e6e9ac);  // sqdmullb z12.d, z13.s, z6.s[#1]
    // vl128 state = 0x5e66b9f8
    __ dci(0x44e4ebae);  // sqdmullb z14.d, z29.s, z4.s[#1]
    // vl128 state = 0x4347620a
    __ dci(0x44e4ebaf);  // sqdmullb z15.d, z29.s, z4.s[#1]
    // vl128 state = 0xe7cfe898
    __ dci(0x44a5ebad);  // sqdmullb z13.s, z29.h, z5.h[#1]
    // vl128 state = 0x0ca455c7
    __ dci(0x44a5e9fd);  // sqdmullb z29.s, z15.h, z5.h[#1]
    // vl128 state = 0xcac072a9
    __ dci(0x44e5e8fc);  // sqdmullb z28.d, z7.s, z5.s[#1]
    // vl128 state = 0xe18e8c66
    __ dci(0x44ede9ec);  // sqdmullb z12.d, z15.s, z13.s[#1]
    // vl128 state = 0x32f642cb
    __ dci(0x44ede9fc);  // sqdmullb z28.d, z15.s, z13.s[#1]
    // vl128 state = 0xa0467c8a
    __ dci(0x44fce9f4);  // sqdmullb z20.d, z15.s, z12.s[#3]
    // vl128 state = 0x7ada4130
    __ dci(0x44e4e9f6);  // sqdmullb z22.d, z15.s, z4.s[#1]
    // vl128 state = 0xc87deb44
    __ dci(0x44f4e9d2);  // sqdmullb z18.d, z14.s, z4.s[#3]
    // vl128 state = 0x6dc052ca
    __ dci(0x44f5e9e2);  // sqdmullb z2.d, z15.s, z5.s[#3]
    // vl128 state = 0xe05110d4
    __ dci(0x44f5ebb2);  // sqdmullb z18.d, z29.s, z5.s[#3]
    // vl128 state = 0x7ed21594
    __ dci(0x44b5efba);  // sqdmullt z26.s, z29.h, z5.h[#5]
    // vl128 state = 0x7d5dad40
    __ dci(0x44b5ef78);  // sqdmullt z24.s, z27.h, z5.h[#5]
    // vl128 state = 0x418f84bc
    __ dci(0x44f5eb70);  // sqdmullb z16.d, z27.s, z5.s[#3]
    // vl128 state = 0x72d78d32
    __ dci(0x44e5ebf4);  // sqdmullb z20.d, z31.s, z5.s[#1]
    // vl128 state = 0x391fad35
    __ dci(0x44e5efbc);  // sqdmullt z28.d, z29.s, z5.s[#1]
    // vl128 state = 0xb2143633
    __ dci(0x44e1ebbd);  // sqdmullb z29.d, z29.s, z1.s[#1]
    // vl128 state = 0x468dac6e
    __ dci(0x44f1ebed);  // sqdmullb z13.d, z31.s, z1.s[#3]
    // vl128 state = 0x9ab292bd
    __ dci(0x44f5efe5);  // sqdmullt z5.d, z31.s, z5.s[#3]
    // vl128 state = 0x4f2bd5d1
    __ dci(0x44fdeee7);  // sqdmullt z7.d, z23.s, z13.s[#3]
    // vl128 state = 0x7a810779
    __ dci(0x44fdee25);  // sqdmullt z5.d, z17.s, z13.s[#3]
    // vl128 state = 0x05d23734
    __ dci(0x44f5ea27);  // sqdmullb z7.d, z17.s, z5.s[#3]
    // vl128 state = 0x878580f5
    __ dci(0x44f1e225);  // sqdmullb z5.d, z17.s, z1.s[#2]
    // vl128 state = 0x5fa56f94
    __ dci(0x44e1ea21);  // sqdmullb z1.d, z17.s, z1.s[#1]
    // vl128 state = 0x05f1cdf0
  }

  // Hash the resulting machine state into `state`, then emit a load of that
  // 32-bit word into w0 so it can be checked once the code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // 16 reference hashes, indexed by (number of kQRegSize lanes - 1); entry 0
    // matches the final `vl128 state` above.
    uint32_t expected_hashes[] = {
        0x05f1cdf0,
        0x6b88d4f2,
        0x83bf279d,
        0x12f21868,
        0x6c68a5ce,
        0x5710343f,
        0xa4d0d0ee,
        0x335b20c5,
        0x0dd491c5,
        0x98966292,
        0xb68cdacd,
        0xa26f9914,
        0x6dd60ced,
        0x5cd0d62c,
        0xebe3fb25,
        0xb264d998,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test for the SVE2 XAR instruction across .b, .h, .s and .d lane
// sizes with a range of immediate rotate amounts.
//
// After SetInitialMachineState(), 20 raw encodings are emitted with dci(), the
// machine state is hashed with ComputeMachineStateHash(), and the 32-bit result
// is loaded into w0 and compared against expected_hashes[]. The `vl128 state`
// comments appear to record the running hash for a 128-bit vector length; the
// last one equals expected_hashes[0].
TEST_SVE(sve2_xar) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Sized for exactly the 20 dci() words emitted below; keep the two in sync.
    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
    __ dci(0x04293719);  // xar z25.b, z25.b, z24.b, #7
    // vl128 state = 0x596046c4
    __ dci(0x04293531);  // xar z17.b, z17.b, z9.b, #7
    // vl128 state = 0x38332d55
    __ dci(0x04e93533);  // xar z19.d, z19.d, z9.d, #23
    // vl128 state = 0x535c8af7
    __ dci(0x046b3523);  // xar z3.s, z3.s, z9.s, #21
    // vl128 state = 0x879a489f
    __ dci(0x04eb3427);  // xar z7.d, z7.d, z1.d, #21
    // vl128 state = 0xfbac317f
    __ dci(0x04ea3463);  // xar z3.d, z3.d, z3.d, #22
    // vl128 state = 0xfb44482e
    __ dci(0x04fa3447);  // xar z7.d, z7.d, z2.d, #6
    // vl128 state = 0xa59e324c
    __ dci(0x04f8346f);  // xar z15.d, z15.d, z3.d, #8
    // vl128 state = 0x7f064300
    __ dci(0x0479346b);  // xar z11.s, z11.s, z3.s, #7
    // vl128 state = 0x0c0d3573
    __ dci(0x0461346a);  // xar z10.s, z10.s, z3.s, #31
    // vl128 state = 0x3c61530d
    __ dci(0x0464346b);  // xar z11.s, z11.s, z3.s, #28
    // vl128 state = 0x137c1433
    __ dci(0x04643469);  // xar z9.s, z9.s, z3.s, #28
    // vl128 state = 0x81d55bb1
    __ dci(0x0464346b);  // xar z11.s, z11.s, z3.s, #28
    // vl128 state = 0xad2ac5c0
    __ dci(0x0434346a);  // xar z10.h, z10.h, z3.h, #12
    // vl128 state = 0x2997a1d9
    __ dci(0x04b434fa);  // xar z26.d, z26.d, z7.d, #44
    // vl128 state = 0x715f758d
    __ dci(0x04e434f2);  // xar z18.d, z18.d, z7.d, #28
    // vl128 state = 0x8bfa19ef
    __ dci(0x04ec34b3);  // xar z19.d, z19.d, z5.d, #20
    // vl128 state = 0xa8d646a5
    __ dci(0x04ae34b7);  // xar z23.d, z23.d, z5.d, #50
    // vl128 state = 0xf590c489
    __ dci(0x04ae34a7);  // xar z7.d, z7.d, z5.d, #50
    // vl128 state = 0xd6aafb5e
    __ dci(0x04ae3417);  // xar z23.d, z23.d, z0.d, #50
    // vl128 state = 0xd40a8d1a
  }

  // Hash the resulting machine state into `state`, then emit a load of that
  // 32-bit word into w0 so it can be checked once the code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // 16 reference hashes, indexed by (number of kQRegSize lanes - 1); entry 0
    // matches the final `vl128 state` above.
    uint32_t expected_hashes[] = {
        0xd40a8d1a,
        0x834982b0,
        0x6fd8c07b,
        0x2654e6f3,
        0x79fa44fb,
        0xc8a60223,
        0xd12f35f0,
        0x1e0a3315,
        0x6970dcd2,
        0x62305aed,
        0xb9846a55,
        0x1147e436,
        0x97a8ceaa,
        0xe8f80c0e,
        0xea3ab3e7,
        0xb2abd654,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test for the SVE2 HISTCNT instruction (zeroing predication) on .s
// and .d lanes, using governing predicates p0 through p7.
//
// After SetInitialMachineState(), 100 raw encodings are emitted with dci(), the
// machine state is hashed with ComputeMachineStateHash(), and the 32-bit result
// is loaded into w0 and compared against expected_hashes[]. The `vl128 state`
// comments appear to record the running hash for a 128-bit vector length; the
// last one equals expected_hashes[0].
TEST_SVE(sve2_histcnt) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Sized for exactly the 100 dci() words emitted below; keep the two in
    // sync.
    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
    __ dci(0x45e8c2f9);  // histcnt z25.d, p0/z, z23.d, z8.d
    // vl128 state = 0x892c6962
    __ dci(0x45e8c1f1);  // histcnt z17.d, p0/z, z15.d, z8.d
    // vl128 state = 0x6ef7d729
    __ dci(0x45e8c3a1);  // histcnt z1.d, p0/z, z29.d, z8.d
    // vl128 state = 0x17654f81
    __ dci(0x45e8c3a9);  // histcnt z9.d, p0/z, z29.d, z8.d
    // vl128 state = 0xe1a0067e
    __ dci(0x45e8c0a8);  // histcnt z8.d, p0/z, z5.d, z8.d
    // vl128 state = 0xd41f511b
    __ dci(0x45e8d0f8);  // histcnt z24.d, p4/z, z7.d, z8.d
    // vl128 state = 0x8b73945a
    __ dci(0x45e8d0fa);  // histcnt z26.d, p4/z, z7.d, z8.d
    // vl128 state = 0xc175acec
    __ dci(0x45aad0fb);  // histcnt z27.s, p4/z, z7.s, z10.s
    // vl128 state = 0x44f8385b
    __ dci(0x45aad2df);  // histcnt z31.s, p4/z, z22.s, z10.s
    // vl128 state = 0x52cd5d17
    __ dci(0x45aad2dd);  // histcnt z29.s, p4/z, z22.s, z10.s
    // vl128 state = 0x9f8d9611
    __ dci(0x45abd2f5);  // histcnt z21.s, p4/z, z23.s, z11.s
    // vl128 state = 0x5cc45fb0
    __ dci(0x45aad0f7);  // histcnt z23.s, p4/z, z7.s, z10.s
    // vl128 state = 0x5096a07f
    __ dci(0x45aad1b3);  // histcnt z19.s, p4/z, z13.s, z10.s
    // vl128 state = 0xf25781a6
    __ dci(0x45a8d1f2);  // histcnt z18.s, p4/z, z15.s, z8.s
    // vl128 state = 0xc7025934
    __ dci(0x45a0d0f6);  // histcnt z22.s, p4/z, z7.s, z0.s
    // vl128 state = 0xcda9c72a
    __ dci(0x45a0d87e);  // histcnt z30.s, p6/z, z3.s, z0.s
    // vl128 state = 0x75f6bbcc
    __ dci(0x45a0dc4e);  // histcnt z14.s, p7/z, z2.s, z0.s
    // vl128 state = 0x5e4e9fe0
    __ dci(0x45a0dc4a);  // histcnt z10.s, p7/z, z2.s, z0.s
    // vl128 state = 0x0ec8d2b8
    __ dci(0x45b0cc4b);  // histcnt z11.s, p3/z, z2.s, z16.s
    // vl128 state = 0x1228c442
    __ dci(0x45b0cc43);  // histcnt z3.s, p3/z, z2.s, z16.s
    // vl128 state = 0xc6067f7b
    __ dci(0x45b8cc73);  // histcnt z19.s, p3/z, z3.s, z24.s
    // vl128 state = 0xf04f9753
    __ dci(0x45b8d877);  // histcnt z23.s, p6/z, z3.s, z24.s
    // vl128 state = 0xdeb83b41
    __ dci(0x45b8d47f);  // histcnt z31.s, p5/z, z3.s, z24.s
    // vl128 state = 0x8ab3905f
    __ dci(0x45b8d46f);  // histcnt z15.s, p5/z, z3.s, z24.s
    // vl128 state = 0x762bf277
    __ dci(0x45b8d16d);  // histcnt z13.s, p4/z, z11.s, z24.s
    // vl128 state = 0x9a670783
    __ dci(0x45bcd125);  // histcnt z5.s, p4/z, z9.s, z28.s
    // vl128 state = 0x3e399489
    __ dci(0x45b8d021);  // histcnt z1.s, p4/z, z1.s, z24.s
    // vl128 state = 0x7fc8f1e7
    __ dci(0x45f8d220);  // histcnt z0.d, p4/z, z17.d, z24.d
    // vl128 state = 0x9cb004db
    __ dci(0x45f0d621);  // histcnt z1.d, p5/z, z17.d, z16.d
    // vl128 state = 0xdd4161b5
    __ dci(0x45a0d625);  // histcnt z5.s, p5/z, z17.s, z0.s
    // vl128 state = 0xb5cb70bb
    __ dci(0x45a0d4a1);  // histcnt z1.s, p5/z, z5.s, z0.s
    // vl128 state = 0x4452182b
    __ dci(0x45a0d4a3);  // histcnt z3.s, p5/z, z5.s, z0.s
    // vl128 state = 0x71298d3c
    __ dci(0x45a0d4a2);  // histcnt z2.s, p5/z, z5.s, z0.s
    // vl128 state = 0xa22914e1
    __ dci(0x45a2d6a3);  // histcnt z3.s, p5/z, z21.s, z2.s
    // vl128 state = 0x6183bfbc
    __ dci(0x45a2de21);  // histcnt z1.s, p7/z, z17.s, z2.s
    // vl128 state = 0xd1ebb242
    __ dci(0x45e2dc20);  // histcnt z0.d, p7/z, z1.d, z2.d
    // vl128 state = 0x297a432d
    __ dci(0x45e2d8b0);  // histcnt z16.d, p6/z, z5.d, z2.d
    // vl128 state = 0x1d2557c0
    __ dci(0x45eed8b8);  // histcnt z24.d, p6/z, z5.d, z14.d
    // vl128 state = 0xe6ef07fa
    __ dci(0x45eed8a8);  // histcnt z8.d, p6/z, z5.d, z14.d
    // vl128 state = 0xaf3665bb
    __ dci(0x45aed88c);  // histcnt z12.s, p6/z, z4.s, z14.s
    // vl128 state = 0x5c2b38bc
    __ dci(0x45efd88d);  // histcnt z13.d, p6/z, z4.d, z15.d
    // vl128 state = 0x8d5527d8
    __ dci(0x45ffc88f);  // histcnt z15.d, p2/z, z4.d, z31.d
    // vl128 state = 0x1d2e08d2
    __ dci(0x45fbc98d);  // histcnt z13.d, p2/z, z12.d, z27.d
    // vl128 state = 0x007388b0
    __ dci(0x45bbcd8f);  // histcnt z15.s, p3/z, z12.s, z27.s
    // vl128 state = 0x9008a7ba
    __ dci(0x45b3cc9f);  // histcnt z31.s, p3/z, z4.s, z19.s
    // vl128 state = 0xc4030ca4
    __ dci(0x45bbc497);  // histcnt z23.s, p1/z, z4.s, z27.s
    // vl128 state = 0xeaf4a0b6
    __ dci(0x45fbc415);  // histcnt z21.d, p1/z, z0.d, z27.d
    // vl128 state = 0x03d85428
    __ dci(0x45ffc517);  // histcnt z23.d, p1/z, z8.d, z31.d
    // vl128 state = 0xa836a751
    __ dci(0x45fbc596);  // histcnt z22.d, p1/z, z12.d, z27.d
    // vl128 state = 0x77e33f69
    __ dci(0x45fbc4c6);  // histcnt z6.d, p1/z, z6.d, z27.d
    // vl128 state = 0xf47bb379
    __ dci(0x45fbc4ce);  // histcnt z14.d, p1/z, z6.d, z27.d
    // vl128 state = 0x6dbfff33
    __ dci(0x45fad4ca);  // histcnt z10.d, p5/z, z6.d, z26.d
    // vl128 state = 0xbc04915a
    __ dci(0x45ead45a);  // histcnt z26.d, p5/z, z2.d, z10.d
    // vl128 state = 0x8969b1c5
    __ dci(0x45aad4ca);  // histcnt z10.s, p5/z, z6.s, z10.s
    // vl128 state = 0x58d2dfac
    __ dci(0x45aed0ce);  // histcnt z14.s, p4/z, z6.s, z14.s
    // vl128 state = 0xfa793cc7
    __ dci(0x45aec4c6);  // histcnt z6.s, p1/z, z6.s, z14.s
    // vl128 state = 0xff4c99d8
    __ dci(0x45abc4c7);  // histcnt z7.s, p1/z, z6.s, z11.s
    // vl128 state = 0x2b44a4ae
    __ dci(0x45abc4cf);  // histcnt z15.s, p1/z, z6.s, z11.s
    // vl128 state = 0xbb3f8ba4
    __ dci(0x45a9c44e);  // histcnt z14.s, p1/z, z2.s, z9.s
    // vl128 state = 0x5a3a40a6
    __ dci(0x45b9c46f);  // histcnt z15.s, p1/z, z3.s, z25.s
    // vl128 state = 0x72e31c5f
    __ dci(0x45b9c46e);  // histcnt z14.s, p1/z, z3.s, z25.s
    // vl128 state = 0xde56263e
    __ dci(0x45b1c67e);  // histcnt z30.s, p1/z, z19.s, z17.s
    // vl128 state = 0xc570f0b9
    __ dci(0x45b5c63a);  // histcnt z26.s, p1/z, z17.s, z21.s
    // vl128 state = 0x72ab1716
    __ dci(0x45a5c72a);  // histcnt z10.s, p1/z, z25.s, z5.s
    // vl128 state = 0xe8848b2d
    __ dci(0x45a1c77a);  // histcnt z26.s, p1/z, z27.s, z1.s
    // vl128 state = 0x2975ac38
    __ dci(0x45a1c77b);  // histcnt z27.s, p1/z, z27.s, z1.s
    // vl128 state = 0xb0638363
    __ dci(0x45a1c773);  // histcnt z19.s, p1/z, z27.s, z1.s
    // vl128 state = 0xc9620a45
    __ dci(0x45e9c777);  // histcnt z23.d, p1/z, z27.d, z9.d
    // vl128 state = 0x0414c679
    __ dci(0x45ebc67f);  // histcnt z31.d, p1/z, z19.d, z11.d
    // vl128 state = 0xc1d4410e
    __ dci(0x45ebc37b);  // histcnt z27.d, p0/z, z27.d, z11.d
    // vl128 state = 0x3ae32e36
    __ dci(0x45abd373);  // histcnt z19.s, p4/z, z27.s, z11.s
    // vl128 state = 0x75ffe12c
    __ dci(0x45fbd363);  // histcnt z3.d, p4/z, z27.d, z27.d
    // vl128 state = 0x4084743b
    __ dci(0x45ffc36b);  // histcnt z11.d, p0/z, z27.d, z31.d
    // vl128 state = 0xfade136b
    __ dci(0x45ffc3ca);  // histcnt z10.d, p0/z, z30.d, z31.d
    // vl128 state = 0x60f18f50
    __ dci(0x45efc2ce);  // histcnt z14.d, p0/z, z22.d, z15.d
    // vl128 state = 0x162ed112
    __ dci(0x45adc2c6);  // histcnt z6.s, p0/z, z22.s, z13.s
    // vl128 state = 0x4f84cb96
    __ dci(0x45adc2c4);  // histcnt z4.s, p0/z, z22.s, z13.s
    // vl128 state = 0x5d04ccb6
    __ dci(0x45a7c2d4);  // histcnt z20.s, p0/z, z22.s, z7.s
    // vl128 state = 0x38efdab7
    __ dci(0x45a6c0c4);  // histcnt z4.s, p0/z, z6.s, z6.s
    // vl128 state = 0xff7a0a24
    __ dci(0x45a7c2c0);  // histcnt z0.s, p0/z, z22.s, z7.s
    // vl128 state = 0x5f7b0a31
    __ dci(0x45a7d6c1);  // histcnt z1.s, p5/z, z22.s, z7.s
    // vl128 state = 0x1e8a6f5f
    __ dci(0x45afd7c5);  // histcnt z5.s, p5/z, z30.s, z15.s
    // vl128 state = 0x655ed237
    __ dci(0x45add3d5);  // histcnt z21.s, p4/z, z30.s, z13.s
    // vl128 state = 0x8c7226a9
    __ dci(0x45add3d4);  // histcnt z20.s, p4/z, z30.s, z13.s
    // vl128 state = 0x727304ad
    __ dci(0x45bcd3dc);  // histcnt z28.s, p4/z, z30.s, z28.s
    // vl128 state = 0xce4e49d0
    __ dci(0x45bcd3cc);  // histcnt z12.s, p4/z, z30.s, z28.s
    // vl128 state = 0x5c252d7d
    __ dci(0x45bcd15c);  // histcnt z28.s, p4/z, z10.s, z28.s
    // vl128 state = 0x5e1163f7
    __ dci(0x45b5d154);  // histcnt z20.s, p4/z, z10.s, z21.s
    // vl128 state = 0xf77c50ee
    __ dci(0x45b5d156);  // histcnt z22.s, p4/z, z10.s, z21.s
    // vl128 state = 0xe35c8438
    __ dci(0x45b3d157);  // histcnt z23.s, p4/z, z10.s, z19.s
    // vl128 state = 0xf6926673
    __ dci(0x45b3d156);  // histcnt z22.s, p4/z, z10.s, z19.s
    // vl128 state = 0xf9022ad2
    __ dci(0x45b3c554);  // histcnt z20.s, p1/z, z10.s, z19.s
    // vl128 state = 0xb90dfe28
    __ dci(0x45bbd55c);  // histcnt z28.s, p5/z, z10.s, z27.s
    // vl128 state = 0x9a939b84
    __ dci(0x45abd57e);  // histcnt z30.s, p5/z, z11.s, z11.s
    // vl128 state = 0xd9ad8be7
    __ dci(0x45abcd7a);  // histcnt z26.s, p3/z, z11.s, z11.s
    // vl128 state = 0x14869e4f
    __ dci(0x45bbc57b);  // histcnt z27.s, p1/z, z11.s, z27.s
    // vl128 state = 0x25130793
    __ dci(0x45bfcd73);  // histcnt z19.s, p3/z, z11.s, z31.s
    // vl128 state = 0x53adf455
    __ dci(0x45bfc863);  // histcnt z3.s, p2/z, z3.s, z31.s
    // vl128 state = 0x82fa6c44
    __ dci(0x45b7cc62);  // histcnt z2.s, p3/z, z3.s, z23.s
    // vl128 state = 0xfaefda71
    __ dci(0x45b6cce3);  // histcnt z3.s, p3/z, z7.s, z22.s
    // vl128 state = 0xdd697c2a
  }

  // Hash the resulting machine state into `state`, then emit a load of that
  // 32-bit word into w0 so it can be checked once the code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // 16 reference hashes, indexed by (number of kQRegSize lanes - 1); entry 0
    // matches the final `vl128 state` above.
    uint32_t expected_hashes[] = {
        0xdd697c2a,
        0x1415ff61,
        0xb9e154c8,
        0x566a2af5,
        0xef7574b4,
        0x6da83471,
        0x356d5c4d,
        0x798a2403,
        0x2c16e862,
        0x6fa84021,
        0x6e09e8ff,
        0xc13a0eb6,
        0x88c92928,
        0xe51672fe,
        0x229b8ed5,
        0x9e662757,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Hash-based test for the SVE2 HISTSEG instruction (.b lanes only).
//
// After SetInitialMachineState(), 100 raw encodings are emitted with dci(), the
// machine state is hashed with ComputeMachineStateHash(), and the 32-bit result
// is loaded into w0 and compared against expected_hashes[]. The `vl128 state`
// comments appear to record the running hash for a 128-bit vector length; the
// last one equals expected_hashes[0].
TEST_SVE(sve2_histseg) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // Sized for exactly the 100 dci() words emitted below; keep the two in
    // sync.
    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
    __ dci(0x4524a228);  // histseg z8.b, z17.b, z4.b
    // vl128 state = 0x21ed28a1
    __ dci(0x452ca20c);  // histseg z12.b, z16.b, z12.b
    // vl128 state = 0xc135d593
    __ dci(0x453ca288);  // histseg z8.b, z20.b, z28.b
    // vl128 state = 0xb86cd6e7
    __ dci(0x4538a380);  // histseg z0.b, z28.b, z24.b
    // vl128 state = 0xd28ddd71
    __ dci(0x452aa388);  // histseg z8.b, z28.b, z10.b
    // vl128 state = 0x322d3aa8
    __ dci(0x452aa38c);  // histseg z12.b, z28.b, z10.b
    // vl128 state = 0x67d668fc
    __ dci(0x4532a384);  // histseg z4.b, z28.b, z18.b
    // vl128 state = 0xc57505d4
    __ dci(0x4537a380);  // histseg z0.b, z28.b, z23.b
    // vl128 state = 0xb47d0a11
    __ dci(0x4535a3a8);  // histseg z8.b, z29.b, z21.b
    // vl128 state = 0x347adf6f
    __ dci(0x4535a3ac);  // histseg z12.b, z29.b, z21.b
    // vl128 state = 0xb763510c
    __ dci(0x4535a3ae);  // histseg z14.b, z29.b, z21.b
    // vl128 state = 0xb28319d5
    __ dci(0x4525a39e);  // histseg z30.b, z28.b, z5.b
    // vl128 state = 0x0adc6533
    __ dci(0x4525a38e);  // histseg z14.b, z28.b, z5.b
    // vl128 state = 0x248409c6
    __ dci(0x452da3c6);  // histseg z6.b, z30.b, z13.b
    // vl128 state = 0xa71c85d6
    __ dci(0x452da187);  // histseg z7.b, z12.b, z13.b
    // vl128 state = 0x7314b8a0
    __ dci(0x4525a1a6);  // histseg z6.b, z13.b, z5.b
    // vl128 state = 0x129013d5
    __ dci(0x4527a18e);  // histseg z14.b, z12.b, z7.b
    // vl128 state = 0xc6b207b7
    __ dci(0x4521a18c);  // histseg z12.b, z12.b, z1.b
    // vl128 state = 0x03957bb5
    __ dci(0x4524a18d);  // histseg z13.b, z12.b, z4.b
    // vl128 state = 0x379af1c6
    __ dci(0x4524a125);  // histseg z5.b, z9.b, z4.b
    // vl128 state = 0x93c462cc
    __ dci(0x4522a127);  // histseg z7.b, z9.b, z2.b
    // vl128 state = 0xc95cb1a9
    __ dci(0x4532a117);  // histseg z23.b, z8.b, z18.b
    // vl128 state = 0xc50e4e66
    __ dci(0x4533a15f);  // histseg z31.b, z10.b, z19.b
    // vl128 state = 0x76663e3e
    __ dci(0x4533a14f);  // histseg z15.b, z10.b, z19.b
    // vl128 state = 0x84f5ca5f
    __ dci(0x4533a0ce);  // histseg z14.b, z6.b, z19.b
    // vl128 state = 0x50d7de3d
    __ dci(0x453ba1cc);  // histseg z12.b, z14.b, z27.b
    // vl128 state = 0x32e3b53f
    __ dci(0x453ba0fc);  // histseg z28.b, z7.b, z27.b
    // vl128 state = 0x0a5d4180
    __ dci(0x452ba2f4);  // histseg z20.b, z23.b, z11.b
    // vl128 state = 0x91b77585
    __ dci(0x453ba2c4);  // histseg z4.b, z22.b, z27.b
    // vl128 state = 0x5cd0c690
    __ dci(0x453ba2cc);  // histseg z12.b, z22.b, z27.b
    // vl128 state = 0xa6a5f749
    __ dci(0x453ba1c8);  // histseg z8.b, z14.b, z27.b
    // vl128 state = 0xe5036937
    __ dci(0x4529a1c9);  // histseg z9.b, z14.b, z9.b
    // vl128 state = 0x13c620c8
    __ dci(0x4529a1a8);  // histseg z8.b, z13.b, z9.b
    // vl128 state = 0xbf71d421
    __ dci(0x4521a198);  // histseg z24.b, z12.b, z1.b
    // vl128 state = 0xe01d1160
    __ dci(0x4529a1ba);  // histseg z26.b, z13.b, z9.b
    // vl128 state = 0xaa1b29d6
    __ dci(0x452fa1bb);  // histseg z27.b, z13.b, z15.b
    // vl128 state = 0x2f96bd61
    __ dci(0x452fa0ff);  // histseg z31.b, z7.b, z15.b
    // vl128 state = 0x5aeb6bec
    __ dci(0x4527a0de);  // histseg z30.b, z6.b, z7.b
    // vl128 state = 0xbcb1b299
    __ dci(0x4525a1d6);  // histseg z22.b, z14.b, z5.b
    // vl128 state = 0x0f89ea9b
    __ dci(0x4525a1d7);  // histseg z23.b, z14.b, z5.b
    // vl128 state = 0xe40f30a2
    __ dci(0x4521a3df);  // histseg z31.b, z30.b, z1.b
    // vl128 state = 0x342ff33b
    __ dci(0x4521a197);  // histseg z23.b, z12.b, z1.b
    // vl128 state = 0xdfa92902
    __ dci(0x4521a187);  // histseg z7.b, z12.b, z1.b
    // vl128 state = 0x8531fa67
    __ dci(0x4535a186);  // histseg z6.b, z12.b, z21.b
    // vl128 state = 0xe4b55112
    __ dci(0x4535a196);  // histseg z22.b, z12.b, z21.b
    // vl128 state = 0x5d26970e
    __ dci(0x4525a097);  // histseg z23.b, z4.b, z5.b
    // vl128 state = 0x7dcb1d13
    __ dci(0x4525a095);  // histseg z21.b, z4.b, z5.b
    // vl128 state = 0x5fb0789c
    __ dci(0x452da017);  // histseg z23.b, z0.b, z13.b
    // vl128 state = 0x7f5df281
    __ dci(0x452da295);  // histseg z21.b, z20.b, z13.b
    // vl128 state = 0x9e6f5eaf
    __ dci(0x453da39d);  // histseg z29.b, z28.b, z29.b
    // vl128 state = 0x532f95a9
    __ dci(0x453da39c);  // histseg z28.b, z28.b, z29.b
    // vl128 state = 0x64202514
    __ dci(0x4535a29e);  // histseg z30.b, z20.b, z21.b
    // vl128 state = 0x44bda972
    __ dci(0x4535a0bf);  // histseg z31.b, z5.b, z21.b
    // vl128 state = 0x258125d6
    __ dci(0x4535a0bb);  // histseg z27.b, z5.b, z21.b
    // vl128 state = 0xec63caaf
    __ dci(0x4537a2b3);  // histseg z19.b, z21.b, z23.b
    // vl128 state = 0xb937b6e8
    __ dci(0x4525a2b1);  // histseg z17.b, z21.b, z5.b
    // vl128 state = 0x1515ee94
    __ dci(0x4525a2b5);  // histseg z21.b, z21.b, z5.b
    // vl128 state = 0x4bb06873
    __ dci(0x4525a0fd);  // histseg z29.b, z7.b, z5.b
    // vl128 state = 0x23446114
    __ dci(0x4524a079);  // histseg z25.b, z3.b, z4.b
    // vl128 state = 0x48d52cf6
    __ dci(0x4524a0d8);  // histseg z24.b, z6.b, z4.b
    // vl128 state = 0x0deef019
    __ dci(0x452ca09c);  // histseg z28.b, z4.b, z12.b
    // vl128 state = 0xaba6e202
    __ dci(0x453ca018);  // histseg z24.b, z0.b, z28.b
    // vl128 state = 0xee9d3eed
    __ dci(0x4539a008);  // histseg z8.b, z0.b, z25.b
    // vl128 state = 0x254c57f3
    __ dci(0x4539a00c);  // histseg z12.b, z0.b, z25.b
    // vl128 state = 0x28fea24d
    __ dci(0x4531a048);  // histseg z8.b, z2.b, z17.b
    // vl128 state = 0xe32fcb53
    __ dci(0x4530a0ca);  // histseg z10.b, z6.b, z16.b
    // vl128 state = 0xb3a9860b
    __ dci(0x4520a0ee);  // histseg z14.b, z7.b, z0.b
    // vl128 state = 0xef9e57fa
    __ dci(0x4520a1de);  // histseg z30.b, z14.b, z0.b
    // vl128 state = 0x295902e9
    __ dci(0x4520a38e);  // histseg z14.b, z28.b, z0.b
    // vl128 state = 0x756ed318
    __ dci(0x4528a30f);  // histseg z15.b, z24.b, z8.b
    // vl128 state = 0x8591dff9
    __ dci(0x4538a39f);  // histseg z31.b, z28.b, z24.b
    // vl128 state = 0xe4ad535d
    __ dci(0x4538a39b);  // histseg z27.b, z28.b, z24.b
    // vl128 state = 0x2d4fbc24
    __ dci(0x4538a093);  // histseg z19.b, z4.b, z24.b
    // vl128 state = 0xd8ee932a
    __ dci(0x453aa0a3);  // histseg z3.b, z5.b, z26.b
    // vl128 state = 0x768b71a6
    __ dci(0x453aa0ab);  // histseg z11.b, z5.b, z26.b
    // vl128 state = 0xa78673d7
    __ dci(0x452ea0bb);  // histseg z27.b, z5.b, z14.b
    // vl128 state = 0x6e649cae
    __ dci(0x452fa1bf);  // histseg z31.b, z13.b, z15.b
    // vl128 state = 0x0f58100a
    __ dci(0x452fa1be);  // histseg z30.b, z13.b, z15.b
    // vl128 state = 0xc99f4519
    __ dci(0x452fa3f6);  // histseg z22.b, z31.b, z15.b
    // vl128 state = 0x700c8305
    __ dci(0x452fa3f4);  // histseg z20.b, z31.b, z15.b
    // vl128 state = 0xbdecfddc
    __ dci(0x453fa3b0);  // histseg z16.b, z29.b, z31.b
    // vl128 state = 0x3f5b7578
    __ dci(0x453fa3b8);  // histseg z24.b, z29.b, z31.b
    // vl128 state = 0xf0076715
    __ dci(0x453fa228);  // histseg z8.b, z17.b, z31.b
    // vl128 state = 0x3bd60e0b
    __ dci(0x4536a22a);  // histseg z10.b, z17.b, z22.b
    // vl128 state = 0x1171f63c
    __ dci(0x4530a23a);  // histseg z26.b, z17.b, z16.b
    // vl128 state = 0x3fef270c
    __ dci(0x4522a23e);  // histseg z30.b, z17.b, z2.b
    // vl128 state = 0xf928721f
    __ dci(0x4524a23c);  // histseg z28.b, z17.b, z4.b
    // vl128 state = 0xecec697b
    __ dci(0x4527a238);  // histseg z24.b, z17.b, z7.b
    // vl128 state = 0x23b07b16
    __ dci(0x4525a210);  // histseg z16.b, z16.b, z5.b
    // vl128 state = 0x9c1c2ac5
    __ dci(0x4525a200);  // histseg z0.b, z16.b, z5.b
    // vl128 state = 0xc446f89b
    __ dci(0x4520a202);  // histseg z2.b, z16.b, z0.b
    // vl128 state = 0x8afba046
    __ dci(0x4521a303);  // histseg z3.b, z24.b, z1.b
    // vl128 state = 0xf0b0f9f3
    __ dci(0x4520a201);  // histseg z1.b, z16.b, z0.b
    // vl128 state = 0x8922615b
    __ dci(0x4528a223);  // histseg z3.b, z17.b, z8.b
    // vl128 state = 0xf36938ee
    __ dci(0x4528a367);  // histseg z7.b, z27.b, z8.b
    // vl128 state = 0xc2d96c41
    __ dci(0x452ca3e6);  // histseg z6.b, z31.b, z12.b
    // vl128 state = 0xf15e835f
    __ dci(0x452ea3c4);  // histseg z4.b, z30.b, z14.b
    // vl128 state = 0xb3964bd8
    __ dci(0x452da3c6);  // histseg z6.b, z30.b, z13.b
    // vl128 state = 0x8011a4c6
    __ dci(0x452da0c4);  // histseg z4.b, z6.b, z13.b
    // vl128 state = 0x0fbedf54
    __ dci(0x4529a0ec);  // histseg z12.b, z7.b, z9.b
    // vl128 state = 0x9a4d7031
  }

  // Hash the resulting machine state into `state`, then emit a load of that
  // 32-bit word into w0 so it can be checked once the code has run.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // 16 reference hashes, indexed by (number of kQRegSize lanes - 1); entry 0
    // matches the final `vl128 state` above.
    uint32_t expected_hashes[] = {
        0x9a4d7031,
        0xebaa80ad,
        0x702155a3,
        0x181fff8d,
        0x7b071373,
        0x1bf0af96,
        0x9ca15297,
        0x615d2f4a,
        0x7658b554,
        0xd2bf7319,
        0xddf8d492,
        0xf5938d08,
        0xbe354cb1,
        0xfe2d5d63,
        0x29818684,
        0x2c862ef9,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

TEST_SVE(sve2_table) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x05212a38);  // tbl z24.b, {z17.b, z18.b}, z1.b
    // vl128 state = 0xbdd1e1c1
    __ dci(0x05212810);  // tbl z16.b, {z0.b, z1.b}, z1.b
    // vl128 state = 0x80ca38b6
    __ dci(0x05e12812);  // tbl z18.d, {z0.d, z1.d}, z1.d
    // vl128 state = 0xb59fe024
    __ dci(0x05632802);  // tbl z2.h, {z0.h, z1.h}, z3.h
    // vl128 state = 0xfb22b8f9
    __ dci(0x05e32906);  // tbl z6.d, {z8.d, z9.d}, z3.d
    // vl128 state = 0x78ba34e9
    __ dci(0x05e22942);  // tbl z2.d, {z10.d, z11.d}, z2.d
    // vl128 state = 0x000b006f
    __ dci(0x05f22d46);  // tbx z6.d, z10.d, z18.d
    // vl128 state = 0x28b746e5
    __ dci(0x05f32947);  // tbl z7.d, {z10.d, z11.d}, z19.d
    // vl128 state = 0xfcbf7b93
    __ dci(0x05e32963);  // tbl z3.d, {z11.d, z12.d}, z3.d
    // vl128 state = 0x2891c0aa
    __ dci(0x05e33161);  // tbl z1.d, {z11.d}, z3.d
    // vl128 state = 0x3468b9d4
    __ dci(0x05e13149);  // tbl z9.d, {z10.d}, z1.d
    // vl128 state = 0xc2adf02b
    __ dci(0x0560314d);  // tbl z13.h, {z10.h}, z0.h
    // vl128 state = 0xff9f1abb
    __ dci(0x0578314c);  // tbl z12.h, {z10.h}, z24.h
// vl128 state = 0x2cffcd38 5450 __ dci(0x05e83144); // tbl z4.d, {z10.d}, z8.d 5451 // vl128 state = 0x8e5ca010 5452 __ dci(0x05e83146); // tbl z6.d, {z10.d}, z8.d 5453 // vl128 state = 0xa6e0e69a 5454 __ dci(0x05b83147); // tbl z7.s, {z10.s}, z24.s 5455 // vl128 state = 0x513e6328 5456 __ dci(0x053831d7); // tbl z23.b, {z14.b}, z24.b 5457 // vl128 state = 0xe2bd7bdf 5458 __ dci(0x056831df); // tbl z31.h, {z14.h}, z8.h 5459 // vl128 state = 0xf4881e93 5460 __ dci(0x0560319e); // tbl z30.h, {z12.h}, z0.h 5461 // vl128 state = 0x4cd76275 5462 __ dci(0x0522319a); // tbl z26.b, {z12.b}, z2.b 5463 // vl128 state = 0x06d15ac3 5464 __ dci(0x0522318a); // tbl z10.b, {z12.b}, z2.b 5465 // vl128 state = 0x5657179b 5466 __ dci(0x0522318e); // tbl z14.b, {z12.b}, z2.b 5467 // vl128 state = 0x7def33b7 5468 __ dci(0x05a6318a); // tbl z10.s, {z12.s}, z6.s 5469 // vl128 state = 0x38ee6756 5470 __ dci(0x05b2318b); // tbl z11.s, {z12.s}, z18.s 5471 // vl128 state = 0x6ba1d599 5472 __ dci(0x05a231bb); // tbl z27.s, {z13.s}, z2.s 5473 // vl128 state = 0xee2c412e 5474 __ dci(0x05a231ab); // tbl z11.s, {z13.s}, z2.s 5475 // vl128 state = 0xa183e51b 5476 __ dci(0x05a831af); // tbl z15.s, {z13.s}, z8.s 5477 // vl128 state = 0xcd60a839 5478 __ dci(0x05ea31a7); // tbl z7.d, {z13.d}, z10.d 5479 // vl128 state = 0x3abe2d8b 5480 __ dci(0x05fa33af); // tbl z15.d, {z29.d}, z26.d 5481 // vl128 state = 0xf596f00c 5482 __ dci(0x05fe32ae); // tbl z14.d, {z21.d}, z30.d 5483 // vl128 state = 0x3e791a5a 5484 __ dci(0x057a32be); // tbl z30.h, {z21.h}, z26.h 5485 // vl128 state = 0x27f4086e 5486 __ dci(0x05fe32ae); // tbl z14.d, {z21.d}, z30.d 5487 // vl128 state = 0xec1be238 5488 __ dci(0x05fe32aa); // tbl z10.d, {z21.d}, z30.d 5489 // vl128 state = 0xa91ab6d9 5490 __ dci(0x057e32e2); // tbl z2.h, {z23.h}, z30.h 5491 // vl128 state = 0xd1ab825f 5492 __ dci(0x057e32e0); // tbl z0.h, {z23.h}, z30.h 5493 // vl128 state = 0xca42860c 5494 __ dci(0x057f3270); // tbl z16.h, {z19.h}, z31.h 5495 // vl128 state 
= 0xff27daa0 5496 __ dci(0x05673271); // tbl z17.h, {z19.h}, z7.h 5497 // vl128 state = 0x9b358bbf 5498 __ dci(0x05e73379); // tbl z25.d, {z27.d}, z7.d 5499 // vl128 state = 0xf0a4c65d 5500 __ dci(0x05e3333d); // tbl z29.d, {z25.d}, z3.d 5501 // vl128 state = 0x3de40d5b 5502 __ dci(0x05e33335); // tbl z21.d, {z25.d}, z3.d 5503 // vl128 state = 0xfeadc4fa 5504 __ dci(0x05f33137); // tbl z23.d, {z9.d}, z19.d 5505 // vl128 state = 0x417c23c2 5506 __ dci(0x05b33336); // tbl z22.s, {z25.s}, z19.s 5507 // vl128 state = 0x4bd7bddc 5508 __ dci(0x05b1323e); // tbl z30.s, {z17.s}, z17.s 5509 // vl128 state = 0x525aafe8 5510 __ dci(0x05b0303c); // tbl z28.s, {z1.s}, z16.s 5511 // vl128 state = 0xee67e295 5512 __ dci(0x05b0308c); // tbl z12.s, {z4.s}, z16.s 5513 // vl128 state = 0xce1a6811 5514 __ dci(0x05b030e8); // tbl z8.s, {z7.s}, z16.s 5515 // vl128 state = 0xfba53f74 5516 __ dci(0x05a030b8); // tbl z24.s, {z5.s}, z0.s 5517 // vl128 state = 0x56a69350 5518 __ dci(0x05e830b0); // tbl z16.d, {z5.d}, z8.d 5519 // vl128 state = 0xe0665941 5520 __ dci(0x05e830b2); // tbl z18.d, {z5.d}, z8.d 5521 // vl128 state = 0xc6680470 5522 __ dci(0x05e931b3); // tbl z19.d, {z13.d}, z9.d 5523 // vl128 state = 0x64a925a9 5524 } 5525 5526 uint32_t state; 5527 ComputeMachineStateHash(&masm, &state); 5528 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 5529 __ Ldr(w0, MemOperand(x0)); 5530 5531 END(); 5532 if (CAN_RUN()) { 5533 RUN(); 5534 uint32_t expected_hashes[] = { 5535 0x64a925a9, 5536 0x89750b9d, 5537 0xb803659e, 5538 0xa21efc63, 5539 0x67f967b8, 5540 0x4e52e209, 5541 0x42c1692f, 5542 0x4d8539c7, 5543 0x6828f0f4, 5544 0x3c75d27a, 5545 0x2e3341c9, 5546 0xfe4a8f4f, 5547 0xd27b47ae, 5548 0x665d8f8b, 5549 0x3230c584, 5550 0xcf1d6e82, 5551 }; 5552 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 5553 } 5554} 5555 5556TEST_SVE(sve2_cdot) { 5557 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 5558 CPUFeatures::kSVE2, 5559 CPUFeatures::kNEON, 5560 CPUFeatures::kCRC32); 5561 
START(); 5562 5563 SetInitialMachineState(&masm); 5564 // state = 0xe2bd2480 5565 5566 { 5567 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 5568 __ dci(0x4488104f); // cdot z15.s, z2.b, z8.b, #0 5569 // vl128 state = 0x25fd51d1 5570 __ dci(0x448a106e); // cdot z14.s, z3.b, z10.b, #0 5571 // vl128 state = 0x490576d5 5572 __ dci(0x448a1246); // cdot z6.s, z18.b, z10.b, #0 5573 // vl128 state = 0x25a6fe4b 5574 __ dci(0x448e12ce); // cdot z14.s, z22.b, z14.b, #0 5575 // vl128 state = 0xc378b2df 5576 __ dci(0x448412cf); // cdot z15.s, z22.b, z4.b, #0 5577 // vl128 state = 0xe92a358d 5578 __ dci(0x448412c7); // cdot z7.s, z22.b, z4.b, #0 5579 // vl128 state = 0x7408b292 5580 __ dci(0x44c41257); // cdot z23.d, z18.h, z4.h, #0 5581 // vl128 state = 0xebc02289 5582 __ dci(0x448412d5); // cdot z21.s, z22.b, z4.b, #0 5583 // vl128 state = 0x9a7c2f1a 5584 __ dci(0x448712d7); // cdot z23.s, z22.b, z7.b, #0 5585 // vl128 state = 0xed91e0b4 5586 __ dci(0x44831295); // cdot z21.s, z20.b, z3.b, #0 5587 // vl128 state = 0x3dae4184 5588 __ dci(0x44821385); // cdot z5.s, z28.b, z2.b, #0 5589 // vl128 state = 0x213fb541 5590 __ dci(0x44c213c1); // cdot z1.d, z30.h, z2.h, #0 5591 // vl128 state = 0xcba3207a 5592 __ dci(0x44c61340); // cdot z0.d, z26.h, z6.h, #0 5593 // vl128 state = 0x9d6041f3 5594 __ dci(0x44c413d0); // cdot z16.d, z30.h, z4.h, #0 5595 // vl128 state = 0x4b931738 5596 __ dci(0x44cc12d8); // cdot z24.d, z22.h, z12.h, #0 5597 // vl128 state = 0x2503fbcc 5598 __ dci(0x448c1ac8); // cdot z8.s, z22.b, z12.b, #180 5599 // vl128 state = 0x53bc5303 5600 __ dci(0x448c12ec); // cdot z12.s, z23.b, z12.b, #0 5601 // vl128 state = 0xb3bf45c7 5602 __ dci(0x448812ad); // cdot z13.s, z21.b, z8.b, #0 5603 // vl128 state = 0x938b4e4f 5604 __ dci(0x44881689); // cdot z9.s, z20.b, z8.b, #90 5605 // vl128 state = 0x70106ddd 5606 __ dci(0x4498128b); // cdot z11.s, z20.b, z24.b, #0 5607 // vl128 state = 0x92108bb2 5608 __ dci(0x4498129b); // cdot z27.s, z20.b, z24.b, #0 5609 // 
vl128 state = 0x545230eb 5610 __ dci(0x449a12bf); // cdot z31.s, z21.b, z26.b, #0 5611 // vl128 state = 0x5cd2fb12 5612 __ dci(0x44da10af); // cdot z15.d, z5.h, z26.h, #0 5613 // vl128 state = 0xc03d9146 5614 __ dci(0x44da10ae); // cdot z14.d, z5.h, z26.h, #0 5615 // vl128 state = 0xbc2712f7 5616 __ dci(0x44db12be); // cdot z30.d, z21.h, z27.h, #0 5617 // vl128 state = 0xccf9d667 5618 __ dci(0x449b12ee); // cdot z14.s, z23.b, z27.b, #0 5619 // vl128 state = 0x2c1e08f1 5620 __ dci(0x449b12ef); // cdot z15.s, z23.b, z27.b, #0 5621 // vl128 state = 0x159d17d7 5622 __ dci(0x449b14ee); // cdot z14.s, z7.b, z27.b, #90 5623 // vl128 state = 0x892c97d3 5624 __ dci(0x449b1cac); // cdot z12.s, z5.b, z27.b, #270 5625 // vl128 state = 0x3841ce24 5626 __ dci(0x449b1aae); // cdot z14.s, z21.b, z27.b, #180 5627 // vl128 state = 0x30a24868 5628 __ dci(0x449a1aec); // cdot z12.s, z23.b, z26.b, #180 5629 // vl128 state = 0x2b836c8a 5630 __ dci(0x44981ace); // cdot z14.s, z22.b, z24.b, #180 5631 // vl128 state = 0x16a81963 5632 __ dci(0x44901a86); // cdot z6.s, z20.b, z16.b, #180 5633 // vl128 state = 0x924ac9ee 5634 __ dci(0x44981b8e); // cdot z14.s, z28.b, z24.b, #180 5635 // vl128 state = 0x3953da61 5636 __ dci(0x44891b8a); // cdot z10.s, z28.b, z9.b, #180 5637 // vl128 state = 0xad72b6d5 5638 __ dci(0x4499138b); // cdot z11.s, z28.b, z25.b, #0 5639 // vl128 state = 0x569b1b2c 5640 __ dci(0x4498119b); // cdot z27.s, z12.b, z24.b, #0 5641 // vl128 state = 0xdbb36925 5642 __ dci(0x449c199a); // cdot z26.s, z12.b, z28.b, #180 5643 // vl128 state = 0x4be861d1 5644 __ dci(0x44901992); // cdot z18.s, z12.b, z16.b, #180 5645 // vl128 state = 0x1e83ddb5 5646 __ dci(0x44901a90); // cdot z16.s, z20.b, z16.b, #180 5647 // vl128 state = 0x180556e0 5648 __ dci(0x44911ac0); // cdot z0.s, z22.b, z17.b, #180 5649 // vl128 state = 0x2cbf5db5 5650 __ dci(0x44951bc1); // cdot z1.s, z30.b, z21.b, #180 5651 // vl128 state = 0x428f97bd 5652 __ dci(0x44851b40); // cdot z0.s, z26.b, z5.b, #180 5653 // 
// vl128 state = 0xe0f0659f
    __ dci(0x44851a70);  // cdot z16.s, z19.b, z5.b, #180
    // vl128 state = 0x4142d23c
    __ dci(0x44861a74);  // cdot z20.s, z19.b, z6.b, #180
    // vl128 state = 0x74f7d373
    __ dci(0x44921a76);  // cdot z22.s, z19.b, z18.b, #180
    // vl128 state = 0x5b4ef670
    __ dci(0x44921246);  // cdot z6.s, z18.b, z18.b, #0
    // vl128 state = 0x1fe5d31d
    __ dci(0x44981247);  // cdot z7.s, z18.b, z24.b, #0
    // vl128 state = 0x782a0559
    __ dci(0x44981746);  // cdot z6.s, z26.b, z24.b, #90
    // vl128 state = 0x84cbc61d
    __ dci(0x449816c4);  // cdot z4.s, z22.b, z24.b, #90
    // vl128 state = 0x078aa009
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0x078aa009,
        0x3c4026df,
        0x3ae8e644,
        0x514dfdcd,
        0x2649444a,
        0x74a87bbe,
        0x14b8e9b3,
        0x92c65f4d,
        0xa3015fc1,
        0xab48b8fa,
        0x9e80ef05,
        0xb59b0dde,
        0xbcf04e6f,
        0xa7fa54a1,
        0xaed81dfc,
        0xdc7ffb07,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Simulator regression test for the SVE2 three-operand bitwise instructions:
// BSL, BSL1N, BSL2N, NBSL, BCAX and EOR3 (all on .d lanes).
//
// Like the other sve2_* tests in this file, it follows a fixed pattern:
//  1. SetInitialMachineState() seeds the machine state (hash 0xe2bd2480 per
//     the comment below).
//  2. Exactly 50 raw encodings are emitted with dci().
//  3. ComputeMachineStateHash() is given `&state`; the code generated after it
//     loads that 32-bit value into w0.
//  4. After RUN(), x0 is compared with the expected hash for the current
//     vector length.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction at a 128-bit vector length; the final one equals
// expected_hashes[0].
TEST_SVE(sve2_bitwise_ternary) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 dci() encodings that follow.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x04793f99);  // bsl1n z25.d, z25.d, z25.d, z28.d
    // vl128 state = 0x70294e62
    __ dci(0x04b93f9b);  // bsl2n z27.d, z27.d, z25.d, z28.d
    // vl128 state = 0x0a3f0dc1
    __ dci(0x04b93f93);  // bsl2n z19.d, z19.d, z25.d, z28.d
    // vl128 state = 0x46500e35
    __ dci(0x04b93dbb);  // bsl2n z27.d, z27.d, z25.d, z13.d
    // vl128 state = 0x25bdcc83
    __ dci(0x04b53db9);  // bsl2n z25.d, z25.d, z21.d, z13.d
    // vl128 state = 0x6d33b943
    __ dci(0x04bd3d29);  // bsl2n z9.d, z9.d, z29.d, z9.d
    // vl128 state = 0xa218e11a
    __ dci(0x04ad3d0d);  // bsl2n z13.d, z13.d, z13.d, z8.d
    // vl128 state = 0xc5e2f5a2
    __ dci(0x04a53d4f);  // bsl2n z15.d, z15.d, z5.d, z10.d
    // vl128 state = 0x519e4735
    __ dci(0x04653d47);  // bsl1n z7.d, z7.d, z5.d, z10.d
    // vl128 state = 0x132f7ce6
    __ dci(0x04613dc6);  // bsl1n z6.d, z6.d, z1.d, z14.d
    // vl128 state = 0x91bcf19b
    __ dci(0x04673dc7);  // bsl1n z7.d, z7.d, z7.d, z14.d
    // vl128 state = 0x3bd0ba20
    __ dci(0x04673dc5);  // bsl1n z5.d, z5.d, z7.d, z14.d
    // vl128 state = 0xbf3b39fa
    __ dci(0x04e73cc1);  // nbsl z1.d, z1.d, z7.d, z6.d
    // vl128 state = 0xd304b643
    __ dci(0x04773cc5);  // bsl1n z5.d, z5.d, z23.d, z6.d
    // vl128 state = 0xdd6cd3ce
    __ dci(0x04773ac1);  // bcax z1.d, z1.d, z23.d, z22.d
    // vl128 state = 0x3f456acf
    __ dci(0x04773ac3);  // bcax z3.d, z3.d, z23.d, z22.d
    // vl128 state = 0xbe117f80
    __ dci(0x047739c7);  // bcax z7.d, z7.d, z23.d, z14.d
    // vl128 state = 0xd3cd3dcd
    __ dci(0x047439c5);  // bcax z5.d, z5.d, z20.d, z14.d
    // vl128 state = 0xee4f636d
    __ dci(0x04743841);  // bcax z1.d, z1.d, z20.d, z2.d
    // vl128 state = 0xf21b00a1
    __ dci(0x04753811);  // bcax z17.d, z17.d, z21.d, z0.d
    // vl128 state = 0x597ab14d
    __ dci(0x04753815);  // bcax z21.d, z21.d, z21.d, z0.d
    // vl128 state = 0xf5d56322
    __ dci(0x04713917);  // bcax z23.d, z23.d, z17.d, z8.d
    // vl128 state = 0x17f3cedf
    __ dci(0x04793987);  // bcax z7.d, z7.d, z25.d, z12.d
    // vl128 state = 0x7492c4e5
    __ dci(0x04693885);  // bcax z5.d, z5.d, z9.d, z4.d
    // vl128 state = 0xb796548c
    __ dci(0x046838d5);  // bcax z21.d, z21.d, z8.d, z6.d
    // vl128 state = 0xf4e12422
    __ dci(0x046838d4);  // bcax z20.d, z20.d, z8.d, z6.d
    // vl128 state = 0x16187a4c
    __ dci(0x043838d6);  // eor3 z22.d, z22.d, z24.d, z6.d
    // vl128 state = 0xd95e6713
    __ dci(0x043c39de);  // eor3 z30.d, z30.d, z28.d, z14.d
    // vl128 state = 0xb8322807
    __ dci(0x047c38ce);  // bcax z14.d, z14.d, z28.d, z6.d
    // vl128 state = 0x6871619d
    __ dci(0x047c38cf);  // bcax z15.d, z15.d, z28.d, z6.d
    // vl128 state = 0x57c5a4af
    __ dci(0x043c384e);  // eor3 z14.d, z14.d, z28.d, z2.d
    // vl128 state = 0x1a62efdf
    __ dci(0x0474385e);  // bcax z30.d, z30.d, z20.d, z2.d
    // vl128 state = 0xc9d1ea1e
    __ dci(0x047c3a4e);  // bcax z14.d, z14.d, z28.d, z18.d
    // vl128 state = 0xd5ced43e
    __ dci(0x047c3c4f);  // bsl1n z15.d, z15.d, z28.d, z2.d
    // vl128 state = 0x79f22e16
    __ dci(0x047d3d4b);  // bsl1n z11.d, z11.d, z29.d, z10.d
    // vl128 state = 0xc4ee5d6e
    __ dci(0x04793c49);  // bsl1n z9.d, z9.d, z25.d, z2.d
    // vl128 state = 0xea11e840
    __ dci(0x04793c99);  // bsl1n z25.d, z25.d, z25.d, z4.d
    // vl128 state = 0x95221bc2
    __ dci(0x04613c91);  // bsl1n z17.d, z17.d, z1.d, z4.d
    // vl128 state = 0xa40acfbe
    __ dci(0x04233c90);  // bsl z16.d, z16.d, z3.d, z4.d
    // vl128 state = 0x8d3ef22f
    __ dci(0x04233c80);  // bsl z0.d, z0.d, z3.d, z4.d
    // vl128 state = 0xd07d1bb2
    __ dci(0x04223ca4);  // bsl z4.d, z4.d, z2.d, z5.d
    // vl128 state = 0xa2c4169c
    __ dci(0x04223ca5);  // bsl z5.d, z5.d, z2.d, z5.d
    // vl128 state = 0x3c6415e5
    __ dci(0x04a03ca1);  // bsl2n z1.d, z1.d, z0.d, z5.d
    // vl128 state = 0x55b93add
    __ dci(0x04a03cb1);  // bsl2n z17.d, z17.d, z0.d, z5.d
    // vl128 state = 0x9b86e5b3
    __ dci(0x04a13cf9);  // bsl2n z25.d, z25.d, z1.d, z7.d
    // vl128 state = 0xdd310e8f
    __ dci(0x04a13cfd);  // bsl2n z29.d, z29.d, z1.d, z7.d
    // vl128 state = 0xae66fb44
    __ dci(0x04a13ced);  // bsl2n z13.d, z13.d, z1.d, z7.d
    // vl128 state = 0xc69dd926
    __ dci(0x04b93ce9);  // bsl2n z9.d, z9.d, z25.d, z7.d
    // vl128 state = 0x15592b37
    __ dci(0x04b93dcb);  // bsl2n z11.d, z11.d, z25.d, z14.d
    // vl128 state = 0xbfcda4d3
    __ dci(0x04b83d4f);  // bsl2n z15.d, z15.d, z24.d, z10.d
    // vl128 state = 0xaef1e0b6
  }

  // `state` is a host variable; the generated code reads it back through its
  // address so the value ends up in w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, selected by (number of Q-sized lanes - 1); entry 0 is
    // the value shown in the last `vl128 state` comment above.
    uint32_t expected_hashes[] = {
        0xaef1e0b6,
        0xc9b3303f,
        0xc547c948,
        0x0fc817f7,
        0x22d2eab3,
        0x225b3ecd,
        0xf7a34a06,
        0xa07e68ed,
        0xdba0f9fa,
        0x64199691,
        0xa650bfa3,
        0xc6bfeab9,
        0x7efe63c4,
        0x66e4139c,
        0xc580dcf5,
        0x95687693,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Simulator regression test for the SVE2 WHILE* predicate generators that
// appear in the encodings below and in the rest of this test: WHILEHS,
// WHILEHI, WHILEGT and WHILEGE, with both W and X scalar operands and
// b/h/s/d predicate element sizes. It uses the same seed / 50 x dci() / hash /
// compare pattern as sve2_bitwise_ternary above.
TEST_SVE(sve2_while) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x257109e3);  // whilehs p3.h, w15, w17
    // vl128 state = 0x4568cc4c
    __ dci(0x257709f3);  // whilehi p3.h, w15, w23
    // vl128 state = 0xf148a8ac
    __ dci(0x25f509f7);  // whilehi p7.d, w15, w21
    // vl128 state = 0x2fe3dcb9
    __ dci(0x257508f5);  // whilehi p5.h, w7, w21
    // vl128 state = 0x88429dee
    __ dci(0x257100f1);  // whilegt p1.h, w7, w17
    // vl128 state = 0x5a3b89ec
    __ dci(0x253108f0);  // whilehi p0.b, w7, w17
    // vl128 state = 0x73276c52
    __ dci(0x253108f1);  // whilehi p1.b, w7, w17
    // vl128 state = 0xa278d7f0
    __ dci(0x257508f9);  // whilehi p9.h, w7, w21
    // vl128 state = 0xa438aefc
    __ dci(0x25750858);  // whilehi p8.h, w2, w21
    // vl128 state = 0x33e13c17
    __ dci(0x25770a50);  // whilehi p0.h, w18, w23
    // vl128 state = 0x01947abe
    __ dci(0x25751a52);  //
whilehi p2.h, x18, x21 5877 // vl128 state = 0x2cf410f2 5878 __ dci(0x25711a7a); // whilehi p10.h, x19, x17 5879 // vl128 state = 0x4bb6efc1 5880 __ dci(0x25391a78); // whilehi p8.b, x19, x25 5881 // vl128 state = 0xec1afdd6 5882 __ dci(0x25290a70); // whilehi p0.b, w19, w9 5883 // vl128 state = 0xde6fbb7f 5884 __ dci(0x25290a78); // whilehi p8.b, w19, w9 5885 // vl128 state = 0x79c3a968 5886 __ dci(0x25a90b68); // whilehs p8.s, w27, w9 5887 // vl128 state = 0x4b32e81a 5888 __ dci(0x25a903e9); // whilege p9.s, wzr, w9 5889 // vl128 state = 0x994bfc18 5890 __ dci(0x25a909ed); // whilehs p13.s, w15, w9 5891 // vl128 state = 0x6d6e231f 5892 __ dci(0x25a909ef); // whilehs p15.s, w15, w9 5893 // vl128 state = 0x41945298 5894 __ dci(0x25a909eb); // whilehs p11.s, w15, w9 5895 // vl128 state = 0x659ccb75 5896 __ dci(0x25b909c9); // whilehs p9.s, w14, w25 5897 // vl128 state = 0xd078a7ed 5898 __ dci(0x25bd098d); // whilehs p13.s, w12, w29 5899 // vl128 state = 0xf6f2d8ae 5900 __ dci(0x25b90909); // whilehs p9.s, w8, w25 5901 // vl128 state = 0x248bccac 5902 __ dci(0x25fb090b); // whilehs p11.d, w8, w27 5903 // vl128 state = 0x09b0b9cc 5904 __ dci(0x25fb090a); // whilehs p10.d, w8, w27 5905 // vl128 state = 0xfa811fef 5906 __ dci(0x25eb0b02); // whilehs p2.d, w24, w11 5907 // vl128 state = 0xdcb96f30 5908 __ dci(0x25eb0bc3); // whilehs p3.d, w30, w11 5909 // vl128 state = 0xbae01fd2 5910 __ dci(0x25e30acb); // whilehs p11.d, w22, w3 5911 // vl128 state = 0xbcfdc2b8 5912 __ dci(0x25eb08c9); // whilehs p9.d, w6, w11 5913 // vl128 state = 0xdb60ba22 5914 __ dci(0x25a308c1); // whilehs p1.s, w6, w3 5915 // vl128 state = 0xe895df80 5916 __ dci(0x25a108e5); // whilehs p5.s, w7, w1 5917 // vl128 state = 0x3aeccb82 5918 __ dci(0x25a009e4); // whilehs p4.s, w15, w0 5919 // vl128 state = 0xe6b1b3b3 5920 __ dci(0x25a009ec); // whilehs p12.s, w15, w0 5921 // vl128 state = 0xd2e10d82 5922 __ dci(0x25a019ae); // whilehs p14.s, x13, x0 5923 // vl128 state = 0x4bf596b8 5924 __ 
dci(0x25e018af);  // whilehs p15.d, x5, x0
    // vl128 state = 0xb8d27541
    __ dci(0x25e918ad);  // whilehs p13.d, x5, x9
    // vl128 state = 0x01b6f92f
    __ dci(0x25eb188c);  // whilehs p12.d, x4, x11
    // vl128 state = 0xd3cfed2d
    __ dci(0x25eb188e);  // whilehs p14.d, x4, x11
    // vl128 state = 0x9947e07e
    __ dci(0x25e21886);  // whilehs p6.d, x4, x2
    // vl128 state = 0xd9995e11
    __ dci(0x25a21084);  // whilege p4.s, x4, x2
    // vl128 state = 0xd45d81ed
    __ dci(0x25b31085);  // whilege p5.s, x4, x19
    // vl128 state = 0x4d67b543
    __ dci(0x25a3100d);  // whilege p13.s, x0, x3
    // vl128 state = 0x00f0526c
    __ dci(0x252b101d);  // whilegt p13.b, x0, x11
    // vl128 state = 0x9d176025
    __ dci(0x253b1095);  // whilegt p5.b, x4, x27
    // vl128 state = 0xd6544089
    __ dci(0x253b1091);  // whilegt p1.b, x4, x27
    // vl128 state = 0x37d83129
    __ dci(0x253f10d5);  // whilegt p5.b, x6, xzr
    // vl128 state = 0x8e121615
    __ dci(0x252f11d4);  // whilegt p4.b, x14, x15
    // vl128 state = 0x83d6c9e9
    __ dci(0x25af01d5);  // whilegt p5.s, w14, w15
    // vl128 state = 0xe865fad7
    __ dci(0x25eb01c5);  // whilege p5.d, w14, w11
    // vl128 state = 0x5eaf208e
    __ dci(0x25fb0144);  // whilege p4.d, w10, w27
    // vl128 state = 0x8cd6348c
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0x8cd6348c,
        0x42a1f9b4,
        0x13fc2001,
        0x492cb2ac,
        0xa67cfb65,
        0x80d4639f,
        0xfa388a09,
        0x8c7ad8d9,
        0x299c5bfe,
        0x9183808a,
        0x3fc14d86,
        0x7cc08a05,
        0x9c85cd48,
        0xd06e8299,
        0x6a107152,
        0x81d99d7c,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Simulator regression test for the indexed form of CDOT: .s accumulators with
// .b sources (element index 0-3) and .d accumulators with .h sources (element
// index 0-1), across the rotations #0, #90, #180 and #270.
//
// Like the other sve2_* tests in this file, it follows a fixed pattern:
//  1. SetInitialMachineState() seeds the machine state (hash 0xe2bd2480 per
//     the comment below).
//  2. Exactly 50 raw encodings are emitted with dci().
//  3. ComputeMachineStateHash() is given `&state`; the code generated after it
//     loads that 32-bit value into w0.
//  4. After RUN(), x0 is compared with the expected hash for the current
//     vector length.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction at a 128-bit vector length; the final one equals
// expected_hashes[0].
TEST_SVE(sve2_cdot_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 dci() encodings that follow.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x44bb4ef6);  // cdot z22.s, z23.b, z3.b[3], #270
    // vl128 state = 0x452d1d6e
    __ dci(0x44b94ff7);  // cdot z23.s, z31.b, z1.b[3], #270
    // vl128 state = 0x546c9569
    __ dci(0x44b94dd5);  // cdot z21.s, z14.b, z1.b[3], #270
    // vl128 state = 0xa2abf834
    __ dci(0x44bd45d7);  // cdot z23.s, z14.b, z5.b[3], #90
    // vl128 state = 0xba77ed64
    __ dci(0x44fc45df);  // cdot z31.d, z14.h, z12.h[1], #90
    // vl128 state = 0xe78163f2
    __ dci(0x44f441db);  // cdot z27.d, z14.h, z4.h[1], #0
    // vl128 state = 0xca3b116d
    __ dci(0x44f44dd3);  // cdot z19.d, z14.h, z4.h[1], #270
    // vl128 state = 0x57ba3771
    __ dci(0x44b44d83);  // cdot z3.s, z12.b, z4.b[2], #270
    // vl128 state = 0x4edccb88
    __ dci(0x44ac4d82);  // cdot z2.s, z12.b, z4.b[1], #270
    // vl128 state = 0xc9543499
    __ dci(0x44a84f8a);  // cdot z10.s, z28.b, z0.b[1], #270
    // vl128 state = 0x9d8fe439
    __ dci(0x44a84d08);  // cdot z8.s, z8.b, z0.b[1], #270
    // vl128 state = 0x3c1bf0cc
    __ dci(0x44ba4d09);  // cdot z9.s, z8.b, z2.b[3], #270
    // vl128 state = 0x983716f1
    __ dci(0x44ea4d0d);  // cdot z13.d, z8.h, z10.h[0], #270
    // vl128 state = 0x2df96300
    __ dci(0x44eb491d);  // cdot z29.d, z8.h, z11.h[0], #180
    // vl128 state = 0xc23edde3
    __ dci(0x44e9499f);  // cdot z31.d, z12.h, z9.h[0], #180
    // vl128 state = 0xef0ace9d
    __ dci(0x44e84b9d);  // cdot z29.d, z28.h, z8.h[0], #180
    // vl128 state = 0x2cce8002
    __ dci(0x44e84b99);  // cdot z25.d, z28.h, z8.h[0], #180
    // vl128 state = 0xd07f46a1
    __ dci(0x44f84a9d);  // cdot z29.d, z20.h, z8.h[1], #180
    // vl128 state = 0x239831e8
    __ dci(0x44f84a99);  // cdot z25.d, z20.h, z8.h[1], #180
    // vl128 state = 0xa110988d
    __ dci(0x44e84a09);  // cdot z9.d, z16.h, z8.h[0], #180
    // vl128 state = 0x2b9ef292
    __ dci(0x44e84a19);  // cdot z25.d, z16.h, z8.h[0], #180
    // vl128 state = 0x50eeb818
    __ dci(0x44e04b1b);  // cdot z27.d, z24.h, z0.h[0], #180
    // vl128 state = 0xc33ce03b
    __ dci(0x44e04a2b);  // cdot z11.d, z17.h, z0.h[0], #180
    // vl128 state = 0xe163b5c9
    __ dci(0x44e04b0f);  // cdot z15.d, z24.h, z0.h[0], #180
    // vl128 state = 0x052a34eb
    __ dci(0x44e04b1f);  // cdot z31.d, z24.h, z0.h[0], #180
    // vl128 state = 0x0660afb4
    __ dci(0x44e84b4f);  // cdot z15.d, z26.h, z8.h[0], #180
    // vl128 state = 0x0ae01233
    __ dci(0x44ee4b4e);  // cdot z14.d, z26.h, z14.h[0], #180
    // vl128 state = 0xde7bdd15
    __ dci(0x44ae4b7e);  // cdot z30.s, z27.b, z6.b[1], #180
    // vl128 state = 0x758973a1
    __ dci(0x44a6497f);  // cdot z31.s, z11.b, z6.b[0], #180
    // vl128 state = 0xb3c5df37
    __ dci(0x44a64df7);  // cdot z23.s, z15.b, z6.b[0], #270
    // vl128 state = 0xe652f054
    __ dci(0x44a64c73);  // cdot z19.s, z3.b, z6.b[0], #270
    // vl128 state = 0xc4b58041
    __ dci(0x44a64de3);  // cdot z3.s, z15.b, z6.b[0], #270
    // vl128 state = 0x1239ca90
    __ dci(0x44a749e2);  // cdot z2.s, z15.b, z7.b[0], #180
    // vl128 state = 0x4a01cdcb
    __ dci(0x44a740e0);  // cdot z0.s, z7.b, z7.b[0], #0
    // vl128 state = 0x604e45cf
    __ dci(0x44a344e2);  // cdot z2.s, z7.b, z3.b[0], #90
    // vl128 state = 0x12fe2972
    __ dci(0x44a34ca3);  // cdot z3.s, z5.b, z3.b[0], #270
    // vl128 state = 0x78e0bb2e
    __ dci(0x44e14cb3);  // cdot z19.d, z5.h, z1.h[0], #270
    // vl128 state = 0xe3a69b46
    __ dci(0x44e14d31);  // cdot z17.d, z9.h, z1.h[0], #270
    // vl128 state = 0xe6b58aa4
    __ dci(0x44f14d01);  // cdot z1.d, z8.h, z1.h[1], #270
    // vl128 state = 0xffcfb597
    __ dci(0x44f14551);  // cdot z17.d, z10.h, z1.h[1], #90
    // vl128 state = 0x2745934b
    __ dci(0x44f345d5);  // cdot z21.d, z14.h, z3.h[1], #90
    // vl128 state = 0xa38b5571
    __ dci(0x44f34574);  // cdot z20.d, z11.h, z3.h[1], #90
    // vl128 state = 0x978afd92
    __ dci(0x44f34576);  // cdot z22.d, z11.h, z3.h[1], #90
    // vl128 state = 0x9f1b19c9
    __ dci(0x44f34f77);  // cdot z23.d, z27.h, z3.h[1], #270
    // vl128 state = 0x61a31d64
    __ dci(0x44f24f5f);  // cdot z31.d, z26.h, z2.h[1], #270
    // vl128 state = 0x1e71023e
    __ dci(0x44fa4fcf);  // cdot z15.d, z30.h, z10.h[1], #270
    // vl128 state = 0xdbe5ffb3
    __ dci(0x44ba4f4e);  // cdot z14.s, z26.b, z2.b[3], #270
    // vl128 state = 0x51390e81
    __ dci(0x44ba470c);  // cdot z12.s, z24.b, z2.b[3], #90
    // vl128 state = 0x59ad5198
    __ dci(0x44b2479c);  // cdot z28.s, z28.b, z2.b[2], #90
    // vl128 state = 0xe997de49
    __ dci(0x44b24fbd);  // cdot z29.s, z29.b, z2.b[2], #270
    // vl128 state = 0x5533cefa
  }

  // `state` is a host variable; the generated code reads it back through its
  // address so the value ends up in w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, selected by (number of Q-sized lanes - 1); entry 0 is
    // the value shown in the last `vl128 state` comment above.
    uint32_t expected_hashes[] = {
        0x5533cefa,
        0x1462a298,
        0x1acb4ead,
        0xeb05ddf0,
        0x23fe8c86,
        0xbb1e9f8c,
        0x4a933f43,
        0x4cd64b55,
        0x84a4b8b7,
        0x52019619,
        0x4442432b,
        0x9b353ce8,
        0x333c9eef,
        0x291eac87,
        0x110f7371,
        0x009b25cb,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Simulator regression test for the SPLICE form that takes a two-register
// source list ({zn, zn+1}), across b/h/s/d element sizes and several
// governing predicates. It uses the same seed / 50 x dci() / hash / compare
// pattern as sve2_cdot_index above.
TEST_SVE(sve2_splice) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x05ed89a7);  // splice z7.d, p2, {z13.d, z14.d}
    // vl128 state = 0x6acff994
    __
dci(0x05ed81e5); // splice z5.d, p0, {z15.d, z16.d} 6147 // vl128 state = 0x2c8b3e5d 6148 __ dci(0x05ed8375); // splice z21.d, p0, {z27.d, z28.d} 6149 // vl128 state = 0x2588e208 6150 __ dci(0x05ed9174); // splice z20.d, p4, {z11.d, z12.d} 6151 // vl128 state = 0x4d6fa6b3 6152 __ dci(0x056d91f6); // splice z22.h, p4, {z15.h, z16.h} 6153 // vl128 state = 0x9f00a308 6154 __ dci(0x056d92f2); // splice z18.h, p4, {z23.h, z24.h} 6155 // vl128 state = 0x5479cc74 6156 __ dci(0x056d96a2); // splice z2.h, p5, {z21.h, z22.h} 6157 // vl128 state = 0xca7a6a63 6158 __ dci(0x056d9fa6); // splice z6.h, p7, {z29.h, z30.h} 6159 // vl128 state = 0x007fc934 6160 __ dci(0x056d9be4); // splice z4.h, p6, {z31.h, z0.h} 6161 // vl128 state = 0x8186741b 6162 __ dci(0x056d97ec); // splice z12.h, p5, {z31.h, z0.h} 6163 // vl128 state = 0x26ab76b9 6164 __ dci(0x056d979c); // splice z28.h, p5, {z28.h, z29.h} 6165 // vl128 state = 0x933201f4 6166 __ dci(0x056d9794); // splice z20.h, p5, {z28.h, z29.h} 6167 // vl128 state = 0x42cf6784 6168 __ dci(0x052d9f96); // splice z22.b, p7, {z28.b, z29.b} 6169 // vl128 state = 0x0838e776 6170 __ dci(0x056d8f9e); // splice z30.h, p3, {z28.h, z29.h} 6171 // vl128 state = 0x89637e78 6172 __ dci(0x056d9fd6); // splice z22.h, p7, {z30.h, z31.h} 6173 // vl128 state = 0xb94dbb49 6174 __ dci(0x056d8dd7); // splice z23.h, p3, {z14.h, z15.h} 6175 // vl128 state = 0x260f8127 6176 __ dci(0x05ad8ddf); // splice z31.s, p3, {z14.s, z15.s} 6177 // vl128 state = 0x16257a12 6178 __ dci(0x05ad8ddd); // splice z29.s, p3, {z14.s, z15.s} 6179 // vl128 state = 0x803d0766 6180 __ dci(0x05ad8d7c); // splice z28.s, p3, {z11.s, z12.s} 6181 // vl128 state = 0xcc405331 6182 __ dci(0x05ad8d74); // splice z20.s, p3, {z11.s, z12.s} 6183 // vl128 state = 0x0ed25e4c 6184 __ dci(0x05ad8d64); // splice z4.s, p3, {z11.s, z12.s} 6185 // vl128 state = 0x167daf8b 6186 __ dci(0x05ed8c6c); // splice z12.d, p3, {z3.d, z4.d} 6187 // vl128 state = 0x435f3bb9 6188 __ dci(0x05ed8cad); // splice z13.d, 
p3, {z5.d, z6.d} 6189 // vl128 state = 0xe49df619 6190 __ dci(0x056d8dbd); // splice z29.h, p3, {z13.h, z14.h} 6191 // vl128 state = 0x1f54e928 6192 __ dci(0x056d8f2d); // splice z13.h, p3, {z25.h, z26.h} 6193 // vl128 state = 0x24adbe77 6194 __ dci(0x056d8f9d); // splice z29.h, p3, {z28.h, z29.h} 6195 // vl128 state = 0xcc2ec3e6 6196 __ dci(0x056d8f95); // splice z21.h, p3, {z28.h, z29.h} 6197 // vl128 state = 0xb71c64f7 6198 __ dci(0x056d8f34); // splice z20.h, p3, {z25.h, z26.h} 6199 // vl128 state = 0xb32756f0 6200 __ dci(0x05ed8f64); // splice z4.d, p3, {z27.d, z28.d} 6201 // vl128 state = 0x3f7d1f13 6202 __ dci(0x05ad8e60); // splice z0.s, p3, {z19.s, z20.s} 6203 // vl128 state = 0x9a7ffbde 6204 __ dci(0x052d8e50); // splice z16.b, p3, {z18.b, z19.b} 6205 // vl128 state = 0x5c82ed17 6206 __ dci(0x052d9652); // splice z18.b, p5, {z18.b, z19.b} 6207 // vl128 state = 0x28b9cd60 6208 __ dci(0x052d9ed0); // splice z16.b, p7, {z22.b, z23.b} 6209 // vl128 state = 0xab0238ba 6210 __ dci(0x052d9ed4); // splice z20.b, p7, {z22.b, z23.b} 6211 // vl128 state = 0x9f0e0ef9 6212 __ dci(0x056d9cc4); // splice z4.h, p7, {z6.h, z7.h} 6213 // vl128 state = 0xec31d5e7 6214 __ dci(0x056d98e6); // splice z6.h, p6, {z7.h, z8.h} 6215 // vl128 state = 0xbc9c0048 6216 __ dci(0x056d9ee4); // splice z4.h, p7, {z23.h, z24.h} 6217 // vl128 state = 0xe2e9c9a3 6218 __ dci(0x056d9ef4); // splice z20.h, p7, {z23.h, z24.h} 6219 // vl128 state = 0x60ffa98a 6220 __ dci(0x056d9ab6); // splice z22.h, p6, {z21.h, z22.h} 6221 // vl128 state = 0xae70ed0f 6222 __ dci(0x056d9294); // splice z20.h, p4, {z20.h, z21.h} 6223 // vl128 state = 0x5736c563 6224 __ dci(0x056d9284); // splice z4.h, p4, {z20.h, z21.h} 6225 // vl128 state = 0xf31dd2d9 6226 __ dci(0x052d920c); // splice z12.b, p4, {z16.b, z17.b} 6227 // vl128 state = 0x04502fea 6228 __ dci(0x052d921c); // splice z28.b, p4, {z16.b, z17.b} 6229 // vl128 state = 0x852f98b1 6230 __ dci(0x052d9094); // splice z20.b, p4, {z4.b, z5.b} 6231 // vl128 state 
// = 0xb40c5931
    __ dci(0x052d90f6);  // splice z22.b, p4, {z7.b, z8.b}
    // vl128 state = 0x64d6138d
    __ dci(0x052d88e6);  // splice z6.b, p2, {z7.b, z8.b}
    // vl128 state = 0x51bb6564
    __ dci(0x052d88e4);  // splice z4.b, p2, {z7.b, z8.b}
    // vl128 state = 0x7ed599b0
    __ dci(0x05ad8865);  // splice z5.s, p2, {z3.s, z4.s}
    // vl128 state = 0xa201547d
    __ dci(0x05ad9961);  // splice z1.s, p6, {z11.s, z12.s}
    // vl128 state = 0x9508f19c
    __ dci(0x05ed9945);  // splice z5.d, p6, {z10.d, z11.d}
    // vl128 state = 0x95399cfd
  }

  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    uint32_t expected_hashes[] = {
        0x95399cfd,
        0xa960b01e,
        0x1fedaa18,
        0xe2fd3ec3,
        0x3edc353b,
        0xd809efd8,
        0x2a04f527,
        0xe4b9bb4a,
        0x72e5ed3e,
        0x63d6fe93,
        0xd2ad18fa,
        0x522fe057,
        0xc7ba2f7d,
        0x2dd44bd3,
        0x68b62ae6,
        0x06ea6854,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

// Simulator regression test for the SVE2 pointer-conflict predicate
// generators WHILEWR and WHILERW, using X-register operands (including xzr)
// and b/h/s/d predicate element sizes.
//
// Like the other sve2_* tests in this file, it follows a fixed pattern:
//  1. SetInitialMachineState() seeds the machine state (hash 0xe2bd2480 per
//     the comment below).
//  2. Exactly 50 raw encodings are emitted with dci().
//  3. ComputeMachineStateHash() is given `&state`; the code generated after it
//     loads that 32-bit value into w0.
//  4. After RUN(), x0 is compared with the expected hash for the current
//     vector length.
//
// The `vl128 state` comments appear to record the running hash after each
// instruction at a 128-bit vector length; the final one equals
// expected_hashes[0].
TEST_SVE(sve2_whilerw_whilewr) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the 50 dci() encodings that follow.
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x25ac3026);  // whilewr p6.s, x1, x12
    // vl128 state = 0x91e301ae
    __ dci(0x25ac3024);  // whilewr p4.s, x1, x12
    // vl128 state = 0x9203b261
    __ dci(0x25af3020);  // whilewr p0.s, x1, x15
    // vl128 state = 0x87505080
    __ dci(0x25ef3222);  // whilewr p2.d, x17, x15
    // vl128 state = 0x4ba695cb
    __ dci(0x25eb320a);  // whilewr p10.d, x16, x11
    // vl128 state = 0x5909d726
    __ dci(0x25e33308);  // whilewr p8.d, x24, x3
    // vl128 state = 0x52766071
    __ dci(0x25ea3309);  // whilewr p9.d, x24, x10
    // vl128 state = 0xe906a65a
    __ dci(0x25aa3101);  // whilewr p1.s, x8, x10
    // vl128 state = 0xd9d56c58
    __ dci(0x252b3100);  // whilewr p0.b, x8, x11
    // vl128 state = 0xcc868eb9
    __ dci(0x252a3008);  // whilewr p8.b, x0, x10
    // vl128 state = 0xf78cb912
    __ dci(0x2528304c);  // whilewr p12.b, x2, x8
    // vl128 state = 0x5493a6c4
    __ dci(0x25203004);  // whilewr p4.b, x0, x0
    // vl128 state = 0xb3d754b6
    __ dci(0x25303105);  // whilewr p5.b, x8, x16
    // vl128 state = 0x7fc526df
    __ dci(0x25b4310d);  // whilewr p13.s, x8, x20
    // vl128 state = 0x5999edda
    __ dci(0x25ac310c);  // whilewr p12.s, x8, x12
    // vl128 state = 0x46a86248
    __ dci(0x25ac310e);  // whilewr p14.s, x8, x12
    // vl128 state = 0x0dc5ed70
    __ dci(0x252c330a);  // whilewr p10.b, x24, x12
    // vl128 state = 0x453a1aa9
    __ dci(0x252f330b);  // whilewr p11.b, x24, x15
    // vl128 state = 0x98fbdcdf
    __ dci(0x256e330f);  // whilewr p15.h, x24, x14
    // vl128 state = 0x84699750
    __ dci(0x252e334d);  // whilewr p13.b, x26, x14
    // vl128 state = 0x198ea519
    __ dci(0x252e3349);  // whilewr p9.b, x26, x14
    // vl128 state = 0xb4956673
    __ dci(0x253e33c1);  // whilewr p1.b, x30, x30
    // vl128 state = 0xfd88dd74
    __ dci(0x252e33e3);  // whilewr p3.b, xzr, x14
    // vl128 state = 0x68cda9df
    __ dci(0x25ae33cb);  // whilewr p11.s, x30, x14
    // vl128 state = 0x9104f644
    __ dci(0x25ae33ca);  // whilewr p10.s, x30, x14
    // vl128 state = 0xd9079300
    __ dci(0x25ea33da);  // whilerw p10.d, x30, x10
    // vl128 state = 0xd9fb019d
    __ dci(0x25ae33d8);  // whilerw p8.s, x30, x14
    // vl128 state = 0x9edf46fa
    __ dci(0x25ae32f9);  // whilerw p9.s, x23, x14
    // vl128 state = 0x3b10562f
    __ dci(0x25ee32d8);  // whilerw p8.d, x22, x14
    // vl128 state = 0x473e26e3
    __ dci(0x25ec3299);  // whilerw p9.d, x20, x12
    // vl128 state = 0x4feaf55c
    __ dci(0x25ec329d);  // whilerw p13.d, x20, x12
    // vl128 state = 0x9f9a203a
    __ dci(0x25e8321c);  // whilerw p12.d, x16, x8
    // vl128 state = 0xd8f32d11
    __ dci(0x2568301d);  // whilerw p13.h, x0, x8
    // vl128 state = 0xf04b6bb8
    __ dci(0x2528320d);  // whilewr p13.b, x16, x8
    // vl128 state = 0x0883f877
    __ dci(0x25a8323d);  // whilerw p13.s, x17, x8
    // vl128 state = 0x9564ca3e
    __ dci(0x25a8323f);  // whilerw p15.s, x17, x8
    // vl128 state = 0xa50cf036
    __ dci(0x25e8303d);  // whilerw p13.d, x1, x8
    // vl128 state = 0xe89b1719
    __ dci(0x25e83175);  // whilerw p5.d, x11, x8
    // vl128 state = 0xe79bea7c
    __ dci(0x256a3174);  // whilerw p4.h, x11, x10
    // vl128 state = 0xc8ca3b74
    __ dci(0x256a317c);  // whilerw p12.h, x11, x10
    // vl128 state = 0xc3c88548
    __ dci(0x256a33f8);  // whilerw p8.h, xzr, x10
    // vl128 state = 0x8b25acc6
    __ dci(0x256a33f0);  // whilerw p0.h, xzr, x10
    // vl128 state = 0x904c0fd1
    __ dci(0x25e833e0);  // whilewr p0.d, xzr, x8
    // vl128 state = 0xc893f4c8
    __ dci(0x25ec32e8);  // whilewr p8.d, x23, x12
    // vl128 state = 0x807edd46
    __ dci(0x25ed326c);  // whilewr p12.d, x19, x13
    // vl128 state = 0x8b7c637a
    __ dci(0x256d32ed);  // whilewr p13.h, x23, x13
    // vl128 state = 0xa3c425d3
    __ dci(0x252d30e9);  // whilewr p9.b, x7, x13
    // vl128 state = 0x0edfe6b9
    __ dci(0x252531eb);  // whilewr p11.b, x15, x5
    // vl128 state = 0xf716b922
    __ dci(0x252733ef);  // whilewr p15.b, xzr, x7
    // vl128 state = 0xbf9aea3e
    __ dci(0x25253367);  // whilewr p7.b, x27, x5
    // vl128 state = 0x357fc408
  }

  // `state` is a host variable; the generated code reads it back through its
  // address so the value ends up in w0 for the check below.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // Sixteen entries, selected by (number of Q-sized lanes - 1); entry 0 is
    // the value shown in the last `vl128 state` comment above.
    uint32_t expected_hashes[] = {
        0x357fc408,
        0x8d6fc283,
        0x5f73c1df,
        0x2963d995,
        0x80713760,
        0x4638fc82,
        0x23955ead,
        0x52e4c002,
        0xd56ab65c,
        0x0e5bb2f2,
        0x8c78ec14,
        0xd9b634d2,
        0x83adc3a2,
        0x3b664eea,
        0x3d1f5422,
        0x7cdcd310,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}

TEST_SVE(sve2_mul_index) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVE2,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x4468fb6e);  // mul z14.h, z27.h, z0.h[5]
    // vl128 state = 0xcbe81b96
    __ dci(0x4468f93e);  // mul z30.h, z9.h, z0.h[5]
    // vl128 state = 0x8a75362d
    __ dci(0x4428f976);  // mul z22.h, z11.h, z0.h[1]
    // vl128 state = 0x1e3c5184
    __ dci(0x4428fa77);  // mul z23.h, z19.h, z0.h[1]
    // vl128 state = 0x173f58b5
    __ dci(0x4429fb67);  // mul z7.h, z27.h, z1.h[1]
    // vl128 state = 0x15686c87
    __ dci(0x4429fb63);  // mul z3.h, z27.h, z1.h[1]
    // vl128 state = 0x41068a87
    __ dci(0x4428fb53);  // mul z19.h, z26.h, z0.h[1]
    // vl128 state = 0xcfd6e02c
    __ dci(0x4429fbd1);  // mul z17.h, z30.h, z1.h[1]
    // vl128 state = 0xfd3e0e3c
    __ dci(0x442afbd9);  // mul z25.h, z30.h, z2.h[1]
    // vl128 state = 0x1e660bf7
    __ dci(0x442afa5b);  // mul z27.h, z18.h, z2.h[1]
    // vl128 state = 0xb5378f4e
    __ dci(0x44abfa4b);  // mul z11.s, z18.s, z3.s[1]
    // vl128 state = 0xf34416fe
    __ dci(0x44abfa4f);  // mul z15.s, z18.s, z3.s[1]
    // vl128 state = 0xc80d6ad9
    __ dci(0x44a9f84e);  // mul z14.s, z2.s, z1.s[1]
    // vl128 state = 0xa4fe2be7
    __ dci(0x44e9fa46);  // mul z6.d, z18.d, z9.d[0]
    // vl128 state = 0xaf461ebb
    __ dci(0x44e9fa8e);  // mul z14.d, z20.d, z9.d[0]
    // vl128 state = 0x9f7acd20
    __ dci(0x44f1fa8f);  // mul z15.d, z20.d,
z1.d[1] 6463 // vl128 state = 0x1b710469 6464 __ dci(0x4471fa07); // mul z7.h, z16.h, z1.h[6] 6465 // vl128 state = 0xa2120b4c 6466 __ dci(0x4470fa43); // mul z3.h, z18.h, z0.h[6] 6467 // vl128 state = 0xb6d6ce4c 6468 __ dci(0x4474fb47); // mul z7.h, z26.h, z4.h[6] 6469 // vl128 state = 0xeec634bf 6470 __ dci(0x4476fa57); // mul z23.h, z18.h, z6.h[6] 6471 // vl128 state = 0x893bbe37 6472 __ dci(0x447cfa53); // mul z19.h, z18.h, z4.h[7] 6473 // vl128 state = 0x8373940b 6474 __ dci(0x447dfb52); // mul z18.h, z26.h, z5.h[7] 6475 // vl128 state = 0xd1c86434 6476 __ dci(0x4477fb56); // mul z22.h, z26.h, z7.h[6] 6477 // vl128 state = 0xb247cf9e 6478 __ dci(0x4476fb77); // mul z23.h, z27.h, z6.h[6] 6479 // vl128 state = 0x6106a868 6480 __ dci(0x4467fb7f); // mul z31.h, z27.h, z7.h[4] 6481 // vl128 state = 0xc0a11edf 6482 __ dci(0x446ffa77); // mul z23.h, z19.h, z7.h[5] 6483 // vl128 state = 0xe1879a44 6484 __ dci(0x442bfa76); // mul z22.h, z19.h, z3.h[1] 6485 // vl128 state = 0xc773115b 6486 __ dci(0x442bfa7e); // mul z30.h, z19.h, z3.h[1] 6487 // vl128 state = 0x5f5b4793 6488 __ dci(0x442afa2e); // mul z14.h, z17.h, z2.h[1] 6489 // vl128 state = 0x144b30b2 6490 __ dci(0x442afa26); // mul z6.h, z17.h, z2.h[1] 6491 // vl128 state = 0x905f8608 6492 __ dci(0x442afb6e); // mul z14.h, z27.h, z2.h[1] 6493 // vl128 state = 0x0f826c19 6494 __ dci(0x44aefb66); // mul z6.s, z27.s, z6.s[1] 6495 // vl128 state = 0x7043c090 6496 __ dci(0x44aefba4); // mul z4.s, z29.s, z6.s[1] 6497 // vl128 state = 0xab3921a9 6498 __ dci(0x44aefbb4); // mul z20.s, z29.s, z6.s[1] 6499 // vl128 state = 0x7d420495 6500 __ dci(0x44acfbf0); // mul z16.s, z31.s, z4.s[1] 6501 // vl128 state = 0xceb17a45 6502 __ dci(0x44a4fb60); // mul z0.s, z27.s, z4.s[0] 6503 // vl128 state = 0x97ed0929 6504 __ dci(0x44a5fb30); // mul z16.s, z25.s, z5.s[0] 6505 // vl128 state = 0xb7fa54a5 6506 __ dci(0x4425f938); // mul z24.h, z9.h, z5.h[0] 6507 // vl128 state = 0xfcc1c192 6508 __ dci(0x442df830); // mul z16.h, z1.h, z5.h[1] 
6509 // vl128 state = 0x933ed51d 6510 __ dci(0x4427f832); // mul z18.h, z1.h, z7.h[0] 6511 // vl128 state = 0x2129d4f0 6512 __ dci(0x442ef822); // mul z2.h, z1.h, z6.h[1] 6513 // vl128 state = 0x76f6854c 6514 __ dci(0x442af803); // mul z3.h, z0.h, z2.h[1] 6515 // vl128 state = 0xe763df2d 6516 __ dci(0x442af801); // mul z1.h, z0.h, z2.h[1] 6517 // vl128 state = 0x61db5a87 6518 __ dci(0x442bf900); // mul z0.h, z8.h, z3.h[1] 6519 // vl128 state = 0x90883cfb 6520 __ dci(0x442bf881); // mul z1.h, z4.h, z3.h[1] 6521 // vl128 state = 0xb4afb9b2 6522 __ dci(0x4427f885); // mul z5.h, z4.h, z7.h[0] 6523 // vl128 state = 0xe512adca 6524 __ dci(0x4425f8ad); // mul z13.h, z5.h, z5.h[0] 6525 // vl128 state = 0xd820475a 6526 __ dci(0x4420f8a5); // mul z5.h, z5.h, z0.h[0] 6527 // vl128 state = 0xea9a6f50 6528 __ dci(0x4431f8a4); // mul z4.h, z5.h, z1.h[2] 6529 // vl128 state = 0x9343e341 6530 __ dci(0x4425f8a0); // mul z0.h, z5.h, z5.h[0] 6531 // vl128 state = 0x20a5f202 6532 } 6533 6534 uint32_t state; 6535 ComputeMachineStateHash(&masm, &state); 6536 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 6537 __ Ldr(w0, MemOperand(x0)); 6538 6539 END(); 6540 if (CAN_RUN()) { 6541 RUN(); 6542 uint32_t expected_hashes[] = { 6543 0x20a5f202, 6544 0xdb7b10ee, 6545 0x0607441b, 6546 0x4966f0ff, 6547 0x5f750338, 6548 0x9be09ff4, 6549 0x8805a320, 6550 0x52cf70b0, 6551 0x5f4c6d92, 6552 0xf8009f1f, 6553 0x56cd1ff6, 6554 0x345f063d, 6555 0x3807ccf3, 6556 0xf7eb85a8, 6557 0x1600c143, 6558 0x97be6c01, 6559 }; 6560 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 6561 } 6562} 6563 6564TEST_SVE(sve2_mla_mls_index) { 6565 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 6566 CPUFeatures::kSVE2, 6567 CPUFeatures::kNEON, 6568 CPUFeatures::kCRC32); 6569 START(); 6570 6571 SetInitialMachineState(&masm); 6572 // state = 0xe2bd2480 6573 6574 { 6575 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 6576 __ dci(0x44200800); // mla z0.h, z0.h, z0.h[0] 6577 // vl128 state = 0x06aac22e 
6578 __ dci(0x44200a28); // mla z8.h, z17.h, z0.h[0] 6579 // vl128 state = 0xde2255a4 6580 __ dci(0x44e00a2a); // mla z10.d, z17.d, z0.d[0] 6581 // vl128 state = 0x9bf1bae6 6582 __ dci(0x44600e3a); // mls z26.h, z17.h, z0.h[4] 6583 // vl128 state = 0x28b58feb 6584 __ dci(0x44e20e2a); // mls z10.d, z17.d, z2.d[0] 6585 // vl128 state = 0x0ac8fcc8 6586 __ dci(0x44620f2e); // mls z14.h, z25.h, z2.h[4] 6587 // vl128 state = 0x955da860 6588 __ dci(0x44630f6a); // mls z10.h, z27.h, z3.h[4] 6589 // vl128 state = 0x654ee915 6590 __ dci(0x44730b6e); // mla z14.h, z27.h, z3.h[6] 6591 // vl128 state = 0x3fd3e02c 6592 __ dci(0x44720f6f); // mls z15.h, z27.h, z2.h[6] 6593 // vl128 state = 0x46031098 6594 __ dci(0x44620f4b); // mls z11.h, z26.h, z2.h[4] 6595 // vl128 state = 0xd49183cf 6596 __ dci(0x446a0b5b); // mla z27.h, z26.h, z2.h[5] 6597 // vl128 state = 0x4fe290c1 6598 __ dci(0x44680b73); // mla z19.h, z27.h, z0.h[5] 6599 // vl128 state = 0xf6fccd86 6600 __ dci(0x44e90b77); // mla z23.d, z27.d, z9.d[0] 6601 // vl128 state = 0x57b2090d 6602 __ dci(0x44f10b76); // mla z22.d, z27.d, z1.d[1] 6603 // vl128 state = 0x5a6932eb 6604 __ dci(0x44f40b77); // mla z23.d, z27.d, z4.d[1] 6605 // vl128 state = 0x8e33d7d5 6606 __ dci(0x44640b7f); // mla z31.h, z27.h, z4.h[4] 6607 // vl128 state = 0xaa01885d 6608 __ dci(0x44640b7d); // mla z29.h, z27.h, z4.h[4] 6609 // vl128 state = 0x2ef00e60 6610 __ dci(0x44640b7f); // mla z31.h, z27.h, z4.h[4] 6611 // vl128 state = 0x94ac10d3 6612 __ dci(0x44340b7e); // mla z30.h, z27.h, z4.h[2] 6613 // vl128 state = 0x48211118 6614 __ dci(0x44340e7a); // mls z26.h, z19.h, z4.h[2] 6615 // vl128 state = 0x72cc2767 6616 __ dci(0x44b40eea); // mls z10.s, z23.s, z4.s[2] 6617 // vl128 state = 0x3855f70f 6618 __ dci(0x44e40ee2); // mls z2.d, z23.d, z4.d[0] 6619 // vl128 state = 0xf9225160 6620 __ dci(0x44ec0ea3); // mls z3.d, z21.d, z12.d[0] 6621 // vl128 state = 0xf9b94fd0 6622 __ dci(0x44ae0ea7); // mls z7.s, z21.s, z6.s[1] 6623 // vl128 state = 0x06070917 
6624 __ dci(0x44ae0eb7); // mls z23.s, z21.s, z6.s[1] 6625 // vl128 state = 0x26ecdd18 6626 __ dci(0x44ae0e07); // mls z7.s, z16.s, z6.s[1] 6627 // vl128 state = 0xaa8e3a32 6628 __ dci(0x44ae0a85); // mla z5.s, z20.s, z6.s[1] 6629 // vl128 state = 0x2379cba0 6630 __ dci(0x44ae0a81); // mla z1.s, z20.s, z6.s[1] 6631 // vl128 state = 0x3cc8a61c 6632 __ dci(0x442a0a85); // mla z5.h, z20.h, z2.h[1] 6633 // vl128 state = 0x96f118ef 6634 __ dci(0x443e0a84); // mla z4.h, z20.h, z6.h[3] 6635 // vl128 state = 0xa3f8cb41 6636 __ dci(0x443f0b8c); // mla z12.h, z28.h, z7.h[3] 6637 // vl128 state = 0x97fcb1da 6638 __ dci(0x442f0bbc); // mla z28.h, z29.h, z7.h[1] 6639 // vl128 state = 0x761e9499 6640 __ dci(0x44270fac); // mls z12.h, z29.h, z7.h[0] 6641 // vl128 state = 0xfb28f943 6642 __ dci(0x442f0ead); // mls z13.h, z21.h, z7.h[1] 6643 // vl128 state = 0x387a2623 6644 __ dci(0x44270fa9); // mls z9.h, z29.h, z7.h[0] 6645 // vl128 state = 0x22f03847 6646 __ dci(0x44270f68); // mls z8.h, z27.h, z7.h[0] 6647 // vl128 state = 0xada4998b 6648 __ dci(0x44270f6c); // mls z12.h, z27.h, z7.h[0] 6649 // vl128 state = 0xdf80a034 6650 __ dci(0x44270f7c); // mls z28.h, z27.h, z7.h[0] 6651 // vl128 state = 0x3ccddaa6 6652 __ dci(0x44250f2c); // mls z12.h, z25.h, z5.h[0] 6653 // vl128 state = 0x588502cb 6654 __ dci(0x442f0f28); // mls z8.h, z25.h, z7.h[1] 6655 // vl128 state = 0x79c90307 6656 __ dci(0x446f0d2c); // mls z12.h, z9.h, z7.h[5] 6657 // vl128 state = 0xaa0b21a9 6658 __ dci(0x44af0d2e); // mls z14.s, z9.s, z7.s[1] 6659 // vl128 state = 0xd5ccc60c 6660 __ dci(0x44ed0d26); // mls z6.d, z9.d, z13.d[0] 6661 // vl128 state = 0x15037cbe 6662 __ dci(0x44fd0f2e); // mls z14.d, z25.d, z13.d[1] 6663 // vl128 state = 0x9f481fdf 6664 __ dci(0x44f90e2f); // mls z15.d, z17.d, z9.d[1] 6665 // vl128 state = 0x93fe8537 6666 __ dci(0x447d0e3f); // mls z31.h, z17.h, z5.h[7] 6667 // vl128 state = 0x14b9edf2 6668 __ dci(0x44f90e2f); // mls z15.d, z17.d, z9.d[1] 6669 // vl128 state = 0xde1c0d1c 6670 __ 
dci(0x44790c27); // mls z7.h, z1.h, z1.h[7] 6671 // vl128 state = 0x563d614a 6672 __ dci(0x44790c23); // mls z3.h, z1.h, z1.h[7] 6673 // vl128 state = 0x8c6d9baf 6674 __ dci(0x44f90c6b); // mls z11.d, z3.d, z9.d[1] 6675 // vl128 state = 0x1a25c073 6676 } 6677 6678 uint32_t state; 6679 ComputeMachineStateHash(&masm, &state); 6680 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 6681 __ Ldr(w0, MemOperand(x0)); 6682 6683 END(); 6684 if (CAN_RUN()) { 6685 RUN(); 6686 uint32_t expected_hashes[] = { 6687 0x1a25c073, 6688 0xfbb2c945, 6689 0x932b8ab7, 6690 0x99370bee, 6691 0x44a15f80, 6692 0xae898f1d, 6693 0x97382827, 6694 0xafec059e, 6695 0xf11bc007, 6696 0x34c49b30, 6697 0x73b95606, 6698 0x77324772, 6699 0x9ad7d21b, 6700 0x0d0958a7, 6701 0xee4accc3, 6702 0x31d34df8, 6703 }; 6704 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 6705 } 6706} 6707 6708TEST_SVE(sve2_mla_long) { 6709 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 6710 CPUFeatures::kSVE2, 6711 CPUFeatures::kNEON, 6712 CPUFeatures::kCRC32); 6713 START(); 6714 6715 SetInitialMachineState(&masm); 6716 // state = 0xe2bd2480 6717 6718 { 6719 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 6720 __ dci(0x44935abe); // umlslb z30.s, z21.h, z19.h 6721 // vl128 state = 0x4fac8e49 6722 __ dci(0x449358fa); // umlslb z26.s, z7.h, z19.h 6723 // vl128 state = 0xca971f04 6724 __ dci(0x44935adb); // umlslb z27.s, z22.h, z19.h 6725 // vl128 state = 0x5652564b 6726 __ dci(0x449359da); // umlslb z26.s, z14.h, z19.h 6727 // vl128 state = 0xf2d81244 6728 __ dci(0x448349de); // umlalb z30.s, z14.h, z3.h 6729 // vl128 state = 0x7cbaa548 6730 __ dci(0x448349d6); // umlalb z22.s, z14.h, z3.h 6731 // vl128 state = 0x9e7b4915 6732 __ dci(0x44c34952); // umlalb z18.d, z10.s, z3.s 6733 // vl128 state = 0x550af70e 6734 __ dci(0x44d349d3); // umlalb z19.d, z14.s, z19.s 6735 // vl128 state = 0x676743b2 6736 __ dci(0x44d549d7); // umlalb z23.d, z14.s, z21.s 6737 // vl128 state = 0x602e09e4 6738 __ dci(0x44d55ddf); 
// umlslt z31.d, z14.s, z21.s 6739 // vl128 state = 0xd4c245de 6740 __ dci(0x44d55d1b); // umlslt z27.d, z8.s, z21.s 6741 // vl128 state = 0x9c2c1cb4 6742 __ dci(0x44d5490b); // umlalb z11.d, z8.s, z21.s 6743 // vl128 state = 0x8a702002 6744 __ dci(0x44554d0a); // umlalt z10.h, z8.b, z21.b 6745 // vl128 state = 0x6758ce3c 6746 __ dci(0x4455452b); // smlalt z11.h, z9.b, z21.b 6747 // vl128 state = 0x967e596e 6748 __ dci(0x44554529); // smlalt z9.h, z9.b, z21.b 6749 // vl128 state = 0x1300909a 6750 __ dci(0x44474521); // smlalt z1.h, z9.b, z7.b 6751 // vl128 state = 0x01ca26c1 6752 __ dci(0x44c74d25); // umlalt z5.d, z9.s, z7.s 6753 // vl128 state = 0x8e6313b9 6754 __ dci(0x44cb4d24); // umlalt z4.d, z9.s, z11.s 6755 // vl128 state = 0xdb41e004 6756 __ dci(0x44cb4d2c); // umlalt z12.d, z9.s, z11.s 6757 // vl128 state = 0x941401ca 6758 __ dci(0x44c94da8); // umlalt z8.d, z13.s, z9.s 6759 // vl128 state = 0x8a57334b 6760 __ dci(0x44594db8); // umlalt z24.h, z13.b, z25.b 6761 // vl128 state = 0x94333fae 6762 __ dci(0x44585db0); // umlslt z16.h, z13.b, z24.b 6763 // vl128 state = 0xf4fbe251 6764 __ dci(0x44585f80); // umlslt z0.h, z28.b, z24.b 6765 // vl128 state = 0x1f5aeef3 6766 __ dci(0x445a5fc2); // umlslt z2.h, z30.b, z26.b 6767 // vl128 state = 0x4b153d20 6768 __ dci(0x445a5fd2); // umlslt z18.h, z30.b, z26.b 6769 // vl128 state = 0xbd82f0a2 6770 __ dci(0x445a5fd3); // umlslt z19.h, z30.b, z26.b 6771 // vl128 state = 0x72d7083d 6772 __ dci(0x44525bd2); // umlslb z18.h, z30.b, z18.b 6773 // vl128 state = 0x5018a138 6774 __ dci(0x44525bd6); // umlslb z22.h, z30.b, z18.b 6775 // vl128 state = 0xcaf48a01 6776 __ dci(0x445053d2); // smlslb z18.h, z30.b, z16.b 6777 // vl128 state = 0x76e2d850 6778 __ dci(0x44d153c2); // smlslb z2.d, z30.s, z17.s 6779 // vl128 state = 0x8594d6c9 6780 __ dci(0x449353c3); // smlslb z3.s, z30.h, z19.h 6781 // vl128 state = 0x8e0da89d 6782 __ dci(0x449152c7); // smlslb z7.s, z22.h, z17.h 6783 // vl128 state = 0xe7d08864 6784 __ 
dci(0x44995285); // smlslb z5.s, z20.h, z25.h 6785 // vl128 state = 0xd7c49fca 6786 __ dci(0x449953c1); // smlslb z1.s, z30.h, z25.h 6787 // vl128 state = 0x3b648b39 6788 __ dci(0x449152c9); // smlslb z9.s, z22.h, z17.h 6789 // vl128 state = 0x5b5bab94 6790 __ dci(0x449542cd); // smlalb z13.s, z22.h, z21.h 6791 // vl128 state = 0x65282d76 6792 __ dci(0x449c42c9); // smlalb z9.s, z22.h, z28.h 6793 // vl128 state = 0x94a92486 6794 __ dci(0x449c52f9); // smlslb z25.s, z23.h, z28.h 6795 // vl128 state = 0xd4f62835 6796 __ dci(0x44dc5afd); // umlslb z29.d, z23.s, z28.s 6797 // vl128 state = 0xf124c6a1 6798 __ dci(0x44dd58ff); // umlslb z31.d, z7.s, z29.s 6799 // vl128 state = 0xbc694f1c 6800 __ dci(0x44dc587b); // umlslb z27.d, z3.s, z28.s 6801 // vl128 state = 0xf1621eb2 6802 __ dci(0x44de596b); // umlslb z11.d, z11.s, z30.s 6803 // vl128 state = 0x944b4b75 6804 __ dci(0x44de5969); // umlslb z9.d, z11.s, z30.s 6805 // vl128 state = 0xa98a2c38 6806 __ dci(0x44db596d); // umlslb z13.d, z11.s, z27.s 6807 // vl128 state = 0x6bd60807 6808 __ dci(0x44db5d5d); // umlslt z29.d, z10.s, z27.s 6809 // vl128 state = 0x9c377b51 6810 __ dci(0x449b555f); // smlslt z31.s, z10.h, z27.h 6811 // vl128 state = 0x7c81f1d5 6812 __ dci(0x449b555d); // smlslt z29.s, z10.h, z27.h 6813 // vl128 state = 0xdaab1edb 6814 __ dci(0x44d35559); // smlslt z25.d, z10.s, z19.s 6815 // vl128 state = 0xdc3f25f1 6816 __ dci(0x44d355f8); // smlslt z24.d, z15.s, z19.s 6817 // vl128 state = 0x9c75a3cf 6818 __ dci(0x44d356f9); // smlslt z25.d, z23.s, z19.s 6819 // vl128 state = 0x5b999178 6820 } 6821 6822 uint32_t state; 6823 ComputeMachineStateHash(&masm, &state); 6824 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 6825 __ Ldr(w0, MemOperand(x0)); 6826 6827 END(); 6828 if (CAN_RUN()) { 6829 RUN(); 6830 uint32_t expected_hashes[] = { 6831 0x5b999178, 6832 0xd6191e64, 6833 0x1f3bd2a1, 6834 0x1e0ac282, 6835 0x8d13f5d3, 6836 0x97157e8f, 6837 0x5d6e4134, 6838 0x8d2186b4, 6839 0x88078c65, 6840 0x6dd92db3, 6841 
0xfcd02d21, 6842 0x81738dc2, 6843 0x644e3c06, 6844 0x9c9d2ac8, 6845 0xaaa43548, 6846 0x871e9b08, 6847 }; 6848 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 6849 } 6850} 6851 6852TEST_SVE(sve2_complex_integer_multiply_add_vector) { 6853 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 6854 CPUFeatures::kSVE2, 6855 CPUFeatures::kNEON, 6856 CPUFeatures::kCRC32); 6857 START(); 6858 6859 SetInitialMachineState(&masm); 6860 // state = 0xe2bd2480 6861 6862 { 6863 ExactAssemblyScope scope(&masm, 40 * kInstructionSize); 6864 __ dci(0x44dd2f34); // cmla z20.d, z25.d, z29.d, #270 6865 // vl128 state = 0x12e9bd68 6866 __ dci(0x44dd2f3c); // cmla z28.d, z25.d, z29.d, #270 6867 // vl128 state = 0x4fd8ba3e 6868 __ dci(0x44dc2734); // cmla z20.d, z25.d, z28.d, #90 6869 // vl128 state = 0x9b11d64f 6870 __ dci(0x44dc2e36); // cmla z22.d, z17.d, z28.d, #270 6871 // vl128 state = 0x4658e6ae 6872 __ dci(0x44dd2f34); // cmla z20.d, z25.d, z29.d, #270 6873 // vl128 state = 0x5151ea16 6874 __ dci(0x44dc2fb5); // cmla z21.d, z29.d, z28.d, #270 6875 // vl128 state = 0x21c497cc 6876 __ dci(0x44dc2fbd); // cmla z29.d, z29.d, z28.d, #270 6877 // vl128 state = 0xe823fd46 6878 __ dci(0x44dc2e3c); // cmla z28.d, z17.d, z28.d, #270 6879 // vl128 state = 0xcc35cda6 6880 __ dci(0x44dc2e34); // cmla z20.d, z17.d, z28.d, #270 6881 // vl128 state = 0x963047c0 6882 __ dci(0x44d42c30); // cmla z16.d, z1.d, z20.d, #270 6883 // vl128 state = 0x5d2c5643 6884 __ dci(0x44c42c60); // cmla z0.d, z3.d, z4.d, #270 6885 // vl128 state = 0xfd400169 6886 __ dci(0x44842464); // cmla z4.s, z3.s, z4.s, #90 6887 // vl128 state = 0x00116098 6888 __ dci(0x44842d60); // cmla z0.s, z11.s, z4.s, #270 6889 // vl128 state = 0x582d46e3 6890 __ dci(0x44042562); // cmla z2.b, z11.b, z4.b, #90 6891 // vl128 state = 0x1bd70bf0 6892 __ dci(0x44042420); // cmla z0.b, z1.b, z4.b, #90 6893 // vl128 state = 0x7682807d 6894 __ dci(0x44062401); // cmla z1.b, z0.b, z6.b, #90 6895 // vl128 state = 0xaa3e2c64 6896 __ 
dci(0x44042449); // cmla z9.b, z2.b, z4.b, #90 6897 // vl128 state = 0xd81638f9 6898 __ dci(0x44052059); // cmla z25.b, z2.b, z5.b, #0 6899 // vl128 state = 0x38cb5d96 6900 __ dci(0x4415305d); // sqrdcmlah z29.b, z2.b, z21.b, #0 6901 // vl128 state = 0x4c6b85e0 6902 __ dci(0x44153819); // sqrdcmlah z25.b, z0.b, z21.b, #180 6903 // vl128 state = 0x229b5be9 6904 __ dci(0x4405391b); // sqrdcmlah z27.b, z8.b, z5.b, #180 6905 // vl128 state = 0x82611aec 6906 __ dci(0x4405314b); // sqrdcmlah z11.b, z10.b, z5.b, #0 6907 // vl128 state = 0xe58c48e0 6908 __ dci(0x4407316a); // sqrdcmlah z10.b, z11.b, z7.b, #0 6909 // vl128 state = 0x5282838a 6910 __ dci(0x4407347a); // sqrdcmlah z26.b, z3.b, z7.b, #90 6911 // vl128 state = 0x134a0891 6912 __ dci(0x4413347e); // sqrdcmlah z30.b, z3.b, z19.b, #90 6913 // vl128 state = 0x455ab9e0 6914 __ dci(0x4443347f); // sqrdcmlah z31.h, z3.h, z3.h, #90 6915 // vl128 state = 0x030d9d2c 6916 __ dci(0x444b307e); // sqrdcmlah z30.h, z3.h, z11.h, #0 6917 // vl128 state = 0x91a95a2c 6918 __ dci(0x444b301f); // sqrdcmlah z31.h, z0.h, z11.h, #0 6919 // vl128 state = 0x0f1c8468 6920 __ dci(0x4409300f); // sqrdcmlah z15.b, z0.b, z9.b, #0 6921 // vl128 state = 0x95f802b7 6922 __ dci(0x440c300e); // sqrdcmlah z14.b, z0.b, z12.b, #0 6923 // vl128 state = 0x5fa6d2c6 6924 __ dci(0x4404310c); // sqrdcmlah z12.b, z8.b, z4.b, #0 6925 // vl128 state = 0x192b05a4 6926 __ dci(0x4415310d); // sqrdcmlah z13.b, z8.b, z21.b, #0 6927 // vl128 state = 0xa8a8d37f 6928 __ dci(0x4414350f); // sqrdcmlah z15.b, z8.b, z20.b, #90 6929 // vl128 state = 0xcd890d8c 6930 __ dci(0x4454354d); // sqrdcmlah z13.h, z10.h, z20.h, #90 6931 // vl128 state = 0x91ab863e 6932 __ dci(0x444435c5); // sqrdcmlah z5.h, z14.h, z4.h, #90 6933 // vl128 state = 0x41bbc90c 6934 __ dci(0x444c34c7); // sqrdcmlah z7.h, z6.h, z12.h, #90 6935 // vl128 state = 0xb6329344 6936 __ dci(0x444836c6); // sqrdcmlah z6.h, z22.h, z8.h, #90 6937 // vl128 state = 0xdf5f443c 6938 __ dci(0x444836d6); // sqrdcmlah 
z22.h, z22.h, z8.h, #90 6939 // vl128 state = 0x719a2e70 6940 __ dci(0x44403694); // sqrdcmlah z20.h, z20.h, z0.h, #90 6941 // vl128 state = 0x28a64934 6942 __ dci(0x4449369c); // sqrdcmlah z28.h, z20.h, z9.h, #90 6943 // vl128 state = 0x5d41ba84 6944 } 6945 6946 uint32_t state; 6947 ComputeMachineStateHash(&masm, &state); 6948 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 6949 __ Ldr(w0, MemOperand(x0)); 6950 6951 END(); 6952 if (CAN_RUN()) { 6953 RUN(); 6954 uint32_t expected_hashes[] = { 6955 0x5d41ba84, 6956 0xd5e52f4d, 6957 0x9f627c0d, 6958 0x111f21a7, 6959 0x5d7b356e, 6960 0x1f345c0e, 6961 0xd881296e, 6962 0x819f9091, 6963 0x59823550, 6964 0xbe2162c7, 6965 0x5f5dca40, 6966 0xad7e429e, 6967 0x4f66661f, 6968 0x7c5fbca0, 6969 0x819ff997, 6970 0x68ebdb56, 6971 }; 6972 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 6973 } 6974} 6975 6976TEST_SVE(sve2_complex_integer_multiply_add_indexed) { 6977 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 6978 CPUFeatures::kSVE2, 6979 CPUFeatures::kNEON, 6980 CPUFeatures::kCRC32); 6981 START(); 6982 6983 SetInitialMachineState(&masm); 6984 // state = 0xe2bd2480 6985 6986 { 6987 ExactAssemblyScope scope(&masm, 20 * kInstructionSize); 6988 __ dci(0x44fd7d52); // sqrdcmlah z18.s, z10.s, z13.s[1], #270 6989 // vl128 state = 0x5c66baad 6990 __ dci(0x44fd7c13); // sqrdcmlah z19.s, z0.s, z13.s[1], #270 6991 // vl128 state = 0xac8c451b 6992 __ dci(0x44f97e11); // sqrdcmlah z17.s, z16.s, z9.s[1], #270 6993 // vl128 state = 0x02ebccdb 6994 __ dci(0x44e97615); // sqrdcmlah z21.s, z16.s, z9.s[0], #90 6995 // vl128 state = 0xe43b1032 6996 __ dci(0x44e97614); // sqrdcmlah z20.s, z16.s, z9.s[0], #90 6997 // vl128 state = 0xa28d9898 6998 __ dci(0x44e17635); // sqrdcmlah z21.s, z17.s, z1.s[0], #90 6999 // vl128 state = 0x021764c6 7000 __ dci(0x44e17634); // sqrdcmlah z20.s, z17.s, z1.s[0], #90 7001 // vl128 state = 0x812dbf22 7002 __ dci(0x44f07635); // sqrdcmlah z21.s, z17.s, z0.s[1], #90 7003 // vl128 state = 
0x5e87a59e 7004 __ dci(0x44f07465); // sqrdcmlah z5.s, z3.s, z0.s[1], #90 7005 // vl128 state = 0xd1a78d9d 7006 __ dci(0x44f87675); // sqrdcmlah z21.s, z19.s, z8.s[1], #90 7007 // vl128 state = 0xd4500975 7008 __ dci(0x44b87e7d); // sqrdcmlah z29.h, z19.h, z0.h[3], #270 7009 // vl128 state = 0x765230ab 7010 __ dci(0x44b876f9); // sqrdcmlah z25.h, z23.h, z0.h[3], #90 7011 // vl128 state = 0xca9c5bb4 7012 __ dci(0x44f874fb); // sqrdcmlah z27.s, z7.s, z8.s[1], #90 7013 // vl128 state = 0xa4bc044a 7014 __ dci(0x44f070fa); // sqrdcmlah z26.s, z7.s, z0.s[1], #0 7015 // vl128 state = 0xd0eaa1df 7016 __ dci(0x44f07038); // sqrdcmlah z24.s, z1.s, z0.s[1], #0 7017 // vl128 state = 0x80836f9f 7018 __ dci(0x44b17030); // sqrdcmlah z16.h, z1.h, z1.h[2], #0 7019 // vl128 state = 0x59ffa1ce 7020 __ dci(0x44b17032); // sqrdcmlah z18.h, z1.h, z1.h[2], #0 7021 // vl128 state = 0xdb8beca5 7022 __ dci(0x44b07430); // sqrdcmlah z16.h, z1.h, z0.h[2], #90 7023 // vl128 state = 0xe5b6a0e3 7024 __ dci(0x44b07438); // sqrdcmlah z24.h, z1.h, z0.h[2], #90 7025 // vl128 state = 0x19cc8c20 7026 __ dci(0x44b0743a); // sqrdcmlah z26.h, z1.h, z0.h[2], #90 7027 // vl128 state = 0x19c819af 7028 } 7029 7030 uint32_t state; 7031 ComputeMachineStateHash(&masm, &state); 7032 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7033 __ Ldr(w0, MemOperand(x0)); 7034 7035 END(); 7036 if (CAN_RUN()) { 7037 RUN(); 7038 uint32_t expected_hashes[] = { 7039 0x19c819af, 7040 0xbb2225f2, 7041 0x7e54f513, 7042 0xdcbf6f0f, 7043 0x2bfdc97d, 7044 0x48890c54, 7045 0x65542c02, 7046 0xaef6b224, 7047 0x993b14fd, 7048 0x244d27c5, 7049 0xe8767ba8, 7050 0x4397a148, 7051 0xb3efcd2e, 7052 0xb5894aba, 7053 0x2a0f6f7a, 7054 0xbe45142c, 7055 }; 7056 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7057 } 7058} 7059 7060TEST_SVE(sve2_saturating_multiply_add_long_vector) { 7061 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7062 CPUFeatures::kSVE2, 7063 CPUFeatures::kNEON, 7064 CPUFeatures::kCRC32); 7065 START(); 
7066 7067 SetInitialMachineState(&masm); 7068 // state = 0xe2bd2480 7069 7070 { 7071 ExactAssemblyScope scope(&masm, 40 * kInstructionSize); 7072 __ dci(0x44db629b); // sqdmlalb z27.d, z20.s, z27.s 7073 // vl128 state = 0x61e408e4 7074 __ dci(0x44db631f); // sqdmlalb z31.d, z24.s, z27.s 7075 // vl128 state = 0xf146813f 7076 __ dci(0x44da6b1d); // sqdmlslb z29.d, z24.s, z26.s 7077 // vl128 state = 0xb8d07371 7078 __ dci(0x44da6a35); // sqdmlslb z21.d, z17.s, z26.s 7079 // vl128 state = 0xaf43cc88 7080 __ dci(0x444a6a3d); // sqdmlslb z29.h, z17.b, z10.b 7081 // vl128 state = 0xba4c5067 7082 __ dci(0x444a6a39); // sqdmlslb z25.h, z17.b, z10.b 7083 // vl128 state = 0x396202c3 7084 __ dci(0x445a6829); // sqdmlslb z9.h, z1.b, z26.b 7085 // vl128 state = 0x22095f7f 7086 __ dci(0x445a6b28); // sqdmlslb z8.h, z25.b, z26.b 7087 // vl128 state = 0xa9516b4b 7088 __ dci(0x44da6b69); // sqdmlslb z9.d, z27.s, z26.s 7089 // vl128 state = 0x1f048226 7090 __ dci(0x44da616d); // sqdmlalb z13.d, z11.s, z26.s 7091 // vl128 state = 0x0fdd982f 7092 __ dci(0x4458616f); // sqdmlalb z15.h, z11.b, z24.b 7093 // vl128 state = 0x461ba137 7094 __ dci(0x4449617f); // sqdmlalb z31.h, z11.b, z9.b 7095 // vl128 state = 0xd1071b0c 7096 __ dci(0x4459614f); // sqdmlalb z15.h, z10.b, z25.b 7097 // vl128 state = 0x0fa6bae7 7098 __ dci(0x4458654d); // sqdmlalt z13.h, z10.b, z24.b 7099 // vl128 state = 0xebd08a80 7100 __ dci(0x44586d05); // sqdmlslt z5.h, z8.b, z24.b 7101 // vl128 state = 0xd4c41665 7102 __ dci(0x44506d84); // sqdmlslt z4.h, z12.b, z16.b 7103 // vl128 state = 0x80f619f9 7104 __ dci(0x44506fc6); // sqdmlslt z6.h, z30.b, z16.b 7105 // vl128 state = 0xb588af21 7106 __ dci(0x44566fc4); // sqdmlslt z4.h, z30.b, z22.b 7107 // vl128 state = 0x4dd8437a 7108 __ dci(0x44566f0c); // sqdmlslt z12.h, z24.b, z22.b 7109 // vl128 state = 0x48ca6e5c 7110 __ dci(0x44566f0e); // sqdmlslt z14.h, z24.b, z22.b 7111 // vl128 state = 0x02d6f977 7112 __ dci(0x44566746); // sqdmlalt z6.h, z26.b, z22.b 7113 // 
vl128 state = 0x179f59f4 7114 __ dci(0x445767c4); // sqdmlalt z4.h, z30.b, z23.b 7115 // vl128 state = 0xf2d2823c 7116 __ dci(0x44d667c0); // sqdmlalt z0.d, z30.s, z22.s 7117 // vl128 state = 0x404c277e 7118 __ dci(0x44566742); // sqdmlalt z2.h, z26.b, z22.b 7119 // vl128 state = 0x986a72c1 7120 __ dci(0x44c6674a); // sqdmlalt z10.d, z26.s, z6.s 7121 // vl128 state = 0xbb8044ab 7122 __ dci(0x44c66742); // sqdmlalt z2.d, z26.s, z6.s 7123 // vl128 state = 0x9f5b244b 7124 __ dci(0x44ce6706); // sqdmlalt z6.d, z24.s, z14.s 7125 // vl128 state = 0xc6ce6266 7126 __ dci(0x44ce670e); // sqdmlalt z14.d, z24.s, z14.s 7127 // vl128 state = 0xc9e1a461 7128 __ dci(0x44de6746); // sqdmlalt z6.d, z26.s, z30.s 7129 // vl128 state = 0x9f133504 7130 __ dci(0x44dc6342); // sqdmlalb z2.d, z26.s, z28.s 7131 // vl128 state = 0x42deb468 7132 __ dci(0x44d46366); // sqdmlalb z6.d, z27.s, z20.s 7133 // vl128 state = 0xb3436cd4 7134 __ dci(0x44d5626e); // sqdmlalb z14.d, z19.s, z21.s 7135 // vl128 state = 0x0e0533ac 7136 __ dci(0x44d5646f); // sqdmlalt z15.d, z3.s, z21.s 7137 // vl128 state = 0x92d04e7b 7138 __ dci(0x44d36467); // sqdmlalt z7.d, z3.s, z19.s 7139 // vl128 state = 0xd9fa8b4d 7140 __ dci(0x44d360ef); // sqdmlalb z15.d, z7.s, z19.s 7141 // vl128 state = 0x9c9a5778 7142 __ dci(0x44d3646b); // sqdmlalt z11.d, z3.s, z19.s 7143 // vl128 state = 0x40d7c923 7144 __ dci(0x4492646f); // sqdmlalt z15.s, z3.h, z18.h 7145 // vl128 state = 0x0b5b2334 7146 __ dci(0x4492647f); // sqdmlalt z31.s, z3.h, z18.h 7147 // vl128 state = 0xfe6302c1 7148 __ dci(0x4494647d); // sqdmlalt z29.s, z3.h, z20.h 7149 // vl128 state = 0xe3c05a37 7150 __ dci(0x4484666d); // sqdmlalt z13.s, z19.h, z4.h 7151 // vl128 state = 0x15169e94 7152 } 7153 7154 uint32_t state; 7155 ComputeMachineStateHash(&masm, &state); 7156 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7157 __ Ldr(w0, MemOperand(x0)); 7158 7159 END(); 7160 if (CAN_RUN()) { 7161 RUN(); 7162 uint32_t expected_hashes[] = { 7163 0x15169e94, 7164 
0x6101102c, 7165 0xa5586d26, 7166 0x3fbf4f9f, 7167 0x8e62994d, 7168 0x4d77a9e5, 7169 0x4ceadc9e, 7170 0x8247db61, 7171 0x4aa10859, 7172 0x0b3280b3, 7173 0x015d75ea, 7174 0x1cf4825e, 7175 0xda7d3fea, 7176 0xc24bd624, 7177 0x60ee565a, 7178 0x7ac92c39, 7179 }; 7180 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7181 } 7182} 7183 7184TEST_SVE(sve2_saturating_multiply_add_interleaved_long) { 7185 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7186 CPUFeatures::kSVE2, 7187 CPUFeatures::kNEON, 7188 CPUFeatures::kCRC32); 7189 START(); 7190 7191 SetInitialMachineState(&masm); 7192 // state = 0xe2bd2480 7193 7194 { 7195 ExactAssemblyScope scope(&masm, 30 * kInstructionSize); 7196 __ dci(0x449e0ac6); // sqdmlalbt z6.s, z22.h, z30.h 7197 // vl128 state = 0x1f0ef37c 7198 __ dci(0x449c0ae4); // sqdmlalbt z4.s, z23.h, z28.h 7199 // vl128 state = 0xa80bf2c8 7200 __ dci(0x449c0ae6); // sqdmlalbt z6.s, z23.h, z28.h 7201 // vl128 state = 0x4c5b0e8f 7202 __ dci(0x449e0aae); // sqdmlalbt z14.s, z21.h, z30.h 7203 // vl128 state = 0xa6482041 7204 __ dci(0x449e0aaf); // sqdmlalbt z15.s, z21.h, z30.h 7205 // vl128 state = 0x6ef82b7a 7206 __ dci(0x449c0a2b); // sqdmlalbt z11.s, z17.h, z28.h 7207 // vl128 state = 0x0070a7fa 7208 __ dci(0x449e0829); // sqdmlalbt z9.s, z1.h, z30.h 7209 // vl128 state = 0x08b9efc6 7210 __ dci(0x449e0c61); // sqdmlslbt z1.s, z3.h, z30.h 7211 // vl128 state = 0xebd25c16 7212 __ dci(0x449e0c60); // sqdmlslbt z0.s, z3.h, z30.h 7213 // vl128 state = 0x0926abbe 7214 __ dci(0x449e0c70); // sqdmlslbt z16.s, z3.h, z30.h 7215 // vl128 state = 0xe9d3e5a7 7216 __ dci(0x449f0cf4); // sqdmlslbt z20.s, z7.h, z31.h 7217 // vl128 state = 0xf062523d 7218 __ dci(0x449f08b5); // sqdmlalbt z21.s, z5.h, z31.h 7219 // vl128 state = 0x6034c14e 7220 __ dci(0x449f08a5); // sqdmlalbt z5.s, z5.h, z31.h 7221 // vl128 state = 0x0a73c74b 7222 __ dci(0x448e08b5); // sqdmlalbt z21.s, z5.h, z14.h 7223 // vl128 state = 0xa4af2700 7224 __ dci(0x448c08e5); // sqdmlalbt 
z5.s, z7.h, z12.h 7225 // vl128 state = 0x7499c587 7226 __ dci(0x448c08e1); // sqdmlalbt z1.s, z7.h, z12.h 7227 // vl128 state = 0x968bca0e 7228 __ dci(0x448c0971); // sqdmlalbt z17.s, z11.h, z12.h 7229 // vl128 state = 0xd7890449 7230 __ dci(0x448f0975); // sqdmlalbt z21.s, z11.h, z15.h 7231 // vl128 state = 0xa2393863 7232 __ dci(0x448f0977); // sqdmlalbt z23.s, z11.h, z15.h 7233 // vl128 state = 0x0f7d9688 7234 __ dci(0x449f093f); // sqdmlalbt z31.s, z9.h, z31.h 7235 // vl128 state = 0xeb16ca99 7236 __ dci(0x449f09f7); // sqdmlalbt z23.s, z15.h, z31.h 7237 // vl128 state = 0x5eca8b00 7238 __ dci(0x449f0987); // sqdmlalbt z7.s, z12.h, z31.h 7239 // vl128 state = 0xf8f22744 7240 __ dci(0x449f0a83); // sqdmlalbt z3.s, z20.h, z31.h 7241 // vl128 state = 0xc20d54f5 7242 __ dci(0x449b0ac1); // sqdmlalbt z1.s, z22.h, z27.h 7243 // vl128 state = 0xf371a13b 7244 __ dci(0x449b0aa9); // sqdmlalbt z9.s, z21.h, z27.h 7245 // vl128 state = 0xffae55ce 7246 __ dci(0x449b0ab9); // sqdmlalbt z25.s, z21.h, z27.h 7247 // vl128 state = 0x0c5ab866 7248 __ dci(0x44d30aa9); // sqdmlalbt z9.d, z21.s, z19.s 7249 // vl128 state = 0x388bfe27 7250 __ dci(0x44d30aab); // sqdmlalbt z11.d, z21.s, z19.s 7251 // vl128 state = 0x6dc15ec8 7252 __ dci(0x44d70baf); // sqdmlalbt z15.d, z29.s, z23.s 7253 // vl128 state = 0x6a858021 7254 __ dci(0x44d70ba7); // sqdmlalbt z7.d, z29.s, z23.s 7255 // vl128 state = 0x52416517 7256 } 7257 7258 uint32_t state; 7259 ComputeMachineStateHash(&masm, &state); 7260 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7261 __ Ldr(w0, MemOperand(x0)); 7262 7263 END(); 7264 if (CAN_RUN()) { 7265 RUN(); 7266 uint32_t expected_hashes[] = { 7267 0x52416517, 7268 0x1a625e10, 7269 0x3eaaa30f, 7270 0x0eefe820, 7271 0x9e2f7744, 7272 0x3dbc3206, 7273 0xca85b926, 7274 0x9428c809, 7275 0x7c35818c, 7276 0xb8bc3648, 7277 0x5b215c50, 7278 0xbdb56ba5, 7279 0xe4e4bc54, 7280 0x69ba132f, 7281 0xa498b17a, 7282 0xf482b2a6, 7283 }; 7284 
ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7285 } 7286} 7287 7288TEST_SVE(sve2_saturating_multiply_add_long_indexed) { 7289 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7290 CPUFeatures::kSVE2, 7291 CPUFeatures::kNEON, 7292 CPUFeatures::kCRC32); 7293 START(); 7294 7295 SetInitialMachineState(&masm); 7296 // state = 0xe2bd2480 7297 7298 { 7299 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 7300 __ dci(0x44f52e3d); // sqdmlalt z29.d, z17.s, z5.s[3] 7301 // vl128 state = 0x2a284ede 7302 __ dci(0x44f52e3c); // sqdmlalt z28.d, z17.s, z5.s[3] 7303 // vl128 state = 0x48a615e9 7304 __ dci(0x44f72c3d); // sqdmlalt z29.d, z1.s, z7.s[3] 7305 // vl128 state = 0x1bbe9cc5 7306 __ dci(0x44b62c35); // sqdmlalt z21.s, z1.h, z6.h[5] 7307 // vl128 state = 0x99966225 7308 __ dci(0x44b624b7); // sqdmlalt z23.s, z5.h, z6.h[4] 7309 // vl128 state = 0x36da4a3a 7310 __ dci(0x44f626b6); // sqdmlalt z22.d, z21.s, z6.s[2] 7311 // vl128 state = 0xc009e514 7312 __ dci(0x44f62226); // sqdmlalb z6.d, z17.s, z6.s[2] 7313 // vl128 state = 0x2140ee4b 7314 __ dci(0x44fa222e); // sqdmlalb z14.d, z17.s, z10.s[2] 7315 // vl128 state = 0xf78c8bec 7316 __ dci(0x44fa2aac); // sqdmlalb z12.d, z21.s, z10.s[3] 7317 // vl128 state = 0x329238c6 7318 __ dci(0x44fa2abc); // sqdmlalb z28.d, z21.s, z10.s[3] 7319 // vl128 state = 0xadc9f9db 7320 __ dci(0x44fa2aac); // sqdmlalb z12.d, z21.s, z10.s[3] 7321 // vl128 state = 0x877f64cf 7322 __ dci(0x44ba2a88); // sqdmlalb z8.s, z20.h, z2.h[7] 7323 // vl128 state = 0x4e4a3117 7324 __ dci(0x44fb2a89); // sqdmlalb z9.d, z20.s, z11.s[3] 7325 // vl128 state = 0xe26b041b 7326 __ dci(0x44f32ab9); // sqdmlalb z25.d, z21.s, z3.s[3] 7327 // vl128 state = 0xbcf4e0b2 7328 __ dci(0x44e328bd); // sqdmlalb z29.d, z5.s, z3.s[1] 7329 // vl128 state = 0x31391cc2 7330 __ dci(0x44f228ad); // sqdmlalb z13.d, z5.s, z2.s[3] 7331 // vl128 state = 0xf4c6c098 7332 __ dci(0x44e238af); // sqdmlslb z15.d, z5.s, z2.s[1] 7333 // vl128 state = 0x6e7cb20c 7334 __ 
dci(0x44e639ad); // sqdmlslb z13.d, z13.s, z6.s[1] 7335 // vl128 state = 0xed16e292 7336 __ dci(0x44a63daf); // sqdmlslt z15.s, z13.h, z6.h[1] 7337 // vl128 state = 0x7c0c3a9a 7338 __ dci(0x44ae3cbf); // sqdmlslt z31.s, z5.h, z6.h[3] 7339 // vl128 state = 0x0e2dce8d 7340 __ dci(0x44a634b7); // sqdmlslt z23.s, z5.h, z6.h[0] 7341 // vl128 state = 0xf3eeab27 7342 __ dci(0x44e234b5); // sqdmlslt z21.d, z5.s, z2.s[0] 7343 // vl128 state = 0x55193209 7344 __ dci(0x44a23437); // sqdmlslt z23.s, z1.h, z2.h[0] 7345 // vl128 state = 0x7652b538 7346 __ dci(0x44a63535); // sqdmlslt z21.s, z9.h, z6.h[0] 7347 // vl128 state = 0x76046ab4 7348 __ dci(0x44a235b4); // sqdmlslt z20.s, z13.h, z2.h[0] 7349 // vl128 state = 0x2f23fd0d 7350 __ dci(0x44a234e4); // sqdmlslt z4.s, z7.h, z2.h[0] 7351 // vl128 state = 0x2a50774c 7352 __ dci(0x44a234ec); // sqdmlslt z12.s, z7.h, z2.h[0] 7353 // vl128 state = 0x01ea8843 7354 __ dci(0x44a324e8); // sqdmlalt z8.s, z7.h, z3.h[0] 7355 // vl128 state = 0xed54a157 7356 __ dci(0x44a334c9); // sqdmlslt z9.s, z6.h, z3.h[0] 7357 // vl128 state = 0x39e0227b 7358 __ dci(0x44a324f9); // sqdmlalt z25.s, z7.h, z3.h[0] 7359 // vl128 state = 0xf163fa0b 7360 __ dci(0x44a224d8); // sqdmlalt z24.s, z6.h, z2.h[0] 7361 // vl128 state = 0xbb4e0d24 7362 __ dci(0x44b22448); // sqdmlalt z8.s, z2.h, z2.h[4] 7363 // vl128 state = 0x26c102cc 7364 __ dci(0x44f224d8); // sqdmlalt z24.d, z6.s, z2.s[2] 7365 // vl128 state = 0x40f79dde 7366 __ dci(0x44f220f9); // sqdmlalb z25.d, z7.s, z2.s[2] 7367 // vl128 state = 0xf9d62034 7368 __ dci(0x44f020a9); // sqdmlalb z9.d, z5.s, z0.s[2] 7369 // vl128 state = 0x2b78be2f 7370 __ dci(0x44f424ad); // sqdmlalt z13.d, z5.s, z4.s[2] 7371 // vl128 state = 0xf0701e23 7372 __ dci(0x44f430a5); // sqdmlslb z5.d, z5.s, z4.s[2] 7373 // vl128 state = 0x992b12d6 7374 __ dci(0x44f130a4); // sqdmlslb z4.d, z5.s, z1.s[2] 7375 // vl128 state = 0x50292759 7376 __ dci(0x44f130ac); // sqdmlslb z12.d, z5.s, z1.s[2] 7377 // vl128 state = 0x795462f2 7378 __ 
dci(0x44f3302d); // sqdmlslb z13.d, z1.s, z3.s[2] 7379 // vl128 state = 0x8ac29815 7380 __ dci(0x44e3300c); // sqdmlslb z12.d, z0.s, z3.s[0] 7381 // vl128 state = 0x842471eb 7382 __ dci(0x44e3300d); // sqdmlslb z13.d, z0.s, z3.s[0] 7383 // vl128 state = 0x28762af1 7384 __ dci(0x44eb321d); // sqdmlslb z29.d, z16.s, z11.s[0] 7385 // vl128 state = 0x352de071 7386 __ dci(0x44ef3259); // sqdmlslb z25.d, z18.s, z15.s[0] 7387 // vl128 state = 0x90a4cf15 7388 __ dci(0x44ff3349); // sqdmlslb z9.d, z26.s, z15.s[2] 7389 // vl128 state = 0x6be7e76a 7390 __ dci(0x44fb3319); // sqdmlslb z25.d, z24.s, z11.s[2] 7391 // vl128 state = 0x7023e2de 7392 __ dci(0x44bb3b18); // sqdmlslb z24.s, z24.h, z3.h[7] 7393 // vl128 state = 0xad48664c 7394 __ dci(0x44bb3b19); // sqdmlslb z25.s, z24.h, z3.h[7] 7395 // vl128 state = 0xc7d8239b 7396 __ dci(0x44bb3b11); // sqdmlslb z17.s, z24.h, z3.h[7] 7397 // vl128 state = 0x0d9b2b9b 7398 __ dci(0x44f33b15); // sqdmlslb z21.d, z24.s, z3.s[3] 7399 // vl128 state = 0xbdb9c559 7400 } 7401 7402 uint32_t state; 7403 ComputeMachineStateHash(&masm, &state); 7404 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7405 __ Ldr(w0, MemOperand(x0)); 7406 7407 END(); 7408 if (CAN_RUN()) { 7409 RUN(); 7410 uint32_t expected_hashes[] = { 7411 0xbdb9c559, 7412 0x0c2f83d5, 7413 0x3e1f2607, 7414 0x2db954ea, 7415 0xff33857d, 7416 0xd567c205, 7417 0x8b5ced4c, 7418 0x19ecc4d9, 7419 0x8581949e, 7420 0x30f1a921, 7421 0x8c94071b, 7422 0xb9ad4919, 7423 0x32dbb108, 7424 0x634f9cd4, 7425 0x2a122429, 7426 0xdae127f1, 7427 }; 7428 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7429 } 7430} 7431 7432TEST_SVE(sve2_floating_multiply_add_long_vector) { 7433 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7434 CPUFeatures::kSVE2, 7435 CPUFeatures::kNEON, 7436 CPUFeatures::kCRC32); 7437 START(); 7438 7439 SetInitialMachineState(&masm, kFpInputSet); 7440 // state = 0x1e5cbcac 7441 7442 { 7443 ExactAssemblyScope scope(&masm, 30 * kInstructionSize); 7444 __ 
dci(0x64bfa635); // fmlslt z21.s, z17.h, z31.h 7445 // vl128 state = 0x48383595 7446 __ dci(0x64bf867d); // fmlalt z29.s, z19.h, z31.h 7447 // vl128 state = 0xf2812c0e 7448 __ dci(0x64af877c); // fmlalt z28.s, z27.h, z15.h 7449 // vl128 state = 0x161daf06 7450 __ dci(0x64af8774); // fmlalt z20.s, z27.h, z15.h 7451 // vl128 state = 0x8146f2bf 7452 __ dci(0x64be877c); // fmlalt z28.s, z27.h, z30.h 7453 // vl128 state = 0x90bcd864 7454 __ dci(0x64bd876c); // fmlalt z12.s, z27.h, z29.h 7455 // vl128 state = 0x22b60b78 7456 __ dci(0x64bf8728); // fmlalt z8.s, z25.h, z31.h 7457 // vl128 state = 0x2c9ce51a 7458 __ dci(0x64bf836a); // fmlalb z10.s, z27.h, z31.h 7459 // vl128 state = 0x40e6b398 7460 __ dci(0x64bf87eb); // fmlalt z11.s, z31.h, z31.h 7461 // vl128 state = 0x479c4a98 7462 __ dci(0x64bf87e9); // fmlalt z9.s, z31.h, z31.h 7463 // vl128 state = 0x25c987ad 7464 __ dci(0x64b78779); // fmlalt z25.s, z27.h, z23.h 7465 // vl128 state = 0xb4fbc429 7466 __ dci(0x64b1877b); // fmlalt z27.s, z27.h, z17.h 7467 // vl128 state = 0x390616d8 7468 __ dci(0x64b1871f); // fmlalt z31.s, z24.h, z17.h 7469 // vl128 state = 0x7f24d2bf 7470 __ dci(0x64b5878f); // fmlalt z15.s, z28.h, z21.h 7471 // vl128 state = 0x01a90318 7472 __ dci(0x64b4870d); // fmlalt z13.s, z24.h, z20.h 7473 // vl128 state = 0x08789c2c 7474 __ dci(0x64b48709); // fmlalt z9.s, z24.h, z20.h 7475 // vl128 state = 0x169f9b57 7476 __ dci(0x64b48779); // fmlalt z25.s, z27.h, z20.h 7477 // vl128 state = 0xad4f23d7 7478 __ dci(0x64bc8671); // fmlalt z17.s, z19.h, z28.h 7479 // vl128 state = 0xf86b0a64 7480 __ dci(0x64b98673); // fmlalt z19.s, z19.h, z25.h 7481 // vl128 state = 0x78a848b2 7482 __ dci(0x64b18623); // fmlalt z3.s, z17.h, z17.h 7483 // vl128 state = 0xcac211c9 7484 __ dci(0x64b18642); // fmlalt z2.s, z18.h, z17.h 7485 // vl128 state = 0x9afcbe3f 7486 __ dci(0x64b1a6c0); // fmlslt z0.s, z22.h, z17.h 7487 // vl128 state = 0x0047e4b2 7488 __ dci(0x64b086c4); // fmlalt z4.s, z22.h, z16.h 7489 // vl128 state = 
0x203324b5 7490 __ dci(0x64b28645); // fmlalt z5.s, z18.h, z18.h 7491 // vl128 state = 0x7340c432 7492 __ dci(0x64b28264); // fmlalb z4.s, z19.h, z18.h 7493 // vl128 state = 0x6dc657a9 7494 __ dci(0x64b28765); // fmlalt z5.s, z27.h, z18.h 7495 // vl128 state = 0xa5d3889b 7496 __ dci(0x64ba8561); // fmlalt z1.s, z11.h, z26.h 7497 // vl128 state = 0x5bbd2dd9 7498 __ dci(0x64aa8543); // fmlalt z3.s, z10.h, z10.h 7499 // vl128 state = 0xa65ec305 7500 __ dci(0x64ae8141); // fmlalb z1.s, z10.h, z14.h 7501 // vl128 state = 0xd23d588c 7502 __ dci(0x64ae80c3); // fmlalb z3.s, z6.h, z14.h 7503 // vl128 state = 0x5a082bbc 7504 } 7505 7506 uint32_t state; 7507 ComputeMachineStateHash(&masm, &state); 7508 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7509 __ Ldr(w0, MemOperand(x0)); 7510 7511 END(); 7512 if (CAN_RUN()) { 7513 RUN(); 7514 uint32_t expected_hashes[] = { 7515 0x5a082bbc, 7516 0x23c41852, 7517 0xf462f328, 7518 0x6fa4d12b, 7519 0x5e5f3e79, 7520 0x9939c7e6, 7521 0x0ed39313, 7522 0x2911107c, 7523 0x18f77b9a, 7524 0x7226d5b3, 7525 0x05df3c07, 7526 0x1653749c, 7527 0xcb4f6acf, 7528 0x4c5f0755, 7529 0xc4eed654, 7530 0x47893eeb, 7531 }; 7532 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7533 } 7534} 7535 7536TEST_SVE(sve2_mla_long_index) { 7537 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7538 CPUFeatures::kSVE2, 7539 CPUFeatures::kNEON, 7540 CPUFeatures::kCRC32); 7541 START(); 7542 7543 SetInitialMachineState(&masm); 7544 // state = 0xe2bd2480 7545 7546 { 7547 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 7548 __ dci(0x44ea8d67); // smlalt z7.d, z11.s, z10.s[1] 7549 // vl128 state = 0xd08dbe24 7550 __ dci(0x44ea9d2f); // umlalt z15.d, z9.s, z10.s[1] 7551 // vl128 state = 0x56f6f237 7552 __ dci(0x44ea9d2d); // umlalt z13.d, z9.s, z10.s[1] 7553 // vl128 state = 0x00f89e4d 7554 __ dci(0x44eb992f); // umlalb z15.d, z9.s, z11.s[1] 7555 // vl128 state = 0xca4e469e 7556 __ dci(0x44ab99ae); // umlalb z14.s, z13.h, z3.h[3] 7557 // vl128 
state = 0xd4b18276 7558 __ dci(0x44ad99be); // umlalb z30.s, z13.h, z5.h[3] 7559 // vl128 state = 0x8650a79e 7560 __ dci(0x44ad99ba); // umlalb z26.s, z13.h, z5.h[3] 7561 // vl128 state = 0x6fa1a501 7562 __ dci(0x44adb9f2); // umlslb z18.s, z15.h, z5.h[3] 7563 // vl128 state = 0x1a56a5d4 7564 __ dci(0x44bda9f3); // smlslb z19.s, z15.h, z5.h[7] 7565 // vl128 state = 0xfdb18057 7566 __ dci(0x44b9a1fb); // smlslb z27.s, z15.h, z1.h[6] 7567 // vl128 state = 0xb46b6c28 7568 __ dci(0x44b8a1b3); // smlslb z19.s, z13.h, z0.h[6] 7569 // vl128 state = 0x623c62c3 7570 __ dci(0x44bc81b1); // smlalb z17.s, z13.h, z4.h[6] 7571 // vl128 state = 0x2abab4d3 7572 __ dci(0x44bc82b0); // smlalb z16.s, z21.h, z4.h[6] 7573 // vl128 state = 0x7a028731 7574 __ dci(0x44ac92b8); // umlalb z24.s, z21.h, z4.h[2] 7575 // vl128 state = 0xf48f6936 7576 __ dci(0x44a4923a); // umlalb z26.s, z17.h, z4.h[0] 7577 // vl128 state = 0xbcdf888d 7578 __ dci(0x44b49a3e); // umlalb z30.s, z17.h, z4.h[5] 7579 // vl128 state = 0x5060778e 7580 __ dci(0x44b69a1c); // umlalb z28.s, z16.h, z6.h[5] 7581 // vl128 state = 0x16da3835 7582 __ dci(0x44b6b218); // umlslb z24.s, z16.h, z6.h[4] 7583 // vl128 state = 0xac7fb4d0 7584 __ dci(0x44b2b25a); // umlslb z26.s, z18.h, z2.h[4] 7585 // vl128 state = 0x8d05433b 7586 __ dci(0x44b2ba0a); // umlslb z10.s, z16.h, z2.h[5] 7587 // vl128 state = 0x62630101 7588 __ dci(0x44b29b08); // umlalb z8.s, z24.h, z2.h[5] 7589 // vl128 state = 0x31ae445b 7590 __ dci(0x44b29b00); // umlalb z0.s, z24.h, z2.h[5] 7591 // vl128 state = 0x539a5875 7592 __ dci(0x44b29e08); // umlalt z8.s, z16.h, z2.h[5] 7593 // vl128 state = 0x07d4bf73 7594 __ dci(0x44b29eaa); // umlalt z10.s, z21.h, z2.h[5] 7595 // vl128 state = 0x314f48a8 7596 __ dci(0x44b2be2e); // umlslt z14.s, z17.h, z2.h[5] 7597 // vl128 state = 0x91bd2c17 7598 __ dci(0x44b2be3e); // umlslt z30.s, z17.h, z2.h[5] 7599 // vl128 state = 0x4cbf4360 7600 __ dci(0x44f2be7a); // umlslt z26.d, z19.s, z2.s[3] 7601 // vl128 state = 0xe94e76a9 
7602 __ dci(0x44f2ae4a); // smlslt z10.d, z18.s, z2.s[3] 7603 // vl128 state = 0xd0c2c4cc 7604 __ dci(0x44faae6e); // smlslt z14.d, z19.s, z10.s[3] 7605 // vl128 state = 0xc64d6839 7606 __ dci(0x44faae6f); // smlslt z15.d, z19.s, z10.s[3] 7607 // vl128 state = 0xa74358aa 7608 __ dci(0x44faae67); // smlslt z7.d, z19.s, z10.s[3] 7609 // vl128 state = 0xb8d9664b 7610 __ dci(0x44fa8e57); // smlalt z23.d, z18.s, z10.s[3] 7611 // vl128 state = 0xf1032ab4 7612 __ dci(0x44fa8c67); // smlalt z7.d, z3.s, z10.s[3] 7613 // vl128 state = 0x763732f4 7614 __ dci(0x44eaac66); // smlslt z6.d, z3.s, z10.s[1] 7615 // vl128 state = 0xdcf39367 7616 __ dci(0x44eaa456); // smlslt z22.d, z2.s, z10.s[0] 7617 // vl128 state = 0x5ea67d82 7618 __ dci(0x44aea45e); // smlslt z30.s, z2.h, z6.h[2] 7619 // vl128 state = 0x55da0908 7620 __ dci(0x44aaa64e); // smlslt z14.s, z18.h, z2.h[2] 7621 // vl128 state = 0x69d105f5 7622 __ dci(0x44baa75e); // smlslt z30.s, z26.h, z2.h[6] 7623 // vl128 state = 0x191bc065 7624 __ dci(0x44baa75a); // smlslt z26.s, z26.h, z2.h[6] 7625 // vl128 state = 0xbf62d2a0 7626 __ dci(0x44eaa75b); // smlslt z27.d, z26.s, z10.s[0] 7627 // vl128 state = 0x43803a21 7628 __ dci(0x44eabf5f); // umlslt z31.d, z26.s, z10.s[1] 7629 // vl128 state = 0x0b33725c 7630 __ dci(0x44ebbd57); // umlslt z23.d, z10.s, z11.s[1] 7631 // vl128 state = 0x0059a0f5 7632 __ dci(0x44abbf55); // umlslt z21.s, z26.h, z3.h[3] 7633 // vl128 state = 0xb587057f 7634 __ dci(0x44abab5d); // smlslb z29.s, z26.h, z3.h[3] 7635 // vl128 state = 0x0bfa30c6 7636 __ dci(0x44abab5c); // smlslb z28.s, z26.h, z3.h[3] 7637 // vl128 state = 0x151045b4 7638 __ dci(0x44abaf78); // smlslt z24.s, z27.h, z3.h[3] 7639 // vl128 state = 0xedb7fca9 7640 __ dci(0x44aaa77c); // smlslt z28.s, z27.h, z2.h[2] 7641 // vl128 state = 0xb68216f9 7642 __ dci(0x44aaa178); // smlslb z24.s, z11.h, z2.h[2] 7643 // vl128 state = 0x35447b11 7644 __ dci(0x44aa81fa); // smlalb z26.s, z15.h, z2.h[2] 7645 // vl128 state = 0xf532285f 7646 __ 
dci(0x44aa8198); // smlalb z24.s, z12.h, z2.h[2] 7647 // vl128 state = 0xd414889b 7648 } 7649 7650 uint32_t state; 7651 ComputeMachineStateHash(&masm, &state); 7652 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7653 __ Ldr(w0, MemOperand(x0)); 7654 7655 END(); 7656 if (CAN_RUN()) { 7657 RUN(); 7658 uint32_t expected_hashes[] = { 7659 0xd414889b, 7660 0x79d8f659, 7661 0xe2c8f06b, 7662 0x91aadf3d, 7663 0xffb92c3e, 7664 0xc2d3138e, 7665 0xdd9f4396, 7666 0xce39a88e, 7667 0xfe68a5ca, 7668 0xdcb072b2, 7669 0x3756ede6, 7670 0x5c2eef22, 7671 0x01fd02a4, 7672 0xdd8d4890, 7673 0x87500dc9, 7674 0x8c895325, 7675 }; 7676 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7677 } 7678} 7679 7680TEST_SVE(sve2_mul_long_index) { 7681 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7682 CPUFeatures::kSVE2, 7683 CPUFeatures::kNEON, 7684 CPUFeatures::kCRC32); 7685 START(); 7686 7687 SetInitialMachineState(&masm); 7688 // state = 0xe2bd2480 7689 7690 { 7691 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 7692 __ dci(0x44f1d492); // umullt z18.d, z4.s, z1.s[2] 7693 // vl128 state = 0x4377a821 7694 __ dci(0x44fdd490); // umullt z16.d, z4.s, z13.s[2] 7695 // vl128 state = 0x5879cb00 7696 __ dci(0x44fdc080); // smullb z0.d, z4.s, z13.s[2] 7697 // vl128 state = 0xbe0f85f8 7698 __ dci(0x44fdc081); // smullb z1.d, z4.s, z13.s[2] 7699 // vl128 state = 0xa0eb0d63 7700 __ dci(0x44fcc000); // smullb z0.d, z0.s, z12.s[2] 7701 // vl128 state = 0xf023feb2 7702 __ dci(0x44ffc001); // smullb z1.d, z0.s, z15.s[2] 7703 // vl128 state = 0xcc0dcc10 7704 __ dci(0x44ffc0c9); // smullb z9.d, z6.s, z15.s[2] 7705 // vl128 state = 0x8e0d2525 7706 __ dci(0x44f7d0c8); // umullb z8.d, z6.s, z7.s[2] 7707 // vl128 state = 0xaf711253 7708 __ dci(0x44b7d080); // umullb z0.s, z4.h, z7.h[4] 7709 // vl128 state = 0x8cea3501 7710 __ dci(0x44f7d290); // umullb z16.d, z20.s, z7.s[2] 7711 // vl128 state = 0x09be9a84 7712 __ dci(0x44f6da92); // umullb z18.d, z20.s, z6.s[3] 7713 // vl128 state = 
0x3906715f 7714 __ dci(0x44fed296); // umullb z22.d, z20.s, z14.s[2] 7715 // vl128 state = 0xf399bb76 7716 __ dci(0x44f6c292); // smullb z18.d, z20.s, z6.s[2] 7717 // vl128 state = 0x33ceff98 7718 __ dci(0x44e6c2a2); // smullb z2.d, z21.s, z6.s[0] 7719 // vl128 state = 0x00765739 7720 __ dci(0x44e6c323); // smullb z3.d, z25.s, z6.s[0] 7721 // vl128 state = 0x3dad5b1f 7722 __ dci(0x44e6c333); // smullb z19.d, z25.s, z6.s[0] 7723 // vl128 state = 0xc5b39601 7724 __ dci(0x44e7c377); // smullb z23.d, z27.s, z7.s[0] 7725 // vl128 state = 0x134b3d1f 7726 __ dci(0x44e7d3ff); // umullb z31.d, z31.s, z7.s[0] 7727 // vl128 state = 0xc4be3961 7728 __ dci(0x44e7d3fe); // umullb z30.d, z31.s, z7.s[0] 7729 // vl128 state = 0x195e406b 7730 __ dci(0x44e7c3da); // smullb z26.d, z30.s, z7.s[0] 7731 // vl128 state = 0xae2522f9 7732 __ dci(0x44e7c2fe); // smullb z30.d, z23.s, z7.s[0] 7733 // vl128 state = 0xed267bfb 7734 __ dci(0x44e3c3f6); // smullb z22.d, z31.s, z3.s[0] 7735 // vl128 state = 0x6f6eeec4 7736 __ dci(0x44f3c2f2); // smullb z18.d, z23.s, z3.s[2] 7737 // vl128 state = 0x1689afdf 7738 __ dci(0x44f3c2e2); // smullb z2.d, z23.s, z3.s[2] 7739 // vl128 state = 0x24999374 7740 __ dci(0x44f3c06a); // smullb z10.d, z3.s, z3.s[2] 7741 // vl128 state = 0x046126eb 7742 __ dci(0x44f3c06b); // smullb z11.d, z3.s, z3.s[2] 7743 // vl128 state = 0x6b39941f 7744 __ dci(0x44f3c449); // smullt z9.d, z2.s, z3.s[2] 7745 // vl128 state = 0xf161bcc6 7746 __ dci(0x44f3ccc8); // smullt z8.d, z6.s, z3.s[3] 7747 // vl128 state = 0xbdc67c89 7748 __ dci(0x44f9ccd8); // smullt z24.d, z6.s, z9.s[3] 7749 // vl128 state = 0xfed59871 7750 __ dci(0x44ffccdc); // smullt z28.d, z6.s, z15.s[3] 7751 // vl128 state = 0x72746ff6 7752 __ dci(0x44fecc58); // smullt z24.d, z2.s, z14.s[3] 7753 // vl128 state = 0xa15ee8f2 7754 __ dci(0x44bfcc48); // smullt z8.s, z2.h, z7.h[7] 7755 // vl128 state = 0x3dccd2d6 7756 __ dci(0x44b7c84a); // smullb z10.s, z2.h, z7.h[5] 7757 // vl128 state = 0x4537f0b2 7758 __ 
dci(0x44a5c84e); // smullb z14.s, z2.h, z5.h[1] 7759 // vl128 state = 0x60e30690 7760 __ dci(0x44adca46); // smullb z6.s, z18.h, z5.h[3] 7761 // vl128 state = 0xaef15cb5 7762 __ dci(0x44add847); // umullb z7.s, z2.h, z5.h[3] 7763 // vl128 state = 0xe7df553d 7764 __ dci(0x44bdd04f); // umullb z15.s, z2.h, z5.h[6] 7765 // vl128 state = 0xa713f809 7766 __ dci(0x44bdc007); // smullb z7.s, z0.h, z5.h[6] 7767 // vl128 state = 0x4907c6b7 7768 __ dci(0x44bdc005); // smullb z5.s, z0.h, z5.h[6] 7769 // vl128 state = 0x98a83fd0 7770 __ dci(0x44bdc0b5); // smullb z21.s, z5.h, z5.h[6] 7771 // vl128 state = 0x3e6cb588 7772 __ dci(0x44bcc094); // smullb z20.s, z4.h, z4.h[6] 7773 // vl128 state = 0x37e5a4ce 7774 __ dci(0x44bcc09c); // smullb z28.s, z4.h, z4.h[6] 7775 // vl128 state = 0x719de631 7776 __ dci(0x44acc88c); // smullb z12.s, z4.h, z4.h[3] 7777 // vl128 state = 0xf0f3dffe 7778 __ dci(0x44aac884); // smullb z4.s, z4.h, z2.h[3] 7779 // vl128 state = 0x61a714ff 7780 __ dci(0x44a8c8ac); // smullb z12.s, z5.h, z0.h[3] 7781 // vl128 state = 0xc47542ea 7782 __ dci(0x44a8cea4); // smullt z4.s, z21.h, z0.h[3] 7783 // vl128 state = 0x37865031 7784 __ dci(0x44a8daa5); // umullb z5.s, z21.h, z0.h[3] 7785 // vl128 state = 0x28cf4dc6 7786 __ dci(0x44b8dae4); // umullb z4.s, z23.h, z0.h[7] 7787 // vl128 state = 0x6fe181d0 7788 __ dci(0x44b9da6c); // umullb z12.s, z19.h, z1.h[7] 7789 // vl128 state = 0xde65c7e3 7790 __ dci(0x44b9da64); // umullb z4.s, z19.h, z1.h[7] 7791 // vl128 state = 0x040a7e45 7792 } 7793 7794 uint32_t state; 7795 ComputeMachineStateHash(&masm, &state); 7796 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7797 __ Ldr(w0, MemOperand(x0)); 7798 7799 END(); 7800 if (CAN_RUN()) { 7801 RUN(); 7802 uint32_t expected_hashes[] = { 7803 0x040a7e45, 7804 0x48fc4c2b, 7805 0x9a1c67d1, 7806 0xcb88ffdd, 7807 0xcda205bc, 7808 0x7a47b6fb, 7809 0x68ae16c8, 7810 0x483353c9, 7811 0x91d91835, 7812 0x17a9ca4a, 7813 0x4f3d394f, 7814 0x5182776c, 7815 0xc03c1d3b, 7816 0xe52799db, 7817 
0x1ddd328e, 7818 0xe33903de, 7819 }; 7820 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7821 } 7822} 7823 7824TEST_SVE(sve2_sat_double_mul_high) { 7825 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7826 CPUFeatures::kSVE2, 7827 CPUFeatures::kNEON, 7828 CPUFeatures::kCRC32); 7829 START(); 7830 7831 SetInitialMachineState(&masm); 7832 // state = 0xe2bd2480 7833 7834 { 7835 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 7836 __ dci(0x046c711a); // sqdmulh z26.h, z8.h, z12.h 7837 // vl128 state = 0xe962209c 7838 __ dci(0x047c7138); // sqdmulh z24.h, z9.h, z28.h 7839 // vl128 state = 0x06a43320 7840 __ dci(0x04fc7539); // sqrdmulh z25.d, z9.d, z28.d 7841 // vl128 state = 0x8ce1cad6 7842 __ dci(0x04fc7029); // sqdmulh z9.d, z1.d, z28.d 7843 // vl128 state = 0x6f3d1b22 7844 __ dci(0x04ac702d); // sqdmulh z13.s, z1.s, z12.s 7845 // vl128 state = 0x14b0451c 7846 __ dci(0x04a4742c); // sqrdmulh z12.s, z1.s, z4.s 7847 // vl128 state = 0x60206a6a 7848 __ dci(0x04a574ad); // sqrdmulh z13.s, z5.s, z5.s 7849 // vl128 state = 0x388a9786 7850 __ dci(0x04a574a9); // sqrdmulh z9.s, z5.s, z5.s 7851 // vl128 state = 0xee590c43 7852 __ dci(0x04e574e8); // sqrdmulh z8.d, z7.d, z5.d 7853 // vl128 state = 0x8d16295c 7854 __ dci(0x04e570ca); // sqdmulh z10.d, z6.d, z5.d 7855 // vl128 state = 0x2a5c234c 7856 __ dci(0x04e670cb); // sqdmulh z11.d, z6.d, z6.d 7857 // vl128 state = 0xfacc9e06 7858 __ dci(0x04f6708f); // sqdmulh z15.d, z4.d, z22.d 7859 // vl128 state = 0x2167ca56 7860 __ dci(0x04f67087); // sqdmulh z7.d, z4.d, z22.d 7861 // vl128 state = 0xc7d7af1d 7862 __ dci(0x04f77185); // sqdmulh z5.d, z12.d, z23.d 7863 // vl128 state = 0x15f82ac2 7864 __ dci(0x04f67104); // sqdmulh z4.d, z8.d, z22.d 7865 // vl128 state = 0xb2484707 7866 __ dci(0x04f6710c); // sqdmulh z12.d, z8.d, z22.d 7867 // vl128 state = 0x5a53b8e7 7868 __ dci(0x04f6708d); // sqdmulh z13.d, z4.d, z22.d 7869 // vl128 state = 0xa9affac2 7870 __ dci(0x04f67085); // sqdmulh z5.d, z4.d, z22.d 
7871 // vl128 state = 0xa425052d 7872 __ dci(0x04fe7281); // sqdmulh z1.d, z20.d, z30.d 7873 // vl128 state = 0x1c0f565c 7874 __ dci(0x04ee72d1); // sqdmulh z17.d, z22.d, z14.d 7875 // vl128 state = 0xff12c401 7876 __ dci(0x04ee7393); // sqdmulh z19.d, z28.d, z14.d 7877 // vl128 state = 0xcd1d9d3a 7878 __ dci(0x04ec73b2); // sqdmulh z18.d, z29.d, z12.d 7879 // vl128 state = 0x2aa94767 7880 __ dci(0x04ee73fa); // sqdmulh z26.d, z31.d, z14.d 7881 // vl128 state = 0x5ca68e9c 7882 __ dci(0x04ef77ea); // sqrdmulh z10.d, z31.d, z15.d 7883 // vl128 state = 0xe5b65473 7884 __ dci(0x04ff76e8); // sqrdmulh z8.d, z23.d, z31.d 7885 // vl128 state = 0xcc4e8803 7886 __ dci(0x04fd76c9); // sqrdmulh z9.d, z22.d, z29.d 7887 // vl128 state = 0x19fff884 7888 __ dci(0x04fd73d9); // sqdmulh z25.d, z30.d, z29.d 7889 // vl128 state = 0xb99d6147 7890 __ dci(0x04e973dd); // sqdmulh z29.d, z30.d, z9.d 7891 // vl128 state = 0xe8f11301 7892 __ dci(0x04b973dc); // sqdmulh z28.s, z30.s, z25.s 7893 // vl128 state = 0x24af5ffe 7894 __ dci(0x04b177dd); // sqrdmulh z29.s, z30.s, z17.s 7895 // vl128 state = 0x5c32a08e 7896 __ dci(0x04b177bc); // sqrdmulh z28.s, z29.s, z17.s 7897 // vl128 state = 0x12c8c1c4 7898 __ dci(0x04f377ac); // sqrdmulh z12.d, z29.d, z19.d 7899 // vl128 state = 0x7bc1f2e6 7900 __ dci(0x04f677ad); // sqrdmulh z13.d, z29.d, z22.d 7901 // vl128 state = 0x67d2640f 7902 __ dci(0x04fe76af); // sqrdmulh z15.d, z21.d, z30.d 7903 // vl128 state = 0x98035fbd 7904 __ dci(0x04ef76ae); // sqrdmulh z14.d, z21.d, z15.d 7905 // vl128 state = 0x5e561fd3 7906 __ dci(0x04ee72ac); // sqdmulh z12.d, z21.d, z14.d 7907 // vl128 state = 0xb56c3914 7908 __ dci(0x04ae72ee); // sqdmulh z14.s, z23.s, z14.s 7909 // vl128 state = 0x6bb1c4b1 7910 __ dci(0x04be7266); // sqdmulh z6.s, z19.s, z30.s 7911 // vl128 state = 0x5a5bdda6 7912 __ dci(0x04b67364); // sqdmulh z4.s, z27.s, z22.s 7913 // vl128 state = 0x09a447ea 7914 __ dci(0x04b27165); // sqdmulh z5.s, z11.s, z18.s 7915 // vl128 state = 0xee84be35 7916 
__ dci(0x04b27175); // sqdmulh z21.s, z11.s, z18.s 7917 // vl128 state = 0x84146d85 7918 __ dci(0x04ba7137); // sqdmulh z23.s, z9.s, z26.s 7919 // vl128 state = 0x92c2e5f6 7920 __ dci(0x04b3713f); // sqdmulh z31.s, z9.s, z19.s 7921 // vl128 state = 0xe3836fb8 7922 __ dci(0x04b37017); // sqdmulh z23.s, z0.s, z19.s 7923 // vl128 state = 0xb5225206 7924 __ dci(0x04b37615); // sqrdmulh z21.s, z16.s, z19.s 7925 // vl128 state = 0x157484c7 7926 __ dci(0x04b37491); // sqrdmulh z17.s, z4.s, z19.s 7927 // vl128 state = 0x586c4bbf 7928 __ dci(0x04b37481); // sqrdmulh z1.s, z4.s, z19.s 7929 // vl128 state = 0xf5dc07cb 7930 __ dci(0x04b37489); // sqrdmulh z9.s, z4.s, z19.s 7931 // vl128 state = 0x591875a8 7932 __ dci(0x04b5748d); // sqrdmulh z13.s, z4.s, z21.s 7933 // vl128 state = 0xb01f8fd5 7934 __ dci(0x043d748f); // sqrdmulh z15.b, z4.b, z29.b 7935 // vl128 state = 0xd466a58c 7936 } 7937 7938 uint32_t state; 7939 ComputeMachineStateHash(&masm, &state); 7940 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 7941 __ Ldr(w0, MemOperand(x0)); 7942 7943 END(); 7944 if (CAN_RUN()) { 7945 RUN(); 7946 uint32_t expected_hashes[] = { 7947 0xd466a58c, 7948 0xe2ec7fba, 7949 0x1644e93a, 7950 0x7c3ecb2e, 7951 0xed4ecd78, 7952 0xfd5b5783, 7953 0xa7094efe, 7954 0x92bd623f, 7955 0x6da5e423, 7956 0x1648b588, 7957 0x63ce5947, 7958 0xba9c7d90, 7959 0x756ae20d, 7960 0x6d4032ba, 7961 0x87ae8b8f, 7962 0x722b2f6f, 7963 }; 7964 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 7965 } 7966} 7967 7968TEST_SVE(sve2_cmla_index) { 7969 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 7970 CPUFeatures::kSVE2, 7971 CPUFeatures::kNEON, 7972 CPUFeatures::kCRC32); 7973 START(); 7974 7975 SetInitialMachineState(&masm); 7976 // state = 0xe2bd2480 7977 7978 { 7979 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 7980 __ dci(0x44e867e6); // cmla z6.s, z31.s, z8.s[0], #90 7981 // vl128 state = 0xee56e69b 7982 __ dci(0x44e86de4); // cmla z4.s, z15.s, z8.s[0], #270 7983 // vl128 state = 
0x0ed2e9f5 7984 __ dci(0x44e86be5); // cmla z5.s, z31.s, z8.s[0], #180 7985 // vl128 state = 0x9074e2a6 7986 __ dci(0x44eb6bf5); // cmla z21.s, z31.s, z11.s[0], #180 7987 // vl128 state = 0x8f43b8a8 7988 __ dci(0x44eb6b31); // cmla z17.s, z25.s, z11.s[0], #180 7989 // vl128 state = 0xb6c51b97 7990 __ dci(0x44eb6135); // cmla z21.s, z9.s, z11.s[0], #0 7991 // vl128 state = 0x4236beed 7992 __ dci(0x44e9633d); // cmla z29.s, z25.s, z9.s[0], #0 7993 // vl128 state = 0x21879fe6 7994 __ dci(0x44f96379); // cmla z25.s, z27.s, z9.s[1], #0 7995 // vl128 state = 0x78172805 7996 __ dci(0x44fd6349); // cmla z9.s, z26.s, z13.s[1], #0 7997 // vl128 state = 0x242a3ae5 7998 __ dci(0x44f76341); // cmla z1.s, z26.s, z7.s[1], #0 7999 // vl128 state = 0xa734ef3b 8000 __ dci(0x44f36305); // cmla z5.s, z24.s, z3.s[1], #0 8001 // vl128 state = 0x00a035b1 8002 __ dci(0x44f76381); // cmla z1.s, z28.s, z7.s[1], #0 8003 // vl128 state = 0xbdfda3d4 8004 __ dci(0x44f763e3); // cmla z3.s, z31.s, z7.s[1], #0 8005 // vl128 state = 0xe1ed6ed9 8006 __ dci(0x44b763cb); // cmla z11.h, z30.h, z7.h[2], #0 8007 // vl128 state = 0xae645ea8 8008 __ dci(0x44a763e9); // cmla z9.h, z31.h, z7.h[0], #0 8009 // vl128 state = 0x392b3511 8010 __ dci(0x44a762ab); // cmla z11.h, z21.h, z7.h[0], #0 8011 // vl128 state = 0x3a05f729 8012 __ dci(0x44a66aaf); // cmla z15.h, z21.h, z6.h[0], #180 8013 // vl128 state = 0x7cfa0c08 8014 __ dci(0x44a66aa7); // cmla z7.h, z21.h, z6.h[0], #180 8015 // vl128 state = 0x91749f43 8016 __ dci(0x44a663a5); // cmla z5.h, z29.h, z6.h[0], #0 8017 // vl128 state = 0x438479ab 8018 __ dci(0x44a66bed); // cmla z13.h, z31.h, z6.h[0], #180 8019 // vl128 state = 0xc25ce86d 8020 __ dci(0x44f66be9); // cmla z9.s, z31.s, z6.s[1], #180 8021 // vl128 state = 0x6e8bdeca 8022 __ dci(0x44b66bd9); // cmla z25.h, z30.h, z6.h[2], #180 8023 // vl128 state = 0x04745a63 8024 __ dci(0x44b66bd8); // cmla z24.h, z30.h, z6.h[2], #180 8025 // vl128 state = 0xbfc59a82 8026 __ dci(0x44b66b7c); // cmla z28.h, 
z27.h, z6.h[2], #180 8027 // vl128 state = 0x12d70fc2 8028 __ dci(0x44b6617e); // cmla z30.h, z11.h, z6.h[2], #0 8029 // vl128 state = 0x53f4b9a1 8030 __ dci(0x44b7697c); // cmla z28.h, z11.h, z7.h[2], #180 8031 // vl128 state = 0x74e99c24 8032 __ dci(0x44b3692c); // cmla z12.h, z9.h, z3.h[2], #180 8033 // vl128 state = 0xdc80a875 8034 __ dci(0x44a1692e); // cmla z14.h, z9.h, z1.h[0], #180 8035 // vl128 state = 0x307af313 8036 __ dci(0x44b169af); // cmla z15.h, z13.h, z1.h[2], #180 8037 // vl128 state = 0xc92b23fe 8038 __ dci(0x44b165a7); // cmla z7.h, z13.h, z1.h[2], #90 8039 // vl128 state = 0x33a52d1c 8040 __ dci(0x44b165a5); // cmla z5.h, z13.h, z1.h[2], #90 8041 // vl128 state = 0xbc53ebfc 8042 __ dci(0x44f161a1); // cmla z1.s, z13.s, z1.s[1], #0 8043 // vl128 state = 0x7ba34076 8044 __ dci(0x44f261a0); // cmla z0.s, z13.s, z2.s[1], #0 8045 // vl128 state = 0x6fa2bab8 8046 __ dci(0x44b361b0); // cmla z16.h, z13.h, z3.h[2], #0 8047 // vl128 state = 0xaae67807 8048 __ dci(0x44b36092); // cmla z18.h, z4.h, z3.h[2], #0 8049 // vl128 state = 0xf1b05dff 8050 __ dci(0x44b36202); // cmla z2.h, z16.h, z3.h[2], #0 8051 // vl128 state = 0xd226bf15 8052 __ dci(0x44b36a20); // cmla z0.h, z17.h, z3.h[2], #180 8053 // vl128 state = 0x6a8ade58 8054 __ dci(0x44b26a10); // cmla z16.h, z16.h, z2.h[2], #180 8055 // vl128 state = 0x075e00e4 8056 __ dci(0x44b26a18); // cmla z24.h, z16.h, z2.h[2], #180 8057 // vl128 state = 0x9bcef7bd 8058 __ dci(0x44b06a28); // cmla z8.h, z17.h, z0.h[2], #180 8059 // vl128 state = 0x8ac6d4b3 8060 __ dci(0x44b06a2a); // cmla z10.h, z17.h, z0.h[2], #180 8061 // vl128 state = 0x51993d51 8062 __ dci(0x44b0620b); // cmla z11.h, z16.h, z0.h[2], #0 8063 // vl128 state = 0x6d134734 8064 __ dci(0x44b06209); // cmla z9.h, z16.h, z0.h[2], #0 8065 // vl128 state = 0x0ee4031f 8066 __ dci(0x44f06a0d); // cmla z13.s, z16.s, z0.s[1], #180 8067 // vl128 state = 0x08ea247b 8068 __ dci(0x44f06b2c); // cmla z12.s, z25.s, z0.s[1], #180 8069 // vl128 state = 0x6acbb19a 
8070 __ dci(0x44f1692d); // cmla z13.s, z9.s, z1.s[1], #180 8071 // vl128 state = 0x3ea2d161 8072 __ dci(0x44b36925); // cmla z5.h, z9.h, z3.h[2], #180 8073 // vl128 state = 0x5b962e9b 8074 __ dci(0x44b36921); // cmla z1.h, z9.h, z3.h[2], #180 8075 // vl128 state = 0x029f0eca 8076 __ dci(0x44b36d69); // cmla z9.h, z11.h, z3.h[2], #270 8077 // vl128 state = 0x39a63c65 8078 __ dci(0x44bb6d28); // cmla z8.h, z9.h, z3.h[3], #270 8079 // vl128 state = 0x6d58c136 8080 } 8081 8082 uint32_t state; 8083 ComputeMachineStateHash(&masm, &state); 8084 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 8085 __ Ldr(w0, MemOperand(x0)); 8086 8087 END(); 8088 if (CAN_RUN()) { 8089 RUN(); 8090 uint32_t expected_hashes[] = { 8091 0x6d58c136, 8092 0xfbdbae97, 8093 0x85c3cf1a, 8094 0xe4b53177, 8095 0x2f714586, 8096 0xde1afee8, 8097 0xd9613d2e, 8098 0x842c85a6, 8099 0xdc285523, 8100 0xccba7ba9, 8101 0x79e1e6f7, 8102 0xb19427f4, 8103 0x20d08a3a, 8104 0xfb7f4c43, 8105 0x0721ed60, 8106 0x4ee795ab, 8107 }; 8108 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 8109 } 8110} 8111 8112TEST_SVE(sve2_flogb) { 8113 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 8114 CPUFeatures::kSVE2, 8115 CPUFeatures::kNEON, 8116 CPUFeatures::kCRC32); 8117 START(); 8118 8119 SetInitialMachineState(&masm); 8120 // state = 0xe2bd2480 8121 8122 { 8123 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 8124 __ dci(0x651cb31e); // flogb z30.s, p4/m, z24.s 8125 // vl128 state = 0x161f1855 8126 __ dci(0x651cb3ae); // flogb z14.s, p4/m, z29.s 8127 // vl128 state = 0xf9e5ce4d 8128 __ dci(0x651cb3be); // flogb z30.s, p4/m, z29.s 8129 // vl128 state = 0xa06176bc 8130 __ dci(0x651ea3bc); // flogb z28.d, p0/m, z29.d 8131 // vl128 state = 0xf793f7bb 8132 __ dci(0x651ea3cc); // flogb z12.d, p0/m, z30.d 8133 // vl128 state = 0xe5d71081 8134 __ dci(0x651ea3dc); // flogb z28.d, p0/m, z30.d 8135 // vl128 state = 0x33ffc09f 8136 __ dci(0x651ea3d4); // flogb z20.d, p0/m, z30.d 8137 // vl128 state = 0xd908a72e 
8138 __ dci(0x651ea3d5); // flogb z21.d, p0/m, z30.d 8139 // vl128 state = 0x9528251a 8140 __ dci(0x651ca394); // flogb z20.s, p0/m, z28.s 8141 // vl128 state = 0xb1ac4188 8142 __ dci(0x651ca396); // flogb z22.s, p0/m, z28.s 8143 // vl128 state = 0xdc328726 8144 __ dci(0x651ca1d7); // flogb z23.s, p0/m, z14.s 8145 // vl128 state = 0xfc232eb7 8146 __ dci(0x651ca947); // flogb z7.s, p2/m, z10.s 8147 // vl128 state = 0xa9c53a1a 8148 __ dci(0x651ca805); // flogb z5.s, p2/m, z0.s 8149 // vl128 state = 0x9e4a47e9 8150 __ dci(0x651ea841); // flogb z1.d, p2/m, z2.d 8151 // vl128 state = 0x7a2aeaf6 8152 __ dci(0x651ea843); // flogb z3.d, p2/m, z2.d 8153 // vl128 state = 0xedd4aa97 8154 __ dci(0x651caa4b); // flogb z11.s, p2/m, z18.s 8155 // vl128 state = 0x7bfefefb 8156 __ dci(0x651cab6f); // flogb z15.s, p2/m, z27.s 8157 // vl128 state = 0x91b5a183 8158 __ dci(0x651ca86b); // flogb z11.s, p2/m, z3.s 8159 // vl128 state = 0x7b2776c2 8160 __ dci(0x651ca47b); // flogb z27.s, p1/m, z3.s 8161 // vl128 state = 0x46ea46c7 8162 __ dci(0x651ca47f); // flogb z31.s, p1/m, z3.s 8163 // vl128 state = 0x6e1d4e89 8164 __ dci(0x651ca477); // flogb z23.s, p1/m, z3.s 8165 // vl128 state = 0x5ea1220c 8166 __ dci(0x651ca035); // flogb z21.s, p0/m, z1.s 8167 // vl128 state = 0xb06e32be 8168 __ dci(0x651ca2a5); // flogb z5.s, p0/m, z21.s 8169 // vl128 state = 0xb856d206 8170 __ dci(0x651caa2d); // flogb z13.s, p2/m, z17.s 8171 // vl128 state = 0xebfd587f 8172 __ dci(0x651caa3d); // flogb z29.s, p2/m, z17.s 8173 // vl128 state = 0xb029ba8d 8174 __ dci(0x651eaa7f); // flogb z31.d, p2/m, z19.d 8175 // vl128 state = 0x07fd3f42 8176 __ dci(0x651ebb7e); // flogb z30.d, p6/m, z27.d 8177 // vl128 state = 0x79761d7a 8178 __ dci(0x651ebb76); // flogb z22.d, p6/m, z27.d 8179 // vl128 state = 0xdf56dd22 8180 __ dci(0x651ebb72); // flogb z18.d, p6/m, z27.d 8181 // vl128 state = 0xce798ad7 8182 __ dci(0x651eb276); // flogb z22.d, p4/m, z19.d 8183 // vl128 state = 0x84dd46d6 8184 __ dci(0x651eb652); // flogb 
z18.d, p5/m, z18.d 8185 // vl128 state = 0x2ea4a0df 8186 __ dci(0x651cbe42); // flogb z2.s, p7/m, z18.s 8187 // vl128 state = 0x8cdd1250 8188 __ dci(0x651cb852); // flogb z18.s, p6/m, z2.s 8189 // vl128 state = 0x5f5b051d 8190 __ dci(0x651eb956); // flogb z22.d, p6/m, z10.d 8191 // vl128 state = 0x7a17cdd1 8192 __ dci(0x651eb11e); // flogb z30.d, p4/m, z8.d 8193 // vl128 state = 0x7367f8ec 8194 __ dci(0x651ab016); // flogb z22.h, p4/m, z0.h 8195 // vl128 state = 0x8e1bfb06 8196 __ dci(0x651ab014); // flogb z20.h, p4/m, z0.h 8197 // vl128 state = 0x2bcfa0f0 8198 __ dci(0x651aa81c); // flogb z28.h, p2/m, z0.h 8199 // vl128 state = 0xeb9615e8 8200 __ dci(0x651aa80c); // flogb z12.h, p2/m, z0.h 8201 // vl128 state = 0x5b55f5cd 8202 __ dci(0x651aa808); // flogb z8.h, p2/m, z0.h 8203 // vl128 state = 0xdd1718f2 8204 __ dci(0x651aa20a); // flogb z10.h, p0/m, z16.h 8205 // vl128 state = 0x205e88ed 8206 __ dci(0x651ab24e); // flogb z14.h, p4/m, z18.h 8207 // vl128 state = 0x1c9f2035 8208 __ dci(0x651ab36f); // flogb z15.h, p4/m, z27.h 8209 // vl128 state = 0xea22efaf 8210 __ dci(0x651ab36b); // flogb z11.h, p4/m, z27.h 8211 // vl128 state = 0x0cd0b8cd 8212 __ dci(0x651abb29); // flogb z9.h, p6/m, z25.h 8213 // vl128 state = 0xa1a017d1 8214 __ dci(0x651abb2d); // flogb z13.h, p6/m, z25.h 8215 // vl128 state = 0x37d033d2 8216 __ dci(0x651aba0c); // flogb z12.h, p6/m, z16.h 8217 // vl128 state = 0x971bde83 8218 __ dci(0x651cba1c); // flogb z28.s, p6/m, z16.s 8219 // vl128 state = 0xb6b23bc2 8220 __ dci(0x651cba1d); // flogb z29.s, p6/m, z16.s 8221 // vl128 state = 0x1af298e0 8222 __ dci(0x651cba15); // flogb z21.s, p6/m, z16.s 8223 // vl128 state = 0x077a2869 8224 } 8225 8226 uint32_t state; 8227 ComputeMachineStateHash(&masm, &state); 8228 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 8229 __ Ldr(w0, MemOperand(x0)); 8230 8231 END(); 8232 if (CAN_RUN()) { 8233 RUN(); 8234 uint32_t expected_hashes[] = { 8235 0x077a2869, 8236 0xde5bc452, 8237 0xe80f0bc6, 8238 0x1c078cf2, 8239 
0x66064034, 8240 0xa9f5264d, 8241 0xb19b24c1, 8242 0xb394864c, 8243 0x42991ea7, 8244 0xcf33094e, 8245 0xc4656d85, 8246 0x4cfa5b7e, 8247 0xbb7c121f, 8248 0xd2e8c839, 8249 0x028134cf, 8250 0x2f3e9779, 8251 }; 8252 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 8253 } 8254} 8255 8256TEST_SVE(sve2_fp_pair) { 8257 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 8258 CPUFeatures::kSVE2, 8259 CPUFeatures::kNEON, 8260 CPUFeatures::kCRC32); 8261 START(); 8262 8263 SetInitialMachineState(&masm, kFpInputSet); 8264 // state = 0x1e5cbcac 8265 8266 { 8267 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 8268 __ dci(0x64d591aa); // fminnmp z10.d, p4/m, z10.d, z13.d 8269 // vl128 state = 0x02a0f18c 8270 __ dci(0x64d59dab); // fminnmp z11.d, p7/m, z11.d, z13.d 8271 // vl128 state = 0xd6d0a87f 8272 __ dci(0x64d59d7b); // fminnmp z27.d, p7/m, z27.d, z11.d 8273 // vl128 state = 0x364f93b4 8274 __ dci(0x64d59c2b); // fminnmp z11.d, p7/m, z11.d, z1.d 8275 // vl128 state = 0xc7ed7476 8276 __ dci(0x64d59f23); // fminnmp z3.d, p7/m, z3.d, z25.d 8277 // vl128 state = 0x7a1ec868 8278 __ dci(0x64d59f22); // fminnmp z2.d, p7/m, z2.d, z25.d 8279 // vl128 state = 0x862a3f3d 8280 __ dci(0x64d49fa0); // fmaxnmp z0.d, p7/m, z0.d, z29.d 8281 // vl128 state = 0x11f71743 8282 __ dci(0x64d49fa8); // fmaxnmp z8.d, p7/m, z8.d, z29.d 8283 // vl128 state = 0x302e45cd 8284 __ dci(0x64d49fa9); // fmaxnmp z9.d, p7/m, z9.d, z29.d 8285 // vl128 state = 0x11cca180 8286 __ dci(0x64d68fb9); // fmaxp z25.d, p3/m, z25.d, z29.d 8287 // vl128 state = 0xee6b2d42 8288 __ dci(0x64d68fb8); // fmaxp z24.d, p3/m, z24.d, z29.d 8289 // vl128 state = 0x060efb2c 8290 __ dci(0x64d49fba); // fmaxnmp z26.d, p7/m, z26.d, z29.d 8291 // vl128 state = 0x4f4232ac 8292 __ dci(0x649497b2); // fmaxnmp z18.s, p5/m, z18.s, z29.s 8293 // vl128 state = 0xe3e04479 8294 __ dci(0x649096b6); // faddp z22.s, p5/m, z22.s, z21.s 8295 // vl128 state = 0x2a407146 8296 __ dci(0x64909237); // faddp z23.s, p4/m, z23.s, z17.s 
8297 // vl128 state = 0x6d0b2bb8 8298 __ dci(0x64d09027); // faddp z7.d, p4/m, z7.d, z1.d 8299 // vl128 state = 0x5e7d175f 8300 __ dci(0x64509006); // faddp z6.h, p4/m, z6.h, z0.h 8301 // vl128 state = 0xa0a4cd20 8302 __ dci(0x64d0940e); // faddp z14.d, p5/m, z14.d, z0.d 8303 // vl128 state = 0xf66b9cde 8304 __ dci(0x64d09c4f); // faddp z15.d, p7/m, z15.d, z2.d 8305 // vl128 state = 0x5a2d08c9 8306 __ dci(0x64d09c5f); // faddp z31.d, p7/m, z31.d, z2.d 8307 // vl128 state = 0x2e390409 8308 __ dci(0x64d09c57); // faddp z23.d, p7/m, z23.d, z2.d 8309 // vl128 state = 0xfb4af476 8310 __ dci(0x64d09c56); // faddp z22.d, p7/m, z22.d, z2.d 8311 // vl128 state = 0x8d8c621b 8312 __ dci(0x64d08e5e); // faddp z30.d, p3/m, z30.d, z18.d 8313 // vl128 state = 0xba8962e6 8314 __ dci(0x64d0845c); // faddp z28.d, p1/m, z28.d, z2.d 8315 // vl128 state = 0x224654c6 8316 __ dci(0x64d0845d); // faddp z29.d, p1/m, z29.d, z2.d 8317 // vl128 state = 0xef608134 8318 __ dci(0x64d08e4d); // faddp z13.d, p3/m, z13.d, z18.d 8319 // vl128 state = 0x5adedbf3 8320 __ dci(0x64908645); // faddp z5.s, p1/m, z5.s, z18.s 8321 // vl128 state = 0x04b4f366 8322 __ dci(0x64908a4d); // faddp z13.s, p2/m, z13.s, z18.s 8323 // vl128 state = 0xf0a7482a 8324 __ dci(0x64d08245); // faddp z5.d, p0/m, z5.d, z18.d 8325 // vl128 state = 0x0f2ccd61 8326 __ dci(0x64909255); // faddp z21.s, p4/m, z21.s, z18.s 8327 // vl128 state = 0x7665491f 8328 __ dci(0x649096c5); // faddp z5.s, p5/m, z5.s, z22.s 8329 // vl128 state = 0xc3b53fd3 8330 __ dci(0x649492c1); // fmaxnmp z1.s, p4/m, z1.s, z22.s 8331 // vl128 state = 0x589fd64a 8332 __ dci(0x649096d1); // faddp z17.s, p5/m, z17.s, z22.s 8333 // vl128 state = 0x5a0d0d52 8334 __ dci(0x649096d5); // faddp z21.s, p5/m, z21.s, z22.s 8335 // vl128 state = 0xba57cd51 8336 __ dci(0x649096d4); // faddp z20.s, p5/m, z20.s, z22.s 8337 // vl128 state = 0xa5d7b29d 8338 __ dci(0x649093d0); // faddp z16.s, p4/m, z16.s, z30.s 8339 // vl128 state = 0xa62cce9e 8340 __ dci(0x64909318); // 
faddp z24.s, p4/m, z24.s, z24.s 8341 // vl128 state = 0x8cc209c7 8342 __ dci(0x64909008); // faddp z8.s, p4/m, z8.s, z0.s 8343 // vl128 state = 0x56a9af04 8344 __ dci(0x64969000); // fmaxp z0.s, p4/m, z0.s, z0.s 8345 // vl128 state = 0xc45f824a 8346 __ dci(0x64569004); // fmaxp z4.h, p4/m, z4.h, z0.h 8347 // vl128 state = 0x82da5cb7 8348 __ dci(0x64569000); // fmaxp z0.h, p4/m, z0.h, z0.h 8349 // vl128 state = 0xa9fff0bf 8350 __ dci(0x64569001); // fmaxp z1.h, p4/m, z1.h, z0.h 8351 // vl128 state = 0x71c2e09a 8352 __ dci(0x64569605); // fmaxp z5.h, p5/m, z5.h, z16.h 8353 // vl128 state = 0xe50c8b49 8354 __ dci(0x64579624); // fminp z4.h, p5/m, z4.h, z17.h 8355 // vl128 state = 0x4f3817cb 8356 __ dci(0x6457962c); // fminp z12.h, p5/m, z12.h, z17.h 8357 // vl128 state = 0x5a773e57 8358 __ dci(0x64d5963c); // fminnmp z28.d, p5/m, z28.d, z17.d 8359 // vl128 state = 0xa5c5e37c 8360 __ dci(0x64d7943e); // fminp z30.d, p5/m, z30.d, z1.d 8361 // vl128 state = 0xc778f8a3 8362 __ dci(0x6457953a); // fminp z26.h, p5/m, z26.h, z9.h 8363 // vl128 state = 0x01abc4af 8364 __ dci(0x6457952a); // fminp z10.h, p5/m, z10.h, z9.h 8365 // vl128 state = 0x45483a17 8366 __ dci(0x64579d7a); // fminp z26.h, p7/m, z26.h, z11.h 8367 // vl128 state = 0x355b08b3 8368 } 8369 8370 uint32_t state; 8371 ComputeMachineStateHash(&masm, &state); 8372 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 8373 __ Ldr(w0, MemOperand(x0)); 8374 8375 END(); 8376 if (CAN_RUN()) { 8377 RUN(); 8378 uint32_t expected_hashes[] = { 8379 0x355b08b3, 8380 0x8f7890cd, 8381 0x5dddb069, 8382 0x030a5f52, 8383 0xc569c150, 8384 0x060423ba, 8385 0x5d729bd0, 8386 0x079b4f8b, 8387 0x06e75e58, 8388 0x6f631884, 8389 0xddc735f0, 8390 0x7213b8e2, 8391 0x8cbf507c, 8392 0x40654268, 8393 0x3cd7ad6c, 8394 0xfba0ee9e, 8395 }; 8396 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 8397 } 8398} 8399 8400TEST_SVE(sve2_fmlal_fmlsl_index) { 8401 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 8402 CPUFeatures::kSVE2, 8403 
CPUFeatures::kNEON, 8404 CPUFeatures::kCRC32); 8405 START(); 8406 8407 SetInitialMachineState(&masm); 8408 // state = 0xe2bd2480 8409 8410 { 8411 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 8412 __ dci(0x64a94f15); // fmlalt z21.s, z24.h, z1.h[3] 8413 // vl128 state = 0x0895849b 8414 __ dci(0x64ab4f9d); // fmlalt z29.s, z28.h, z3.h[3] 8415 // vl128 state = 0x6e0cf3fe 8416 __ dci(0x64a74f9c); // fmlalt z28.s, z28.h, z7.h[1] 8417 // vl128 state = 0x482b4f57 8418 __ dci(0x64a74dde); // fmlalt z30.s, z14.h, z7.h[1] 8419 // vl128 state = 0xf047791e 8420 __ dci(0x64a74cee); // fmlalt z14.s, z7.h, z7.h[1] 8421 // vl128 state = 0xde33332c 8422 __ dci(0x64a648ef); // fmlalb z15.s, z7.h, z6.h[1] 8423 // vl128 state = 0xf7148941 8424 __ dci(0x64a648ee); // fmlalb z14.s, z7.h, z6.h[1] 8425 // vl128 state = 0x69f23fcb 8426 __ dci(0x64b649ea); // fmlalb z10.s, z15.h, z6.h[5] 8427 // vl128 state = 0x979eea1a 8428 __ dci(0x64b649ee); // fmlalb z14.s, z15.h, z6.h[5] 8429 // vl128 state = 0x522917a9 8430 __ dci(0x64b649e6); // fmlalb z6.s, z15.h, z6.h[5] 8431 // vl128 state = 0x7d773525 8432 __ dci(0x64b64ba2); // fmlalb z2.s, z29.h, z6.h[5] 8433 // vl128 state = 0x220960c6 8434 __ dci(0x64b46baa); // fmlslb z10.s, z29.h, z4.h[5] 8435 // vl128 state = 0x2c8e384a 8436 __ dci(0x64b46dab); // fmlslt z11.s, z13.h, z4.h[5] 8437 // vl128 state = 0xa592cde1 8438 __ dci(0x64b467bb); // fmlslt z27.s, z29.h, z4.h[4] 8439 // vl128 state = 0xba31bd61 8440 __ dci(0x64b665b3); // fmlslt z19.s, z13.h, z6.h[4] 8441 // vl128 state = 0x75dade04 8442 __ dci(0x64b663bb); // fmlslb z27.s, z29.h, z6.h[4] 8443 // vl128 state = 0xa7358466 8444 __ dci(0x64a662bf); // fmlslb z31.s, z21.h, z6.h[0] 8445 // vl128 state = 0x6125ca9d 8446 __ dci(0x64a7623e); // fmlslb z30.s, z17.h, z7.h[0] 8447 // vl128 state = 0x4b1cda83 8448 __ dci(0x64a7462e); // fmlalt z14.s, z17.h, z7.h[0] 8449 // vl128 state = 0x00d73a44 8450 __ dci(0x64a6662f); // fmlslt z15.s, z17.h, z6.h[0] 8451 // vl128 state = 0xc5ea9f30 
8452 __ dci(0x64a666ed); // fmlslt z13.s, z23.h, z6.h[0] 8453 // vl128 state = 0xe17ba118 8454 __ dci(0x64a26eec); // fmlslt z12.s, z23.h, z2.h[1] 8455 // vl128 state = 0xd1962c7a 8456 __ dci(0x64a26cbc); // fmlslt z28.s, z5.h, z2.h[1] 8457 // vl128 state = 0xde6f1ace 8458 __ dci(0x64a26cb4); // fmlslt z20.s, z5.h, z2.h[1] 8459 // vl128 state = 0x10d69920 8460 __ dci(0x64a26cbc); // fmlslt z28.s, z5.h, z2.h[1] 8461 // vl128 state = 0x8d190aec 8462 __ dci(0x64a26cd8); // fmlslt z24.s, z6.h, z2.h[1] 8463 // vl128 state = 0x432fdda3 8464 __ dci(0x64a26c1a); // fmlslt z26.s, z0.h, z2.h[1] 8465 // vl128 state = 0x9ababf0a 8466 __ dci(0x64a24d1e); // fmlalt z30.s, z8.h, z2.h[1] 8467 // vl128 state = 0x609040ae 8468 __ dci(0x64a24d1c); // fmlalt z28.s, z8.h, z2.h[1] 8469 // vl128 state = 0x0a047710 8470 __ dci(0x64a24d1e); // fmlalt z30.s, z8.h, z2.h[1] 8471 // vl128 state = 0xf273945a 8472 __ dci(0x64a0490e); // fmlalb z14.s, z8.h, z0.h[1] 8473 // vl128 state = 0x3a5456f1 8474 __ dci(0x64a0490c); // fmlalb z12.s, z8.h, z0.h[1] 8475 // vl128 state = 0xdb948daf 8476 __ dci(0x64b04b04); // fmlalb z4.s, z24.h, z0.h[5] 8477 // vl128 state = 0xd2eae2af 8478 __ dci(0x64b04b06); // fmlalb z6.s, z24.h, z0.h[5] 8479 // vl128 state = 0x26627a2c 8480 __ dci(0x64b04b07); // fmlalb z7.s, z24.h, z0.h[5] 8481 // vl128 state = 0x2841173d 8482 __ dci(0x64b84b26); // fmlalb z6.s, z25.h, z0.h[7] 8483 // vl128 state = 0x9b52bcc6 8484 __ dci(0x64ba4f27); // fmlalt z7.s, z25.h, z2.h[7] 8485 // vl128 state = 0x813bbabe 8486 __ dci(0x64ba4923); // fmlalb z3.s, z9.h, z2.h[7] 8487 // vl128 state = 0xbb608dad 8488 __ dci(0x64b84d22); // fmlalt z2.s, z9.h, z0.h[7] 8489 // vl128 state = 0xf4d84ed6 8490 __ dci(0x64b84d23); // fmlalt z3.s, z9.h, z0.h[7] 8491 // vl128 state = 0x1cc0784e 8492 __ dci(0x64bc4527); // fmlalt z7.s, z9.h, z4.h[6] 8493 // vl128 state = 0x4eece4b7 8494 __ dci(0x64bc6737); // fmlslt z23.s, z25.h, z4.h[6] 8495 // vl128 state = 0x00dacf34 8496 __ dci(0x64bc6fa7); // fmlslt z7.s, 
z29.h, z4.h[7] 8497 // vl128 state = 0x597e23d4 8498 __ dci(0x64bc6e25); // fmlslt z5.s, z17.h, z4.h[7] 8499 // vl128 state = 0xa66b843c 8500 __ dci(0x64be6f2d); // fmlslt z13.s, z25.h, z6.h[7] 8501 // vl128 state = 0xb595ec08 8502 __ dci(0x64be6765); // fmlslt z5.s, z27.h, z6.h[6] 8503 // vl128 state = 0xd6c3af0a 8504 __ dci(0x64be662d); // fmlslt z13.s, z17.h, z6.h[6] 8505 // vl128 state = 0x864f26a8 8506 __ dci(0x64bf6225); // fmlslb z5.s, z17.h, z7.h[6] 8507 // vl128 state = 0xb969be4d 8508 __ dci(0x64bb626d); // fmlslb z13.s, z19.h, z3.h[6] 8509 // vl128 state = 0x73329b58 8510 __ dci(0x64b9622c); // fmlslb z12.s, z17.h, z1.h[6] 8511 // vl128 state = 0xfb7e2da2 8512 } 8513 8514 uint32_t state; 8515 ComputeMachineStateHash(&masm, &state); 8516 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 8517 __ Ldr(w0, MemOperand(x0)); 8518 8519 END(); 8520 if (CAN_RUN()) { 8521 RUN(); 8522 uint32_t expected_hashes[] = { 8523 0xfb7e2da2, 8524 0x34ad546c, 8525 0xd914c0d4, 8526 0xc173287c, 8527 0x07db96b2, 8528 0xab5ece8c, 8529 0xcda13318, 8530 0x6e62dc3f, 8531 0x0268d9b4, 8532 0x15118567, 8533 0xf55fb24f, 8534 0xc4ab4b56, 8535 0x5911f225, 8536 0x6d9c320c, 8537 0xc69bdedf, 8538 0x1635a43f, 8539 }; 8540 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 8541 } 8542} 8543 8544TEST_SVE(sve2_fp_convert) { 8545 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 8546 CPUFeatures::kSVE2, 8547 CPUFeatures::kNEON, 8548 CPUFeatures::kCRC32); 8549 START(); 8550 8551 SetInitialMachineState(&masm); 8552 // state = 0xe2bd2480 8553 8554 { 8555 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 8556 __ dci(0x640ab3ee); // fcvtxnt z14.s, p4/m, z31.d 8557 // vl128 state = 0x3ea71f7a 8558 __ dci(0x64caa9e0); // fcvtnt z0.s, p2/m, z15.d 8559 // vl128 state = 0xe9d750a1 8560 __ dci(0x64cab83d); // fcvtnt z29.s, p6/m, z1.d 8561 // vl128 state = 0x9ce43257 8562 __ dci(0x650aad62); // fcvtx z2.s, p3/m, z11.d 8563 // vl128 state = 0x60283e22 8564 __ dci(0x64cbb42b); // fcvtlt 
z11.d, p5/m, z1.s 8565 // vl128 state = 0xfbecbe4a 8566 __ dci(0x6488ba54); // fcvtnt z20.h, p6/m, z18.s 8567 // vl128 state = 0xbb81cc05 8568 __ dci(0x64cbb730); // fcvtlt z16.d, p5/m, z25.s 8569 // vl128 state = 0xd9cebdf5 8570 __ dci(0x640aa5e4); // fcvtxnt z4.s, p1/m, z15.d 8571 // vl128 state = 0x9dba64db 8572 __ dci(0x650aa715); // fcvtx z21.s, p1/m, z24.d 8573 // vl128 state = 0x0e68fab9 8574 __ dci(0x64cabe86); // fcvtnt z6.s, p7/m, z20.d 8575 // vl128 state = 0x5936ac32 8576 __ dci(0x64cba075); // fcvtlt z21.d, p0/m, z3.s 8577 // vl128 state = 0x2eb8a37b 8578 __ dci(0x6488b3c5); // fcvtnt z5.h, p4/m, z30.s 8579 // vl128 state = 0x9f471340 8580 __ dci(0x6489b24a); // fcvtlt z10.s, p4/m, z18.h 8581 // vl128 state = 0xcf5e5808 8582 __ dci(0x64cbb514); // fcvtlt z20.d, p5/m, z8.s 8583 // vl128 state = 0x870c5b85 8584 __ dci(0x650ab090); // fcvtx z16.s, p4/m, z4.d 8585 // vl128 state = 0x305da0a0 8586 __ dci(0x64cbb2d3); // fcvtlt z19.d, p4/m, z22.s 8587 // vl128 state = 0x8eb1b5fc 8588 __ dci(0x64cbb093); // fcvtlt z19.d, p4/m, z4.s 8589 // vl128 state = 0x3c070332 8590 __ dci(0x6488b9b8); // fcvtnt z24.h, p6/m, z13.s 8591 // vl128 state = 0xe0fc3455 8592 __ dci(0x650aa64d); // fcvtx z13.s, p1/m, z18.d 8593 // vl128 state = 0x65556c34 8594 __ dci(0x6488b2d7); // fcvtnt z23.h, p4/m, z22.s 8595 // vl128 state = 0xc9ccae47 8596 __ dci(0x650ab36d); // fcvtx z13.s, p4/m, z27.d 8597 // vl128 state = 0x31d942a1 8598 __ dci(0x650aba2c); // fcvtx z12.s, p6/m, z17.d 8599 // vl128 state = 0x27497e26 8600 __ dci(0x650aa377); // fcvtx z23.s, p0/m, z27.d 8601 // vl128 state = 0xbe0a7446 8602 __ dci(0x6489a3a5); // fcvtlt z5.s, p0/m, z29.h 8603 // vl128 state = 0x454c62cc 8604 __ dci(0x64cabeb9); // fcvtnt z25.s, p7/m, z21.d 8605 // vl128 state = 0x808a014f 8606 __ dci(0x6489b4c2); // fcvtlt z2.s, p5/m, z6.h 8607 // vl128 state = 0x55ae2250 8608 __ dci(0x64cba246); // fcvtlt z6.d, p0/m, z18.s 8609 // vl128 state = 0x7ce05c24 8610 __ dci(0x650ab2a6); // fcvtx z6.s, p4/m, 
z21.d 8611 // vl128 state = 0xa26121f5 8612 __ dci(0x64cbb239); // fcvtlt z25.d, p4/m, z17.s 8613 // vl128 state = 0xb40c58e1 8614 __ dci(0x64cabdd9); // fcvtnt z25.s, p7/m, z14.d 8615 // vl128 state = 0xf5077a54 8616 __ dci(0x650ab75a); // fcvtx z26.s, p5/m, z26.d 8617 // vl128 state = 0x95b006de 8618 __ dci(0x650aa08b); // fcvtx z11.s, p0/m, z4.d 8619 // vl128 state = 0x9ca5060c 8620 __ dci(0x640aafd3); // fcvtxnt z19.s, p3/m, z30.d 8621 // vl128 state = 0x85c89705 8622 __ dci(0x64caaf3a); // fcvtnt z26.s, p3/m, z25.d 8623 // vl128 state = 0x6b6aa4f9 8624 __ dci(0x640abda1); // fcvtxnt z1.s, p7/m, z13.d 8625 // vl128 state = 0x769cf76e 8626 __ dci(0x6489a6f9); // fcvtlt z25.s, p1/m, z23.h 8627 // vl128 state = 0x0a291b3b 8628 __ dci(0x6489b38d); // fcvtlt z13.s, p4/m, z28.h 8629 // vl128 state = 0x6b72e558 8630 __ dci(0x650aaf63); // fcvtx z3.s, p3/m, z27.d 8631 // vl128 state = 0xf4a004e0 8632 __ dci(0x6488bfa4); // fcvtnt z4.h, p7/m, z29.s 8633 // vl128 state = 0xe01c349e 8634 __ dci(0x6489a6ee); // fcvtlt z14.s, p1/m, z23.h 8635 // vl128 state = 0x3b06da53 8636 __ dci(0x64cabbf8); // fcvtnt z24.s, p6/m, z31.d 8637 // vl128 state = 0xc60fbbf0 8638 __ dci(0x6489bc7f); // fcvtlt z31.s, p7/m, z3.h 8639 // vl128 state = 0x8b281c78 8640 __ dci(0x64caaf1f); // fcvtnt z31.s, p3/m, z24.d 8641 // vl128 state = 0x0f17afbb 8642 __ dci(0x650aac71); // fcvtx z17.s, p3/m, z3.d 8643 // vl128 state = 0xce0ac3e1 8644 __ dci(0x650aa1df); // fcvtx z31.s, p0/m, z14.d 8645 // vl128 state = 0x71ba2085 8646 __ dci(0x650aaf9f); // fcvtx z31.s, p3/m, z28.d 8647 // vl128 state = 0xe42caea0 8648 __ dci(0x640abff9); // fcvtxnt z25.s, p7/m, z31.d 8649 // vl128 state = 0xec3c032c 8650 __ dci(0x6489b8e5); // fcvtlt z5.s, p6/m, z7.h 8651 // vl128 state = 0xe41850f7 8652 __ dci(0x640aa1a1); // fcvtxnt z1.s, p0/m, z13.d 8653 // vl128 state = 0xaf3944b4 8654 __ dci(0x6488bf41); // fcvtnt z1.h, p7/m, z26.s 8655 // vl128 state = 0xdffd02bd 8656 } 8657 8658 uint32_t state; 8659 
ComputeMachineStateHash(&masm, &state); 8660 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 8661 __ Ldr(w0, MemOperand(x0)); 8662 8663 END(); 8664 if (CAN_RUN()) { 8665 RUN(); 8666 uint32_t expected_hashes[] = { 8667 0xdffd02bd, 8668 0x03d1f711, 8669 0x41cf3358, 8670 0xa351d0f6, 8671 0xffba25ff, 8672 0x14092947, 8673 0x26b194fe, 8674 0x42acd8a3, 8675 0xc0498960, 8676 0xcccf1171, 8677 0x8dca76ed, 8678 0xefbda194, 8679 0xcf04a23d, 8680 0x91e2629f, 8681 0xf05e8f52, 8682 0x4994ad4a, 8683 }; 8684 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 8685 } 8686} 8687 8688TEST_SVE(sve2_saturating_multiply_add_high_indexed) { 8689 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 8690 CPUFeatures::kSVE2, 8691 CPUFeatures::kNEON, 8692 CPUFeatures::kCRC32); 8693 START(); 8694 8695 SetInitialMachineState(&masm); 8696 // state = 0xe2bd2480 8697 8698 { 8699 ExactAssemblyScope scope(&masm, 40 * kInstructionSize); 8700 __ dci(0x442b1100); // sqrdmlah z0.h, z8.h, z3.h[1] 8701 // vl128 state = 0xb012d377 8702 __ dci(0x44211108); // sqrdmlah z8.h, z8.h, z1.h[0] 8703 // vl128 state = 0xae399e50 8704 __ dci(0x4421110c); // sqrdmlah z12.h, z8.h, z1.h[0] 8705 // vl128 state = 0x1a46b700 8706 __ dci(0x44291188); // sqrdmlah z8.h, z12.h, z1.h[1] 8707 // vl128 state = 0x7525090a 8708 __ dci(0x442811a9); // sqrdmlah z9.h, z13.h, z0.h[1] 8709 // vl128 state = 0xf2907eb8 8710 __ dci(0x442c11eb); // sqrdmlah z11.h, z15.h, z4.h[1] 8711 // vl128 state = 0x65a71d51 8712 __ dci(0x442c11e3); // sqrdmlah z3.h, z15.h, z4.h[1] 8713 // vl128 state = 0x8b30e19b 8714 __ dci(0x442413e1); // sqrdmlah z1.h, z31.h, z4.h[0] 8715 // vl128 state = 0x448e4c0f 8716 __ dci(0x44a413a0); // sqrdmlah z0.s, z29.s, z4.s[0] 8717 // vl128 state = 0x1745e0db 8718 __ dci(0x44241321); // sqrdmlah z1.h, z25.h, z4.h[0] 8719 // vl128 state = 0xe07b491b 8720 __ dci(0x44a413a5); // sqrdmlah z5.s, z29.s, z4.s[0] 8721 // vl128 state = 0xad39c91c 8722 __ dci(0x44e41327); // sqrdmlah z7.d, z25.d, z4.d[0] 8723 // 
vl128 state = 0xd327dc1c 8724 __ dci(0x44e4132f); // sqrdmlah z15.d, z25.d, z4.d[0] 8725 // vl128 state = 0x8da341ca 8726 __ dci(0x44e5130b); // sqrdmlah z11.d, z24.d, z5.d[0] 8727 // vl128 state = 0x4dbd3ee1 8728 __ dci(0x44e3130a); // sqrdmlah z10.d, z24.d, z3.d[0] 8729 // vl128 state = 0x71452896 8730 __ dci(0x44e3131a); // sqrdmlah z26.d, z24.d, z3.d[0] 8731 // vl128 state = 0x4d6d8b90 8732 __ dci(0x4463135e); // sqrdmlah z30.h, z26.h, z3.h[4] 8733 // vl128 state = 0x0b53f7b4 8734 __ dci(0x44e7135c); // sqrdmlah z28.d, z26.d, z7.d[0] 8735 // vl128 state = 0x78ab2bb9 8736 __ dci(0x44e7134c); // sqrdmlah z12.d, z26.d, z7.d[0] 8737 // vl128 state = 0x3773b9e2 8738 __ dci(0x44e51144); // sqrdmlah z4.d, z10.d, z5.d[0] 8739 // vl128 state = 0x8f8883da 8740 __ dci(0x44e411c0); // sqrdmlah z0.d, z14.d, z4.d[0] 8741 // vl128 state = 0xa27ef92f 8742 __ dci(0x44ec15c4); // sqrdmlsh z4.d, z14.d, z12.d[0] 8743 // vl128 state = 0x6cea3cee 8744 __ dci(0x44ec14e0); // sqrdmlsh z0.d, z7.d, z12.d[0] 8745 // vl128 state = 0xb5e40d5f 8746 __ dci(0x44ee16f0); // sqrdmlsh z16.d, z23.d, z14.d[0] 8747 // vl128 state = 0xacf903eb 8748 __ dci(0x44ea16d4); // sqrdmlsh z20.d, z22.d, z10.d[0] 8749 // vl128 state = 0x698246a6 8750 __ dci(0x44ea16d0); // sqrdmlsh z16.d, z22.d, z10.d[0] 8751 // vl128 state = 0x58015eeb 8752 __ dci(0x44ea16d1); // sqrdmlsh z17.d, z22.d, z10.d[0] 8753 // vl128 state = 0xdbf1d9a6 8754 __ dci(0x44ab16d3); // sqrdmlsh z19.s, z22.s, z3.s[1] 8755 // vl128 state = 0xbde312bb 8756 __ dci(0x44aa17d1); // sqrdmlsh z17.s, z30.s, z2.s[1] 8757 // vl128 state = 0xc033b9a1 8758 __ dci(0x44aa1650); // sqrdmlsh z16.s, z18.s, z2.s[1] 8759 // vl128 state = 0x0e3b4c59 8760 __ dci(0x44aa1632); // sqrdmlsh z18.s, z17.s, z2.s[1] 8761 // vl128 state = 0x6f849e01 8762 __ dci(0x44aa1710); // sqrdmlsh z16.s, z24.s, z2.s[1] 8763 // vl128 state = 0x701e7316 8764 __ dci(0x44aa1711); // sqrdmlsh z17.s, z24.s, z2.s[1] 8765 // vl128 state = 0xbfbc7895 8766 __ dci(0x44a91715); // sqrdmlsh 
z21.s, z24.s, z1.s[1] 8767 // vl128 state = 0x2307c6f3 8768 __ dci(0x44a91697); // sqrdmlsh z23.s, z20.s, z1.s[1] 8769 // vl128 state = 0x78db6627 8770 __ dci(0x44a91696); // sqrdmlsh z22.s, z20.s, z1.s[1] 8771 // vl128 state = 0x37d25a35 8772 __ dci(0x44a816de); // sqrdmlsh z30.s, z22.s, z0.s[1] 8773 // vl128 state = 0xf611db46 8774 __ dci(0x44ab16dc); // sqrdmlsh z28.s, z22.s, z3.s[1] 8775 // vl128 state = 0x699a840f 8776 __ dci(0x44af165d); // sqrdmlsh z29.s, z18.s, z7.s[1] 8777 // vl128 state = 0x0b5d451f 8778 __ dci(0x44af16f5); // sqrdmlsh z21.s, z23.s, z7.s[1] 8779 // vl128 state = 0xe49e3b59 8780 } 8781 8782 uint32_t state; 8783 ComputeMachineStateHash(&masm, &state); 8784 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 8785 __ Ldr(w0, MemOperand(x0)); 8786 8787 END(); 8788 if (CAN_RUN()) { 8789 RUN(); 8790 uint32_t expected_hashes[] = { 8791 0xe49e3b59, 8792 0xce0062c7, 8793 0xf796ec27, 8794 0x1f952649, 8795 0x4e4354e6, 8796 0x90cb0c51, 8797 0xf0688aee, 8798 0xae9de352, 8799 0x652f0c0d, 8800 0x0000db74, 8801 0xdc23fff7, 8802 0x228c116c, 8803 0x8477dd7c, 8804 0x08377c46, 8805 0x6e05a40f, 8806 0x874126fb, 8807 }; 8808 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 8809 } 8810} 8811 8812TEST_SVE(sve2_sat_double_mul_high_index) { 8813 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 8814 CPUFeatures::kSVE2, 8815 CPUFeatures::kNEON, 8816 CPUFeatures::kCRC32); 8817 START(); 8818 8819 SetInitialMachineState(&masm); 8820 // state = 0xe2bd2480 8821 8822 { 8823 ExactAssemblyScope scope(&masm, 50 * kInstructionSize); 8824 __ dci(0x447bf609); // sqrdmulh z9.h, z16.h, z3.h[7] 8825 // vl128 state = 0xacad7d7c 8826 __ dci(0x447bf601); // sqrdmulh z1.h, z16.h, z3.h[7] 8827 // vl128 state = 0xd6a976fe 8828 __ dci(0x447bf600); // sqrdmulh z0.h, z16.h, z3.h[7] 8829 // vl128 state = 0x959d4287 8830 __ dci(0x446bf710); // sqrdmulh z16.h, z24.h, z3.h[5] 8831 // vl128 state = 0x88b70b0e 8832 __ dci(0x446af612); // sqrdmulh z18.h, z16.h, z2.h[5] 8833 // 
vl128 state = 0xea48068a 8834 __ dci(0x442af636); // sqrdmulh z22.h, z17.h, z2.h[1] 8835 // vl128 state = 0x22135bae 8836 __ dci(0x442af626); // sqrdmulh z6.h, z17.h, z2.h[1] 8837 // vl128 state = 0x1ed137a8 8838 __ dci(0x442af624); // sqrdmulh z4.h, z17.h, z2.h[1] 8839 // vl128 state = 0x37aa44d4 8840 __ dci(0x4420f625); // sqrdmulh z5.h, z17.h, z0.h[0] 8841 // vl128 state = 0x9747863a 8842 __ dci(0x4460f604); // sqrdmulh z4.h, z16.h, z0.h[4] 8843 // vl128 state = 0xf6487f4b 8844 __ dci(0x4460f605); // sqrdmulh z5.h, z16.h, z0.h[4] 8845 // vl128 state = 0xb85302a6 8846 __ dci(0x4420f641); // sqrdmulh z1.h, z18.h, z0.h[0] 8847 // vl128 state = 0xfc85ce98 8848 __ dci(0x4424f669); // sqrdmulh z9.h, z19.h, z4.h[0] 8849 // vl128 state = 0xf0b36dd3 8850 __ dci(0x4460f668); // sqrdmulh z8.h, z19.h, z0.h[4] 8851 // vl128 state = 0x227fe9fe 8852 __ dci(0x4462f6f8); // sqrdmulh z24.h, z23.h, z2.h[4] 8853 // vl128 state = 0x7f4d89ab 8854 __ dci(0x4462f6f0); // sqrdmulh z16.h, z23.h, z2.h[4] 8855 // vl128 state = 0x61520386 8856 __ dci(0x4472f6d1); // sqrdmulh z17.h, z22.h, z2.h[6] 8857 // vl128 state = 0x34d07c81 8858 __ dci(0x4472f250); // sqdmulh z16.h, z18.h, z2.h[6] 8859 // vl128 state = 0x74313b89 8860 __ dci(0x44b2f254); // sqdmulh z20.s, z18.s, z2.s[2] 8861 // vl128 state = 0x7acc9692 8862 __ dci(0x44e2f250); // sqdmulh z16.d, z18.d, z2.d[0] 8863 // vl128 state = 0x3a1f908e 8864 __ dci(0x44e4f251); // sqdmulh z17.d, z18.d, z4.d[0] 8865 // vl128 state = 0xd2ae3642 8866 __ dci(0x44e0f650); // sqrdmulh z16.d, z18.d, z0.d[0] 8867 // vl128 state = 0x74da2dcc 8868 __ dci(0x44f8f640); // sqrdmulh z0.d, z18.d, z8.d[1] 8869 // vl128 state = 0x0273639a 8870 __ dci(0x44f9f742); // sqrdmulh z2.d, z26.d, z9.d[1] 8871 // vl128 state = 0x9c5062c9 8872 __ dci(0x44f9f7e6); // sqrdmulh z6.d, z31.d, z9.d[1] 8873 // vl128 state = 0x095e8fd7 8874 __ dci(0x44fdf7ae); // sqrdmulh z14.d, z29.d, z13.d[1] 8875 // vl128 state = 0x4ab7c261 8876 __ dci(0x44fdf7af); // sqrdmulh z15.d, z29.d, 
z13.d[1] 8877 // vl128 state = 0x7913f02e 8878 __ dci(0x44f9f7ed); // sqrdmulh z13.d, z31.d, z9.d[1] 8879 // vl128 state = 0xbbffd120 8880 __ dci(0x44f9f7e5); // sqrdmulh z5.d, z31.d, z9.d[1] 8881 // vl128 state = 0xc9cc793f 8882 __ dci(0x44f5f7e4); // sqrdmulh z4.d, z31.d, z5.d[1] 8883 // vl128 state = 0xc7cc2e4b 8884 __ dci(0x44e5f3e0); // sqdmulh z0.d, z31.d, z5.d[0] 8885 // vl128 state = 0x8a4efda7 8886 __ dci(0x44e4f364); // sqdmulh z4.d, z27.d, z4.d[0] 8887 // vl128 state = 0xfa30239a 8888 __ dci(0x44edf366); // sqdmulh z6.d, z27.d, z13.d[0] 8889 // vl128 state = 0x9c538671 8890 __ dci(0x44adf322); // sqdmulh z2.s, z25.s, z5.s[1] 8891 // vl128 state = 0xafb03157 8892 __ dci(0x44adf263); // sqdmulh z3.s, z19.s, z5.s[1] 8893 // vl128 state = 0x6ea1e1ff 8894 __ dci(0x44bdf22b); // sqdmulh z11.s, z17.s, z5.s[3] 8895 // vl128 state = 0x0040a3a0 8896 __ dci(0x44adf62a); // sqrdmulh z10.s, z17.s, z5.s[1] 8897 // vl128 state = 0x8b3e6419 8898 __ dci(0x44adf622); // sqrdmulh z2.s, z17.s, z5.s[1] 8899 // vl128 state = 0x579bf738 8900 __ dci(0x44abf632); // sqrdmulh z18.s, z17.s, z3.s[1] 8901 // vl128 state = 0x2678c680 8902 __ dci(0x44a9f6ba); // sqrdmulh z26.s, z21.s, z1.s[1] 8903 // vl128 state = 0xee25a322 8904 __ dci(0x44a9f6aa); // sqrdmulh z10.s, z21.s, z1.s[1] 8905 // vl128 state = 0x99cfcf9f 8906 __ dci(0x44b1f6ab); // sqrdmulh z11.s, z21.s, z1.s[2] 8907 // vl128 state = 0xa6785a38 8908 __ dci(0x44b1f0bb); // sqdmulh z27.s, z5.s, z1.s[2] 8909 // vl128 state = 0xfc822233 8910 __ dci(0x4439f0bf); // sqdmulh z31.h, z5.h, z1.h[3] 8911 // vl128 state = 0x322d49df 8912 __ dci(0x4433f0be); // sqdmulh z30.h, z5.h, z3.h[2] 8913 // vl128 state = 0xbf6733d2 8914 __ dci(0x4433f0d6); // sqdmulh z22.h, z6.h, z3.h[2] 8915 // vl128 state = 0x99f11483 8916 __ dci(0x4437f2d7); // sqdmulh z23.h, z22.h, z7.h[2] 8917 // vl128 state = 0x9c146ede 8918 __ dci(0x4426f2d6); // sqdmulh z22.h, z22.h, z6.h[0] 8919 // vl128 state = 0xc089284f 8920 __ dci(0x44a6f0de); // sqdmulh z30.s, z6.s, 
z6.s[0] 8921 // vl128 state = 0xe962a269 8922 __ dci(0x44a4f04e); // sqdmulh z14.s, z2.s, z4.s[0] 8923 // vl128 state = 0xaea2f35e 8924 } 8925 8926 uint32_t state; 8927 ComputeMachineStateHash(&masm, &state); 8928 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 8929 __ Ldr(w0, MemOperand(x0)); 8930 8931 END(); 8932 if (CAN_RUN()) { 8933 RUN(); 8934 uint32_t expected_hashes[] = { 8935 0xaea2f35e, 8936 0xb4e17c50, 8937 0x97dfb966, 8938 0x070d3c78, 8939 0x5b2f880d, 8940 0x8e643be0, 8941 0x4d7f006b, 8942 0xfbd08185, 8943 0x4960a97d, 8944 0x1e85903f, 8945 0x443b62e4, 8946 0xf196453a, 8947 0x50dae6ef, 8948 0x0e4bb245, 8949 0x69d661ab, 8950 0x7d6fb839, 8951 }; 8952 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 8953 } 8954} 8955 8956TEST_SVE(sve2_extract) { 8957 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, 8958 CPUFeatures::kSVE2, 8959 CPUFeatures::kNEON, 8960 CPUFeatures::kCRC32); 8961 START(); 8962 8963 SetInitialMachineState(&masm); 8964 // state = 0xe2bd2480 8965 8966 { 8967 ExactAssemblyScope scope(&masm, 60 * kInstructionSize); 8968 __ dci(0x056a1008); // ext z8.b, {z0.b, z1.b}, #84 8969 // vl128 state = 0x06ae6d5d 8970 __ dci(0x05601418); // ext z24.b, {z0.b, z1.b}, #5 8971 // vl128 state = 0x3b73c922 8972 __ dci(0x05601708); // ext z8.b, {z24.b, z25.b}, #5 8973 // vl128 state = 0xc3526a3d 8974 __ dci(0x05601d0c); // ext z12.b, {z8.b, z9.b}, #7 8975 // vl128 state = 0xbde17731 8976 __ dci(0x05600c1c); // ext z28.b, {z0.b, z1.b}, #3 8977 // vl128 state = 0x9ac72141 8978 __ dci(0x05600c58); // ext z24.b, {z2.b, z3.b}, #3 8979 // vl128 state = 0xccecefc0 8980 __ dci(0x05600410); // ext z16.b, {z0.b, z1.b}, #1 8981 // vl128 state = 0xe49d5f89 8982 __ dci(0x05600438); // ext z24.b, {z1.b, z2.b}, #1 8983 // vl128 state = 0x9967df9d 8984 __ dci(0x0560067a); // ext z26.b, {z19.b, z20.b}, #1 8985 // vl128 state = 0x110a8b46 8986 __ dci(0x05601478); // ext z24.b, {z3.b, z4.b}, #5 8987 // vl128 state = 0x558f95f2 8988 __ dci(0x0560117c); // ext 
z28.b, {z11.b, z12.b}, #4 8989 // vl128 state = 0x18d0f048 8990 __ dci(0x0560157e); // ext z30.b, {z11.b, z12.b}, #5 8991 // vl128 state = 0x1719547f 8992 __ dci(0x05601c7a); // ext z26.b, {z3.b, z4.b}, #7 8993 // vl128 state = 0x600cfa8a 8994 __ dci(0x0560187e); // ext z30.b, {z3.b, z4.b}, #6 8995 // vl128 state = 0xc93e431e 8996 __ dci(0x05601876); // ext z22.b, {z3.b, z4.b}, #6 8997 // vl128 state = 0x5be7af00 8998 __ dci(0x05601c26); // ext z6.b, {z1.b, z2.b}, #7 8999 // vl128 state = 0xd3d69d02 9000 __ dci(0x05601c2e); // ext z14.b, {z1.b, z2.b}, #7 9001 // vl128 state = 0x1d88c27b 9002 __ dci(0x05601d3e); // ext z30.b, {z9.b, z10.b}, #7 9003 // vl128 state = 0x56f91523 9004 __ dci(0x05601dae); // ext z14.b, {z13.b, z14.b}, #7 9005 // vl128 state = 0xbc175582 9006 __ dci(0x056015ef); // ext z15.b, {z15.b, z16.b}, #5 9007 // vl128 state = 0x9289a9ba 9008 __ dci(0x0560157f); // ext z31.b, {z11.b, z12.b}, #5 9009 // vl128 state = 0x46be3725 9010 __ dci(0x0560157e); // ext z30.b, {z11.b, z12.b}, #5 9011 // vl128 state = 0xa4fd59e9 9012 __ dci(0x0560156e); // ext z14.b, {z11.b, z12.b}, #5 9013 // vl128 state = 0x88b9ba85 9014 __ dci(0x05601566); // ext z6.b, {z11.b, z12.b}, #5 9015 // vl128 state = 0x7f3b2a36 9016 __ dci(0x056017e4); // ext z4.b, {z31.b, z0.b}, #5 9017 // vl128 state = 0xa71b8fa9 9018 __ dci(0x05601f74); // ext z20.b, {z27.b, z28.b}, #7 9019 // vl128 state = 0x89dcdeac 9020 __ dci(0x05601f44); // ext z4.b, {z26.b, z27.b}, #7 9021 // vl128 state = 0xa877313f 9022 __ dci(0x05601e45); // ext z5.b, {z18.b, z19.b}, #7 9023 // vl128 state = 0x6181834a 9024 __ dci(0x05601255); // ext z21.b, {z18.b, z19.b}, #4 9025 // vl128 state = 0x7c3595cd 9026 __ dci(0x05701a51); // ext z17.b, {z18.b, z19.b}, #134 9027 // vl128 state = 0x10fdfe4d 9028 __ dci(0x05701ad3); // ext z19.b, {z22.b, z23.b}, #134 9029 // vl128 state = 0x08e923c5 9030 __ dci(0x05701ad1); // ext z17.b, {z22.b, z23.b}, #134 9031 // vl128 state = 0xefb2c9e9 9032 __ dci(0x05701b41); // ext z1.b, 
{z26.b, z27.b}, #134 9033 // vl128 state = 0xd5dccda9 9034 __ dci(0x05701b40); // ext z0.b, {z26.b, z27.b}, #134 9035 // vl128 state = 0xd424c039 9036 __ dci(0x05701bd0); // ext z16.b, {z30.b, z31.b}, #134 9037 // vl128 state = 0xd914c077 9038 __ dci(0x057013d8); // ext z24.b, {z30.b, z31.b}, #132 9039 // vl128 state = 0x32459b3a 9040 __ dci(0x05701259); // ext z25.b, {z18.b, z19.b}, #132 9041 // vl128 state = 0x422ed7bf 9042 __ dci(0x0570125d); // ext z29.b, {z18.b, z19.b}, #132 9043 // vl128 state = 0x6bfc46ef 9044 __ dci(0x05700215); // ext z21.b, {z16.b, z17.b}, #128 9045 // vl128 state = 0xc53b85ed 9046 __ dci(0x0560021d); // ext z29.b, {z16.b, z17.b}, #0 9047 // vl128 state = 0xd391e5ec 9048 __ dci(0x0570121c); // ext z28.b, {z16.b, z17.b}, #132 9049 // vl128 state = 0x7990c1d7 9050 __ dci(0x0570030c); // ext z12.b, {z24.b, z25.b}, #128 9051 // vl128 state = 0xca0d3db8 9052 __ dci(0x05700b88); // ext z8.b, {z28.b, z29.b}, #130 9053 // vl128 state = 0xe5c71442 9054 __ dci(0x05600b0c); // ext z12.b, {z24.b, z25.b}, #2 9055 // vl128 state = 0x68510d62 9056 __ dci(0x05600f1c); // ext z28.b, {z24.b, z25.b}, #3 9057 // vl128 state = 0x77f9f046 9058 __ dci(0x05600e14); // ext z20.b, {z16.b, z17.b}, #3 9059 // vl128 state = 0x7068dedf 9060 __ dci(0x05600604); // ext z4.b, {z16.b, z17.b}, #1 9061 // vl128 state = 0x8b70c406 9062 __ dci(0x05600406); // ext z6.b, {z0.b, z1.b}, #1 9063 // vl128 state = 0x10e6b48c 9064 __ dci(0x05600056); // ext z22.b, {z2.b, z3.b}, #0 9065 // vl128 state = 0xe1294d7a 9066 __ dci(0x05600052); // ext z18.b, {z2.b, z3.b}, #0 9067 // vl128 state = 0x0762bbb0 9068 __ dci(0x056000d6); // ext z22.b, {z6.b, z7.b}, #0 9069 // vl128 state = 0x58be0ba4 9070 __ dci(0x057008de); // ext z30.b, {z6.b, z7.b}, #130 9071 // vl128 state = 0x8a2018e9 9072 __ dci(0x0570085a); // ext z26.b, {z2.b, z3.b}, #130 9073 // vl128 state = 0xb019b7e0 9074 __ dci(0x057009d2); // ext z18.b, {z14.b, z15.b}, #130 9075 // vl128 state = 0x9e6e14ed 9076 __ dci(0x057008fa); 
// ext z26.b, {z7.b, z8.b}, #130 9077 // vl128 state = 0x4cf64d22 9078 __ dci(0x057008f2); // ext z18.b, {z7.b, z8.b}, #130 9079 // vl128 state = 0x048c30f9 9080 __ dci(0x057002f3); // ext z19.b, {z23.b, z24.b}, #128 9081 // vl128 state = 0x2d7eb43b 9082 __ dci(0x057006a3); // ext z3.b, {z21.b, z22.b}, #129 9083 // vl128 state = 0xa37aeb5e 9084 __ dci(0x05700687); // ext z7.b, {z20.b, z21.b}, #129 9085 // vl128 state = 0xd8d7cdc7 9086 __ dci(0x056006b7); // ext z23.b, {z21.b, z22.b}, #1 9087 // vl128 state = 0x2480e1d4 9088 } 9089 9090 uint32_t state; 9091 ComputeMachineStateHash(&masm, &state); 9092 __ Mov(x0, reinterpret_cast<uint64_t>(&state)); 9093 __ Ldr(w0, MemOperand(x0)); 9094 9095 END(); 9096 if (CAN_RUN()) { 9097 RUN(); 9098 uint32_t expected_hashes[] = { 9099 0x2480e1d4, 9100 0x4dc42cc5, 9101 0x7ac24121, 9102 0x9eaf5c98, 9103 0x1b7b35dc, 9104 0x1b1035fc, 9105 0xe15f6899, 9106 0xaad14717, 9107 0x3327c3fc, 9108 0x7f349408, 9109 0x2d865b00, 9110 0x9819cd29, 9111 0x7f64cace, 9112 0x3751e2c1, 9113 0x7e60fc24, 9114 0xc6b308fc, 9115 }; 9116 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0); 9117 } 9118} 9119 9120} // namespace aarch64 9121} // namespace vixl 9122