1// Copyright 2017, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27#include <cstdio> 28#include <cstring> 29#include <string> 30 31#include "test-runner.h" 32#include "test-utils.h" 33 34#include "aarch64/assembler-aarch64.h" 35#include "aarch64/instructions-aarch64.h" 36#include "aarch64/test-utils-aarch64.h" 37 38#define __ assm. 39#define TEST(name) TEST_(AARCH64_API_##name) 40 41namespace vixl { 42namespace aarch64 { 43 44class InstructionReporter : public DecoderVisitor { 45 public: 46 InstructionReporter() : DecoderVisitor(kNonConstVisitor) {} 47 48 void Visit(Metadata* metadata, const Instruction* instr) VIXL_OVERRIDE { 49 USE(instr); 50 instr_form_ = (*metadata)["form"]; 51 } 52 53 std::string MoveForm() { return instr_form_; } 54 55 private: 56 std::string instr_form_; 57}; 58 59static void CheckAndMaybeDisassembleMovprfxPairs(const CodeBuffer* buffer, 60 bool can_take_movprfx) { 61 const Instruction* pair = buffer->GetStartAddress<Instruction*>(); 62 const Instruction* end = buffer->GetEndAddress<Instruction*>(); 63 bool any_failures = false; 64 PrintDisassembler print_disasm(stdout); 65 Decoder decoder; 66 InstructionReporter reporter; 67 decoder.AppendVisitor(&reporter); 68 69 while (pair < end) { 70 const Instruction* movprfx = pair; 71 const Instruction* candidate = pair->GetNextInstruction(); 72 const Instruction* next_pair = candidate->GetNextInstruction(); 73 VIXL_ASSERT(candidate < end); 74 75 Instr inst = candidate->GetInstructionBits(); 76 decoder.Decode(reinterpret_cast<Instruction*>(&inst)); 77 std::string form = reporter.MoveForm(); 78 bool failed = 79 can_take_movprfx != candidate->CanTakeSVEMovprfx(form.c_str(), movprfx); 80 any_failures = any_failures || failed; 81 82 if (failed || Test::disassemble()) { 83 printf("----\n"); 84 if (failed) { 85 printf("# ERROR: Expected %sCanTakeSVEMovprfx(movprfx):\n", 86 can_take_movprfx ? "" : "!"); 87 } 88 print_disasm.DisassembleBuffer(pair, next_pair); 89 } 90 91 pair = next_pair; 92 } 93 // Abort only at the end, so we can see the individual failures. 94 VIXL_CHECK(!any_failures); 95} 96 97TEST(movprfx_negative_aliasing) { 98 // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not 99 // alias an input to the prefixed instruction. 100 Assembler assm; 101 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); 102 { 103 // We have to use the Assembler directly to generate movprfx, so we need 104 // to manually reserve space for the code we're about to emit. 105 static const size_t kPairCount = 79; 106 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 107 108 __ movprfx(z0.VnB(), p0.Merging(), z9.VnB()); 109 __ abs(z0.VnB(), p0.Merging(), z0.VnB()); 110 111 __ movprfx(z1, z17); 112 __ add(z1.VnH(), p2.Merging(), z1.VnH(), z1.VnH()); 113 114 __ movprfx(z12, z13); 115 __ and_(z12.VnD(), p5.Merging(), z12.VnD(), z12.VnD()); 116 117 __ movprfx(z2, z4); 118 __ asr(z2.VnS(), p2.Merging(), z2.VnS(), z2.VnS()); 119 120 __ movprfx(z10, z18); 121 __ asr(z10.VnH(), p2.Merging(), z10.VnH(), z10.VnD()); 122 123 __ movprfx(z17.VnD(), p5.Zeroing(), z20.VnD()); 124 __ asr(z17.VnD(), p5.Merging(), z17.VnD(), z17.VnD()); 125 126 __ movprfx(z22, z9); 127 __ asrr(z22.VnH(), p1.Merging(), z22.VnH(), z22.VnH()); 128 129 __ movprfx(z0.VnS(), p6.Zeroing(), z6.VnS()); 130 __ bic(z0.VnS(), p6.Merging(), z0.VnS(), z0.VnS()); 131 132 __ movprfx(z12, z16); 133 __ clasta(z12.VnD(), p5, z12.VnD(), z12.VnD()); 134 135 __ movprfx(z7, z15); 136 __ clastb(z7.VnS(), p7, z7.VnS(), z7.VnS()); 137 138 __ movprfx(z10, z29); 139 __ cls(z10.VnH(), p2.Merging(), z10.VnH()); 140 141 __ movprfx(z6, z13); 142 __ clz(z6.VnB(), p4.Merging(), z6.VnB()); 143 144 __ movprfx(z14.VnS(), p6.Zeroing(), z3.VnS()); 145 __ cnot(z14.VnS(), p6.Merging(), z14.VnS()); 146 147 __ movprfx(z5.VnD(), p6.Merging(), z4.VnD()); 148 __ cnt(z5.VnD(), p6.Merging(), z5.VnD()); 149 150 __ movprfx(z19.VnB(), p6.Zeroing(), z4.VnB()); 151 __ eor(z19.VnB(), p6.Merging(), z19.VnB(), z19.VnB()); 152 153 __ movprfx(z27, z2); 154 __ ext(z27.VnB(), z27.VnB(), z27.VnB(), 42); 155 156 __ movprfx(z4.VnS(), p1.Zeroing(), z22.VnS()); 157 __ lsl(z4.VnS(), p1.Merging(), z4.VnS(), z4.VnS()); 158 159 __ movprfx(z4, z5); 160 __ lsl(z4.VnB(), p5.Merging(), z4.VnB(), z4.VnD()); 161 162 __ movprfx(z11.VnD(), p4.Merging(), z29.VnD()); 163 __ lsl(z11.VnD(), p4.Merging(), z11.VnD(), z11.VnD()); 164 165 __ movprfx(z12.VnD(), p6.Merging(), z3.VnD()); 166 __ lslr(z12.VnD(), p6.Merging(), z12.VnD(), z12.VnD()); 167 168 __ movprfx(z7, z2); 169 __ lsr(z7.VnB(), p4.Merging(), z7.VnB(), z7.VnB()); 170 171 __ movprfx(z25.VnH(), p6.Merging(), z28.VnH()); 172 __ lsr(z25.VnH(), p6.Merging(), z25.VnH(), z25.VnD()); 173 174 __ movprfx(z14.VnD(), p6.Merging(), z6.VnD()); 175 __ lsr(z14.VnD(), p6.Merging(), z14.VnD(), z14.VnD()); 176 177 __ movprfx(z26.VnH(), p6.Zeroing(), z27.VnH()); 178 __ lsrr(z26.VnH(), p6.Merging(), z26.VnH(), z26.VnH()); 179 180 __ movprfx(z17.VnS(), p4.Zeroing(), z29.VnS()); 181 __ mad(z17.VnS(), p4.Merging(), z17.VnS(), z23.VnS()); 182 183 __ movprfx(z7, z17); 184 __ mad(z7.VnD(), p5.Merging(), z4.VnD(), z7.VnD()); 185 186 __ movprfx(z11, z7); 187 __ mla(z11.VnS(), p1.Merging(), z11.VnS(), z27.VnS()); 188 189 __ movprfx(z7, z5); 190 __ mla(z7.VnH(), p0.Merging(), z5.VnH(), z7.VnH()); 191 192 __ movprfx(z1.VnH(), p0.Merging(), z17.VnH()); 193 __ mls(z1.VnH(), p0.Merging(), z1.VnH(), z31.VnH()); 194 195 __ movprfx(z22.VnB(), p3.Merging(), z18.VnB()); 196 __ mls(z22.VnB(), p3.Merging(), z18.VnB(), z22.VnB()); 197 198 __ movprfx(z7.VnS(), p0.Merging(), z10.VnS()); 199 __ msb(z7.VnS(), p0.Merging(), z7.VnS(), z10.VnS()); 200 201 __ movprfx(z12, z6); 202 __ msb(z12.VnH(), p7.Merging(), z6.VnH(), z12.VnH()); 203 204 __ movprfx(z8.VnB(), p4.Merging(), z3.VnB()); 205 __ mul(z8.VnB(), p4.Merging(), z8.VnB(), z8.VnB()); 206 207 __ movprfx(z9, z26); 208 __ neg(z9.VnS(), p7.Merging(), z9.VnS()); 209 210 __ movprfx(z16, z8); 211 __ not_(z16.VnH(), p6.Merging(), z16.VnH()); 212 213 __ movprfx(z25.VnH(), p5.Zeroing(), z11.VnH()); 214 __ orr(z25.VnH(), p5.Merging(), z25.VnH(), z25.VnH()); 215 216 __ movprfx(z17.VnH(), p1.Merging(), z22.VnH()); 217 __ rbit(z17.VnH(), p1.Merging(), z17.VnH()); 218 219 __ movprfx(z11, z25); 220 __ revb(z11.VnD(), p6.Merging(), z11.VnD()); 221 222 __ movprfx(z13, z27); 223 __ revh(z13.VnS(), p2.Merging(), z13.VnS()); 224 225 __ movprfx(z30.VnD(), p6.Merging(), z20.VnD()); 226 __ revw(z30.VnD(), p6.Merging(), z30.VnD()); 227 228 __ movprfx(z2.VnD(), p2.Merging(), z21.VnD()); 229 __ sabd(z2.VnD(), p2.Merging(), z2.VnD(), z2.VnD()); 230 231 __ movprfx(z0, z7); 232 __ sdiv(z0.VnD(), p0.Merging(), z0.VnD(), z0.VnD()); 233 234 __ movprfx(z19, z28); 235 __ sdivr(z19.VnS(), p1.Merging(), z19.VnS(), z19.VnS()); 236 237 __ movprfx(z5, z18); 238 __ sdot(z5.VnS(), z18.VnB(), z5.VnB(), 1); 239 240 __ movprfx(z15, z11); 241 __ sdot(z15.VnD(), z2.VnH(), z15.VnH(), 1); 242 243 __ movprfx(z30, z13); 244 __ sdot(z30.VnD(), z30.VnH(), z13.VnH(), 1); 245 246 __ movprfx(z8, z9); 247 __ sdot(z8.VnS(), z8.VnB(), z9.VnB()); 248 249 __ movprfx(z23, z14); 250 __ sdot(z23.VnS(), z14.VnB(), z23.VnB()); 251 252 __ movprfx(z26, z5); 253 __ sdot(z26.VnS(), z26.VnB(), z5.VnB(), 1); 254 255 __ movprfx(z14, z15); 256 __ smax(z14.VnB(), p2.Merging(), z14.VnB(), z14.VnB()); 257 258 __ movprfx(z26.VnS(), p0.Merging(), z10.VnS()); 259 __ smin(z26.VnS(), p0.Merging(), z26.VnS(), z26.VnS()); 260 261 __ movprfx(z22, z18); 262 __ smulh(z22.VnB(), p2.Merging(), z22.VnB(), z22.VnB()); 263 264 __ movprfx(z8, z19); 265 __ splice(z8.VnD(), p2, z8.VnD(), z8.VnD()); 266 267 __ movprfx(z23.VnH(), p6.Zeroing(), z2.VnH()); 268 __ sub(z23.VnH(), p6.Merging(), z23.VnH(), z23.VnH()); 269 270 __ movprfx(z25.VnS(), p2.Merging(), z21.VnS()); 271 __ subr(z25.VnS(), p2.Merging(), z25.VnS(), z25.VnS()); 272 273 __ movprfx(z28, z31); 274 __ sxtb(z28.VnS(), p6.Merging(), z28.VnS()); 275 276 __ movprfx(z14.VnD(), p6.Merging(), z17.VnD()); 277 __ sxth(z14.VnD(), p6.Merging(), z14.VnD()); 278 279 __ movprfx(z21.VnD(), p0.Zeroing(), z28.VnD()); 280 __ sxtw(z21.VnD(), p0.Merging(), z21.VnD()); 281 282 __ movprfx(z25, z30); 283 __ uabd(z25.VnB(), p5.Merging(), z25.VnB(), z25.VnB()); 284 285 __ movprfx(z13.VnD(), p2.Merging(), z30.VnD()); 286 __ udiv(z13.VnD(), p2.Merging(), z13.VnD(), z13.VnD()); 287 288 __ movprfx(z19.VnD(), p4.Zeroing(), z6.VnD()); 289 __ udivr(z19.VnD(), p4.Merging(), z19.VnD(), z19.VnD()); 290 291 __ movprfx(z1, z20); 292 __ udot(z1.VnS(), z18.VnB(), z1.VnB(), 1); 293 294 __ movprfx(z8, z2); 295 __ udot(z8.VnD(), z2.VnH(), z8.VnH(), 1); 296 297 __ movprfx(z28, z10); 298 __ udot(z28.VnD(), z28.VnH(), z7.VnH(), 1); 299 300 __ movprfx(z21, z11); 301 __ udot(z21.VnD(), z21.VnH(), z11.VnH()); 302 303 __ movprfx(z1, z22); 304 __ udot(z1.VnD(), z10.VnH(), z1.VnH()); 305 306 __ movprfx(z8, z23); 307 __ udot(z8.VnS(), z8.VnB(), z0.VnB(), 1); 308 309 __ movprfx(z10.VnB(), p5.Zeroing(), z0.VnB()); 310 __ umax(z10.VnB(), p5.Merging(), z10.VnB(), z10.VnB()); 311 312 __ movprfx(z0.VnS(), p2.Zeroing(), z30.VnS()); 313 __ umin(z0.VnS(), p2.Merging(), z0.VnS(), z0.VnS()); 314 315 __ movprfx(z26.VnD(), p6.Zeroing(), z29.VnD()); 316 __ umulh(z26.VnD(), p6.Merging(), z26.VnD(), z26.VnD()); 317 318 __ movprfx(z23, z25); 319 __ uxtb(z23.VnS(), p7.Merging(), z23.VnS()); 320 321 __ movprfx(z14.VnS(), p3.Zeroing(), z5.VnS()); 322 __ uxth(z14.VnS(), p3.Merging(), z14.VnS()); 323 324 __ movprfx(z14, z5); 325 __ uxtw(z14.VnD(), p3.Merging(), z14.VnD()); 326 327 __ movprfx(z22, z5); 328 __ smmla(z22.VnS(), z22.VnB(), z0.VnB()); 329 330 __ movprfx(z1, z5); 331 __ ummla(z1.VnS(), z10.VnB(), z1.VnB()); 332 333 __ movprfx(z30, z5); 334 __ usmmla(z30.VnS(), z30.VnB(), z18.VnB()); 335 336 __ movprfx(z4, z5); 337 __ usdot(z4.VnS(), z3.VnB(), z4.VnB()); 338 339 __ movprfx(z10, z5); 340 __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0); 341 342 __ movprfx(z1, z5); 343 __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1); 344 } 345 assm.FinalizeCode(); 346 347 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 348} 349 350TEST(movprfx_negative_aliasing_fp) { 351 // Test that CanTakeSVEMovprfx() checks that the movprfx destination does not 352 // alias an input to the prefixed instruction. 353 Assembler assm; 354 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 355 CPUFeatures::kSVEF32MM, 356 CPUFeatures::kSVEF64MM); 357 { 358 // We have to use the Assembler directly to generate movprfx, so we need 359 // to manually reserve space for the code we're about to emit. 360 static const size_t kPairCount = 80; 361 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 362 363 __ movprfx(z17.VnS(), p1.Zeroing(), z12.VnS()); 364 __ fabd(z17.VnS(), p1.Merging(), z17.VnS(), z17.VnS()); 365 366 __ movprfx(z13, z23); 367 __ fabs(z13.VnS(), p4.Merging(), z13.VnS()); 368 369 __ movprfx(z24.VnS(), p5.Merging(), z15.VnS()); 370 __ fadd(z24.VnS(), p5.Merging(), z24.VnS(), z24.VnS()); 371 372 __ movprfx(z28.VnD(), p5.Zeroing(), z14.VnD()); 373 __ fcadd(z28.VnD(), p5.Merging(), z28.VnD(), z28.VnD(), 90); 374 375 __ movprfx(z5, z0); 376 __ fcmla(z5.VnH(), z0.VnH(), z5.VnH(), 2, 180); 377 378 __ movprfx(z10, z4); 379 __ fcmla(z10.VnS(), z8.VnS(), z10.VnS(), 1, 270); 380 381 __ movprfx(z12, z26); 382 __ fcmla(z12.VnH(), z12.VnH(), z3.VnH(), 2, 180); 383 384 __ movprfx(z8, z1); 385 __ fcmla(z8.VnS(), z8.VnS(), z1.VnS(), 1, 270); 386 387 __ movprfx(z16.VnD(), p0.Merging(), z13.VnD()); 388 __ fcvt(z16.VnD(), p0.Merging(), z16.VnH()); 389 390 __ movprfx(z12.VnD(), p7.Zeroing(), z13.VnD()); 391 __ fcvt(z12.VnD(), p7.Merging(), z12.VnS()); 392 393 __ movprfx(z14, z26); 394 __ fcvt(z14.VnS(), p5.Merging(), z14.VnD()); 395 396 __ movprfx(z26, z2); 397 __ fcvt(z26.VnH(), p7.Merging(), z26.VnD()); 398 399 __ movprfx(z25.VnD(), p2.Merging(), z13.VnD()); 400 __ fcvtzs(z25.VnD(), p2.Merging(), z25.VnH()); 401 402 __ movprfx(z31, z2); 403 __ fcvtzs(z31.VnH(), p7.Merging(), z31.VnH()); 404 405 __ movprfx(z21.VnD(), p1.Merging(), z7.VnD()); 406 __ fcvtzs(z21.VnD(), p1.Merging(), z21.VnS()); 407 408 __ movprfx(z5, z17); 409 __ fcvtzs(z5.VnS(), p5.Merging(), z5.VnD()); 410 411 __ movprfx(z19.VnD(), p1.Zeroing(), z16.VnD()); 412 __ fcvtzu(z19.VnD(), p1.Merging(), z19.VnH()); 413 414 __ movprfx(z2.VnH(), p7.Zeroing(), z28.VnH()); 415 __ fcvtzu(z2.VnH(), p7.Merging(), z2.VnH()); 416 417 __ movprfx(z21.VnD(), p7.Zeroing(), z27.VnD()); 418 __ fcvtzu(z21.VnD(), p7.Merging(), z21.VnS()); 419 420 __ movprfx(z22.VnD(), p4.Zeroing(), z8.VnD()); 421 __ fcvtzu(z22.VnS(), p4.Merging(), z22.VnD()); 422 423 __ movprfx(z0.VnS(), p5.Merging(), z5.VnS()); 424 __ fdiv(z0.VnS(), p5.Merging(), z0.VnS(), z0.VnS()); 425 426 __ movprfx(z12, z24); 427 __ fdivr(z12.VnD(), p7.Merging(), z12.VnD(), z12.VnD()); 428 429 __ movprfx(z14.VnD(), p6.Zeroing(), z21.VnD()); 430 __ fmad(z14.VnD(), p6.Merging(), z14.VnD(), z3.VnD()); 431 432 __ movprfx(z2.VnS(), p5.Zeroing(), z10.VnS()); 433 __ fmad(z2.VnS(), p5.Merging(), z14.VnS(), z2.VnS()); 434 435 __ movprfx(z24, z5); 436 __ fmax(z24.VnS(), p1.Merging(), z24.VnS(), z24.VnS()); 437 438 __ movprfx(z15.VnD(), p2.Merging(), z26.VnD()); 439 __ fmaxnm(z15.VnD(), p2.Merging(), z15.VnD(), z15.VnD()); 440 441 __ movprfx(z20, z22); 442 __ fmin(z20.VnH(), p0.Merging(), z20.VnH(), z20.VnH()); 443 444 __ movprfx(z24.VnS(), p6.Zeroing(), z30.VnS()); 445 __ fminnm(z24.VnS(), p6.Merging(), z24.VnS(), z24.VnS()); 446 447 __ movprfx(z4, z24); 448 __ fmla(z4.VnH(), z24.VnH(), z4.VnH(), 7); 449 450 __ movprfx(z4, z7); 451 __ fmla(z4.VnS(), z24.VnS(), z4.VnS(), 3); 452 453 __ movprfx(z5, z28); 454 __ fmla(z5.VnD(), z28.VnD(), z5.VnD(), 1); 455 456 __ movprfx(z24, z2); 457 __ fmla(z24.VnD(), z24.VnD(), z2.VnD(), 1); 458 459 __ movprfx(z7, z21); 460 __ fmla(z7.VnH(), p2.Merging(), z7.VnH(), z31.VnH()); 461 462 __ movprfx(z25.VnH(), p5.Zeroing(), z29.VnH()); 463 __ fmla(z25.VnH(), p5.Merging(), z29.VnH(), z25.VnH()); 464 465 __ movprfx(z31, z25); 466 __ fmla(z31.VnH(), z31.VnH(), z2.VnH(), 7); 467 468 __ movprfx(z15, z4); 469 __ fmla(z15.VnS(), z15.VnS(), z4.VnS(), 3); 470 471 __ movprfx(z7, z11); 472 __ fmls(z7.VnH(), z11.VnH(), z7.VnH(), 4); 473 474 __ movprfx(z3, z10); 475 __ fmls(z3.VnS(), z10.VnS(), z3.VnS(), 3); 476 477 __ movprfx(z5, z16); 478 __ fmls(z5.VnD(), z16.VnD(), z5.VnD(), 1); 479 480 __ movprfx(z31, z26); 481 __ fmls(z31.VnD(), z31.VnD(), z8.VnD(), 1); 482 483 __ movprfx(z5.VnH(), p3.Merging(), z2.VnH()); 484 __ fmls(z5.VnH(), p3.Merging(), z5.VnH(), z2.VnH()); 485 486 __ movprfx(z22.VnS(), p3.Zeroing(), z17.VnS()); 487 __ fmls(z22.VnS(), p3.Merging(), z21.VnS(), z22.VnS()); 488 489 __ movprfx(z17, z2); 490 __ fmls(z17.VnH(), z17.VnH(), z2.VnH(), 4); 491 492 __ movprfx(z28, z11); 493 __ fmls(z28.VnS(), z28.VnS(), z0.VnS(), 3); 494 495 __ movprfx(z15.VnD(), p1.Merging(), z31.VnD()); 496 __ fmsb(z15.VnD(), p1.Merging(), z15.VnD(), z31.VnD()); 497 498 __ movprfx(z21.VnD(), p0.Zeroing(), z5.VnD()); 499 __ fmsb(z21.VnD(), p0.Merging(), z19.VnD(), z21.VnD()); 500 501 __ movprfx(z0.VnH(), p3.Merging(), z31.VnH()); 502 __ fmul(z0.VnH(), p3.Merging(), z0.VnH(), z0.VnH()); 503 504 __ movprfx(z31.VnH(), p6.Merging(), z8.VnH()); 505 __ fmulx(z31.VnH(), p6.Merging(), z31.VnH(), z31.VnH()); 506 507 __ movprfx(z17.VnH(), p1.Zeroing(), z10.VnH()); 508 __ fneg(z17.VnH(), p1.Merging(), z17.VnH()); 509 510 __ movprfx(z22, z31); 511 __ fnmad(z22.VnH(), p1.Merging(), z22.VnH(), z23.VnH()); 512 513 __ movprfx(z14.VnD(), p0.Zeroing(), z26.VnD()); 514 __ fnmad(z14.VnD(), p0.Merging(), z2.VnD(), z14.VnD()); 515 516 __ movprfx(z13.VnH(), p6.Zeroing(), z29.VnH()); 517 __ fnmla(z13.VnH(), p6.Merging(), z13.VnH(), z26.VnH()); 518 519 __ movprfx(z19.VnH(), p7.Zeroing(), z25.VnH()); 520 __ fnmla(z19.VnH(), p7.Merging(), z25.VnH(), z19.VnH()); 521 522 __ movprfx(z27.VnH(), p5.Merging(), z24.VnH()); 523 __ fnmls(z27.VnH(), p5.Merging(), z27.VnH(), z24.VnH()); 524 525 __ movprfx(z6.VnH(), p6.Zeroing(), z21.VnH()); 526 __ fnmls(z6.VnH(), p6.Merging(), z21.VnH(), z6.VnH()); 527 528 __ movprfx(z7.VnS(), p3.Merging(), z23.VnS()); 529 __ fnmsb(z7.VnS(), p3.Merging(), z7.VnS(), z23.VnS()); 530 531 __ movprfx(z29.VnH(), p2.Zeroing(), z24.VnH()); 532 __ fnmsb(z29.VnH(), p2.Merging(), z24.VnH(), z29.VnH()); 533 534 __ movprfx(z7.VnH(), p6.Merging(), z23.VnH()); 535 __ frecpx(z7.VnH(), p6.Merging(), z7.VnH()); 536 537 __ movprfx(z17.VnS(), p5.Zeroing(), z2.VnS()); 538 __ frinta(z17.VnS(), p5.Merging(), z17.VnS()); 539 540 __ movprfx(z0.VnS(), p2.Zeroing(), z7.VnS()); 541 __ frinti(z0.VnS(), p2.Merging(), z0.VnS()); 542 543 __ movprfx(z8.VnH(), p3.Merging(), z20.VnH()); 544 __ frintm(z8.VnH(), p3.Merging(), z8.VnH()); 545 546 __ movprfx(z3.VnD(), p2.Zeroing(), z20.VnD()); 547 __ frintn(z3.VnD(), p2.Merging(), z3.VnD()); 548 549 __ movprfx(z11, z3); 550 __ frintp(z11.VnS(), p4.Merging(), z11.VnS()); 551 552 __ movprfx(z23, z29); 553 __ frintx(z23.VnD(), p4.Merging(), z23.VnD()); 554 555 __ movprfx(z4.VnH(), p4.Zeroing(), z14.VnH()); 556 __ frintz(z4.VnH(), p4.Merging(), z4.VnH()); 557 558 __ movprfx(z18.VnH(), p3.Zeroing(), z0.VnH()); 559 __ fscale(z18.VnH(), p3.Merging(), z18.VnH(), z18.VnH()); 560 561 __ movprfx(z2.VnS(), p6.Zeroing(), z4.VnS()); 562 __ fsqrt(z2.VnS(), p6.Merging(), z2.VnS()); 563 564 __ movprfx(z14.VnD(), p4.Zeroing(), z31.VnD()); 565 __ fsub(z14.VnD(), p4.Merging(), z14.VnD(), z14.VnD()); 566 567 __ movprfx(z31.VnH(), p2.Merging(), z6.VnH()); 568 __ fsubr(z31.VnH(), p2.Merging(), z31.VnH(), z31.VnH()); 569 570 __ movprfx(z4, z30); 571 __ ftmad(z4.VnH(), z4.VnH(), z4.VnH(), 2); 572 573 __ movprfx(z25.VnD(), p6.Zeroing(), z2.VnD()); 574 __ scvtf(z25.VnD(), p6.Merging(), z25.VnS()); 575 576 __ movprfx(z0.VnD(), p3.Merging(), z16.VnD()); 577 __ scvtf(z0.VnD(), p3.Merging(), z0.VnD()); 578 579 __ movprfx(z19, z23); 580 __ scvtf(z19.VnS(), p7.Merging(), z19.VnD()); 581 582 __ movprfx(z19, z4); 583 __ scvtf(z19.VnH(), p4.Merging(), z19.VnD()); 584 585 __ movprfx(z13.VnD(), p4.Zeroing(), z6.VnD()); 586 __ ucvtf(z13.VnD(), p4.Merging(), z13.VnS()); 587 588 __ movprfx(z6.VnH(), p0.Zeroing(), z14.VnH()); 589 __ ucvtf(z6.VnH(), p0.Merging(), z6.VnH()); 590 591 __ movprfx(z19.VnS(), p4.Merging(), z12.VnS()); 592 __ ucvtf(z19.VnH(), p4.Merging(), z19.VnS()); 593 594 __ movprfx(z0.VnD(), p5.Zeroing(), z12.VnD()); 595 __ ucvtf(z0.VnH(), p5.Merging(), z0.VnD()); 596 597 __ movprfx(z30, z5); 598 __ fmmla(z30.VnS(), z30.VnS(), z18.VnS()); 599 600 __ movprfx(z31, z5); 601 __ fmmla(z31.VnD(), z31.VnD(), z18.VnD()); 602 } 603 assm.FinalizeCode(); 604 605 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 606} 607 608TEST(movprfx_negative_instructions) { 609 Assembler assm; 610 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); 611 { 612 // We have to use the Assembler directly to generate movprfx, so we need 613 // to manually reserve space for the code we're about to emit. 614 static const size_t kPairCount = 13; 615 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 616 617 __ movprfx(z26, z11); 618 __ add(z26.VnB(), z11.VnB(), z4.VnB()); 619 620 // The merging form can take movprfx, but the zeroing form cannot. 621 __ movprfx(z29.VnB(), p3.Zeroing(), z7.VnB()); 622 __ cpy(z29.VnB(), p3.Zeroing(), -42); 623 624 // Frecpx can take movprfx, but frecpe and frecps cannot. 625 __ movprfx(z13, z15); 626 __ frecpe(z13.VnD(), z26.VnD()); 627 628 __ movprfx(z19, z1); 629 __ frecps(z19.VnD(), z1.VnD(), z12.VnD()); 630 631 __ movprfx(z6, z12); 632 __ frsqrte(z6.VnS(), z12.VnS()); 633 634 __ movprfx(z29, z5); 635 __ frsqrts(z29.VnH(), z5.VnH(), z20.VnH()); 636 637 // Ftmad can take movprfx, but ftsmul and ftssel cannot. 638 __ movprfx(z1, z31); 639 __ ftsmul(z1.VnD(), z31.VnD(), z16.VnD()); 640 641 __ movprfx(z8, z27); 642 __ ftssel(z8.VnH(), z27.VnH(), z1.VnH()); 643 644 // This looks like a merging unary operation, but it's actually an alias of 645 // sel, which isn't destructive. 646 __ movprfx(z0, z18); 647 __ mov(z0.VnS(), p6.Merging(), z18.VnS()); 648 649 // The merging form can take movprfx, but the zeroing form cannot. 650 __ movprfx(z12.VnS(), p2.Merging(), z11.VnS()); 651 __ mov(z12.VnS(), p2.Zeroing(), -42); 652 653 __ movprfx(z13, z6); 654 __ movprfx(z13, z2); 655 656 // Movprfx can never prefix itself. 657 __ movprfx(z3.VnD(), p5.Zeroing(), z8.VnD()); 658 __ movprfx(z3.VnD(), p5.Merging(), z8.VnD()); 659 660 __ movprfx(z1.VnD(), p3.Zeroing(), z14.VnD()); 661 __ movprfx(z1.VnD(), p3.Zeroing(), z18.VnD()); 662 } 663 assm.FinalizeCode(); 664 665 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 666} 667 668TEST(movprfx_negative_lane_size) { 669 // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane 670 // size is compatible with the prefixed instruction. 671 Assembler assm; 672 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); 673 { 674 // We have to use the Assembler directly to generate movprfx, so we need 675 // to manually reserve space for the code we're about to emit. 676 static const size_t kPairCount = 63; 677 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 678 679 __ movprfx(z0.VnH(), p2.Zeroing(), z17.VnH()); 680 __ abs(z0.VnS(), p2.Merging(), z17.VnS()); 681 682 __ movprfx(z10.VnD(), p0.Zeroing(), z4.VnD()); 683 __ add(z10.VnS(), p0.Merging(), z10.VnS(), z2.VnS()); 684 685 __ movprfx(z25.VnS(), p4.Zeroing(), z26.VnS()); 686 __ and_(z25.VnB(), p4.Merging(), z25.VnB(), z27.VnB()); 687 688 __ movprfx(z26.VnD(), p5.Merging(), z23.VnD()); 689 __ asr(z26.VnB(), p5.Merging(), z26.VnB(), 3); 690 691 __ movprfx(z25.VnS(), p7.Zeroing(), z14.VnS()); 692 __ asr(z25.VnH(), p7.Merging(), z25.VnH(), z14.VnH()); 693 694 __ movprfx(z12.VnS(), p7.Zeroing(), z23.VnS()); 695 __ asr(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnD()); 696 697 __ movprfx(z3.VnH(), p4.Zeroing(), z18.VnH()); 698 __ asr(z3.VnD(), p4.Merging(), z3.VnD(), z15.VnD()); 699 700 __ movprfx(z29.VnH(), p4.Merging(), z31.VnH()); 701 __ asrd(z29.VnB(), p4.Merging(), z29.VnB(), 3); 702 703 __ movprfx(z31.VnH(), p5.Zeroing(), z14.VnH()); 704 __ asrr(z31.VnB(), p5.Merging(), z31.VnB(), z5.VnB()); 705 706 __ movprfx(z0.VnS(), p6.Zeroing(), z18.VnS()); 707 __ bic(z0.VnB(), p6.Merging(), z0.VnB(), z23.VnB()); 708 709 __ movprfx(z19.VnH(), p2.Zeroing(), z24.VnH()); 710 __ cls(z19.VnB(), p2.Merging(), z24.VnB()); 711 712 __ movprfx(z14.VnS(), p5.Zeroing(), z4.VnS()); 713 __ clz(z14.VnD(), p5.Merging(), z10.VnD()); 714 715 __ movprfx(z0.VnD(), p5.Merging(), z2.VnD()); 716 __ cnot(z0.VnH(), p5.Merging(), z2.VnH()); 717 718 __ movprfx(z0.VnB(), p3.Zeroing(), z19.VnB()); 719 __ cnt(z0.VnH(), p3.Merging(), z8.VnH()); 720 721 __ movprfx(z29.VnS(), p0.Merging(), z7.VnS()); 722 __ cpy(z29.VnD(), p0.Merging(), -42); 723 724 __ movprfx(z13.VnB(), p2.Merging(), z31.VnB()); 725 __ cpy(z13.VnS(), p2.Merging(), w13); 726 727 __ movprfx(z0.VnS(), p3.Merging(), z15.VnS()); 728 __ cpy(z0.VnH(), p3.Merging(), h0); 729 730 __ movprfx(z2.VnD(), p6.Zeroing(), z26.VnD()); 731 __ eor(z2.VnB(), p6.Merging(), z2.VnB(), z26.VnB()); 732 733 __ movprfx(z7.VnS(), p7.Zeroing(), z30.VnS()); 734 __ lsl(z7.VnD(), p7.Merging(), z7.VnD(), 3); 735 736 __ movprfx(z11.VnH(), p3.Merging(), z23.VnH()); 737 __ lsl(z11.VnB(), p3.Merging(), z11.VnB(), z21.VnB()); 738 739 __ movprfx(z31.VnS(), p7.Zeroing(), z21.VnS()); 740 __ lsl(z31.VnH(), p7.Merging(), z31.VnH(), z21.VnD()); 741 742 __ movprfx(z26.VnH(), p0.Merging(), z0.VnH()); 743 __ lsl(z26.VnD(), p0.Merging(), z26.VnD(), z24.VnD()); 744 745 __ movprfx(z1.VnS(), p2.Zeroing(), z6.VnS()); 746 __ lslr(z1.VnB(), p2.Merging(), z1.VnB(), z6.VnB()); 747 748 __ movprfx(z4.VnD(), p4.Zeroing(), z6.VnD()); 749 __ lsr(z4.VnH(), p4.Merging(), z4.VnH(), 3); 750 751 __ movprfx(z27.VnH(), p0.Zeroing(), z29.VnH()); 752 __ lsr(z27.VnS(), p0.Merging(), z27.VnS(), z29.VnS()); 753 754 __ movprfx(z5.VnD(), p2.Zeroing(), z16.VnD()); 755 __ lsr(z5.VnH(), p2.Merging(), z5.VnH(), z2.VnD()); 756 757 __ movprfx(z27.VnB(), p4.Zeroing(), z5.VnB()); 758 __ lsr(z27.VnD(), p4.Merging(), z27.VnD(), z5.VnD()); 759 760 __ movprfx(z27.VnS(), p3.Merging(), z13.VnS()); 761 __ lsrr(z27.VnD(), p3.Merging(), z27.VnD(), z13.VnD()); 762 763 __ movprfx(z30.VnS(), p2.Zeroing(), z14.VnS()); 764 __ mad(z30.VnB(), p2.Merging(), z20.VnB(), z14.VnB()); 765 766 __ movprfx(z14.VnB(), p6.Merging(), z11.VnB()); 767 __ mla(z14.VnD(), p6.Merging(), z28.VnD(), z11.VnD()); 768 769 __ movprfx(z28.VnH(), p2.Zeroing(), z22.VnH()); 770 __ mls(z28.VnS(), p2.Merging(), z3.VnS(), z22.VnS()); 771 772 // Aliases of cpy. 773 __ movprfx(z18.VnH(), p6.Zeroing(), z25.VnH()); 774 __ mov(z18.VnD(), p6.Merging(), -42); 775 776 __ movprfx(z22.VnD(), p2.Zeroing(), z6.VnD()); 777 __ mov(z22.VnS(), p2.Merging(), w22); 778 779 __ movprfx(z3.VnH(), p0.Zeroing(), z13.VnH()); 780 __ mov(z3.VnB(), p0.Merging(), b0); 781 782 __ movprfx(z31.VnS(), p7.Zeroing(), z12.VnS()); 783 __ msb(z31.VnH(), p7.Merging(), z14.VnH(), z12.VnH()); 784 785 __ movprfx(z16.VnS(), p7.Zeroing(), z6.VnS()); 786 __ mul(z16.VnB(), p7.Merging(), z16.VnB(), z30.VnB()); 787 788 __ movprfx(z17.VnD(), p7.Merging(), z1.VnD()); 789 __ neg(z17.VnB(), p7.Merging(), z1.VnB()); 790 791 __ movprfx(z31.VnH(), p4.Zeroing(), z12.VnH()); 792 __ not_(z31.VnB(), p4.Merging(), z12.VnB()); 793 794 __ movprfx(z9.VnH(), p3.Zeroing(), z23.VnH()); 795 __ orr(z9.VnS(), p3.Merging(), z9.VnS(), z13.VnS()); 796 797 __ movprfx(z25.VnD(), p2.Zeroing(), z21.VnD()); 798 __ rbit(z25.VnS(), p2.Merging(), z21.VnS()); 799 800 __ movprfx(z26.VnH(), p3.Merging(), z13.VnH()); 801 __ revb(z26.VnD(), p3.Merging(), z13.VnD()); 802 803 __ movprfx(z8.VnH(), p5.Merging(), z20.VnH()); 804 __ revh(z8.VnS(), p5.Merging(), z0.VnS()); 805 806 __ movprfx(z22.VnH(), p6.Merging(), z15.VnH()); 807 __ revw(z22.VnD(), p6.Merging(), z10.VnD()); 808 809 __ movprfx(z1.VnD(), p3.Merging(), z15.VnD()); 810 __ sabd(z1.VnB(), p3.Merging(), z1.VnB(), z15.VnB()); 811 812 __ movprfx(z25.VnD(), p1.Zeroing(), z30.VnD()); 813 __ sdiv(z25.VnS(), p1.Merging(), z25.VnS(), z30.VnS()); 814 815 __ movprfx(z19.VnS(), p3.Zeroing(), z11.VnS()); 816 __ sdivr(z19.VnD(), p3.Merging(), z19.VnD(), z24.VnD()); 817 818 __ movprfx(z12.VnH(), p2.Merging(), z2.VnH()); 819 __ smax(z12.VnS(), p2.Merging(), z12.VnS(), z24.VnS()); 820 821 __ movprfx(z3.VnD(), p1.Merging(), z15.VnD()); 822 __ smin(z3.VnS(), p1.Merging(), z3.VnS(), z20.VnS()); 823 824 __ movprfx(z13.VnS(), p5.Merging(), z22.VnS()); 825 __ smulh(z13.VnB(), p5.Merging(), z13.VnB(), z27.VnB()); 826 827 __ movprfx(z11.VnH(), p5.Zeroing(), z25.VnH()); 828 __ sub(z11.VnB(), p5.Merging(), z11.VnB(), z7.VnB()); 829 830 __ movprfx(z3.VnB(), p6.Merging(), z13.VnB()); 831 __ subr(z3.VnS(), p6.Merging(), z3.VnS(), z13.VnS()); 832 833 __ movprfx(z26.VnH(), p5.Merging(), z1.VnH()); 834 __ sxtb(z26.VnS(), p5.Merging(), z17.VnS()); 835 836 __ movprfx(z11.VnB(), p7.Zeroing(), z26.VnB()); 837 __ sxth(z11.VnS(), p7.Merging(), z26.VnS()); 838 839 __ movprfx(z1.VnS(), p2.Merging(), z21.VnS()); 840 __ sxtw(z1.VnD(), p2.Merging(), z21.VnD()); 841 842 __ movprfx(z4.VnS(), p6.Zeroing(), z6.VnS()); 843 __ uabd(z4.VnH(), p6.Merging(), z4.VnH(), z6.VnH()); 844 845 __ movprfx(z26.VnB(), p2.Zeroing(), z11.VnB()); 846 __ udiv(z26.VnD(), p2.Merging(), z26.VnD(), z11.VnD()); 847 848 __ movprfx(z19.VnB(), p5.Merging(), z6.VnB()); 849 __ udivr(z19.VnS(), p5.Merging(), z19.VnS(), z9.VnS()); 850 851 __ movprfx(z16.VnB(), p4.Merging(), z6.VnB()); 852 __ umax(z16.VnH(), p4.Merging(), z16.VnH(), z6.VnH()); 853 854 __ movprfx(z1.VnD(), p0.Zeroing(), z4.VnD()); 855 __ umin(z1.VnS(), p0.Merging(), z1.VnS(), z28.VnS()); 856 857 __ movprfx(z25.VnD(), p7.Merging(), z4.VnD()); 858 __ umulh(z25.VnB(), p7.Merging(), z25.VnB(), z16.VnB()); 859 860 __ movprfx(z29.VnB(), p4.Merging(), z2.VnB()); 861 __ uxtb(z29.VnS(), p4.Merging(), z31.VnS()); 862 863 __ movprfx(z27.VnH(), p5.Merging(), z21.VnH()); 864 __ uxth(z27.VnD(), p5.Merging(), z1.VnD()); 865 866 __ movprfx(z29.VnB(), p2.Merging(), z7.VnB()); 867 __ uxtw(z29.VnD(), p2.Merging(), z7.VnD()); 868 } 869 assm.FinalizeCode(); 870 871 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 872} 873 874TEST(movprfx_negative_lane_size_fp) { 875 // Test that CanTakeSVEMovprfx() checks that the (predicated) movprfx lane 876 // size is compatible with the prefixed instruction. 877 Assembler assm; 878 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE); 879 { 880 // We have to use the Assembler directly to generate movprfx, so we need 881 // to manually reserve space for the code we're about to emit. 882 static const size_t kPairCount = 64; 883 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 884 885 __ movprfx(z29.VnD(), p5.Zeroing(), z8.VnD()); 886 __ fabd(z29.VnS(), p5.Merging(), z29.VnS(), z26.VnS()); 887 888 __ movprfx(z9.VnB(), p0.Zeroing(), z1.VnB()); 889 __ fabs(z9.VnS(), p0.Merging(), z15.VnS()); 890 891 __ movprfx(z24.VnD(), p0.Zeroing(), z8.VnD()); 892 __ fadd(z24.VnH(), p0.Merging(), z24.VnH(), 0.5); 893 894 __ movprfx(z24.VnB(), p1.Zeroing(), z27.VnB()); 895 __ fadd(z24.VnH(), p1.Merging(), z24.VnH(), z27.VnH()); 896 897 __ movprfx(z14.VnH(), p7.Merging(), z12.VnH()); 898 __ fcadd(z14.VnD(), p7.Merging(), z14.VnD(), z12.VnD(), 90); 899 900 __ movprfx(z10.VnB(), p6.Merging(), z11.VnB()); 901 __ fcpy(z10.VnH(), p6.Merging(), 1.25); 902 903 __ movprfx(z12.VnB(), p6.Merging(), z18.VnB()); 904 __ fcvt(z12.VnD(), p6.Merging(), z18.VnH()); 905 906 __ movprfx(z18.VnH(), p7.Zeroing(), z2.VnH()); 907 __ fcvt(z18.VnD(), p7.Merging(), z0.VnS()); 908 909 __ movprfx(z3.VnH(), p5.Merging(), z14.VnH()); 910 __ fcvt(z3.VnS(), p5.Merging(), z21.VnD()); 911 912 __ movprfx(z15.VnH(), p1.Zeroing(), z12.VnH()); 913 __ fcvt(z15.VnH(), p1.Merging(), z12.VnD()); 914 915 __ movprfx(z3.VnH(), p2.Merging(), z22.VnH()); 916 __ fcvtzs(z3.VnD(), p2.Merging(), z7.VnH()); 917 918 __ movprfx(z17.VnS(), p3.Merging(), z14.VnS()); 919 __ fcvtzs(z17.VnD(), p3.Merging(), z14.VnD()); 920 921 __ movprfx(z2.VnH(), p1.Zeroing(), z16.VnH()); 922 __ fcvtzs(z2.VnS(), p1.Merging(), z31.VnH()); 923 924 __ movprfx(z13.VnB(), p2.Merging(), z9.VnB()); 925 __ fcvtzs(z13.VnS(), p2.Merging(), z23.VnD()); 926 927 __ movprfx(z19.VnB(), p1.Merging(), z4.VnB()); 928 __ fcvtzu(z19.VnD(), p1.Merging(), z14.VnH()); 929 930 __ movprfx(z29.VnS(), p2.Merging(), z19.VnS()); 931 __ fcvtzu(z29.VnD(), p2.Merging(), z19.VnD()); 932 933 __ movprfx(z21.VnS(), p4.Zeroing(), z17.VnS()); 934 __ fcvtzu(z21.VnD(), p4.Merging(), z17.VnS()); 935 936 __ movprfx(z19.VnH(), p4.Zeroing(), z30.VnH()); 937 __ fcvtzu(z19.VnS(), p4.Merging(), z16.VnD()); 938 939 __ movprfx(z10.VnS(), p7.Zeroing(), z27.VnS()); 940 __ fdiv(z10.VnH(), p7.Merging(), z10.VnH(), z27.VnH()); 941 942 __ movprfx(z7.VnD(), p7.Zeroing(), z17.VnD()); 943 __ fdivr(z7.VnH(), p7.Merging(), z7.VnH(), z28.VnH()); 944 945 __ movprfx(z22.VnB(), p0.Merging(), z27.VnB()); 946 __ fmad(z22.VnH(), p0.Merging(), z27.VnH(), z15.VnH()); 947 948 __ movprfx(z14.VnD(), p1.Zeroing(), z11.VnD()); 949 __ fmax(z14.VnS(), p1.Merging(), z14.VnS(), 0.0); 950 951 __ movprfx(z27.VnB(), p5.Merging(), z14.VnB()); 952 __ fmax(z27.VnD(), p5.Merging(), z27.VnD(), z14.VnD()); 953 954 __ movprfx(z31.VnH(), p7.Merging(), z24.VnH()); 955 __ fmaxnm(z31.VnD(), p7.Merging(), z31.VnD(), 0.0); 956 957 __ movprfx(z11.VnD(), p7.Zeroing(), z25.VnD()); 958 __ fmaxnm(z11.VnS(), p7.Merging(), z11.VnS(), z28.VnS()); 959 960 __ movprfx(z31.VnD(), p6.Merging(), z19.VnD()); 961 __ fmin(z31.VnH(), p6.Merging(), z31.VnH(), 0.0); 962 963 __ movprfx(z20.VnS(), p3.Zeroing(), z15.VnS()); 964 __ fmin(z20.VnH(), p3.Merging(), z20.VnH(), z8.VnH()); 965 966 __ movprfx(z6.VnS(), p0.Merging(), z30.VnS()); 967 __ fminnm(z6.VnH(), p0.Merging(), z6.VnH(), 0.0); 968 969 __ movprfx(z1.VnH(), p1.Zeroing(), z14.VnH()); 970 __ fminnm(z1.VnS(), p1.Merging(), z1.VnS(), z14.VnS()); 971 972 __ movprfx(z13.VnB(), p3.Zeroing(), z21.VnB()); 973 __ fmla(z13.VnD(), p3.Merging(), z12.VnD(), z21.VnD()); 974 975 __ movprfx(z15.VnS(), p1.Zeroing(), z20.VnS()); 976 __ fmls(z15.VnH(), p1.Merging(), z28.VnH(), z20.VnH()); 977 978 __ movprfx(z19.VnD(), p3.Zeroing(), z31.VnD()); 979 __ fmov(z19.VnH(), p3.Merging(), 0.0); 980 981 __ movprfx(z16.VnS(), p7.Merging(), z30.VnS()); 982 __ fmov(z16.VnH(), p7.Merging(), 2.5); 983 984 __ movprfx(z21.VnB(), p1.Merging(), z28.VnB()); 985 __ fmsb(z21.VnH(), p1.Merging(), z30.VnH(), z28.VnH()); 986 987 __ movprfx(z21.VnS(), p1.Zeroing(), z19.VnS()); 988 __ fmul(z21.VnH(), p1.Merging(), z21.VnH(), 2.0); 989 990 __ movprfx(z28.VnB(), p7.Zeroing(), z8.VnB()); 991 __ fmul(z28.VnS(), p7.Merging(), z28.VnS(), z26.VnS()); 992 993 __ movprfx(z2.VnB(), p4.Merging(), z31.VnB()); 994 __ fmulx(z2.VnH(), p4.Merging(), z2.VnH(), z31.VnH()); 995 996 __ movprfx(z6.VnB(), p2.Zeroing(), z0.VnB()); 997 __ fneg(z6.VnS(), p2.Merging(), z28.VnS()); 998 999 __ movprfx(z26.VnB(), p0.Zeroing(), z21.VnB()); 1000 __ fnmad(z26.VnH(), p0.Merging(), z21.VnH(), z18.VnH()); 1001 1002 __ movprfx(z15.VnB(), p1.Zeroing(), z26.VnB()); 1003 __ fnmla(z15.VnH(), p1.Merging(), z26.VnH(), z18.VnH()); 1004 1005 __ movprfx(z16.VnS(), p0.Merging(), z1.VnS()); 1006 __ fnmls(z16.VnD(), p0.Merging(), z1.VnD(), z13.VnD()); 1007 1008 __ movprfx(z4.VnH(), p0.Zeroing(), z16.VnH()); 1009 __ fnmsb(z4.VnS(), p0.Merging(), z30.VnS(), z3.VnS()); 1010 1011 // Note that frecpe and frecps _cannot_ take movprfx. 1012 __ movprfx(z9.VnH(), p0.Zeroing(), z21.VnH()); 1013 __ frecpx(z9.VnS(), p0.Merging(), z14.VnS()); 1014 1015 __ movprfx(z6.VnH(), p2.Zeroing(), z28.VnH()); 1016 __ frinta(z6.VnD(), p2.Merging(), z28.VnD()); 1017 1018 __ movprfx(z12.VnS(), p4.Zeroing(), z7.VnS()); 1019 __ frinti(z12.VnH(), p4.Merging(), z7.VnH()); 1020 1021 __ movprfx(z6.VnB(), p5.Merging(), z20.VnB()); 1022 __ frintm(z6.VnD(), p5.Merging(), z20.VnD()); 1023 1024 __ movprfx(z7.VnB(), p6.Merging(), z19.VnB()); 1025 __ frintn(z7.VnH(), p6.Merging(), z11.VnH()); 1026 1027 __ movprfx(z12.VnD(), p2.Merging(), z31.VnD()); 1028 __ frintp(z12.VnS(), p2.Merging(), z31.VnS()); 1029 1030 __ movprfx(z1.VnS(), p5.Merging(), z10.VnS()); 1031 __ frintx(z1.VnD(), p5.Merging(), z0.VnD()); 1032 1033 __ movprfx(z6.VnH(), p0.Merging(), z12.VnH()); 1034 __ frintz(z6.VnS(), p0.Merging(), z7.VnS()); 1035 1036 __ movprfx(z8.VnH(), p2.Merging(), z6.VnH()); 1037 __ fscale(z8.VnD(), p2.Merging(), z8.VnD(), z6.VnD()); 1038 1039 __ movprfx(z20.VnH(), p2.Zeroing(), z2.VnH()); 1040 __ fsqrt(z20.VnD(), p2.Merging(), z15.VnD()); 1041 1042 __ movprfx(z28.VnS(), p6.Zeroing(), z19.VnS()); 1043 __ fsub(z28.VnD(), p6.Merging(), z28.VnD(), 1.0); 1044 1045 __ movprfx(z6.VnB(), p0.Zeroing(), z12.VnB()); 1046 __ fsub(z6.VnD(), p0.Merging(), z6.VnD(), z20.VnD()); 1047 1048 __ movprfx(z6.VnS(), p7.Zeroing(), z11.VnS()); 1049 __ fsubr(z6.VnH(), p7.Merging(), z6.VnH(), 1.0); 1050 1051 __ movprfx(z28.VnB(), p3.Merging(), z10.VnB()); 1052 __ fsubr(z28.VnS(), p3.Merging(), z28.VnS(), z9.VnS()); 1053 1054 __ movprfx(z22.VnB(), p3.Zeroing(), z14.VnB()); 1055 __ scvtf(z22.VnD(), p3.Merging(), z24.VnS()); 1056 1057 __ movprfx(z20.VnS(), p2.Merging(), z9.VnS()); 1058 __ scvtf(z20.VnH(), p2.Merging(), z9.VnH()); 1059 1060 __ movprfx(z19.VnH(), p1.Merging(), z21.VnH()); 1061 __ scvtf(z19.VnS(), p1.Merging(), z6.VnD()); 1062 1063 __ movprfx(z31.VnS(), p3.Merging(), z22.VnS()); 1064 __ scvtf(z31.VnH(), p3.Merging(), z22.VnD()); 1065 1066 __ movprfx(z8.VnS(), p3.Merging(), z3.VnS()); 1067 __ ucvtf(z8.VnD(), p3.Merging(), z1.VnS()); 1068 1069 __ movprfx(z0.VnB(), p0.Merging(), z23.VnB()); 1070 __ ucvtf(z0.VnH(), p0.Merging(), z12.VnH()); 1071 1072 __ movprfx(z8.VnH(), p3.Zeroing(), z4.VnH()); 1073 __ ucvtf(z8.VnH(), p3.Merging(), z4.VnS()); 1074 1075 __ movprfx(z20.VnH(), p2.Zeroing(), z10.VnH()); 1076 __ ucvtf(z20.VnH(), p2.Merging(), z11.VnD()); 1077 } 1078 assm.FinalizeCode(); 1079 1080 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 1081} 1082 1083TEST(movprfx_negative_predication) { 1084 // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears 1085 // before an unpredicated instruction. 1086 Assembler assm; 1087 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); 1088 { 1089 // We have to use the Assembler directly to generate movprfx, so we need 1090 // to manually reserve space for the code we're about to emit. 1091 static const size_t kPairCount = 60; 1092 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1093 1094 __ movprfx(z27.VnS(), p1.Zeroing(), z12.VnS()); 1095 __ add(z27.VnS(), z27.VnS(), 42); 1096 1097 __ movprfx(z31.VnS(), p6.Zeroing(), z1.VnS()); 1098 __ and_(z31.VnS(), z31.VnS(), 4); 1099 1100 __ movprfx(z27.VnS(), p5.Merging(), z24.VnS()); 1101 __ bic(z27.VnS(), z27.VnS(), 4); 1102 1103 __ movprfx(z6.VnH(), p7.Merging(), z30.VnH()); 1104 __ clasta(z6.VnH(), p7, z6.VnH(), z14.VnH()); 1105 1106 __ movprfx(z11.VnB(), p6.Merging(), z5.VnB()); 1107 __ clastb(z11.VnB(), p6, z11.VnB(), z29.VnB()); 1108 1109 __ movprfx(z5.VnD(), p0.Merging(), z1.VnD()); 1110 __ decd(z5.VnD(), SVE_MUL3); 1111 1112 __ movprfx(z11.VnH(), p7.Zeroing(), z28.VnH()); 1113 __ dech(z11.VnH(), SVE_VL2); 1114 1115 __ movprfx(z14.VnS(), p5.Zeroing(), z6.VnS()); 1116 __ decp(z14.VnS(), p5); 1117 1118 __ movprfx(z6.VnS(), p5.Merging(), z10.VnS()); 1119 __ decw(z6.VnS(), SVE_ALL); 1120 1121 __ movprfx(z27.VnH(), p7.Zeroing(), z9.VnH()); 1122 __ eon(z27.VnH(), z27.VnH(), 4); 1123 1124 __ movprfx(z3.VnS(), p3.Zeroing(), z2.VnS()); 1125 __ eor(z3.VnS(), z3.VnS(), 4); 1126 1127 __ movprfx(z30.VnB(), p2.Zeroing(), z25.VnB()); 1128 __ ext(z30.VnB(), z30.VnB(), z25.VnB(), 42); 1129 1130 __ movprfx(z22.VnD(), p0.Merging(), z0.VnD()); 1131 __ incd(z22.VnD(), SVE_MUL3); 1132 1133 __ movprfx(z7.VnH(), p3.Merging(), z3.VnH()); 1134 __ inch(z7.VnH(), SVE_VL2); 1135 1136 __ movprfx(z9.VnD(), p1.Zeroing(), z28.VnD()); 1137 __ incp(z9.VnD(), p1); 1138 1139 __ movprfx(z30.VnS(), p3.Merging(), z4.VnS()); 1140 __ incw(z30.VnS(), SVE_ALL); 1141 1142 __ movprfx(z30.VnB(), p7.Zeroing(), z21.VnB()); 1143 __ insr(z30.VnB(), w30); 1144 1145 __ movprfx(z2.VnB(), p4.Zeroing(), z26.VnB()); 1146 __ insr(z2.VnB(), b0); 1147 1148 __ movprfx(z27.VnS(), p5.Zeroing(), z5.VnS()); 1149 __ mul(z27.VnS(), z27.VnS(), 42); 1150 1151 __ movprfx(z5.VnS(), p0.Merging(), z26.VnS()); 1152 __ orn(z5.VnS(), z5.VnS(), 4); 1153 1154 __ movprfx(z5.VnS(), p0.Merging(), z26.VnS()); 1155 __ orn(z5.VnS(), z5.VnS(), 4); 1156 1157 __ movprfx(z16.VnD(), p1.Merging(), z13.VnD()); 1158 __ sdot(z16.VnD(), z11.VnH(), z7.VnH(), 1); 1159 1160 __ movprfx(z27.VnD(), p5.Merging(), z18.VnD()); 1161 __ sdot(z27.VnD(), z18.VnH(), z0.VnH()); 1162 1163 __ movprfx(z20.VnS(), p6.Merging(), z1.VnS()); 1164 __ sdot(z20.VnS(), z10.VnB(), z1.VnB(), 1); 1165 1166 __ movprfx(z19.VnD(), p0.Zeroing(), z7.VnD()); 1167 __ smax(z19.VnD(), z19.VnD(), 42); 1168 1169 __ movprfx(z15.VnD(), p1.Zeroing(), z7.VnD()); 1170 __ smin(z15.VnD(), z15.VnD(), 42); 1171 1172 __ movprfx(z15.VnB(), p5.Merging(), z3.VnB()); 1173 __ splice(z15.VnB(), p5, z15.VnB(), z3.VnB()); 1174 1175 __ movprfx(z5.VnB(), p6.Zeroing(), z4.VnB()); 1176 __ sqadd(z5.VnB(), z5.VnB(), 42); 1177 1178 __ movprfx(z16.VnD(), p0.Zeroing(), z18.VnD()); 1179 __ sqdecd(z16.VnD(), SVE_MUL3); 1180 1181 __ movprfx(z7.VnH(), p3.Merging(), z28.VnH()); 1182 __ sqdech(z7.VnH(), SVE_VL2); 1183 1184 __ movprfx(z7.VnS(), p2.Merging(), z13.VnS()); 1185 __ sqdecp(z7.VnS(), p2); 1186 1187 __ movprfx(z22.VnS(), p7.Zeroing(), z20.VnS()); 1188 __ sqdecw(z22.VnS(), SVE_ALL); 1189 1190 __ movprfx(z26.VnD(), p1.Zeroing(), z0.VnD()); 1191 __ sqincd(z26.VnD(), SVE_MUL3); 1192 1193 __ movprfx(z15.VnH(), p7.Zeroing(), z27.VnH()); 1194 __ sqinch(z15.VnH(), SVE_VL2); 1195 1196 __ movprfx(z4.VnD(), p7.Merging(), z13.VnD()); 1197 __ sqincp(z4.VnD(), p7); 1198 1199 __ movprfx(z29.VnS(), p6.Merging(), z14.VnS()); 1200 __ sqincw(z29.VnS(), SVE_ALL); 1201 1202 __ movprfx(z17.VnB(), p1.Merging(), z24.VnB()); 1203 __ sqsub(z17.VnB(), z17.VnB(), 42); 1204 1205 __ movprfx(z26.VnS(), p5.Zeroing(), z19.VnS()); 1206 __ sub(z26.VnS(), z26.VnS(), 42); 1207 1208 __ movprfx(z15.VnD(), p1.Merging(), z3.VnD()); 1209 __ subr(z15.VnD(), z15.VnD(), 42); 1210 1211 __ movprfx(z4.VnD(), p2.Zeroing(), z14.VnD()); 1212 __ udot(z4.VnD(), z15.VnH(), z7.VnH(), 1); 1213 1214 __ movprfx(z29.VnD(), p4.Zeroing(), z28.VnD()); 1215 __ udot(z29.VnD(), z2.VnH(), z17.VnH()); 1216 1217 __ movprfx(z7.VnS(), p6.Merging(), z3.VnS()); 1218 __ udot(z7.VnS(), z14.VnB(), z1.VnB(), 1); 1219 1220 __ movprfx(z14.VnB(), p3.Merging(), z5.VnB()); 1221 __ umax(z14.VnB(), z14.VnB(), 42); 1222 1223 __ movprfx(z4.VnD(), p1.Zeroing(), z2.VnD()); 1224 __ umin(z4.VnD(), z4.VnD(), 42); 1225 1226 __ movprfx(z19.VnB(), p0.Zeroing(), z27.VnB()); 1227 __ uqadd(z19.VnB(), z19.VnB(), 42); 1228 1229 __ movprfx(z24.VnD(), p7.Zeroing(), z11.VnD()); 1230 __ uqdecd(z24.VnD(), SVE_MUL3); 1231 1232 __ movprfx(z24.VnH(), p4.Zeroing(), z18.VnH()); 1233 __ uqdech(z24.VnH(), SVE_VL2); 1234 1235 __ movprfx(z31.VnS(), p5.Zeroing(), z2.VnS()); 1236 __ uqdecp(z31.VnS(), p5); 1237 1238 __ movprfx(z19.VnS(), p6.Merging(), z21.VnS()); 1239 __ uqdecw(z19.VnS(), SVE_ALL); 1240 1241 __ movprfx(z27.VnD(), p0.Merging(), z21.VnD()); 1242 __ uqincd(z27.VnD(), SVE_MUL3); 1243 1244 __ movprfx(z13.VnH(), p4.Zeroing(), z12.VnH()); 1245 __ uqinch(z13.VnH(), SVE_VL2); 1246 1247 __ movprfx(z0.VnD(), p4.Zeroing(), z1.VnD()); 1248 __ uqincp(z0.VnD(), p4); 1249 1250 __ movprfx(z12.VnS(), p4.Merging(), z21.VnS()); 1251 __ uqincw(z12.VnS(), SVE_ALL); 1252 1253 __ movprfx(z9.VnD(), p0.Zeroing(), z16.VnD()); 1254 __ uqsub(z9.VnD(), z9.VnD(), 42); 1255 1256 __ movprfx(z22.VnS(), p0.Zeroing(), z5.VnS()); 1257 __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); 1258 1259 __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); 1260 __ ummla(z1.VnS(), z10.VnB(), z2.VnB()); 1261 1262 __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); 1263 __ usmmla(z30.VnS(), z29.VnB(), z18.VnB()); 1264 1265 __ movprfx(z4.VnS(), p0.Zeroing(), z5.VnS()); 1266 __ usdot(z4.VnS(), z3.VnB(), z4.VnB()); 1267 1268 __ movprfx(z10.VnS(), p0.Zeroing(), z5.VnS()); 1269 __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0); 1270 1271 __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS()); 1272 __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1); 1273 } 1274 assm.FinalizeCode(); 1275 1276 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 1277} 1278 1279TEST(movprfx_negative_predication_fp) { 1280 // Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears 1281 // before an unpredicated instruction. 1282 Assembler assm; 1283 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 1284 CPUFeatures::kSVEF32MM, 1285 CPUFeatures::kSVEF64MM); 1286 { 1287 // We have to use the Assembler directly to generate movprfx, so we need 1288 // to manually reserve space for the code we're about to emit. 1289 static const size_t kPairCount = 11; 1290 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1291 1292 __ movprfx(z10.VnH(), p3.Zeroing(), z3.VnH()); 1293 __ fcmla(z10.VnH(), z22.VnH(), z3.VnH(), 2, 180); 1294 1295 __ movprfx(z12.VnS(), p4.Merging(), z14.VnS()); 1296 __ fcmla(z12.VnS(), z3.VnS(), z10.VnS(), 1, 270); 1297 1298 __ movprfx(z16.VnD(), p3.Zeroing(), z24.VnD()); 1299 __ fmla(z16.VnD(), z24.VnD(), z8.VnD(), 1); 1300 1301 __ movprfx(z9.VnH(), p7.Zeroing(), z0.VnH()); 1302 __ fmla(z9.VnH(), z8.VnH(), z0.VnH(), 7); 1303 1304 __ movprfx(z23.VnS(), p5.Merging(), z5.VnS()); 1305 __ fmla(z23.VnS(), z7.VnS(), z5.VnS(), 3); 1306 1307 __ movprfx(z19.VnD(), p6.Zeroing(), z8.VnD()); 1308 __ fmls(z19.VnD(), z27.VnD(), z13.VnD(), 1); 1309 1310 __ movprfx(z25.VnH(), p7.Merging(), z24.VnH()); 1311 __ fmls(z25.VnH(), z24.VnH(), z4.VnH(), 4); 1312 1313 __ movprfx(z2.VnS(), p1.Zeroing(), z0.VnS()); 1314 __ fmls(z2.VnS(), z9.VnS(), z0.VnS(), 3); 1315 1316 // Note that ftsmul and ftssel cannot take movprfx. 1317 __ movprfx(z22.VnD(), p6.Merging(), z16.VnD()); 1318 __ ftmad(z22.VnD(), z22.VnD(), z20.VnD(), 2); 1319 1320 __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS()); 1321 __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); 1322 1323 __ movprfx(z31.VnD(), p1.Merging(), z5.VnD()); 1324 __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); 1325 } 1326 assm.FinalizeCode(); 1327 1328 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 1329} 1330 1331TEST(movprfx_positive) { 1332 Assembler assm; 1333 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM); 1334 { 1335 // We have to use the Assembler directly to generate movprfx, so we need 1336 // to manually reserve space for the code we're about to emit. 1337 static const size_t kPairCount = 123; 1338 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1339 1340 __ movprfx(z17, z28); 1341 __ abs(z17.VnB(), p6.Merging(), z28.VnB()); 1342 1343 __ movprfx(z9, z7); 1344 __ add(z9.VnB(), p5.Merging(), z9.VnB(), z29.VnB()); 1345 1346 __ movprfx(z11, z0); 1347 __ add(z11.VnD(), z11.VnD(), 42); 1348 1349 __ movprfx(z8.VnS(), p3.Zeroing(), z28.VnS()); 1350 __ and_(z8.VnS(), p3.Merging(), z8.VnS(), z31.VnS()); 1351 1352 __ movprfx(z20, z23); 1353 __ and_(z20.VnS(), z20.VnS(), 4); 1354 1355 __ movprfx(z24.VnD(), p5.Merging(), z11.VnD()); 1356 __ asr(z24.VnD(), p5.Merging(), z24.VnD(), 3); 1357 1358 __ movprfx(z1, z13); 1359 __ asr(z1.VnH(), p3.Merging(), z1.VnH(), z4.VnH()); 1360 1361 __ movprfx(z0.VnB(), p7.Zeroing(), z28.VnB()); 1362 __ asr(z0.VnB(), p7.Merging(), z0.VnB(), z28.VnD()); 1363 1364 __ movprfx(z15, z5); 1365 __ asr(z15.VnD(), p3.Merging(), z15.VnD(), z5.VnD()); 1366 1367 __ movprfx(z24.VnH(), p3.Merging(), z22.VnH()); 1368 __ asrd(z24.VnH(), p3.Merging(), z24.VnH(), 3); 1369 1370 __ movprfx(z2.VnS(), p3.Zeroing(), z20.VnS()); 1371 __ asrr(z2.VnS(), p3.Merging(), z2.VnS(), z15.VnS()); 1372 1373 __ movprfx(z17.VnB(), p7.Merging(), z6.VnB()); 1374 __ bic(z17.VnB(), p7.Merging(), z17.VnB(), z25.VnB()); 1375 1376 __ movprfx(z31, z6); 1377 __ bic(z31.VnD(), z31.VnD(), 4); 1378 1379 __ movprfx(z20, z2); 1380 __ clasta(z20.VnB(), p4, z20.VnB(), z15.VnB()); 1381 1382 __ movprfx(z27, z11); 1383 __ clastb(z27.VnB(), p5, z27.VnB(), z6.VnB()); 1384 1385 __ movprfx(z3.VnS(), p7.Zeroing(), z17.VnS()); 1386 __ cls(z3.VnS(), p7.Merging(), z0.VnS()); 1387 1388 __ movprfx(z29.VnB(), p0.Zeroing(), z24.VnB()); 1389 __ clz(z29.VnB(), p0.Merging(), z7.VnB()); 1390 1391 __ movprfx(z2.VnH(), p7.Zeroing(), z29.VnH()); 1392 __ cnot(z2.VnH(), p7.Merging(), z28.VnH()); 1393 1394 __ movprfx(z23, z5); 1395 __ cnt(z23.VnH(), p0.Merging(), z12.VnH()); 1396 1397 __ movprfx(z5, z3); 1398 __ cpy(z5.VnD(), p1.Merging(), -42); 1399 1400 __ movprfx(z0, z12); 1401 __ cpy(z0.VnB(), p1.Merging(), w0); 1402 1403 __ movprfx(z27, z8); 1404 __ cpy(z27.VnB(), p0.Merging(), b0); 1405 1406 __ movprfx(z20, z24); 1407 __ decd(z20.VnD(), SVE_MUL3); 1408 1409 __ movprfx(z5, z28); 1410 __ dech(z5.VnH(), SVE_VL2); 1411 1412 __ movprfx(z7, z3); 1413 __ decp(z7.VnD(), p2); 1414 1415 __ movprfx(z4, z7); 1416 __ decw(z4.VnS(), SVE_ALL); 1417 1418 __ movprfx(z3, z18); 1419 __ eon(z3.VnS(), z3.VnS(), 4); 1420 1421 __ movprfx(z4.VnD(), p0.Merging(), z10.VnD()); 1422 __ eor(z4.VnD(), p0.Merging(), z4.VnD(), z10.VnD()); 1423 1424 __ movprfx(z15, z18); 1425 __ eor(z15.VnH(), z15.VnH(), 4); 1426 1427 __ movprfx(z17, z30); 1428 __ ext(z17.VnB(), z17.VnB(), z18.VnB(), 2); 1429 1430 __ movprfx(z19, z28); 1431 __ incd(z19.VnD(), SVE_MUL3); 1432 1433 __ movprfx(z13, z7); 1434 __ inch(z13.VnH(), SVE_VL2); 1435 1436 __ movprfx(z14, z21); 1437 __ incp(z14.VnD(), p1); 1438 1439 __ movprfx(z26, z12); 1440 __ incw(z26.VnS(), SVE_ALL); 1441 1442 __ movprfx(z16, z2); 1443 __ insr(z16.VnB(), w16); 1444 1445 __ movprfx(z20, z26); 1446 __ insr(z20.VnB(), b0); 1447 1448 __ movprfx(z30.VnD(), p0.Merging(), z23.VnD()); 1449 __ lsl(z30.VnD(), p0.Merging(), z30.VnD(), 3); 1450 1451 __ movprfx(z28.VnS(), p2.Zeroing(), z6.VnS()); 1452 __ lsl(z28.VnS(), p2.Merging(), z28.VnS(), z6.VnS()); 1453 1454 __ movprfx(z15.VnH(), p6.Zeroing(), z3.VnH()); 1455 __ lsl(z15.VnH(), p6.Merging(), z15.VnH(), z3.VnD()); 1456 1457 __ movprfx(z13.VnD(), p4.Zeroing(), z14.VnD()); 1458 __ lsl(z13.VnD(), p4.Merging(), z13.VnD(), z25.VnD()); 1459 1460 __ movprfx(z14, z5); 1461 __ lslr(z14.VnS(), p0.Merging(), z14.VnS(), z17.VnS()); 1462 1463 __ movprfx(z21, z1); 1464 __ lsr(z21.VnH(), p5.Merging(), z21.VnH(), 3); 1465 1466 __ movprfx(z11.VnH(), p0.Zeroing(), z13.VnH()); 1467 __ lsr(z11.VnH(), p0.Merging(), z11.VnH(), z9.VnH()); 1468 1469 __ movprfx(z24, z29); 1470 __ lsr(z24.VnS(), p4.Merging(), z24.VnS(), z1.VnD()); 1471 1472 __ movprfx(z1.VnD(), p6.Merging(), z9.VnD()); 1473 __ lsr(z1.VnD(), p6.Merging(), z1.VnD(), z9.VnD()); 1474 1475 __ movprfx(z22, z3); 1476 __ lsrr(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()); 1477 1478 __ movprfx(z24.VnB(), p2.Zeroing(), z5.VnB()); 1479 __ mad(z24.VnB(), p2.Merging(), z5.VnB(), z10.VnB()); 1480 1481 __ movprfx(z8, z4); 1482 __ mla(z8.VnS(), p6.Merging(), z4.VnS(), z26.VnS()); 1483 1484 __ movprfx(z10, z8); 1485 __ mls(z10.VnS(), p4.Merging(), z23.VnS(), z16.VnS()); 1486 1487 // Aliases of cpy. 1488 __ movprfx(z4.VnH(), p5.Zeroing(), z2.VnH()); 1489 __ mov(z4.VnH(), p5.Merging(), -42); 1490 1491 __ movprfx(z2.VnB(), p3.Zeroing(), z24.VnB()); 1492 __ mov(z2.VnB(), p3.Merging(), w2); 1493 1494 __ movprfx(z27, z13); 1495 __ mov(z27.VnD(), p3.Merging(), d0); 1496 1497 __ movprfx(z18.VnB(), p5.Zeroing(), z11.VnB()); 1498 __ msb(z18.VnB(), p5.Merging(), z3.VnB(), z11.VnB()); 1499 1500 __ movprfx(z29, z16); 1501 __ mul(z29.VnS(), p6.Merging(), z29.VnS(), z9.VnS()); 1502 1503 __ movprfx(z21, z23); 1504 __ mul(z21.VnH(), z21.VnH(), 42); 1505 1506 __ movprfx(z7.VnS(), p4.Merging(), z14.VnS()); 1507 __ neg(z7.VnS(), p4.Merging(), z14.VnS()); 1508 1509 __ movprfx(z8.VnD(), p4.Zeroing(), z5.VnD()); 1510 __ not_(z8.VnD(), p4.Merging(), z5.VnD()); 1511 1512 __ movprfx(z14, z13); 1513 __ orn(z14.VnS(), z14.VnS(), 4); 1514 1515 __ movprfx(z14, z13); 1516 __ orn(z14.VnS(), z14.VnS(), 4); 1517 1518 __ movprfx(z27, z17); 1519 __ orr(z27.VnD(), p2.Merging(), z27.VnD(), z17.VnD()); 1520 1521 __ movprfx(z13.VnH(), p2.Zeroing(), z27.VnH()); 1522 __ rbit(z13.VnH(), p2.Merging(), z1.VnH()); 1523 1524 __ movprfx(z1, z29); 1525 __ revb(z1.VnS(), p4.Merging(), z6.VnS()); 1526 1527 __ movprfx(z18.VnD(), p2.Zeroing(), z10.VnD()); 1528 __ revh(z18.VnD(), p2.Merging(), z16.VnD()); 1529 1530 __ movprfx(z2.VnD(), p1.Merging(), z10.VnD()); 1531 __ revw(z2.VnD(), p1.Merging(), z1.VnD()); 1532 1533 __ movprfx(z28.VnS(), p7.Merging(), z11.VnS()); 1534 __ sabd(z28.VnS(), p7.Merging(), z28.VnS(), z11.VnS()); 1535 1536 __ movprfx(z22.VnS(), p0.Merging(), z20.VnS()); 1537 __ sdiv(z22.VnS(), p0.Merging(), z22.VnS(), z6.VnS()); 1538 1539 __ movprfx(z13.VnS(), p7.Merging(), z0.VnS()); 1540 __ sdivr(z13.VnS(), p7.Merging(), z13.VnS(), z2.VnS()); 1541 1542 __ movprfx(z0, z12); 1543 __ sdot(z0.VnD(), z10.VnH(), z12.VnH(), 1); 1544 1545 __ movprfx(z8, z15); 1546 __ sdot(z8.VnS(), z15.VnB(), z12.VnB()); 1547 1548 __ movprfx(z13, z0); 1549 __ sdot(z13.VnS(), z10.VnB(), z0.VnB(), 1); 1550 1551 __ movprfx(z11, z13); 1552 __ smax(z11.VnB(), p5.Merging(), z11.VnB(), z24.VnB()); 1553 1554 __ movprfx(z3, z17); 1555 __ smax(z3.VnD(), z3.VnD(), 42); 1556 1557 __ movprfx(z10, z29); 1558 __ smin(z10.VnD(), p4.Merging(), z10.VnD(), z29.VnD()); 1559 1560 __ movprfx(z13, z29); 1561 __ smin(z13.VnD(), z13.VnD(), 42); 1562 1563 __ movprfx(z6, z17); 1564 __ smulh(z6.VnS(), p7.Merging(), z6.VnS(), z31.VnS()); 1565 1566 __ movprfx(z19, z20); 1567 __ splice(z19.VnB(), p3, z19.VnB(), z20.VnB()); 1568 1569 __ movprfx(z0, z3); 1570 __ sqadd(z0.VnD(), z0.VnD(), 42); 1571 1572 __ movprfx(z29, z5); 1573 __ sqdecd(z29.VnD(), SVE_MUL3); 1574 1575 __ movprfx(z25, z11); 1576 __ sqdech(z25.VnH(), SVE_VL2); 1577 1578 __ movprfx(z16, z9); 1579 __ sqdecp(z16.VnS(), p1); 1580 1581 __ movprfx(z8, z17); 1582 __ sqdecw(z8.VnS(), SVE_ALL); 1583 1584 __ movprfx(z4, z5); 1585 __ sqincd(z4.VnD(), SVE_MUL3); 1586 1587 __ movprfx(z0, z17); 1588 __ sqinch(z0.VnH(), SVE_VL2); 1589 1590 __ movprfx(z7, z27); 1591 __ sqincp(z7.VnS(), p6); 1592 1593 __ movprfx(z10, z9); 1594 __ sqincw(z10.VnS(), SVE_ALL); 1595 1596 __ movprfx(z31, z22); 1597 __ sqsub(z31.VnB(), z31.VnB(), 42); 1598 1599 __ movprfx(z12.VnH(), p7.Zeroing(), z23.VnH()); 1600 __ sub(z12.VnH(), p7.Merging(), z12.VnH(), z23.VnH()); 1601 1602 __ movprfx(z10, z1); 1603 __ sub(z10.VnH(), z10.VnH(), 42); 1604 1605 __ movprfx(z15.VnB(), p0.Merging(), z0.VnB()); 1606 __ subr(z15.VnB(), p0.Merging(), z15.VnB(), z0.VnB()); 1607 1608 __ movprfx(z17, z2); 1609 __ subr(z17.VnH(), z17.VnH(), 42); 1610 1611 __ movprfx(z5, z3); 1612 __ sxtb(z5.VnD(), p6.Merging(), z20.VnD()); 1613 1614 __ movprfx(z11, z17); 1615 __ sxth(z11.VnD(), p6.Merging(), z25.VnD()); 1616 1617 __ movprfx(z26, z4); 1618 __ sxtw(z26.VnD(), p5.Merging(), z4.VnD()); 1619 1620 __ movprfx(z15.VnD(), p0.Zeroing(), z8.VnD()); 1621 __ uabd(z15.VnD(), p0.Merging(), z15.VnD(), z20.VnD()); 1622 1623 __ movprfx(z21, z24); 1624 __ udiv(z21.VnD(), p3.Merging(), z21.VnD(), z24.VnD()); 1625 1626 __ movprfx(z22, z10); 1627 __ udivr(z22.VnD(), p7.Merging(), z22.VnD(), z27.VnD()); 1628 1629 __ movprfx(z27, z25); 1630 __ udot(z27.VnD(), z29.VnH(), z3.VnH(), 1); 1631 1632 __ movprfx(z29, z10); 1633 __ udot(z29.VnS(), z10.VnB(), z21.VnB()); 1634 1635 __ movprfx(z18, z0); 1636 __ udot(z18.VnS(), z14.VnB(), z0.VnB(), 1); 1637 1638 __ movprfx(z6, z30); 1639 __ umax(z6.VnS(), p2.Merging(), z6.VnS(), z27.VnS()); 1640 1641 __ movprfx(z31, z17); 1642 __ umax(z31.VnD(), z31.VnD(), 42); 1643 1644 __ movprfx(z27.VnS(), p0.Merging(), z20.VnS()); 1645 __ umin(z27.VnS(), p0.Merging(), z27.VnS(), z8.VnS()); 1646 1647 __ movprfx(z0, z11); 1648 __ umin(z0.VnH(), z0.VnH(), 42); 1649 1650 __ movprfx(z21, z17); 1651 __ umulh(z21.VnB(), p0.Merging(), z21.VnB(), z30.VnB()); 1652 1653 __ movprfx(z9, z24); 1654 __ uqadd(z9.VnD(), z9.VnD(), 42); 1655 1656 __ movprfx(z18, z13); 1657 __ uqdecd(z18.VnD(), SVE_MUL3); 1658 1659 __ movprfx(z20, z23); 1660 __ uqdech(z20.VnH(), SVE_VL2); 1661 1662 __ movprfx(z12, z29); 1663 __ uqdecp(z12.VnS(), p7); 1664 1665 __ movprfx(z24, z25); 1666 __ uqdecw(z24.VnS(), SVE_ALL); 1667 1668 __ movprfx(z13, z1); 1669 __ uqincd(z13.VnD(), SVE_MUL3); 1670 1671 __ movprfx(z5, z19); 1672 __ uqinch(z5.VnH(), SVE_VL2); 1673 1674 __ movprfx(z6, z25); 1675 __ uqincp(z6.VnS(), p5); 1676 1677 __ movprfx(z12, z14); 1678 __ uqincw(z12.VnS(), SVE_ALL); 1679 1680 __ movprfx(z13, z6); 1681 __ uqsub(z13.VnH(), z13.VnH(), 42); 1682 1683 __ movprfx(z31, z3); 1684 __ uxtb(z31.VnS(), p0.Merging(), z3.VnS()); 1685 1686 __ movprfx(z18.VnD(), p4.Merging(), z25.VnD()); 1687 __ uxth(z18.VnD(), p4.Merging(), z25.VnD()); 1688 1689 __ movprfx(z18.VnD(), p7.Merging(), z25.VnD()); 1690 __ uxtw(z18.VnD(), p7.Merging(), z25.VnD()); 1691 1692 __ movprfx(z22, z5); 1693 __ smmla(z22.VnS(), z21.VnB(), z0.VnB()); 1694 1695 __ movprfx(z1, z5); 1696 __ ummla(z1.VnS(), z10.VnB(), z0.VnB()); 1697 1698 __ movprfx(z30, z5); 1699 __ usmmla(z30.VnS(), z31.VnB(), z18.VnB()); 1700 1701 __ movprfx(z4, z5); 1702 __ usdot(z4.VnS(), z3.VnB(), z3.VnB()); 1703 1704 __ movprfx(z10, z5); 1705 __ usdot(z10.VnS(), z9.VnB(), z0.VnB(), 0); 1706 1707 __ movprfx(z1, z5); 1708 __ sudot(z1.VnS(), z10.VnB(), z2.VnB(), 1); 1709 } 1710 assm.FinalizeCode(); 1711 1712 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); 1713} 1714 1715TEST(movprfx_positive_fp) { 1716 Assembler assm; 1717 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 1718 CPUFeatures::kSVEF32MM, 1719 CPUFeatures::kSVEF64MM); 1720 { 1721 // We have to use the Assembler directly to generate movprfx, so we need 1722 // to manually reserve space for the code we're about to emit. 1723 static const size_t kPairCount = 75; 1724 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1725 1726 __ movprfx(z18.VnS(), p6.Zeroing(), z20.VnS()); 1727 __ fabd(z18.VnS(), p6.Merging(), z18.VnS(), z19.VnS()); 1728 1729 __ movprfx(z28.VnD(), p4.Zeroing(), z24.VnD()); 1730 __ fabs(z28.VnD(), p4.Merging(), z24.VnD()); 1731 1732 __ movprfx(z12, z8); 1733 __ fadd(z12.VnS(), p2.Merging(), z12.VnS(), 0.5); 1734 1735 __ movprfx(z0.VnS(), p1.Merging(), z9.VnS()); 1736 __ fadd(z0.VnS(), p1.Merging(), z0.VnS(), z9.VnS()); 1737 1738 __ movprfx(z10.VnH(), p2.Merging(), z2.VnH()); 1739 __ fcadd(z10.VnH(), p2.Merging(), z10.VnH(), z20.VnH(), 90); 1740 1741 __ movprfx(z21, z6); 1742 __ fcmla(z21.VnH(), z31.VnH(), z6.VnH(), 2, 180); 1743 1744 __ movprfx(z16, z6); 1745 __ fcmla(z16.VnS(), z11.VnS(), z6.VnS(), 1, 270); 1746 1747 __ movprfx(z15.VnH(), p6.Merging(), z16.VnH()); 1748 __ fcpy(z15.VnH(), p6.Merging(), 1.25); 1749 1750 __ movprfx(z1, z14); 1751 __ fcvt(z1.VnD(), p2.Merging(), z4.VnH()); 1752 1753 __ movprfx(z25.VnD(), p6.Merging(), z1.VnD()); 1754 __ fcvt(z25.VnD(), p6.Merging(), z1.VnS()); 1755 1756 __ movprfx(z18.VnS(), p2.Merging(), z2.VnS()); 1757 __ fcvt(z18.VnH(), p2.Merging(), z7.VnS()); 1758 1759 __ movprfx(z21.VnD(), p5.Zeroing(), z26.VnD()); 1760 __ fcvt(z21.VnH(), p5.Merging(), z26.VnD()); 1761 1762 __ movprfx(z12.VnD(), p1.Merging(), z18.VnD()); 1763 __ fcvtzs(z12.VnD(), p1.Merging(), z18.VnH()); 1764 1765 __ movprfx(z3.VnS(), p2.Merging(), z0.VnS()); 1766 __ fcvtzs(z3.VnS(), p2.Merging(), z26.VnS()); 1767 1768 __ movprfx(z21.VnS(), p4.Merging(), z7.VnS()); 1769 __ fcvtzs(z21.VnS(), p4.Merging(), z7.VnH()); 1770 1771 __ movprfx(z16.VnD(), p3.Zeroing(), z4.VnD()); 1772 __ fcvtzs(z16.VnS(), p3.Merging(), z28.VnD()); 1773 1774 __ movprfx(z31.VnD(), p4.Merging(), z1.VnD()); 1775 __ fcvtzu(z31.VnD(), p4.Merging(), z1.VnH()); 1776 1777 __ movprfx(z23.VnH(), p0.Zeroing(), z28.VnH()); 1778 __ fcvtzu(z23.VnH(), p0.Merging(), z28.VnH()); 1779 1780 __ movprfx(z2, z12); 1781 __ fcvtzu(z2.VnD(), p3.Merging(), z28.VnS()); 1782 1783 __ movprfx(z4, z7); 1784 __ fcvtzu(z4.VnS(), p7.Merging(), z16.VnD()); 1785 1786 __ movprfx(z13.VnS(), p3.Zeroing(), z23.VnS()); 1787 __ fdiv(z13.VnS(), p3.Merging(), z13.VnS(), z23.VnS()); 1788 1789 __ movprfx(z6.VnD(), p1.Zeroing(), z16.VnD()); 1790 __ fdivr(z6.VnD(), p1.Merging(), z6.VnD(), z5.VnD()); 1791 1792 __ movprfx(z31, z23); 1793 __ fmad(z31.VnS(), p5.Merging(), z23.VnS(), z11.VnS()); 1794 1795 __ movprfx(z14.VnH(), p7.Merging(), z21.VnH()); 1796 __ fmax(z14.VnH(), p7.Merging(), z14.VnH(), 0.0); 1797 1798 __ movprfx(z17.VnS(), p4.Merging(), z9.VnS()); 1799 __ fmax(z17.VnS(), p4.Merging(), z17.VnS(), z9.VnS()); 1800 1801 __ movprfx(z1.VnS(), p3.Zeroing(), z30.VnS()); 1802 __ fmaxnm(z1.VnS(), p3.Merging(), z1.VnS(), 0.0); 1803 1804 __ movprfx(z10.VnD(), p1.Zeroing(), z17.VnD()); 1805 __ fmaxnm(z10.VnD(), p1.Merging(), z10.VnD(), z17.VnD()); 1806 1807 __ movprfx(z3, z13); 1808 __ fmin(z3.VnS(), p0.Merging(), z3.VnS(), 0.0); 1809 1810 __ movprfx(z15, z21); 1811 __ fmin(z15.VnS(), p4.Merging(), z15.VnS(), z21.VnS()); 1812 1813 __ movprfx(z30.VnH(), p7.Zeroing(), z25.VnH()); 1814 __ fminnm(z30.VnH(), p7.Merging(), z30.VnH(), 0.0); 1815 1816 __ movprfx(z31, z15); 1817 __ fminnm(z31.VnD(), p5.Merging(), z31.VnD(), z25.VnD()); 1818 1819 __ movprfx(z27, z28); 1820 __ fmla(z27.VnD(), z28.VnD(), z12.VnD(), 1); 1821 1822 __ movprfx(z26.VnH(), p6.Zeroing(), z13.VnH()); 1823 __ fmla(z26.VnH(), p6.Merging(), z13.VnH(), z7.VnH()); 1824 1825 __ movprfx(z26, z10); 1826 __ fmla(z26.VnH(), z10.VnH(), z1.VnH(), 7); 1827 1828 __ movprfx(z0, z1); 1829 __ fmla(z0.VnS(), z25.VnS(), z1.VnS(), 3); 1830 1831 __ movprfx(z7, z3); 1832 __ fmls(z7.VnD(), z30.VnD(), z3.VnD(), 1); 1833 1834 __ movprfx(z1, z24); 1835 __ fmls(z1.VnD(), p5.Merging(), z20.VnD(), z24.VnD()); 1836 1837 __ movprfx(z19, z18); 1838 __ fmls(z19.VnH(), z18.VnH(), z7.VnH(), 4); 1839 1840 __ movprfx(z0, z26); 1841 __ fmls(z0.VnS(), z17.VnS(), z4.VnS(), 3); 1842 1843 __ movprfx(z19.VnS(), p7.Zeroing(), z6.VnS()); 1844 __ fmov(z19.VnS(), p7.Merging(), 0.0); 1845 1846 __ movprfx(z21, z15); 1847 __ fmov(z21.VnH(), p7.Merging(), 2.5); 1848 1849 __ movprfx(z23, z18); 1850 __ fmsb(z23.VnS(), p4.Merging(), z1.VnS(), z7.VnS()); 1851 1852 __ movprfx(z8, z28); 1853 __ fmul(z8.VnS(), p4.Merging(), z8.VnS(), 2.0); 1854 1855 __ movprfx(z6.VnD(), p6.Merging(), z27.VnD()); 1856 __ fmul(z6.VnD(), p6.Merging(), z6.VnD(), z27.VnD()); 1857 1858 __ movprfx(z6.VnH(), p0.Merging(), z19.VnH()); 1859 __ fmulx(z6.VnH(), p0.Merging(), z6.VnH(), z19.VnH()); 1860 1861 __ movprfx(z5.VnH(), p0.Merging(), z1.VnH()); 1862 __ fneg(z5.VnH(), p0.Merging(), z1.VnH()); 1863 1864 __ movprfx(z22.VnD(), p4.Zeroing(), z24.VnD()); 1865 __ fnmad(z22.VnD(), p4.Merging(), z24.VnD(), z12.VnD()); 1866 1867 __ movprfx(z5.VnS(), p0.Merging(), z29.VnS()); 1868 __ fnmla(z5.VnS(), p0.Merging(), z17.VnS(), z29.VnS()); 1869 1870 __ movprfx(z5, z3); 1871 __ fnmls(z5.VnD(), p5.Merging(), z3.VnD(), z2.VnD()); 1872 1873 __ movprfx(z9.VnD(), p2.Zeroing(), z7.VnD()); 1874 __ fnmsb(z9.VnD(), p2.Merging(), z7.VnD(), z23.VnD()); 1875 1876 // Note that frecpe and frecps _cannot_ take movprfx. 1877 __ movprfx(z12.VnH(), p1.Zeroing(), z17.VnH()); 1878 __ frecpx(z12.VnH(), p1.Merging(), z4.VnH()); 1879 1880 __ movprfx(z28.VnS(), p4.Zeroing(), z27.VnS()); 1881 __ frinta(z28.VnS(), p4.Merging(), z24.VnS()); 1882 1883 __ movprfx(z7.VnD(), p7.Merging(), z25.VnD()); 1884 __ frinti(z7.VnD(), p7.Merging(), z25.VnD()); 1885 1886 __ movprfx(z10, z21); 1887 __ frintm(z10.VnD(), p5.Merging(), z26.VnD()); 1888 1889 __ movprfx(z25, z21); 1890 __ frintn(z25.VnH(), p4.Merging(), z1.VnH()); 1891 1892 __ movprfx(z25, z9); 1893 __ frintp(z25.VnH(), p1.Merging(), z9.VnH()); 1894 1895 __ movprfx(z30, z16); 1896 __ frintx(z30.VnS(), p1.Merging(), z16.VnS()); 1897 1898 __ movprfx(z0.VnD(), p5.Merging(), z9.VnD()); 1899 __ frintz(z0.VnD(), p5.Merging(), z23.VnD()); 1900 1901 __ movprfx(z11.VnD(), p7.Merging(), z2.VnD()); 1902 __ fscale(z11.VnD(), p7.Merging(), z11.VnD(), z2.VnD()); 1903 1904 __ movprfx(z23.VnS(), p4.Merging(), z17.VnS()); 1905 __ fsqrt(z23.VnS(), p4.Merging(), z10.VnS()); 1906 1907 __ movprfx(z0.VnD(), p2.Merging(), z26.VnD()); 1908 __ fsub(z0.VnD(), p2.Merging(), z0.VnD(), 1.0); 1909 1910 __ movprfx(z28.VnD(), p1.Zeroing(), z16.VnD()); 1911 __ fsub(z28.VnD(), p1.Merging(), z28.VnD(), z16.VnD()); 1912 1913 __ movprfx(z22, z27); 1914 __ fsubr(z22.VnD(), p4.Merging(), z22.VnD(), 1.0); 1915 1916 __ movprfx(z4.VnS(), p2.Merging(), z26.VnS()); 1917 __ fsubr(z4.VnS(), p2.Merging(), z4.VnS(), z26.VnS()); 1918 1919 // Note that ftsmul and ftssel _cannot_ take movprfx. 1920 __ movprfx(z10, z4); 1921 __ ftmad(z10.VnS(), z10.VnS(), z4.VnS(), 2); 1922 1923 __ movprfx(z2, z16); 1924 __ scvtf(z2.VnD(), p1.Merging(), z16.VnS()); 1925 1926 __ movprfx(z10, z20); 1927 __ scvtf(z10.VnD(), p5.Merging(), z20.VnD()); 1928 1929 __ movprfx(z29, z28); 1930 __ scvtf(z29.VnS(), p0.Merging(), z31.VnD()); 1931 1932 __ movprfx(z26.VnD(), p3.Merging(), z13.VnD()); 1933 __ scvtf(z26.VnH(), p3.Merging(), z5.VnD()); 1934 1935 __ movprfx(z7.VnD(), p3.Zeroing(), z26.VnD()); 1936 __ ucvtf(z7.VnD(), p3.Merging(), z26.VnS()); 1937 1938 __ movprfx(z13, z17); 1939 __ ucvtf(z13.VnD(), p7.Merging(), z17.VnD()); 1940 1941 __ movprfx(z24.VnD(), p1.Merging(), z31.VnD()); 1942 __ ucvtf(z24.VnS(), p1.Merging(), z18.VnD()); 1943 1944 __ movprfx(z17.VnD(), p4.Merging(), z22.VnD()); 1945 __ ucvtf(z17.VnH(), p4.Merging(), z4.VnD()); 1946 1947 __ movprfx(z30, z5); 1948 __ fmmla(z30.VnS(), z29.VnS(), z18.VnS()); 1949 1950 __ movprfx(z31, z5); 1951 __ fmmla(z31.VnD(), z30.VnD(), z18.VnD()); 1952 } 1953 assm.FinalizeCode(); 1954 1955 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); 1956} 1957 1958TEST(movprfx_positive_sve2) { 1959 Assembler assm; 1960 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 1961 { 1962 // We have to use the Assembler directly to generate movprfx, so we need 1963 // to manually reserve space for the code we're about to emit. 1964 static const size_t kPairCount = 145; 1965 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 1966 1967 __ movprfx(z25, z26); 1968 __ adclb(z25.VnS(), z17.VnS(), z24.VnS()); 1969 1970 __ movprfx(z0, z1); 1971 __ adclt(z0.VnS(), z2.VnS(), z15.VnS()); 1972 1973 __ movprfx(z3, z4); 1974 __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB()); 1975 1976 __ movprfx(z6, z7); 1977 __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()); 1978 1979 __ movprfx(z18, z19); 1980 __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()); 1981 1982 __ movprfx(z7, z8); 1983 __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()); 1984 1985 __ movprfx(z21, z22); 1986 __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()); 1987 1988 __ movprfx(z5, z6); 1989 __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90); 1990 1991 __ movprfx(z7, z8); 1992 __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0); 1993 1994 __ movprfx(z7, z8); 1995 __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0); 1996 1997 __ movprfx(z7, z8); 1998 __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0); 1999 2000 __ movprfx(z19, z20); 2001 __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0); 2002 2003 __ movprfx(z19, z20); 2004 __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0); 2005 2006 __ movprfx(z19, z20); 2007 __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0); 2008 2009 __ movprfx(z10, z11); 2010 __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()); 2011 2012 __ movprfx(z3, z4); 2013 __ eorbt(z3.VnB(), z10.VnB(), z8.VnB()); 2014 2015 __ movprfx(z20, z22); 2016 __ eortb(z20.VnB(), z21.VnB(), z15.VnB()); 2017 2018 __ movprfx(z14, z15); 2019 __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()); 2020 2021 __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); 2022 __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); 2023 2024 __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); 2025 __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); 2026 2027 __ movprfx(z2, z3); 2028 __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()); 2029 2030 __ movprfx(z22, z23); 2031 __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()); 2032 2033 __ movprfx(z1, z2); 2034 __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()); 2035 2036 __ movprfx(z16, z17); 2037 __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()); 2038 2039 __ movprfx(z16, z17); 2040 __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH()); 2041 2042 __ movprfx(z16, z17); 2043 __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0); 2044 2045 __ movprfx(z18, z19); 2046 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH()); 2047 2048 __ movprfx(z18, z19); 2049 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0); 2050 2051 __ movprfx(z16, z17); 2052 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH()); 2053 2054 __ movprfx(z16, z17); 2055 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0); 2056 2057 __ movprfx(z3, z4); 2058 __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH()); 2059 2060 __ movprfx(z3, z4); 2061 __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0); 2062 2063 __ movprfx(z2, z3); 2064 __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2065 2066 __ movprfx(z2, z3); 2067 __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2068 2069 __ movprfx(z2, z3); 2070 __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2071 2072 __ movprfx(z2, z3); 2073 __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2074 2075 __ movprfx(z2, z3); 2076 __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2077 2078 __ movprfx(z2, z3); 2079 __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2080 2081 __ movprfx(z17, z18); 2082 __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()); 2083 2084 __ movprfx(z13, z14); 2085 __ saba(z13.VnB(), z2.VnB(), z31.VnB()); 2086 2087 __ movprfx(z13, z14); 2088 __ sabalb(z13.VnD(), z20.VnS(), z26.VnS()); 2089 2090 __ movprfx(z14, z15); 2091 __ sabalt(z14.VnD(), z19.VnS(), z10.VnS()); 2092 2093 __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); 2094 __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); 2095 2096 __ movprfx(z17, z18); 2097 __ sbclb(z17.VnS(), z10.VnS(), z8.VnS()); 2098 2099 __ movprfx(z20, z21); 2100 __ sbclt(z20.VnS(), z0.VnS(), z13.VnS()); 2101 2102 __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); 2103 __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); 2104 2105 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 2106 __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); 2107 2108 __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); 2109 __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); 2110 2111 __ movprfx(z5, z6); 2112 __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()); 2113 2114 __ movprfx(z27, z28); 2115 __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()); 2116 2117 __ movprfx(z1, z2); 2118 __ smlalb(z1.VnD(), z3.VnS(), z23.VnS()); 2119 2120 __ movprfx(z1, z2); 2121 __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2122 2123 __ movprfx(z1, z2); 2124 __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2125 2126 __ movprfx(z1, z2); 2127 __ smlalt(z1.VnD(), z3.VnS(), z23.VnS()); 2128 2129 __ movprfx(z1, z2); 2130 __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2131 2132 __ movprfx(z1, z2); 2133 __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2134 2135 __ movprfx(z1, z2); 2136 __ smlslb(z1.VnD(), z3.VnS(), z23.VnS()); 2137 2138 __ movprfx(z1, z2); 2139 __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2140 2141 __ movprfx(z1, z2); 2142 __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2143 2144 __ movprfx(z1, z2); 2145 __ smlslt(z1.VnD(), z3.VnS(), z23.VnS()); 2146 2147 __ movprfx(z1, z2); 2148 __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2149 2150 __ movprfx(z1, z2); 2151 __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2152 2153 __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); 2154 __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); 2155 2156 __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); 2157 __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); 2158 2159 __ movprfx(z20, z21); 2160 __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90); 2161 2162 __ movprfx(z6, z7); 2163 __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()); 2164 2165 __ movprfx(z6, z7); 2166 __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0); 2167 2168 __ movprfx(z6, z7); 2169 __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0); 2170 2171 __ movprfx(z23, z24); 2172 __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()); 2173 2174 __ movprfx(z11, z12); 2175 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS()); 2176 2177 __ movprfx(z11, z12); 2178 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0); 2179 2180 __ movprfx(z11, z12); 2181 __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0); 2182 2183 __ movprfx(z16, z17); 2184 __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()); 2185 2186 __ movprfx(z16, z17); 2187 __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0); 2188 2189 __ movprfx(z16, z17); 2190 __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0); 2191 2192 __ movprfx(z26, z27); 2193 __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()); 2194 2195 __ movprfx(z21, z22); 2196 __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()); 2197 2198 __ movprfx(z21, z22); 2199 __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0); 2200 2201 __ movprfx(z21, z22); 2202 __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0); 2203 2204 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 2205 __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); 2206 2207 __ movprfx(z31, z0); 2208 __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0); 2209 2210 __ movprfx(z31, z0); 2211 __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0); 2212 2213 __ movprfx(z31, z0); 2214 __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0); 2215 2216 __ movprfx(z27, z28); 2217 __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()); 2218 2219 __ movprfx(z27, z28); 2220 __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0); 2221 2222 __ movprfx(z27, z28); 2223 __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0); 2224 2225 __ movprfx(z27, z28); 2226 __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0); 2227 2228 __ movprfx(z11, z12); 2229 __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()); 2230 2231 __ movprfx(z11, z12); 2232 __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0); 2233 2234 __ movprfx(z11, z12); 2235 __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0); 2236 2237 __ movprfx(z11, z12); 2238 __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0); 2239 2240 __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); 2241 __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); 2242 2243 __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); 2244 __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); 2245 2246 __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); 2247 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); 2248 2249 __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); 2250 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); 2251 2252 __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); 2253 __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); 2254 2255 __ movprfx(z10.VnB(), p1.Merging(), z11.VnB()); 2256 __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); 2257 2258 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 2259 __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 2260 2261 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 2262 __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 2263 2264 __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); 2265 __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); 2266 2267 __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); 2268 __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()); 2269 2270 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 2271 __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); 2272 2273 __ movprfx(z12.VnB(), p0.Merging(), z13.VnB()); 2274 __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); 2275 2276 __ movprfx(z0, z1); 2277 __ srsra(z0.VnB(), z8.VnB(), 1); 2278 2279 __ movprfx(z0, z1); 2280 __ ssra(z0.VnB(), z8.VnB(), 1); 2281 2282 __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); 2283 __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); 2284 2285 __ movprfx(z23, z24); 2286 __ uaba(z23.VnB(), z22.VnB(), z20.VnB()); 2287 2288 __ movprfx(z11, z12); 2289 __ uabalb(z11.VnD(), z25.VnS(), z12.VnS()); 2290 2291 __ movprfx(z4, z5); 2292 __ uabalt(z4.VnD(), z2.VnS(), z31.VnS()); 2293 2294 __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); 2295 __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); 2296 2297 __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); 2298 __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); 2299 2300 __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); 2301 __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); 2302 2303 __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); 2304 __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); 2305 2306 __ movprfx(z7, z8); 2307 __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()); 2308 2309 __ movprfx(z10, z11); 2310 __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()); 2311 2312 __ movprfx(z31, z0); 2313 __ umlalb(z31.VnD(), z9.VnS(), z21.VnS()); 2314 2315 __ movprfx(z31, z0); 2316 __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0); 2317 2318 __ movprfx(z31, z0); 2319 __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0); 2320 2321 __ movprfx(z11, z12); 2322 __ umlalt(z11.VnD(), z5.VnS(), z22.VnS()); 2323 2324 __ movprfx(z11, z12); 2325 __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0); 2326 2327 __ movprfx(z11, z12); 2328 __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0); 2329 2330 __ movprfx(z28, z29); 2331 __ umlslb(z28.VnD(), z13.VnS(), z9.VnS()); 2332 2333 __ movprfx(z28, z29); 2334 __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0); 2335 2336 __ movprfx(z28, z29); 2337 __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0); 2338 2339 __ movprfx(z9, z10); 2340 __ umlslt(z9.VnD(), z12.VnS(), z30.VnS()); 2341 2342 __ movprfx(z9, z10); 2343 __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0); 2344 2345 __ movprfx(z9, z10); 2346 __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0); 2347 2348 __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); 2349 __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), 2350 2351 __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); 2352 __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); 2353 2354 __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); 2355 __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); 2356 2357 __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); 2358 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); 2359 2360 __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); 2361 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); 2362 2363 __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); 2364 __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); 2365 2366 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 2367 __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 2368 2369 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 2370 __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 2371 2372 __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); 2373 __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); 2374 2375 __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); 2376 __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); 2377 2378 __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); 2379 __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); 2380 2381 __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); 2382 __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); 2383 2384 __ movprfx(z31.VnB(), p2.Merging(), z0.VnB()); 2385 __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); 2386 2387 __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); 2388 __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); 2389 2390 __ movprfx(z0, z1); 2391 __ ursra(z0.VnB(), z8.VnB(), 1); 2392 2393 __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); 2394 __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); 2395 2396 __ movprfx(z0, z1); 2397 __ usra(z0.VnB(), z8.VnB(), 1); 2398 2399 __ movprfx(z16, z17); 2400 __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1); 2401 } 2402 assm.FinalizeCode(); 2403 2404 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true); 2405} 2406 2407TEST(movprfx_negative_instructions_sve2) { 2408 Assembler assm; 2409 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, 2410 CPUFeatures::kSVE2, 2411 CPUFeatures::kSVEBitPerm); 2412 { 2413 // We have to use the Assembler directly to generate movprfx, so we need 2414 // to manually reserve space for the code we're about to emit. 2415 static const size_t kPairCount = 134; 2416 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 2417 2418 __ movprfx(z29, z30); 2419 __ addhnb(z29.VnS(), z19.VnD(), z2.VnD()); 2420 2421 __ movprfx(z8, z9); 2422 __ addhnt(z8.VnS(), z12.VnD(), z6.VnD()); 2423 2424 __ movprfx(z18, z19); 2425 __ bdep(z18.VnB(), z10.VnB(), z0.VnB()); 2426 2427 __ movprfx(z6, z7); 2428 __ bext(z6.VnB(), z2.VnB(), z5.VnB()); 2429 2430 __ movprfx(z24, z25); 2431 __ bgrp(z24.VnB(), z9.VnB(), z5.VnB()); 2432 2433 __ movprfx(z1, z2); 2434 __ fcvtlt(z1.VnD(), p1.Merging(), z28.VnS()); 2435 2436 __ movprfx(z1, z2); 2437 __ fcvtlt(z1.VnS(), p1.Merging(), z28.VnH()); 2438 2439 __ movprfx(z4, z5); 2440 __ fcvtnt(z4.VnH(), p7.Merging(), z0.VnS()); 2441 2442 __ movprfx(z4, z5); 2443 __ fcvtnt(z4.VnS(), p7.Merging(), z0.VnD()); 2444 2445 __ movprfx(z27, z28); 2446 __ fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD()); 2447 2448 __ movprfx(z24, z25); 2449 __ histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS()); 2450 2451 __ movprfx(z22, z23); 2452 __ histseg(z22.VnB(), z14.VnB(), z8.VnB()); 2453 2454 __ movprfx(z21, z22); 2455 __ ldnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), x23)); 2456 2457 __ movprfx(z21, z22); 2458 __ ldnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23)); 2459 2460 __ movprfx(z10, z11); 2461 __ ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z23.VnD(), x6)); 2462 2463 __ movprfx(z30, z31); 2464 __ ldnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x11)); 2465 2466 __ movprfx(z30, z31); 2467 __ ldnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x11)); 2468 2469 __ movprfx(z7, z8); 2470 __ ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11)); 2471 2472 __ movprfx(z7, z8); 2473 __ ldnt1sb(z7.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), x11)); 2474 2475 __ movprfx(z17, z18); 2476 __ ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19)); 2477 2478 __ movprfx(z17, z18); 2479 __ ldnt1sh(z17.VnD(), p5.Zeroing(), SVEMemOperand(z31.VnD(), x19)); 2480 2481 __ movprfx(z3, z4); 2482 __ ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10)); 2483 2484 __ movprfx(z0, z1); 2485 __ ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1)); 2486 2487 __ movprfx(z0, z1); 2488 __ ldnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1)); 2489 2490 __ movprfx(z18, z19); 2491 __ match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB()); 2492 2493 __ movprfx(z15, z16); 2494 __ mul(z15.VnB(), z15.VnB(), z15.VnB()); 2495 2496 __ movprfx(z15, z16); 2497 __ mul(z15.VnH(), z15.VnH(), z1.VnH(), 0); 2498 2499 __ movprfx(z15, z16); 2500 __ mul(z15.VnS(), z15.VnS(), z1.VnS(), 0); 2501 2502 __ movprfx(z15, z16); 2503 __ mul(z15.VnD(), z15.VnD(), z1.VnD(), 0); 2504 2505 __ movprfx(z20, z21); 2506 __ nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB()); 2507 2508 __ movprfx(z0, z1); 2509 __ pmul(z0.VnB(), z5.VnB(), z5.VnB()); 2510 2511 __ movprfx(z12, z13); 2512 __ pmullb(z12.VnD(), z21.VnS(), z12.VnS()); 2513 2514 __ movprfx(z31, z0); 2515 __ pmullt(z31.VnD(), z30.VnS(), z26.VnS()); 2516 2517 __ movprfx(z0, z1); 2518 __ raddhnb(z0.VnS(), z11.VnD(), z10.VnD()); 2519 2520 __ movprfx(z23, z24); 2521 __ raddhnt(z23.VnS(), z27.VnD(), z9.VnD()); 2522 2523 __ movprfx(z5, z6); 2524 __ rshrnb(z5.VnB(), z1.VnH(), 1); 2525 2526 __ movprfx(z5, z6); 2527 __ rshrnt(z5.VnB(), z1.VnH(), 8); 2528 2529 __ movprfx(z30, z31); 2530 __ rsubhnb(z30.VnS(), z29.VnD(), z11.VnD()); 2531 2532 __ movprfx(z25, z26); 2533 __ rsubhnt(z25.VnS(), z7.VnD(), z18.VnD()); 2534 2535 __ movprfx(z2, z3); 2536 __ sabdlb(z2.VnD(), z21.VnS(), z3.VnS()); 2537 2538 __ movprfx(z25, z26); 2539 __ sabdlt(z25.VnD(), z23.VnS(), z17.VnS()); 2540 2541 __ movprfx(z24, z25); 2542 __ saddlb(z24.VnD(), z30.VnS(), z16.VnS()); 2543 2544 __ movprfx(z15, z16); 2545 __ saddlbt(z15.VnD(), z6.VnS(), z18.VnS()); 2546 2547 __ movprfx(z21, z22); 2548 __ saddlt(z21.VnD(), z29.VnS(), z31.VnS()); 2549 2550 __ movprfx(z12, z13); 2551 __ saddwb(z12.VnD(), z8.VnD(), z8.VnS()); 2552 2553 __ movprfx(z24, z25); 2554 __ saddwt(z24.VnD(), z0.VnD(), z3.VnS()); 2555 2556 __ movprfx(z7, z8); 2557 __ shrnb(z7.VnB(), z4.VnH(), 1); 2558 2559 __ movprfx(z21, z22); 2560 __ shrnt(z21.VnB(), z29.VnH(), 1); 2561 2562 __ movprfx(z29, z30); 2563 __ sli(z29.VnB(), z7.VnB(), 0); 2564 2565 __ movprfx(z23, z24); 2566 __ smulh(z23.VnB(), z23.VnB(), z3.VnB()); 2567 2568 __ movprfx(z10, z11); 2569 __ smullb(z10.VnD(), z4.VnS(), z4.VnS()); 2570 2571 __ movprfx(z10, z11); 2572 __ smullb(z10.VnS(), z4.VnH(), z4.VnH(), 0); 2573 2574 __ movprfx(z10, z11); 2575 __ smullb(z10.VnD(), z4.VnS(), z4.VnS(), 0); 2576 2577 __ movprfx(z31, z0); 2578 __ smullt(z31.VnD(), z26.VnS(), z5.VnS()); 2579 2580 __ movprfx(z31, z0); 2581 __ smullt(z31.VnS(), z26.VnH(), z5.VnH(), 0); 2582 2583 __ movprfx(z31, z0); 2584 __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0); 2585 2586 __ movprfx(z4, z5); 2587 __ splice_con(z4.VnB(), p7.Merging(), z0.VnB(), z1.VnB()); 2588 2589 __ movprfx(z18, z19); 2590 __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB()); 2591 2592 __ movprfx(z18, z19); 2593 __ sqdmulh(z18.VnH(), z25.VnH(), z1.VnH(), 0); 2594 2595 __ movprfx(z18, z19); 2596 __ sqdmulh(z18.VnS(), z25.VnS(), z1.VnS(), 0); 2597 2598 __ movprfx(z18, z19); 2599 __ sqdmulh(z18.VnD(), z25.VnD(), z1.VnD(), 0); 2600 2601 __ movprfx(z1, z2); 2602 __ sqdmullb(z1.VnD(), z31.VnS(), z21.VnS()); 2603 2604 __ movprfx(z1, z2); 2605 __ sqdmullb(z1.VnS(), z31.VnH(), z1.VnH(), 0); 2606 2607 __ movprfx(z1, z2); 2608 __ sqdmullb(z1.VnD(), z31.VnS(), z1.VnS(), 0); 2609 2610 __ movprfx(z2, z3); 2611 __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS()); 2612 2613 __ movprfx(z2, z3); 2614 __ sqdmullt(z2.VnS(), z1.VnH(), z5.VnH(), 0); 2615 2616 __ movprfx(z2, z3); 2617 __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS(), 0); 2618 2619 __ movprfx(z21, z22); 2620 __ sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB()); 2621 2622 __ movprfx(z21, z22); 2623 __ sqrdmulh(z21.VnH(), z21.VnH(), z2.VnH(), 0); 2624 2625 __ movprfx(z21, z22); 2626 __ sqrdmulh(z21.VnS(), z21.VnS(), z2.VnS(), 0); 2627 2628 __ movprfx(z21, z22); 2629 __ sqrdmulh(z21.VnD(), z21.VnD(), z2.VnD(), 0); 2630 2631 __ movprfx(z1, z2); 2632 __ sqrshrnb(z1.VnB(), z1.VnH(), 1); 2633 2634 __ movprfx(z24, z25); 2635 __ sqrshrnt(z24.VnB(), z19.VnH(), 8); 2636 2637 __ movprfx(z23, z24); 2638 __ sqrshrunb(z23.VnB(), z28.VnH(), 1); 2639 2640 __ movprfx(z9, z10); 2641 __ sqrshrunt(z9.VnB(), z15.VnH(), 8); 2642 2643 __ movprfx(z25, z26); 2644 __ sqshrnb(z25.VnB(), z1.VnH(), 1); 2645 2646 __ movprfx(z0, z1); 2647 __ sqshrnt(z0.VnB(), z25.VnH(), 8); 2648 2649 __ movprfx(z25, z26); 2650 __ sqshrunb(z25.VnB(), z10.VnH(), 1); 2651 2652 __ movprfx(z20, z21); 2653 __ sqshrunt(z20.VnB(), z3.VnH(), 8); 2654 2655 __ movprfx(z2, z3); 2656 __ sqxtnb(z2.VnB(), z0.VnH()); 2657 2658 __ movprfx(z31, z0); 2659 __ sqxtnt(z31.VnB(), z18.VnH()); 2660 2661 __ movprfx(z28, z29); 2662 __ sqxtunb(z28.VnB(), z6.VnH()); 2663 2664 __ movprfx(z14, z15); 2665 __ sqxtunt(z14.VnB(), z31.VnH()); 2666 2667 __ movprfx(z6, z7); 2668 __ sri(z6.VnB(), z9.VnB(), 1); 2669 2670 __ movprfx(z2, z3); 2671 __ sshllb(z2.VnH(), z20.VnB(), 0); 2672 2673 __ movprfx(z27, z28); 2674 __ sshllt(z27.VnH(), z8.VnB(), 0); 2675 2676 __ movprfx(z4, z5); 2677 __ ssublb(z4.VnD(), z23.VnS(), z7.VnS()); 2678 2679 __ movprfx(z6, z7); 2680 __ ssublbt(z6.VnD(), z28.VnS(), z12.VnS()); 2681 2682 __ movprfx(z12, z13); 2683 __ ssublt(z12.VnD(), z13.VnS(), z6.VnS()); 2684 2685 __ movprfx(z11, z12); 2686 __ ssubltb(z11.VnD(), z18.VnS(), z19.VnS()); 2687 2688 __ movprfx(z7, z8); 2689 __ ssubwb(z7.VnD(), z28.VnD(), z11.VnS()); 2690 2691 __ movprfx(z29, z30); 2692 __ ssubwt(z29.VnD(), z25.VnD(), z20.VnS()); 2693 2694 __ movprfx(z21, z22); 2695 __ stnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z1.VnS(), x23)); 2696 2697 __ movprfx(z21, z22); 2698 __ stnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23)); 2699 2700 __ movprfx(z10, z11); 2701 __ stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x23)); 2702 2703 __ movprfx(z30, z31); 2704 __ stnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x6)); 2705 2706 __ movprfx(z30, z31); 2707 __ stnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x6)); 2708 2709 __ movprfx(z0, z1); 2710 __ stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1)); 2711 2712 __ movprfx(z0, z1); 2713 __ stnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1)); 2714 2715 __ movprfx(z31, z0); 2716 __ subhnb(z31.VnS(), z31.VnD(), z7.VnD()); 2717 2718 __ movprfx(z31, z0); 2719 __ subhnt(z31.VnS(), z22.VnD(), z27.VnD()); 2720 2721 __ movprfx(z24, z25); 2722 __ tbl(z24.VnB(), z29.VnB(), z30.VnB(), z0.VnB()); 2723 2724 __ movprfx(z22, z23); 2725 __ tbx(z22.VnB(), z15.VnB(), z19.VnB()); 2726 2727 __ movprfx(z1, z2); 2728 __ uabdlb(z1.VnD(), z26.VnS(), z12.VnS()); 2729 2730 __ movprfx(z25, z26); 2731 __ uabdlt(z25.VnD(), z29.VnS(), z14.VnS()); 2732 2733 __ movprfx(z3, z4); 2734 __ uaddlb(z3.VnD(), z5.VnS(), z2.VnS()); 2735 2736 __ movprfx(z15, z16); 2737 __ uaddlt(z15.VnD(), z28.VnS(), z20.VnS()); 2738 2739 __ movprfx(z31, z0); 2740 __ uaddwb(z31.VnD(), z8.VnD(), z25.VnS()); 2741 2742 __ movprfx(z17, z18); 2743 __ uaddwt(z17.VnD(), z15.VnD(), z2.VnS()); 2744 2745 __ movprfx(z12, z13); 2746 __ umulh(z12.VnB(), z12.VnB(), z17.VnB()); 2747 2748 __ movprfx(z12, z13); 2749 __ umullb(z12.VnD(), z5.VnS(), z2.VnS()); 2750 2751 __ movprfx(z12, z13); 2752 __ umullb(z12.VnS(), z5.VnH(), z2.VnH(), 0); 2753 2754 __ movprfx(z12, z13); 2755 __ umullb(z12.VnD(), z5.VnS(), z2.VnS(), 0); 2756 2757 __ movprfx(z24, z25); 2758 __ umullt(z24.VnD(), z6.VnS(), z6.VnS()); 2759 2760 __ movprfx(z24, z25); 2761 __ umullt(z24.VnS(), z6.VnH(), z1.VnH(), 0); 2762 2763 __ movprfx(z24, z25); 2764 __ umullt(z24.VnD(), z6.VnS(), z1.VnS(), 0); 2765 2766 __ movprfx(z30, z31); 2767 __ uqrshrnb(z30.VnB(), z25.VnH(), 1); 2768 2769 __ movprfx(z3, z4); 2770 __ uqrshrnt(z3.VnB(), z25.VnH(), 8); 2771 2772 __ movprfx(z17, z18); 2773 __ uqshrnb(z17.VnB(), z4.VnH(), 1); 2774 2775 __ movprfx(z28, z29); 2776 __ uqshrnt(z28.VnB(), z18.VnH(), 8); 2777 2778 __ movprfx(z28, z29); 2779 __ uqxtnb(z28.VnB(), z4.VnH()); 2780 2781 __ movprfx(z19, z20); 2782 __ uqxtnt(z19.VnB(), z7.VnH()); 2783 2784 __ movprfx(z8, z9); 2785 __ ushllb(z8.VnH(), z31.VnB(), 0); 2786 2787 __ movprfx(z3, z4); 2788 __ ushllt(z3.VnH(), z21.VnB(), 0); 2789 2790 __ movprfx(z25, z26); 2791 __ usublb(z25.VnD(), z9.VnS(), z17.VnS()); 2792 2793 __ movprfx(z5, z6); 2794 __ usublt(z5.VnD(), z11.VnS(), z15.VnS()); 2795 2796 __ movprfx(z10, z11); 2797 __ usubwb(z10.VnD(), z13.VnD(), z20.VnS()); 2798 2799 __ movprfx(z15, z16); 2800 __ usubwt(z15.VnD(), z8.VnD(), z23.VnS()); 2801 2802 __ movprfx(z20, z21); 2803 __ whilege(p0.VnB(), w20, w29); 2804 2805 __ movprfx(z24, z25); 2806 __ whilegt(p11.VnB(), w24, w3); 2807 2808 __ movprfx(z20, z21); 2809 __ whilehi(p2.VnB(), x20, x8); 2810 2811 __ movprfx(z22, z23); 2812 __ whilehs(p4.VnB(), w22, w9); 2813 2814 __ movprfx(z25, z26); 2815 __ whilerw(p7.VnB(), x25, x27); 2816 2817 __ movprfx(z14, z15); 2818 __ whilewr(p8.VnB(), x14, x14); 2819 } 2820 assm.FinalizeCode(); 2821 2822 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 2823} 2824 2825TEST(movprfx_negative_predication_sve2) { 2826 Assembler assm; 2827 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 2828 { 2829 // We have to use the Assembler directly to generate movprfx, so we need 2830 // to manually reserve space for the code we're about to emit. 2831 static const size_t kPairCount = 140; 2832 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 2833 2834 __ movprfx(z25.VnS(), p0.Zeroing(), z26.VnS()); 2835 __ adclb(z25.VnS(), z17.VnS(), z24.VnS()); 2836 2837 __ movprfx(z0.VnS(), p0.Zeroing(), z1.VnS()); 2838 __ adclt(z0.VnS(), z2.VnS(), z15.VnS()); 2839 2840 __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); 2841 __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()); 2842 2843 __ movprfx(z18.VnD(), p0.Zeroing(), z19.VnD()); 2844 __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()); 2845 2846 __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD()); 2847 __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()); 2848 2849 __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); 2850 __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()); 2851 2852 __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB()); 2853 __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90); 2854 2855 __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS()); 2856 __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0); 2857 2858 __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS()); 2859 __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0); 2860 2861 __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD()); 2862 __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0); 2863 2864 __ movprfx(z19.VnB(), p0.Zeroing(), z20.VnB()); 2865 __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0); 2866 2867 __ movprfx(z19.VnS(), p0.Zeroing(), z20.VnS()); 2868 __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0); 2869 2870 __ movprfx(z19.VnH(), p0.Zeroing(), z20.VnH()); 2871 __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0); 2872 2873 __ movprfx(z10.VnD(), p0.Zeroing(), z11.VnD()); 2874 __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()); 2875 2876 __ movprfx(z3.VnB(), p0.Zeroing(), z4.VnB()); 2877 __ eorbt(z3.VnB(), z10.VnB(), z8.VnB()); 2878 2879 __ movprfx(z20.VnB(), p0.Zeroing(), z22.VnB()); 2880 __ eortb(z20.VnB(), z21.VnB(), z15.VnB()); 2881 2882 __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD()); 2883 __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()); 2884 2885 __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); 2886 __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()); 2887 2888 __ movprfx(z22.VnD(), p0.Zeroing(), z23.VnD()); 2889 __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()); 2890 2891 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2892 __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()); 2893 2894 __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); 2895 __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()); 2896 2897 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2898 __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH()); 2899 2900 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2901 __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0); 2902 2903 __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS()); 2904 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH()); 2905 2906 __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS()); 2907 __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0); 2908 2909 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2910 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH()); 2911 2912 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 2913 __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0); 2914 2915 __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS()); 2916 __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH()); 2917 2918 __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS()); 2919 __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0); 2920 2921 __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH()); 2922 __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2923 2924 __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS()); 2925 __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2926 2927 __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); 2928 __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2929 2930 __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH()); 2931 __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0); 2932 2933 __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS()); 2934 __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0); 2935 2936 __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD()); 2937 __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0); 2938 2939 __ movprfx(z17.VnD(), p0.Zeroing(), z18.VnD()); 2940 __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()); 2941 2942 __ movprfx(z13.VnB(), p0.Zeroing(), z14.VnB()); 2943 __ saba(z13.VnB(), z2.VnB(), z31.VnB()); 2944 2945 __ movprfx(z13.VnD(), p0.Zeroing(), z14.VnD()); 2946 __ sabalb(z13.VnD(), z20.VnS(), z26.VnS()); 2947 2948 __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD()); 2949 __ sabalt(z14.VnD(), z19.VnS(), z10.VnS()); 2950 2951 __ movprfx(z17.VnS(), p0.Zeroing(), z18.VnS()); 2952 __ sbclb(z17.VnS(), z10.VnS(), z8.VnS()); 2953 2954 __ movprfx(z20.VnS(), p0.Zeroing(), z21.VnS()); 2955 __ sbclt(z20.VnS(), z0.VnS(), z13.VnS()); 2956 2957 __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB()); 2958 __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()); 2959 2960 __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB()); 2961 __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()); 2962 2963 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2964 __ smlalb(z1.VnD(), z3.VnS(), z23.VnS()); 2965 2966 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2967 __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2968 2969 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2970 __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2971 2972 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2973 __ smlalt(z1.VnD(), z3.VnS(), z23.VnS()); 2974 2975 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2976 __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2977 2978 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2979 __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2980 2981 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2982 __ smlslb(z1.VnD(), z3.VnS(), z23.VnS()); 2983 2984 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2985 __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2986 2987 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2988 __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2989 2990 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2991 __ smlslt(z1.VnD(), z3.VnS(), z23.VnS()); 2992 2993 __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD()); 2994 __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0); 2995 2996 __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS()); 2997 __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0); 2998 2999 __ movprfx(z20.VnB(), p0.Zeroing(), z21.VnB()); 3000 __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90); 3001 3002 __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); 3003 __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()); 3004 3005 __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD()); 3006 __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0); 3007 3008 __ movprfx(z6.VnS(), p0.Zeroing(), z7.VnS()); 3009 __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0); 3010 3011 __ movprfx(z23.VnD(), p0.Zeroing(), z24.VnD()); 3012 __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()); 3013 3014 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3015 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS()); 3016 3017 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3018 __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0); 3019 3020 __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); 3021 __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0); 3022 3023 __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); 3024 __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()); 3025 3026 __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD()); 3027 __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0); 3028 3029 __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS()); 3030 __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0); 3031 3032 __ movprfx(z26.VnD(), p0.Zeroing(), z27.VnD()); 3033 __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()); 3034 3035 __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); 3036 __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()); 3037 3038 __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD()); 3039 __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0); 3040 3041 __ movprfx(z21.VnS(), p0.Zeroing(), z22.VnS()); 3042 __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0); 3043 3044 __ movprfx(z31.VnB(), p0.Zeroing(), z0.VnB()); 3045 __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0); 3046 3047 __ movprfx(z31.VnH(), p0.Zeroing(), z0.VnH()); 3048 __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0); 3049 3050 __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS()); 3051 __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0); 3052 3053 __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB()); 3054 __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()); 3055 3056 __ movprfx(z27.VnH(), p0.Zeroing(), z28.VnH()); 3057 __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0); 3058 3059 __ movprfx(z27.VnS(), p0.Zeroing(), z28.VnS()); 3060 __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0); 3061 3062 __ movprfx(z27.VnD(), p0.Zeroing(), z28.VnD()); 3063 __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0); 3064 3065 __ movprfx(z11.VnB(), p0.Zeroing(), z12.VnB()); 3066 __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()); 3067 3068 __ movprfx(z11.VnH(), p0.Zeroing(), z12.VnH()); 3069 __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0); 3070 3071 __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); 3072 __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0); 3073 3074 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3075 __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0); 3076 3077 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3078 __ srsra(z0.VnB(), z8.VnB(), 1); 3079 3080 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3081 __ ssra(z0.VnB(), z8.VnB(), 1); 3082 3083 __ movprfx(z23.VnB(), p0.Zeroing(), z24.VnB()); 3084 __ uaba(z23.VnB(), z22.VnB(), z20.VnB()); 3085 3086 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3087 __ uabalb(z11.VnD(), z25.VnS(), z12.VnS()); 3088 3089 __ movprfx(z4.VnD(), p0.Zeroing(), z5.VnD()); 3090 __ uabalt(z4.VnD(), z2.VnS(), z31.VnS()); 3091 3092 __ movprfx(z7.VnB(), p0.Zeroing(), z8.VnB()); 3093 __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()); 3094 3095 __ movprfx(z10.VnB(), p0.Zeroing(), z11.VnB()); 3096 __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()); 3097 3098 __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD()); 3099 __ umlalb(z31.VnD(), z9.VnS(), z21.VnS()); 3100 3101 __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD()); 3102 __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0); 3103 3104 __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS()); 3105 __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0); 3106 3107 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3108 __ umlalt(z11.VnD(), z5.VnS(), z22.VnS()); 3109 3110 __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD()); 3111 __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0); 3112 3113 __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS()); 3114 __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0); 3115 3116 __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD()); 3117 __ umlslb(z28.VnD(), z13.VnS(), z9.VnS()); 3118 3119 __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD()); 3120 __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0); 3121 3122 __ movprfx(z28.VnS(), p0.Zeroing(), z29.VnS()); 3123 __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0); 3124 3125 __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD()); 3126 __ umlslt(z9.VnD(), z12.VnS(), z30.VnS()); 3127 3128 __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD()); 3129 __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0); 3130 3131 __ movprfx(z9.VnS(), p0.Zeroing(), z10.VnS()); 3132 __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0); 3133 3134 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3135 __ ursra(z0.VnB(), z8.VnB(), 1); 3136 3137 __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB()); 3138 __ usra(z0.VnB(), z8.VnB(), 1); 3139 3140 __ movprfx(z16.VnB(), p0.Zeroing(), z17.VnB()); 3141 __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1); 3142 } 3143 assm.FinalizeCode(); 3144 3145 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 3146} 3147 3148TEST(movprfx_negative_aliasing_sve2) { 3149 Assembler assm; 3150 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 3151 { 3152 // We have to use the Assembler directly to generate movprfx, so we need 3153 // to manually reserve space for the code we're about to emit. 3154 static const size_t kPairCount = 140; 3155 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 3156 3157 __ movprfx(z25, z26); 3158 __ adclb(z25.VnS(), z17.VnS(), z25.VnS()); 3159 3160 __ movprfx(z0, z1); 3161 __ adclt(z0.VnS(), z2.VnS(), z0.VnS()); 3162 3163 __ movprfx(z3, z4); 3164 __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB()); 3165 3166 __ movprfx(z6, z7); 3167 __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z6.VnD()); 3168 3169 __ movprfx(z18, z19); 3170 __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z18.VnD()); 3171 3172 __ movprfx(z7, z8); 3173 __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z7.VnD()); 3174 3175 __ movprfx(z21, z22); 3176 __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z21.VnD()); 3177 3178 __ movprfx(z5, z6); 3179 __ cadd(z5.VnB(), z5.VnB(), z5.VnB(), 90); 3180 3181 __ movprfx(z7, z8); 3182 __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0); 3183 3184 __ movprfx(z7, z8); 3185 __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0, 0); 3186 3187 __ movprfx(z7, z8); 3188 __ cdot(z7.VnD(), z7.VnH(), z0.VnH(), 0, 0); 3189 3190 __ movprfx(z19, z20); 3191 __ cmla(z19.VnB(), z19.VnB(), z2.VnB(), 0); 3192 3193 __ movprfx(z19, z20); 3194 __ cmla(z19.VnS(), z19.VnS(), z2.VnS(), 0, 0); 3195 3196 __ movprfx(z1, z20); 3197 __ cmla(z1.VnH(), z7.VnH(), z1.VnH(), 0, 0); 3198 3199 __ movprfx(z10, z11); 3200 __ eor3(z10.VnD(), z10.VnD(), z10.VnD(), z23.VnD()); 3201 3202 __ movprfx(z3, z4); 3203 __ eorbt(z3.VnB(), z10.VnB(), z3.VnB()); 3204 3205 __ movprfx(z20, z22); 3206 __ eortb(z20.VnB(), z21.VnB(), z20.VnB()); 3207 3208 __ movprfx(z14, z15); 3209 __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z14.VnD()); 3210 3211 __ movprfx(z14.VnD(), p4.Merging(), z15.VnD()); 3212 __ fcvtx(z14.VnS(), p4.Merging(), z14.VnD()); 3213 3214 __ movprfx(z15.VnH(), p0.Merging(), z16.VnH()); 3215 __ flogb(z15.VnH(), p0.Merging(), z15.VnH()); 3216 3217 __ movprfx(z2, z3); 3218 __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z2.VnD()); 3219 3220 __ movprfx(z22, z23); 3221 __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z22.VnD()); 3222 3223 __ movprfx(z1, z2); 3224 __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z1.VnD()); 3225 3226 __ movprfx(z16, z17); 3227 __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z16.VnD()); 3228 3229 __ movprfx(z16, z17); 3230 __ fmlalb(z16.VnS(), z18.VnH(), z16.VnH()); 3231 3232 __ movprfx(z16, z17); 3233 __ fmlalb(z16.VnS(), z16.VnH(), z2.VnH(), 0); 3234 3235 __ movprfx(z18, z19); 3236 __ fmlalt(z18.VnS(), z13.VnH(), z18.VnH()); 3237 3238 __ movprfx(z18, z19); 3239 __ fmlalt(z18.VnS(), z18.VnH(), z5.VnH(), 0); 3240 3241 __ movprfx(z16, z17); 3242 __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH()); 3243 3244 __ movprfx(z16, z17); 3245 __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH(), 0); 3246 3247 __ movprfx(z3, z4); 3248 __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH()); 3249 3250 __ movprfx(z3, z4); 3251 __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH(), 0); 3252 3253 __ movprfx(z2, z3); 3254 __ mla(z2.VnH(), z0.VnH(), z2.VnH(), 0); 3255 3256 __ movprfx(z2, z3); 3257 __ mla(z2.VnS(), z0.VnS(), z2.VnS(), 0); 3258 3259 __ movprfx(z2, z3); 3260 __ mla(z2.VnD(), z0.VnD(), z2.VnD(), 0); 3261 3262 __ movprfx(z2, z3); 3263 __ mls(z2.VnH(), z0.VnH(), z2.VnH(), 0); 3264 3265 __ movprfx(z2, z3); 3266 __ mls(z2.VnS(), z0.VnS(), z2.VnS(), 0); 3267 3268 __ movprfx(z2, z3); 3269 __ mls(z2.VnD(), z0.VnD(), z2.VnD(), 0); 3270 3271 __ movprfx(z17, z18); 3272 __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z17.VnD()); 3273 3274 __ movprfx(z13, z14); 3275 __ saba(z13.VnB(), z2.VnB(), z13.VnB()); 3276 3277 __ movprfx(z13, z14); 3278 __ sabalb(z13.VnD(), z13.VnS(), z26.VnS()); 3279 3280 __ movprfx(z14, z15); 3281 __ sabalt(z14.VnD(), z14.VnS(), z10.VnS()); 3282 3283 __ movprfx(z19.VnD(), p5.Merging(), z20.VnD()); 3284 __ sadalp(z19.VnD(), p5.Merging(), z19.VnS()); 3285 3286 __ movprfx(z17, z18); 3287 __ sbclb(z17.VnS(), z17.VnS(), z8.VnS()); 3288 3289 __ movprfx(z20, z21); 3290 __ sbclt(z20.VnS(), z20.VnS(), z13.VnS()); 3291 3292 __ movprfx(z20.VnB(), p3.Merging(), z21.VnB()); 3293 __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z20.VnB()); 3294 3295 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 3296 __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z21.VnB()); 3297 3298 __ movprfx(z1.VnB(), p0.Merging(), z2.VnB()); 3299 __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z1.VnB()); 3300 3301 __ movprfx(z5, z6); 3302 __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z5.VnB()); 3303 3304 __ movprfx(z27, z28); 3305 __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z27.VnB()); 3306 3307 __ movprfx(z1, z2); 3308 __ smlalb(z1.VnD(), z3.VnS(), z1.VnS()); 3309 3310 __ movprfx(z1, z2); 3311 __ smlalb(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3312 3313 __ movprfx(z1, z2); 3314 __ smlalb(z1.VnS(), z1.VnH(), z2.VnH(), 0); 3315 3316 __ movprfx(z1, z2); 3317 __ smlalt(z1.VnD(), z1.VnS(), z23.VnS()); 3318 3319 __ movprfx(z1, z2); 3320 __ smlalt(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3321 3322 __ movprfx(z1, z2); 3323 __ smlalt(z1.VnS(), z1.VnH(), z2.VnH(), 0); 3324 3325 __ movprfx(z1, z2); 3326 __ smlslb(z1.VnD(), z1.VnS(), z23.VnS()); 3327 3328 __ movprfx(z1, z2); 3329 __ smlslb(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3330 3331 __ movprfx(z1, z2); 3332 __ smlslb(z1.VnS(), z3.VnH(), z1.VnH(), 0); 3333 3334 __ movprfx(z1, z2); 3335 __ smlslt(z1.VnD(), z1.VnS(), z23.VnS()); 3336 3337 __ movprfx(z1, z2); 3338 __ smlslt(z1.VnD(), z3.VnS(), z1.VnS(), 0); 3339 3340 __ movprfx(z1, z2); 3341 __ smlslt(z1.VnS(), z1.VnH(), z2.VnH(), 0); 3342 3343 __ movprfx(z29.VnB(), p1.Merging(), z30.VnB()); 3344 __ sqabs(z29.VnB(), p1.Merging(), z29.VnB()); 3345 3346 __ movprfx(z28.VnB(), p0.Merging(), z29.VnB()); 3347 __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()); 3348 3349 __ movprfx(z20, z21); 3350 __ sqcadd(z20.VnB(), z20.VnB(), z20.VnB(), 90); 3351 3352 __ movprfx(z6, z7); 3353 __ sqdmlalb(z6.VnD(), z6.VnS(), z25.VnS()); 3354 3355 __ movprfx(z6, z7); 3356 __ sqdmlalb(z6.VnD(), z6.VnS(), z2.VnS(), 0); 3357 3358 __ movprfx(z6, z7); 3359 __ sqdmlalb(z6.VnS(), z6.VnH(), z2.VnH(), 0); 3360 3361 __ movprfx(z23, z24); 3362 __ sqdmlalbt(z23.VnD(), z23.VnS(), z26.VnS()); 3363 3364 __ movprfx(z11, z12); 3365 __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS()); 3366 3367 __ movprfx(z11, z12); 3368 __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS(), 0); 3369 3370 __ movprfx(z1, z12); 3371 __ sqdmlalt(z1.VnS(), z0.VnH(), z1.VnH(), 0); 3372 3373 __ movprfx(z16, z17); 3374 __ sqdmlslb(z16.VnD(), z26.VnS(), z16.VnS()); 3375 3376 __ movprfx(z16, z17); 3377 __ sqdmlslb(z16.VnD(), z16.VnS(), z2.VnS(), 0); 3378 3379 __ movprfx(z16, z17); 3380 __ sqdmlslb(z16.VnS(), z16.VnH(), z2.VnH(), 0); 3381 3382 __ movprfx(z26, z27); 3383 __ sqdmlslbt(z26.VnD(), z26.VnS(), z4.VnS()); 3384 3385 __ movprfx(z21, z22); 3386 __ sqdmlslt(z21.VnD(), z23.VnS(), z21.VnS()); 3387 3388 __ movprfx(z21, z22); 3389 __ sqdmlslt(z21.VnD(), z21.VnS(), z0.VnS(), 0); 3390 3391 __ movprfx(z1, z22); 3392 __ sqdmlslt(z21.VnS(), z23.VnH(), z1.VnH(), 0); 3393 3394 __ movprfx(z21.VnB(), p0.Merging(), z22.VnB()); 3395 __ sqneg(z21.VnB(), p0.Merging(), z21.VnB()); 3396 3397 __ movprfx(z31, z0); 3398 __ sqrdcmlah(z31.VnB(), z15.VnB(), z31.VnB(), 0); 3399 3400 __ movprfx(z31, z0); 3401 __ sqrdcmlah(z31.VnH(), z31.VnH(), z2.VnH(), 0, 0); 3402 3403 __ movprfx(z31, z0); 3404 __ sqrdcmlah(z31.VnS(), z31.VnS(), z2.VnS(), 0, 0); 3405 3406 __ movprfx(z27, z28); 3407 __ sqrdmlah(z27.VnB(), z27.VnB(), z19.VnB()); 3408 3409 __ movprfx(z27, z28); 3410 __ sqrdmlah(z27.VnH(), z27.VnH(), z1.VnH(), 0); 3411 3412 __ movprfx(z27, z28); 3413 __ sqrdmlah(z27.VnS(), z27.VnS(), z1.VnS(), 0); 3414 3415 __ movprfx(z27, z28); 3416 __ sqrdmlah(z27.VnD(), z27.VnD(), z1.VnD(), 0); 3417 3418 __ movprfx(z11, z12); 3419 __ sqrdmlsh(z11.VnB(), z16.VnB(), z11.VnB()); 3420 3421 __ movprfx(z11, z12); 3422 __ sqrdmlsh(z11.VnH(), z11.VnH(), z1.VnH(), 0); 3423 3424 __ movprfx(z11, z12); 3425 __ sqrdmlsh(z11.VnS(), z11.VnS(), z1.VnS(), 0); 3426 3427 __ movprfx(z11, z12); 3428 __ sqrdmlsh(z11.VnD(), z11.VnD(), z1.VnD(), 0); 3429 3430 __ movprfx(z31.VnB(), p5.Merging(), z0.VnB()); 3431 __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z31.VnB()); 3432 3433 __ movprfx(z25.VnB(), p6.Merging(), z26.VnB()); 3434 __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z25.VnB()); 3435 3436 __ movprfx(z0.VnB(), p5.Merging(), z1.VnB()); 3437 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z0.VnB()); 3438 3439 __ movprfx(z7.VnB(), p3.Merging(), z8.VnB()); 3440 __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z7.VnB()); 3441 3442 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 3443 __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); 3444 3445 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 3446 __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); 3447 3448 __ movprfx(z23.VnB(), p4.Merging(), z24.VnB()); 3449 __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z23.VnB()); 3450 3451 __ movprfx(z31.VnB(), p7.Merging(), z0.VnB()); 3452 __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z31.VnB()); 3453 3454 __ movprfx(z16.VnB(), p7.Merging(), z17.VnB()); 3455 __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB()); 3456 3457 __ movprfx(z0, z1); 3458 __ srsra(z0.VnB(), z0.VnB(), 1); 3459 3460 __ movprfx(z0, z1); 3461 __ ssra(z0.VnB(), z0.VnB(), 1); 3462 3463 __ movprfx(z26.VnB(), p2.Merging(), z27.VnB()); 3464 __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z26.VnB()); 3465 3466 __ movprfx(z23, z24); 3467 __ uaba(z23.VnB(), z22.VnB(), z23.VnB()); 3468 3469 __ movprfx(z11, z12); 3470 __ uabalb(z11.VnD(), z25.VnS(), z11.VnS()); 3471 3472 __ movprfx(z4, z5); 3473 __ uabalt(z4.VnD(), z4.VnS(), z31.VnS()); 3474 3475 __ movprfx(z20.VnD(), p4.Merging(), z21.VnD()); 3476 __ uadalp(z20.VnD(), p4.Merging(), z20.VnS()); 3477 3478 __ movprfx(z21.VnB(), p2.Merging(), z22.VnB()); 3479 __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z21.VnB()); 3480 3481 __ movprfx(z1.VnB(), p4.Merging(), z2.VnB()); 3482 __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z1.VnB()); 3483 3484 __ movprfx(z18.VnB(), p0.Merging(), z19.VnB()); 3485 __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z18.VnB()); 3486 3487 __ movprfx(z7, z8); 3488 __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z7.VnB()); 3489 3490 __ movprfx(z10, z11); 3491 __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z10.VnB()); 3492 3493 __ movprfx(z31, z0); 3494 __ umlalb(z31.VnD(), z9.VnS(), z31.VnS()); 3495 3496 __ movprfx(z31, z0); 3497 __ umlalb(z31.VnD(), z31.VnS(), z1.VnS(), 0); 3498 3499 __ movprfx(z31, z0); 3500 __ umlalb(z31.VnS(), z31.VnH(), z1.VnH(), 0); 3501 3502 __ movprfx(z11, z12); 3503 __ umlalt(z11.VnD(), z11.VnS(), z22.VnS()); 3504 3505 __ movprfx(z11, z12); 3506 __ umlalt(z11.VnD(), z11.VnS(), z2.VnS(), 0); 3507 3508 __ movprfx(z1, z12); 3509 __ umlalt(z1.VnS(), z5.VnH(), z1.VnH(), 0); 3510 3511 __ movprfx(z28, z29); 3512 __ umlslb(z28.VnD(), z28.VnS(), z9.VnS()); 3513 3514 __ movprfx(z28, z29); 3515 __ umlslb(z28.VnD(), z28.VnS(), z1.VnS(), 0); 3516 3517 __ movprfx(z28, z29); 3518 __ umlslb(z28.VnS(), z28.VnH(), z1.VnH(), 0); 3519 3520 __ movprfx(z9, z10); 3521 __ umlslt(z9.VnD(), z9.VnS(), z30.VnS()); 3522 3523 __ movprfx(z9, z10); 3524 __ umlslt(z9.VnD(), z9.VnS(), z0.VnS(), 0); 3525 3526 __ movprfx(z9, z10); 3527 __ umlslt(z9.VnS(), z9.VnH(), z0.VnH(), 0); 3528 3529 __ movprfx(z24.VnB(), p7.Merging(), z25.VnB()); 3530 __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z24.VnB()), 3531 3532 __ movprfx(z20.VnB(), p1.Merging(), z21.VnB()); 3533 __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z20.VnB()); 3534 3535 __ movprfx(z8.VnB(), p5.Merging(), z9.VnB()); 3536 __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z8.VnB()); 3537 3538 __ movprfx(z29.VnB(), p7.Merging(), z30.VnB()); 3539 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z29.VnB()); 3540 3541 __ movprfx(z12.VnB(), p1.Merging(), z13.VnB()); 3542 __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB()); 3543 3544 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 3545 __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB()); 3546 3547 __ movprfx(z20.VnB(), p0.Merging(), z21.VnB()); 3548 __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB()); 3549 3550 __ movprfx(z25.VnS(), p7.Merging(), z26.VnS()); 3551 __ urecpe(z25.VnS(), p7.Merging(), z25.VnS()); 3552 3553 __ movprfx(z29.VnB(), p4.Merging(), z30.VnB()); 3554 __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z29.VnB()); 3555 3556 __ movprfx(z15.VnB(), p2.Merging(), z16.VnB()); 3557 __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z15.VnB()); 3558 3559 __ movprfx(z27.VnB(), p1.Merging(), z28.VnB()); 3560 __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z27.VnB()); 3561 3562 __ movprfx(z4.VnS(), p3.Merging(), z5.VnS()); 3563 __ ursqrte(z4.VnS(), p3.Merging(), z4.VnS()); 3564 3565 __ movprfx(z0, z1); 3566 __ ursra(z0.VnB(), z0.VnB(), 1); 3567 3568 __ movprfx(z25.VnB(), p4.Merging(), z26.VnB()); 3569 __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z25.VnB()); 3570 3571 __ movprfx(z0, z1); 3572 __ usra(z0.VnB(), z0.VnB(), 1); 3573 3574 __ movprfx(z16, z17); 3575 __ xar(z16.VnB(), z16.VnB(), z16.VnB(), 1); 3576 } 3577 assm.FinalizeCode(); 3578 3579 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 3580} 3581 3582TEST(movprfx_negative_lane_size_sve2) { 3583 Assembler assm; 3584 assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2); 3585 { 3586 // We have to use the Assembler directly to generate movprfx, so we need 3587 // to manually reserve space for the code we're about to emit. 3588 static const size_t kPairCount = 140; 3589 CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize); 3590 3591 __ movprfx(z14.VnS(), p4.Merging(), z15.VnS()); 3592 __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD()); 3593 3594 __ movprfx(z15.VnS(), p0.Merging(), z16.VnS()); 3595 __ flogb(z15.VnH(), p0.Merging(), z3.VnH()); 3596 3597 __ movprfx(z19.VnB(), p5.Merging(), z20.VnB()); 3598 __ sadalp(z19.VnD(), p5.Merging(), z9.VnS()); 3599 3600 __ movprfx(z20.VnH(), p3.Merging(), z21.VnH()); 3601 __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()); 3602 3603 __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); 3604 __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()); 3605 3606 __ movprfx(z1.VnS(), p0.Merging(), z2.VnS()); 3607 __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()); 3608 3609 __ movprfx(z29.VnD(), p1.Merging(), z30.VnD()); 3610 __ sqabs(z29.VnB(), p1.Merging(), z18.VnB()); 3611 3612 __ movprfx(z28.VnH(), p0.Merging(), z29.VnH()); 3613 __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()); 3614 3615 __ movprfx(z21.VnH(), p0.Merging(), z22.VnH()); 3616 __ sqneg(z21.VnB(), p0.Merging(), z17.VnB()); 3617 3618 __ movprfx(z31.VnS(), p5.Merging(), z0.VnS()); 3619 __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()); 3620 3621 __ movprfx(z25.VnD(), p6.Merging(), z26.VnD()); 3622 __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()); 3623 3624 __ movprfx(z0.VnH(), p5.Merging(), z1.VnH()); 3625 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0); 3626 3627 __ movprfx(z0.VnS(), p5.Merging(), z1.VnS()); 3628 __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB()); 3629 3630 __ movprfx(z7.VnD(), p3.Merging(), z8.VnD()); 3631 __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()); 3632 3633 __ movprfx(z10.VnH(), p1.Merging(), z11.VnH()); 3634 __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0); 3635 3636 __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); 3637 __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 3638 3639 __ movprfx(z16.VnS(), p7.Merging(), z17.VnS()); 3640 __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()); 3641 3642 __ movprfx(z23.VnD(), p4.Merging(), z24.VnD()); 3643 __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()); 3644 3645 __ movprfx(z31.VnH(), p7.Merging(), z0.VnH()); 3646 __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()); 3647 3648 __ movprfx(z16.VnH(), p7.Merging(), z17.VnH()); 3649 __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()); 3650 3651 __ movprfx(z12.VnH(), p0.Merging(), z13.VnH()); 3652 __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1); 3653 3654 __ movprfx(z26.VnH(), p2.Merging(), z27.VnH()); 3655 __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()); 3656 3657 __ movprfx(z20.VnB(), p4.Merging(), z21.VnB()); 3658 __ uadalp(z20.VnD(), p4.Merging(), z5.VnS()); 3659 3660 __ movprfx(z21.VnH(), p2.Merging(), z22.VnH()); 3661 __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()); 3662 3663 __ movprfx(z1.VnH(), p4.Merging(), z2.VnH()); 3664 __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()); 3665 3666 __ movprfx(z18.VnH(), p0.Merging(), z19.VnH()); 3667 __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()); 3668 3669 __ movprfx(z24.VnH(), p7.Merging(), z25.VnH()); 3670 __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()), 3671 3672 __ movprfx(z20.VnS(), p1.Merging(), z21.VnS()); 3673 __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()); 3674 3675 __ movprfx(z8.VnS(), p5.Merging(), z9.VnS()); 3676 __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()); 3677 3678 __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); 3679 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0); 3680 3681 __ movprfx(z29.VnS(), p7.Merging(), z30.VnS()); 3682 __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB()); 3683 3684 __ movprfx(z12.VnS(), p1.Merging(), z13.VnS()); 3685 __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB()); 3686 3687 __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); 3688 __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 3689 3690 __ movprfx(z20.VnS(), p0.Merging(), z21.VnS()); 3691 __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()); 3692 3693 __ movprfx(z25.VnB(), p7.Merging(), z26.VnB()); 3694 __ urecpe(z25.VnS(), p7.Merging(), z2.VnS()); 3695 3696 __ movprfx(z29.VnD(), p4.Merging(), z30.VnD()); 3697 __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()); 3698 3699 __ movprfx(z15.VnD(), p2.Merging(), z16.VnD()); 3700 __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()); 3701 3702 __ movprfx(z27.VnD(), p1.Merging(), z28.VnD()); 3703 __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()); 3704 3705 __ movprfx(z31.VnD(), p2.Merging(), z0.VnD()); 3706 __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1); 3707 3708 __ movprfx(z4.VnH(), p3.Merging(), z5.VnH()); 3709 __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS()); 3710 3711 __ movprfx(z25.VnD(), p4.Merging(), z26.VnD()); 3712 __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()); 3713 } 3714 assm.FinalizeCode(); 3715 3716 CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false); 3717} 3718 3719} // namespace aarch64 3720} // namespace vixl 3721