1// Copyright 2015, VIXL authors 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

#include <cfloat>
#include <cstdio>
#include <sstream>

#include "test-runner.h"
#include "test-utils.h"

#include "aarch64/cpu-features-auditor-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/test-simulator-traces-aarch64.h"
#include "aarch64/test-utils-aarch64.h"

namespace vixl {
namespace aarch64 {

// ==== Simulator Tests ====
//
// These simulator tests check instruction behaviour against a trace taken from
// real AArch64 hardware. The same test code is used to generate the trace; the
// results are printed to stdout when the test is run with
// --generate_test_trace.
//
// The input lists and expected results are stored in test/traces. The expected
// results can be regenerated using tools/generate_simulator_traces.py. Adding a
// test for a new instruction is described at the top of
// test-simulator-traces-aarch64.h.

#define __ masm.
#define TEST(name) TEST_(AARCH64_SIM_##name)

#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())

// Two implementations of the harness macros follow. When the simulator is
// built in, generated code is decoded and executed by the Simulator; otherwise
// it is executed natively (and skipped if the host lacks required features).
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

// Simulator back-end: every test can run, regardless of the host CPU.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());

#define START()                                                         \
  masm.Reset();                                                         \
  simulator.ResetState();                                               \
  __ PushCalleeSavedRegisters();                                        \
  /* The infrastructure code hasn't been covered at the moment, e.g. */ \
  /* prologue/epilogue. Suppress tagging mis-match exception before */  \
  /* this point. */                                                     \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                  \
    __ Hlt(DebugHltOpcode::kMTEActive);                                 \
  }                                                                     \
  if (Test::trace_reg()) {                                              \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_write()) {                                            \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_sim()) {                                              \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                 \
  }

#define END()                                          \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) { \
    __ Hlt(DebugHltOpcode::kMTEInactive);              \
  }                                                    \
  __ Trace(LOG_ALL, TRACE_DISABLE);                    \
  __ PopCalleeSavedRegisters();                        \
  __ Ret();                                            \
  masm.FinalizeCode()

#define TRY_RUN(skipped)                                                \
  DISASSEMBLE();                                                        \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
  /* The simulator can run every test. */                               \
  *skipped = false


#else  // VIXL_INCLUDE_SIMULATOR_AARCH64

// Native back-end: code runs directly on the host CPU.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()

#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()

#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()

#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively. */                                           \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of */     \
    /* AArch64LegacyBaseline is a stopgap. */                                 \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (this_machine.Has(requirements)) {                                     \
      masm.GetBuffer()->SetExecutable();                                      \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      masm.GetBuffer()->SetWritable();                                        \
      *skipped = false;                                                       \
    } else {                                                                  \
      std::stringstream os;                                                   \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from */    \
      /* tools/threaded_test.py. */                                           \
      os << "SKIPPED: Missing features: { ";                                  \
      os << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                         \
      *skipped = true;                                                        \
    }                                                                         \
  }


#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64


// Dump the generated code with the PrintDisassembler when --disassemble is
// passed; used by both TRY_RUN variants.
#define DISASSEMBLE()                                             \
  if (Test::disassemble()) {                                      \
    PrintDisassembler disasm(stdout);                             \
    CodeBuffer* buffer = masm.GetBuffer();                        \
    Instruction* start = buffer->GetStartAddress<Instruction*>(); \
    Instruction* end = buffer->GetEndAddress<Instruction*>();     \
    disasm.DisassembleBuffer(start, end);                         \
  }

// The maximum number of errors to report in detail for each test.
static const unsigned kErrorReportLimit = 8;


// Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
// templated test functions.
static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }

static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }

// The rawbits_to_fp functions are only used for printing decimal values so we
// just approximate FP16 as double.
static double rawbits_to_fp(uint16_t bits) {
  // Widen FP16 to double purely for diagnostic printing; DefaultNaN handling
  // is irrelevant here, so it is explicitly ignored.
  return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
}


// MacroAssembler member function pointers to pass to the test dispatchers.
// Each typedef matches the signature of one family of (macro-)instructions,
// e.g. Test2OpFPHelper_t matches &MacroAssembler::Fadd and friends.
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn);
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm);
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm,
                                                  const VRegister& fa);
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
                                                  const VRegister& fm);
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
                                                      double value);
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const VRegister& fn);
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const VRegister& fn,
                                                      int fbits);
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
// consolidated into one routine.
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn);
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn,
                                                    const VRegister& vm);
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
                                                          const VRegister& vn,
                                                          const VRegister& vm,
                                                          int vm_index);
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    const VRegister& vd, int imm1, const VRegister& vn, int imm2);

// This helps using the same typename for both the function pointer
// and the array of immediates passed to helper routines.
template <typename T>
class Test2OpImmediateNEONHelper_t {
 public:
  typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
                                           const VRegister& vn,
                                           T imm);
};


// Maximum number of hex characters required to represent values of either
// templated type.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
  // 8 bits per byte, 4 bits per hex character.
  return (count * 8) / 4;
}


// Standard test dispatchers.


// Generate and run code that applies `helper` (a 1-operand FP instruction)
// to every value in `inputs`, writing one raw result per input to `results`.
// d_size/n_size select the destination/source register widths (D, S or H).
// *skipped is set to true if the test could not be run (native mode only).
static void Test1Op_Helper(Test1OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned d_size,
                           unsigned n_size,
                           bool* skipped) {
  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
              (d_size == kHRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  CPUFeatures features;
  features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  // For frint{32,64}{x,y} variants.
  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Pick the source register (and input stride) from n_size...
  int n_index_shift;
  VRegister fd;
  VRegister fn;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
    fn = d1;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
    fn = s1;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
    fn = h1;
  }

  // ... and the destination register from d_size.
  if (d_size == kDRegSize) {
    fd = d0;
  } else if (d_size == kSRegSize) {
    fd = s0;
  } else {
    fd = h0;
  }


  // NOTE(review): `length` is initialised here but the loop bound below uses
  // the `inputs_length` immediate directly.
  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // The instruction under test must be emitted as exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn);
  }
  // Results are stored contiguously, post-incrementing the output pointer.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
template <typename Tn, typename Td>
static void Test1Op(const char* name,
                    Test1OpFPHelper_t helper,
                    const Tn inputs[],
                    unsigned inputs_length,
                    const Td expected[],
                    unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per input.
  const unsigned results_length = inputs_length;
  Td* results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;
  bool skipped;

  Test1Op_Helper(helper,
                 reinterpret_cast<uintptr_t>(inputs),
                 inputs_length,
                 reinterpret_cast<uintptr_t>(results),
                 d_bits,
                 n_bits,
                 &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf("  0x%0*" PRIx64 ",\n",
             d_bits / 4,
             static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Only report the first kErrorReportLimit mismatches in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name,
               n_bits / 4,
               static_cast<uint64_t>(inputs[n]),
               name,
               rawbits_to_fp(inputs[n]));
        printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
               d_bits / 4,
               static_cast<uint64_t>(expected[d]),
               rawbits_to_fp(expected[d]));
        printf("  Found:    0x%0*" PRIx64 " (%g)\n",
               d_bits / 4,
               static_cast<uint64_t>(results[d]),
               rawbits_to_fp(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies `helper` (a 2-operand FP instruction)
// to every pair of values from `inputs` (an inputs_length^2 cross product,
// n-major), writing the raw results to `results`.
static void Test2Op_Helper(Test2OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;

  bool double_op = reg_size == kDRegSize;
  bool float_op = reg_size == kSRegSize;
  int index_shift;
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (float_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  VRegister fd;
  VRegister fn;
  VRegister fm;

  // All three registers share the width selected by reg_size.
  if (double_op) {
    fd = d0;
    fn = d1;
    fm = d2;
  } else if (float_op) {
    fd = s0;
    fn = s1;
    fm = s2;
  } else {
    fd = h0;
    fn = h1;
    fm = h2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // The instruction under test must be emitted as exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm);
  }
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
template <typename T>
static void Test2Op(const char* name,
                    Test2OpFPHelper_t helper,
                    const T inputs[],
                    unsigned inputs_length,
                    const T expected[],
                    unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per (n, m) input pair.
  const unsigned results_length = inputs_length * inputs_length;
  T* results = new T[results_length];

  const unsigned bits = sizeof(T) * 8;
  bool skipped;

  Test2Op_Helper(helper,
                 reinterpret_cast<uintptr_t>(inputs),
                 inputs_length,
                 reinterpret_cast<uintptr_t>(results),
                 bits,
                 &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf("  0x%0*" PRIx64 ",\n",
             bits / 4,
             static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results. `d` walks the flat results array in the same n-major
    // order that Test2Op_Helper used to fill it.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned m = 0; m < inputs_length; m++, d++) {
        if (results[d] != expected[d]) {
          // Only report the first kErrorReportLimit mismatches in detail.
          if (++error_count > kErrorReportLimit) continue;

          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
                 name,
                 bits / 4,
                 static_cast<uint64_t>(inputs[n]),
                 bits / 4,
                 static_cast<uint64_t>(inputs[m]),
                 name,
                 rawbits_to_fp(inputs[n]),
                 rawbits_to_fp(inputs[m]));
          printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
                 bits / 4,
                 static_cast<uint64_t>(expected[d]),
                 rawbits_to_fp(expected[d]));
          printf("  Found:    0x%0*" PRIx64 " (%g)\n",
                 bits / 4,
                 static_cast<uint64_t>(results[d]),
                 rawbits_to_fp(results[d]));
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies `helper` (a 3-operand FP instruction)
// to every (n, m, a) triple from `inputs` (inputs_length^3, n-major),
// writing the raw results to `results`.
static void Test3Op_Helper(Test3OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m, loop_a;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register index_a = w5;

  bool double_op = reg_size == kDRegSize;
  bool single_op = reg_size == kSRegSize;
  int index_shift;
  // Here the registers are built directly from (code, size) pairs rather
  // than by assignment from d0/s0/h0 aliases.
  VRegister fd(0, reg_size);
  VRegister fn(1, reg_size);
  VRegister fm(2, reg_size);
  VRegister fa(3, reg_size);
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (single_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  __ Mov(index_a, 0);
  __ Bind(&loop_a);
  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));

  {
    // The instruction under test must be emitted as exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm, fa);
  }
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_a, index_a, 1);
  __ Cmp(index_a, inputs_length);
  __ B(lo, &loop_a);

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
template <typename T>
static void Test3Op(const char* name,
                    Test3OpFPHelper_t helper,
                    const T inputs[],
                    unsigned inputs_length,
                    const T expected[],
                    unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per (n, m, a) input triple.
  const unsigned results_length = inputs_length * inputs_length * inputs_length;
  T* results = new T[results_length];

  const unsigned bits = sizeof(T) * 8;
  bool skipped;

  Test3Op_Helper(helper,
                 reinterpret_cast<uintptr_t>(inputs),
                 inputs_length,
                 reinterpret_cast<uintptr_t>(results),
                 bits,
                 &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf("  0x%0*" PRIx64 ",\n",
             bits / 4,
             static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results, walking the flat array in the same n-major order
    // that Test3Op_Helper used to fill it.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned m = 0; m < inputs_length; m++) {
        for (unsigned a = 0; a < inputs_length; a++, d++) {
          if (results[d] != expected[d]) {
            // Only report the first kErrorReportLimit mismatches in detail.
            if (++error_count > kErrorReportLimit) continue;

            printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
                   " (%s %g %g %g):\n",
                   name,
                   bits / 4,
                   static_cast<uint64_t>(inputs[n]),
                   bits / 4,
                   static_cast<uint64_t>(inputs[m]),
                   bits / 4,
                   static_cast<uint64_t>(inputs[a]),
                   name,
                   rawbits_to_fp(inputs[n]),
                   rawbits_to_fp(inputs[m]),
                   rawbits_to_fp(inputs[a]));
            printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
                   bits / 4,
                   static_cast<uint64_t>(expected[d]),
                   rawbits_to_fp(expected[d]));
            printf("  Found:    0x%0*" PRIx64 " (%g)\n",
                   bits / 4,
                   static_cast<uint64_t>(results[d]),
                   rawbits_to_fp(results[d]));
            printf("\n");
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies `helper` (an FP compare) to every pair
// of values from `inputs`, storing one byte per pair holding the resulting
// NZCV flags (extracted from bits 31:28 of the status register).
static void TestCmp_Helper(TestFPCmpHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register flags = x5;

  bool double_op = reg_size == kDRegSize;
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  VRegister fn = double_op ? d1 : s1;
  VRegister fm = double_op ? d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // The instruction under test must be emitted as exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, fm);
  }
  // Read NZCV and keep only the flag nibble (bits 31:28), then store it as
  // a single byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
template <typename T>
static void TestCmp(const char* name,
                    TestFPCmpHelper_t helper,
                    const T inputs[],
                    unsigned inputs_length,
                    const uint8_t expected[],
                    unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One NZCV byte per (n, m) input pair.
  const unsigned results_length = inputs_length * inputs_length;
  uint8_t* results = new uint8_t[results_length];

  const unsigned bits = sizeof(T) * 8;
  bool skipped;

  TestCmp_Helper(helper,
                 reinterpret_cast<uintptr_t>(inputs),
                 inputs_length,
                 reinterpret_cast<uintptr_t>(results),
                 bits,
                 &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint8_t kExpected_%s[] = {\n", name);
    for (unsigned d = 0; d < results_length; d++) {
      // Each NZCV result only requires 4 bits.
      VIXL_ASSERT((results[d] & 0xf) == results[d]);
      printf("  0x%" PRIx8 ",\n", results[d]);
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned m = 0; m < inputs_length; m++, d++) {
        if (results[d] != expected[d]) {
          // Only report the first kErrorReportLimit mismatches in detail.
          if (++error_count > kErrorReportLimit) continue;

          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
                 name,
                 bits / 4,
                 static_cast<uint64_t>(inputs[n]),
                 bits / 4,
                 static_cast<uint64_t>(inputs[m]),
                 name,
                 rawbits_to_fp(inputs[n]),
                 rawbits_to_fp(inputs[m]));
          // Flags are shown as upper case when set, lower case when clear.
          printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
                 (expected[d] & 0x8) ? 'N' : 'n',
                 (expected[d] & 0x4) ? 'Z' : 'z',
                 (expected[d] & 0x2) ? 'C' : 'c',
                 (expected[d] & 0x1) ? 'V' : 'v',
                 expected[d]);
          printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
                 (results[d] & 0x8) ? 'N' : 'n',
                 (results[d] & 0x4) ? 'Z' : 'z',
                 (results[d] & 0x2) ? 'C' : 'c',
                 (results[d] & 0x1) ? 'V' : 'v',
                 results[d]);
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies `helper` (an FP compare-against-zero)
// to every value in `inputs`, storing one NZCV byte per input.
static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
                               uintptr_t inputs,
                               unsigned inputs_length,
                               uintptr_t results,
                               unsigned reg_size,
                               bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  // NOTE(review): loop_m is declared but unused in this helper.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register flags = x4;

  bool double_op = reg_size == kDRegSize;
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  VRegister fn = double_op ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  {
    // The instruction under test must be emitted as exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, 0.0);
  }
  // Extract the NZCV nibble (bits 31:28) and store it as a byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
template <typename T>
static void TestCmpZero(const char* name,
                        TestFPCmpZeroHelper_t helper,
                        const T inputs[],
                        unsigned inputs_length,
                        const uint8_t expected[],
                        unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One NZCV byte per input.
  const unsigned results_length = inputs_length;
  uint8_t* results = new uint8_t[results_length];

  const unsigned bits = sizeof(T) * 8;
  bool skipped;

  TestCmpZero_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs),
                     inputs_length,
                     reinterpret_cast<uintptr_t>(results),
                     bits,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint8_t kExpected_%s[] = {\n", name);
    for (unsigned d = 0; d < results_length; d++) {
      // Each NZCV result only requires 4 bits.
      VIXL_ASSERT((results[d] & 0xf) == results[d]);
      printf("  0x%" PRIx8 ",\n", results[d]);
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Only report the first kErrorReportLimit mismatches in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
               name,
               bits / 4,
               static_cast<uint64_t>(inputs[n]),
               bits / 4,
               0,
               name,
               rawbits_to_fp(inputs[n]));
        // Flags are shown as upper case when set, lower case when clear.
        printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
               (expected[d] & 0x8) ? 'N' : 'n',
               (expected[d] & 0x4) ? 'Z' : 'z',
               (expected[d] & 0x2) ? 'C' : 'c',
               (expected[d] & 0x1) ? 'V' : 'v',
               expected[d]);
        printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
               (results[d] & 0x8) ? 'N' : 'n',
               (results[d] & 0x4) ? 'Z' : 'z',
               (results[d] & 0x2) ? 'C' : 'c',
               (results[d] & 0x1) ? 'V' : 'v',
               results[d]);
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies `helper` (an FP-to-fixed-point
// conversion) to every input, once for each fbits value in [0, d_size],
// storing one integer result per (input, fbits) combination.
static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
                                 uintptr_t inputs,
                                 unsigned inputs_length,
                                 uintptr_t results,
                                 unsigned d_size,
                                 unsigned n_size,
                                 bool* skipped) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  int n_index_shift;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
  }

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  VRegister fn;
  if (n_size == kDRegSize) {
    fn = d1;
  } else if (n_size == kSRegSize) {
    fn = s1;
  } else {
    fn = h1;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  // The fbits loop is unrolled at generation time: one instruction is
  // emitted per fbits value, for each input.
  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    {
      // The instruction under test must be emitted as exactly one
      // instruction.
      SingleEmissionCheckScope guard(&masm);
      (masm.*helper)(rd, fn, fbits);
    }
    __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Generate and run code that applies `helper` (an FP-to-integer conversion)
// to every value in `inputs`, storing one integer result per input.
static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
                               uintptr_t inputs,
                               unsigned inputs_length,
                               uintptr_t results,
                               unsigned d_size,
                               unsigned n_size,
                               bool* skipped) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  // kJSCVT is included so the same dispatcher can exercise fjcvtzs.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kFPHalf,
                      CPUFeatures::kJSCVT);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  int n_index_shift;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
  }

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  VRegister fn;
  if (n_size == kDRegSize) {
    fn = d1;
  } else if (n_size == kSRegSize) {
    fn = s1;
  } else {
    fn = h1;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // The instruction under test must be emitted as exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(rd, fn);
  }
  __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test FP instructions.
// - The inputs[] array should be an array of rawbits representations of
//   doubles or floats. This ensures that exact bit comparisons can be
//   performed.
// - The expected[] array should be an array of signed integers.
// Run an FP-to-signed-integer instruction over 'inputs' and either print a
// reference trace (--generate_test_trace) or compare against 'expected'.
// Tn is the rawbits input type; Td is the signed integer result type.
template <typename Tn, typename Td>
static void TestFPToS(const char* name,
                      TestFPToIntHelper_t helper,
                      const Tn inputs[],
                      unsigned inputs_length,
                      const Td expected[],
                      unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per input; owned locally and freed at the end.
  const unsigned results_length = inputs_length;
  Td* results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;
  // Set by TRY_RUN (via the helper) when the run could not be performed.
  bool skipped;

  TestFPToInt_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs),
                     inputs_length,
                     reinterpret_cast<uintptr_t>(results),
                     d_bits,
                     n_bits,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    // There is no simple C++ literal for INT*_MIN that doesn't produce
    // warnings, so we use an appropriate constant in that case instead.
    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    // the like) avoids warnings about comparing values with differing ranges.
    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    const int64_t int_d_min = -(int_d_max)-1;
    for (unsigned d = 0; d < results_length; d++) {
      if (results[d] == int_d_min) {
        printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
      } else {
        // Some constants (such as those between INT32_MAX and UINT32_MAX)
        // trigger compiler warnings. To avoid these warnings, use an
        // appropriate macro to make the type explicit.
        int64_t result_int64 = static_cast<int64_t>(results[d]);
        if (result_int64 >= 0) {
          printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
        } else {
          printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
        }
      }
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Keep counting errors past the report limit, but stop printing them.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name,
               n_bits / 4,
               static_cast<uint64_t>(inputs[n]),
               name,
               rawbits_to_fp(inputs[n]));
        printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4,
               static_cast<uint64_t>(expected[d]),
               static_cast<int64_t>(expected[d]));
        printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4,
               static_cast<uint64_t>(results[d]),
               static_cast<int64_t>(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
// - The inputs[] array should be an array of rawbits representations of
//   doubles or floats. This ensures that exact bit comparisons can be
//   performed.
// - The expected[] array should be an array of unsigned integers.
1200template <typename Tn, typename Td> 1201static void TestFPToU(const char* name, 1202 TestFPToIntHelper_t helper, 1203 const Tn inputs[], 1204 unsigned inputs_length, 1205 const Td expected[], 1206 unsigned expected_length) { 1207 VIXL_ASSERT(inputs_length > 0); 1208 1209 const unsigned results_length = inputs_length; 1210 Td* results = new Td[results_length]; 1211 1212 const unsigned d_bits = sizeof(Td) * 8; 1213 const unsigned n_bits = sizeof(Tn) * 8; 1214 bool skipped; 1215 1216 TestFPToInt_Helper(helper, 1217 reinterpret_cast<uintptr_t>(inputs), 1218 inputs_length, 1219 reinterpret_cast<uintptr_t>(results), 1220 d_bits, 1221 n_bits, 1222 &skipped); 1223 1224 if (Test::generate_test_trace()) { 1225 // Print the results. 1226 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); 1227 for (unsigned d = 0; d < results_length; d++) { 1228 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d])); 1229 } 1230 printf("};\n"); 1231 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1232 } else if (!skipped) { 1233 // Check the results. 
1234 VIXL_CHECK(expected_length == results_length); 1235 unsigned error_count = 0; 1236 unsigned d = 0; 1237 for (unsigned n = 0; n < inputs_length; n++, d++) { 1238 if (results[d] != expected[d]) { 1239 if (++error_count > kErrorReportLimit) continue; 1240 1241 printf("%s 0x%0*" PRIx64 " (%s %g):\n", 1242 name, 1243 n_bits / 4, 1244 static_cast<uint64_t>(inputs[n]), 1245 name, 1246 rawbits_to_fp(inputs[n])); 1247 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1248 d_bits / 4, 1249 static_cast<uint64_t>(expected[d]), 1250 static_cast<uint64_t>(expected[d])); 1251 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1252 d_bits / 4, 1253 static_cast<uint64_t>(results[d]), 1254 static_cast<uint64_t>(results[d])); 1255 printf("\n"); 1256 } 1257 } 1258 VIXL_ASSERT(d == expected_length); 1259 if (error_count > kErrorReportLimit) { 1260 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1261 } 1262 VIXL_CHECK(error_count == 0); 1263 } 1264 delete[] results; 1265} 1266 1267 1268// Test FP instructions. 1269// - The inputs[] array should be an array of rawbits representations of 1270// doubles or floats. This ensures that exact bit comparisons can be 1271// performed. 1272// - The expected[] array should be an array of signed integers. 
// Run an FP-to-signed-fixed-point instruction over 'inputs', sweeping every
// fractional-bit count (0..d_bits inclusive) per input. Either prints a
// reference trace (--generate_test_trace) or compares against 'expected'.
// Tn is the rawbits input type; Td is the signed integer result type.
template <typename Tn, typename Td>
static void TestFPToFixedS(const char* name,
                           TestFPToFixedHelper_t helper,
                           const Tn inputs[],
                           unsigned inputs_length,
                           const Td expected[],
                           unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  // (d_bits + 1) results per input: one for each fbits in [0, d_bits].
  const unsigned results_length = inputs_length * (d_bits + 1);
  Td* results = new Td[results_length];

  // Set by TRY_RUN (via the helper) when the run could not be performed.
  bool skipped;

  TestFPToFixed_Helper(helper,
                       reinterpret_cast<uintptr_t>(inputs),
                       inputs_length,
                       reinterpret_cast<uintptr_t>(results),
                       d_bits,
                       n_bits,
                       &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    // There is no simple C++ literal for INT*_MIN that doesn't produce
    // warnings, so we use an appropriate constant in that case instead.
    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    // the like) avoids warnings about comparing values with differing ranges.
    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    const int64_t int_d_min = -(int_d_max)-1;
    for (unsigned d = 0; d < results_length; d++) {
      if (results[d] == int_d_min) {
        printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
      } else {
        // Some constants (such as those between INT32_MAX and UINT32_MAX)
        // trigger compiler warnings. To avoid these warnings, use an
        // appropriate macro to make the type explicit.
        int64_t result_int64 = static_cast<int64_t>(results[d]);
        if (result_int64 >= 0) {
          printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
        } else {
          printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
        }
      }
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // 'd' advances through the flat results array in step with (n, fbits).
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
        if (results[d] != expected[d]) {
          // Keep counting errors past the report limit, but stop printing.
          if (++error_count > kErrorReportLimit) continue;

          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
                 name,
                 n_bits / 4,
                 static_cast<uint64_t>(inputs[n]),
                 fbits,
                 name,
                 rawbits_to_fp(inputs[n]),
                 fbits);
          printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
                 d_bits / 4,
                 static_cast<uint64_t>(expected[d]),
                 static_cast<int64_t>(expected[d]));
          printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
                 d_bits / 4,
                 static_cast<uint64_t>(results[d]),
                 static_cast<int64_t>(results[d]));
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
// - The inputs[] array should be an array of rawbits representations of
//   doubles or floats. This ensures that exact bit comparisons can be
//   performed.
// - The expected[] array should be an array of unsigned integers.
// Run an FP-to-unsigned-fixed-point instruction over 'inputs', sweeping every
// fractional-bit count (0..d_bits inclusive) per input. Either prints a
// reference trace (--generate_test_trace) or compares against 'expected'.
// Tn is the rawbits input type; Td is the unsigned integer result type.
template <typename Tn, typename Td>
static void TestFPToFixedU(const char* name,
                           TestFPToFixedHelper_t helper,
                           const Tn inputs[],
                           unsigned inputs_length,
                           const Td expected[],
                           unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  // (d_bits + 1) results per input: one for each fbits in [0, d_bits].
  const unsigned results_length = inputs_length * (d_bits + 1);
  Td* results = new Td[results_length];

  // Set by TRY_RUN (via the helper) when the run could not be performed.
  bool skipped;

  TestFPToFixed_Helper(helper,
                       reinterpret_cast<uintptr_t>(inputs),
                       inputs_length,
                       reinterpret_cast<uintptr_t>(results),
                       d_bits,
                       n_bits,
                       &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // 'd' advances through the flat results array in step with (n, fbits).
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
        if (results[d] != expected[d]) {
          // Keep counting errors past the report limit, but stop printing.
          if (++error_count > kErrorReportLimit) continue;

          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
                 name,
                 n_bits / 4,
                 static_cast<uint64_t>(inputs[n]),
                 fbits,
                 name,
                 rawbits_to_fp(inputs[n]),
                 fbits);
          printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
                 d_bits / 4,
                 static_cast<uint64_t>(expected[d]),
                 static_cast<uint64_t>(expected[d]));
          printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
                 d_bits / 4,
                 static_cast<uint64_t>(results[d]),
                 static_cast<uint64_t>(results[d]));
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg. ====


// Generate and run code that applies a one-operand NEON instruction (via
// 'helper') to a sliding 128-bit window over 'inputs_n', storing one full
// destination vector per input lane into 'results'.
// '*skipped' is set by TRY_RUN if the run could not be performed.
static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
                               uintptr_t inputs_n,
                               unsigned inputs_n_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON,
                   CPUFeatures::kFP,
                   CPUFeatures::kRDM,
                   CPUFeatures::kNEONHalf);
  // For frint{32,64}{x,y} variants.
  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  // Register roles for the generated loop.
  Register out = x0;                   // Cursor into the results buffer.
  Register inputs_n_base = x1;         // Base of the input array.
  Register inputs_n_last_16bytes = x3; // Address of the last 16 input bytes.
  Register index_n = x5;               // Current input lane index.

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  // Pre-load the trailing 16 bytes; each iteration below shifts one fresh
  // lane into 'vn' via Ext, so every input eventually occupies every lane.
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd_helper, vn_helper);
  }
  __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpNEON(const char* name,
                        Test1OpNEONHelper_t helper,
                        const Tn inputs_n[],
                        unsigned inputs_n_length,
                        const Td expected[],
                        unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One result vector (vd_lane_count lanes) per input iteration.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  // Set by TRY_RUN (via the helper) when the run could not be performed.
  bool skipped;

  Test1OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form,
                     vn_form,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Padding must be at least (lane_len_in_hex + 1) wide; checked below.
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // First pass: detect whether any lane of this vector mismatched.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      // Second pass: print the whole vector (inputs, results, expected),
      // but only for the first kErrorReportLimit failing vectors.
      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // Reconstruct which input occupied lane 0 of the sliding window at
        // iteration n (the helper rotates inputs through a 16-byte window).
        const unsigned first_index_n =
            inputs_n_length - (16 / vn_lane_bytes) + n + 1;

        for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
             lane++) {
          unsigned output_index = (n * vd_lane_count) + lane;
          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
                 " "
                 "| 0x%0*" PRIx64 "\n",
                 results[output_index] != expected[output_index] ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[output_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expected[output_index]));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
//      where <V> is one of B, H, S or D registers.
//      e.g. saddlv H1, v0.8B

// TODO: Change tests to store all lanes of the resulting V register.
//       Some tests store all 128 bits of the resulting V register to
//       check the simulator's behaviour on the rest of the register.
//       This is better than storing the affected lanes only.
//       Change any tests such as the 'Across' template to do the same.

// Generate and run code that applies an across-lanes NEON instruction (via
// 'helper') to a sliding window over 'inputs_n'. The whole Q register holding
// the destination is stored per iteration so unused lanes can be checked.
// '*skipped' is set by TRY_RUN if the run could not be performed.
static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;                  // Cursor into the results buffer.
  Register inputs_n_base = x1;        // Base of the input array.
  Register inputs_n_last_vector = x3; // Address of the last full input vector.
  Register index_n = x5;              // Current input lane index.

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  // Test destructive operations by (arbitrarily) using the same register for
  // B and S lane sizes.
  bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);

  // Create two aliases for v0; the first is the destination for the tested
  // instruction, the second, the whole Q register to check the results.
  VRegister vd = VRegister(0, vd_bits);
  VRegister vdstr = VRegister(0, kQRegSize);

  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(3, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  // Pre-load the trailing vector; each iteration shifts one fresh lane into
  // 'vn' via Ext, so every input eventually occupies every lane position.
  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  if (destructive) {
    // Copy the source into the destination register first so that the tested
    // instruction overwrites its own input.
    __ Mov(vd_helper, vn_helper);
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vd_helper);
  } else {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vn_helper);
  }

  // Store the full Q register so the checker can verify the untouched lanes.
  __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}

// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Run an across-lanes NEON instruction over 'inputs_n' and either print a
// reference trace (--generate_test_trace) or compare against 'expected'.
// Besides the destination lane(s), all remaining lanes of the stored Q
// register are verified to be zero.
template <typename Td, typename Tn>
static void Test1OpAcrossNEON(const char* name,
                              Test1OpNEONHelper_t helper,
                              const Tn inputs_n[],
                              unsigned inputs_n_length,
                              const Td expected[],
                              unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  // The helper stores a whole Q register per iteration, so the results array
  // is sized in Q-register lanes, not destination lanes.
  const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);

  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lanes_per_q];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  // Set by TRY_RUN (via the helper) when the run could not be performed.
  bool skipped;

  Test1OpAcrossNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form,
                           vn_form,
                           &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lanes_per_q);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Padding must be at least (lane_len_in_hex + 1) wide; checked below.
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // Check the computed lane(s) against the expected trace.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned expected_index = (n * vd_lane_count) + lane;
        unsigned results_index = (n * vd_lanes_per_q) + lane;

        if (results[results_index] != expected[expected_index]) {
          error_in_vector = true;
          break;
        }
      }

      // For across operations, the remaining lanes should be zero.
      for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
        unsigned results_index = (n * vd_lanes_per_q) + lane;
        if (results[results_index] != 0) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // TODO: In case of an error, all tests print out as many elements as
        //       there are lanes in the output or input vectors. This way
        //       the viewer can read all the values that were needed for the
        //       operation but the output contains also unnecessary values.
        //       These prints can be improved according to the arguments
        //       passed to test functions.
        //       This output for the 'Across' category has the required
        //       modifications.
        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
          // Print lanes most-significant first; reconstruct which input
          // occupied each lane of the helper's sliding window at iteration n.
          unsigned results_index =
              (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
          unsigned input_index_n =
              (inputs_n_length - vn_lane_count + n + 1 + lane) %
              inputs_n_length;

          Td expect = 0;
          if ((vn_lane_count - 1) == lane) {
            // This is the last lane to be printed, ie. the least-significant
            // lane, so use the expected value; any other lane should be zero.
            unsigned expected_index = n * vd_lane_count;
            expect = expected[expected_index];
          }
          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                 results[results_index] != expect ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[results_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expect));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====

// TODO: Iterate over inputs_d once the traces file is split.

// Generate and run code that applies a two-operand NEON instruction (via
// 'helper') to every (n, m) input pair, using sliding 128-bit windows over
// 'inputs_n' and 'inputs_m'. The initial destination value is loaded from
// 'inputs_d' so accumulating instructions are exercised too. One full result
// vector per pair is stored into 'results'.
// '*skipped' is set by TRY_RUN if the run could not be performed.
static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
                               uintptr_t inputs_d,
                               uintptr_t inputs_n,
                               unsigned inputs_n_length,
                               uintptr_t inputs_m,
                               unsigned inputs_m_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               VectorFormat vm_form,
                               bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);

  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
  features.Combine(CPUFeatures::kFP);
  features.Combine(CPUFeatures::kRDM);
  features.Combine(CPUFeatures::kDotProduct);
  features.Combine(CPUFeatures::kFHM);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  // Register roles for the generated nested loops.
  Register out = x0;                   // Cursor into the results buffer.
  Register inputs_n_base = x1;         // Base of the 'n' input array.
  Register inputs_m_base = x2;         // Base of the 'm' input array.
  Register inputs_d_base = x3;         // Base of the initial-destination data.
  Register inputs_n_last_16bytes = x4; // Last 16 bytes of the 'n' inputs.
  Register inputs_m_last_16bytes = x5; // Last 16 bytes of the 'm' inputs.
  Register index_n = x6;               // Outer loop index.
  Register index_m = x7;               // Inner loop index.

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  // Pre-load each operand window; the loops below rotate fresh lanes in
  // with Ext so every input pairing is eventually produced.
  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Work on a copy of the initial destination so accumulating instructions
  // start from the same value on every iteration.
  __ Mov(vres, vd);
  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vres_helper, vn_helper, vm_helper);
  }
  __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Drive a three-operand NEON test for 'helper' over every (n, m) combination
// of inputs_n[] and inputs_m[], with the accumulator/destination seeded from
// inputs_d[]. With --generate_test_trace the raw results are printed as C
// arrays; otherwise they are compared bit-exactly against expected[].
// Td/Tn/Tm are the rawbit (unsigned integer) lane types of Vd/Vn/Vm.
template <typename Td, typename Tn, typename Tm>
static void Test2OpNEON(const char* name,
                        Test2OpNEONHelper_t helper,
                        const Td inputs_d[],
                        const Tn inputs_n[],
                        unsigned inputs_n_length,
                        const Tm inputs_m[],
                        unsigned inputs_m_length,
                        const Td expected[],
                        unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form,
                        VectorFormat vm_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One full result vector is stored per (n, m) pair.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  // NOTE(review): column width is sized from Td and Tm (not Tn) — presumably
  // because Tn never exceeds them for the instructions tested; confirm.
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();

  bool skipped;

  Test2OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_d),
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(inputs_m),
                     inputs_m_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form,
                     vn_form,
                     vm_form,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        // First pass: detect whether any lane of this result vector differs.
        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        // Second pass: print the whole vector, flagging mismatching lanes,
        // capped at kErrorReportLimit vectors.
        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex + 1,
                 padding);

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            // Undo the helper's EXT-based sliding window to recover which
            // input element ended up in this lane.
            unsigned input_index_n =
                (inputs_n_length - vd_lane_count + n + 1 + lane) %
                inputs_n_length;
            unsigned input_index_m =
                (inputs_m_length - vd_lane_count + m + 1 + lane) %
                inputs_m_length;

            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
                   " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_d[lane]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_m[input_index_m]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====

// Emit and run code that applies 'helper' (a by-element macro-assembler
// method, e.g. mul-by-element) once per entry of indices[] for every (n, m)
// input combination, storing each full 128-bit result vector to 'results'.
// The inputs are streamed through Vn/Vm with EXT so each iteration sees a
// fresh lane; *skipped is set by TRY_RUN if the simulator cannot run it.
static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
                                     uintptr_t inputs_d,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t inputs_m,
                                     unsigned inputs_m_length,
                                     const int indices[],
                                     unsigned indices_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     VectorFormat vm_form,
                                     unsigned vm_subvector_count,
                                     bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);
  VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));

  // Enable every feature any by-element instruction under test might need.
  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
  features.Combine(CPUFeatures::kFP);
  features.Combine(CPUFeatures::kRDM);
  features.Combine(CPUFeatures::kDotProduct);
  features.Combine(CPUFeatures::kFHM);
  SETUP_WITH_FEATURES(features);

  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);

  VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);

  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper =
      VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  // Pre-fill the working registers so the first EXT already operates on
  // real input data.
  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift one fresh lane of inputs_n into the low end of vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Likewise for inputs_m.
  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  __ Mov(vres, vd);
  {
    // One emitted instruction and one result store per tested index.
    for (unsigned i = 0; i < indices_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
      }
      __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
    }
  }

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Drive a by-element NEON test: run 'helper' for every (n, m, index)
// combination and either print the raw results (--generate_test_trace) or
// compare them bit-exactly against expected[]. vm_subvector_count describes
// how many subvectors Vm is treated as (dot-product style indexing).
template <typename Td, typename Tn, typename Tm>
static void TestByElementNEON(const char* name,
                              TestByElementNEONHelper_t helper,
                              const Td inputs_d[],
                              const Tn inputs_n[],
                              unsigned inputs_n_length,
                              const Tm inputs_m[],
                              unsigned inputs_m_length,
                              const int indices[],
                              unsigned indices_length,
                              const Td expected[],
                              unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form,
                              VectorFormat vm_form,
                              unsigned vm_subvector_count = 1) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_m_length > 0);
  VIXL_ASSERT(indices_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One full result vector per (n, m, index) triple.
  const unsigned results_length =
      inputs_n_length * inputs_m_length * indices_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();

  bool skipped;

  TestByElementNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_d),
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(inputs_m),
                           inputs_m_length,
                           indices,
                           indices_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form,
                           vn_form,
                           vm_form,
                           vm_subvector_count,
                           &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++) {
        for (unsigned index = 0; index < indices_length; index++, d++) {
          bool error_in_vector = false;

          // First pass: detect whether any lane of this result differs.
          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_m_length * indices_length * vd_lane_count) +
                (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
                lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Second pass: print the mismatching vector, capped at
          // kErrorReportLimit reports.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex + 1,
                   padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_m_length * indices_length * vd_lane_count) +
                  (m * indices_length * vd_lane_count) +
                  (index * vd_lane_count) + lane;
              // Undo the helper's EXT-based sliding window to recover the
              // input element that fed this lane.
              unsigned input_index_n =
                  (inputs_n_length - vd_lane_count + n + 1 + lane) %
                  inputs_n_length;
              unsigned input_index_m =
                  (inputs_m_length - vd_lane_count + m + 1 + lane) %
                  inputs_m_length;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
                     " "
                     "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] != expected[output_index] ? '*'
                                                                    : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_m[input_index_m]),
                     indices[index],
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====


// Emit and run code that applies 'helper' (an <INST> Vd, Vn, #imm
// macro-assembler method) once per entry of inputs_m[] (the immediates are
// baked into the generated instructions) for each inputs_n window, storing
// each result to 'results'. *skipped is set by TRY_RUN.
template <typename Tm>
void Test2OpImmNEON_Helper(
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    uintptr_t inputs_n,
    unsigned inputs_n_length,
    const Tm inputs_m[],
    unsigned inputs_m_length,
    uintptr_t results,
    VectorFormat vd_form,
    VectorFormat vn_form,
    bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  // Pre-fill vn so the first EXT already operates on real input data.
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift one fresh lane of inputs_n into the low end of vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero for tests such as '[r]shrn2'.
  // TODO: Setting the destination to values other than zero might be a better
  // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
  __ Movi(vd.V16B(), 0);

  {
    // One emitted instruction and one result store per immediate value.
    for (unsigned i = 0; i < inputs_m_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
      }
      __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Drive an <INST> Vd, Vn, #imm test: run 'helper' for every (n, imm)
// combination and either print the raw results (--generate_test_trace) or
// compare them bit-exactly against expected[].
template <typename Td, typename Tn, typename Tm>
static void Test2OpImmNEON(
    const char* name,
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    const Tn inputs_n[],
    unsigned inputs_n_length,
    const Tm inputs_m[],
    unsigned inputs_m_length,
    const Td expected[],
    unsigned expected_length,
    VectorFormat vd_form,
    VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One full result vector per (n, imm) pair.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  bool skipped;

  Test2OpImmNEON_Helper(helper,
                        reinterpret_cast<uintptr_t>(inputs_n),
                        inputs_n_length,
                        inputs_m,
                        inputs_m_length,
                        reinterpret_cast<uintptr_t>(results),
                        vd_form,
                        vn_form,
                        &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        // First pass: detect whether any lane of this result differs.
        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex,
                 padding,
                 lane_len_in_hex + 1,
                 padding);

          const unsigned first_index_n =
              inputs_n_length - (16 / vn_lane_bytes) + n + 1;

          // Print max(vd, vn) lanes so both narrowing and widening
          // instructions show all relevant input lanes.
          // NOTE(review): when vn_lane_count > vd_lane_count, output_index can
          // pass the last valid vd lane of this vector — appears benign for
          // the formats currently tested; confirm.
          for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
               lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
            unsigned input_index_m = m;

            printf("%c0x%0*" PRIx64
                   " | 0x%0*" PRIx64
                   " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_m[input_index_m]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====


// Emit and run code that applies 'helper' (an <INST> Vd, #imm1, Vn, #imm2
// Vd-updating macro-assembler method, e.g. 'ins') for every (imm1, imm2)
// pair per inputs_n window, re-seeding vres from vd before each application
// and storing each result to 'results'. *skipped is set by TRY_RUN.
static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
                                      uintptr_t inputs_d,
                                      const int inputs_imm1[],
                                      unsigned inputs_imm1_length,
                                      uintptr_t inputs_n,
                                      unsigned inputs_n_length,
                                      const int inputs_imm2[],
                                      unsigned inputs_imm2_length,
                                      uintptr_t results,
                                      VectorFormat vd_form,
                                      VectorFormat vn_form,
                                      bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_d_base = x1;
  Register inputs_n_base = x2;
  Register inputs_n_last_vector = x4;
  Register index_n = x6;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(4, vn_bits);
  VRegister vres = VRegister(5, vn_bits);

  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(4, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vd, MemOperand(inputs_d_base));

  // Pre-fill vn so the first EXT already operates on real input data.
  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift one fresh lane of inputs_n into the low end of vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  {
    // Three instructions (mov, helper, str) per (imm1, imm2) pair.
    EmissionCheckScope guard(&masm,
                             kInstructionSize * inputs_imm1_length *
                                 inputs_imm2_length * 3);
    for (unsigned i = 0; i < inputs_imm1_length; i++) {
      for (unsigned j = 0; j < inputs_imm2_length; j++) {
        __ Mov(vres, vd);
        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
        __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
      }
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Drive an <INST> Vd, #imm1, Vn, #imm2 (Vd-updating) test: run 'helper' for
// every (n, imm1, imm2) combination and either print the raw results
// (--generate_test_trace) or compare them bit-exactly against expected[].
template <typename Td, typename Tn>
static void TestOpImmOpImmNEON(const char* name,
                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
                               const Td inputs_d[],
                               const int inputs_imm1[],
                               unsigned inputs_imm1_length,
                               const Tn inputs_n[],
                               unsigned inputs_n_length,
                               const int inputs_imm2[],
                               unsigned inputs_imm2_length,
                               const Td expected[],
                               unsigned expected_length,
                               VectorFormat vd_form,
                               VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_imm1_length > 0);
  VIXL_ASSERT(inputs_imm2_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  // One full result vector per (n, imm1, imm2) triple.
  const unsigned results_length =
      inputs_n_length * inputs_imm1_length * inputs_imm2_length;

  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  bool skipped;

  TestOpImmOpImmNEON_Helper(helper,
                            reinterpret_cast<uintptr_t>(inputs_d),
                            inputs_imm1,
                            inputs_imm1_length,
                            reinterpret_cast<uintptr_t>(inputs_n),
                            inputs_n_length,
                            inputs_imm2,
                            inputs_imm2_length,
                            reinterpret_cast<uintptr_t>(results),
                            vd_form,
                            vn_form,
                            &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned counted_length = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
          bool error_in_vector = false;

          counted_length++;

          // First pass: detect whether any lane of this result differs.
          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
                (imm1 * inputs_imm2_length * vd_lane_count) +
                (imm2 * vd_lane_count) + lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Second pass: print the mismatching vector, capped at
          // kErrorReportLimit reports.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex,
                   padding,
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex,
                   padding,
                   lane_len_in_hex + 1,
                   padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_imm1_length * inputs_imm2_length *
                   vd_lane_count) +
                  (imm1 * inputs_imm2_length * vd_lane_count) +
                  (imm2 * vd_lane_count) + lane;
              // Undo the helper's EXT-based sliding window to recover the
              // input element that fed this lane.
              unsigned input_index_n =
                  (inputs_n_length - vd_lane_count + n + 1 + lane) %
                  inputs_n_length;
              unsigned input_index_imm1 = imm1;
              unsigned input_index_imm2 = imm2;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
                     " "
                     "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] != expected[output_index] ? '*'
                                                                    : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(counted_length == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Floating-point tests. ====


// Standard floating-point test expansion for both double- and single-precision
// operations.
#define STRINGIFY(s) #s

// Invoke the Test<type> driver for one MacroAssembler mnemonic/variant pair,
// wiring in the matching kExpected_* / kExpectedCount_* trace arrays.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input)  \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),     \
             &MacroAssembler::mnemonic,                      \
             input,                                          \
             sizeof(input) / sizeof(input[0]),               \
             kExpected_##mnemonic##_##variant,               \
             kExpectedCount_##mnemonic##_##variant)

// Define double- and single-precision TEST cases for one FP mnemonic.
#define DEFINE_TEST_FP(mnemonic, type, input)                     \
  TEST(mnemonic##_d) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
  }                                                               \
  TEST(mnemonic##_s) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
  }

// As DEFINE_TEST_FP, but also defines a half-precision TEST case.
#define DEFINE_TEST_FP_FP16(mnemonic, type, input)                \
  TEST(mnemonic##_d) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
  }                                                               \
  TEST(mnemonic##_s) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
  }                                                               \
  TEST(mnemonic##_h) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input); \
  }


// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting though an ASSERT and thus leaking
// memory.
// Three-operand (fused multiply-add family) FP tests.
DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)

// Two-operand FP arithmetic tests.
DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)

// One-operand FP tests (moves, negation, square root, rounding).
DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frint32x, 1Op, Conversions)
DEFINE_TEST_FP(frint64x, 1Op, Conversions)
DEFINE_TEST_FP(frint32z, 1Op, Conversions)
DEFINE_TEST_FP(frint64z, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)

// Comparison tests ('dz'/'sz' compare against zero).
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// Precision conversions (double<->single).
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }

// Define FP-to-integer TEST cases for one mnemonic: X and W destinations,
// each from double, single and half-precision sources.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)               \
  TEST(mnemonic##_xd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_xs) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_xh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \
  }                                                                \
  TEST(mnemonic##_wd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_ws) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_wh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \
  }

DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)

// FJCVTZS only has the W-from-double form.
#define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)           \
  TEST(mnemonic##_wd) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
  }

DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)

// TODO: Scvtf-fixed-point
// TODO: Scvtf-integer
// TODO: Ucvtf-fixed-point
// TODO: Ucvtf-integer

// TODO: Fccmp
// TODO: Fcsel


// ==== NEON Tests. ====

// Invoke Test1OpNEON for one mnemonic and destination format.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)

// Invoke Test1OpAcrossNEON (across-lanes reductions) for one mnemonic.
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)   \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
                        vnform),                                             \
                    &MacroAssembler::mnemonic,                               \
                    input_n,                                                 \
                    (sizeof(input_n) / sizeof(input_n[0])),                  \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,         \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,    \
                    kFormat##vdform,                                         \
                    kFormat##vnform)

// Invoke Test2OpNEON (three-operand, Vd/Vn/Vm) for one mnemonic.
#define CALL_TEST_NEON_HELPER_2Op(mnemonic,              \
                                  vdform,                \
                                  vnform,                \
                                  vmform,                \
                                  input_d,               \
                                  input_n,               \
                                  input_m)               \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
              &MacroAssembler::mnemonic,                 \
              input_d,                                   \
              input_n,                                   \
              (sizeof(input_n) / sizeof(input_n[0])),    \
              input_m,                                   \
              (sizeof(input_m) / sizeof(input_m[0])),    \
              kExpected_NEON_##mnemonic##_##vdform,      \
              kExpectedCount_NEON_##mnemonic##_##vdform, \
              kFormat##vdform,                           \
              kFormat##vnform,                           \
              kFormat##vmform)

// Invoke Test2OpImmNEON (Vd, Vn, #imm) for one mnemonic.
#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                        \
                                     vdform,                          \
                                     vnform,                          \
                                     input_n,                         \
                                     input_m)                         \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
                 &MacroAssembler::mnemonic,                           \
                 input_n,                                             \
                 (sizeof(input_n) / sizeof(input_n[0])),              \
                 input_m,                                             \
                 (sizeof(input_m) / sizeof(input_m[0])),              \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,       \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,  \
                 kFormat##vdform,                                     \
                 kFormat##vnform)

// Invoke TestByElementNEON (Vd, Vn, Vm[#index]) for one mnemonic.
#define CALL_TEST_NEON_HELPER_ByElement(mnemonic,                      \
                                        vdform,                        \
                                        vnform,                        \
                                        vmform,                        \
                                        input_d,                       \
                                        input_n,                       \
                                        input_m,                       \
                                        indices)                       \
  TestByElementNEON(                                                   \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(         \
          vnform) "_" STRINGIFY(vmform),                               \
      &MacroAssembler::mnemonic,                                       \
      input_d,                                                         \
      input_n,                                                         \
      (sizeof(input_n) / sizeof(input_n[0])),                          \
      input_m,                                                         \
      (sizeof(input_m) / sizeof(input_m[0])),                          \
      indices,                                                         \
      (sizeof(indices) / sizeof(indices[0])),                          \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,      \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
      kFormat##vdform,                                                 \
      kFormat##vnform,                                                 \
      kFormat##vmform)

// As CALL_TEST_NEON_HELPER_ByElement, but passes vm_subvector_count for
// dot-product-style instructions that index Vm subvectors.
#define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,           \
                                                    vdform,             \
                                                    vnform,             \
                                                    vmform,             \
                                                    input_d,            \
                                                    input_n,            \
                                                    input_m,            \
                                                    indices,            \
                                                    vm_subvector_count) \
  TestByElementNEON(                                                    \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(          \
          vnform) "_" STRINGIFY(vmform),                                \
      &MacroAssembler::mnemonic,                                        \
      input_d,                                                          \
      input_n,                                                          \
      (sizeof(input_n) / sizeof(input_n[0])),                           \
      input_m,                                                          \
      (sizeof(input_m) / sizeof(input_m[0])),                           \
      indices,                                                          \
      (sizeof(indices) / sizeof(indices[0])),                           \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,       \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,  \
      kFormat##vdform,                                                  \
      kFormat##vnform,                                                  \
      kFormat##vmform)

// Invoke TestOpImmOpImmNEON (Vd, #imm1, Vn, #imm2) for one helper/mnemonic.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                   \
                                         mnemonic,                 \
                                         vdform,                   \
                                         vnform,                   \
                                         input_d,                  \
                                         input_imm1,               \
                                         input_n,                  \
                                         input_imm2)               \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),    \
                     helper,                                       \
                     input_d,                                      \
                     input_imm1,                                   \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])), \
                     input_n,                                      \
                     (sizeof(input_n) / sizeof(input_n[0])),       \
                     input_imm2,                                   \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])), \
                     kExpected_NEON_##mnemonic##_##vdform,         \
kExpectedCount_NEON_##mnemonic##_##vdform, \ 3117 kFormat##vdform, \ 3118 kFormat##vnform) 3119 3120#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \ 3121 CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input) 3122 3123#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ 3124 TEST(mnemonic##_8B) { \ 3125 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \ 3126 } \ 3127 TEST(mnemonic##_16B) { \ 3128 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \ 3129 } 3130 3131#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \ 3132 TEST(mnemonic##_4H) { \ 3133 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \ 3134 } \ 3135 TEST(mnemonic##_8H) { \ 3136 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \ 3137 } 3138 3139#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ 3140 TEST(mnemonic##_2S) { \ 3141 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \ 3142 } \ 3143 TEST(mnemonic##_4S) { \ 3144 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \ 3145 } 3146 3147#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ 3148 DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ 3149 DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) 3150 3151#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ 3152 DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ 3153 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) 3154 3155#define DEFINE_TEST_NEON_2SAME(mnemonic, input) \ 3156 DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ 3157 TEST(mnemonic##_2D) { \ 3158 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ 3159 } 3160#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \ 3161 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ 3162 TEST(mnemonic##_2D) { \ 3163 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ 3164 } 3165 3166#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \ 3167 TEST(mnemonic##_2S) { \ 3168 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); 
\ 3169 } \ 3170 TEST(mnemonic##_4S) { \ 3171 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \ 3172 } \ 3173 TEST(mnemonic##_2D) { \ 3174 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \ 3175 } 3176 3177#define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, input) \ 3178 DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \ 3179 TEST(mnemonic##_4H) { \ 3180 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##input); \ 3181 } \ 3182 TEST(mnemonic##_8H) { \ 3183 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##input); \ 3184 } 3185 3186#define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, input) \ 3187 TEST(mnemonic##_H) { \ 3188 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##input); \ 3189 } \ 3190 TEST(mnemonic##_S) { \ 3191 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \ 3192 } \ 3193 TEST(mnemonic##_D) { \ 3194 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \ 3195 } 3196 3197#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ 3198 TEST(mnemonic##_B) { \ 3199 CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \ 3200 } 3201#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ 3202 TEST(mnemonic##_H) { \ 3203 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \ 3204 } 3205#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 3206 TEST(mnemonic##_S) { \ 3207 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \ 3208 } 3209#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \ 3210 TEST(mnemonic##_D) { \ 3211 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \ 3212 } 3213 3214#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \ 3215 DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ 3216 DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ 3217 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 3218 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) 3219 3220#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \ 3221 
DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 3222 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) 3223 3224 3225#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \ 3226 CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n) 3227 3228#define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \ 3229 TEST(mnemonic##_B_8B) { \ 3230 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \ 3231 } \ 3232 TEST(mnemonic##_B_16B) { \ 3233 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \ 3234 } \ 3235 TEST(mnemonic##_H_4H) { \ 3236 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \ 3237 } \ 3238 TEST(mnemonic##_H_8H) { \ 3239 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \ 3240 } \ 3241 TEST(mnemonic##_S_4S) { \ 3242 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \ 3243 } 3244 3245#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \ 3246 TEST(mnemonic##_H_8B) { \ 3247 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \ 3248 } \ 3249 TEST(mnemonic##_H_16B) { \ 3250 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \ 3251 } \ 3252 TEST(mnemonic##_S_4H) { \ 3253 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \ 3254 } \ 3255 TEST(mnemonic##_S_8H) { \ 3256 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \ 3257 } \ 3258 TEST(mnemonic##_D_4S) { \ 3259 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \ 3260 } 3261 3262#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \ 3263 TEST(mnemonic##_H_4H) { \ 3264 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##input); \ 3265 } \ 3266 TEST(mnemonic##_H_8H) { \ 3267 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##input); \ 3268 } \ 3269 TEST(mnemonic##_S_4S) { \ 3270 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \ 3271 } 3272 3273#define 
CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \ 3274 CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) 3275 3276#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \ 3277 TEST(mnemonic##_4H) { \ 3278 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \ 3279 } \ 3280 TEST(mnemonic##_8H) { \ 3281 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \ 3282 } \ 3283 TEST(mnemonic##_2S) { \ 3284 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \ 3285 } \ 3286 TEST(mnemonic##_4S) { \ 3287 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \ 3288 } \ 3289 TEST(mnemonic##_1D) { \ 3290 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \ 3291 } \ 3292 TEST(mnemonic##_2D) { \ 3293 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \ 3294 } 3295 3296#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \ 3297 TEST(mnemonic##_8B) { \ 3298 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \ 3299 } \ 3300 TEST(mnemonic##_4H) { \ 3301 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \ 3302 } \ 3303 TEST(mnemonic##_2S) { \ 3304 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \ 3305 } \ 3306 TEST(mnemonic##2_16B) { \ 3307 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \ 3308 } \ 3309 TEST(mnemonic##2_8H) { \ 3310 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \ 3311 } \ 3312 TEST(mnemonic##2_4S) { \ 3313 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \ 3314 } 3315 3316#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ 3317 TEST(mnemonic##_4S) { \ 3318 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ 3319 } \ 3320 TEST(mnemonic##_2D) { \ 3321 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ 3322 } \ 3323 TEST(mnemonic##2_4S) { \ 3324 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 
8H, kInputFloat16##input); \ 3325 } \ 3326 TEST(mnemonic##2_2D) { \ 3327 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ 3328 } 3329 3330#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ 3331 TEST(mnemonic##_4H) { \ 3332 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ 3333 } \ 3334 TEST(mnemonic##_2S) { \ 3335 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 3336 } \ 3337 TEST(mnemonic##2_8H) { \ 3338 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ 3339 } \ 3340 TEST(mnemonic##2_4S) { \ 3341 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ 3342 } 3343 3344#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ 3345 TEST(mnemonic##_2S) { \ 3346 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 3347 } \ 3348 TEST(mnemonic##2_4S) { \ 3349 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ 3350 } 3351 3352#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ 3353 TEST(mnemonic##_B) { \ 3354 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ 3355 } \ 3356 TEST(mnemonic##_H) { \ 3357 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ 3358 } \ 3359 TEST(mnemonic##_S) { \ 3360 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ 3361 } 3362 3363#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ 3364 TEST(mnemonic##_S) { \ 3365 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ 3366 } \ 3367 TEST(mnemonic##_D) { \ 3368 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ 3369 } \ 3370 TEST(mnemonic##_H) { \ 3371 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##input); \ 3372 } 3373 3374#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \ 3375 { \ 3376 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 3377 variant, \ 3378 variant, \ 3379 variant, \ 3380 input_d, \ 3381 input_nm, \ 
3382 input_nm); \ 3383 } 3384 3385#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 3386 TEST(mnemonic##_8B) { \ 3387 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3388 8B, \ 3389 kInput8bitsAccDestination, \ 3390 kInput8bits##input); \ 3391 } \ 3392 TEST(mnemonic##_16B) { \ 3393 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3394 16B, \ 3395 kInput8bitsAccDestination, \ 3396 kInput8bits##input); \ 3397 } 3398 3399#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ 3400 TEST(mnemonic##_4H) { \ 3401 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3402 4H, \ 3403 kInput16bitsAccDestination, \ 3404 kInput16bits##input); \ 3405 } \ 3406 TEST(mnemonic##_8H) { \ 3407 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3408 8H, \ 3409 kInput16bitsAccDestination, \ 3410 kInput16bits##input); \ 3411 } \ 3412 TEST(mnemonic##_2S) { \ 3413 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3414 2S, \ 3415 kInput32bitsAccDestination, \ 3416 kInput32bits##input); \ 3417 } \ 3418 TEST(mnemonic##_4S) { \ 3419 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3420 4S, \ 3421 kInput32bitsAccDestination, \ 3422 kInput32bits##input); \ 3423 } 3424 3425#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 3426 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 3427 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) 3428 3429#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ 3430 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 3431 TEST(mnemonic##_2D) { \ 3432 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3433 2D, \ 3434 kInput64bitsAccDestination, \ 3435 kInput64bits##input); \ 3436 } 3437 3438#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ 3439 TEST(mnemonic##_4H) { \ 3440 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3441 4H, \ 3442 kInputFloat16AccDestination, \ 3443 kInputFloat16##input); \ 3444 } \ 3445 TEST(mnemonic##_8H) { \ 3446 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3447 8H, \ 3448 kInputFloat16AccDestination, \ 3449 kInputFloat16##input); \ 3450 } \ 3451 TEST(mnemonic##_2S) { \ 3452 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3453 2S, \ 3454 
kInputFloatAccDestination, \ 3455 kInputFloat##input); \ 3456 } \ 3457 TEST(mnemonic##_4S) { \ 3458 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3459 4S, \ 3460 kInputFloatAccDestination, \ 3461 kInputFloat##input); \ 3462 } \ 3463 TEST(mnemonic##_2D) { \ 3464 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3465 2D, \ 3466 kInputDoubleAccDestination, \ 3467 kInputDouble##input); \ 3468 } 3469 3470#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ 3471 TEST(mnemonic##_D) { \ 3472 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3473 D, \ 3474 kInput64bitsAccDestination, \ 3475 kInput64bits##input); \ 3476 } 3477 3478#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ 3479 TEST(mnemonic##_H) { \ 3480 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3481 H, \ 3482 kInput16bitsAccDestination, \ 3483 kInput16bits##input); \ 3484 } \ 3485 TEST(mnemonic##_S) { \ 3486 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3487 S, \ 3488 kInput32bitsAccDestination, \ 3489 kInput32bits##input); \ 3490 } 3491 3492#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ 3493 TEST(mnemonic##_B) { \ 3494 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3495 B, \ 3496 kInput8bitsAccDestination, \ 3497 kInput8bits##input); \ 3498 } \ 3499 TEST(mnemonic##_H) { \ 3500 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3501 H, \ 3502 kInput16bitsAccDestination, \ 3503 kInput16bits##input); \ 3504 } \ 3505 TEST(mnemonic##_S) { \ 3506 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3507 S, \ 3508 kInput32bitsAccDestination, \ 3509 kInput32bits##input); \ 3510 } \ 3511 TEST(mnemonic##_D) { \ 3512 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3513 D, \ 3514 kInput64bitsAccDestination, \ 3515 kInput64bits##input); \ 3516 } 3517 3518#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ 3519 TEST(mnemonic##_H) { \ 3520 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3521 H, \ 3522 kInputFloat16AccDestination, \ 3523 kInputFloat16##input); \ 3524 } \ 3525 TEST(mnemonic##_S) { \ 3526 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3527 S, \ 3528 
kInputFloatAccDestination, \ 3529 kInputFloat##input); \ 3530 } \ 3531 TEST(mnemonic##_D) { \ 3532 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3533 D, \ 3534 kInputDoubleAccDestination, \ 3535 kInputDouble##input); \ 3536 } 3537 3538#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m) \ 3539 TEST(mnemonic##_2S) { \ 3540 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3541 2S, \ 3542 2H, \ 3543 2H, \ 3544 kInputFloatAccDestination, \ 3545 kInputFloat16##input_n, \ 3546 kInputFloat16##input_m); \ 3547 } \ 3548 TEST(mnemonic##_4S) { \ 3549 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3550 4S, \ 3551 4H, \ 3552 4H, \ 3553 kInputFloatAccDestination, \ 3554 kInputFloat16##input_n, \ 3555 kInputFloat16##input_m); \ 3556 } 3557 3558#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3559 vdform, \ 3560 vnform, \ 3561 vmform, \ 3562 input_d, \ 3563 input_n, \ 3564 input_m) \ 3565 { \ 3566 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 3567 vdform, \ 3568 vnform, \ 3569 vmform, \ 3570 input_d, \ 3571 input_n, \ 3572 input_m); \ 3573 } 3574 3575#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3576 TEST(mnemonic##_8H) { \ 3577 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3578 8H, \ 3579 8B, \ 3580 8B, \ 3581 kInput16bitsAccDestination, \ 3582 kInput8bits##input, \ 3583 kInput8bits##input); \ 3584 } \ 3585 TEST(mnemonic##2_8H) { \ 3586 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3587 8H, \ 3588 16B, \ 3589 16B, \ 3590 kInput16bitsAccDestination, \ 3591 kInput8bits##input, \ 3592 kInput8bits##input); \ 3593 } 3594 3595#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3596 TEST(mnemonic##_4S) { \ 3597 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3598 4S, \ 3599 4H, \ 3600 4H, \ 3601 kInput32bitsAccDestination, \ 3602 kInput16bits##input, \ 3603 kInput16bits##input); \ 3604 } \ 3605 TEST(mnemonic##2_4S) { \ 3606 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3607 4S, \ 3608 8H, \ 3609 8H, \ 3610 kInput32bitsAccDestination, \ 3611 kInput16bits##input, \ 3612 kInput16bits##input); \ 3613 } 3614 
3615#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ 3616 TEST(mnemonic##_2D) { \ 3617 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3618 2D, \ 3619 2S, \ 3620 2S, \ 3621 kInput64bitsAccDestination, \ 3622 kInput32bits##input, \ 3623 kInput32bits##input); \ 3624 } \ 3625 TEST(mnemonic##2_2D) { \ 3626 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3627 2D, \ 3628 4S, \ 3629 4S, \ 3630 kInput64bitsAccDestination, \ 3631 kInput32bits##input, \ 3632 kInput32bits##input); \ 3633 } 3634 3635#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ 3636 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3637 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3638 3639#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ 3640 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3641 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3642 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3643 3644#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3645 TEST(mnemonic##_S) { \ 3646 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3647 S, \ 3648 H, \ 3649 H, \ 3650 kInput32bitsAccDestination, \ 3651 kInput16bits##input, \ 3652 kInput16bits##input); \ 3653 } 3654 3655#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ 3656 TEST(mnemonic##_D) { \ 3657 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3658 D, \ 3659 S, \ 3660 S, \ 3661 kInput64bitsAccDestination, \ 3662 kInput32bits##input, \ 3663 kInput32bits##input); \ 3664 } 3665 3666#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ 3667 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3668 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) 3669 3670#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ 3671 TEST(mnemonic##_8H) { \ 3672 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3673 8H, \ 3674 8H, \ 3675 8B, \ 3676 kInput16bitsAccDestination, \ 3677 kInput16bits##input, \ 3678 kInput8bits##input); \ 3679 } \ 3680 TEST(mnemonic##_4S) { \ 3681 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3682 4S, \ 3683 4S, \ 3684 
4H, \ 3685 kInput32bitsAccDestination, \ 3686 kInput32bits##input, \ 3687 kInput16bits##input); \ 3688 } \ 3689 TEST(mnemonic##_2D) { \ 3690 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3691 2D, \ 3692 2D, \ 3693 2S, \ 3694 kInput64bitsAccDestination, \ 3695 kInput64bits##input, \ 3696 kInput32bits##input); \ 3697 } \ 3698 TEST(mnemonic##2_8H) { \ 3699 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3700 8H, \ 3701 8H, \ 3702 16B, \ 3703 kInput16bitsAccDestination, \ 3704 kInput16bits##input, \ 3705 kInput8bits##input); \ 3706 } \ 3707 TEST(mnemonic##2_4S) { \ 3708 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3709 4S, \ 3710 4S, \ 3711 8H, \ 3712 kInput32bitsAccDestination, \ 3713 kInput32bits##input, \ 3714 kInput16bits##input); \ 3715 } \ 3716 TEST(mnemonic##2_2D) { \ 3717 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3718 2D, \ 3719 2D, \ 3720 4S, \ 3721 kInput64bitsAccDestination, \ 3722 kInput64bits##input, \ 3723 kInput32bits##input); \ 3724 } 3725 3726#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ 3727 TEST(mnemonic##_8B) { \ 3728 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3729 8B, \ 3730 8H, \ 3731 8H, \ 3732 kInput8bitsAccDestination, \ 3733 kInput16bits##input, \ 3734 kInput16bits##input); \ 3735 } \ 3736 TEST(mnemonic##_4H) { \ 3737 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3738 4H, \ 3739 4S, \ 3740 4S, \ 3741 kInput16bitsAccDestination, \ 3742 kInput32bits##input, \ 3743 kInput32bits##input); \ 3744 } \ 3745 TEST(mnemonic##_2S) { \ 3746 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3747 2S, \ 3748 2D, \ 3749 2D, \ 3750 kInput32bitsAccDestination, \ 3751 kInput64bits##input, \ 3752 kInput64bits##input); \ 3753 } \ 3754 TEST(mnemonic##2_16B) { \ 3755 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3756 16B, \ 3757 8H, \ 3758 8H, \ 3759 kInput8bitsAccDestination, \ 3760 kInput16bits##input, \ 3761 kInput16bits##input); \ 3762 } \ 3763 TEST(mnemonic##2_8H) { \ 3764 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3765 8H, \ 3766 4S, \ 3767 4S, \ 3768 kInput16bitsAccDestination, \ 
3769 kInput32bits##input, \ 3770 kInput32bits##input); \ 3771 } \ 3772 TEST(mnemonic##2_4S) { \ 3773 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3774 4S, \ 3775 2D, \ 3776 2D, \ 3777 kInput32bitsAccDestination, \ 3778 kInput64bits##input, \ 3779 kInput64bits##input); \ 3780 } 3781 3782#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \ 3783 TEST(mnemonic##_2S) { \ 3784 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3785 2S, \ 3786 8B, \ 3787 8B, \ 3788 kInput32bitsAccDestination, \ 3789 kInput8bits##input, \ 3790 kInput8bits##input); \ 3791 } \ 3792 TEST(mnemonic##_4S) { \ 3793 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3794 4S, \ 3795 16B, \ 3796 16B, \ 3797 kInput32bitsAccDestination, \ 3798 kInput8bits##input, \ 3799 kInput8bits##input); \ 3800 } 3801 3802 3803#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3804 vdform, \ 3805 vnform, \ 3806 input_n, \ 3807 input_imm) \ 3808 { \ 3809 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ 3810 vdform, \ 3811 vnform, \ 3812 input_n, \ 3813 input_imm); \ 3814 } 3815 3816#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ 3817 TEST(mnemonic##_8B_2OPIMM) { \ 3818 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3819 8B, \ 3820 8B, \ 3821 kInput8bits##input, \ 3822 kInput8bitsImm##input_imm); \ 3823 } \ 3824 TEST(mnemonic##_16B_2OPIMM) { \ 3825 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3826 16B, \ 3827 16B, \ 3828 kInput8bits##input, \ 3829 kInput8bitsImm##input_imm); \ 3830 } \ 3831 TEST(mnemonic##_4H_2OPIMM) { \ 3832 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3833 4H, \ 3834 4H, \ 3835 kInput16bits##input, \ 3836 kInput16bitsImm##input_imm); \ 3837 } \ 3838 TEST(mnemonic##_8H_2OPIMM) { \ 3839 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3840 8H, \ 3841 8H, \ 3842 kInput16bits##input, \ 3843 kInput16bitsImm##input_imm); \ 3844 } \ 3845 TEST(mnemonic##_2S_2OPIMM) { \ 3846 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3847 2S, \ 3848 2S, \ 3849 kInput32bits##input, \ 3850 kInput32bitsImm##input_imm); \ 3851 } \ 3852 TEST(mnemonic##_4S_2OPIMM) 
{ \ 3853 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3854 4S, \ 3855 4S, \ 3856 kInput32bits##input, \ 3857 kInput32bitsImm##input_imm); \ 3858 } \ 3859 TEST(mnemonic##_2D_2OPIMM) { \ 3860 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3861 2D, \ 3862 2D, \ 3863 kInput64bits##input, \ 3864 kInput64bitsImm##input_imm); \ 3865 } 3866 3867#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ 3868 TEST(mnemonic##_8B_2OPIMM) { \ 3869 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3870 8B, \ 3871 B, \ 3872 kInput8bits##input, \ 3873 kInput8bitsImm##input_imm); \ 3874 } \ 3875 TEST(mnemonic##_16B_2OPIMM) { \ 3876 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3877 16B, \ 3878 B, \ 3879 kInput8bits##input, \ 3880 kInput8bitsImm##input_imm); \ 3881 } \ 3882 TEST(mnemonic##_4H_2OPIMM) { \ 3883 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3884 4H, \ 3885 H, \ 3886 kInput16bits##input, \ 3887 kInput16bitsImm##input_imm); \ 3888 } \ 3889 TEST(mnemonic##_8H_2OPIMM) { \ 3890 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3891 8H, \ 3892 H, \ 3893 kInput16bits##input, \ 3894 kInput16bitsImm##input_imm); \ 3895 } \ 3896 TEST(mnemonic##_2S_2OPIMM) { \ 3897 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3898 2S, \ 3899 S, \ 3900 kInput32bits##input, \ 3901 kInput32bitsImm##input_imm); \ 3902 } \ 3903 TEST(mnemonic##_4S_2OPIMM) { \ 3904 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3905 4S, \ 3906 S, \ 3907 kInput32bits##input, \ 3908 kInput32bitsImm##input_imm); \ 3909 } \ 3910 TEST(mnemonic##_2D_2OPIMM) { \ 3911 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3912 2D, \ 3913 D, \ 3914 kInput64bits##input, \ 3915 kInput64bitsImm##input_imm); \ 3916 } 3917 3918#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ 3919 TEST(mnemonic##_8B_2OPIMM) { \ 3920 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3921 8B, \ 3922 8H, \ 3923 kInput16bits##input, \ 3924 kInput8bitsImm##input_imm); \ 3925 } \ 3926 TEST(mnemonic##_4H_2OPIMM) { \ 3927 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3928 4H, \ 3929 4S, \ 3930 
kInput32bits##input, \ 3931 kInput16bitsImm##input_imm); \ 3932 } \ 3933 TEST(mnemonic##_2S_2OPIMM) { \ 3934 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3935 2S, \ 3936 2D, \ 3937 kInput64bits##input, \ 3938 kInput32bitsImm##input_imm); \ 3939 } \ 3940 TEST(mnemonic##2_16B_2OPIMM) { \ 3941 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3942 16B, \ 3943 8H, \ 3944 kInput16bits##input, \ 3945 kInput8bitsImm##input_imm); \ 3946 } \ 3947 TEST(mnemonic##2_8H_2OPIMM) { \ 3948 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3949 8H, \ 3950 4S, \ 3951 kInput32bits##input, \ 3952 kInput16bitsImm##input_imm); \ 3953 } \ 3954 TEST(mnemonic##2_4S_2OPIMM) { \ 3955 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3956 4S, \ 3957 2D, \ 3958 kInput64bits##input, \ 3959 kInput32bitsImm##input_imm); \ 3960 } 3961 3962#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ 3963 TEST(mnemonic##_B_2OPIMM) { \ 3964 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3965 B, \ 3966 H, \ 3967 kInput16bits##input, \ 3968 kInput8bitsImm##input_imm); \ 3969 } \ 3970 TEST(mnemonic##_H_2OPIMM) { \ 3971 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3972 H, \ 3973 S, \ 3974 kInput32bits##input, \ 3975 kInput16bitsImm##input_imm); \ 3976 } \ 3977 TEST(mnemonic##_S_2OPIMM) { \ 3978 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3979 S, \ 3980 D, \ 3981 kInput64bits##input, \ 3982 kInput32bitsImm##input_imm); \ 3983 } 3984 3985#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ 3986 TEST(mnemonic##_4H_2OPIMM) { \ 3987 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3988 4H, \ 3989 4H, \ 3990 kInputFloat16##input, \ 3991 kInputDoubleImm##input_imm); \ 3992 } \ 3993 TEST(mnemonic##_8H_2OPIMM) { \ 3994 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3995 8H, \ 3996 8H, \ 3997 kInputFloat16##input, \ 3998 kInputDoubleImm##input_imm); \ 3999 } \ 4000 TEST(mnemonic##_2S_2OPIMM) { \ 4001 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4002 2S, \ 4003 2S, \ 4004 kInputFloat##Basic, \ 4005 kInputDoubleImm##input_imm); \ 4006 
} \ 4007 TEST(mnemonic##_4S_2OPIMM) { \ 4008 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4009 4S, \ 4010 4S, \ 4011 kInputFloat##input, \ 4012 kInputDoubleImm##input_imm); \ 4013 } \ 4014 TEST(mnemonic##_2D_2OPIMM) { \ 4015 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4016 2D, \ 4017 2D, \ 4018 kInputDouble##input, \ 4019 kInputDoubleImm##input_imm); \ 4020 } 4021 4022#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ 4023 TEST(mnemonic##_4H_2OPIMM) { \ 4024 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4025 4H, \ 4026 4H, \ 4027 kInputFloat16##input, \ 4028 kInput16bitsImm##input_imm); \ 4029 } \ 4030 TEST(mnemonic##_8H_2OPIMM) { \ 4031 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4032 8H, \ 4033 8H, \ 4034 kInputFloat16##input, \ 4035 kInput16bitsImm##input_imm); \ 4036 } \ 4037 TEST(mnemonic##_2S_2OPIMM) { \ 4038 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4039 2S, \ 4040 2S, \ 4041 kInputFloat##Basic, \ 4042 kInput32bitsImm##input_imm); \ 4043 } \ 4044 TEST(mnemonic##_4S_2OPIMM) { \ 4045 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4046 4S, \ 4047 4S, \ 4048 kInputFloat##input, \ 4049 kInput32bitsImm##input_imm); \ 4050 } \ 4051 TEST(mnemonic##_2D_2OPIMM) { \ 4052 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4053 2D, \ 4054 2D, \ 4055 kInputDouble##input, \ 4056 kInput64bitsImm##input_imm); \ 4057 } 4058 4059#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ 4060 TEST(mnemonic##_H_2OPIMM) { \ 4061 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4062 H, \ 4063 H, \ 4064 kInputFloat16##Basic, \ 4065 kInput16bitsImm##input_imm); \ 4066 } \ 4067 TEST(mnemonic##_S_2OPIMM) { \ 4068 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4069 S, \ 4070 S, \ 4071 kInputFloat##Basic, \ 4072 kInput32bitsImm##input_imm); \ 4073 } \ 4074 TEST(mnemonic##_D_2OPIMM) { \ 4075 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4076 D, \ 4077 D, \ 4078 kInputDouble##input, \ 4079 kInput64bitsImm##input_imm); \ 4080 } 4081 4082#define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \ 4083 
  TEST(mnemonic##_4H_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); } \
  TEST(mnemonic##_8H_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); } \
  TEST(mnemonic##_2S_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); } \
  TEST(mnemonic##_4S_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); } \
  TEST(mnemonic##_2D_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); }

// Define a test for the scalar, D-sized (64-bit) form of an instruction
// taking one register operand and one immediate.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, kInput64bitsImm##input_imm); }

// As above, but covering the H, S and D scalar forms.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, kInput16bitsImm##input_imm); } \
  TEST(mnemonic##_S_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, kInput32bitsImm##input_imm); } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)

// FP variant of the scalar D-form test: double-typed input and immediate
// lists.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, kInputDoubleImm##input_imm); }

// FP variant covering the H, S and D scalar forms. Note that the immediate
// list is the double-typed one for all three element sizes.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInputFloat16##input, kInputDoubleImm##input_imm); } \
  TEST(mnemonic##_S_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, kInputDoubleImm##input_imm); } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)

// All scalar forms: B, H, S and D.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, kInput8bitsImm##input_imm); } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)

// Long (widening) forms, where the destination elements are twice the size
// of the sources, including the <mnemonic>2 upper-half variants.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, kInput8bitsImm##input_imm); } \
  TEST(mnemonic##_4S_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, kInput16bitsImm##input_imm); } \
  TEST(mnemonic##_2D_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, kInput32bitsImm##input_imm); } \
  TEST(mnemonic##2_8H_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, kInput8bitsImm##input_imm); } \
  TEST(mnemonic##2_4S_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, kInput16bitsImm##input_imm); } \
  TEST(mnemonic##2_2D_2OPIMM) { CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, kInput32bitsImm##input_imm); }

// Thin wrapper forwarding to the by-element dot-product test helper.
#define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices, vm_subvector_count) \
  {                                                                          \
    CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices, vm_subvector_count); \
  }

// Dot-product by-element tests: 32-bit accumulators over groups of four
// 8-bit elements (hence the subvector count of 4).
#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_8B_B) { CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, 2S, 8B, B, kInput32bits##input_d, kInput8bits##input_n, kInput8bits##input_m, kInputSIndices, 4); } \
  TEST(mnemonic##_4S_16B_B) { CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, 4S, 16B, B, kInput32bits##input_d, kInput8bits##input_n, kInput8bits##input_m, kInputSIndices, 4); }

// Thin wrapper forwarding to the generic by-element test helper.
#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
  {                                                                          \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices); \
  }

// Integer by-element (vector x indexed element) tests for the H and S
// element sizes, in both half- and full-width vector forms.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4H, 4H, H, kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, kInputHIndices); } \
  TEST(mnemonic##_8H_8H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 8H, 8H, H, kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, kInputHIndices); } \
  TEST(mnemonic##_2S_2S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, kInputSIndices); } \
  TEST(mnemonic##_4S_4S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, kInputSIndices); }

// Scalar integer by-element tests (H and S forms).
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, kInputHIndices); } \
  TEST(mnemonic##_S_S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, kInputSIndices); }

// FP by-element tests: FP16, single and double element sizes.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4H, 4H, H, kInputFloat16##input_d, kInputFloat16##input_n, kInputFloat16##input_m, kInputHIndices); } \
  TEST(mnemonic##_8H_8H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 8H, 8H, H, kInputFloat16##input_d, kInputFloat16##input_n, kInputFloat16##input_m, kInputHIndices); } \
  TEST(mnemonic##_2S_2S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInputFloat##input_d, kInputFloat##input_n, kInputFloat##input_m, kInputSIndices); } \
  TEST(mnemonic##_4S_4S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInputFloat##input_d, kInputFloat##input_n, kInputFloat##input_m, kInputSIndices); } \
  TEST(mnemonic##_2D_2D_D) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, kInputDouble##input_m, kInputDIndices); }

// FHM (FMLAL/FMLSL) by-element tests: single-precision accumulators with
// half-precision sources.
// NOTE(review): the `input_d` parameter is accepted but not used; the
// accumulator inputs are fixed to kInputFloatAccDestination.
#define DEFINE_TEST_NEON_FHM_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2H, H, kInputFloatAccDestination, kInputFloat16##input_n, kInputFloat16##input_m, kInputHIndices); } \
  TEST(mnemonic##_4S_4H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4H, H, kInputFloatAccDestination, kInputFloat16##input_n, kInputFloat16##input_m, kInputHIndices); }

// Scalar FP by-element tests (H, S and D forms).
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_H_H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInputFloat16##inp_d, kInputFloat16##inp_n, kInputFloat16##inp_m, kInputHIndices); } \
  TEST(mnemonic##_S_S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInputFloat##inp_d, kInputFloat##inp_n, kInputFloat##inp_m, kInputSIndices); } \
  TEST(mnemonic##_D_D_D) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, kInputDouble##inp_d, kInputDouble##inp_n, kInputDouble##inp_m, kInputDIndices); }


// Widening by-element tests, including the <mnemonic>2 upper-half variants.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, kInputHIndices); } \
  TEST(mnemonic##2_4S_8H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, kInputHIndices); } \
  TEST(mnemonic##_2D_2S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, kInputSIndices); } \
  TEST(mnemonic##2_2D_4S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, kInputSIndices); }

// Scalar widening by-element tests.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_S_H_H) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, kInputHIndices); } \
  TEST(mnemonic##_D_S_S) { CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, kInputSIndices); }


// Thin wrapper forwarding to the helper for instructions taking two
// register operands and two immediates (e.g. INS).
#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
  {                                                                       \
    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, variant, variant, input_d, input_imm1, input_n, input_imm2); \
  }

// Two-operand, two-immediate tests for each element size (full-width
// vector forms only).
#define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, input_imm2) \
  TEST(mnemonic##_B) { CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, kInput8bits##input_n, kInput8bitsImm##input_imm2); } \
  TEST(mnemonic##_H) { CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, kInput16bits##input_n, kInput16bitsImm##input_imm2); } \
  TEST(mnemonic##_S) { CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, kInput32bits##input_n, kInput32bitsImm##input_imm2); } \
  TEST(mnemonic##_D) { CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, kInput64bits##input_n, kInput64bitsImm##input_imm2); }


// Advanced SIMD copy.
DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)


// Advanced SIMD scalar copy.
DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)


// Advanced SIMD three same.
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
// Bitwise logical operations only exist in the 8B/16B forms.
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)


// Advanced SIMD scalar three same.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)


// Advanced SIMD FHM instructions (FMLAL, FMLSL).
// These are oddballs: they are encoded under the 3SAME group but behave
// quite differently.
DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)


// Advanced SIMD three different.
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)


// Advanced SIMD scalar three different.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)


// Advanced SIMD scalar pairwise.
// ADDP (scalar) only exists in the D <- 2D form, so it gets a hand-written
// test rather than a DEFINE_* macro.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)


// Advanced SIMD shift by immediate.
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD scalar shift by immediate.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD two-register miscellaneous.
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)


// Advanced SIMD scalar two-register miscellaneous.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// FCVTXN (scalar) only exists in the S <- D form, so it gets a hand-written
// test rather than a DEFINE_* macro.
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)


// Advanced SIMD across lanes.
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)


// Advanced SIMD permute.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)


// Advanced SIMD vector x indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)


// Advanced SIMD scalar x indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)


DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)


// From here on, the generator functions receive the MacroAssembler by
// pointer, so `__` is redefined accordingly.
#undef __
#define __ masm->

#if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && \
    defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))

// Generate a function that stores zero to a hard-coded address.
// The address of `target` is baked into the generated code, so the code is
// only valid while `target` is alive.
Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
  masm->Reset();

  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  __ Mov(temp, reinterpret_cast<intptr_t>(target));
  __ Str(wzr, MemOperand(temp));
  __ Ret();

  masm->FinalizeCode();
  // Return the entry point of the generated code.
  return masm->GetBuffer()->GetStartAddress<Instruction*>();
}


// Generate a function that stores the `int32_t` argument to a hard-coded
// address.
// In this example and the other below, we use the `abi` object to retrieve
// argument and return locations even though we could easily hard code them.
// This mirrors how more generic code (e.g. templated) user would use these
// mechanisms.
Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
  masm->Reset();

  ABI abi;
  // Location of the first (and only) integer argument, per the AAPCS64 ABI.
  Register input =
      Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());

  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();
  __ Mov(temp, reinterpret_cast<intptr_t>(target));
  __ Str(input, MemOperand(temp));
  __ Ret();

  masm->FinalizeCode();
  return masm->GetBuffer()->GetStartAddress<Instruction*>();
}


// A minimal implementation of a `pow` function.
// Computes input^pow by repeated multiplication; `pow == 0` yields 1.
Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
  masm->Reset();

  ABI abi;
  Register input =
      Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
  Register result =
      Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();

  // The exponent is fixed at generation time, so the loop is fully unrolled
  // into `pow` multiply instructions.
  __ Mov(temp, 1);
  for (unsigned i = 0; i < pow; i++) {
    __ Mul(temp, temp, input);
  }
  __ Mov(result, temp);
  __ Ret();

  masm->FinalizeCode();
  return masm->GetBuffer()->GetStartAddress<Instruction*>();
}


// Generate a function summing three arguments of mixed types
// (float, int64_t, double), returning the sum as a double. This exercises
// argument passing in both general-purpose and FP registers.
Instruction* GenerateSum(MacroAssembler* masm) {
  masm->Reset();

  ABI abi;
  VRegister input_1 =
      VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
  Register input_2 =
      Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
  VRegister input_3 =
      VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
  VRegister result =
      VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());

  UseScratchRegisterScope temps(masm);
  VRegister temp = temps.AcquireD();

  // Convert both the float and the integer argument to double before adding.
  __ Fcvt(input_1.D(), input_1);
  __ Scvtf(temp, input_2);
  __ Fadd(temp, temp, input_1.D());
  __ Fadd(result, temp, input_3);
  __ Ret();

  masm->FinalizeCode();
  return masm->GetBuffer()->GetStartAddress<Instruction*>();
}


// Check that Simulator::RunFrom handles various signatures of generated
// functions: void/no-arg, void/one-arg, value-returning, and mixed
// integer/FP arguments.
TEST(RunFrom) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  // Run a function returning `void` and taking no argument.
  // The 0xbad sentinel lets us detect whether the store actually happened.
  int32_t value = 0xbad;
  simulator.RunFrom(GenerateStoreZero(&masm, &value));
  VIXL_CHECK(value == 0);

  // Run a function returning `void` and taking one argument.
  int32_t argument = 0xf00d;
  simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &value), argument);
  VIXL_CHECK(value == 0xf00d);

  // Run a function taking one argument and returning a value.
  int64_t res_int64_t;
  res_int64_t =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
  VIXL_CHECK(res_int64_t == 1);
  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
  VIXL_CHECK(res_int64_t == 123);
  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
  VIXL_CHECK(res_int64_t == 1024);

  // Run a function taking multiple arguments in registers.
  double res_double =
      simulator.RunFrom<double, float, int64_t, double>(GenerateSum(&masm),
                                                        1.0,
                                                        2,
                                                        3.0);
  VIXL_CHECK(res_double == 6.0);
}
#endif


}  // namespace aarch64
}  // namespace vixl