1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <cfloat>
28#include <cstdio>
29#include <sstream>
30
31#include "test-runner.h"
32#include "test-utils.h"
33
34#include "aarch64/cpu-features-auditor-aarch64.h"
35#include "aarch64/macro-assembler-aarch64.h"
36#include "aarch64/simulator-aarch64.h"
37#include "aarch64/test-simulator-inputs-aarch64.h"
38#include "aarch64/test-simulator-traces-aarch64.h"
39#include "aarch64/test-utils-aarch64.h"
40
41namespace vixl {
42namespace aarch64 {
43
44// ==== Simulator Tests ====
45//
46// These simulator tests check instruction behaviour against a trace taken from
47// real AArch64 hardware. The same test code is used to generate the trace; the
48// results are printed to stdout when the test is run with
49// --generate_test_trace.
50//
51// The input lists and expected results are stored in test/traces. The expected
52// results can be regenerated using tools/generate_simulator_traces.py. Adding a
53// test for a new instruction is described at the top of
54// test-simulator-traces-aarch64.h.
55
// Shorthand used when emitting code: `__ Foo(...)` expands to `masm.Foo(...)`,
// so it requires a MacroAssembler named `masm` in scope.
#define __ masm.
// Wraps TEST_, prefixing the given test name with AARCH64_SIM_.
#define TEST(name) TEST_(AARCH64_SIM_##name)

// Set up a test using a default-constructed CPUFeatures object.
#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())
60
61#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
62
// Simulator variant: declares the `masm`, `decoder` and `simulator` locals
// that the other test macros refer to. The macro arguments are forwarded to
// the CPUFeatures constructor and the result is applied to the MacroAssembler.
// The expansion already ends with a semicolon.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());
69
// Simulator variant: reset the assembler and the simulator state, then emit
// the test prologue. A Trace pseudo-instruction is only emitted for each trace
// option that Test reports as enabled. Requires `masm` and `simulator` (as
// declared by SETUP_WITH_FEATURES) in scope.
#define START()                                                         \
  masm.Reset();                                                         \
  simulator.ResetState();                                               \
  __ PushCalleeSavedRegisters();                                        \
  /* The infrastructure code hasn't been covered at the moment, e.g. */ \
  /* prologue/epilogue. Suppress tagging mis-match exception before  */ \
  /* this point. */                                                     \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                  \
    __ Hlt(DebugHltOpcode::kMTEActive);                                 \
  }                                                                     \
  if (Test::trace_reg()) {                                              \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_write()) {                                            \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_sim()) {                                              \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                 \
  }
89
// Simulator variant: emit the test epilogue (the kMTEInactive Hlt when the
// assembler has kMTE enabled, disable all tracing, restore the callee-saved
// registers and return), then finalise the generated code. The expansion has
// no trailing semicolon; the caller supplies it.
#define END()                                          \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) { \
    __ Hlt(DebugHltOpcode::kMTEInactive);              \
  }                                                    \
  __ Trace(LOG_ALL, TRACE_DISABLE);                    \
  __ PopCalleeSavedRegisters();                        \
  __ Ret();                                            \
  masm.FinalizeCode()
98
// Simulator variant: disassemble the generated code if requested, run it on
// the simulator starting at the beginning of the code buffer, and record
// through the `skipped` pointer that the test was executed.
#define TRY_RUN(skipped)                                                \
  DISASSEMBLE();                                                        \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
  /* The simulator can run every test. */                               \
  *skipped = false
104
105
106#else  // VIXL_INCLUDE_SIMULATOR_AARCH64
107
// Native variant: declares the `masm` local, applies the CPUFeatures built
// from the macro arguments to it, and calls CPU::SetUp(). The expansion has no
// trailing semicolon; the caller supplies it.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()
112
// Native variant: reset the assembler and emit the test prologue, which saves
// the callee-saved registers.
#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()
116
// Native variant: emit the test epilogue (restore the callee-saved registers
// and return), then finalise the generated code.
#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()
121
// Native variant: disassemble the generated code if requested, then audit it
// to find which CPU features it uses. If the assumed feature set of this
// machine covers them, the buffer is made executable, run, and made writable
// again, and `*skipped` is set to false. Otherwise a "SKIPPED" line listing
// the missing features is printed and `*skipped` is set to true; the generated
// code is not executed in that case.
#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively.                                           */ \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of  */    \
    /* AArch64LegacyBaseline is a stopgap.                              */    \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (this_machine.Has(requirements)) {                                     \
      masm.GetBuffer()->SetExecutable();                                      \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      masm.GetBuffer()->SetWritable();                                        \
      *skipped = false;                                                       \
    } else {                                                                  \
      std::stringstream os;                                                   \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from    */ \
      /* tools/threaded_test.py.                                           */ \
      os << "SKIPPED: Missing features: { ";                                  \
      os << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                         \
      *skipped = true;                                                        \
    }                                                                         \
  }
152
153
154#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
155
156
// Print a disassembly of the whole code buffer (start address to end address)
// to stdout when Test::disassemble() is true. Requires a MacroAssembler named
// `masm` in scope. The expansion is a complete `if` statement, so the
// semicolon written after DISASSEMBLE() is an empty statement.
#define DISASSEMBLE()                                             \
  if (Test::disassemble()) {                                      \
    PrintDisassembler disasm(stdout);                             \
    CodeBuffer* buffer = masm.GetBuffer();                        \
    Instruction* start = buffer->GetStartAddress<Instruction*>(); \
    Instruction* end = buffer->GetEndAddress<Instruction*>();     \
    disasm.DisassembleBuffer(start, end);                         \
  }
165
// The maximum number of errors to report in detail for each test. Once this
// many mismatches have been printed, the checkers below only count further
// mismatches and print how many more there were.
static const unsigned kErrorReportLimit = 8;
168
169
170// Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
171// templated test functions.
172static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
173
174static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
175
176// The rawbits_to_fp functions are only used for printing decimal values so we
177// just approximate FP16 as double.
178static double rawbits_to_fp(uint16_t bits) {
179  return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
180}
181
182
// MacroAssembler member function pointers to pass to the test dispatchers.
// Each typedef names one operand signature; a dispatcher invokes the pointer
// as (masm.*helper)(...) to emit the instruction under test.

// FP: destination and one source.
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn);
// FP: destination and two sources.
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm);
// FP: destination and three sources.
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm,
                                                  const VRegister& fa);
// FP compare of two registers; there is no destination register operand.
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
                                                  const VRegister& fm);
// FP compare of a register against a constant passed as a double.
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
                                                      double value);
// General-purpose destination, FP source.
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const VRegister& fn);
// General-purpose destination, FP source, plus a number of fractional bits.
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const VRegister& fn,
                                                      int fbits);
// FP destination, general-purpose source, plus a number of fractional bits.
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
//       consolidated into one routine.
// NEON: destination and one source.
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn);
// NEON: destination and two sources.
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn,
                                                    const VRegister& vm);
// NEON: destination, two sources, and an index applied to the second source.
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
                                                          const VRegister& vn,
                                                          const VRegister& vm,
                                                          int vm_index);
// NEON: destination and source registers, each followed by an immediate.
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    const VRegister& vd, int imm1, const VRegister& vn, int imm2);
218
// This helps using the same typename for both the function pointer
// and the array of immediates passed to helper routines.
//
// T is the type of the immediate operand. The nested `mnemonic` typedef is the
// MacroAssembler member function pointer taking (vd, vn, imm) with `imm` of
// that type.
template <typename T>
class Test2OpImmediateNEONHelper_t {
 public:
  typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
                                           const VRegister& vn,
                                           T imm);
};
228
229
// Returns how many hex digits are needed to print a value of whichever of the
// two template types is wider.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const unsigned kBitsPerByte = 8;
  const unsigned kBitsPerHexDigit = 4;
  const unsigned widest_in_bytes = static_cast<unsigned>(
      (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta));
  return (widest_in_bytes * kBitsPerByte) / kBitsPerHexDigit;
}
237
238
239// Standard test dispatchers.
240
241
242static void Test1Op_Helper(Test1OpFPHelper_t helper,
243                           uintptr_t inputs,
244                           unsigned inputs_length,
245                           uintptr_t results,
246                           unsigned d_size,
247                           unsigned n_size,
248                           bool* skipped) {
249  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
250              (d_size == kHRegSize));
251  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
252              (n_size == kHRegSize));
253
254  CPUFeatures features;
255  features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
256  // For frint{32,64}{x,y} variants.
257  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
258  SETUP_WITH_FEATURES(features);
259  START();
260
261  // Roll up the loop to keep the code size down.
262  Label loop_n;
263
264  Register out = x0;
265  Register inputs_base = x1;
266  Register length = w2;
267  Register index_n = w3;
268
269  int n_index_shift;
270  VRegister fd;
271  VRegister fn;
272  if (n_size == kDRegSize) {
273    n_index_shift = kDRegSizeInBytesLog2;
274    fn = d1;
275  } else if (n_size == kSRegSize) {
276    n_index_shift = kSRegSizeInBytesLog2;
277    fn = s1;
278  } else {
279    n_index_shift = kHRegSizeInBytesLog2;
280    fn = h1;
281  }
282
283  if (d_size == kDRegSize) {
284    fd = d0;
285  } else if (d_size == kSRegSize) {
286    fd = s0;
287  } else {
288    fd = h0;
289  }
290
291
292  __ Mov(out, results);
293  __ Mov(inputs_base, inputs);
294  __ Mov(length, inputs_length);
295
296  __ Mov(index_n, 0);
297  __ Bind(&loop_n);
298  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
299
300  {
301    SingleEmissionCheckScope guard(&masm);
302    (masm.*helper)(fd, fn);
303  }
304  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
305
306  __ Add(index_n, index_n, 1);
307  __ Cmp(index_n, inputs_length);
308  __ B(lo, &loop_n);
309
310  END();
311  TRY_RUN(skipped);
312}
313
314
315// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
316// rawbits representations of doubles or floats. This ensures that exact bit
317// comparisons can be performed.
318template <typename Tn, typename Td>
319static void Test1Op(const char* name,
320                    Test1OpFPHelper_t helper,
321                    const Tn inputs[],
322                    unsigned inputs_length,
323                    const Td expected[],
324                    unsigned expected_length) {
325  VIXL_ASSERT(inputs_length > 0);
326
327  const unsigned results_length = inputs_length;
328  Td* results = new Td[results_length];
329
330  const unsigned d_bits = sizeof(Td) * 8;
331  const unsigned n_bits = sizeof(Tn) * 8;
332  bool skipped;
333
334  Test1Op_Helper(helper,
335                 reinterpret_cast<uintptr_t>(inputs),
336                 inputs_length,
337                 reinterpret_cast<uintptr_t>(results),
338                 d_bits,
339                 n_bits,
340                 &skipped);
341
342  if (Test::generate_test_trace()) {
343    // Print the results.
344    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
345    for (unsigned d = 0; d < results_length; d++) {
346      printf("  0x%0*" PRIx64 ",\n",
347             d_bits / 4,
348             static_cast<uint64_t>(results[d]));
349    }
350    printf("};\n");
351    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
352  } else if (!skipped) {
353    // Check the results.
354    VIXL_CHECK(expected_length == results_length);
355    unsigned error_count = 0;
356    unsigned d = 0;
357    for (unsigned n = 0; n < inputs_length; n++, d++) {
358      if (results[d] != expected[d]) {
359        if (++error_count > kErrorReportLimit) continue;
360
361        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
362               name,
363               n_bits / 4,
364               static_cast<uint64_t>(inputs[n]),
365               name,
366               rawbits_to_fp(inputs[n]));
367        printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
368               d_bits / 4,
369               static_cast<uint64_t>(expected[d]),
370               rawbits_to_fp(expected[d]));
371        printf("  Found:    0x%0*" PRIx64 " (%g)\n",
372               d_bits / 4,
373               static_cast<uint64_t>(results[d]),
374               rawbits_to_fp(results[d]));
375        printf("\n");
376      }
377    }
378    VIXL_ASSERT(d == expected_length);
379    if (error_count > kErrorReportLimit) {
380      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
381    }
382    VIXL_CHECK(error_count == 0);
383  }
384  delete[] results;
385}
386
387
388static void Test2Op_Helper(Test2OpFPHelper_t helper,
389                           uintptr_t inputs,
390                           unsigned inputs_length,
391                           uintptr_t results,
392                           unsigned reg_size,
393                           bool* skipped) {
394  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
395              (reg_size == kHRegSize));
396
397  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
398  START();
399
400  // Roll up the loop to keep the code size down.
401  Label loop_n, loop_m;
402
403  Register out = x0;
404  Register inputs_base = x1;
405  Register length = w2;
406  Register index_n = w3;
407  Register index_m = w4;
408
409  bool double_op = reg_size == kDRegSize;
410  bool float_op = reg_size == kSRegSize;
411  int index_shift;
412  if (double_op) {
413    index_shift = kDRegSizeInBytesLog2;
414  } else if (float_op) {
415    index_shift = kSRegSizeInBytesLog2;
416  } else {
417    index_shift = kHRegSizeInBytesLog2;
418  }
419
420  VRegister fd;
421  VRegister fn;
422  VRegister fm;
423
424  if (double_op) {
425    fd = d0;
426    fn = d1;
427    fm = d2;
428  } else if (float_op) {
429    fd = s0;
430    fn = s1;
431    fm = s2;
432  } else {
433    fd = h0;
434    fn = h1;
435    fm = h2;
436  }
437
438  __ Mov(out, results);
439  __ Mov(inputs_base, inputs);
440  __ Mov(length, inputs_length);
441
442  __ Mov(index_n, 0);
443  __ Bind(&loop_n);
444  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
445
446  __ Mov(index_m, 0);
447  __ Bind(&loop_m);
448  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
449
450  {
451    SingleEmissionCheckScope guard(&masm);
452    (masm.*helper)(fd, fn, fm);
453  }
454  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
455
456  __ Add(index_m, index_m, 1);
457  __ Cmp(index_m, inputs_length);
458  __ B(lo, &loop_m);
459
460  __ Add(index_n, index_n, 1);
461  __ Cmp(index_n, inputs_length);
462  __ B(lo, &loop_n);
463
464  END();
465  TRY_RUN(skipped);
466}
467
468
469// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
470// rawbits representations of doubles or floats. This ensures that exact bit
471// comparisons can be performed.
472template <typename T>
473static void Test2Op(const char* name,
474                    Test2OpFPHelper_t helper,
475                    const T inputs[],
476                    unsigned inputs_length,
477                    const T expected[],
478                    unsigned expected_length) {
479  VIXL_ASSERT(inputs_length > 0);
480
481  const unsigned results_length = inputs_length * inputs_length;
482  T* results = new T[results_length];
483
484  const unsigned bits = sizeof(T) * 8;
485  bool skipped;
486
487  Test2Op_Helper(helper,
488                 reinterpret_cast<uintptr_t>(inputs),
489                 inputs_length,
490                 reinterpret_cast<uintptr_t>(results),
491                 bits,
492                 &skipped);
493
494  if (Test::generate_test_trace()) {
495    // Print the results.
496    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
497    for (unsigned d = 0; d < results_length; d++) {
498      printf("  0x%0*" PRIx64 ",\n",
499             bits / 4,
500             static_cast<uint64_t>(results[d]));
501    }
502    printf("};\n");
503    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
504  } else if (!skipped) {
505    // Check the results.
506    VIXL_CHECK(expected_length == results_length);
507    unsigned error_count = 0;
508    unsigned d = 0;
509    for (unsigned n = 0; n < inputs_length; n++) {
510      for (unsigned m = 0; m < inputs_length; m++, d++) {
511        if (results[d] != expected[d]) {
512          if (++error_count > kErrorReportLimit) continue;
513
514          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
515                 name,
516                 bits / 4,
517                 static_cast<uint64_t>(inputs[n]),
518                 bits / 4,
519                 static_cast<uint64_t>(inputs[m]),
520                 name,
521                 rawbits_to_fp(inputs[n]),
522                 rawbits_to_fp(inputs[m]));
523          printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
524                 bits / 4,
525                 static_cast<uint64_t>(expected[d]),
526                 rawbits_to_fp(expected[d]));
527          printf("  Found:    0x%0*" PRIx64 " (%g)\n",
528                 bits / 4,
529                 static_cast<uint64_t>(results[d]),
530                 rawbits_to_fp(results[d]));
531          printf("\n");
532        }
533      }
534    }
535    VIXL_ASSERT(d == expected_length);
536    if (error_count > kErrorReportLimit) {
537      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
538    }
539    VIXL_CHECK(error_count == 0);
540  }
541  delete[] results;
542}
543
544
545static void Test3Op_Helper(Test3OpFPHelper_t helper,
546                           uintptr_t inputs,
547                           unsigned inputs_length,
548                           uintptr_t results,
549                           unsigned reg_size,
550                           bool* skipped) {
551  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
552              (reg_size == kHRegSize));
553
554  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
555  START();
556
557  // Roll up the loop to keep the code size down.
558  Label loop_n, loop_m, loop_a;
559
560  Register out = x0;
561  Register inputs_base = x1;
562  Register length = w2;
563  Register index_n = w3;
564  Register index_m = w4;
565  Register index_a = w5;
566
567  bool double_op = reg_size == kDRegSize;
568  bool single_op = reg_size == kSRegSize;
569  int index_shift;
570  VRegister fd(0, reg_size);
571  VRegister fn(1, reg_size);
572  VRegister fm(2, reg_size);
573  VRegister fa(3, reg_size);
574  if (double_op) {
575    index_shift = kDRegSizeInBytesLog2;
576  } else if (single_op) {
577    index_shift = kSRegSizeInBytesLog2;
578  } else {
579    index_shift = kHRegSizeInBytesLog2;
580  }
581
582  __ Mov(out, results);
583  __ Mov(inputs_base, inputs);
584  __ Mov(length, inputs_length);
585
586  __ Mov(index_n, 0);
587  __ Bind(&loop_n);
588  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
589
590  __ Mov(index_m, 0);
591  __ Bind(&loop_m);
592  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
593
594  __ Mov(index_a, 0);
595  __ Bind(&loop_a);
596  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
597
598  {
599    SingleEmissionCheckScope guard(&masm);
600    (masm.*helper)(fd, fn, fm, fa);
601  }
602  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
603
604  __ Add(index_a, index_a, 1);
605  __ Cmp(index_a, inputs_length);
606  __ B(lo, &loop_a);
607
608  __ Add(index_m, index_m, 1);
609  __ Cmp(index_m, inputs_length);
610  __ B(lo, &loop_m);
611
612  __ Add(index_n, index_n, 1);
613  __ Cmp(index_n, inputs_length);
614  __ B(lo, &loop_n);
615
616  END();
617  TRY_RUN(skipped);
618}
619
620
621// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
622// rawbits representations of doubles or floats. This ensures that exact bit
623// comparisons can be performed.
624template <typename T>
625static void Test3Op(const char* name,
626                    Test3OpFPHelper_t helper,
627                    const T inputs[],
628                    unsigned inputs_length,
629                    const T expected[],
630                    unsigned expected_length) {
631  VIXL_ASSERT(inputs_length > 0);
632
633  const unsigned results_length = inputs_length * inputs_length * inputs_length;
634  T* results = new T[results_length];
635
636  const unsigned bits = sizeof(T) * 8;
637  bool skipped;
638
639  Test3Op_Helper(helper,
640                 reinterpret_cast<uintptr_t>(inputs),
641                 inputs_length,
642                 reinterpret_cast<uintptr_t>(results),
643                 bits,
644                 &skipped);
645
646  if (Test::generate_test_trace()) {
647    // Print the results.
648    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
649    for (unsigned d = 0; d < results_length; d++) {
650      printf("  0x%0*" PRIx64 ",\n",
651             bits / 4,
652             static_cast<uint64_t>(results[d]));
653    }
654    printf("};\n");
655    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
656  } else if (!skipped) {
657    // Check the results.
658    VIXL_CHECK(expected_length == results_length);
659    unsigned error_count = 0;
660    unsigned d = 0;
661    for (unsigned n = 0; n < inputs_length; n++) {
662      for (unsigned m = 0; m < inputs_length; m++) {
663        for (unsigned a = 0; a < inputs_length; a++, d++) {
664          if (results[d] != expected[d]) {
665            if (++error_count > kErrorReportLimit) continue;
666
667            printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
668                   " (%s %g %g %g):\n",
669                   name,
670                   bits / 4,
671                   static_cast<uint64_t>(inputs[n]),
672                   bits / 4,
673                   static_cast<uint64_t>(inputs[m]),
674                   bits / 4,
675                   static_cast<uint64_t>(inputs[a]),
676                   name,
677                   rawbits_to_fp(inputs[n]),
678                   rawbits_to_fp(inputs[m]),
679                   rawbits_to_fp(inputs[a]));
680            printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
681                   bits / 4,
682                   static_cast<uint64_t>(expected[d]),
683                   rawbits_to_fp(expected[d]));
684            printf("  Found:    0x%0*" PRIx64 " (%g)\n",
685                   bits / 4,
686                   static_cast<uint64_t>(results[d]),
687                   rawbits_to_fp(results[d]));
688            printf("\n");
689          }
690        }
691      }
692    }
693    VIXL_ASSERT(d == expected_length);
694    if (error_count > kErrorReportLimit) {
695      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
696    }
697    VIXL_CHECK(error_count == 0);
698  }
699  delete[] results;
700}
701
702
703static void TestCmp_Helper(TestFPCmpHelper_t helper,
704                           uintptr_t inputs,
705                           unsigned inputs_length,
706                           uintptr_t results,
707                           unsigned reg_size,
708                           bool* skipped) {
709  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
710
711  SETUP_WITH_FEATURES(CPUFeatures::kFP);
712  START();
713
714  // Roll up the loop to keep the code size down.
715  Label loop_n, loop_m;
716
717  Register out = x0;
718  Register inputs_base = x1;
719  Register length = w2;
720  Register index_n = w3;
721  Register index_m = w4;
722  Register flags = x5;
723
724  bool double_op = reg_size == kDRegSize;
725  const int index_shift =
726      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
727
728  VRegister fn = double_op ? d1 : s1;
729  VRegister fm = double_op ? d2 : s2;
730
731  __ Mov(out, results);
732  __ Mov(inputs_base, inputs);
733  __ Mov(length, inputs_length);
734
735  __ Mov(index_n, 0);
736  __ Bind(&loop_n);
737  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
738
739  __ Mov(index_m, 0);
740  __ Bind(&loop_m);
741  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
742
743  {
744    SingleEmissionCheckScope guard(&masm);
745    (masm.*helper)(fn, fm);
746  }
747  __ Mrs(flags, NZCV);
748  __ Ubfx(flags, flags, 28, 4);
749  __ Strb(flags, MemOperand(out, 1, PostIndex));
750
751  __ Add(index_m, index_m, 1);
752  __ Cmp(index_m, inputs_length);
753  __ B(lo, &loop_m);
754
755  __ Add(index_n, index_n, 1);
756  __ Cmp(index_n, inputs_length);
757  __ B(lo, &loop_n);
758
759  END();
760  TRY_RUN(skipped);
761}
762
763
764// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
765// rawbits representations of doubles or floats. This ensures that exact bit
766// comparisons can be performed.
767template <typename T>
768static void TestCmp(const char* name,
769                    TestFPCmpHelper_t helper,
770                    const T inputs[],
771                    unsigned inputs_length,
772                    const uint8_t expected[],
773                    unsigned expected_length) {
774  VIXL_ASSERT(inputs_length > 0);
775
776  const unsigned results_length = inputs_length * inputs_length;
777  uint8_t* results = new uint8_t[results_length];
778
779  const unsigned bits = sizeof(T) * 8;
780  bool skipped;
781
782  TestCmp_Helper(helper,
783                 reinterpret_cast<uintptr_t>(inputs),
784                 inputs_length,
785                 reinterpret_cast<uintptr_t>(results),
786                 bits,
787                 &skipped);
788
789  if (Test::generate_test_trace()) {
790    // Print the results.
791    printf("const uint8_t kExpected_%s[] = {\n", name);
792    for (unsigned d = 0; d < results_length; d++) {
793      // Each NZCV result only requires 4 bits.
794      VIXL_ASSERT((results[d] & 0xf) == results[d]);
795      printf("  0x%" PRIx8 ",\n", results[d]);
796    }
797    printf("};\n");
798    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
799  } else if (!skipped) {
800    // Check the results.
801    VIXL_CHECK(expected_length == results_length);
802    unsigned error_count = 0;
803    unsigned d = 0;
804    for (unsigned n = 0; n < inputs_length; n++) {
805      for (unsigned m = 0; m < inputs_length; m++, d++) {
806        if (results[d] != expected[d]) {
807          if (++error_count > kErrorReportLimit) continue;
808
809          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
810                 name,
811                 bits / 4,
812                 static_cast<uint64_t>(inputs[n]),
813                 bits / 4,
814                 static_cast<uint64_t>(inputs[m]),
815                 name,
816                 rawbits_to_fp(inputs[n]),
817                 rawbits_to_fp(inputs[m]));
818          printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
819                 (expected[d] & 0x8) ? 'N' : 'n',
820                 (expected[d] & 0x4) ? 'Z' : 'z',
821                 (expected[d] & 0x2) ? 'C' : 'c',
822                 (expected[d] & 0x1) ? 'V' : 'v',
823                 expected[d]);
824          printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
825                 (results[d] & 0x8) ? 'N' : 'n',
826                 (results[d] & 0x4) ? 'Z' : 'z',
827                 (results[d] & 0x2) ? 'C' : 'c',
828                 (results[d] & 0x1) ? 'V' : 'v',
829                 results[d]);
830          printf("\n");
831        }
832      }
833    }
834    VIXL_ASSERT(d == expected_length);
835    if (error_count > kErrorReportLimit) {
836      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
837    }
838    VIXL_CHECK(error_count == 0);
839  }
840  delete[] results;
841}
842
843
844static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
845                               uintptr_t inputs,
846                               unsigned inputs_length,
847                               uintptr_t results,
848                               unsigned reg_size,
849                               bool* skipped) {
850  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
851
852  SETUP_WITH_FEATURES(CPUFeatures::kFP);
853  START();
854
855  // Roll up the loop to keep the code size down.
856  Label loop_n, loop_m;
857
858  Register out = x0;
859  Register inputs_base = x1;
860  Register length = w2;
861  Register index_n = w3;
862  Register flags = x4;
863
864  bool double_op = reg_size == kDRegSize;
865  const int index_shift =
866      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
867
868  VRegister fn = double_op ? d1 : s1;
869
870  __ Mov(out, results);
871  __ Mov(inputs_base, inputs);
872  __ Mov(length, inputs_length);
873
874  __ Mov(index_n, 0);
875  __ Bind(&loop_n);
876  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
877
878  {
879    SingleEmissionCheckScope guard(&masm);
880    (masm.*helper)(fn, 0.0);
881  }
882  __ Mrs(flags, NZCV);
883  __ Ubfx(flags, flags, 28, 4);
884  __ Strb(flags, MemOperand(out, 1, PostIndex));
885
886  __ Add(index_n, index_n, 1);
887  __ Cmp(index_n, inputs_length);
888  __ B(lo, &loop_n);
889
890  END();
891  TRY_RUN(skipped);
892}
893
894
895// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
896// rawbits representations of doubles or floats. This ensures that exact bit
897// comparisons can be performed.
898template <typename T>
899static void TestCmpZero(const char* name,
900                        TestFPCmpZeroHelper_t helper,
901                        const T inputs[],
902                        unsigned inputs_length,
903                        const uint8_t expected[],
904                        unsigned expected_length) {
905  VIXL_ASSERT(inputs_length > 0);
906
907  const unsigned results_length = inputs_length;
908  uint8_t* results = new uint8_t[results_length];
909
910  const unsigned bits = sizeof(T) * 8;
911  bool skipped;
912
913  TestCmpZero_Helper(helper,
914                     reinterpret_cast<uintptr_t>(inputs),
915                     inputs_length,
916                     reinterpret_cast<uintptr_t>(results),
917                     bits,
918                     &skipped);
919
920  if (Test::generate_test_trace()) {
921    // Print the results.
922    printf("const uint8_t kExpected_%s[] = {\n", name);
923    for (unsigned d = 0; d < results_length; d++) {
924      // Each NZCV result only requires 4 bits.
925      VIXL_ASSERT((results[d] & 0xf) == results[d]);
926      printf("  0x%" PRIx8 ",\n", results[d]);
927    }
928    printf("};\n");
929    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
930  } else if (!skipped) {
931    // Check the results.
932    VIXL_CHECK(expected_length == results_length);
933    unsigned error_count = 0;
934    unsigned d = 0;
935    for (unsigned n = 0; n < inputs_length; n++, d++) {
936      if (results[d] != expected[d]) {
937        if (++error_count > kErrorReportLimit) continue;
938
939        printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
940               name,
941               bits / 4,
942               static_cast<uint64_t>(inputs[n]),
943               bits / 4,
944               0,
945               name,
946               rawbits_to_fp(inputs[n]));
947        printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
948               (expected[d] & 0x8) ? 'N' : 'n',
949               (expected[d] & 0x4) ? 'Z' : 'z',
950               (expected[d] & 0x2) ? 'C' : 'c',
951               (expected[d] & 0x1) ? 'V' : 'v',
952               expected[d]);
953        printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
954               (results[d] & 0x8) ? 'N' : 'n',
955               (results[d] & 0x4) ? 'Z' : 'z',
956               (results[d] & 0x2) ? 'C' : 'c',
957               (results[d] & 0x1) ? 'V' : 'v',
958               results[d]);
959        printf("\n");
960      }
961    }
962    VIXL_ASSERT(d == expected_length);
963    if (error_count > kErrorReportLimit) {
964      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
965    }
966    VIXL_CHECK(error_count == 0);
967  }
968  delete[] results;
969}
970
971
972static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
973                                 uintptr_t inputs,
974                                 unsigned inputs_length,
975                                 uintptr_t results,
976                                 unsigned d_size,
977                                 unsigned n_size,
978                                 bool* skipped) {
979  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
980  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
981              (n_size == kHRegSize));
982
983  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
984  START();
985
986  // Roll up the loop to keep the code size down.
987  Label loop_n;
988
989  Register out = x0;
990  Register inputs_base = x1;
991  Register length = w2;
992  Register index_n = w3;
993
994  int n_index_shift;
995  if (n_size == kDRegSize) {
996    n_index_shift = kDRegSizeInBytesLog2;
997  } else if (n_size == kSRegSize) {
998    n_index_shift = kSRegSizeInBytesLog2;
999  } else {
1000    n_index_shift = kHRegSizeInBytesLog2;
1001  }
1002
1003  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1004  VRegister fn;
1005  if (n_size == kDRegSize) {
1006    fn = d1;
1007  } else if (n_size == kSRegSize) {
1008    fn = s1;
1009  } else {
1010    fn = h1;
1011  }
1012
1013  __ Mov(out, results);
1014  __ Mov(inputs_base, inputs);
1015  __ Mov(length, inputs_length);
1016
1017  __ Mov(index_n, 0);
1018  __ Bind(&loop_n);
1019  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1020
1021  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
1022    {
1023      SingleEmissionCheckScope guard(&masm);
1024      (masm.*helper)(rd, fn, fbits);
1025    }
1026    __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1027  }
1028
1029  __ Add(index_n, index_n, 1);
1030  __ Cmp(index_n, inputs_length);
1031  __ B(lo, &loop_n);
1032
1033  END();
1034  TRY_RUN(skipped);
1035}
1036
1037
1038static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
1039                               uintptr_t inputs,
1040                               unsigned inputs_length,
1041                               uintptr_t results,
1042                               unsigned d_size,
1043                               unsigned n_size,
1044                               bool* skipped) {
1045  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
1046  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
1047              (n_size == kHRegSize));
1048
1049  SETUP_WITH_FEATURES(CPUFeatures::kFP,
1050                      CPUFeatures::kFPHalf,
1051                      CPUFeatures::kJSCVT);
1052  START();
1053
1054  // Roll up the loop to keep the code size down.
1055  Label loop_n;
1056
1057  Register out = x0;
1058  Register inputs_base = x1;
1059  Register length = w2;
1060  Register index_n = w3;
1061
1062  int n_index_shift;
1063  if (n_size == kDRegSize) {
1064    n_index_shift = kDRegSizeInBytesLog2;
1065  } else if (n_size == kSRegSize) {
1066    n_index_shift = kSRegSizeInBytesLog2;
1067  } else {
1068    n_index_shift = kHRegSizeInBytesLog2;
1069  }
1070
1071  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1072  VRegister fn;
1073  if (n_size == kDRegSize) {
1074    fn = d1;
1075  } else if (n_size == kSRegSize) {
1076    fn = s1;
1077  } else {
1078    fn = h1;
1079  }
1080
1081  __ Mov(out, results);
1082  __ Mov(inputs_base, inputs);
1083  __ Mov(length, inputs_length);
1084
1085  __ Mov(index_n, 0);
1086  __ Bind(&loop_n);
1087  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1088
1089  {
1090    SingleEmissionCheckScope guard(&masm);
1091    (masm.*helper)(rd, fn);
1092  }
1093  __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1094
1095  __ Add(index_n, index_n, 1);
1096  __ Cmp(index_n, inputs_length);
1097  __ B(lo, &loop_n);
1098
1099  END();
1100  TRY_RUN(skipped);
1101}
1102
1103
1104// Test FP instructions.
1105//  - The inputs[] array should be an array of rawbits representations of
1106//    doubles or floats. This ensures that exact bit comparisons can be
1107//    performed.
1108//  - The expected[] array should be an array of signed integers.
1109template <typename Tn, typename Td>
1110static void TestFPToS(const char* name,
1111                      TestFPToIntHelper_t helper,
1112                      const Tn inputs[],
1113                      unsigned inputs_length,
1114                      const Td expected[],
1115                      unsigned expected_length) {
1116  VIXL_ASSERT(inputs_length > 0);
1117
1118  const unsigned results_length = inputs_length;
1119  Td* results = new Td[results_length];
1120
1121  const unsigned d_bits = sizeof(Td) * 8;
1122  const unsigned n_bits = sizeof(Tn) * 8;
1123  bool skipped;
1124
1125  TestFPToInt_Helper(helper,
1126                     reinterpret_cast<uintptr_t>(inputs),
1127                     inputs_length,
1128                     reinterpret_cast<uintptr_t>(results),
1129                     d_bits,
1130                     n_bits,
1131                     &skipped);
1132
1133  if (Test::generate_test_trace()) {
1134    // Print the results.
1135    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1136    // There is no simple C++ literal for INT*_MIN that doesn't produce
1137    // warnings, so we use an appropriate constant in that case instead.
1138    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1139    // the like) avoids warnings about comparing values with differing ranges.
1140    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1141    const int64_t int_d_min = -(int_d_max)-1;
1142    for (unsigned d = 0; d < results_length; d++) {
1143      if (results[d] == int_d_min) {
1144        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1145      } else {
1146        // Some constants (such as those between INT32_MAX and UINT32_MAX)
1147        // trigger compiler warnings. To avoid these warnings, use an
1148        // appropriate macro to make the type explicit.
1149        int64_t result_int64 = static_cast<int64_t>(results[d]);
1150        if (result_int64 >= 0) {
1151          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1152        } else {
1153          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1154        }
1155      }
1156    }
1157    printf("};\n");
1158    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1159  } else if (!skipped) {
1160    // Check the results.
1161    VIXL_CHECK(expected_length == results_length);
1162    unsigned error_count = 0;
1163    unsigned d = 0;
1164    for (unsigned n = 0; n < inputs_length; n++, d++) {
1165      if (results[d] != expected[d]) {
1166        if (++error_count > kErrorReportLimit) continue;
1167
1168        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1169               name,
1170               n_bits / 4,
1171               static_cast<uint64_t>(inputs[n]),
1172               name,
1173               rawbits_to_fp(inputs[n]));
1174        printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1175               d_bits / 4,
1176               static_cast<uint64_t>(expected[d]),
1177               static_cast<int64_t>(expected[d]));
1178        printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1179               d_bits / 4,
1180               static_cast<uint64_t>(results[d]),
1181               static_cast<int64_t>(results[d]));
1182        printf("\n");
1183      }
1184    }
1185    VIXL_ASSERT(d == expected_length);
1186    if (error_count > kErrorReportLimit) {
1187      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1188    }
1189    VIXL_CHECK(error_count == 0);
1190  }
1191  delete[] results;
1192}
1193
1194
1195// Test FP instructions.
1196//  - The inputs[] array should be an array of rawbits representations of
1197//    doubles or floats. This ensures that exact bit comparisons can be
1198//    performed.
1199//  - The expected[] array should be an array of unsigned integers.
1200template <typename Tn, typename Td>
1201static void TestFPToU(const char* name,
1202                      TestFPToIntHelper_t helper,
1203                      const Tn inputs[],
1204                      unsigned inputs_length,
1205                      const Td expected[],
1206                      unsigned expected_length) {
1207  VIXL_ASSERT(inputs_length > 0);
1208
1209  const unsigned results_length = inputs_length;
1210  Td* results = new Td[results_length];
1211
1212  const unsigned d_bits = sizeof(Td) * 8;
1213  const unsigned n_bits = sizeof(Tn) * 8;
1214  bool skipped;
1215
1216  TestFPToInt_Helper(helper,
1217                     reinterpret_cast<uintptr_t>(inputs),
1218                     inputs_length,
1219                     reinterpret_cast<uintptr_t>(results),
1220                     d_bits,
1221                     n_bits,
1222                     &skipped);
1223
1224  if (Test::generate_test_trace()) {
1225    // Print the results.
1226    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1227    for (unsigned d = 0; d < results_length; d++) {
1228      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1229    }
1230    printf("};\n");
1231    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1232  } else if (!skipped) {
1233    // Check the results.
1234    VIXL_CHECK(expected_length == results_length);
1235    unsigned error_count = 0;
1236    unsigned d = 0;
1237    for (unsigned n = 0; n < inputs_length; n++, d++) {
1238      if (results[d] != expected[d]) {
1239        if (++error_count > kErrorReportLimit) continue;
1240
1241        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1242               name,
1243               n_bits / 4,
1244               static_cast<uint64_t>(inputs[n]),
1245               name,
1246               rawbits_to_fp(inputs[n]));
1247        printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1248               d_bits / 4,
1249               static_cast<uint64_t>(expected[d]),
1250               static_cast<uint64_t>(expected[d]));
1251        printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1252               d_bits / 4,
1253               static_cast<uint64_t>(results[d]),
1254               static_cast<uint64_t>(results[d]));
1255        printf("\n");
1256      }
1257    }
1258    VIXL_ASSERT(d == expected_length);
1259    if (error_count > kErrorReportLimit) {
1260      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1261    }
1262    VIXL_CHECK(error_count == 0);
1263  }
1264  delete[] results;
1265}
1266
1267
1268// Test FP instructions.
1269//  - The inputs[] array should be an array of rawbits representations of
1270//    doubles or floats. This ensures that exact bit comparisons can be
1271//    performed.
1272//  - The expected[] array should be an array of signed integers.
1273template <typename Tn, typename Td>
1274static void TestFPToFixedS(const char* name,
1275                           TestFPToFixedHelper_t helper,
1276                           const Tn inputs[],
1277                           unsigned inputs_length,
1278                           const Td expected[],
1279                           unsigned expected_length) {
1280  VIXL_ASSERT(inputs_length > 0);
1281
1282  const unsigned d_bits = sizeof(Td) * 8;
1283  const unsigned n_bits = sizeof(Tn) * 8;
1284
1285  const unsigned results_length = inputs_length * (d_bits + 1);
1286  Td* results = new Td[results_length];
1287
1288  bool skipped;
1289
1290  TestFPToFixed_Helper(helper,
1291                       reinterpret_cast<uintptr_t>(inputs),
1292                       inputs_length,
1293                       reinterpret_cast<uintptr_t>(results),
1294                       d_bits,
1295                       n_bits,
1296                       &skipped);
1297
1298  if (Test::generate_test_trace()) {
1299    // Print the results.
1300    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1301    // There is no simple C++ literal for INT*_MIN that doesn't produce
1302    // warnings, so we use an appropriate constant in that case instead.
1303    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1304    // the like) avoids warnings about comparing values with differing ranges.
1305    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1306    const int64_t int_d_min = -(int_d_max)-1;
1307    for (unsigned d = 0; d < results_length; d++) {
1308      if (results[d] == int_d_min) {
1309        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1310      } else {
1311        // Some constants (such as those between INT32_MAX and UINT32_MAX)
1312        // trigger compiler warnings. To avoid these warnings, use an
1313        // appropriate macro to make the type explicit.
1314        int64_t result_int64 = static_cast<int64_t>(results[d]);
1315        if (result_int64 >= 0) {
1316          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1317        } else {
1318          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1319        }
1320      }
1321    }
1322    printf("};\n");
1323    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1324  } else if (!skipped) {
1325    // Check the results.
1326    VIXL_CHECK(expected_length == results_length);
1327    unsigned error_count = 0;
1328    unsigned d = 0;
1329    for (unsigned n = 0; n < inputs_length; n++) {
1330      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1331        if (results[d] != expected[d]) {
1332          if (++error_count > kErrorReportLimit) continue;
1333
1334          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1335                 name,
1336                 n_bits / 4,
1337                 static_cast<uint64_t>(inputs[n]),
1338                 fbits,
1339                 name,
1340                 rawbits_to_fp(inputs[n]),
1341                 fbits);
1342          printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1343                 d_bits / 4,
1344                 static_cast<uint64_t>(expected[d]),
1345                 static_cast<int64_t>(expected[d]));
1346          printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1347                 d_bits / 4,
1348                 static_cast<uint64_t>(results[d]),
1349                 static_cast<int64_t>(results[d]));
1350          printf("\n");
1351        }
1352      }
1353    }
1354    VIXL_ASSERT(d == expected_length);
1355    if (error_count > kErrorReportLimit) {
1356      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1357    }
1358    VIXL_CHECK(error_count == 0);
1359  }
1360  delete[] results;
1361}
1362
1363
1364// Test FP instructions.
1365//  - The inputs[] array should be an array of rawbits representations of
1366//    doubles or floats. This ensures that exact bit comparisons can be
1367//    performed.
1368//  - The expected[] array should be an array of unsigned integers.
1369template <typename Tn, typename Td>
1370static void TestFPToFixedU(const char* name,
1371                           TestFPToFixedHelper_t helper,
1372                           const Tn inputs[],
1373                           unsigned inputs_length,
1374                           const Td expected[],
1375                           unsigned expected_length) {
1376  VIXL_ASSERT(inputs_length > 0);
1377
1378  const unsigned d_bits = sizeof(Td) * 8;
1379  const unsigned n_bits = sizeof(Tn) * 8;
1380
1381  const unsigned results_length = inputs_length * (d_bits + 1);
1382  Td* results = new Td[results_length];
1383
1384  bool skipped;
1385
1386  TestFPToFixed_Helper(helper,
1387                       reinterpret_cast<uintptr_t>(inputs),
1388                       inputs_length,
1389                       reinterpret_cast<uintptr_t>(results),
1390                       d_bits,
1391                       n_bits,
1392                       &skipped);
1393
1394  if (Test::generate_test_trace()) {
1395    // Print the results.
1396    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1397    for (unsigned d = 0; d < results_length; d++) {
1398      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1399    }
1400    printf("};\n");
1401    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1402  } else if (!skipped) {
1403    // Check the results.
1404    VIXL_CHECK(expected_length == results_length);
1405    unsigned error_count = 0;
1406    unsigned d = 0;
1407    for (unsigned n = 0; n < inputs_length; n++) {
1408      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1409        if (results[d] != expected[d]) {
1410          if (++error_count > kErrorReportLimit) continue;
1411
1412          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1413                 name,
1414                 n_bits / 4,
1415                 static_cast<uint64_t>(inputs[n]),
1416                 fbits,
1417                 name,
1418                 rawbits_to_fp(inputs[n]),
1419                 fbits);
1420          printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1421                 d_bits / 4,
1422                 static_cast<uint64_t>(expected[d]),
1423                 static_cast<uint64_t>(expected[d]));
1424          printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1425                 d_bits / 4,
1426                 static_cast<uint64_t>(results[d]),
1427                 static_cast<uint64_t>(results[d]));
1428          printf("\n");
1429        }
1430      }
1431    }
1432    VIXL_ASSERT(d == expected_length);
1433    if (error_count > kErrorReportLimit) {
1434      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1435    }
1436    VIXL_CHECK(error_count == 0);
1437  }
1438  delete[] results;
1439}
1440
1441
1442// ==== Tests for instructions of the form <INST> VReg, VReg. ====
1443
1444
1445static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1446                               uintptr_t inputs_n,
1447                               unsigned inputs_n_length,
1448                               uintptr_t results,
1449                               VectorFormat vd_form,
1450                               VectorFormat vn_form,
1451                               bool* skipped) {
1452  VIXL_ASSERT(vd_form != kFormatUndefined);
1453  VIXL_ASSERT(vn_form != kFormatUndefined);
1454
1455  CPUFeatures features;
1456  features.Combine(CPUFeatures::kNEON,
1457                   CPUFeatures::kFP,
1458                   CPUFeatures::kRDM,
1459                   CPUFeatures::kNEONHalf);
1460  // For frint{32,64}{x,y} variants.
1461  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
1462  SETUP_WITH_FEATURES(features);
1463  START();
1464
1465  // Roll up the loop to keep the code size down.
1466  Label loop_n;
1467
1468  Register out = x0;
1469  Register inputs_n_base = x1;
1470  Register inputs_n_last_16bytes = x3;
1471  Register index_n = x5;
1472
1473  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1474  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1475  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1476
1477  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1478  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1479  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1480  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1481  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1482
1483
1484  // These will be either a D- or a Q-register form, with a single lane
1485  // (for use in scalar load and store operations).
1486  VRegister vd = VRegister(0, vd_bits);
1487  VRegister vn = v1.V16B();
1488  VRegister vntmp = v3.V16B();
1489
1490  // These will have the correct format for use when calling 'helper'.
1491  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1492  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1493
1494  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1495  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1496
1497  __ Mov(out, results);
1498
1499  __ Mov(inputs_n_base, inputs_n);
1500  __ Mov(inputs_n_last_16bytes,
1501         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1502
1503  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1504
1505  __ Mov(index_n, 0);
1506  __ Bind(&loop_n);
1507
1508  __ Ldr(vntmp_single,
1509         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1510  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1511
1512  // Set the destination to zero.
1513  // TODO: Setting the destination to values other than zero
1514  //       might be a better test for instructions such as sqxtn2
1515  //       which may leave parts of V registers unchanged.
1516  __ Movi(vd.V16B(), 0);
1517
1518  {
1519    SingleEmissionCheckScope guard(&masm);
1520    (masm.*helper)(vd_helper, vn_helper);
1521  }
1522  __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1523
1524  __ Add(index_n, index_n, 1);
1525  __ Cmp(index_n, inputs_n_length);
1526  __ B(lo, &loop_n);
1527
1528  END();
1529  TRY_RUN(skipped);
1530}
1531
1532
1533// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1534// arrays of rawbit representation of input values. This ensures that
1535// exact bit comparisons can be performed.
1536template <typename Td, typename Tn>
1537static void Test1OpNEON(const char* name,
1538                        Test1OpNEONHelper_t helper,
1539                        const Tn inputs_n[],
1540                        unsigned inputs_n_length,
1541                        const Td expected[],
1542                        unsigned expected_length,
1543                        VectorFormat vd_form,
1544                        VectorFormat vn_form) {
1545  VIXL_ASSERT(inputs_n_length > 0);
1546
1547  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1548  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1549  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1550
1551  const unsigned results_length = inputs_n_length;
1552  Td* results = new Td[results_length * vd_lane_count];
1553  const unsigned lane_bit = sizeof(Td) * 8;
1554  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1555
1556  bool skipped;
1557
1558  Test1OpNEON_Helper(helper,
1559                     reinterpret_cast<uintptr_t>(inputs_n),
1560                     inputs_n_length,
1561                     reinterpret_cast<uintptr_t>(results),
1562                     vd_form,
1563                     vn_form,
1564                     &skipped);
1565
1566  if (Test::generate_test_trace()) {
1567    // Print the results.
1568    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1569    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1570      printf(" ");
1571      // Output a separate result for each element of the result vector.
1572      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1573        unsigned index = lane + (iteration * vd_lane_count);
1574        printf(" 0x%0*" PRIx64 ",",
1575               lane_len_in_hex,
1576               static_cast<uint64_t>(results[index]));
1577      }
1578      printf("\n");
1579    }
1580
1581    printf("};\n");
1582    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1583           name,
1584           results_length);
1585  } else if (!skipped) {
1586    // Check the results.
1587    VIXL_CHECK(expected_length == results_length);
1588    unsigned error_count = 0;
1589    unsigned d = 0;
1590    const char* padding = "                    ";
1591    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1592    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1593      bool error_in_vector = false;
1594
1595      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1596        unsigned output_index = (n * vd_lane_count) + lane;
1597
1598        if (results[output_index] != expected[output_index]) {
1599          error_in_vector = true;
1600          break;
1601        }
1602      }
1603
1604      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1605        printf("%s\n", name);
1606        printf(" Vn%.*s| Vd%.*s| Expected\n",
1607               lane_len_in_hex + 1,
1608               padding,
1609               lane_len_in_hex + 1,
1610               padding);
1611
1612        const unsigned first_index_n =
1613            inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1614
1615        for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1616             lane++) {
1617          unsigned output_index = (n * vd_lane_count) + lane;
1618          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1619
1620          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1621                 " "
1622                 "| 0x%0*" PRIx64 "\n",
1623                 results[output_index] != expected[output_index] ? '*' : ' ',
1624                 lane_len_in_hex,
1625                 static_cast<uint64_t>(inputs_n[input_index_n]),
1626                 lane_len_in_hex,
1627                 static_cast<uint64_t>(results[output_index]),
1628                 lane_len_in_hex,
1629                 static_cast<uint64_t>(expected[output_index]));
1630        }
1631      }
1632    }
1633    VIXL_ASSERT(d == expected_length);
1634    if (error_count > kErrorReportLimit) {
1635      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1636    }
1637    VIXL_CHECK(error_count == 0);
1638  }
1639  delete[] results;
1640}
1641
1642
1643// ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1644//      where <V> is one of B, H, S or D registers.
1645//      e.g. saddlv H1, v0.8B
1646
1647// TODO: Change tests to store all lanes of the resulting V register.
1648//       Some tests store all 128 bits of the resulting V register to
1649//       check the simulator's behaviour on the rest of the register.
1650//       This is better than storing the affected lanes only.
1651//       Change any tests such as the 'Across' template to do the same.
1652
1653static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1654                                     uintptr_t inputs_n,
1655                                     unsigned inputs_n_length,
1656                                     uintptr_t results,
1657                                     VectorFormat vd_form,
1658                                     VectorFormat vn_form,
1659                                     bool* skipped) {
1660  VIXL_ASSERT(vd_form != kFormatUndefined);
1661  VIXL_ASSERT(vn_form != kFormatUndefined);
1662
1663  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
1664                      CPUFeatures::kFP,
1665                      CPUFeatures::kNEONHalf);
1666  START();
1667
1668  // Roll up the loop to keep the code size down.
1669  Label loop_n;
1670
1671  Register out = x0;
1672  Register inputs_n_base = x1;
1673  Register inputs_n_last_vector = x3;
1674  Register index_n = x5;
1675
1676  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1677  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1678  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1679  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1680  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1681  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1682  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1683
1684  // Test destructive operations by (arbitrarily) using the same register for
1685  // B and S lane sizes.
1686  bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
1687
1688  // Create two aliases for v0; the first is the destination for the tested
1689  // instruction, the second, the whole Q register to check the results.
1690  VRegister vd = VRegister(0, vd_bits);
1691  VRegister vdstr = VRegister(0, kQRegSize);
1692
1693  VRegister vn = VRegister(1, vn_bits);
1694  VRegister vntmp = VRegister(3, vn_bits);
1695
1696  // These will have the correct format for use when calling 'helper'.
1697  VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
1698  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1699
1700  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1701  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1702
1703  // Same registers for use in the 'ext' instructions.
1704  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1705  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1706
1707  __ Mov(out, results);
1708
1709  __ Mov(inputs_n_base, inputs_n);
1710  __ Mov(inputs_n_last_vector,
1711         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1712
1713  __ Ldr(vn, MemOperand(inputs_n_last_vector));
1714
1715  __ Mov(index_n, 0);
1716  __ Bind(&loop_n);
1717
1718  __ Ldr(vntmp_single,
1719         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1720  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1721
1722  if (destructive) {
1723    __ Mov(vd_helper, vn_helper);
1724    SingleEmissionCheckScope guard(&masm);
1725    (masm.*helper)(vd, vd_helper);
1726  } else {
1727    SingleEmissionCheckScope guard(&masm);
1728    (masm.*helper)(vd, vn_helper);
1729  }
1730
1731  __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
1732
1733  __ Add(index_n, index_n, 1);
1734  __ Cmp(index_n, inputs_n_length);
1735  __ B(lo, &loop_n);
1736
1737  END();
1738  TRY_RUN(skipped);
1739}
1740
1741// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1742// arrays of rawbit representation of input values. This ensures that
1743// exact bit comparisons can be performed.
1744template <typename Td, typename Tn>
1745static void Test1OpAcrossNEON(const char* name,
1746                              Test1OpNEONHelper_t helper,
1747                              const Tn inputs_n[],
1748                              unsigned inputs_n_length,
1749                              const Td expected[],
1750                              unsigned expected_length,
1751                              VectorFormat vd_form,
1752                              VectorFormat vn_form) {
1753  VIXL_ASSERT(inputs_n_length > 0);
1754
1755  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1756  const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
1757
1758  const unsigned results_length = inputs_n_length;
1759  Td* results = new Td[results_length * vd_lanes_per_q];
1760  const unsigned lane_bit = sizeof(Td) * 8;
1761  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1762
1763  bool skipped;
1764
1765  Test1OpAcrossNEON_Helper(helper,
1766                           reinterpret_cast<uintptr_t>(inputs_n),
1767                           inputs_n_length,
1768                           reinterpret_cast<uintptr_t>(results),
1769                           vd_form,
1770                           vn_form,
1771                           &skipped);
1772
1773  if (Test::generate_test_trace()) {
1774    // Print the results.
1775    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1776    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1777      printf(" ");
1778      // Output a separate result for each element of the result vector.
1779      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1780        unsigned index = lane + (iteration * vd_lanes_per_q);
1781        printf(" 0x%0*" PRIx64 ",",
1782               lane_len_in_hex,
1783               static_cast<uint64_t>(results[index]));
1784      }
1785      printf("\n");
1786    }
1787
1788    printf("};\n");
1789    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1790           name,
1791           results_length);
1792  } else if (!skipped) {
1793    // Check the results.
1794    VIXL_CHECK(expected_length == results_length);
1795    unsigned error_count = 0;
1796    unsigned d = 0;
1797    const char* padding = "                    ";
1798    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1799    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1800      bool error_in_vector = false;
1801
1802      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1803        unsigned expected_index = (n * vd_lane_count) + lane;
1804        unsigned results_index = (n * vd_lanes_per_q) + lane;
1805
1806        if (results[results_index] != expected[expected_index]) {
1807          error_in_vector = true;
1808          break;
1809        }
1810      }
1811
1812      // For across operations, the remaining lanes should be zero.
1813      for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
1814        unsigned results_index = (n * vd_lanes_per_q) + lane;
1815        if (results[results_index] != 0) {
1816          error_in_vector = true;
1817          break;
1818        }
1819      }
1820
1821      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1822        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1823
1824        printf("%s\n", name);
1825        printf(" Vn%.*s| Vd%.*s| Expected\n",
1826               lane_len_in_hex + 1,
1827               padding,
1828               lane_len_in_hex + 1,
1829               padding);
1830
1831        // TODO: In case of an error, all tests print out as many elements as
1832        //       there are lanes in the output or input vectors. This way
1833        //       the viewer can read all the values that were needed for the
1834        //       operation but the output contains also unnecessary values.
1835        //       These prints can be improved according to the arguments
1836        //       passed to test functions.
1837        //       This output for the 'Across' category has the required
1838        //       modifications.
1839        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1840          unsigned results_index =
1841              (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
1842          unsigned input_index_n =
1843              (inputs_n_length - vn_lane_count + n + 1 + lane) %
1844              inputs_n_length;
1845
1846          Td expect = 0;
1847          if ((vn_lane_count - 1) == lane) {
1848            // This is the last lane to be printed, ie. the least-significant
1849            // lane, so use the expected value; any other lane should be zero.
1850            unsigned expected_index = n * vd_lane_count;
1851            expect = expected[expected_index];
1852          }
1853          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1854                 results[results_index] != expect ? '*' : ' ',
1855                 lane_len_in_hex,
1856                 static_cast<uint64_t>(inputs_n[input_index_n]),
1857                 lane_len_in_hex,
1858                 static_cast<uint64_t>(results[results_index]),
1859                 lane_len_in_hex,
1860                 static_cast<uint64_t>(expect));
1861        }
1862      }
1863    }
1864    VIXL_ASSERT(d == expected_length);
1865    if (error_count > kErrorReportLimit) {
1866      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1867    }
1868    VIXL_CHECK(error_count == 0);
1869  }
1870  delete[] results;
1871}
1872
1873
1874// ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1875
1876// TODO: Iterate over inputs_d once the traces file is split.
1877
1878static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1879                               uintptr_t inputs_d,
1880                               uintptr_t inputs_n,
1881                               unsigned inputs_n_length,
1882                               uintptr_t inputs_m,
1883                               unsigned inputs_m_length,
1884                               uintptr_t results,
1885                               VectorFormat vd_form,
1886                               VectorFormat vn_form,
1887                               VectorFormat vm_form,
1888                               bool* skipped) {
1889  VIXL_ASSERT(vd_form != kFormatUndefined);
1890  VIXL_ASSERT(vn_form != kFormatUndefined);
1891  VIXL_ASSERT(vm_form != kFormatUndefined);
1892
1893  CPUFeatures features;
1894  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
1895  features.Combine(CPUFeatures::kFP);
1896  features.Combine(CPUFeatures::kRDM);
1897  features.Combine(CPUFeatures::kDotProduct);
1898  features.Combine(CPUFeatures::kFHM);
1899  SETUP_WITH_FEATURES(features);
1900  START();
1901
1902  // Roll up the loop to keep the code size down.
1903  Label loop_n, loop_m;
1904
1905  Register out = x0;
1906  Register inputs_n_base = x1;
1907  Register inputs_m_base = x2;
1908  Register inputs_d_base = x3;
1909  Register inputs_n_last_16bytes = x4;
1910  Register inputs_m_last_16bytes = x5;
1911  Register index_n = x6;
1912  Register index_m = x7;
1913
1914  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1915  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1916  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1917
1918  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1919  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1920  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1921  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1922  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1923
1924  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1925  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1926  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1927  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1928  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1929
1930
1931  // Always load and store 128 bits regardless of the format.
1932  VRegister vd = v0.V16B();
1933  VRegister vn = v1.V16B();
1934  VRegister vm = v2.V16B();
1935  VRegister vntmp = v3.V16B();
1936  VRegister vmtmp = v4.V16B();
1937  VRegister vres = v5.V16B();
1938
1939  // These will have the correct format for calling the 'helper'.
1940  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1941  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1942  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1943
1944  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1945  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1946  VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1947
1948  __ Mov(out, results);
1949
1950  __ Mov(inputs_d_base, inputs_d);
1951
1952  __ Mov(inputs_n_base, inputs_n);
1953  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1954  __ Mov(inputs_m_base, inputs_m);
1955  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1956
1957  __ Ldr(vd, MemOperand(inputs_d_base));
1958  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1959  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1960
1961  __ Mov(index_n, 0);
1962  __ Bind(&loop_n);
1963
1964  __ Ldr(vntmp_single,
1965         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1966  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1967
1968  __ Mov(index_m, 0);
1969  __ Bind(&loop_m);
1970
1971  __ Ldr(vmtmp_single,
1972         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
1973  __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1974
1975  __ Mov(vres, vd);
1976  {
1977    SingleEmissionCheckScope guard(&masm);
1978    (masm.*helper)(vres_helper, vn_helper, vm_helper);
1979  }
1980  __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1981
1982  __ Add(index_m, index_m, 1);
1983  __ Cmp(index_m, inputs_m_length);
1984  __ B(lo, &loop_m);
1985
1986  __ Add(index_n, index_n, 1);
1987  __ Cmp(index_n, inputs_n_length);
1988  __ B(lo, &loop_n);
1989
1990  END();
1991  TRY_RUN(skipped);
1992}
1993
1994
1995// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1996// arrays of rawbit representation of input values. This ensures that
1997// exact bit comparisons can be performed.
1998template <typename Td, typename Tn, typename Tm>
1999static void Test2OpNEON(const char* name,
2000                        Test2OpNEONHelper_t helper,
2001                        const Td inputs_d[],
2002                        const Tn inputs_n[],
2003                        unsigned inputs_n_length,
2004                        const Tm inputs_m[],
2005                        unsigned inputs_m_length,
2006                        const Td expected[],
2007                        unsigned expected_length,
2008                        VectorFormat vd_form,
2009                        VectorFormat vn_form,
2010                        VectorFormat vm_form) {
2011  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2012
2013  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2014
2015  const unsigned results_length = inputs_n_length * inputs_m_length;
2016  Td* results = new Td[results_length * vd_lane_count];
2017  const unsigned lane_bit = sizeof(Td) * 8;
2018  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2019
2020  bool skipped;
2021
2022  Test2OpNEON_Helper(helper,
2023                     reinterpret_cast<uintptr_t>(inputs_d),
2024                     reinterpret_cast<uintptr_t>(inputs_n),
2025                     inputs_n_length,
2026                     reinterpret_cast<uintptr_t>(inputs_m),
2027                     inputs_m_length,
2028                     reinterpret_cast<uintptr_t>(results),
2029                     vd_form,
2030                     vn_form,
2031                     vm_form,
2032                     &skipped);
2033
2034  if (Test::generate_test_trace()) {
2035    // Print the results.
2036    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2037    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2038      printf(" ");
2039      // Output a separate result for each element of the result vector.
2040      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2041        unsigned index = lane + (iteration * vd_lane_count);
2042        printf(" 0x%0*" PRIx64 ",",
2043               lane_len_in_hex,
2044               static_cast<uint64_t>(results[index]));
2045      }
2046      printf("\n");
2047    }
2048
2049    printf("};\n");
2050    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2051           name,
2052           results_length);
2053  } else if (!skipped) {
2054    // Check the results.
2055    VIXL_CHECK(expected_length == results_length);
2056    unsigned error_count = 0;
2057    unsigned d = 0;
2058    const char* padding = "                    ";
2059    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2060    for (unsigned n = 0; n < inputs_n_length; n++) {
2061      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2062        bool error_in_vector = false;
2063
2064        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2065          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2066                                  (m * vd_lane_count) + lane;
2067
2068          if (results[output_index] != expected[output_index]) {
2069            error_in_vector = true;
2070            break;
2071          }
2072        }
2073
2074        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2075          printf("%s\n", name);
2076          printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
2077                 lane_len_in_hex + 1,
2078                 padding,
2079                 lane_len_in_hex + 1,
2080                 padding,
2081                 lane_len_in_hex + 1,
2082                 padding,
2083                 lane_len_in_hex + 1,
2084                 padding);
2085
2086          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2087            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2088                                    (m * vd_lane_count) + lane;
2089            unsigned input_index_n =
2090                (inputs_n_length - vd_lane_count + n + 1 + lane) %
2091                inputs_n_length;
2092            unsigned input_index_m =
2093                (inputs_m_length - vd_lane_count + m + 1 + lane) %
2094                inputs_m_length;
2095
2096            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2097                   " "
2098                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2099                   results[output_index] != expected[output_index] ? '*' : ' ',
2100                   lane_len_in_hex,
2101                   static_cast<uint64_t>(inputs_d[lane]),
2102                   lane_len_in_hex,
2103                   static_cast<uint64_t>(inputs_n[input_index_n]),
2104                   lane_len_in_hex,
2105                   static_cast<uint64_t>(inputs_m[input_index_m]),
2106                   lane_len_in_hex,
2107                   static_cast<uint64_t>(results[output_index]),
2108                   lane_len_in_hex,
2109                   static_cast<uint64_t>(expected[output_index]));
2110          }
2111        }
2112      }
2113    }
2114    VIXL_ASSERT(d == expected_length);
2115    if (error_count > kErrorReportLimit) {
2116      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2117    }
2118    VIXL_CHECK(error_count == 0);
2119  }
2120  delete[] results;
2121}
2122
2123
2124// ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
2125
2126static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
2127                                     uintptr_t inputs_d,
2128                                     uintptr_t inputs_n,
2129                                     unsigned inputs_n_length,
2130                                     uintptr_t inputs_m,
2131                                     unsigned inputs_m_length,
2132                                     const int indices[],
2133                                     unsigned indices_length,
2134                                     uintptr_t results,
2135                                     VectorFormat vd_form,
2136                                     VectorFormat vn_form,
2137                                     VectorFormat vm_form,
2138                                     unsigned vm_subvector_count,
2139                                     bool* skipped) {
2140  VIXL_ASSERT(vd_form != kFormatUndefined);
2141  VIXL_ASSERT(vn_form != kFormatUndefined);
2142  VIXL_ASSERT(vm_form != kFormatUndefined);
2143  VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));
2144
2145  CPUFeatures features;
2146  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
2147  features.Combine(CPUFeatures::kFP);
2148  features.Combine(CPUFeatures::kRDM);
2149  features.Combine(CPUFeatures::kDotProduct);
2150  features.Combine(CPUFeatures::kFHM);
2151  SETUP_WITH_FEATURES(features);
2152
2153  START();
2154
2155  // Roll up the loop to keep the code size down.
2156  Label loop_n, loop_m;
2157
2158  Register out = x0;
2159  Register inputs_n_base = x1;
2160  Register inputs_m_base = x2;
2161  Register inputs_d_base = x3;
2162  Register inputs_n_last_16bytes = x4;
2163  Register inputs_m_last_16bytes = x5;
2164  Register index_n = x6;
2165  Register index_m = x7;
2166
2167  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2168  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2169  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2170
2171  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2172  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2173  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2174  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2175  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2176
2177  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
2178  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
2179  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
2180  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2181  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2182
2183  VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);
2184
2185  // Always load and store 128 bits regardless of the format.
2186  VRegister vd = v0.V16B();
2187  VRegister vn = v1.V16B();
2188  VRegister vm = v2.V16B();
2189  VRegister vntmp = v3.V16B();
2190  VRegister vmtmp = v4.V16B();
2191  VRegister vres = v5.V16B();
2192
2193  // These will have the correct format for calling the 'helper'.
2194  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2195  VRegister vm_helper =
2196      VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
2197  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2198
2199  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2200  VRegister vntmp_single = VRegister(3, vn_lane_bits);
2201  VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2202
2203  __ Mov(out, results);
2204
2205  __ Mov(inputs_d_base, inputs_d);
2206
2207  __ Mov(inputs_n_base, inputs_n);
2208  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2209  __ Mov(inputs_m_base, inputs_m);
2210  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2211
2212  __ Ldr(vd, MemOperand(inputs_d_base));
2213  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2214  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2215
2216  __ Mov(index_n, 0);
2217  __ Bind(&loop_n);
2218
2219  __ Ldr(vntmp_single,
2220         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2221  __ Ext(vn, vn, vntmp, vn_lane_bytes);
2222
2223  __ Mov(index_m, 0);
2224  __ Bind(&loop_m);
2225
2226  __ Ldr(vmtmp_single,
2227         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2228  __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2229
2230  __ Mov(vres, vd);
2231  {
2232    for (unsigned i = 0; i < indices_length; i++) {
2233      {
2234        SingleEmissionCheckScope guard(&masm);
2235        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
2236      }
2237      __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2238    }
2239  }
2240
2241  __ Add(index_m, index_m, 1);
2242  __ Cmp(index_m, inputs_m_length);
2243  __ B(lo, &loop_m);
2244
2245  __ Add(index_n, index_n, 1);
2246  __ Cmp(index_n, inputs_n_length);
2247  __ B(lo, &loop_n);
2248
2249  END();
2250  TRY_RUN(skipped);
2251}
2252
2253
2254// Test NEON instructions. The inputs_*[] and expected[] arrays should be
2255// arrays of rawbit representation of input values. This ensures that
2256// exact bit comparisons can be performed.
2257template <typename Td, typename Tn, typename Tm>
2258static void TestByElementNEON(const char* name,
2259                              TestByElementNEONHelper_t helper,
2260                              const Td inputs_d[],
2261                              const Tn inputs_n[],
2262                              unsigned inputs_n_length,
2263                              const Tm inputs_m[],
2264                              unsigned inputs_m_length,
2265                              const int indices[],
2266                              unsigned indices_length,
2267                              const Td expected[],
2268                              unsigned expected_length,
2269                              VectorFormat vd_form,
2270                              VectorFormat vn_form,
2271                              VectorFormat vm_form,
2272                              unsigned vm_subvector_count = 1) {
2273  VIXL_ASSERT(inputs_n_length > 0);
2274  VIXL_ASSERT(inputs_m_length > 0);
2275  VIXL_ASSERT(indices_length > 0);
2276
2277  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2278
2279  const unsigned results_length =
2280      inputs_n_length * inputs_m_length * indices_length;
2281  Td* results = new Td[results_length * vd_lane_count];
2282  const unsigned lane_bit = sizeof(Td) * 8;
2283  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2284
2285  bool skipped;
2286
2287  TestByElementNEON_Helper(helper,
2288                           reinterpret_cast<uintptr_t>(inputs_d),
2289                           reinterpret_cast<uintptr_t>(inputs_n),
2290                           inputs_n_length,
2291                           reinterpret_cast<uintptr_t>(inputs_m),
2292                           inputs_m_length,
2293                           indices,
2294                           indices_length,
2295                           reinterpret_cast<uintptr_t>(results),
2296                           vd_form,
2297                           vn_form,
2298                           vm_form,
2299                           vm_subvector_count,
2300                           &skipped);
2301
2302  if (Test::generate_test_trace()) {
2303    // Print the results.
2304    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2305    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2306      printf(" ");
2307      // Output a separate result for each element of the result vector.
2308      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2309        unsigned index = lane + (iteration * vd_lane_count);
2310        printf(" 0x%0*" PRIx64 ",",
2311               lane_len_in_hex,
2312               static_cast<uint64_t>(results[index]));
2313      }
2314      printf("\n");
2315    }
2316
2317    printf("};\n");
2318    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2319           name,
2320           results_length);
2321  } else if (!skipped) {
2322    // Check the results.
2323    VIXL_CHECK(expected_length == results_length);
2324    unsigned error_count = 0;
2325    unsigned d = 0;
2326    const char* padding = "                    ";
2327    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2328    for (unsigned n = 0; n < inputs_n_length; n++) {
2329      for (unsigned m = 0; m < inputs_m_length; m++) {
2330        for (unsigned index = 0; index < indices_length; index++, d++) {
2331          bool error_in_vector = false;
2332
2333          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2334            unsigned output_index =
2335                (n * inputs_m_length * indices_length * vd_lane_count) +
2336                (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2337                lane;
2338
2339            if (results[output_index] != expected[output_index]) {
2340              error_in_vector = true;
2341              break;
2342            }
2343          }
2344
2345          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2346            printf("%s\n", name);
2347            printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2348                   lane_len_in_hex + 1,
2349                   padding,
2350                   lane_len_in_hex + 1,
2351                   padding,
2352                   lane_len_in_hex + 1,
2353                   padding,
2354                   lane_len_in_hex + 1,
2355                   padding);
2356
2357            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2358              unsigned output_index =
2359                  (n * inputs_m_length * indices_length * vd_lane_count) +
2360                  (m * indices_length * vd_lane_count) +
2361                  (index * vd_lane_count) + lane;
2362              unsigned input_index_n =
2363                  (inputs_n_length - vd_lane_count + n + 1 + lane) %
2364                  inputs_n_length;
2365              unsigned input_index_m =
2366                  (inputs_m_length - vd_lane_count + m + 1 + lane) %
2367                  inputs_m_length;
2368
2369              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2370                     " "
2371                     "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2372                     results[output_index] != expected[output_index] ? '*'
2373                                                                     : ' ',
2374                     lane_len_in_hex,
2375                     static_cast<uint64_t>(inputs_d[lane]),
2376                     lane_len_in_hex,
2377                     static_cast<uint64_t>(inputs_n[input_index_n]),
2378                     lane_len_in_hex,
2379                     static_cast<uint64_t>(inputs_m[input_index_m]),
2380                     indices[index],
2381                     lane_len_in_hex,
2382                     static_cast<uint64_t>(results[output_index]),
2383                     lane_len_in_hex,
2384                     static_cast<uint64_t>(expected[output_index]));
2385            }
2386          }
2387        }
2388      }
2389    }
2390    VIXL_ASSERT(d == expected_length);
2391    if (error_count > kErrorReportLimit) {
2392      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2393    }
2394    VIXL_CHECK(error_count == 0);
2395  }
2396  delete[] results;
2397}
2398
2399
2400// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2401
2402
2403template <typename Tm>
2404void Test2OpImmNEON_Helper(
2405    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2406    uintptr_t inputs_n,
2407    unsigned inputs_n_length,
2408    const Tm inputs_m[],
2409    unsigned inputs_m_length,
2410    uintptr_t results,
2411    VectorFormat vd_form,
2412    VectorFormat vn_form,
2413    bool* skipped) {
2414  VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2415
2416  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
2417                      CPUFeatures::kFP,
2418                      CPUFeatures::kNEONHalf);
2419  START();
2420
2421  // Roll up the loop to keep the code size down.
2422  Label loop_n;
2423
2424  Register out = x0;
2425  Register inputs_n_base = x1;
2426  Register inputs_n_last_16bytes = x3;
2427  Register index_n = x5;
2428
2429  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2430  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2431  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2432
2433  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2434  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2435  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2436  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2437  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2438
2439
2440  // These will be either a D- or a Q-register form, with a single lane
2441  // (for use in scalar load and store operations).
2442  VRegister vd = VRegister(0, vd_bits);
2443  VRegister vn = v1.V16B();
2444  VRegister vntmp = v3.V16B();
2445
2446  // These will have the correct format for use when calling 'helper'.
2447  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2448  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2449
2450  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2451  VRegister vntmp_single = VRegister(3, vn_lane_bits);
2452
2453  __ Mov(out, results);
2454
2455  __ Mov(inputs_n_base, inputs_n);
2456  __ Mov(inputs_n_last_16bytes,
2457         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2458
2459  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2460
2461  __ Mov(index_n, 0);
2462  __ Bind(&loop_n);
2463
2464  __ Ldr(vntmp_single,
2465         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2466  __ Ext(vn, vn, vntmp, vn_lane_bytes);
2467
2468  // Set the destination to zero for tests such as '[r]shrn2'.
2469  // TODO: Setting the destination to values other than zero might be a better
2470  //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2471  __ Movi(vd.V16B(), 0);
2472
2473  {
2474    for (unsigned i = 0; i < inputs_m_length; i++) {
2475      {
2476        SingleEmissionCheckScope guard(&masm);
2477        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2478      }
2479      __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2480    }
2481  }
2482
2483  __ Add(index_n, index_n, 1);
2484  __ Cmp(index_n, inputs_n_length);
2485  __ B(lo, &loop_n);
2486
2487  END();
2488  TRY_RUN(skipped);
2489}
2490
2491
2492// Test NEON instructions. The inputs_*[] and expected[] arrays should be
2493// arrays of rawbit representation of input values. This ensures that
2494// exact bit comparisons can be performed.
2495template <typename Td, typename Tn, typename Tm>
2496static void Test2OpImmNEON(
2497    const char* name,
2498    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2499    const Tn inputs_n[],
2500    unsigned inputs_n_length,
2501    const Tm inputs_m[],
2502    unsigned inputs_m_length,
2503    const Td expected[],
2504    unsigned expected_length,
2505    VectorFormat vd_form,
2506    VectorFormat vn_form) {
2507  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2508
2509  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2510  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2511  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2512
2513  const unsigned results_length = inputs_n_length * inputs_m_length;
2514  Td* results = new Td[results_length * vd_lane_count];
2515  const unsigned lane_bit = sizeof(Td) * 8;
2516  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2517
2518  bool skipped;
2519
2520  Test2OpImmNEON_Helper(helper,
2521                        reinterpret_cast<uintptr_t>(inputs_n),
2522                        inputs_n_length,
2523                        inputs_m,
2524                        inputs_m_length,
2525                        reinterpret_cast<uintptr_t>(results),
2526                        vd_form,
2527                        vn_form,
2528                        &skipped);
2529
2530  if (Test::generate_test_trace()) {
2531    // Print the results.
2532    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2533    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2534      printf(" ");
2535      // Output a separate result for each element of the result vector.
2536      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2537        unsigned index = lane + (iteration * vd_lane_count);
2538        printf(" 0x%0*" PRIx64 ",",
2539               lane_len_in_hex,
2540               static_cast<uint64_t>(results[index]));
2541      }
2542      printf("\n");
2543    }
2544
2545    printf("};\n");
2546    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2547           name,
2548           results_length);
2549  } else if (!skipped) {
2550    // Check the results.
2551    VIXL_CHECK(expected_length == results_length);
2552    unsigned error_count = 0;
2553    unsigned d = 0;
2554    const char* padding = "                    ";
2555    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2556    for (unsigned n = 0; n < inputs_n_length; n++) {
2557      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2558        bool error_in_vector = false;
2559
2560        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2561          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2562                                  (m * vd_lane_count) + lane;
2563
2564          if (results[output_index] != expected[output_index]) {
2565            error_in_vector = true;
2566            break;
2567          }
2568        }
2569
2570        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2571          printf("%s\n", name);
2572          printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2573                 lane_len_in_hex + 1,
2574                 padding,
2575                 lane_len_in_hex,
2576                 padding,
2577                 lane_len_in_hex + 1,
2578                 padding);
2579
2580          const unsigned first_index_n =
2581              inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2582
2583          for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2584               lane++) {
2585            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2586                                    (m * vd_lane_count) + lane;
2587            unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2588            unsigned input_index_m = m;
2589
2590            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2591                   " "
2592                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2593                   results[output_index] != expected[output_index] ? '*' : ' ',
2594                   lane_len_in_hex,
2595                   static_cast<uint64_t>(inputs_n[input_index_n]),
2596                   lane_len_in_hex,
2597                   static_cast<uint64_t>(inputs_m[input_index_m]),
2598                   lane_len_in_hex,
2599                   static_cast<uint64_t>(results[output_index]),
2600                   lane_len_in_hex,
2601                   static_cast<uint64_t>(expected[output_index]));
2602          }
2603        }
2604      }
2605    }
2606    VIXL_ASSERT(d == expected_length);
2607    if (error_count > kErrorReportLimit) {
2608      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2609    }
2610    VIXL_CHECK(error_count == 0);
2611  }
2612  delete[] results;
2613}
2614
2615
2616// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2617
2618
2619static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
2620                                      uintptr_t inputs_d,
2621                                      const int inputs_imm1[],
2622                                      unsigned inputs_imm1_length,
2623                                      uintptr_t inputs_n,
2624                                      unsigned inputs_n_length,
2625                                      const int inputs_imm2[],
2626                                      unsigned inputs_imm2_length,
2627                                      uintptr_t results,
2628                                      VectorFormat vd_form,
2629                                      VectorFormat vn_form,
2630                                      bool* skipped) {
2631  VIXL_ASSERT(vd_form != kFormatUndefined);
2632  VIXL_ASSERT(vn_form != kFormatUndefined);
2633
2634  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
2635  START();
2636
2637  // Roll up the loop to keep the code size down.
2638  Label loop_n;
2639
2640  Register out = x0;
2641  Register inputs_d_base = x1;
2642  Register inputs_n_base = x2;
2643  Register inputs_n_last_vector = x4;
2644  Register index_n = x6;
2645
2646  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2647  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2648  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2649
2650  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2651  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2652  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2653  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2654  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2655
2656
2657  // These will be either a D- or a Q-register form, with a single lane
2658  // (for use in scalar load and store operations).
2659  VRegister vd = VRegister(0, vd_bits);
2660  VRegister vn = VRegister(1, vn_bits);
2661  VRegister vntmp = VRegister(4, vn_bits);
2662  VRegister vres = VRegister(5, vn_bits);
2663
2664  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2665  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2666
2667  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2668  VRegister vntmp_single = VRegister(4, vn_lane_bits);
2669
2670  // Same registers for use in the 'ext' instructions.
2671  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2672  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2673
2674  __ Mov(out, results);
2675
2676  __ Mov(inputs_d_base, inputs_d);
2677
2678  __ Mov(inputs_n_base, inputs_n);
2679  __ Mov(inputs_n_last_vector,
2680         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2681
2682  __ Ldr(vd, MemOperand(inputs_d_base));
2683
2684  __ Ldr(vn, MemOperand(inputs_n_last_vector));
2685
2686  __ Mov(index_n, 0);
2687  __ Bind(&loop_n);
2688
2689  __ Ldr(vntmp_single,
2690         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2691  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2692
2693  {
2694    EmissionCheckScope guard(&masm,
2695                             kInstructionSize * inputs_imm1_length *
2696                                 inputs_imm2_length * 3);
2697    for (unsigned i = 0; i < inputs_imm1_length; i++) {
2698      for (unsigned j = 0; j < inputs_imm2_length; j++) {
2699        __ Mov(vres, vd);
2700        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2701        __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2702      }
2703    }
2704  }
2705
2706  __ Add(index_n, index_n, 1);
2707  __ Cmp(index_n, inputs_n_length);
2708  __ B(lo, &loop_n);
2709
2710  END();
2711  TRY_RUN(skipped);
2712}
2713
2714
2715// Test NEON instructions. The inputs_*[] and expected[] arrays should be
2716// arrays of rawbit representation of input values. This ensures that
2717// exact bit comparisons can be performed.
2718template <typename Td, typename Tn>
2719static void TestOpImmOpImmNEON(const char* name,
2720                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
2721                               const Td inputs_d[],
2722                               const int inputs_imm1[],
2723                               unsigned inputs_imm1_length,
2724                               const Tn inputs_n[],
2725                               unsigned inputs_n_length,
2726                               const int inputs_imm2[],
2727                               unsigned inputs_imm2_length,
2728                               const Td expected[],
2729                               unsigned expected_length,
2730                               VectorFormat vd_form,
2731                               VectorFormat vn_form) {
2732  VIXL_ASSERT(inputs_n_length > 0);
2733  VIXL_ASSERT(inputs_imm1_length > 0);
2734  VIXL_ASSERT(inputs_imm2_length > 0);
2735
2736  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2737
2738  const unsigned results_length =
2739      inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2740
2741  Td* results = new Td[results_length * vd_lane_count];
2742  const unsigned lane_bit = sizeof(Td) * 8;
2743  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2744
2745  bool skipped;
2746
2747  TestOpImmOpImmNEON_Helper(helper,
2748                            reinterpret_cast<uintptr_t>(inputs_d),
2749                            inputs_imm1,
2750                            inputs_imm1_length,
2751                            reinterpret_cast<uintptr_t>(inputs_n),
2752                            inputs_n_length,
2753                            inputs_imm2,
2754                            inputs_imm2_length,
2755                            reinterpret_cast<uintptr_t>(results),
2756                            vd_form,
2757                            vn_form,
2758                            &skipped);
2759
2760  if (Test::generate_test_trace()) {
2761    // Print the results.
2762    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2763    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2764      printf(" ");
2765      // Output a separate result for each element of the result vector.
2766      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2767        unsigned index = lane + (iteration * vd_lane_count);
2768        printf(" 0x%0*" PRIx64 ",",
2769               lane_len_in_hex,
2770               static_cast<uint64_t>(results[index]));
2771      }
2772      printf("\n");
2773    }
2774
2775    printf("};\n");
2776    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2777           name,
2778           results_length);
2779  } else if (!skipped) {
2780    // Check the results.
2781    VIXL_CHECK(expected_length == results_length);
2782    unsigned error_count = 0;
2783    unsigned counted_length = 0;
2784    const char* padding = "                    ";
2785    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2786    for (unsigned n = 0; n < inputs_n_length; n++) {
2787      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2788        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2789          bool error_in_vector = false;
2790
2791          counted_length++;
2792
2793          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2794            unsigned output_index =
2795                (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2796                (imm1 * inputs_imm2_length * vd_lane_count) +
2797                (imm2 * vd_lane_count) + lane;
2798
2799            if (results[output_index] != expected[output_index]) {
2800              error_in_vector = true;
2801              break;
2802            }
2803          }
2804
2805          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2806            printf("%s\n", name);
2807            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2808                   lane_len_in_hex + 1,
2809                   padding,
2810                   lane_len_in_hex,
2811                   padding,
2812                   lane_len_in_hex + 1,
2813                   padding,
2814                   lane_len_in_hex,
2815                   padding,
2816                   lane_len_in_hex + 1,
2817                   padding);
2818
2819            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2820              unsigned output_index =
2821                  (n * inputs_imm1_length * inputs_imm2_length *
2822                   vd_lane_count) +
2823                  (imm1 * inputs_imm2_length * vd_lane_count) +
2824                  (imm2 * vd_lane_count) + lane;
2825              unsigned input_index_n =
2826                  (inputs_n_length - vd_lane_count + n + 1 + lane) %
2827                  inputs_n_length;
2828              unsigned input_index_imm1 = imm1;
2829              unsigned input_index_imm2 = imm2;
2830
2831              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2832                     " "
2833                     "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2834                     results[output_index] != expected[output_index] ? '*'
2835                                                                     : ' ',
2836                     lane_len_in_hex,
2837                     static_cast<uint64_t>(inputs_d[lane]),
2838                     lane_len_in_hex,
2839                     static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2840                     lane_len_in_hex,
2841                     static_cast<uint64_t>(inputs_n[input_index_n]),
2842                     lane_len_in_hex,
2843                     static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2844                     lane_len_in_hex,
2845                     static_cast<uint64_t>(results[output_index]),
2846                     lane_len_in_hex,
2847                     static_cast<uint64_t>(expected[output_index]));
2848            }
2849          }
2850        }
2851      }
2852    }
2853    VIXL_ASSERT(counted_length == expected_length);
2854    if (error_count > kErrorReportLimit) {
2855      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2856    }
2857    VIXL_CHECK(error_count == 0);
2858  }
2859  delete[] results;
2860}
2861
2862
2863// ==== Floating-point tests. ====
2864
2865
// Standard floating-point test expansion for double-, single- and (where
// requested) half-precision operations.

// Turn a macro argument into a string literal, exactly as it was written.
#define STRINGIFY(text) #text
2869
// Expand to a call of Test<kind>() for one floating-point test. The test is
// named "<insn>_<suffix>", exercises MacroAssembler::<insn> over the 'values'
// array, and is checked against kExpected_<insn>_<suffix>, whose length is
// kExpectedCount_<insn>_<suffix>. 'values' must be a real array so that its
// element count can be computed with sizeof.
#define CALL_TEST_FP_HELPER(insn, suffix, kind, values) \
  Test##kind(STRINGIFY(insn) "_" STRINGIFY(suffix),     \
             &MacroAssembler::insn,                     \
             values,                                    \
             sizeof(values) / sizeof(values[0]),        \
             kExpected_##insn##_##suffix,               \
             kExpectedCount_##insn##_##suffix)
2877
// Define the double-precision (<insn>_d) and single-precision (<insn>_s) tests
// for an instruction. They use the kInputDouble<dataset> and
// kInputFloat<dataset> input arrays respectively.
#define DEFINE_TEST_FP(insn, kind, dataset)                    \
  TEST(insn##_d) {                                             \
    CALL_TEST_FP_HELPER(insn, d, kind, kInputDouble##dataset); \
  }                                                            \
  TEST(insn##_s) {                                             \
    CALL_TEST_FP_HELPER(insn, s, kind, kInputFloat##dataset);  \
  }
2885
// As DEFINE_TEST_FP, but also define the half-precision test (<insn>_h), which
// uses the kInputFloat16<dataset> input array.
#define DEFINE_TEST_FP_FP16(insn, kind, dataset)                \
  TEST(insn##_d) {                                              \
    CALL_TEST_FP_HELPER(insn, d, kind, kInputDouble##dataset);  \
  }                                                             \
  TEST(insn##_s) {                                              \
    CALL_TEST_FP_HELPER(insn, s, kind, kInputFloat##dataset);   \
  }                                                             \
  TEST(insn##_h) {                                              \
    CALL_TEST_FP_HELPER(insn, h, kind, kInputFloat16##dataset); \
  }
2896
2897
// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
// memory.
//
// Three-operand instructions. Each line defines the _d, _s and _h tests, run
// through Test3Op with the kInput{Double,Float,Float16}Basic arrays.
DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)

// Two-operand instructions, run through Test2Op with the Basic input arrays.
DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)

// One-operand instructions, run through Test1Op. The frint* tests use the
// Conversions input arrays; frint32x, frint64x, frint32z and frint64z only get
// _d and _s tests, with no _h test.
DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frint32x, 1Op, Conversions)
DEFINE_TEST_FP(frint64x, 1Op, Conversions)
DEFINE_TEST_FP(frint32z, 1Op, Conversions)
DEFINE_TEST_FP(frint64z, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)

// fcmp tests: the d/s variants go through TestCmp and the dz/sz variants
// through TestCmpZero, all with the Basic input arrays.
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// fcvt tests: fcvt_sd takes the double inputs and fcvt_ds the float inputs.
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2941
2942#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)               \
2943  TEST(mnemonic##_xd) {                                            \
2944    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);  \
2945  }                                                                \
2946  TEST(mnemonic##_xs) {                                            \
2947    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);   \
2948  }                                                                \
2949  TEST(mnemonic##_xh) {                                            \
2950    CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \
2951  }                                                                \
2952  TEST(mnemonic##_wd) {                                            \
2953    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);  \
2954  }                                                                \
2955  TEST(mnemonic##_ws) {                                            \
2956    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);   \
2957  }                                                                \
2958  TEST(mnemonic##_wh) {                                            \
2959    CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \
2960  }
2961
2962DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
2963DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
2964DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
2965DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
2966DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
2967DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
2968DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
2969DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2970
2971#define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)           \
2972  TEST(mnemonic##_wd) {                                           \
2973    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
2974  }
2975
2976DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)
2977
2978// TODO: Scvtf-fixed-point
2979// TODO: Scvtf-integer
2980// TODO: Ucvtf-fixed-point
2981// TODO: Ucvtf-integer
2982
2983// TODO: Fccmp
2984// TODO: Fcsel
2985
2986
2987// ==== NEON Tests. ====
2988
// Expand to a Test1OpNEON() call for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>" and is checked against kExpected_NEON_<insn>_<vd_fmt>.
// 'n_values' must be a real array so that its element count can be computed
// with sizeof.
#define CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, n_values) \
  Test1OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt),              \
              &MacroAssembler::insn,                              \
              n_values,                                           \
              (sizeof(n_values) / sizeof(n_values[0])),           \
              kExpected_NEON_##insn##_##vd_fmt,                   \
              kExpectedCount_NEON_##insn##_##vd_fmt,              \
              kFormat##vd_fmt,                                    \
              kFormat##vn_fmt)
2998
// Expand to a Test1OpAcrossNEON() call for MacroAssembler::<insn>. Both formats
// appear in the test name, "<insn>_<vd_fmt>_<vn_fmt>", and in the names of the
// expected-value array and its count.
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, n_values) \
  Test1OpAcrossNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_"           \
                        STRINGIFY(vn_fmt),                              \
                    &MacroAssembler::insn,                              \
                    n_values,                                           \
                    (sizeof(n_values) / sizeof(n_values[0])),           \
                    kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt,        \
                    kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt,   \
                    kFormat##vd_fmt,                                    \
                    kFormat##vn_fmt)
3009
// Expand to a Test2OpNEON() call for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>". 'n_values' and 'm_values' must be real arrays so that
// their element counts can be computed with sizeof; 'd_values' is passed
// through without a count.
#define CALL_TEST_NEON_HELPER_2Op(insn,                   \
                                  vd_fmt,                 \
                                  vn_fmt,                 \
                                  vm_fmt,                 \
                                  d_values,               \
                                  n_values,               \
                                  m_values)               \
  Test2OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt),      \
              &MacroAssembler::insn,                      \
              d_values,                                   \
              n_values,                                   \
              (sizeof(n_values) / sizeof(n_values[0])),   \
              m_values,                                   \
              (sizeof(m_values) / sizeof(m_values[0])),   \
              kExpected_NEON_##insn##_##vd_fmt,           \
              kExpectedCount_NEON_##insn##_##vd_fmt,      \
              kFormat##vd_fmt,                            \
              kFormat##vn_fmt,                            \
              kFormat##vm_fmt)
3029
// Expand to a Test2OpImmNEON() call for MacroAssembler::<insn>. The test name
// and the expected-value identifiers carry a "_2OPIMM" suffix after the
// destination format. 'n_values' and 'm_values' must be real arrays so that
// their element counts can be computed with sizeof.
#define CALL_TEST_NEON_HELPER_2OpImm(insn,                        \
                                     vd_fmt,                      \
                                     vn_fmt,                      \
                                     n_values,                    \
                                     m_values)                    \
  Test2OpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_2OPIMM", \
                 &MacroAssembler::insn,                           \
                 n_values,                                        \
                 (sizeof(n_values) / sizeof(n_values[0])),        \
                 m_values,                                        \
                 (sizeof(m_values) / sizeof(m_values[0])),        \
                 kExpected_NEON_##insn##_##vd_fmt##_2OPIMM,       \
                 kExpectedCount_NEON_##insn##_##vd_fmt##_2OPIMM,  \
                 kFormat##vd_fmt,                                 \
                 kFormat##vn_fmt)
3045
// Expand to a TestByElementNEON() call for MacroAssembler::<insn>. All three
// formats appear in the test name, "<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt>", and in
// the names of the expected-value array and its count. 'n_values', 'm_values'
// and 'lane_indices' must be real arrays so that their element counts can be
// computed with sizeof.
#define CALL_TEST_NEON_HELPER_ByElement(insn,                       \
                                        vd_fmt,                     \
                                        vn_fmt,                     \
                                        vm_fmt,                     \
                                        d_values,                   \
                                        n_values,                   \
                                        m_values,                   \
                                        lane_indices)               \
  TestByElementNEON(                                                \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt)   \
          "_" STRINGIFY(vm_fmt),                                    \
      &MacroAssembler::insn,                                        \
      d_values,                                                     \
      n_values,                                                     \
      (sizeof(n_values) / sizeof(n_values[0])),                     \
      m_values,                                                     \
      (sizeof(m_values) / sizeof(m_values[0])),                     \
      lane_indices,                                                 \
      (sizeof(lane_indices) / sizeof(lane_indices[0])),             \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt,       \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt,  \
      kFormat##vd_fmt,                                              \
      kFormat##vn_fmt,                                              \
      kFormat##vm_fmt)
3070
// As CALL_TEST_NEON_HELPER_ByElement, with one extra trailing argument,
// 'subvector_count', forwarded to TestByElementNEON() after the formats.
#define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(insn,            \
                                                    vd_fmt,          \
                                                    vn_fmt,          \
                                                    vm_fmt,          \
                                                    d_values,        \
                                                    n_values,        \
                                                    m_values,        \
                                                    lane_indices,    \
                                                    subvector_count) \
  TestByElementNEON(                                                 \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt)    \
          "_" STRINGIFY(vm_fmt),                                     \
      &MacroAssembler::insn,                                         \
      d_values,                                                      \
      n_values,                                                      \
      (sizeof(n_values) / sizeof(n_values[0])),                      \
      m_values,                                                      \
      (sizeof(m_values) / sizeof(m_values[0])),                      \
      lane_indices,                                                  \
      (sizeof(lane_indices) / sizeof(lane_indices[0])),              \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt,        \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt,   \
      kFormat##vd_fmt,                                               \
      kFormat##vn_fmt,                                               \
      kFormat##vm_fmt,                                               \
      subvector_count)
3097
// Expand to a TestOpImmOpImmNEON() call. Unlike the other CALL_TEST_NEON_HELPER
// macros, the caller supplies the 'helper' expression itself; 'insn' is only
// used to build the test name, "<insn>_<vd_fmt>", and the expected-value
// identifiers. 'imm1_values', 'n_values' and 'imm2_values' must be real arrays
// so that their element counts can be computed with sizeof.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                     \
                                         insn,                       \
                                         vd_fmt,                     \
                                         vn_fmt,                     \
                                         d_values,                   \
                                         imm1_values,                \
                                         n_values,                   \
                                         imm2_values)                \
  TestOpImmOpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt),          \
                     helper,                                         \
                     d_values,                                       \
                     imm1_values,                                    \
                     (sizeof(imm1_values) / sizeof(imm1_values[0])), \
                     n_values,                                       \
                     (sizeof(n_values) / sizeof(n_values[0])),       \
                     imm2_values,                                    \
                     (sizeof(imm2_values) / sizeof(imm2_values[0])), \
                     kExpected_NEON_##insn##_##vd_fmt,               \
                     kExpectedCount_NEON_##insn##_##vd_fmt,          \
                     kFormat##vd_fmt,                                \
                     kFormat##vn_fmt)
3119
// Forwards to CALL_TEST_NEON_HELPER_1Op, passing `form` for both of its
// format arguments.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, form, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, form, form, input_n)
3122
// Defines TEST(<mnemonic>_8B) and TEST(<mnemonic>_16B). Both run the 2SAME
// helper on the kInput8bits<inputs> array.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, inputs) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##inputs); \
  } \
  TEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##inputs); \
  }
3130
// Defines TEST(<mnemonic>_4H) and TEST(<mnemonic>_8H). Both run the 2SAME
// helper on the kInput16bits<inputs> array.
#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, inputs) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##inputs); \
  }
3138
// Defines TEST(<mnemonic>_2S) and TEST(<mnemonic>_4S). Both run the 2SAME
// helper on the kInput32bits<inputs> array.
#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, inputs) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##inputs); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##inputs); \
  }
3146
// Combines the 8B/16B and 4H/8H 2SAME test definitions.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, inputs)
3150
// Combines the BH (8B/16B/4H/8H) and 2S/4S 2SAME test definitions; no 2D
// test is produced.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, inputs)
3154
// Everything DEFINE_TEST_NEON_2SAME_NO2D defines, plus TEST(<mnemonic>_2D)
// which runs the 2SAME helper on kInput64bits<inputs>.
#define DEFINE_TEST_NEON_2SAME(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, inputs) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##inputs); \
  }
// The 2S/4S 2SAME tests plus TEST(<mnemonic>_2D), which runs the 2SAME helper
// on kInput64bits<inputs>.
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, inputs) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##inputs); \
  }
3165
// Defines the _2S, _4S and _2D tests through the 2SAME helper. The 2S and 4S
// tests read kInputFloat<inputs>; the 2D test reads kInputDouble<inputs>.
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, inputs) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##inputs); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##inputs); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##inputs); \
  }
3176
// Everything DEFINE_TEST_NEON_2SAME_FP defines, plus the _4H and _8H tests,
// which run the 2SAME helper on kInputFloat16<inputs>.
#define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_FP(mnemonic, inputs) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##inputs); \
  }
3185
// Defines the scalar _H, _S and _D tests through the 2SAME helper, reading
// kInputFloat16<inputs>, kInputFloat<inputs> and kInputDouble<inputs>
// respectively.
#define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, inputs) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##inputs); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##inputs); \
  }
3196
// Defines TEST(<mnemonic>_B): the 2SAME helper with form B on
// kInput8bits<inputs>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, inputs) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##inputs); \
  }
// Defines TEST(<mnemonic>_H): the 2SAME helper with form H on
// kInput16bits<inputs>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, inputs) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##inputs); \
  }
// Defines TEST(<mnemonic>_S): the 2SAME helper with form S on
// kInput32bits<inputs>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, inputs) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##inputs); \
  }
// Defines TEST(<mnemonic>_D): the 2SAME helper with form D on
// kInput64bits<inputs>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, inputs) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##inputs); \
  }
3213
// Defines all four scalar 2SAME tests (B, H, S and D), in that order.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, inputs)
3219
// Defines only the S and D scalar 2SAME tests.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, inputs) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, inputs)
3223
3224
// Thin alias: forwards all four arguments, unchanged and in order, to
// CALL_TEST_NEON_HELPER_1OpAcross.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)
3227
// Defines five tests through the ACROSS helper, named
// <mnemonic>_<vd>_<vn>: B from 8B and 16B (kInput8bits<inputs>), H from 4H
// and 8H (kInput16bits<inputs>), and S from 4S (kInput32bits<inputs>).
#define DEFINE_TEST_NEON_ACROSS(mnemonic, inputs) \
  TEST(mnemonic##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##inputs); \
  } \
  TEST(mnemonic##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##inputs); \
  } \
  TEST(mnemonic##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##inputs); \
  }
3244
// Defines five tests through the ACROSS helper, named
// <mnemonic>_<vd>_<vn>: H from 8B and 16B (kInput8bits<inputs>), S from 4H
// and 8H (kInput16bits<inputs>), and D from 4S (kInput32bits<inputs>).
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, inputs) \
  TEST(mnemonic##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##inputs); \
  } \
  TEST(mnemonic##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##inputs); \
  } \
  TEST(mnemonic##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##inputs); \
  }
3261
// Defines three tests through the ACROSS helper: H from 4H and 8H
// (kInputFloat16<inputs>) and S from 4S (kInputFloat<inputs>).
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, inputs) \
  TEST(mnemonic##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##inputs); \
  }
3272
// Thin alias: forwards all four arguments, unchanged and in order, to
// CALL_TEST_NEON_HELPER_1Op. Used when the two formats differ.
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, dst_form, src_form, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, dst_form, src_form, input_n)
3275
// Defines six tests through the 2DIFF helper, each named after its
// destination form. (destination, source, input array) per test:
//   4H <- 8B,  8H <- 16B : kInput8bits<inputs>
//   2S <- 4H,  4S <- 8H  : kInput16bits<inputs>
//   1D <- 2S,  2D <- 4S  : kInput32bits<inputs>
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, inputs) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##inputs); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##inputs); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##inputs); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##inputs); \
  }
3295
// Defines six tests through the 2DIFF helper: three for <mnemonic> and three
// for <mnemonic>2. (destination, source, input array) per test:
//   <mnemonic>  : 8B <- 8H,  4H <- 4S, 2S <- 2D
//   <mnemonic>2 : 16B <- 8H, 8H <- 4S, 4S <- 2D
// 8H sources read kInput16bits<inputs>, 4S sources kInput32bits<inputs> and
// 2D sources kInput64bits<inputs>.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, inputs) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##inputs); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##inputs); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##inputs); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##inputs); \
  }
3315
// Defines four tests through the 2DIFF helper: two for <mnemonic> and two
// for <mnemonic>2. (destination, source, input array) per test:
//   <mnemonic>  : 4S <- 4H (kInputFloat16<inputs>), 2D <- 2S (kInputFloat<inputs>)
//   <mnemonic>2 : 4S <- 8H (kInputFloat16<inputs>), 2D <- 4S (kInputFloat<inputs>)
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, inputs) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##inputs); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##inputs); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##inputs); \
  }
3329
// Defines four tests through the 2DIFF helper: two for <mnemonic> and two
// for <mnemonic>2. (destination, source, input array) per test:
//   <mnemonic>  : 4H <- 4S (kInputFloat<inputs>), 2S <- 2D (kInputDouble<inputs>)
//   <mnemonic>2 : 8H <- 4S (kInputFloat<inputs>), 4S <- 2D (kInputDouble<inputs>)
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, inputs) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##inputs); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##inputs); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##inputs); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##inputs); \
  }
3343
// Defines only the 2D-source tests through the 2DIFF helper:
// <mnemonic>_2S (2S <- 2D) and <mnemonic>2_4S (4S <- 2D), both reading
// kInputDouble<inputs>.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, inputs) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##inputs); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##inputs); \
  }
3351
// Defines three scalar tests through the 2DIFF helper: B <- H
// (kInput16bits<inputs>), H <- S (kInput32bits<inputs>) and S <- D
// (kInput64bits<inputs>). Each test is named after its destination form.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, inputs) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##inputs); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##inputs); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##inputs); \
  }
3362
// Defines three tests through the 2DIFF helper, in this order: S <- 2S
// (kInputFloat<inputs>), D <- 2D (kInputDouble<inputs>) and H <- 2H
// (kInputFloat16<inputs>). Note that, despite the "_SD" suffix in the macro
// name, an _H test is defined as well.
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, inputs) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##inputs); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##inputs); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##inputs); \
  }
3373
// Expands to a braced block that invokes CALL_TEST_NEON_HELPER_2Op with
// `form` repeated for all three format arguments and `input_nm` passed for
// both source inputs.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, form, input_d, input_nm) \
  { \
    CALL_TEST_NEON_HELPER_2Op(mnemonic, form, form, form, input_d, input_nm, \
                              input_nm); \
  }
3384
// Defines TEST(<mnemonic>_8B) and TEST(<mnemonic>_16B) through the 3SAME
// helper. Both use kInput8bitsAccDestination as the destination input and
// kInput8bits<inputs> as the source input.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, inputs) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, kInput8bitsAccDestination, \
                                kInput8bits##inputs); \
  } \
  TEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, kInput8bitsAccDestination, \
                                kInput8bits##inputs); \
  }
3398
// Defines the _4H, _8H, _2S and _4S tests through the 3SAME helper. The H
// forms use kInput16bitsAccDestination / kInput16bits<inputs>; the S forms
// use kInput32bitsAccDestination / kInput32bits<inputs>.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, inputs) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInput16bitsAccDestination, \
                                kInput16bits##inputs); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInput16bitsAccDestination, \
                                kInput16bits##inputs); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInput32bitsAccDestination, \
                                kInput32bits##inputs); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInput32bitsAccDestination, \
                                kInput32bits##inputs); \
  }
3424
// Combines the 8B/16B and the 4H/8H/2S/4S 3SAME test definitions; no 2D test
// is produced.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, inputs) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, inputs) \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, inputs)
3428
// Everything DEFINE_TEST_NEON_3SAME_NO2D defines, plus TEST(<mnemonic>_2D)
// which uses kInput64bitsAccDestination / kInput64bits<inputs>.
#define DEFINE_TEST_NEON_3SAME(mnemonic, inputs) \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, inputs) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInput64bitsAccDestination, \
                                kInput64bits##inputs); \
  }
3437
// Defines the _4H, _8H, _2S, _4S and _2D tests through the 3SAME helper.
// (destination input, source input) per form:
//   4H, 8H : kInputFloat16AccDestination, kInputFloat16<inputs>
//   2S, 4S : kInputFloatAccDestination,   kInputFloat<inputs>
//   2D     : kInputDoubleAccDestination,  kInputDouble<inputs>
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, inputs) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInputFloat16AccDestination, \
                                kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInputFloat16AccDestination, \
                                kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInputFloatAccDestination, \
                                kInputFloat##inputs); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInputFloatAccDestination, \
                                kInputFloat##inputs); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInputDoubleAccDestination, \
                                kInputDouble##inputs); \
  }
3469
// Defines TEST(<mnemonic>_D): the 3SAME helper with form D, using
// kInput64bitsAccDestination and kInput64bits<inputs>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, inputs) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \
                                kInput64bits##inputs); \
  }
3477
// Defines the scalar _H and _S tests through the 3SAME helper. H uses
// kInput16bitsAccDestination / kInput16bits<inputs>; S uses
// kInput32bitsAccDestination / kInput32bits<inputs>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, inputs) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \
                                kInput16bits##inputs); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \
                                kInput32bits##inputs); \
  }
3491
// Defines the scalar _B, _H, _S and _D tests through the 3SAME helper. Each
// form uses the kInput<N>bitsAccDestination / kInput<N>bits<inputs> pair of
// matching width: 8 for B, 16 for H, 32 for S and 64 for D.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, inputs) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, kInput8bitsAccDestination, \
                                kInput8bits##inputs); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \
                                kInput16bits##inputs); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \
                                kInput32bits##inputs); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \
                                kInput64bits##inputs); \
  }
3517
// Defines the scalar _H, _S and _D tests through the 3SAME helper.
// (destination input, source input) per form:
//   H : kInputFloat16AccDestination, kInputFloat16<inputs>
//   S : kInputFloatAccDestination,   kInputFloat<inputs>
//   D : kInputDoubleAccDestination,  kInputDouble<inputs>
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, inputs) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInputFloat16AccDestination, \
                                kInputFloat16##inputs); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInputFloatAccDestination, \
                                kInputFloat##inputs); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInputDoubleAccDestination, \
                                kInputDouble##inputs); \
  }
3537
// Defines TEST(<mnemonic>_2S) (2S <- 2H, 2H) and TEST(<mnemonic>_4S)
// (4S <- 4H, 4H) through the 3DIFF helper. The source inputs are
// kInputFloat16<input_n> and kInputFloat16<input_m>.
//
// NOTE: `input_d` is accepted but never referenced; the destination input is
// always kInputFloatAccDestination.
#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2H, 2H, \
                                kInputFloatAccDestination, \
                                kInputFloat16##input_n, \
                                kInputFloat16##input_m); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                kInputFloatAccDestination, \
                                kInputFloat16##input_n, \
                                kInputFloat16##input_m); \
  }
3557
// Expands to a braced block that forwards all seven arguments, unchanged and
// in order, to CALL_TEST_NEON_HELPER_2Op. Used when the three formats are not
// all the same.
#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, dst_form, n_form, m_form, \
                                    input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op(mnemonic, dst_form, n_form, m_form, input_d, \
                              input_n, input_m); \
  }
3574
// Defines TEST(<mnemonic>_8H) (8H <- 8B, 8B) and TEST(<mnemonic>2_8H)
// (8H <- 16B, 16B) through the 3DIFF helper. Both use
// kInput16bitsAccDestination as the destination input and
// kInput8bits<inputs> for both sources.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, inputs) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##inputs, kInput8bits##inputs); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##inputs, kInput8bits##inputs); \
  }
3594
// Defines TEST(<mnemonic>_4S) (4S <- 4H, 4H) and TEST(<mnemonic>2_4S)
// (4S <- 8H, 8H) through the 3DIFF helper. Both use
// kInput32bitsAccDestination as the destination input and
// kInput16bits<inputs> for both sources.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, inputs) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##inputs, kInput16bits##inputs); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##inputs, kInput16bits##inputs); \
  }
3614
// Defines TEST(<mnemonic>_2D) (2D <- 2S, 2S) and TEST(<mnemonic>2_2D)
// (2D <- 4S, 4S) through the 3DIFF helper. Both use
// kInput64bitsAccDestination as the destination input and
// kInput32bits<inputs> for both sources.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, inputs) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##inputs, kInput32bits##inputs); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##inputs, kInput32bits##inputs); \
  }
3634
// Combines the 4S and 2D "long" 3DIFF test definitions (no 8H tests).
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, inputs) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, inputs) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, inputs)
3638
// Expands to every vector variant of the DEFINE_TEST_NEON_3DIFF_LONG family:
// the 8H tests first, then the 4S and 2D tests (in that order) through the
// DEFINE_TEST_NEON_3DIFF_LONG_SD wrapper.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)    \
  DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)
3643
// Emits TEST(mnemonic##_S): `mnemonic` with the scalar forms S, H, H, passing
// kInput32bitsAccDestination followed by kInput16bits##input twice.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H,            \
                                kInput32bitsAccDestination,   \
                                kInput16bits##input,          \
                                kInput16bits##input);         \
  }
3654
// Emits TEST(mnemonic##_D): `mnemonic` with the scalar forms D, S, S, passing
// kInput64bitsAccDestination followed by kInput32bits##input twice.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S,            \
                                kInput64bitsAccDestination,   \
                                kInput32bits##input,          \
                                kInput32bits##input);         \
  }
3665
// Convenience wrapper: expands to the scalar S test followed by the scalar D
// test of the DEFINE_TEST_NEON_3DIFF_SCALAR_LONG family.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input)        \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3669
// Emits six tests for a "wide" 3DIFF instruction, where the first two forms
// match and the third is narrower:
//   mnemonic     : 8H,8H,8B   4S,4S,4H   2D,2D,2S
//   mnemonic##2  : 8H,8H,16B  4S,4S,8H   2D,2D,4S
// Each test passes the kInput<N>bitsAccDestination list matching the first
// form, then kInput<N>bits##input for the second form's lane size and
// kInput<N/2>bits##input for the third form's lane size.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input)        \
  TEST(mnemonic##_8H) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B,       \
                                kInput16bitsAccDestination, \
                                kInput16bits##input,        \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##_4S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H,       \
                                kInput32bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##_2D) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S,       \
                                kInput64bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##2_8H) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B,   \
                                kInput16bitsAccDestination, \
                                kInput16bits##input,        \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##2_4S) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H,    \
                                kInput32bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##2_2D) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S,    \
                                kInput64bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput32bits##input);       \
  }
3725
// Emits six tests for a "narrow" 3DIFF instruction, where the first form is
// narrower than the (matching) second and third forms:
//   mnemonic     : 8B,8H,8H   4H,4S,4S  2S,2D,2D
//   mnemonic##2  : 16B,8H,8H  8H,4S,4S  4S,2D,2D
// Each test passes the kInput<N>bitsAccDestination list matching the first
// form's lane size, then kInput<2N>bits##input twice.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input)      \
  TEST(mnemonic##_8B) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H,       \
                                kInput8bitsAccDestination,  \
                                kInput16bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##_4H) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S,       \
                                kInput16bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##_2S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D,       \
                                kInput32bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput64bits##input);       \
  }                                                         \
  TEST(mnemonic##2_16B) {                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H,   \
                                kInput8bitsAccDestination,  \
                                kInput16bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##2_8H) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S,    \
                                kInput16bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##2_4S) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D,    \
                                kInput32bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput64bits##input);       \
  }
3781
// Emits two tests whose first form has 32-bit lanes while the second and third
// forms have 8-bit lanes: TEST(mnemonic##_2S) uses 2S, 8B, 8B and
// TEST(mnemonic##_4S) uses 4S, 16B, 16B. Both pass kInput32bitsAccDestination
// followed by kInput8bits##input twice.
#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \
  TEST(mnemonic##_2S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 8B, 8B,       \
                                kInput32bitsAccDestination, \
                                kInput8bits##input,         \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##_4S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 16B, 16B,     \
                                kInput32bitsAccDestination, \
                                kInput8bits##input,         \
                                kInput8bits##input);        \
  }
3801
3802
// Upper-case spelling of CALL_TEST_NEON_HELPER_2OpImm: forwards all five
// arguments unchanged, in order, and wraps the call in its own brace-delimited
// block.
#define CALL_TEST_NEON_HELPER_2OPIMM(                    \
    mnemonic, vdform, vnform, input_n, input_imm)        \
  {                                                      \
    CALL_TEST_NEON_HELPER_2OpImm(                        \
        mnemonic, vdform, vnform, input_n, input_imm);   \
  }
3815
// Emits seven TEST(mnemonic##_<form>_2OPIMM) cases, one per vector form
// (8B, 16B, 4H, 8H, 2S, 4S, 2D), using the same form for vd and vn. Each case
// passes kInput<N>bits##input and kInput<N>bitsImm##input_imm, where N is the
// lane size of the form (8, 16, 32 or 64).
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm)   \
  TEST(mnemonic##_8B_2OPIMM) {                                \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B,            \
                                 kInput8bits##input,          \
                                 kInput8bitsImm##input_imm);  \
  }                                                           \
  TEST(mnemonic##_16B_2OPIMM) {                               \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B,          \
                                 kInput8bits##input,          \
                                 kInput8bitsImm##input_imm);  \
  }                                                           \
  TEST(mnemonic##_4H_2OPIMM) {                                \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H,            \
                                 kInput16bits##input,         \
                                 kInput16bitsImm##input_imm); \
  }                                                           \
  TEST(mnemonic##_8H_2OPIMM) {                                \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H,            \
                                 kInput16bits##input,         \
                                 kInput16bitsImm##input_imm); \
  }                                                           \
  TEST(mnemonic##_2S_2OPIMM) {                                \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S,            \
                                 kInput32bits##input,         \
                                 kInput32bitsImm##input_imm); \
  }                                                           \
  TEST(mnemonic##_4S_2OPIMM) {                                \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S,            \
                                 kInput32bits##input,         \
                                 kInput32bitsImm##input_imm); \
  }                                                           \
  TEST(mnemonic##_2D_2OPIMM) {                                \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D,            \
                                 kInput64bits##input,         \
                                 kInput64bitsImm##input_imm); \
  }
3866
// Emits seven TEST(mnemonic##_<form>_2OPIMM) cases whose vd form is a vector
// (8B, 16B, 4H, 8H, 2S, 4S, 2D) and whose vn form is the scalar of the same
// lane size (B, H, S or D). Each case passes kInput<N>bits##input and
// kInput<N>bitsImm##input_imm for that lane size.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B,                \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##_16B_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B,               \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##_4H_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H,                \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_8H_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H,                \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_2S_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S,                \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_4S_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S,                \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_2D_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D,                \
                                 kInput64bits##input,            \
                                 kInput64bitsImm##input_imm);    \
  }
3917
// Emits six narrowing TEST(..._2OPIMM) cases, where the vn form has lanes
// twice as wide as the vd form:
//   mnemonic     : 8B<-8H   4H<-4S  2S<-2D
//   mnemonic##2  : 16B<-8H  8H<-4S  4S<-2D
// input_n is kInput<2N>bits##input (the vn lane size) and input_imm is
// kInput<N>bitsImm##input_imm (the vd lane size).
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H,                 \
                                 kInput16bits##input,              \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  TEST(mnemonic##_4H_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S,                 \
                                 kInput32bits##input,              \
                                 kInput16bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##_2S_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D,                 \
                                 kInput64bits##input,              \
                                 kInput32bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##2_16B_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H,             \
                                 kInput16bits##input,              \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  TEST(mnemonic##2_8H_2OPIMM) {                                    \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S,              \
                                 kInput32bits##input,              \
                                 kInput16bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##2_4S_2OPIMM) {                                    \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D,              \
                                 kInput64bits##input,              \
                                 kInput32bitsImm##input_imm);      \
  }
3961
// Emits three scalar narrowing TEST(..._2OPIMM) cases: B<-H, H<-S and S<-D.
// input_n is kInput<2N>bits##input (the vn size) and input_imm is
// kInput<N>bitsImm##input_imm (the vd size).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H,                          \
                                 kInput16bits##input,                     \
                                 kInput8bitsImm##input_imm);              \
  }                                                                       \
  TEST(mnemonic##_H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S,                          \
                                 kInput32bits##input,                     \
                                 kInput16bitsImm##input_imm);             \
  }                                                                       \
  TEST(mnemonic##_S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D,                          \
                                 kInput64bits##input,                     \
                                 kInput32bitsImm##input_imm);             \
  }
3984
// Emits five TEST(mnemonic##_<form>_2OPIMM) cases for the forms 4H, 8H, 2S, 4S
// and 2D (same form for vd and vn). input_imm is always
// kInputDoubleImm##input_imm; input_n is kInputFloat16##input for the H forms,
// kInputFloat##input for 4S and kInputDouble##input for 2D.
//
// NOTE(review): the 2S case pastes the literal token `Basic` (i.e. it always
// uses kInputFloatBasic) and so ignores `input`. This is preserved as-is;
// presumably it matches previously recorded results -- confirm before changing.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H,                    \
                                 kInputFloat16##input,                \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_8H_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H,                    \
                                 kInputFloat16##input,                \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_2S_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S,                    \
                                 kInputFloat##Basic,                  \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_4S_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S,                    \
                                 kInputFloat##input,                  \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_2D_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D,                    \
                                 kInputDouble##input,                 \
                                 kInputDoubleImm##input_imm);         \
  }
4021
// Emits five TEST(mnemonic##_<form>_2OPIMM) cases for the forms 4H, 8H, 2S, 4S
// and 2D (same form for vd and vn). input_n is kInputFloat16##input for the H
// forms, kInputFloat##input for 4S and kInputDouble##input for 2D; input_imm is
// kInput<N>bitsImm##input_imm for the lane size N (16, 32 or 64).
//
// NOTE(review): the 2S case pastes the literal token `Basic` (i.e. it always
// uses kInputFloatBasic) and so ignores `input`. This is preserved as-is;
// presumably it matches previously recorded results -- confirm before changing.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H,             \
                                 kInputFloat16##input,         \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_8H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H,             \
                                 kInputFloat16##input,         \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S,             \
                                 kInputFloat##Basic,           \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_4S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S,             \
                                 kInputFloat##input,           \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2D_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D,             \
                                 kInputDouble##input,          \
                                 kInput64bitsImm##input_imm);  \
  }
4058
// Emits three scalar TEST(mnemonic##_<form>_2OPIMM) cases for the forms H, S
// and D (same form for vd and vn). input_imm is kInput<N>bitsImm##input_imm
// for the size N (16, 32 or 64).
//
// NOTE(review): only the D case derives input_n from `input`
// (kInputDouble##input). The H and S cases paste the literal token `Basic`
// (kInputFloat16Basic / kInputFloatBasic) and so ignore `input`. This is
// preserved as-is; presumably it matches previously recorded results --
// confirm before changing.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H,                      \
                                 kInputFloat16##Basic,                \
                                 kInput16bitsImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_S_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S,                      \
                                 kInputFloat##Basic,                  \
                                 kInput32bitsImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_D_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D,                      \
                                 kInputDouble##input,                 \
                                 kInput64bitsImm##input_imm);         \
  }
4081
// Emits five TEST(mnemonic##_<form>_2OPIMM) cases for the forms 4H, 8H, 2S, 4S
// and 2D (same form for vd and vn; no 8B/16B cases). Each case passes
// kInput<N>bits##input and kInput<N>bitsImm##input_imm for the lane size N.
#define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H,              \
                                 kInput16bits##input,           \
                                 kInput16bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_8H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H,              \
                                 kInput16bits##input,           \
                                 kInput16bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_2S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S,              \
                                 kInput32bits##input,           \
                                 kInput32bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_4S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S,              \
                                 kInput32bits##input,           \
                                 kInput32bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_2D_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D,              \
                                 kInput64bits##input,           \
                                 kInput64bitsImm##input_imm);   \
  }
4118
// Emits TEST(mnemonic##_D_2OPIMM): the scalar D form for both vd and vn, with
// kInput64bits##input and kInput64bitsImm##input_imm.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D,                     \
                                 kInput64bits##input,                \
                                 kInput64bitsImm##input_imm);        \
  }
4127
// Generates the H, S and D scalar tests for a two-operand-plus-immediate
// instruction:
//   TEST(<mnemonic>_H_2OPIMM) uses kInput16bits<input> and
//   kInput16bitsImm<input_imm>;
//   TEST(<mnemonic>_S_2OPIMM) uses kInput32bits<input> and
//   kInput32bitsImm<input_imm>;
//   the D test is produced by DEFINE_TEST_NEON_2OPIMM_SCALAR_D.
// Each test passes the same form (H or S) for both format arguments of
// CALL_TEST_NEON_HELPER_2OPIMM.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                          \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
                                 H,                                    \
                                 H,                                    \
                                 kInput16bits##input,                  \
                                 kInput16bitsImm##input_imm);          \
  }                                                                    \
  TEST(mnemonic##_S_2OPIMM) {                                          \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
                                 S,                                    \
                                 S,                                    \
                                 kInput32bits##input,                  \
                                 kInput32bitsImm##input_imm);          \
  }                                                                    \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
4144
// Floating-point counterpart of the scalar D test: generates
// TEST(<mnemonic>_D_2OPIMM), which invokes CALL_TEST_NEON_HELPER_2OPIMM with D
// for both format arguments, kInputDouble<input> and
// kInputDoubleImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) {                                           \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                              \
                                 D,                                     \
                                 D,                                     \
                                 kInputDouble##input,                   \
                                 kInputDoubleImm##input_imm);           \
  }
4153
// Generates the H, S and D floating-point scalar tests for a
// two-operand-plus-immediate instruction:
//   TEST(<mnemonic>_H_2OPIMM) uses kInputFloat16<input>;
//   TEST(<mnemonic>_S_2OPIMM) uses kInputFloat<input>;
//   the D test is produced by DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D.
// Note that the H and S tests both take their immediates from
// kInputDoubleImm<input_imm>, regardless of the operand width.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 H,                                       \
                                 H,                                       \
                                 kInputFloat16##input,                    \
                                 kInputDoubleImm##input_imm);             \
  }                                                                       \
  TEST(mnemonic##_S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 S,                                       \
                                 S,                                       \
                                 kInputFloat##input,                      \
                                 kInputDoubleImm##input_imm);             \
  }                                                                       \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
4170
// Generates the B, H, S and D scalar tests for a two-operand-plus-immediate
// instruction. TEST(<mnemonic>_B_2OPIMM) is defined here, using
// kInput8bits<input> and kInput8bitsImm<input_imm>; the remaining widths are
// produced by DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) {                                      \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 B,                                \
                                 B,                                \
                                 kInput8bits##input,               \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
4180
// Generates six tests for a two-operand-plus-immediate instruction whose two
// format arguments differ, covering both `mnemonic` and `mnemonic`2:
//   <mnemonic>_8H_2OPIMM   : 8H, 8B   with the 8-bit inputs
//   <mnemonic>_4S_2OPIMM   : 4S, 4H   with the 16-bit inputs
//   <mnemonic>_2D_2OPIMM   : 2D, 2S   with the 32-bit inputs
//   <mnemonic>2_8H_2OPIMM  : 8H, 16B  with the 8-bit inputs
//   <mnemonic>2_4S_2OPIMM  : 4S, 8H   with the 16-bit inputs
//   <mnemonic>2_2D_2OPIMM  : 2D, 4S   with the 32-bit inputs
// The input and immediate sets are selected by the width of the second format
// (kInput<N>bits<input> and kInput<N>bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 8H,                             \
                                 8B,                             \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##_4S_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 4S,                             \
                                 4H,                             \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_2D_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 2D,                             \
                                 2S,                             \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##2_8H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 8H,                             \
                                 16B,                            \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##2_4S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 4S,                             \
                                 8H,                             \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##2_2D_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 2D,                             \
                                 4S,                             \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }
4224
// Upper-case entry point for the by-element dot-product helper. It forwards
// all nine arguments, unchanged and in order, to
// CALL_TEST_NEON_HELPER_ByElement_Dot_Product and wraps the call in its own
// brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,           \
                                                    vdform,             \
                                                    vnform,             \
                                                    vmform,             \
                                                    input_d,            \
                                                    input_n,            \
                                                    input_m,            \
                                                    indices,            \
                                                    vm_subvector_count) \
  {                                                                     \
    CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,               \
                                                vdform,                 \
                                                vnform,                 \
                                                vmform,                 \
                                                input_d,                \
                                                input_n,                \
                                                input_m,                \
                                                indices,                \
                                                vm_subvector_count);    \
  }
4245
// Generates the two by-element dot-product tests for `mnemonic`:
//   TEST(<mnemonic>_2S_8B_B)  : forms 2S, 8B, B
//   TEST(<mnemonic>_4S_16B_B) : forms 4S, 16B, B
// Both use kInput32bits<input_d> for the destination, kInput8bits<input_n> and
// kInput8bits<input_m> for the sources, kInputSIndices for the indices, and
// pass 4 as vm_subvector_count.
#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(mnemonic,               \
                                               input_d,                \
                                               input_n,                \
                                               input_m)                \
  TEST(mnemonic##_2S_8B_B) {                                           \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
                                                2S,                    \
                                                8B,                    \
                                                B,                     \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n,  \
                                                kInput8bits##input_m,  \
                                                kInputSIndices,        \
                                                4);                    \
  }                                                                    \
  TEST(mnemonic##_4S_16B_B) {                                          \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
                                                4S,                    \
                                                16B,                   \
                                                B,                     \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n,  \
                                                kInput8bits##input_m,  \
                                                kInputSIndices,        \
                                                4);                    \
  }
4272
// Upper-case entry point for the by-element helper. It forwards all eight
// arguments, unchanged and in order, to CALL_TEST_NEON_HELPER_ByElement and
// wraps the call in its own brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                        vdform,   \
                                        vnform,   \
                                        vmform,   \
                                        input_d,  \
                                        input_n,  \
                                        input_m,  \
                                        indices)  \
  {                                               \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic,     \
                                    vdform,       \
                                    vnform,       \
                                    vmform,       \
                                    input_d,      \
                                    input_n,      \
                                    input_m,      \
                                    indices);     \
  }
4291
// Generates the four integer vector by-element tests for `mnemonic`:
//   <mnemonic>_4H_4H_H and <mnemonic>_8H_8H_H use the kInput16bits<...> inputs
//   with kInputHIndices;
//   <mnemonic>_2S_2S_S and <mnemonic>_4S_4S_S use the kInput32bits<...> inputs
//   with kInputSIndices.
// In every test the destination and first source share the same vector form,
// and input_d / input_n / input_m select the d, n and m input sets.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    4H,                                 \
                                    4H,                                 \
                                    H,                                  \
                                    kInput16bits##input_d,              \
                                    kInput16bits##input_n,              \
                                    kInput16bits##input_m,              \
                                    kInputHIndices);                    \
  }                                                                     \
  TEST(mnemonic##_8H_8H_H) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    8H,                                 \
                                    8H,                                 \
                                    H,                                  \
                                    kInput16bits##input_d,              \
                                    kInput16bits##input_n,              \
                                    kInput16bits##input_m,              \
                                    kInputHIndices);                    \
  }                                                                     \
  TEST(mnemonic##_2S_2S_S) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    2S,                                 \
                                    2S,                                 \
                                    S,                                  \
                                    kInput32bits##input_d,              \
                                    kInput32bits##input_n,              \
                                    kInput32bits##input_m,              \
                                    kInputSIndices);                    \
  }                                                                     \
  TEST(mnemonic##_4S_4S_S) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    4S,                                 \
                                    4S,                                 \
                                    S,                                  \
                                    kInput32bits##input_d,              \
                                    kInput32bits##input_n,              \
                                    kInput32bits##input_m,              \
                                    kInputSIndices);                    \
  }
4333
// Generates the two integer scalar by-element tests for `mnemonic`:
//   <mnemonic>_H_H_H uses the kInput16bits<...> inputs with kInputHIndices;
//   <mnemonic>_S_S_S uses the kInput32bits<...> inputs with kInputSIndices.
// input_d / input_n / input_m select the d, n and m input sets.
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) {                                                     \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
                                    H,                                         \
                                    H,                                         \
                                    H,                                         \
                                    kInput16bits##input_d,                     \
                                    kInput16bits##input_n,                     \
                                    kInput16bits##input_m,                     \
                                    kInputHIndices);                           \
  }                                                                            \
  TEST(mnemonic##_S_S_S) {                                                     \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
                                    S,                                         \
                                    S,                                         \
                                    S,                                         \
                                    kInput32bits##input_d,                     \
                                    kInput32bits##input_n,                     \
                                    kInput32bits##input_m,                     \
                                    kInputSIndices);                           \
  }
4355
// Generates the five floating-point vector by-element tests for `mnemonic`:
//   <mnemonic>_4H_4H_H, <mnemonic>_8H_8H_H : kInputFloat16<...>, kInputHIndices
//   <mnemonic>_2S_2S_S, <mnemonic>_4S_4S_S : kInputFloat<...>,   kInputSIndices
//   <mnemonic>_2D_2D_D                     : kInputDouble<...>,  kInputDIndices
// input_d / input_n / input_m select the d, n and m input sets.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    4H,                                    \
                                    4H,                                    \
                                    H,                                     \
                                    kInputFloat16##input_d,                \
                                    kInputFloat16##input_n,                \
                                    kInputFloat16##input_m,                \
                                    kInputHIndices);                       \
  }                                                                        \
  TEST(mnemonic##_8H_8H_H) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    8H,                                    \
                                    8H,                                    \
                                    H,                                     \
                                    kInputFloat16##input_d,                \
                                    kInputFloat16##input_n,                \
                                    kInputFloat16##input_m,                \
                                    kInputHIndices);                       \
  }                                                                        \
  TEST(mnemonic##_2S_2S_S) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    2S,                                    \
                                    2S,                                    \
                                    S,                                     \
                                    kInputFloat##input_d,                  \
                                    kInputFloat##input_n,                  \
                                    kInputFloat##input_m,                  \
                                    kInputSIndices);                       \
  }                                                                        \
  TEST(mnemonic##_4S_4S_S) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    4S,                                    \
                                    4S,                                    \
                                    S,                                     \
                                    kInputFloat##input_d,                  \
                                    kInputFloat##input_n,                  \
                                    kInputFloat##input_m,                  \
                                    kInputSIndices);                       \
  }                                                                        \
  TEST(mnemonic##_2D_2D_D) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    2D,                                    \
                                    2D,                                    \
                                    D,                                     \
                                    kInputDouble##input_d,                 \
                                    kInputDouble##input_n,                 \
                                    kInputDouble##input_m,                 \
                                    kInputDIndices);                       \
  }
4407
// Generates the two by-element tests whose destination form is wider than the
// source form:
//   <mnemonic>_2S_2H_H : forms 2S, 2H, H
//   <mnemonic>_4S_4H_H : forms 4S, 4H, H
// The sources come from kInputFloat16<input_n> and kInputFloat16<input_m>, and
// the indices from kInputHIndices.
// NOTE: `input_d` is accepted for signature parity with the other BYELEMENT
// macros but is not used; the destination input is always
// kInputFloatAccDestination.
#define DEFINE_TEST_NEON_FHM_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2H_H) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    2S,                                     \
                                    2H,                                     \
                                    H,                                      \
                                    kInputFloatAccDestination,              \
                                    kInputFloat16##input_n,                 \
                                    kInputFloat16##input_m,                 \
                                    kInputHIndices);                        \
  }                                                                         \
  TEST(mnemonic##_4S_4H_H) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    4S,                                     \
                                    4H,                                     \
                                    H,                                      \
                                    kInputFloatAccDestination,              \
                                    kInputFloat16##input_n,                 \
                                    kInputFloat16##input_m,                 \
                                    kInputHIndices);                        \
  }
4429
// Generates the three floating-point scalar by-element tests for `mnemonic`:
//   <mnemonic>_H_H_H : kInputFloat16<...>, kInputHIndices
//   <mnemonic>_S_S_S : kInputFloat<...>,   kInputSIndices
//   <mnemonic>_D_D_D : kInputDouble<...>,  kInputDIndices
// inp_d / inp_n / inp_m select the d, n and m input sets.
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_H_H_H) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    H,                                      \
                                    H,                                      \
                                    H,                                      \
                                    kInputFloat16##inp_d,                   \
                                    kInputFloat16##inp_n,                   \
                                    kInputFloat16##inp_m,                   \
                                    kInputHIndices);                        \
  }                                                                         \
  TEST(mnemonic##_S_S_S) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    S,                                      \
                                    S,                                      \
                                    S,                                      \
                                    kInputFloat##inp_d,                     \
                                    kInputFloat##inp_n,                     \
                                    kInputFloat##inp_m,                     \
                                    kInputSIndices);                        \
  }                                                                         \
  TEST(mnemonic##_D_D_D) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    D,                                      \
                                    D,                                      \
                                    D,                                      \
                                    kInputDouble##inp_d,                    \
                                    kInputDouble##inp_n,                    \
                                    kInputDouble##inp_m,                    \
                                    kInputDIndices);                        \
  }
4461
4462
// Generates four by-element tests in which the destination form is wider than
// the source form, covering both `mnemonic` and `mnemonic`2:
//   <mnemonic>_4S_4H_H   : 4S, 4H, H  (32-bit d; 16-bit n, m; kInputHIndices)
//   <mnemonic>2_4S_8H_H  : 4S, 8H, H  (32-bit d; 16-bit n, m; kInputHIndices)
//   <mnemonic>_2D_2S_S   : 2D, 2S, S  (64-bit d; 32-bit n, m; kInputSIndices)
//   <mnemonic>2_2D_4S_S  : 2D, 4S, S  (64-bit d; 32-bit n, m; kInputSIndices)
// input_d / input_n / input_m select the d, n and m input sets.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) {                                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
                                    4S,                                      \
                                    4H,                                      \
                                    H,                                       \
                                    kInput32bits##input_d,                   \
                                    kInput16bits##input_n,                   \
                                    kInput16bits##input_m,                   \
                                    kInputHIndices);                         \
  }                                                                          \
  TEST(mnemonic##2_4S_8H_H) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
                                    4S,                                      \
                                    8H,                                      \
                                    H,                                       \
                                    kInput32bits##input_d,                   \
                                    kInput16bits##input_n,                   \
                                    kInput16bits##input_m,                   \
                                    kInputHIndices);                         \
  }                                                                          \
  TEST(mnemonic##_2D_2S_S) {                                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
                                    2D,                                      \
                                    2S,                                      \
                                    S,                                       \
                                    kInput64bits##input_d,                   \
                                    kInput32bits##input_n,                   \
                                    kInput32bits##input_m,                   \
                                    kInputSIndices);                         \
  }                                                                          \
  TEST(mnemonic##2_2D_4S_S) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
                                    2D,                                      \
                                    4S,                                      \
                                    S,                                       \
                                    kInput64bits##input_d,                   \
                                    kInput32bits##input_n,                   \
                                    kInput32bits##input_m,                   \
                                    kInputSIndices);                         \
  }
4504
// Generates two scalar by-element tests in which the destination form is wider
// than the source form:
//   <mnemonic>_S_H_H : S, H, H  (32-bit d; 16-bit n, m; kInputHIndices)
//   <mnemonic>_D_S_S : D, S, S  (64-bit d; 32-bit n, m; kInputSIndices)
// input_d / input_n / input_m select the d, n and m input sets.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic,   \
                                               input_d,    \
                                               input_n,    \
                                               input_m)    \
  TEST(mnemonic##_S_H_H) {                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
                                    S,                     \
                                    H,                     \
                                    H,                     \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices);       \
  }                                                        \
  TEST(mnemonic##_D_S_S) {                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
                                    D,                     \
                                    S,                     \
                                    S,                     \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices);       \
  }
4529
4530
// Entry point for tests of instructions taking two operands and two
// immediates. It invokes CALL_TEST_NEON_HELPER_OpImmOpImm inside its own
// brace-delimited block, passing:
//   - &MacroAssembler::<mnemonic> followed by the bare mnemonic,
//   - `variant` twice (the same form is used for both format arguments),
//   - input_d, input_imm1, input_n and input_imm2, in that order.
#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                 \
                                      variant,                  \
                                      input_d,                  \
                                      input_imm1,               \
                                      input_n,                  \
                                      input_imm2)               \
  {                                                             \
    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \
                                     mnemonic,                  \
                                     variant,                   \
                                     variant,                   \
                                     input_d,                   \
                                     input_imm1,                \
                                     input_n,                   \
                                     input_imm2);               \
  }
4547
// Generates four tests for an instruction taking two operands and two
// immediates, one per lane size:
//   <mnemonic>_B : variant 16B, 8-bit inputs and immediates
//   <mnemonic>_H : variant 8H,  16-bit inputs and immediates
//   <mnemonic>_S : variant 4S,  32-bit inputs and immediates
//   <mnemonic>_D : variant 2D,  64-bit inputs and immediates
// For width N, the arguments passed to CALL_TEST_NEON_HELPER_2OP2IMM are
// kInput<N>bits<input_d>, kInput<N>bitsImm<input_imm1>, kInput<N>bits<input_n>
// and kInput<N>bitsImm<input_imm2>.
#define DEFINE_TEST_NEON_2OP2IMM(mnemonic,                      \
                                 input_d,                       \
                                 input_imm1,                    \
                                 input_n,                       \
                                 input_imm2)                    \
  TEST(mnemonic##_B) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  16B,                          \
                                  kInput8bits##input_d,         \
                                  kInput8bitsImm##input_imm1,   \
                                  kInput8bits##input_n,         \
                                  kInput8bitsImm##input_imm2);  \
  }                                                             \
  TEST(mnemonic##_H) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  8H,                           \
                                  kInput16bits##input_d,        \
                                  kInput16bitsImm##input_imm1,  \
                                  kInput16bits##input_n,        \
                                  kInput16bitsImm##input_imm2); \
  }                                                             \
  TEST(mnemonic##_S) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  4S,                           \
                                  kInput32bits##input_d,        \
                                  kInput32bitsImm##input_imm1,  \
                                  kInput32bits##input_n,        \
                                  kInput32bitsImm##input_imm2); \
  }                                                             \
  TEST(mnemonic##_D) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  2D,                           \
                                  kInput64bits##input_d,        \
                                  kInput64bitsImm##input_imm1,  \
                                  kInput64bits##input_n,        \
                                  kInput64bitsImm##input_imm2); \
  }
4585
4586
// Advanced SIMD copy.
// `ins` goes through the two-immediate macro: one LaneCountFromZero immediate
// table is paired with each of its two register inputs. `dup` is generated
// with a single immediate table.
DEFINE_TEST_NEON_2OP2IMM(
    ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
4591
4592
// Advanced SIMD scalar copy.
// Scalar `dup`, generated with the `Basic` input selector and the
// `LaneCountFromZero` immediate selector.
DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4595
4596
// Advanced SIMD three same.
// One line per mnemonic; every test here uses the `Basic` input selector.
// NOTE(review): the macro suffixes (_NO2D, _HS, _8B_16B, _FP) appear to select
// which vector arrangements get a test - confirm against the macro definitions
// earlier in this file.
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
// The dot products are listed with this group but are generated through a
// 3DIFF macro rather than a 3SAME one.
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4679
4680
// Advanced SIMD scalar three same.
// NOTE(review): the _D and _HS suffixes appear to restrict the generated tests
// to the D, or the H and S, scalar sizes - confirm against the macro
// definitions.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
// NOTE(review): uqadd/uqsub use the _D macro here whereas sqadd/sqsub above use
// the unsuffixed one - confirm whether the narrower coverage is intentional.
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4715
4716
// Advanced SIMD FHM instructions (FMLAL, FMLSL).
// These are oddballs: they are encoded under the 3SAME group but behave
// quite differently, which is why they have a dedicated test macro.
// The macro takes three input selectors; all of them are `Basic` here.
DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)
4724
4725
// Advanced SIMD three different.
// NOTE(review): LONG / WIDE / NARROW in the macro names presumably describe how
// the destination lane size relates to the source lane sizes, and _SD / _8H
// which arrangements are generated - confirm against the macro definitions.
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
4753
4754
// Advanced SIMD scalar three different.
// Only the saturating doubling multiplies are generated here, all through the
// same SCALAR_LONG_SD macro with the `Basic` input selector.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4759
4760
// Advanced SIMD scalar pairwise.
// `addp` is tested in a single form (D destination, 2D source, 64-bit `Basic`
// inputs), so its test is written out directly instead of through a DEFINE_
// macro.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4770
4771
// Advanced SIMD shift by immediate.
// Right shifts take their immediates from the TypeWidth tables, left shifts
// from TypeWidthFromZero, and the fixed-point conversions from
// TypeWidthFromZeroToWidth.
// NOTE(review): presumably those tables span [1, width], [0, width - 1] and
// [0, width] respectively - confirm against the input tables.
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4805
4806
// Advanced SIMD scalar shift by immediate.
// Right shifts take their immediates from the TypeWidth tables, left shifts
// from TypeWidthFromZero, and the fixed-point conversions from
// TypeWidthFromZeroToWidth.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4836
4837
// Advanced SIMD two-register miscellaneous.
// Compare-against-zero forms (cmgt, fcmeq, ...) are generated through the
// 2OPIMM macros with the `Zero` immediate selector. Rounding and conversion
// instructions use the `Conversions` inputs; everything else uses `Basic`.
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
// FCVTZS (vector, integer) covered by FCVTZS (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
// shll has its own immediate selector (`SHLL`).
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
// FCVTZU (vector, integer) covered by FCVTZU (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
4905
4906
// Advanced SIMD scalar two-register miscellaneous.
// Compare-against-zero forms are generated through the 2OPIMM macros with the
// `Zero` immediate selector.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
// FCVTZS (vector, integer) covered by FCVTZS (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// fcvtxn is tested in a single scalar form (S destination, D source, double
// `Conversions` inputs), so its test is written out directly.
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
// FCVTZU (vector, integer) covered by FCVTZU (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
4945
4946
// Advanced SIMD across lanes.
// Integer reductions use the ACROSS / ACROSS_LONG macros; the floating-point
// ones have their own ACROSS_FP macro. All use the `Basic` input selector.
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4959
4960
// Advanced SIMD permute.
// The permutes have the same operand shape as the three-same group, so they
// are generated with the plain 3SAME macro.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)
4968
4969
// Advanced SIMD vector x indexed element.
// Every macro here takes three input selectors, all `Basic`.
// NOTE(review): the _DIFF variants are used for the long (widening) forms -
// confirm the exact arrangements against the macro definitions.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4993
4994
// Advanced SIMD scalar x indexed element.
// Every macro here takes three input selectors, all `Basic`.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
5007
5008
// Advanced SIMD FHM instructions (FMLAL, FMLSL), by indexed element.
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
5013
5014
// Rebind the `__` emission shorthand for the code generators that follow: they
// receive the assembler as a pointer named `masm`, so `__ Foo(...)` must expand
// to `masm->Foo(...)`.
#undef __
#define __ masm->
5017
5018#if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) &&                 \
5019    defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
5020    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
5021
5022// Generate a function that stores zero to a hard-coded address.
5023Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
5024  masm->Reset();
5025
5026  UseScratchRegisterScope temps(masm);
5027  Register temp = temps.AcquireX();
5028  __ Mov(temp, reinterpret_cast<intptr_t>(target));
5029  __ Str(wzr, MemOperand(temp));
5030  __ Ret();
5031
5032  masm->FinalizeCode();
5033  return masm->GetBuffer()->GetStartAddress<Instruction*>();
5034}
5035
5036
5037// Generate a function that stores the `int32_t` argument to a hard-coded
5038// address.
5039// In this example and the other below, we use the `abi` object to retrieve
5040// argument and return locations even though we could easily hard code them.
5041// This mirrors how more generic code (e.g. templated) user would use these
5042// mechanisms.
5043Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
5044  masm->Reset();
5045
5046  ABI abi;
5047  Register input =
5048      Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
5049
5050  UseScratchRegisterScope temps(masm);
5051  Register temp = temps.AcquireX();
5052  __ Mov(temp, reinterpret_cast<intptr_t>(target));
5053  __ Str(input, MemOperand(temp));
5054  __ Ret();
5055
5056  masm->FinalizeCode();
5057  return masm->GetBuffer()->GetStartAddress<Instruction*>();
5058}
5059
5060
5061// A minimal implementation of a `pow` function.
5062Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
5063  masm->Reset();
5064
5065  ABI abi;
5066  Register input =
5067      Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5068  Register result =
5069      Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
5070  UseScratchRegisterScope temps(masm);
5071  Register temp = temps.AcquireX();
5072
5073  __ Mov(temp, 1);
5074  for (unsigned i = 0; i < pow; i++) {
5075    __ Mul(temp, temp, input);
5076  }
5077  __ Mov(result, temp);
5078  __ Ret();
5079
5080  masm->FinalizeCode();
5081  return masm->GetBuffer()->GetStartAddress<Instruction*>();
5082}
5083
5084
5085Instruction* GenerateSum(MacroAssembler* masm) {
5086  masm->Reset();
5087
5088  ABI abi;
5089  VRegister input_1 =
5090      VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
5091  Register input_2 =
5092      Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5093  VRegister input_3 =
5094      VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
5095  VRegister result =
5096      VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
5097
5098  UseScratchRegisterScope temps(masm);
5099  VRegister temp = temps.AcquireD();
5100
5101  __ Fcvt(input_1.D(), input_1);
5102  __ Scvtf(temp, input_2);
5103  __ Fadd(temp, temp, input_1.D());
5104  __ Fadd(result, temp, input_3);
5105  __ Ret();
5106
5107  masm->FinalizeCode();
5108  return masm->GetBuffer()->GetStartAddress<Instruction*>();
5109}
5110
5111
5112TEST(RunFrom) {
5113  SETUP_WITH_FEATURES(CPUFeatures::kFP);
5114
5115  // Run a function returning `void` and taking no argument.
5116  int32_t value = 0xbad;
5117  simulator.RunFrom(GenerateStoreZero(&masm, &value));
5118  VIXL_CHECK(value == 0);
5119
5120  // Run a function returning `void` and taking one argument.
5121  int32_t argument = 0xf00d;
5122  simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &value), argument);
5123  VIXL_CHECK(value == 0xf00d);
5124
5125  // Run a function taking one argument and returning a value.
5126  int64_t res_int64_t;
5127  res_int64_t =
5128      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
5129  VIXL_CHECK(res_int64_t == 1);
5130  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
5131  VIXL_CHECK(res_int64_t == 123);
5132  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
5133  VIXL_CHECK(res_int64_t == 1024);
5134
5135  // Run a function taking multiple arguments in registers.
5136  double res_double =
5137      simulator.RunFrom<double, float, int64_t, double>(GenerateSum(&masm),
5138                                                        1.0,
5139                                                        2,
5140                                                        3.0);
5141  VIXL_CHECK(res_double == 6.0);
5142}
5143#endif
5144
5145
5146}  // namespace aarch64
5147}  // namespace vixl
5148