1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <cfloat>
28 #include <cstdio>
29 #include <sstream>
30 
31 #include "test-runner.h"
32 #include "test-utils.h"
33 
34 #include "aarch64/cpu-features-auditor-aarch64.h"
35 #include "aarch64/macro-assembler-aarch64.h"
36 #include "aarch64/simulator-aarch64.h"
37 #include "aarch64/test-simulator-inputs-aarch64.h"
38 #include "aarch64/test-simulator-traces-aarch64.h"
39 #include "aarch64/test-utils-aarch64.h"
40 
41 namespace vixl {
42 namespace aarch64 {
43 
44 // ==== Simulator Tests ====
45 //
46 // These simulator tests check instruction behaviour against a trace taken from
47 // real AArch64 hardware. The same test code is used to generate the trace; the
48 // results are printed to stdout when the test is run with
49 // --generate_test_trace.
50 //
51 // The input lists and expected results are stored in test/traces. The expected
52 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
53 // test for a new instruction is described at the top of
54 // test-simulator-traces-aarch64.h.
55 
// Shorthand used when emitting code: `__ Foo(...)` expands to `masm.Foo(...)`,
// so a local named `masm` must be in scope wherever it is used.
#define __ masm.
// Forwards to TEST_ with the AARCH64_SIM_ prefix pasted onto the test name.
#define TEST(name) TEST_(AARCH64_SIM_##name)

// Set-up with a default-constructed CPUFeatures object.
#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())
60 
61 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
62 
// Simulator build: declares the locals `masm`, `decoder` and `simulator` in the
// enclosing scope. The macro arguments are forwarded to a CPUFeatures
// constructor and the result is installed on the assembler; the simulator's
// coloured-trace setting is taken from Test::coloured_trace().
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());
69 
// Simulator build: starts a test body. Resets the assembler and the simulator
// state, then emits the callee-saved register push. If the assembler's feature
// set includes kMTE, a kMTEActive debug Hlt is emitted after that prologue.
// Trace pseudo-instructions are then emitted for each of the register, write
// and disassembly trace options that the test runner has enabled.
#define START()                                                         \
  masm.Reset();                                                         \
  simulator.ResetState();                                               \
  __ PushCalleeSavedRegisters();                                        \
  /* The infrastructure code hasn't been covered at the moment, e.g. */ \
  /* prologue/epilogue. Suppress tagging mis-match exception before  */ \
  /* this point. */                                                     \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                  \
    __ Hlt(DebugHltOpcode::kMTEActive);                                 \
  }                                                                     \
  if (Test::trace_reg()) {                                              \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_write()) {                                            \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_sim()) {                                              \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                 \
  }
89 
// Simulator build: ends a test body. Emits a kMTEInactive debug Hlt (only when
// kMTE is in the assembler's feature set), disables all tracing, emits the
// callee-saved register pop and the return, and finalises the generated code.
#define END()                                          \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) { \
    __ Hlt(DebugHltOpcode::kMTEInactive);              \
  }                                                    \
  __ Trace(LOG_ALL, TRACE_DISABLE);                    \
  __ PopCalleeSavedRegisters();                        \
  __ Ret();                                            \
  masm.FinalizeCode()
98 
// Simulator build: disassembles the code if requested, then runs it in the
// simulator from the start of the code buffer. `skipped` is a bool* and is
// always set to false on this path.
#define TRY_RUN(skipped)                                                \
  DISASSEMBLE();                                                        \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
  /* The simulator can run every test. */                               \
  *skipped = false
104 
105 
106 #else  // VIXL_INCLUDE_SIMULATOR_AARCH64
107 
// Native build: declares the local `masm`, installs a CPUFeatures object built
// from the macro arguments, and calls CPU::SetUp(). No decoder or simulator is
// declared here.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()
112 
// Native build: starts a test body by resetting the assembler and emitting the
// callee-saved register push. No trace or MTE markers are emitted.
#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()
116 
// Native build: ends a test body by emitting the callee-saved register pop and
// the return, then finalising the generated code.
#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()
121 
// Native build: disassembles the code if requested, then decodes the whole
// buffer through a CPUFeaturesAuditor. If the assumed machine feature set
// (currently CPUFeatures::AArch64LegacyBaseline()) has every feature the
// auditor reports, the buffer is made executable, executed, made writable
// again, and *skipped is set to false. Otherwise nothing is executed: a
// "SKIPPED: Missing features" line is printed and *skipped is set to true.
// `skipped` is a bool*.
#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively.                                           */ \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of  */    \
    /* AArch64LegacyBaseline is a stopgap.                              */    \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (this_machine.Has(requirements)) {                                     \
      masm.GetBuffer()->SetExecutable();                                      \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      masm.GetBuffer()->SetWritable();                                        \
      *skipped = false;                                                       \
    } else {                                                                  \
      std::stringstream os;                                                   \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from    */ \
      /* tools/threaded_test.py.                                           */ \
      os << "SKIPPED: Missing features: { ";                                  \
      os << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                         \
      *skipped = true;                                                        \
    }                                                                         \
  }
152 
153 
154 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
155 
156 
// When Test::disassemble() is set, prints a disassembly of everything in
// masm's code buffer (from its start address to its end address) to stdout.
// Otherwise expands to a no-op `if`.
#define DISASSEMBLE()                                             \
  if (Test::disassemble()) {                                      \
    PrintDisassembler disasm(stdout);                             \
    CodeBuffer* buffer = masm.GetBuffer();                        \
    Instruction* start = buffer->GetStartAddress<Instruction*>(); \
    Instruction* end = buffer->GetEndAddress<Instruction*>();     \
    disasm.DisassembleBuffer(start, end);                         \
  }
165 
// The maximum number of errors to report in detail for each test. Mismatches
// beyond this limit are still counted, but only their total is printed.
static const unsigned kErrorReportLimit = 8;
168 
169 
170 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
171 // templated test functions.
rawbits_to_fp(uint32_t bits)172 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
173 
rawbits_to_fp(uint64_t bits)174 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
175 
176 // The rawbits_to_fp functions are only used for printing decimal values so we
177 // just approximate FP16 as double.
rawbits_to_fp(uint16_t bits)178 static double rawbits_to_fp(uint16_t bits) {
179   return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
180 }
181 
182 
183 // MacroAssembler member function pointers to pass to the test dispatchers.
184 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
185                                                   const VRegister& fn);
186 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
187                                                   const VRegister& fn,
188                                                   const VRegister& fm);
189 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
190                                                   const VRegister& fn,
191                                                   const VRegister& fm,
192                                                   const VRegister& fa);
193 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
194                                                   const VRegister& fm);
195 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
196                                                       double value);
197 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
198                                                     const VRegister& fn);
199 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
200                                                       const VRegister& fn,
201                                                       int fbits);
202 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
203                                                       const Register& rn,
204                                                       int fbits);
205 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
206 //       consolidated into one routine.
207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
208                                                     const VRegister& vn);
209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
210                                                     const VRegister& vn,
211                                                     const VRegister& vm);
212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
213                                                           const VRegister& vn,
214                                                           const VRegister& vm,
215                                                           int vm_index);
216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
217     const VRegister& vd, int imm1, const VRegister& vn, int imm2);
218 
219 // This helps using the same typename for both the function pointer
220 // and the array of immediates passed to helper routines.
221 template <typename T>
222 class Test2OpImmediateNEONHelper_t {
223  public:
224   typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
225                                            const VRegister& vn,
226                                            T imm);
227 };
228 
229 
// Number of hex digits needed to print a value of whichever of the two
// templated types is wider (four bits per hex digit).
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const unsigned widest_in_bytes = static_cast<unsigned>(
      (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta));
  const unsigned widest_in_bits = widest_in_bytes * 8;
  return widest_in_bits / 4;
}
237 
238 
239 // Standard test dispatchers.
240 
241 
Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs, unsigned inputs_length, uintptr_t results, unsigned d_size, unsigned n_size, bool* skipped)242 static void Test1Op_Helper(Test1OpFPHelper_t helper,
243                            uintptr_t inputs,
244                            unsigned inputs_length,
245                            uintptr_t results,
246                            unsigned d_size,
247                            unsigned n_size,
248                            bool* skipped) {
249   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
250               (d_size == kHRegSize));
251   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
252               (n_size == kHRegSize));
253 
254   CPUFeatures features;
255   features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
256   // For frint{32,64}{x,y} variants.
257   features.Combine(CPUFeatures::kFrintToFixedSizedInt);
258   SETUP_WITH_FEATURES(features);
259   START();
260 
261   // Roll up the loop to keep the code size down.
262   Label loop_n;
263 
264   Register out = x0;
265   Register inputs_base = x1;
266   Register length = w2;
267   Register index_n = w3;
268 
269   int n_index_shift;
270   VRegister fd;
271   VRegister fn;
272   if (n_size == kDRegSize) {
273     n_index_shift = kDRegSizeInBytesLog2;
274     fn = d1;
275   } else if (n_size == kSRegSize) {
276     n_index_shift = kSRegSizeInBytesLog2;
277     fn = s1;
278   } else {
279     n_index_shift = kHRegSizeInBytesLog2;
280     fn = h1;
281   }
282 
283   if (d_size == kDRegSize) {
284     fd = d0;
285   } else if (d_size == kSRegSize) {
286     fd = s0;
287   } else {
288     fd = h0;
289   }
290 
291 
292   __ Mov(out, results);
293   __ Mov(inputs_base, inputs);
294   __ Mov(length, inputs_length);
295 
296   __ Mov(index_n, 0);
297   __ Bind(&loop_n);
298   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
299 
300   {
301     SingleEmissionCheckScope guard(&masm);
302     (masm.*helper)(fd, fn);
303   }
304   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
305 
306   __ Add(index_n, index_n, 1);
307   __ Cmp(index_n, inputs_length);
308   __ B(lo, &loop_n);
309 
310   END();
311   TRY_RUN(skipped);
312 }
313 
314 
315 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
316 // rawbits representations of doubles or floats. This ensures that exact bit
317 // comparisons can be performed.
318 template <typename Tn, typename Td>
Test1Op(const char* name, Test1OpFPHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)319 static void Test1Op(const char* name,
320                     Test1OpFPHelper_t helper,
321                     const Tn inputs[],
322                     unsigned inputs_length,
323                     const Td expected[],
324                     unsigned expected_length) {
325   VIXL_ASSERT(inputs_length > 0);
326 
327   const unsigned results_length = inputs_length;
328   Td* results = new Td[results_length];
329 
330   const unsigned d_bits = sizeof(Td) * 8;
331   const unsigned n_bits = sizeof(Tn) * 8;
332   bool skipped;
333 
334   Test1Op_Helper(helper,
335                  reinterpret_cast<uintptr_t>(inputs),
336                  inputs_length,
337                  reinterpret_cast<uintptr_t>(results),
338                  d_bits,
339                  n_bits,
340                  &skipped);
341 
342   if (Test::generate_test_trace()) {
343     // Print the results.
344     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
345     for (unsigned d = 0; d < results_length; d++) {
346       printf("  0x%0*" PRIx64 ",\n",
347              d_bits / 4,
348              static_cast<uint64_t>(results[d]));
349     }
350     printf("};\n");
351     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
352   } else if (!skipped) {
353     // Check the results.
354     VIXL_CHECK(expected_length == results_length);
355     unsigned error_count = 0;
356     unsigned d = 0;
357     for (unsigned n = 0; n < inputs_length; n++, d++) {
358       if (results[d] != expected[d]) {
359         if (++error_count > kErrorReportLimit) continue;
360 
361         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
362                name,
363                n_bits / 4,
364                static_cast<uint64_t>(inputs[n]),
365                name,
366                rawbits_to_fp(inputs[n]));
367         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
368                d_bits / 4,
369                static_cast<uint64_t>(expected[d]),
370                rawbits_to_fp(expected[d]));
371         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
372                d_bits / 4,
373                static_cast<uint64_t>(results[d]),
374                rawbits_to_fp(results[d]));
375         printf("\n");
376       }
377     }
378     VIXL_ASSERT(d == expected_length);
379     if (error_count > kErrorReportLimit) {
380       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
381     }
382     VIXL_CHECK(error_count == 0);
383   }
384   delete[] results;
385 }
386 
387 
Test2Op_Helper(Test2OpFPHelper_t helper, uintptr_t inputs, unsigned inputs_length, uintptr_t results, unsigned reg_size, bool* skipped)388 static void Test2Op_Helper(Test2OpFPHelper_t helper,
389                            uintptr_t inputs,
390                            unsigned inputs_length,
391                            uintptr_t results,
392                            unsigned reg_size,
393                            bool* skipped) {
394   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
395               (reg_size == kHRegSize));
396 
397   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
398   START();
399 
400   // Roll up the loop to keep the code size down.
401   Label loop_n, loop_m;
402 
403   Register out = x0;
404   Register inputs_base = x1;
405   Register length = w2;
406   Register index_n = w3;
407   Register index_m = w4;
408 
409   bool double_op = reg_size == kDRegSize;
410   bool float_op = reg_size == kSRegSize;
411   int index_shift;
412   if (double_op) {
413     index_shift = kDRegSizeInBytesLog2;
414   } else if (float_op) {
415     index_shift = kSRegSizeInBytesLog2;
416   } else {
417     index_shift = kHRegSizeInBytesLog2;
418   }
419 
420   VRegister fd;
421   VRegister fn;
422   VRegister fm;
423 
424   if (double_op) {
425     fd = d0;
426     fn = d1;
427     fm = d2;
428   } else if (float_op) {
429     fd = s0;
430     fn = s1;
431     fm = s2;
432   } else {
433     fd = h0;
434     fn = h1;
435     fm = h2;
436   }
437 
438   __ Mov(out, results);
439   __ Mov(inputs_base, inputs);
440   __ Mov(length, inputs_length);
441 
442   __ Mov(index_n, 0);
443   __ Bind(&loop_n);
444   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
445 
446   __ Mov(index_m, 0);
447   __ Bind(&loop_m);
448   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
449 
450   {
451     SingleEmissionCheckScope guard(&masm);
452     (masm.*helper)(fd, fn, fm);
453   }
454   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
455 
456   __ Add(index_m, index_m, 1);
457   __ Cmp(index_m, inputs_length);
458   __ B(lo, &loop_m);
459 
460   __ Add(index_n, index_n, 1);
461   __ Cmp(index_n, inputs_length);
462   __ B(lo, &loop_n);
463 
464   END();
465   TRY_RUN(skipped);
466 }
467 
468 
469 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
470 // rawbits representations of doubles or floats. This ensures that exact bit
471 // comparisons can be performed.
472 template <typename T>
Test2Op(const char* name, Test2OpFPHelper_t helper, const T inputs[], unsigned inputs_length, const T expected[], unsigned expected_length)473 static void Test2Op(const char* name,
474                     Test2OpFPHelper_t helper,
475                     const T inputs[],
476                     unsigned inputs_length,
477                     const T expected[],
478                     unsigned expected_length) {
479   VIXL_ASSERT(inputs_length > 0);
480 
481   const unsigned results_length = inputs_length * inputs_length;
482   T* results = new T[results_length];
483 
484   const unsigned bits = sizeof(T) * 8;
485   bool skipped;
486 
487   Test2Op_Helper(helper,
488                  reinterpret_cast<uintptr_t>(inputs),
489                  inputs_length,
490                  reinterpret_cast<uintptr_t>(results),
491                  bits,
492                  &skipped);
493 
494   if (Test::generate_test_trace()) {
495     // Print the results.
496     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
497     for (unsigned d = 0; d < results_length; d++) {
498       printf("  0x%0*" PRIx64 ",\n",
499              bits / 4,
500              static_cast<uint64_t>(results[d]));
501     }
502     printf("};\n");
503     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
504   } else if (!skipped) {
505     // Check the results.
506     VIXL_CHECK(expected_length == results_length);
507     unsigned error_count = 0;
508     unsigned d = 0;
509     for (unsigned n = 0; n < inputs_length; n++) {
510       for (unsigned m = 0; m < inputs_length; m++, d++) {
511         if (results[d] != expected[d]) {
512           if (++error_count > kErrorReportLimit) continue;
513 
514           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
515                  name,
516                  bits / 4,
517                  static_cast<uint64_t>(inputs[n]),
518                  bits / 4,
519                  static_cast<uint64_t>(inputs[m]),
520                  name,
521                  rawbits_to_fp(inputs[n]),
522                  rawbits_to_fp(inputs[m]));
523           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
524                  bits / 4,
525                  static_cast<uint64_t>(expected[d]),
526                  rawbits_to_fp(expected[d]));
527           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
528                  bits / 4,
529                  static_cast<uint64_t>(results[d]),
530                  rawbits_to_fp(results[d]));
531           printf("\n");
532         }
533       }
534     }
535     VIXL_ASSERT(d == expected_length);
536     if (error_count > kErrorReportLimit) {
537       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
538     }
539     VIXL_CHECK(error_count == 0);
540   }
541   delete[] results;
542 }
543 
544 
Test3Op_Helper(Test3OpFPHelper_t helper, uintptr_t inputs, unsigned inputs_length, uintptr_t results, unsigned reg_size, bool* skipped)545 static void Test3Op_Helper(Test3OpFPHelper_t helper,
546                            uintptr_t inputs,
547                            unsigned inputs_length,
548                            uintptr_t results,
549                            unsigned reg_size,
550                            bool* skipped) {
551   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
552               (reg_size == kHRegSize));
553 
554   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
555   START();
556 
557   // Roll up the loop to keep the code size down.
558   Label loop_n, loop_m, loop_a;
559 
560   Register out = x0;
561   Register inputs_base = x1;
562   Register length = w2;
563   Register index_n = w3;
564   Register index_m = w4;
565   Register index_a = w5;
566 
567   bool double_op = reg_size == kDRegSize;
568   bool single_op = reg_size == kSRegSize;
569   int index_shift;
570   VRegister fd(0, reg_size);
571   VRegister fn(1, reg_size);
572   VRegister fm(2, reg_size);
573   VRegister fa(3, reg_size);
574   if (double_op) {
575     index_shift = kDRegSizeInBytesLog2;
576   } else if (single_op) {
577     index_shift = kSRegSizeInBytesLog2;
578   } else {
579     index_shift = kHRegSizeInBytesLog2;
580   }
581 
582   __ Mov(out, results);
583   __ Mov(inputs_base, inputs);
584   __ Mov(length, inputs_length);
585 
586   __ Mov(index_n, 0);
587   __ Bind(&loop_n);
588   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
589 
590   __ Mov(index_m, 0);
591   __ Bind(&loop_m);
592   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
593 
594   __ Mov(index_a, 0);
595   __ Bind(&loop_a);
596   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
597 
598   {
599     SingleEmissionCheckScope guard(&masm);
600     (masm.*helper)(fd, fn, fm, fa);
601   }
602   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
603 
604   __ Add(index_a, index_a, 1);
605   __ Cmp(index_a, inputs_length);
606   __ B(lo, &loop_a);
607 
608   __ Add(index_m, index_m, 1);
609   __ Cmp(index_m, inputs_length);
610   __ B(lo, &loop_m);
611 
612   __ Add(index_n, index_n, 1);
613   __ Cmp(index_n, inputs_length);
614   __ B(lo, &loop_n);
615 
616   END();
617   TRY_RUN(skipped);
618 }
619 
620 
621 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
622 // rawbits representations of doubles or floats. This ensures that exact bit
623 // comparisons can be performed.
624 template <typename T>
Test3Op(const char* name, Test3OpFPHelper_t helper, const T inputs[], unsigned inputs_length, const T expected[], unsigned expected_length)625 static void Test3Op(const char* name,
626                     Test3OpFPHelper_t helper,
627                     const T inputs[],
628                     unsigned inputs_length,
629                     const T expected[],
630                     unsigned expected_length) {
631   VIXL_ASSERT(inputs_length > 0);
632 
633   const unsigned results_length = inputs_length * inputs_length * inputs_length;
634   T* results = new T[results_length];
635 
636   const unsigned bits = sizeof(T) * 8;
637   bool skipped;
638 
639   Test3Op_Helper(helper,
640                  reinterpret_cast<uintptr_t>(inputs),
641                  inputs_length,
642                  reinterpret_cast<uintptr_t>(results),
643                  bits,
644                  &skipped);
645 
646   if (Test::generate_test_trace()) {
647     // Print the results.
648     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
649     for (unsigned d = 0; d < results_length; d++) {
650       printf("  0x%0*" PRIx64 ",\n",
651              bits / 4,
652              static_cast<uint64_t>(results[d]));
653     }
654     printf("};\n");
655     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
656   } else if (!skipped) {
657     // Check the results.
658     VIXL_CHECK(expected_length == results_length);
659     unsigned error_count = 0;
660     unsigned d = 0;
661     for (unsigned n = 0; n < inputs_length; n++) {
662       for (unsigned m = 0; m < inputs_length; m++) {
663         for (unsigned a = 0; a < inputs_length; a++, d++) {
664           if (results[d] != expected[d]) {
665             if (++error_count > kErrorReportLimit) continue;
666 
667             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
668                    " (%s %g %g %g):\n",
669                    name,
670                    bits / 4,
671                    static_cast<uint64_t>(inputs[n]),
672                    bits / 4,
673                    static_cast<uint64_t>(inputs[m]),
674                    bits / 4,
675                    static_cast<uint64_t>(inputs[a]),
676                    name,
677                    rawbits_to_fp(inputs[n]),
678                    rawbits_to_fp(inputs[m]),
679                    rawbits_to_fp(inputs[a]));
680             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
681                    bits / 4,
682                    static_cast<uint64_t>(expected[d]),
683                    rawbits_to_fp(expected[d]));
684             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
685                    bits / 4,
686                    static_cast<uint64_t>(results[d]),
687                    rawbits_to_fp(results[d]));
688             printf("\n");
689           }
690         }
691       }
692     }
693     VIXL_ASSERT(d == expected_length);
694     if (error_count > kErrorReportLimit) {
695       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
696     }
697     VIXL_CHECK(error_count == 0);
698   }
699   delete[] results;
700 }
701 
702 
TestCmp_Helper(TestFPCmpHelper_t helper, uintptr_t inputs, unsigned inputs_length, uintptr_t results, unsigned reg_size, bool* skipped)703 static void TestCmp_Helper(TestFPCmpHelper_t helper,
704                            uintptr_t inputs,
705                            unsigned inputs_length,
706                            uintptr_t results,
707                            unsigned reg_size,
708                            bool* skipped) {
709   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
710 
711   SETUP_WITH_FEATURES(CPUFeatures::kFP);
712   START();
713 
714   // Roll up the loop to keep the code size down.
715   Label loop_n, loop_m;
716 
717   Register out = x0;
718   Register inputs_base = x1;
719   Register length = w2;
720   Register index_n = w3;
721   Register index_m = w4;
722   Register flags = x5;
723 
724   bool double_op = reg_size == kDRegSize;
725   const int index_shift =
726       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
727 
728   VRegister fn = double_op ? d1 : s1;
729   VRegister fm = double_op ? d2 : s2;
730 
731   __ Mov(out, results);
732   __ Mov(inputs_base, inputs);
733   __ Mov(length, inputs_length);
734 
735   __ Mov(index_n, 0);
736   __ Bind(&loop_n);
737   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
738 
739   __ Mov(index_m, 0);
740   __ Bind(&loop_m);
741   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
742 
743   {
744     SingleEmissionCheckScope guard(&masm);
745     (masm.*helper)(fn, fm);
746   }
747   __ Mrs(flags, NZCV);
748   __ Ubfx(flags, flags, 28, 4);
749   __ Strb(flags, MemOperand(out, 1, PostIndex));
750 
751   __ Add(index_m, index_m, 1);
752   __ Cmp(index_m, inputs_length);
753   __ B(lo, &loop_m);
754 
755   __ Add(index_n, index_n, 1);
756   __ Cmp(index_n, inputs_length);
757   __ B(lo, &loop_n);
758 
759   END();
760   TRY_RUN(skipped);
761 }
762 
763 
764 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
765 // rawbits representations of doubles or floats. This ensures that exact bit
766 // comparisons can be performed.
767 template <typename T>
TestCmp(const char* name, TestFPCmpHelper_t helper, const T inputs[], unsigned inputs_length, const uint8_t expected[], unsigned expected_length)768 static void TestCmp(const char* name,
769                     TestFPCmpHelper_t helper,
770                     const T inputs[],
771                     unsigned inputs_length,
772                     const uint8_t expected[],
773                     unsigned expected_length) {
774   VIXL_ASSERT(inputs_length > 0);
775 
776   const unsigned results_length = inputs_length * inputs_length;
777   uint8_t* results = new uint8_t[results_length];
778 
779   const unsigned bits = sizeof(T) * 8;
780   bool skipped;
781 
782   TestCmp_Helper(helper,
783                  reinterpret_cast<uintptr_t>(inputs),
784                  inputs_length,
785                  reinterpret_cast<uintptr_t>(results),
786                  bits,
787                  &skipped);
788 
789   if (Test::generate_test_trace()) {
790     // Print the results.
791     printf("const uint8_t kExpected_%s[] = {\n", name);
792     for (unsigned d = 0; d < results_length; d++) {
793       // Each NZCV result only requires 4 bits.
794       VIXL_ASSERT((results[d] & 0xf) == results[d]);
795       printf("  0x%" PRIx8 ",\n", results[d]);
796     }
797     printf("};\n");
798     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
799   } else if (!skipped) {
800     // Check the results.
801     VIXL_CHECK(expected_length == results_length);
802     unsigned error_count = 0;
803     unsigned d = 0;
804     for (unsigned n = 0; n < inputs_length; n++) {
805       for (unsigned m = 0; m < inputs_length; m++, d++) {
806         if (results[d] != expected[d]) {
807           if (++error_count > kErrorReportLimit) continue;
808 
809           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
810                  name,
811                  bits / 4,
812                  static_cast<uint64_t>(inputs[n]),
813                  bits / 4,
814                  static_cast<uint64_t>(inputs[m]),
815                  name,
816                  rawbits_to_fp(inputs[n]),
817                  rawbits_to_fp(inputs[m]));
818           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
819                  (expected[d] & 0x8) ? 'N' : 'n',
820                  (expected[d] & 0x4) ? 'Z' : 'z',
821                  (expected[d] & 0x2) ? 'C' : 'c',
822                  (expected[d] & 0x1) ? 'V' : 'v',
823                  expected[d]);
824           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
825                  (results[d] & 0x8) ? 'N' : 'n',
826                  (results[d] & 0x4) ? 'Z' : 'z',
827                  (results[d] & 0x2) ? 'C' : 'c',
828                  (results[d] & 0x1) ? 'V' : 'v',
829                  results[d]);
830           printf("\n");
831         }
832       }
833     }
834     VIXL_ASSERT(d == expected_length);
835     if (error_count > kErrorReportLimit) {
836       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
837     }
838     VIXL_CHECK(error_count == 0);
839   }
840   delete[] results;
841 }
842 
843 
TestCmpZero_Helper(TestFPCmpZeroHelper_t helper, uintptr_t inputs, unsigned inputs_length, uintptr_t results, unsigned reg_size, bool* skipped)844 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
845                                uintptr_t inputs,
846                                unsigned inputs_length,
847                                uintptr_t results,
848                                unsigned reg_size,
849                                bool* skipped) {
850   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
851 
852   SETUP_WITH_FEATURES(CPUFeatures::kFP);
853   START();
854 
855   // Roll up the loop to keep the code size down.
856   Label loop_n, loop_m;
857 
858   Register out = x0;
859   Register inputs_base = x1;
860   Register length = w2;
861   Register index_n = w3;
862   Register flags = x4;
863 
864   bool double_op = reg_size == kDRegSize;
865   const int index_shift =
866       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
867 
868   VRegister fn = double_op ? d1 : s1;
869 
870   __ Mov(out, results);
871   __ Mov(inputs_base, inputs);
872   __ Mov(length, inputs_length);
873 
874   __ Mov(index_n, 0);
875   __ Bind(&loop_n);
876   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
877 
878   {
879     SingleEmissionCheckScope guard(&masm);
880     (masm.*helper)(fn, 0.0);
881   }
882   __ Mrs(flags, NZCV);
883   __ Ubfx(flags, flags, 28, 4);
884   __ Strb(flags, MemOperand(out, 1, PostIndex));
885 
886   __ Add(index_n, index_n, 1);
887   __ Cmp(index_n, inputs_length);
888   __ B(lo, &loop_n);
889 
890   END();
891   TRY_RUN(skipped);
892 }
893 
894 
895 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
896 // rawbits representations of doubles or floats. This ensures that exact bit
897 // comparisons can be performed.
898 template <typename T>
TestCmpZero(const char* name, TestFPCmpZeroHelper_t helper, const T inputs[], unsigned inputs_length, const uint8_t expected[], unsigned expected_length)899 static void TestCmpZero(const char* name,
900                         TestFPCmpZeroHelper_t helper,
901                         const T inputs[],
902                         unsigned inputs_length,
903                         const uint8_t expected[],
904                         unsigned expected_length) {
905   VIXL_ASSERT(inputs_length > 0);
906 
907   const unsigned results_length = inputs_length;
908   uint8_t* results = new uint8_t[results_length];
909 
910   const unsigned bits = sizeof(T) * 8;
911   bool skipped;
912 
913   TestCmpZero_Helper(helper,
914                      reinterpret_cast<uintptr_t>(inputs),
915                      inputs_length,
916                      reinterpret_cast<uintptr_t>(results),
917                      bits,
918                      &skipped);
919 
920   if (Test::generate_test_trace()) {
921     // Print the results.
922     printf("const uint8_t kExpected_%s[] = {\n", name);
923     for (unsigned d = 0; d < results_length; d++) {
924       // Each NZCV result only requires 4 bits.
925       VIXL_ASSERT((results[d] & 0xf) == results[d]);
926       printf("  0x%" PRIx8 ",\n", results[d]);
927     }
928     printf("};\n");
929     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
930   } else if (!skipped) {
931     // Check the results.
932     VIXL_CHECK(expected_length == results_length);
933     unsigned error_count = 0;
934     unsigned d = 0;
935     for (unsigned n = 0; n < inputs_length; n++, d++) {
936       if (results[d] != expected[d]) {
937         if (++error_count > kErrorReportLimit) continue;
938 
939         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
940                name,
941                bits / 4,
942                static_cast<uint64_t>(inputs[n]),
943                bits / 4,
944                0,
945                name,
946                rawbits_to_fp(inputs[n]));
947         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
948                (expected[d] & 0x8) ? 'N' : 'n',
949                (expected[d] & 0x4) ? 'Z' : 'z',
950                (expected[d] & 0x2) ? 'C' : 'c',
951                (expected[d] & 0x1) ? 'V' : 'v',
952                expected[d]);
953         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
954                (results[d] & 0x8) ? 'N' : 'n',
955                (results[d] & 0x4) ? 'Z' : 'z',
956                (results[d] & 0x2) ? 'C' : 'c',
957                (results[d] & 0x1) ? 'V' : 'v',
958                results[d]);
959         printf("\n");
960       }
961     }
962     VIXL_ASSERT(d == expected_length);
963     if (error_count > kErrorReportLimit) {
964       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
965     }
966     VIXL_CHECK(error_count == 0);
967   }
968   delete[] results;
969 }
970 
971 
TestFPToFixed_Helper(TestFPToFixedHelper_t helper, uintptr_t inputs, unsigned inputs_length, uintptr_t results, unsigned d_size, unsigned n_size, bool* skipped)972 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
973                                  uintptr_t inputs,
974                                  unsigned inputs_length,
975                                  uintptr_t results,
976                                  unsigned d_size,
977                                  unsigned n_size,
978                                  bool* skipped) {
979   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
980   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
981               (n_size == kHRegSize));
982 
983   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
984   START();
985 
986   // Roll up the loop to keep the code size down.
987   Label loop_n;
988 
989   Register out = x0;
990   Register inputs_base = x1;
991   Register length = w2;
992   Register index_n = w3;
993 
994   int n_index_shift;
995   if (n_size == kDRegSize) {
996     n_index_shift = kDRegSizeInBytesLog2;
997   } else if (n_size == kSRegSize) {
998     n_index_shift = kSRegSizeInBytesLog2;
999   } else {
1000     n_index_shift = kHRegSizeInBytesLog2;
1001   }
1002 
1003   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1004   VRegister fn;
1005   if (n_size == kDRegSize) {
1006     fn = d1;
1007   } else if (n_size == kSRegSize) {
1008     fn = s1;
1009   } else {
1010     fn = h1;
1011   }
1012 
1013   __ Mov(out, results);
1014   __ Mov(inputs_base, inputs);
1015   __ Mov(length, inputs_length);
1016 
1017   __ Mov(index_n, 0);
1018   __ Bind(&loop_n);
1019   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1020 
1021   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
1022     {
1023       SingleEmissionCheckScope guard(&masm);
1024       (masm.*helper)(rd, fn, fbits);
1025     }
1026     __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1027   }
1028 
1029   __ Add(index_n, index_n, 1);
1030   __ Cmp(index_n, inputs_length);
1031   __ B(lo, &loop_n);
1032 
1033   END();
1034   TRY_RUN(skipped);
1035 }
1036 
1037 
TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs, unsigned inputs_length, uintptr_t results, unsigned d_size, unsigned n_size, bool* skipped)1038 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
1039                                uintptr_t inputs,
1040                                unsigned inputs_length,
1041                                uintptr_t results,
1042                                unsigned d_size,
1043                                unsigned n_size,
1044                                bool* skipped) {
1045   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
1046   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
1047               (n_size == kHRegSize));
1048 
1049   SETUP_WITH_FEATURES(CPUFeatures::kFP,
1050                       CPUFeatures::kFPHalf,
1051                       CPUFeatures::kJSCVT);
1052   START();
1053 
1054   // Roll up the loop to keep the code size down.
1055   Label loop_n;
1056 
1057   Register out = x0;
1058   Register inputs_base = x1;
1059   Register length = w2;
1060   Register index_n = w3;
1061 
1062   int n_index_shift;
1063   if (n_size == kDRegSize) {
1064     n_index_shift = kDRegSizeInBytesLog2;
1065   } else if (n_size == kSRegSize) {
1066     n_index_shift = kSRegSizeInBytesLog2;
1067   } else {
1068     n_index_shift = kHRegSizeInBytesLog2;
1069   }
1070 
1071   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1072   VRegister fn;
1073   if (n_size == kDRegSize) {
1074     fn = d1;
1075   } else if (n_size == kSRegSize) {
1076     fn = s1;
1077   } else {
1078     fn = h1;
1079   }
1080 
1081   __ Mov(out, results);
1082   __ Mov(inputs_base, inputs);
1083   __ Mov(length, inputs_length);
1084 
1085   __ Mov(index_n, 0);
1086   __ Bind(&loop_n);
1087   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1088 
1089   {
1090     SingleEmissionCheckScope guard(&masm);
1091     (masm.*helper)(rd, fn);
1092   }
1093   __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1094 
1095   __ Add(index_n, index_n, 1);
1096   __ Cmp(index_n, inputs_length);
1097   __ B(lo, &loop_n);
1098 
1099   END();
1100   TRY_RUN(skipped);
1101 }
1102 
1103 
1104 // Test FP instructions.
1105 //  - The inputs[] array should be an array of rawbits representations of
1106 //    doubles or floats. This ensures that exact bit comparisons can be
1107 //    performed.
1108 //  - The expected[] array should be an array of signed integers.
1109 template <typename Tn, typename Td>
TestFPToS(const char* name, TestFPToIntHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1110 static void TestFPToS(const char* name,
1111                       TestFPToIntHelper_t helper,
1112                       const Tn inputs[],
1113                       unsigned inputs_length,
1114                       const Td expected[],
1115                       unsigned expected_length) {
1116   VIXL_ASSERT(inputs_length > 0);
1117 
1118   const unsigned results_length = inputs_length;
1119   Td* results = new Td[results_length];
1120 
1121   const unsigned d_bits = sizeof(Td) * 8;
1122   const unsigned n_bits = sizeof(Tn) * 8;
1123   bool skipped;
1124 
1125   TestFPToInt_Helper(helper,
1126                      reinterpret_cast<uintptr_t>(inputs),
1127                      inputs_length,
1128                      reinterpret_cast<uintptr_t>(results),
1129                      d_bits,
1130                      n_bits,
1131                      &skipped);
1132 
1133   if (Test::generate_test_trace()) {
1134     // Print the results.
1135     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1136     // There is no simple C++ literal for INT*_MIN that doesn't produce
1137     // warnings, so we use an appropriate constant in that case instead.
1138     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1139     // the like) avoids warnings about comparing values with differing ranges.
1140     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1141     const int64_t int_d_min = -(int_d_max)-1;
1142     for (unsigned d = 0; d < results_length; d++) {
1143       if (results[d] == int_d_min) {
1144         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1145       } else {
1146         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1147         // trigger compiler warnings. To avoid these warnings, use an
1148         // appropriate macro to make the type explicit.
1149         int64_t result_int64 = static_cast<int64_t>(results[d]);
1150         if (result_int64 >= 0) {
1151           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1152         } else {
1153           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1154         }
1155       }
1156     }
1157     printf("};\n");
1158     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1159   } else if (!skipped) {
1160     // Check the results.
1161     VIXL_CHECK(expected_length == results_length);
1162     unsigned error_count = 0;
1163     unsigned d = 0;
1164     for (unsigned n = 0; n < inputs_length; n++, d++) {
1165       if (results[d] != expected[d]) {
1166         if (++error_count > kErrorReportLimit) continue;
1167 
1168         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1169                name,
1170                n_bits / 4,
1171                static_cast<uint64_t>(inputs[n]),
1172                name,
1173                rawbits_to_fp(inputs[n]));
1174         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1175                d_bits / 4,
1176                static_cast<uint64_t>(expected[d]),
1177                static_cast<int64_t>(expected[d]));
1178         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1179                d_bits / 4,
1180                static_cast<uint64_t>(results[d]),
1181                static_cast<int64_t>(results[d]));
1182         printf("\n");
1183       }
1184     }
1185     VIXL_ASSERT(d == expected_length);
1186     if (error_count > kErrorReportLimit) {
1187       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1188     }
1189     VIXL_CHECK(error_count == 0);
1190   }
1191   delete[] results;
1192 }
1193 
1194 
1195 // Test FP instructions.
1196 //  - The inputs[] array should be an array of rawbits representations of
1197 //    doubles or floats. This ensures that exact bit comparisons can be
1198 //    performed.
1199 //  - The expected[] array should be an array of unsigned integers.
1200 template <typename Tn, typename Td>
TestFPToU(const char* name, TestFPToIntHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1201 static void TestFPToU(const char* name,
1202                       TestFPToIntHelper_t helper,
1203                       const Tn inputs[],
1204                       unsigned inputs_length,
1205                       const Td expected[],
1206                       unsigned expected_length) {
1207   VIXL_ASSERT(inputs_length > 0);
1208 
1209   const unsigned results_length = inputs_length;
1210   Td* results = new Td[results_length];
1211 
1212   const unsigned d_bits = sizeof(Td) * 8;
1213   const unsigned n_bits = sizeof(Tn) * 8;
1214   bool skipped;
1215 
1216   TestFPToInt_Helper(helper,
1217                      reinterpret_cast<uintptr_t>(inputs),
1218                      inputs_length,
1219                      reinterpret_cast<uintptr_t>(results),
1220                      d_bits,
1221                      n_bits,
1222                      &skipped);
1223 
1224   if (Test::generate_test_trace()) {
1225     // Print the results.
1226     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1227     for (unsigned d = 0; d < results_length; d++) {
1228       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1229     }
1230     printf("};\n");
1231     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1232   } else if (!skipped) {
1233     // Check the results.
1234     VIXL_CHECK(expected_length == results_length);
1235     unsigned error_count = 0;
1236     unsigned d = 0;
1237     for (unsigned n = 0; n < inputs_length; n++, d++) {
1238       if (results[d] != expected[d]) {
1239         if (++error_count > kErrorReportLimit) continue;
1240 
1241         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1242                name,
1243                n_bits / 4,
1244                static_cast<uint64_t>(inputs[n]),
1245                name,
1246                rawbits_to_fp(inputs[n]));
1247         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1248                d_bits / 4,
1249                static_cast<uint64_t>(expected[d]),
1250                static_cast<uint64_t>(expected[d]));
1251         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1252                d_bits / 4,
1253                static_cast<uint64_t>(results[d]),
1254                static_cast<uint64_t>(results[d]));
1255         printf("\n");
1256       }
1257     }
1258     VIXL_ASSERT(d == expected_length);
1259     if (error_count > kErrorReportLimit) {
1260       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1261     }
1262     VIXL_CHECK(error_count == 0);
1263   }
1264   delete[] results;
1265 }
1266 
1267 
1268 // Test FP instructions.
1269 //  - The inputs[] array should be an array of rawbits representations of
1270 //    doubles or floats. This ensures that exact bit comparisons can be
1271 //    performed.
1272 //  - The expected[] array should be an array of signed integers.
1273 template <typename Tn, typename Td>
TestFPToFixedS(const char* name, TestFPToFixedHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1274 static void TestFPToFixedS(const char* name,
1275                            TestFPToFixedHelper_t helper,
1276                            const Tn inputs[],
1277                            unsigned inputs_length,
1278                            const Td expected[],
1279                            unsigned expected_length) {
1280   VIXL_ASSERT(inputs_length > 0);
1281 
1282   const unsigned d_bits = sizeof(Td) * 8;
1283   const unsigned n_bits = sizeof(Tn) * 8;
1284 
1285   const unsigned results_length = inputs_length * (d_bits + 1);
1286   Td* results = new Td[results_length];
1287 
1288   bool skipped;
1289 
1290   TestFPToFixed_Helper(helper,
1291                        reinterpret_cast<uintptr_t>(inputs),
1292                        inputs_length,
1293                        reinterpret_cast<uintptr_t>(results),
1294                        d_bits,
1295                        n_bits,
1296                        &skipped);
1297 
1298   if (Test::generate_test_trace()) {
1299     // Print the results.
1300     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1301     // There is no simple C++ literal for INT*_MIN that doesn't produce
1302     // warnings, so we use an appropriate constant in that case instead.
1303     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1304     // the like) avoids warnings about comparing values with differing ranges.
1305     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1306     const int64_t int_d_min = -(int_d_max)-1;
1307     for (unsigned d = 0; d < results_length; d++) {
1308       if (results[d] == int_d_min) {
1309         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1310       } else {
1311         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1312         // trigger compiler warnings. To avoid these warnings, use an
1313         // appropriate macro to make the type explicit.
1314         int64_t result_int64 = static_cast<int64_t>(results[d]);
1315         if (result_int64 >= 0) {
1316           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1317         } else {
1318           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1319         }
1320       }
1321     }
1322     printf("};\n");
1323     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1324   } else if (!skipped) {
1325     // Check the results.
1326     VIXL_CHECK(expected_length == results_length);
1327     unsigned error_count = 0;
1328     unsigned d = 0;
1329     for (unsigned n = 0; n < inputs_length; n++) {
1330       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1331         if (results[d] != expected[d]) {
1332           if (++error_count > kErrorReportLimit) continue;
1333 
1334           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1335                  name,
1336                  n_bits / 4,
1337                  static_cast<uint64_t>(inputs[n]),
1338                  fbits,
1339                  name,
1340                  rawbits_to_fp(inputs[n]),
1341                  fbits);
1342           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1343                  d_bits / 4,
1344                  static_cast<uint64_t>(expected[d]),
1345                  static_cast<int64_t>(expected[d]));
1346           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1347                  d_bits / 4,
1348                  static_cast<uint64_t>(results[d]),
1349                  static_cast<int64_t>(results[d]));
1350           printf("\n");
1351         }
1352       }
1353     }
1354     VIXL_ASSERT(d == expected_length);
1355     if (error_count > kErrorReportLimit) {
1356       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1357     }
1358     VIXL_CHECK(error_count == 0);
1359   }
1360   delete[] results;
1361 }
1362 
1363 
1364 // Test FP instructions.
1365 //  - The inputs[] array should be an array of rawbits representations of
1366 //    doubles or floats. This ensures that exact bit comparisons can be
1367 //    performed.
1368 //  - The expected[] array should be an array of unsigned integers.
1369 template <typename Tn, typename Td>
TestFPToFixedU(const char* name, TestFPToFixedHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1370 static void TestFPToFixedU(const char* name,
1371                            TestFPToFixedHelper_t helper,
1372                            const Tn inputs[],
1373                            unsigned inputs_length,
1374                            const Td expected[],
1375                            unsigned expected_length) {
1376   VIXL_ASSERT(inputs_length > 0);
1377 
1378   const unsigned d_bits = sizeof(Td) * 8;
1379   const unsigned n_bits = sizeof(Tn) * 8;
1380 
1381   const unsigned results_length = inputs_length * (d_bits + 1);
1382   Td* results = new Td[results_length];
1383 
1384   bool skipped;
1385 
1386   TestFPToFixed_Helper(helper,
1387                        reinterpret_cast<uintptr_t>(inputs),
1388                        inputs_length,
1389                        reinterpret_cast<uintptr_t>(results),
1390                        d_bits,
1391                        n_bits,
1392                        &skipped);
1393 
1394   if (Test::generate_test_trace()) {
1395     // Print the results.
1396     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1397     for (unsigned d = 0; d < results_length; d++) {
1398       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1399     }
1400     printf("};\n");
1401     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1402   } else if (!skipped) {
1403     // Check the results.
1404     VIXL_CHECK(expected_length == results_length);
1405     unsigned error_count = 0;
1406     unsigned d = 0;
1407     for (unsigned n = 0; n < inputs_length; n++) {
1408       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1409         if (results[d] != expected[d]) {
1410           if (++error_count > kErrorReportLimit) continue;
1411 
1412           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1413                  name,
1414                  n_bits / 4,
1415                  static_cast<uint64_t>(inputs[n]),
1416                  fbits,
1417                  name,
1418                  rawbits_to_fp(inputs[n]),
1419                  fbits);
1420           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1421                  d_bits / 4,
1422                  static_cast<uint64_t>(expected[d]),
1423                  static_cast<uint64_t>(expected[d]));
1424           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1425                  d_bits / 4,
1426                  static_cast<uint64_t>(results[d]),
1427                  static_cast<uint64_t>(results[d]));
1428           printf("\n");
1429         }
1430       }
1431     }
1432     VIXL_ASSERT(d == expected_length);
1433     if (error_count > kErrorReportLimit) {
1434       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1435     }
1436     VIXL_CHECK(error_count == 0);
1437   }
1438   delete[] results;
1439 }
1440 
1441 
1442 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1443 
1444 
Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, unsigned inputs_n_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, bool* skipped)1445 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1446                                uintptr_t inputs_n,
1447                                unsigned inputs_n_length,
1448                                uintptr_t results,
1449                                VectorFormat vd_form,
1450                                VectorFormat vn_form,
1451                                bool* skipped) {
1452   VIXL_ASSERT(vd_form != kFormatUndefined);
1453   VIXL_ASSERT(vn_form != kFormatUndefined);
1454 
1455   CPUFeatures features;
1456   features.Combine(CPUFeatures::kNEON,
1457                    CPUFeatures::kFP,
1458                    CPUFeatures::kRDM,
1459                    CPUFeatures::kNEONHalf);
1460   // For frint{32,64}{x,y} variants.
1461   features.Combine(CPUFeatures::kFrintToFixedSizedInt);
1462   SETUP_WITH_FEATURES(features);
1463   START();
1464 
1465   // Roll up the loop to keep the code size down.
1466   Label loop_n;
1467 
1468   Register out = x0;
1469   Register inputs_n_base = x1;
1470   Register inputs_n_last_16bytes = x3;
1471   Register index_n = x5;
1472 
1473   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1474   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1475   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1476 
1477   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1478   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1479   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1480   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1481   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1482 
1483 
1484   // These will be either a D- or a Q-register form, with a single lane
1485   // (for use in scalar load and store operations).
1486   VRegister vd = VRegister(0, vd_bits);
1487   VRegister vn = v1.V16B();
1488   VRegister vntmp = v3.V16B();
1489 
1490   // These will have the correct format for use when calling 'helper'.
1491   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1492   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1493 
1494   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1495   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1496 
1497   __ Mov(out, results);
1498 
1499   __ Mov(inputs_n_base, inputs_n);
1500   __ Mov(inputs_n_last_16bytes,
1501          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1502 
1503   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1504 
1505   __ Mov(index_n, 0);
1506   __ Bind(&loop_n);
1507 
1508   __ Ldr(vntmp_single,
1509          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1510   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1511 
1512   // Set the destination to zero.
1513   // TODO: Setting the destination to values other than zero
1514   //       might be a better test for instructions such as sqxtn2
1515   //       which may leave parts of V registers unchanged.
1516   __ Movi(vd.V16B(), 0);
1517 
1518   {
1519     SingleEmissionCheckScope guard(&masm);
1520     (masm.*helper)(vd_helper, vn_helper);
1521   }
1522   __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1523 
1524   __ Add(index_n, index_n, 1);
1525   __ Cmp(index_n, inputs_n_length);
1526   __ B(lo, &loop_n);
1527 
1528   END();
1529   TRY_RUN(skipped);
1530 }
1531 
1532 
1533 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1534 // arrays of rawbit representation of input values. This ensures that
1535 // exact bit comparisons can be performed.
1536 template <typename Td, typename Tn>
Test1OpNEON(const char* name, Test1OpNEONHelper_t helper, const Tn inputs_n[], unsigned inputs_n_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form)1537 static void Test1OpNEON(const char* name,
1538                         Test1OpNEONHelper_t helper,
1539                         const Tn inputs_n[],
1540                         unsigned inputs_n_length,
1541                         const Td expected[],
1542                         unsigned expected_length,
1543                         VectorFormat vd_form,
1544                         VectorFormat vn_form) {
1545   VIXL_ASSERT(inputs_n_length > 0);
1546 
1547   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1548   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1549   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1550 
1551   const unsigned results_length = inputs_n_length;
1552   Td* results = new Td[results_length * vd_lane_count];
1553   const unsigned lane_bit = sizeof(Td) * 8;
1554   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1555 
1556   bool skipped;
1557 
1558   Test1OpNEON_Helper(helper,
1559                      reinterpret_cast<uintptr_t>(inputs_n),
1560                      inputs_n_length,
1561                      reinterpret_cast<uintptr_t>(results),
1562                      vd_form,
1563                      vn_form,
1564                      &skipped);
1565 
1566   if (Test::generate_test_trace()) {
1567     // Print the results.
1568     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1569     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1570       printf(" ");
1571       // Output a separate result for each element of the result vector.
1572       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1573         unsigned index = lane + (iteration * vd_lane_count);
1574         printf(" 0x%0*" PRIx64 ",",
1575                lane_len_in_hex,
1576                static_cast<uint64_t>(results[index]));
1577       }
1578       printf("\n");
1579     }
1580 
1581     printf("};\n");
1582     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1583            name,
1584            results_length);
1585   } else if (!skipped) {
1586     // Check the results.
1587     VIXL_CHECK(expected_length == results_length);
1588     unsigned error_count = 0;
1589     unsigned d = 0;
1590     const char* padding = "                    ";
1591     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1592     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1593       bool error_in_vector = false;
1594 
1595       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1596         unsigned output_index = (n * vd_lane_count) + lane;
1597 
1598         if (results[output_index] != expected[output_index]) {
1599           error_in_vector = true;
1600           break;
1601         }
1602       }
1603 
1604       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1605         printf("%s\n", name);
1606         printf(" Vn%.*s| Vd%.*s| Expected\n",
1607                lane_len_in_hex + 1,
1608                padding,
1609                lane_len_in_hex + 1,
1610                padding);
1611 
1612         const unsigned first_index_n =
1613             inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1614 
1615         for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1616              lane++) {
1617           unsigned output_index = (n * vd_lane_count) + lane;
1618           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1619 
1620           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1621                  " "
1622                  "| 0x%0*" PRIx64 "\n",
1623                  results[output_index] != expected[output_index] ? '*' : ' ',
1624                  lane_len_in_hex,
1625                  static_cast<uint64_t>(inputs_n[input_index_n]),
1626                  lane_len_in_hex,
1627                  static_cast<uint64_t>(results[output_index]),
1628                  lane_len_in_hex,
1629                  static_cast<uint64_t>(expected[output_index]));
1630         }
1631       }
1632     }
1633     VIXL_ASSERT(d == expected_length);
1634     if (error_count > kErrorReportLimit) {
1635       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1636     }
1637     VIXL_CHECK(error_count == 0);
1638   }
1639   delete[] results;
1640 }
1641 
1642 
1643 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1644 //      where <V> is one of B, H, S or D registers.
1645 //      e.g. saddlv H1, v0.8B
1646 
1647 // TODO: Change tests to store all lanes of the resulting V register.
1648 //       Some tests store all 128 bits of the resulting V register to
1649 //       check the simulator's behaviour on the rest of the register.
1650 //       This is better than storing the affected lanes only.
1651 //       Change any tests such as the 'Across' template to do the same.
1652 
Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, unsigned inputs_n_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, bool* skipped)1653 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1654                                      uintptr_t inputs_n,
1655                                      unsigned inputs_n_length,
1656                                      uintptr_t results,
1657                                      VectorFormat vd_form,
1658                                      VectorFormat vn_form,
1659                                      bool* skipped) {
1660   VIXL_ASSERT(vd_form != kFormatUndefined);
1661   VIXL_ASSERT(vn_form != kFormatUndefined);
1662 
1663   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
1664                       CPUFeatures::kFP,
1665                       CPUFeatures::kNEONHalf);
1666   START();
1667 
1668   // Roll up the loop to keep the code size down.
1669   Label loop_n;
1670 
1671   Register out = x0;
1672   Register inputs_n_base = x1;
1673   Register inputs_n_last_vector = x3;
1674   Register index_n = x5;
1675 
1676   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1677   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1678   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1679   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1680   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1681   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1682   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1683 
1684   // Test destructive operations by (arbitrarily) using the same register for
1685   // B and S lane sizes.
1686   bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
1687 
1688   // Create two aliases for v0; the first is the destination for the tested
1689   // instruction, the second, the whole Q register to check the results.
1690   VRegister vd = VRegister(0, vd_bits);
1691   VRegister vdstr = VRegister(0, kQRegSize);
1692 
1693   VRegister vn = VRegister(1, vn_bits);
1694   VRegister vntmp = VRegister(3, vn_bits);
1695 
1696   // These will have the correct format for use when calling 'helper'.
1697   VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
1698   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1699 
1700   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1701   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1702 
1703   // Same registers for use in the 'ext' instructions.
1704   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1705   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1706 
1707   __ Mov(out, results);
1708 
1709   __ Mov(inputs_n_base, inputs_n);
1710   __ Mov(inputs_n_last_vector,
1711          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1712 
1713   __ Ldr(vn, MemOperand(inputs_n_last_vector));
1714 
1715   __ Mov(index_n, 0);
1716   __ Bind(&loop_n);
1717 
1718   __ Ldr(vntmp_single,
1719          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1720   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1721 
1722   if (destructive) {
1723     __ Mov(vd_helper, vn_helper);
1724     SingleEmissionCheckScope guard(&masm);
1725     (masm.*helper)(vd, vd_helper);
1726   } else {
1727     SingleEmissionCheckScope guard(&masm);
1728     (masm.*helper)(vd, vn_helper);
1729   }
1730 
1731   __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
1732 
1733   __ Add(index_n, index_n, 1);
1734   __ Cmp(index_n, inputs_n_length);
1735   __ B(lo, &loop_n);
1736 
1737   END();
1738   TRY_RUN(skipped);
1739 }
1740 
1741 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1742 // arrays of rawbit representation of input values. This ensures that
1743 // exact bit comparisons can be performed.
1744 template <typename Td, typename Tn>
Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper, const Tn inputs_n[], unsigned inputs_n_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form)1745 static void Test1OpAcrossNEON(const char* name,
1746                               Test1OpNEONHelper_t helper,
1747                               const Tn inputs_n[],
1748                               unsigned inputs_n_length,
1749                               const Td expected[],
1750                               unsigned expected_length,
1751                               VectorFormat vd_form,
1752                               VectorFormat vn_form) {
1753   VIXL_ASSERT(inputs_n_length > 0);
1754 
1755   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1756   const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
1757 
1758   const unsigned results_length = inputs_n_length;
1759   Td* results = new Td[results_length * vd_lanes_per_q];
1760   const unsigned lane_bit = sizeof(Td) * 8;
1761   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1762 
1763   bool skipped;
1764 
1765   Test1OpAcrossNEON_Helper(helper,
1766                            reinterpret_cast<uintptr_t>(inputs_n),
1767                            inputs_n_length,
1768                            reinterpret_cast<uintptr_t>(results),
1769                            vd_form,
1770                            vn_form,
1771                            &skipped);
1772 
1773   if (Test::generate_test_trace()) {
1774     // Print the results.
1775     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1776     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1777       printf(" ");
1778       // Output a separate result for each element of the result vector.
1779       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1780         unsigned index = lane + (iteration * vd_lanes_per_q);
1781         printf(" 0x%0*" PRIx64 ",",
1782                lane_len_in_hex,
1783                static_cast<uint64_t>(results[index]));
1784       }
1785       printf("\n");
1786     }
1787 
1788     printf("};\n");
1789     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1790            name,
1791            results_length);
1792   } else if (!skipped) {
1793     // Check the results.
1794     VIXL_CHECK(expected_length == results_length);
1795     unsigned error_count = 0;
1796     unsigned d = 0;
1797     const char* padding = "                    ";
1798     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1799     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1800       bool error_in_vector = false;
1801 
1802       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1803         unsigned expected_index = (n * vd_lane_count) + lane;
1804         unsigned results_index = (n * vd_lanes_per_q) + lane;
1805 
1806         if (results[results_index] != expected[expected_index]) {
1807           error_in_vector = true;
1808           break;
1809         }
1810       }
1811 
1812       // For across operations, the remaining lanes should be zero.
1813       for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
1814         unsigned results_index = (n * vd_lanes_per_q) + lane;
1815         if (results[results_index] != 0) {
1816           error_in_vector = true;
1817           break;
1818         }
1819       }
1820 
1821       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1822         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1823 
1824         printf("%s\n", name);
1825         printf(" Vn%.*s| Vd%.*s| Expected\n",
1826                lane_len_in_hex + 1,
1827                padding,
1828                lane_len_in_hex + 1,
1829                padding);
1830 
1831         // TODO: In case of an error, all tests print out as many elements as
1832         //       there are lanes in the output or input vectors. This way
1833         //       the viewer can read all the values that were needed for the
1834         //       operation but the output contains also unnecessary values.
1835         //       These prints can be improved according to the arguments
1836         //       passed to test functions.
1837         //       This output for the 'Across' category has the required
1838         //       modifications.
1839         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1840           unsigned results_index =
1841               (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
1842           unsigned input_index_n =
1843               (inputs_n_length - vn_lane_count + n + 1 + lane) %
1844               inputs_n_length;
1845 
1846           Td expect = 0;
1847           if ((vn_lane_count - 1) == lane) {
1848             // This is the last lane to be printed, ie. the least-significant
1849             // lane, so use the expected value; any other lane should be zero.
1850             unsigned expected_index = n * vd_lane_count;
1851             expect = expected[expected_index];
1852           }
1853           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1854                  results[results_index] != expect ? '*' : ' ',
1855                  lane_len_in_hex,
1856                  static_cast<uint64_t>(inputs_n[input_index_n]),
1857                  lane_len_in_hex,
1858                  static_cast<uint64_t>(results[results_index]),
1859                  lane_len_in_hex,
1860                  static_cast<uint64_t>(expect));
1861         }
1862       }
1863     }
1864     VIXL_ASSERT(d == expected_length);
1865     if (error_count > kErrorReportLimit) {
1866       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1867     }
1868     VIXL_CHECK(error_count == 0);
1869   }
1870   delete[] results;
1871 }
1872 
1873 
1874 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1875 
1876 // TODO: Iterate over inputs_d once the traces file is split.
1877 
Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d, uintptr_t inputs_n, unsigned inputs_n_length, uintptr_t inputs_m, unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form, bool* skipped)1878 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1879                                uintptr_t inputs_d,
1880                                uintptr_t inputs_n,
1881                                unsigned inputs_n_length,
1882                                uintptr_t inputs_m,
1883                                unsigned inputs_m_length,
1884                                uintptr_t results,
1885                                VectorFormat vd_form,
1886                                VectorFormat vn_form,
1887                                VectorFormat vm_form,
1888                                bool* skipped) {
1889   VIXL_ASSERT(vd_form != kFormatUndefined);
1890   VIXL_ASSERT(vn_form != kFormatUndefined);
1891   VIXL_ASSERT(vm_form != kFormatUndefined);
1892 
1893   CPUFeatures features;
1894   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
1895   features.Combine(CPUFeatures::kFP);
1896   features.Combine(CPUFeatures::kRDM);
1897   features.Combine(CPUFeatures::kDotProduct);
1898   features.Combine(CPUFeatures::kFHM);
1899   SETUP_WITH_FEATURES(features);
1900   START();
1901 
1902   // Roll up the loop to keep the code size down.
1903   Label loop_n, loop_m;
1904 
1905   Register out = x0;
1906   Register inputs_n_base = x1;
1907   Register inputs_m_base = x2;
1908   Register inputs_d_base = x3;
1909   Register inputs_n_last_16bytes = x4;
1910   Register inputs_m_last_16bytes = x5;
1911   Register index_n = x6;
1912   Register index_m = x7;
1913 
1914   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1915   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1916   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1917 
1918   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1919   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1920   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1921   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1922   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1923 
1924   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1925   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1926   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1927   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1928   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1929 
1930 
1931   // Always load and store 128 bits regardless of the format.
1932   VRegister vd = v0.V16B();
1933   VRegister vn = v1.V16B();
1934   VRegister vm = v2.V16B();
1935   VRegister vntmp = v3.V16B();
1936   VRegister vmtmp = v4.V16B();
1937   VRegister vres = v5.V16B();
1938 
1939   // These will have the correct format for calling the 'helper'.
1940   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1941   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1942   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1943 
1944   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1945   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1946   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1947 
1948   __ Mov(out, results);
1949 
1950   __ Mov(inputs_d_base, inputs_d);
1951 
1952   __ Mov(inputs_n_base, inputs_n);
1953   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1954   __ Mov(inputs_m_base, inputs_m);
1955   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1956 
1957   __ Ldr(vd, MemOperand(inputs_d_base));
1958   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1959   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1960 
1961   __ Mov(index_n, 0);
1962   __ Bind(&loop_n);
1963 
1964   __ Ldr(vntmp_single,
1965          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1966   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1967 
1968   __ Mov(index_m, 0);
1969   __ Bind(&loop_m);
1970 
1971   __ Ldr(vmtmp_single,
1972          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
1973   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1974 
1975   __ Mov(vres, vd);
1976   {
1977     SingleEmissionCheckScope guard(&masm);
1978     (masm.*helper)(vres_helper, vn_helper, vm_helper);
1979   }
1980   __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1981 
1982   __ Add(index_m, index_m, 1);
1983   __ Cmp(index_m, inputs_m_length);
1984   __ B(lo, &loop_m);
1985 
1986   __ Add(index_n, index_n, 1);
1987   __ Cmp(index_n, inputs_n_length);
1988   __ B(lo, &loop_n);
1989 
1990   END();
1991   TRY_RUN(skipped);
1992 }
1993 
1994 
1995 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1996 // arrays of rawbit representation of input values. This ensures that
1997 // exact bit comparisons can be performed.
1998 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char* name, Test2OpNEONHelper_t helper, const Td inputs_d[], const Tn inputs_n[], unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form)1999 static void Test2OpNEON(const char* name,
2000                         Test2OpNEONHelper_t helper,
2001                         const Td inputs_d[],
2002                         const Tn inputs_n[],
2003                         unsigned inputs_n_length,
2004                         const Tm inputs_m[],
2005                         unsigned inputs_m_length,
2006                         const Td expected[],
2007                         unsigned expected_length,
2008                         VectorFormat vd_form,
2009                         VectorFormat vn_form,
2010                         VectorFormat vm_form) {
2011   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2012 
2013   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2014 
2015   const unsigned results_length = inputs_n_length * inputs_m_length;
2016   Td* results = new Td[results_length * vd_lane_count];
2017   const unsigned lane_bit = sizeof(Td) * 8;
2018   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2019 
2020   bool skipped;
2021 
2022   Test2OpNEON_Helper(helper,
2023                      reinterpret_cast<uintptr_t>(inputs_d),
2024                      reinterpret_cast<uintptr_t>(inputs_n),
2025                      inputs_n_length,
2026                      reinterpret_cast<uintptr_t>(inputs_m),
2027                      inputs_m_length,
2028                      reinterpret_cast<uintptr_t>(results),
2029                      vd_form,
2030                      vn_form,
2031                      vm_form,
2032                      &skipped);
2033 
2034   if (Test::generate_test_trace()) {
2035     // Print the results.
2036     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2037     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2038       printf(" ");
2039       // Output a separate result for each element of the result vector.
2040       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2041         unsigned index = lane + (iteration * vd_lane_count);
2042         printf(" 0x%0*" PRIx64 ",",
2043                lane_len_in_hex,
2044                static_cast<uint64_t>(results[index]));
2045       }
2046       printf("\n");
2047     }
2048 
2049     printf("};\n");
2050     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2051            name,
2052            results_length);
2053   } else if (!skipped) {
2054     // Check the results.
2055     VIXL_CHECK(expected_length == results_length);
2056     unsigned error_count = 0;
2057     unsigned d = 0;
2058     const char* padding = "                    ";
2059     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2060     for (unsigned n = 0; n < inputs_n_length; n++) {
2061       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2062         bool error_in_vector = false;
2063 
2064         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2065           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2066                                   (m * vd_lane_count) + lane;
2067 
2068           if (results[output_index] != expected[output_index]) {
2069             error_in_vector = true;
2070             break;
2071           }
2072         }
2073 
2074         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2075           printf("%s\n", name);
2076           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
2077                  lane_len_in_hex + 1,
2078                  padding,
2079                  lane_len_in_hex + 1,
2080                  padding,
2081                  lane_len_in_hex + 1,
2082                  padding,
2083                  lane_len_in_hex + 1,
2084                  padding);
2085 
2086           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2087             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2088                                     (m * vd_lane_count) + lane;
2089             unsigned input_index_n =
2090                 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2091                 inputs_n_length;
2092             unsigned input_index_m =
2093                 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2094                 inputs_m_length;
2095 
2096             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2097                    " "
2098                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2099                    results[output_index] != expected[output_index] ? '*' : ' ',
2100                    lane_len_in_hex,
2101                    static_cast<uint64_t>(inputs_d[lane]),
2102                    lane_len_in_hex,
2103                    static_cast<uint64_t>(inputs_n[input_index_n]),
2104                    lane_len_in_hex,
2105                    static_cast<uint64_t>(inputs_m[input_index_m]),
2106                    lane_len_in_hex,
2107                    static_cast<uint64_t>(results[output_index]),
2108                    lane_len_in_hex,
2109                    static_cast<uint64_t>(expected[output_index]));
2110           }
2111         }
2112       }
2113     }
2114     VIXL_ASSERT(d == expected_length);
2115     if (error_count > kErrorReportLimit) {
2116       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2117     }
2118     VIXL_CHECK(error_count == 0);
2119   }
2120   delete[] results;
2121 }
2122 
2123 
2124 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
2125 
TestByElementNEON_Helper(TestByElementNEONHelper_t helper, uintptr_t inputs_d, uintptr_t inputs_n, unsigned inputs_n_length, uintptr_t inputs_m, unsigned inputs_m_length, const int indices[], unsigned indices_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form, unsigned vm_subvector_count, bool* skipped)2126 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
2127                                      uintptr_t inputs_d,
2128                                      uintptr_t inputs_n,
2129                                      unsigned inputs_n_length,
2130                                      uintptr_t inputs_m,
2131                                      unsigned inputs_m_length,
2132                                      const int indices[],
2133                                      unsigned indices_length,
2134                                      uintptr_t results,
2135                                      VectorFormat vd_form,
2136                                      VectorFormat vn_form,
2137                                      VectorFormat vm_form,
2138                                      unsigned vm_subvector_count,
2139                                      bool* skipped) {
2140   VIXL_ASSERT(vd_form != kFormatUndefined);
2141   VIXL_ASSERT(vn_form != kFormatUndefined);
2142   VIXL_ASSERT(vm_form != kFormatUndefined);
2143   VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));
2144 
2145   CPUFeatures features;
2146   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
2147   features.Combine(CPUFeatures::kFP);
2148   features.Combine(CPUFeatures::kRDM);
2149   features.Combine(CPUFeatures::kDotProduct);
2150   features.Combine(CPUFeatures::kFHM);
2151   SETUP_WITH_FEATURES(features);
2152 
2153   START();
2154 
2155   // Roll up the loop to keep the code size down.
2156   Label loop_n, loop_m;
2157 
2158   Register out = x0;
2159   Register inputs_n_base = x1;
2160   Register inputs_m_base = x2;
2161   Register inputs_d_base = x3;
2162   Register inputs_n_last_16bytes = x4;
2163   Register inputs_m_last_16bytes = x5;
2164   Register index_n = x6;
2165   Register index_m = x7;
2166 
2167   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2168   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2169   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2170 
2171   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2172   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2173   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2174   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2175   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2176 
2177   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
2178   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
2179   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
2180   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2181   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2182 
2183   VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);
2184 
2185   // Always load and store 128 bits regardless of the format.
2186   VRegister vd = v0.V16B();
2187   VRegister vn = v1.V16B();
2188   VRegister vm = v2.V16B();
2189   VRegister vntmp = v3.V16B();
2190   VRegister vmtmp = v4.V16B();
2191   VRegister vres = v5.V16B();
2192 
2193   // These will have the correct format for calling the 'helper'.
2194   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2195   VRegister vm_helper =
2196       VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
2197   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2198 
2199   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2200   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2201   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2202 
2203   __ Mov(out, results);
2204 
2205   __ Mov(inputs_d_base, inputs_d);
2206 
2207   __ Mov(inputs_n_base, inputs_n);
2208   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2209   __ Mov(inputs_m_base, inputs_m);
2210   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2211 
2212   __ Ldr(vd, MemOperand(inputs_d_base));
2213   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2214   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2215 
2216   __ Mov(index_n, 0);
2217   __ Bind(&loop_n);
2218 
2219   __ Ldr(vntmp_single,
2220          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2221   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2222 
2223   __ Mov(index_m, 0);
2224   __ Bind(&loop_m);
2225 
2226   __ Ldr(vmtmp_single,
2227          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2228   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2229 
2230   __ Mov(vres, vd);
2231   {
2232     for (unsigned i = 0; i < indices_length; i++) {
2233       {
2234         SingleEmissionCheckScope guard(&masm);
2235         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
2236       }
2237       __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2238     }
2239   }
2240 
2241   __ Add(index_m, index_m, 1);
2242   __ Cmp(index_m, inputs_m_length);
2243   __ B(lo, &loop_m);
2244 
2245   __ Add(index_n, index_n, 1);
2246   __ Cmp(index_n, inputs_n_length);
2247   __ B(lo, &loop_n);
2248 
2249   END();
2250   TRY_RUN(skipped);
2251 }
2252 
2253 
2254 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2255 // arrays of rawbit representation of input values. This ensures that
2256 // exact bit comparisons can be performed.
2257 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char* name, TestByElementNEONHelper_t helper, const Td inputs_d[], const Tn inputs_n[], unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, const int indices[], unsigned indices_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form, unsigned vm_subvector_count = 1)2258 static void TestByElementNEON(const char* name,
2259                               TestByElementNEONHelper_t helper,
2260                               const Td inputs_d[],
2261                               const Tn inputs_n[],
2262                               unsigned inputs_n_length,
2263                               const Tm inputs_m[],
2264                               unsigned inputs_m_length,
2265                               const int indices[],
2266                               unsigned indices_length,
2267                               const Td expected[],
2268                               unsigned expected_length,
2269                               VectorFormat vd_form,
2270                               VectorFormat vn_form,
2271                               VectorFormat vm_form,
2272                               unsigned vm_subvector_count = 1) {
2273   VIXL_ASSERT(inputs_n_length > 0);
2274   VIXL_ASSERT(inputs_m_length > 0);
2275   VIXL_ASSERT(indices_length > 0);
2276 
2277   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2278 
2279   const unsigned results_length =
2280       inputs_n_length * inputs_m_length * indices_length;
2281   Td* results = new Td[results_length * vd_lane_count];
2282   const unsigned lane_bit = sizeof(Td) * 8;
2283   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2284 
2285   bool skipped;
2286 
2287   TestByElementNEON_Helper(helper,
2288                            reinterpret_cast<uintptr_t>(inputs_d),
2289                            reinterpret_cast<uintptr_t>(inputs_n),
2290                            inputs_n_length,
2291                            reinterpret_cast<uintptr_t>(inputs_m),
2292                            inputs_m_length,
2293                            indices,
2294                            indices_length,
2295                            reinterpret_cast<uintptr_t>(results),
2296                            vd_form,
2297                            vn_form,
2298                            vm_form,
2299                            vm_subvector_count,
2300                            &skipped);
2301 
2302   if (Test::generate_test_trace()) {
2303     // Print the results.
2304     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2305     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2306       printf(" ");
2307       // Output a separate result for each element of the result vector.
2308       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2309         unsigned index = lane + (iteration * vd_lane_count);
2310         printf(" 0x%0*" PRIx64 ",",
2311                lane_len_in_hex,
2312                static_cast<uint64_t>(results[index]));
2313       }
2314       printf("\n");
2315     }
2316 
2317     printf("};\n");
2318     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2319            name,
2320            results_length);
2321   } else if (!skipped) {
2322     // Check the results.
2323     VIXL_CHECK(expected_length == results_length);
2324     unsigned error_count = 0;
2325     unsigned d = 0;
2326     const char* padding = "                    ";
2327     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2328     for (unsigned n = 0; n < inputs_n_length; n++) {
2329       for (unsigned m = 0; m < inputs_m_length; m++) {
2330         for (unsigned index = 0; index < indices_length; index++, d++) {
2331           bool error_in_vector = false;
2332 
2333           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2334             unsigned output_index =
2335                 (n * inputs_m_length * indices_length * vd_lane_count) +
2336                 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2337                 lane;
2338 
2339             if (results[output_index] != expected[output_index]) {
2340               error_in_vector = true;
2341               break;
2342             }
2343           }
2344 
2345           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2346             printf("%s\n", name);
2347             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2348                    lane_len_in_hex + 1,
2349                    padding,
2350                    lane_len_in_hex + 1,
2351                    padding,
2352                    lane_len_in_hex + 1,
2353                    padding,
2354                    lane_len_in_hex + 1,
2355                    padding);
2356 
2357             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2358               unsigned output_index =
2359                   (n * inputs_m_length * indices_length * vd_lane_count) +
2360                   (m * indices_length * vd_lane_count) +
2361                   (index * vd_lane_count) + lane;
2362               unsigned input_index_n =
2363                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2364                   inputs_n_length;
2365               unsigned input_index_m =
2366                   (inputs_m_length - vd_lane_count + m + 1 + lane) %
2367                   inputs_m_length;
2368 
2369               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2370                      " "
2371                      "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2372                      results[output_index] != expected[output_index] ? '*'
2373                                                                      : ' ',
2374                      lane_len_in_hex,
2375                      static_cast<uint64_t>(inputs_d[lane]),
2376                      lane_len_in_hex,
2377                      static_cast<uint64_t>(inputs_n[input_index_n]),
2378                      lane_len_in_hex,
2379                      static_cast<uint64_t>(inputs_m[input_index_m]),
2380                      indices[index],
2381                      lane_len_in_hex,
2382                      static_cast<uint64_t>(results[output_index]),
2383                      lane_len_in_hex,
2384                      static_cast<uint64_t>(expected[output_index]));
2385             }
2386           }
2387         }
2388       }
2389     }
2390     VIXL_ASSERT(d == expected_length);
2391     if (error_count > kErrorReportLimit) {
2392       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2393     }
2394     VIXL_CHECK(error_count == 0);
2395   }
2396   delete[] results;
2397 }
2398 
2399 
2400 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2401 
2402 
2403 template <typename Tm>
Test2OpImmNEON_Helper( typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, bool* skipped)2404 void Test2OpImmNEON_Helper(
2405     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2406     uintptr_t inputs_n,
2407     unsigned inputs_n_length,
2408     const Tm inputs_m[],
2409     unsigned inputs_m_length,
2410     uintptr_t results,
2411     VectorFormat vd_form,
2412     VectorFormat vn_form,
2413     bool* skipped) {
2414   VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2415 
2416   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
2417                       CPUFeatures::kFP,
2418                       CPUFeatures::kNEONHalf);
2419   START();
2420 
2421   // Roll up the loop to keep the code size down.
2422   Label loop_n;
2423 
2424   Register out = x0;
2425   Register inputs_n_base = x1;
2426   Register inputs_n_last_16bytes = x3;
2427   Register index_n = x5;
2428 
2429   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2430   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2431   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2432 
2433   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2434   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2435   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2436   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2437   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2438 
2439 
2440   // These will be either a D- or a Q-register form, with a single lane
2441   // (for use in scalar load and store operations).
2442   VRegister vd = VRegister(0, vd_bits);
2443   VRegister vn = v1.V16B();
2444   VRegister vntmp = v3.V16B();
2445 
2446   // These will have the correct format for use when calling 'helper'.
2447   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2448   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2449 
2450   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2451   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2452 
2453   __ Mov(out, results);
2454 
2455   __ Mov(inputs_n_base, inputs_n);
2456   __ Mov(inputs_n_last_16bytes,
2457          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2458 
2459   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2460 
2461   __ Mov(index_n, 0);
2462   __ Bind(&loop_n);
2463 
2464   __ Ldr(vntmp_single,
2465          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2466   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2467 
2468   // Set the destination to zero for tests such as '[r]shrn2'.
2469   // TODO: Setting the destination to values other than zero might be a better
2470   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2471   __ Movi(vd.V16B(), 0);
2472 
2473   {
2474     for (unsigned i = 0; i < inputs_m_length; i++) {
2475       {
2476         SingleEmissionCheckScope guard(&masm);
2477         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2478       }
2479       __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2480     }
2481   }
2482 
2483   __ Add(index_n, index_n, 1);
2484   __ Cmp(index_n, inputs_n_length);
2485   __ B(lo, &loop_n);
2486 
2487   END();
2488   TRY_RUN(skipped);
2489 }
2490 
2491 
2492 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2493 // arrays of rawbit representation of input values. This ensures that
2494 // exact bit comparisons can be performed.
2495 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON( const char* name, typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, const Tn inputs_n[], unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form)2496 static void Test2OpImmNEON(
2497     const char* name,
2498     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2499     const Tn inputs_n[],
2500     unsigned inputs_n_length,
2501     const Tm inputs_m[],
2502     unsigned inputs_m_length,
2503     const Td expected[],
2504     unsigned expected_length,
2505     VectorFormat vd_form,
2506     VectorFormat vn_form) {
2507   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2508 
2509   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2510   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2511   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2512 
2513   const unsigned results_length = inputs_n_length * inputs_m_length;
2514   Td* results = new Td[results_length * vd_lane_count];
2515   const unsigned lane_bit = sizeof(Td) * 8;
2516   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2517 
2518   bool skipped;
2519 
2520   Test2OpImmNEON_Helper(helper,
2521                         reinterpret_cast<uintptr_t>(inputs_n),
2522                         inputs_n_length,
2523                         inputs_m,
2524                         inputs_m_length,
2525                         reinterpret_cast<uintptr_t>(results),
2526                         vd_form,
2527                         vn_form,
2528                         &skipped);
2529 
2530   if (Test::generate_test_trace()) {
2531     // Print the results.
2532     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2533     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2534       printf(" ");
2535       // Output a separate result for each element of the result vector.
2536       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2537         unsigned index = lane + (iteration * vd_lane_count);
2538         printf(" 0x%0*" PRIx64 ",",
2539                lane_len_in_hex,
2540                static_cast<uint64_t>(results[index]));
2541       }
2542       printf("\n");
2543     }
2544 
2545     printf("};\n");
2546     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2547            name,
2548            results_length);
2549   } else if (!skipped) {
2550     // Check the results.
2551     VIXL_CHECK(expected_length == results_length);
2552     unsigned error_count = 0;
2553     unsigned d = 0;
2554     const char* padding = "                    ";
2555     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2556     for (unsigned n = 0; n < inputs_n_length; n++) {
2557       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2558         bool error_in_vector = false;
2559 
2560         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2561           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2562                                   (m * vd_lane_count) + lane;
2563 
2564           if (results[output_index] != expected[output_index]) {
2565             error_in_vector = true;
2566             break;
2567           }
2568         }
2569 
2570         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2571           printf("%s\n", name);
2572           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2573                  lane_len_in_hex + 1,
2574                  padding,
2575                  lane_len_in_hex,
2576                  padding,
2577                  lane_len_in_hex + 1,
2578                  padding);
2579 
2580           const unsigned first_index_n =
2581               inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2582 
2583           for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2584                lane++) {
2585             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2586                                     (m * vd_lane_count) + lane;
2587             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2588             unsigned input_index_m = m;
2589 
2590             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2591                    " "
2592                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2593                    results[output_index] != expected[output_index] ? '*' : ' ',
2594                    lane_len_in_hex,
2595                    static_cast<uint64_t>(inputs_n[input_index_n]),
2596                    lane_len_in_hex,
2597                    static_cast<uint64_t>(inputs_m[input_index_m]),
2598                    lane_len_in_hex,
2599                    static_cast<uint64_t>(results[output_index]),
2600                    lane_len_in_hex,
2601                    static_cast<uint64_t>(expected[output_index]));
2602           }
2603         }
2604       }
2605     }
2606     VIXL_ASSERT(d == expected_length);
2607     if (error_count > kErrorReportLimit) {
2608       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2609     }
2610     VIXL_CHECK(error_count == 0);
2611   }
2612   delete[] results;
2613 }
2614 
2615 
2616 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2617 
2618 
TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, uintptr_t inputs_d, const int inputs_imm1[], unsigned inputs_imm1_length, uintptr_t inputs_n, unsigned inputs_n_length, const int inputs_imm2[], unsigned inputs_imm2_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, bool* skipped)2619 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
2620                                       uintptr_t inputs_d,
2621                                       const int inputs_imm1[],
2622                                       unsigned inputs_imm1_length,
2623                                       uintptr_t inputs_n,
2624                                       unsigned inputs_n_length,
2625                                       const int inputs_imm2[],
2626                                       unsigned inputs_imm2_length,
2627                                       uintptr_t results,
2628                                       VectorFormat vd_form,
2629                                       VectorFormat vn_form,
2630                                       bool* skipped) {
2631   VIXL_ASSERT(vd_form != kFormatUndefined);
2632   VIXL_ASSERT(vn_form != kFormatUndefined);
2633 
2634   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
2635   START();
2636 
2637   // Roll up the loop to keep the code size down.
2638   Label loop_n;
2639 
2640   Register out = x0;
2641   Register inputs_d_base = x1;
2642   Register inputs_n_base = x2;
2643   Register inputs_n_last_vector = x4;
2644   Register index_n = x6;
2645 
2646   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2647   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2648   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2649 
2650   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2651   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2652   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2653   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2654   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2655 
2656 
2657   // These will be either a D- or a Q-register form, with a single lane
2658   // (for use in scalar load and store operations).
2659   VRegister vd = VRegister(0, vd_bits);
2660   VRegister vn = VRegister(1, vn_bits);
2661   VRegister vntmp = VRegister(4, vn_bits);
2662   VRegister vres = VRegister(5, vn_bits);
2663 
2664   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2665   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2666 
2667   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2668   VRegister vntmp_single = VRegister(4, vn_lane_bits);
2669 
2670   // Same registers for use in the 'ext' instructions.
2671   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2672   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2673 
2674   __ Mov(out, results);
2675 
2676   __ Mov(inputs_d_base, inputs_d);
2677 
2678   __ Mov(inputs_n_base, inputs_n);
2679   __ Mov(inputs_n_last_vector,
2680          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2681 
2682   __ Ldr(vd, MemOperand(inputs_d_base));
2683 
2684   __ Ldr(vn, MemOperand(inputs_n_last_vector));
2685 
2686   __ Mov(index_n, 0);
2687   __ Bind(&loop_n);
2688 
2689   __ Ldr(vntmp_single,
2690          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2691   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2692 
2693   {
2694     EmissionCheckScope guard(&masm,
2695                              kInstructionSize * inputs_imm1_length *
2696                                  inputs_imm2_length * 3);
2697     for (unsigned i = 0; i < inputs_imm1_length; i++) {
2698       for (unsigned j = 0; j < inputs_imm2_length; j++) {
2699         __ Mov(vres, vd);
2700         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2701         __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2702       }
2703     }
2704   }
2705 
2706   __ Add(index_n, index_n, 1);
2707   __ Cmp(index_n, inputs_n_length);
2708   __ B(lo, &loop_n);
2709 
2710   END();
2711   TRY_RUN(skipped);
2712 }
2713 
2714 
2715 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2716 // arrays of rawbit representation of input values. This ensures that
2717 // exact bit comparisons can be performed.
2718 template <typename Td, typename Tn>
TestOpImmOpImmNEON(const char* name, TestOpImmOpImmVdUpdateNEONHelper_t helper, const Td inputs_d[], const int inputs_imm1[], unsigned inputs_imm1_length, const Tn inputs_n[], unsigned inputs_n_length, const int inputs_imm2[], unsigned inputs_imm2_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form)2719 static void TestOpImmOpImmNEON(const char* name,
2720                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
2721                                const Td inputs_d[],
2722                                const int inputs_imm1[],
2723                                unsigned inputs_imm1_length,
2724                                const Tn inputs_n[],
2725                                unsigned inputs_n_length,
2726                                const int inputs_imm2[],
2727                                unsigned inputs_imm2_length,
2728                                const Td expected[],
2729                                unsigned expected_length,
2730                                VectorFormat vd_form,
2731                                VectorFormat vn_form) {
2732   VIXL_ASSERT(inputs_n_length > 0);
2733   VIXL_ASSERT(inputs_imm1_length > 0);
2734   VIXL_ASSERT(inputs_imm2_length > 0);
2735 
2736   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2737 
2738   const unsigned results_length =
2739       inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2740 
2741   Td* results = new Td[results_length * vd_lane_count];
2742   const unsigned lane_bit = sizeof(Td) * 8;
2743   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2744 
2745   bool skipped;
2746 
2747   TestOpImmOpImmNEON_Helper(helper,
2748                             reinterpret_cast<uintptr_t>(inputs_d),
2749                             inputs_imm1,
2750                             inputs_imm1_length,
2751                             reinterpret_cast<uintptr_t>(inputs_n),
2752                             inputs_n_length,
2753                             inputs_imm2,
2754                             inputs_imm2_length,
2755                             reinterpret_cast<uintptr_t>(results),
2756                             vd_form,
2757                             vn_form,
2758                             &skipped);
2759 
2760   if (Test::generate_test_trace()) {
2761     // Print the results.
2762     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2763     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2764       printf(" ");
2765       // Output a separate result for each element of the result vector.
2766       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2767         unsigned index = lane + (iteration * vd_lane_count);
2768         printf(" 0x%0*" PRIx64 ",",
2769                lane_len_in_hex,
2770                static_cast<uint64_t>(results[index]));
2771       }
2772       printf("\n");
2773     }
2774 
2775     printf("};\n");
2776     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2777            name,
2778            results_length);
2779   } else if (!skipped) {
2780     // Check the results.
2781     VIXL_CHECK(expected_length == results_length);
2782     unsigned error_count = 0;
2783     unsigned counted_length = 0;
2784     const char* padding = "                    ";
2785     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2786     for (unsigned n = 0; n < inputs_n_length; n++) {
2787       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2788         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2789           bool error_in_vector = false;
2790 
2791           counted_length++;
2792 
2793           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2794             unsigned output_index =
2795                 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2796                 (imm1 * inputs_imm2_length * vd_lane_count) +
2797                 (imm2 * vd_lane_count) + lane;
2798 
2799             if (results[output_index] != expected[output_index]) {
2800               error_in_vector = true;
2801               break;
2802             }
2803           }
2804 
2805           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2806             printf("%s\n", name);
2807             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2808                    lane_len_in_hex + 1,
2809                    padding,
2810                    lane_len_in_hex,
2811                    padding,
2812                    lane_len_in_hex + 1,
2813                    padding,
2814                    lane_len_in_hex,
2815                    padding,
2816                    lane_len_in_hex + 1,
2817                    padding);
2818 
2819             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2820               unsigned output_index =
2821                   (n * inputs_imm1_length * inputs_imm2_length *
2822                    vd_lane_count) +
2823                   (imm1 * inputs_imm2_length * vd_lane_count) +
2824                   (imm2 * vd_lane_count) + lane;
2825               unsigned input_index_n =
2826                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2827                   inputs_n_length;
2828               unsigned input_index_imm1 = imm1;
2829               unsigned input_index_imm2 = imm2;
2830 
2831               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2832                      " "
2833                      "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2834                      results[output_index] != expected[output_index] ? '*'
2835                                                                      : ' ',
2836                      lane_len_in_hex,
2837                      static_cast<uint64_t>(inputs_d[lane]),
2838                      lane_len_in_hex,
2839                      static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2840                      lane_len_in_hex,
2841                      static_cast<uint64_t>(inputs_n[input_index_n]),
2842                      lane_len_in_hex,
2843                      static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2844                      lane_len_in_hex,
2845                      static_cast<uint64_t>(results[output_index]),
2846                      lane_len_in_hex,
2847                      static_cast<uint64_t>(expected[output_index]));
2848             }
2849           }
2850         }
2851       }
2852     }
2853     VIXL_ASSERT(counted_length == expected_length);
2854     if (error_count > kErrorReportLimit) {
2855       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2856     }
2857     VIXL_CHECK(error_count == 0);
2858   }
2859   delete[] results;
2860 }
2861 
2862 
2863 // ==== Floating-point tests. ====
2864 
2865 
// Turn a macro argument into a string literal.
#define STRINGIFY(s) #s

// Standard floating-point test expansion. Expands to a call of the form
//
//   Test<kind>("<insn>_<suffix>", &MacroAssembler::<insn>, inputs,
//              <number of elements in inputs>,
//              kExpected_<insn>_<suffix>, kExpectedCount_<insn>_<suffix>)
//
// 'inputs' must name an array (not a pointer), since its element count is
// computed with sizeof.
#define CALL_TEST_FP_HELPER(insn, suffix, kind, inputs) \
  Test##kind(STRINGIFY(insn) "_" STRINGIFY(suffix),     \
             &MacroAssembler::insn,                     \
             inputs,                                    \
             sizeof(inputs) / sizeof(inputs[0]),        \
             kExpected_##insn##_##suffix,               \
             kExpectedCount_##insn##_##suffix)
2877 
// Define the double-precision (<insn>_d) and single-precision (<insn>_s)
// tests for an instruction. 'input_set' is the suffix of the input arrays:
// kInputDouble<input_set> and kInputFloat<input_set>.
#define DEFINE_TEST_FP(insn, kind, input_set)                      \
  TEST(insn##_d) {                                                 \
    CALL_TEST_FP_HELPER(insn, d, kind, kInputDouble##input_set);   \
  }                                                                \
  TEST(insn##_s) {                                                 \
    CALL_TEST_FP_HELPER(insn, s, kind, kInputFloat##input_set);    \
  }
2885 
// As DEFINE_TEST_FP, but additionally defines the half-precision test
// (<insn>_h), which reads its inputs from kInputFloat16<input_set>.
#define DEFINE_TEST_FP_FP16(insn, kind, input_set)                 \
  TEST(insn##_d) {                                                 \
    CALL_TEST_FP_HELPER(insn, d, kind, kInputDouble##input_set);   \
  }                                                                \
  TEST(insn##_s) {                                                 \
    CALL_TEST_FP_HELPER(insn, s, kind, kInputFloat##input_set);    \
  }                                                                \
  TEST(insn##_h) {                                                 \
    CALL_TEST_FP_HELPER(insn, h, kind, kInputFloat16##input_set);  \
  }
2896 
2897 
2898 // TODO: Test with a newer version of valgrind.
2899 //
2900 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
2901 // Therefore this test will be exiting though an ASSERT and thus leaking
2902 // memory.
2903 DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
2904 DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
2905 DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
2906 DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)
2907 
2908 DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
2909 DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
2910 DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
2911 DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
2912 DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
2913 DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
2914 DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
2915 DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
2916 DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)
2917 
2918 DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
2919 DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
2920 DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
2921 DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
2922 DEFINE_TEST_FP(frint32x, 1Op, Conversions)
2923 DEFINE_TEST_FP(frint64x, 1Op, Conversions)
2924 DEFINE_TEST_FP(frint32z, 1Op, Conversions)
2925 DEFINE_TEST_FP(frint64z, 1Op, Conversions)
2926 DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
2927 DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
2928 DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
2929 DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
2930 DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
2931 DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
2932 DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)
2933 
// fcmp tests: the 'd'/'s' variants are dispatched to TestCmp and the
// 'dz'/'sz' variants to TestCmpZero, on double and float inputs respectively.
TEST(fcmp_d) {
  CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic);
}
TEST(fcmp_s) {
  CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic);
}
TEST(fcmp_dz) {
  CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic);
}
TEST(fcmp_sz) {
  CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic);
}

// fcvt tests: 'sd' is fed the double conversion inputs and 'ds' the float
// conversion inputs.
TEST(fcvt_sd) {
  CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions);
}
TEST(fcvt_ds) {
  CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions);
}
2941 
2942 #define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)               \
2943   TEST(mnemonic##_xd) {                                            \
2944     CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);  \
2945   }                                                                \
2946   TEST(mnemonic##_xs) {                                            \
2947     CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);   \
2948   }                                                                \
2949   TEST(mnemonic##_xh) {                                            \
2950     CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \
2951   }                                                                \
2952   TEST(mnemonic##_wd) {                                            \
2953     CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);  \
2954   }                                                                \
2955   TEST(mnemonic##_ws) {                                            \
2956     CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);   \
2957   }                                                                \
2958   TEST(mnemonic##_wh) {                                            \
2959     CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \
2960   }
2961 
2962 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
2963 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
2964 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
2965 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
2966 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
2967 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
2968 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
2969 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2970 
2971 #define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)           \
2972   TEST(mnemonic##_wd) {                                           \
2973     CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
2974   }
2975 
2976 DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)
2977 
2978 // TODO: Scvtf-fixed-point
2979 // TODO: Scvtf-integer
2980 // TODO: Ucvtf-fixed-point
2981 // TODO: Ucvtf-integer
2982 
2983 // TODO: Fccmp
2984 // TODO: Fcsel
2985 
2986 
2987 // ==== NEON Tests. ====
2988 
// Expands to a Test1OpNEON call for a NEON instruction with one source operand.
//   asm_name : MacroAssembler member under test; also forms the test name
//              "<asm_name>_<d_form>" and the kExpected_NEON_<asm_name>_<d_form>
//              / kExpectedCount_NEON_<asm_name>_<d_form> symbols.
//   d_form   : format suffix for vd (pasted onto kFormat).
//   n_form   : format suffix for vn (pasted onto kFormat).
//   n_input  : input array. Its element count is computed with sizeof, so this
//              must name a real array, not a pointer.
#define CALL_TEST_NEON_HELPER_1Op(asm_name, d_form, n_form, n_input) \
  Test1OpNEON(STRINGIFY(asm_name) "_" STRINGIFY(d_form),             \
              &MacroAssembler::asm_name,                             \
              n_input,                                               \
              (sizeof(n_input) / sizeof(n_input[0])),                \
              kExpected_NEON_##asm_name##_##d_form,                  \
              kExpectedCount_NEON_##asm_name##_##d_form,             \
              kFormat##d_form,                                       \
              kFormat##n_form)
2998 
// Expands to a Test1OpAcrossNEON call. Differs from the plain one-operand
// helper in that both format suffixes take part in the naming: the test is
// called "<asm_name>_<d_form>_<n_form>" and the expected-result symbols are
// kExpected_NEON_<asm_name>_<d_form>_<n_form> (and the matching
// kExpectedCount_NEON_* symbol). n_input must be a real array (sizeof is used
// to count its elements).
#define CALL_TEST_NEON_HELPER_1OpAcross(asm_name, d_form, n_form, n_input)   \
  Test1OpAcrossNEON(STRINGIFY(asm_name) "_" STRINGIFY(d_form) "_" STRINGIFY( \
                        n_form),                                             \
                    &MacroAssembler::asm_name,                               \
                    n_input,                                                 \
                    (sizeof(n_input) / sizeof(n_input[0])),                  \
                    kExpected_NEON_##asm_name##_##d_form##_##n_form,         \
                    kExpectedCount_NEON_##asm_name##_##d_form##_##n_form,    \
                    kFormat##d_form,                                         \
                    kFormat##n_form)
3009 
// Expands to a Test2OpNEON call for a NEON instruction with two source
// operands.
//   asm_name          : MacroAssembler member under test; with d_form it forms
//                       the test name and the kExpected_NEON_* /
//                       kExpectedCount_NEON_* symbol names.
//   d_form/n_form/m_form : format suffixes for vd, vn and vm (kFormat<...>).
//   d_input           : passed through as-is (no element count is taken).
//   n_input, m_input  : input arrays; each is followed by its element count,
//                       computed with sizeof, so both must be real arrays.
#define CALL_TEST_NEON_HELPER_2Op(asm_name,              \
                                  d_form,                \
                                  n_form,                \
                                  m_form,                \
                                  d_input,               \
                                  n_input,               \
                                  m_input)               \
  Test2OpNEON(STRINGIFY(asm_name) "_" STRINGIFY(d_form), \
              &MacroAssembler::asm_name,                 \
              d_input,                                   \
              n_input,                                   \
              (sizeof(n_input) / sizeof(n_input[0])),    \
              m_input,                                   \
              (sizeof(m_input) / sizeof(m_input[0])),    \
              kExpected_NEON_##asm_name##_##d_form,      \
              kExpectedCount_NEON_##asm_name##_##d_form, \
              kFormat##d_form,                           \
              kFormat##n_form,                           \
              kFormat##m_form)
3029 
// Expands to a Test2OpImmNEON call. The test name and the expected-result
// symbols carry a "_2OPIMM" suffix ("<asm_name>_<d_form>_2OPIMM"), which keeps
// them distinct from the non-immediate variants of the same mnemonic.
// n_input and m_input must be real arrays: their element counts are computed
// with sizeof.
#define CALL_TEST_NEON_HELPER_2OpImm(asm_name,                        \
                                     d_form,                          \
                                     n_form,                          \
                                     n_input,                         \
                                     m_input)                         \
  Test2OpImmNEON(STRINGIFY(asm_name) "_" STRINGIFY(d_form) "_2OPIMM", \
                 &MacroAssembler::asm_name,                           \
                 n_input,                                             \
                 (sizeof(n_input) / sizeof(n_input[0])),              \
                 m_input,                                             \
                 (sizeof(m_input) / sizeof(m_input[0])),              \
                 kExpected_NEON_##asm_name##_##d_form##_2OPIMM,       \
                 kExpectedCount_NEON_##asm_name##_##d_form##_2OPIMM,  \
                 kFormat##d_form,                                     \
                 kFormat##n_form)
3045 
// Expands to a TestByElementNEON call. All three format suffixes take part in
// the naming: the test is "<asm_name>_<d_form>_<n_form>_<m_form>" and the
// expected-result symbols are kExpected_NEON_/kExpectedCount_NEON_ followed by
// the same four-part name.
// n_input, m_input and indexes must be real arrays (each is followed by an
// element count computed with sizeof); d_input is passed through as-is.
#define CALL_TEST_NEON_HELPER_ByElement(asm_name,                      \
                                        d_form,                        \
                                        n_form,                        \
                                        m_form,                        \
                                        d_input,                       \
                                        n_input,                       \
                                        m_input,                       \
                                        indexes)                       \
  TestByElementNEON(                                                   \
      STRINGIFY(asm_name) "_" STRINGIFY(d_form) "_" STRINGIFY(         \
          n_form) "_" STRINGIFY(m_form),                               \
      &MacroAssembler::asm_name,                                       \
      d_input,                                                         \
      n_input,                                                         \
      (sizeof(n_input) / sizeof(n_input[0])),                          \
      m_input,                                                         \
      (sizeof(m_input) / sizeof(m_input[0])),                          \
      indexes,                                                         \
      (sizeof(indexes) / sizeof(indexes[0])),                          \
      kExpected_NEON_##asm_name##_##d_form##_##n_form##_##m_form,      \
      kExpectedCount_NEON_##asm_name##_##d_form##_##n_form##_##m_form, \
      kFormat##d_form,                                                 \
      kFormat##n_form,                                                 \
      kFormat##m_form)
3070 
// Same expansion as the plain by-element helper (a TestByElementNEON call with
// the four-part "<asm_name>_<d_form>_<n_form>_<m_form>" naming), with one extra
// trailing argument: vm_subvector_total is appended after the three kFormat*
// arguments.
// n_input, m_input and indexes must be real arrays (sizeof is used to count
// their elements).
#define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(asm_name,           \
                                                    d_form,             \
                                                    n_form,             \
                                                    m_form,             \
                                                    d_input,            \
                                                    n_input,            \
                                                    m_input,            \
                                                    indexes,            \
                                                    vm_subvector_total) \
  TestByElementNEON(                                                    \
      STRINGIFY(asm_name) "_" STRINGIFY(d_form) "_" STRINGIFY(          \
          n_form) "_" STRINGIFY(m_form),                                \
      &MacroAssembler::asm_name,                                        \
      d_input,                                                          \
      n_input,                                                          \
      (sizeof(n_input) / sizeof(n_input[0])),                           \
      m_input,                                                          \
      (sizeof(m_input) / sizeof(m_input[0])),                           \
      indexes,                                                          \
      (sizeof(indexes) / sizeof(indexes[0])),                           \
      kExpected_NEON_##asm_name##_##d_form##_##n_form##_##m_form,       \
      kExpectedCount_NEON_##asm_name##_##d_form##_##n_form##_##m_form,  \
      kFormat##d_form,                                                  \
      kFormat##n_form,                                                  \
      kFormat##m_form,                                                  \
      vm_subvector_total)
3097 
// Expands to a TestOpImmOpImmNEON call. Unlike the other helpers, the second
// argument is the caller-supplied `helper` rather than
// &MacroAssembler::<asm_name>; asm_name is used only to build the test name
// "<asm_name>_<d_form>" and the kExpected_NEON_* / kExpectedCount_NEON_*
// symbol names.
// imm1_input, n_input and imm2_input must be real arrays (each is followed by
// an element count computed with sizeof); d_input is passed through as-is.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                   \
                                         asm_name,                 \
                                         d_form,                   \
                                         n_form,                   \
                                         d_input,                  \
                                         imm1_input,               \
                                         n_input,                  \
                                         imm2_input)               \
  TestOpImmOpImmNEON(STRINGIFY(asm_name) "_" STRINGIFY(d_form),    \
                     helper,                                       \
                     d_input,                                      \
                     imm1_input,                                   \
                     (sizeof(imm1_input) / sizeof(imm1_input[0])), \
                     n_input,                                      \
                     (sizeof(n_input) / sizeof(n_input[0])),       \
                     imm2_input,                                   \
                     (sizeof(imm2_input) / sizeof(imm2_input[0])), \
                     kExpected_NEON_##asm_name##_##d_form,         \
                     kExpectedCount_NEON_##asm_name##_##d_form,    \
                     kFormat##d_form,                              \
                     kFormat##n_form)
3119 
// "Two registers, same format": forwards to the one-operand helper using
// `vformat` for both vd and vn.
#define CALL_TEST_NEON_HELPER_2SAME(asm_name, vformat, array) \
  CALL_TEST_NEON_HELPER_1Op(asm_name, vformat, vformat, array)
3122 
// Defines tests <asm_name>_8B and <asm_name>_16B, both fed from
// kInput8bits<group>.
#define DEFINE_TEST_NEON_2SAME_8B_16B(asm_name, group)              \
  TEST(asm_name##_8B) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 8B, kInput8bits##group);  \
  }                                                                 \
  TEST(asm_name##_16B) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 16B, kInput8bits##group); \
  }
3130 
// Defines tests <asm_name>_4H and <asm_name>_8H, both fed from
// kInput16bits<group>.
#define DEFINE_TEST_NEON_2SAME_4H_8H(asm_name, group)               \
  TEST(asm_name##_4H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 4H, kInput16bits##group); \
  }                                                                 \
  TEST(asm_name##_8H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 8H, kInput16bits##group); \
  }
3138 
// Defines tests <asm_name>_2S and <asm_name>_4S, both fed from
// kInput32bits<group>.
#define DEFINE_TEST_NEON_2SAME_2S_4S(asm_name, group)               \
  TEST(asm_name##_2S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 2S, kInput32bits##group); \
  }                                                                 \
  TEST(asm_name##_4S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 4S, kInput32bits##group); \
  }
3146 
// Byte and halfword forms only: 8B, 16B, 4H and 8H.
#define DEFINE_TEST_NEON_2SAME_BH(asm_name, group) \
  DEFINE_TEST_NEON_2SAME_8B_16B(asm_name, group)   \
  DEFINE_TEST_NEON_2SAME_4H_8H(asm_name, group)
3150 
// Every integer vector form except 2D: 8B, 16B, 4H, 8H, 2S and 4S.
#define DEFINE_TEST_NEON_2SAME_NO2D(asm_name, group) \
  DEFINE_TEST_NEON_2SAME_BH(asm_name, group)         \
  DEFINE_TEST_NEON_2SAME_2S_4S(asm_name, group)
3154 
// All integer vector forms: everything from the NO2D set plus <asm_name>_2D,
// which is fed from kInput64bits<group>.
#define DEFINE_TEST_NEON_2SAME(asm_name, group)                     \
  DEFINE_TEST_NEON_2SAME_NO2D(asm_name, group)                      \
  TEST(asm_name##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 2D, kInput64bits##group); \
  }
// Word and doubleword forms only: 2S and 4S (from kInput32bits<group>) plus 2D
// (from kInput64bits<group>).
#define DEFINE_TEST_NEON_2SAME_SD(asm_name, group)                  \
  DEFINE_TEST_NEON_2SAME_2S_4S(asm_name, group)                     \
  TEST(asm_name##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 2D, kInput64bits##group); \
  }
3165 
// Floating-point vector forms: 2S and 4S use kInputFloat<group>, 2D uses
// kInputDouble<group>.
#define DEFINE_TEST_NEON_2SAME_FP(asm_name, group)                  \
  TEST(asm_name##_2S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 2S, kInputFloat##group);  \
  }                                                                 \
  TEST(asm_name##_4S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 4S, kInputFloat##group);  \
  }                                                                 \
  TEST(asm_name##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 2D, kInputDouble##group); \
  }
3176 
// The single/double-precision vector forms (2S, 4S, 2D) plus the half-precision
// forms 4H and 8H, which are fed from kInputFloat16<group>.
#define DEFINE_TEST_NEON_2SAME_FP_FP16(asm_name, group)              \
  DEFINE_TEST_NEON_2SAME_FP(asm_name, group)                         \
  TEST(asm_name##_4H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 4H, kInputFloat16##group); \
  }                                                                  \
  TEST(asm_name##_8H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, 8H, kInputFloat16##group); \
  }
3185 
// Scalar floating-point forms: H (kInputFloat16<group>), S (kInputFloat<group>)
// and D (kInputDouble<group>).
#define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(asm_name, group)      \
  TEST(asm_name##_H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, H, kInputFloat16##group); \
  }                                                                 \
  TEST(asm_name##_S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, S, kInputFloat##group);   \
  }                                                                 \
  TEST(asm_name##_D) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, D, kInputDouble##group);  \
  }
3196 
// Defines the scalar test <asm_name>_B, fed from kInput8bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(asm_name, group)          \
  TEST(asm_name##_B) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, B, kInput8bits##group); \
  }
// Defines the scalar test <asm_name>_H, fed from kInput16bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(asm_name, group)           \
  TEST(asm_name##_H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, H, kInput16bits##group); \
  }
// Defines the scalar test <asm_name>_S, fed from kInput32bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(asm_name, group)           \
  TEST(asm_name##_S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, S, kInput32bits##group); \
  }
// Defines the scalar test <asm_name>_D, fed from kInput64bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(asm_name, group)           \
  TEST(asm_name##_D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(asm_name, D, kInput64bits##group); \
  }
3213 
// All four integer scalar forms: B, H, S and D.
#define DEFINE_TEST_NEON_2SAME_SCALAR(asm_name, group) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(asm_name, group)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(asm_name, group)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(asm_name, group)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(asm_name, group)
3219 
// Only the S and D integer scalar forms.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(asm_name, group) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(asm_name, group)        \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(asm_name, group)
3223 
3224 
// Thin alias for the across-lanes helper; arguments are forwarded unchanged.
#define CALL_TEST_NEON_HELPER_ACROSS(asm_name, dst_fmt, src_fmt, n_input) \
  CALL_TEST_NEON_HELPER_1OpAcross(asm_name, dst_fmt, src_fmt, n_input)
3227 
// Across-lanes tests where the scalar result has the same lane size as the
// source vector: B<-8B, B<-16B, H<-4H, H<-8H and S<-4S. Each test is named
// <asm_name>_<scalar>_<vector>; inputs come from kInput{8,16,32}bits<group>
// according to the source lane size.
#define DEFINE_TEST_NEON_ACROSS(asm_name, group)                        \
  TEST(asm_name##_B_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, B, 8B, kInput8bits##group);  \
  }                                                                     \
  TEST(asm_name##_B_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, B, 16B, kInput8bits##group); \
  }                                                                     \
  TEST(asm_name##_H_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, H, 4H, kInput16bits##group); \
  }                                                                     \
  TEST(asm_name##_H_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, H, 8H, kInput16bits##group); \
  }                                                                     \
  TEST(asm_name##_S_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, S, 4S, kInput32bits##group); \
  }
3244 
// Across-lanes tests where the scalar result is one size wider than the source
// lanes: H<-8B, H<-16B, S<-4H, S<-8H and D<-4S. Inputs are chosen by the source
// lane size (kInput{8,16,32}bits<group>).
#define DEFINE_TEST_NEON_ACROSS_LONG(asm_name, group)                   \
  TEST(asm_name##_H_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, H, 8B, kInput8bits##group);  \
  }                                                                     \
  TEST(asm_name##_H_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, H, 16B, kInput8bits##group); \
  }                                                                     \
  TEST(asm_name##_S_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, S, 4H, kInput16bits##group); \
  }                                                                     \
  TEST(asm_name##_S_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, S, 8H, kInput16bits##group); \
  }                                                                     \
  TEST(asm_name##_D_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, D, 4S, kInput32bits##group); \
  }
3261 
// Floating-point across-lanes tests: H<-4H and H<-8H use kInputFloat16<group>,
// S<-4S uses kInputFloat<group>.
#define DEFINE_TEST_NEON_ACROSS_FP(asm_name, group)                      \
  TEST(asm_name##_H_4H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, H, 4H, kInputFloat16##group); \
  }                                                                      \
  TEST(asm_name##_H_8H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, H, 8H, kInputFloat16##group); \
  }                                                                      \
  TEST(asm_name##_S_4S) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(asm_name, S, 4S, kInputFloat##group);   \
  }
3272 
// "Two registers, different formats": thin alias for the one-operand helper
// with distinct vd and vn formats. Arguments are forwarded unchanged.
#define CALL_TEST_NEON_HELPER_2DIFF(asm_name, d_form, n_form, n_input) \
  CALL_TEST_NEON_HELPER_1Op(asm_name, d_form, n_form, n_input)
3275 
// Tests whose destination lanes are twice as wide as the source lanes:
// 4H<-8B, 8H<-16B, 2S<-4H, 4S<-8H, 1D<-2S and 2D<-4S. Tests are named after the
// destination format; inputs are chosen by the source lane size.
#define DEFINE_TEST_NEON_2DIFF_LONG(asm_name, group)                    \
  TEST(asm_name##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 4H, 8B, kInput8bits##group);  \
  }                                                                     \
  TEST(asm_name##_8H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 8H, 16B, kInput8bits##group); \
  }                                                                     \
  TEST(asm_name##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 2S, 4H, kInput16bits##group); \
  }                                                                     \
  TEST(asm_name##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 4S, 8H, kInput16bits##group); \
  }                                                                     \
  TEST(asm_name##_1D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 1D, 2S, kInput32bits##group); \
  }                                                                     \
  TEST(asm_name##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 2D, 4S, kInput32bits##group); \
  }
3295 
// Narrowing tests (destination lanes half the source lane width). Generates
// both the base mnemonic (8B<-8H, 4H<-4S, 2S<-2D) and the "<asm_name>2" variant
// (16B<-8H, 8H<-4S, 4S<-2D). For the latter the macro pastes a literal `2` onto
// asm_name, so a MacroAssembler member with that name must exist.
#define DEFINE_TEST_NEON_2DIFF_NARROW(asm_name, group)                      \
  TEST(asm_name##_8B) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 8B, 8H, kInput16bits##group);     \
  }                                                                         \
  TEST(asm_name##_4H) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 4H, 4S, kInput32bits##group);     \
  }                                                                         \
  TEST(asm_name##_2S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 2S, 2D, kInput64bits##group);     \
  }                                                                         \
  TEST(asm_name##2_16B) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 16B, 8H, kInput16bits##group); \
  }                                                                         \
  TEST(asm_name##2_8H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 8H, 4S, kInput32bits##group);  \
  }                                                                         \
  TEST(asm_name##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 4S, 2D, kInput64bits##group);  \
  }
3315 
// Floating-point lengthening tests: 4S<-4H and 2D<-2S for the base mnemonic,
// 4S<-8H and 2D<-4S for the "<asm_name>2" variant. Half-precision sources use
// kInputFloat16<group>, single-precision sources use kInputFloat<group>.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(asm_name, group)                     \
  TEST(asm_name##_4S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 4S, 4H, kInputFloat16##group);    \
  }                                                                         \
  TEST(asm_name##_2D) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 2D, 2S, kInputFloat##group);      \
  }                                                                         \
  TEST(asm_name##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 4S, 8H, kInputFloat16##group); \
  }                                                                         \
  TEST(asm_name##2_2D) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 2D, 4S, kInputFloat##group);   \
  }
3329 
// Floating-point narrowing tests: 4H<-4S and 2S<-2D for the base mnemonic,
// 8H<-4S and 4S<-2D for the "<asm_name>2" variant. Single-precision sources use
// kInputFloat<group>, double-precision sources use kInputDouble<group>.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(asm_name, group)                  \
  TEST(asm_name##_4H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 4H, 4S, kInputFloat##group);     \
  }                                                                        \
  TEST(asm_name##_2S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 2S, 2D, kInputDouble##group);    \
  }                                                                        \
  TEST(asm_name##2_8H) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 8H, 4S, kInputFloat##group);  \
  }                                                                        \
  TEST(asm_name##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 4S, 2D, kInputDouble##group); \
  }
3343 
// Subset of the FP narrowing tests that only covers double-precision sources:
// 2S<-2D for the base mnemonic and 4S<-2D for the "<asm_name>2" variant.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(asm_name, group)               \
  TEST(asm_name##_2S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, 2S, 2D, kInputDouble##group);    \
  }                                                                        \
  TEST(asm_name##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name##2, 4S, 2D, kInputDouble##group); \
  }
3351 
// Scalar narrowing tests: B<-H, H<-S and S<-D. Tests are named after the
// destination size; inputs are chosen by the source size.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(asm_name, group)         \
  TEST(asm_name##_B) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, B, H, kInput16bits##group); \
  }                                                                   \
  TEST(asm_name##_H) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, H, S, kInput32bits##group); \
  }                                                                   \
  TEST(asm_name##_S) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, S, D, kInput64bits##group); \
  }
3362 
// Floating-point tests with a scalar destination and a two-lane vector source:
// S<-2S (kInputFloat<group>), D<-2D (kInputDouble<group>) and H<-2H
// (kInputFloat16<group>). Note that, despite the "_SD" in the macro name, an
// <asm_name>_H test is generated as well.
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(asm_name, group)            \
  TEST(asm_name##_S) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, S, 2S, kInputFloat##group);   \
  }                                                                     \
  TEST(asm_name##_D) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, D, 2D, kInputDouble##group);  \
  }                                                                     \
  TEST(asm_name##_H) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(asm_name, H, 2H, kInputFloat16##group); \
  }
3373 
// "Three registers, same format": forwards to CALL_TEST_NEON_HELPER_2Op using
// `variant` as the format of vd, vn and vm alike.
//   mnemonic : MacroAssembler member under test.
//   variant  : format suffix shared by all three registers.
//   input_d  : forwarded as the 2Op helper's input_d argument.
//   input_nm : forwarded as BOTH input_n and input_m, i.e. the same array feeds
//              the two source operands.
//
// The expansion is a single expression (no enclosing braces, no trailing
// semicolon), matching the other CALL_TEST_NEON_HELPER_* wrappers. Callers
// terminate it with `;` themselves, so `CALL_TEST_NEON_HELPER_3SAME(...);` is
// exactly one statement and stays valid as the body of an unbraced if/else.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  CALL_TEST_NEON_HELPER_2Op(mnemonic,                                     \
                            variant,                                      \
                            variant,                                      \
                            variant,                                      \
                            input_d,                                      \
                            input_nm,                                     \
                            input_nm)
3384 
// Defines three-same tests <asm_name>_8B and <asm_name>_16B. Both pass
// kInput8bitsAccDestination as the destination input and kInput8bits<group> as
// the source inputs.
#define DEFINE_TEST_NEON_3SAME_8B_16B(asm_name, group)     \
  TEST(asm_name##_8B) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                  \
                                8B,                        \
                                kInput8bitsAccDestination, \
                                kInput8bits##group);       \
  }                                                        \
  TEST(asm_name##_16B) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                  \
                                16B,                       \
                                kInput8bitsAccDestination, \
                                kInput8bits##group);       \
  }
3398 
// Defines three-same tests for the halfword and word vector forms: 4H and 8H
// (kInput16bitsAccDestination / kInput16bits<group>) and 2S and 4S
// (kInput32bitsAccDestination / kInput32bits<group>).
#define DEFINE_TEST_NEON_3SAME_HS(asm_name, group)          \
  TEST(asm_name##_4H) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                4H,                         \
                                kInput16bitsAccDestination, \
                                kInput16bits##group);       \
  }                                                         \
  TEST(asm_name##_8H) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                8H,                         \
                                kInput16bitsAccDestination, \
                                kInput16bits##group);       \
  }                                                         \
  TEST(asm_name##_2S) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                2S,                         \
                                kInput32bitsAccDestination, \
                                kInput32bits##group);       \
  }                                                         \
  TEST(asm_name##_4S) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                4S,                         \
                                kInput32bitsAccDestination, \
                                kInput32bits##group);       \
  }
3424 
// Every integer three-same vector form except 2D: 8B, 16B, 4H, 8H, 2S and 4S.
#define DEFINE_TEST_NEON_3SAME_NO2D(asm_name, group) \
  DEFINE_TEST_NEON_3SAME_8B_16B(asm_name, group)     \
  DEFINE_TEST_NEON_3SAME_HS(asm_name, group)
3428 
// All integer three-same vector forms: the NO2D set plus <asm_name>_2D, which
// uses kInput64bitsAccDestination and kInput64bits<group>.
#define DEFINE_TEST_NEON_3SAME(asm_name, group)             \
  DEFINE_TEST_NEON_3SAME_NO2D(asm_name, group)              \
  TEST(asm_name##_2D) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                2D,                         \
                                kInput64bitsAccDestination, \
                                kInput64bits##group);       \
  }
3437 
// Floating-point three-same vector tests:
//   4H, 8H : kInputFloat16AccDestination / kInputFloat16<group>
//   2S, 4S : kInputFloatAccDestination   / kInputFloat<group>
//   2D     : kInputDoubleAccDestination  / kInputDouble<group>
#define DEFINE_TEST_NEON_3SAME_FP(asm_name, group)           \
  TEST(asm_name##_4H) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                    \
                                4H,                          \
                                kInputFloat16AccDestination, \
                                kInputFloat16##group);       \
  }                                                          \
  TEST(asm_name##_8H) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                    \
                                8H,                          \
                                kInputFloat16AccDestination, \
                                kInputFloat16##group);       \
  }                                                          \
  TEST(asm_name##_2S) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                    \
                                2S,                          \
                                kInputFloatAccDestination,   \
                                kInputFloat##group);         \
  }                                                          \
  TEST(asm_name##_4S) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                    \
                                4S,                          \
                                kInputFloatAccDestination,   \
                                kInputFloat##group);         \
  }                                                          \
  TEST(asm_name##_2D) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                    \
                                2D,                          \
                                kInputDoubleAccDestination,  \
                                kInputDouble##group);        \
  }
3469 
// Defines the scalar three-same test <asm_name>_D, using
// kInput64bitsAccDestination and kInput64bits<group>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(asm_name, group)    \
  TEST(asm_name##_D) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                D,                          \
                                kInput64bitsAccDestination, \
                                kInput64bits##group);       \
  }
3477 
// Defines the scalar three-same tests <asm_name>_H (kInput16bitsAccDestination
// / kInput16bits<group>) and <asm_name>_S (kInput32bitsAccDestination /
// kInput32bits<group>).
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(asm_name, group)   \
  TEST(asm_name##_H) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                H,                          \
                                kInput16bitsAccDestination, \
                                kInput16bits##group);       \
  }                                                         \
  TEST(asm_name##_S) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(asm_name,                   \
                                S,                          \
                                kInput32bitsAccDestination, \
                                kInput32bits##group);       \
  }
3491 
// Defines the scalar three-same tests <mnemonic>_B, <mnemonic>_H, <mnemonic>_S
// and <mnemonic>_D, in that order. The B test is spelled out here; the H/S and
// D tests come from DEFINE_TEST_NEON_3SAME_SCALAR_HS and
// DEFINE_TEST_NEON_3SAME_SCALAR_D, which expand to the same TEST bodies.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)
3517 
// Defines TEST(<mnemonic>_H), TEST(<mnemonic>_S) and TEST(<mnemonic>_D), in
// that order. Each invokes CALL_TEST_NEON_HELPER_3SAME for the scalar form with
// the kInputFloat16 / kInputFloat / kInputDouble "AccDestination" list and the
// correspondingly typed <input> list.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInputFloat16AccDestination, kInputFloat16##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
  }
3537 
// Defines TEST(<mnemonic>_2S) and TEST(<mnemonic>_4S) through
// CALL_TEST_NEON_HELPER_3DIFF: a 2S destination with 2H sources, and a 4S
// destination with 4H sources. The source lists are kInputFloat16<input_n> and
// kInputFloat16<input_m>.
// Note: `input_d` is accepted but never referenced; the destination list is
// always kInputFloatAccDestination.
#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2S, 2H, 2H, kInputFloatAccDestination, \
        kInputFloat16##input_n, kInputFloat16##input_m); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4H, 4H, kInputFloatAccDestination, \
        kInputFloat16##input_n, kInputFloat16##input_m); \
  }
3557 
// Forwards all seven arguments, unchanged and in the same order, to
// CALL_TEST_NEON_HELPER_2Op, wrapping the call in its own brace-enclosed block.
#define CALL_TEST_NEON_HELPER_3DIFF( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }
3574 
// Defines TEST(<mnemonic>_8H) (8H destination, 8B sources) and
// TEST(<mnemonic>2_8H) (the "<mnemonic>2" variant: 8H destination, 16B
// sources). Both use kInput16bitsAccDestination for the destination and
// kInput8bits<input> for both sources.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8B, 8B, \
        kInput16bitsAccDestination, kInput8bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 16B, 16B, \
        kInput16bitsAccDestination, kInput8bits##input, kInput8bits##input); \
  }
3594 
// Defines TEST(<mnemonic>_4S) (4S destination, 4H sources) and
// TEST(<mnemonic>2_4S) (the "<mnemonic>2" variant: 4S destination, 8H
// sources). Both use kInput32bitsAccDestination for the destination and
// kInput16bits<input> for both sources.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4H, 4H, \
        kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 8H, 8H, \
        kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  }
3614 
// Defines TEST(<mnemonic>_2D) (2D destination, 2S sources) and
// TEST(<mnemonic>2_2D) (the "<mnemonic>2" variant: 2D destination, 4S
// sources). Both use kInput64bitsAccDestination for the destination and
// kInput32bits<input> for both sources.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2S, 2S, \
        kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 4S, 4S, \
        kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  }
3634 
// Expands to the 4S tests followed by the 2D tests for `mnemonic`; no 8H tests
// are generated by this variant.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3638 
// Expands to the full set of long tests for `mnemonic`: the 8H tests, then the
// 4S and 2D tests (the latter two via DEFINE_TEST_NEON_3DIFF_LONG_SD).
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)
3643 
// Defines TEST(<mnemonic>_S): scalar S destination with H sources, using
// kInput32bitsAccDestination and kInput16bits<input> for both sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, S, H, H, \
        kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  }
3654 
// Defines TEST(<mnemonic>_D): scalar D destination with S sources, using
// kInput64bitsAccDestination and kInput32bits<input> for both sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, D, S, S, \
        kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  }
3665 
// Expands to the scalar-long S test followed by the scalar-long D test for
// `mnemonic`.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3669 
// Defines six tests: <mnemonic>_8H, _4S, _2D and <mnemonic>2_8H, _4S, _2D, in
// that order. In every test the destination and first source share the wider
// form, while the second source uses the narrower form (8B/4H/2S for
// `mnemonic`, 16B/8H/4S for `mnemonic##2`). The first source list matches the
// destination width and the second source list is half that width.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8H, 8B, \
        kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4S, 4H, \
        kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2D, 2S, \
        kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 8H, 16B, \
        kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 4S, 8H, \
        kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 2D, 4S, \
        kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
  }
3725 
// Defines six tests: <mnemonic>_8B, _4H, _2S and <mnemonic>2_16B, _8H, _4S, in
// that order. In every test both sources use a form with elements twice as
// wide as the destination's (8H/4S/2D), and both source lists are
// kInput<2N>bits<input> while the destination list is
// kInput<N>bitsAccDestination.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8B, 8H, 8H, \
        kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4H, 4S, 4S, \
        kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2S, 2D, 2D, \
        kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 16B, 8H, 8H, \
        kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 4S, 4S, \
        kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 2D, 2D, \
        kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
  }
3781 
// Defines TEST(<mnemonic>_2S) (2S destination, 8B sources) and
// TEST(<mnemonic>_4S) (4S destination, 16B sources). Both use
// kInput32bitsAccDestination for the destination and kInput8bits<input> for
// both sources.
#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2S, 8B, 8B, \
        kInput32bitsAccDestination, kInput8bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 16B, 16B, \
        kInput32bitsAccDestination, kInput8bits##input, kInput8bits##input); \
  }
3801 
3802 
// Forwards all five arguments, unchanged and in the same order, to
// CALL_TEST_NEON_HELPER_2OpImm, wrapping the call in its own brace-enclosed
// block.
#define CALL_TEST_NEON_HELPER_2OPIMM( \
    mnemonic, vdform, vnform, input_n, input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }
3815 
// Defines seven tests named <mnemonic>_<form>_2OPIMM for the forms 8B, 16B,
// 4H, 8H, 2S, 4S and 2D, in that order. Destination and source use the same
// form; the source list is kInput<N>bits<input> and the immediate list is
// kInput<N>bitsImm<input_imm>, where N is the form's element width.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3866 
// Defines seven tests named <mnemonic>_<form>_2OPIMM for the destination forms
// 8B, 16B, 4H, 8H, 2S, 4S and 2D, in that order. The source form is the scalar
// form of the same element width (B, H, S or D); the source list is
// kInput<N>bits<input> and the immediate list is kInput<N>bitsImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3917 
// Defines six tests: <mnemonic>_8B_2OPIMM, _4H_2OPIMM, _2S_2OPIMM and
// <mnemonic>2_16B_2OPIMM, _8H_2OPIMM, _4S_2OPIMM, in that order. The source
// form has elements twice as wide as the destination's (8H/4S/2D). The source
// list follows the source width (kInput<2N>bits<input>) and the immediate list
// follows the destination width (kInput<N>bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 16B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }
3961 
// Defines TEST(<mnemonic>_B_2OPIMM), TEST(<mnemonic>_H_2OPIMM) and
// TEST(<mnemonic>_S_2OPIMM), in that order. The scalar source form is one step
// wider than the destination (H, S, D respectively). The source list follows
// the source width (kInput<2N>bits<input>) and the immediate list follows the
// destination width (kInput<N>bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }
3984 
// Stamps out TEST(<mnemonic>_<form>_2OPIMM) for the 4H, 8H, 2S, 4S and 2D
// forms. Every test forwards to CALL_TEST_NEON_HELPER_2OPIMM with the same
// form passed twice and with kInputDoubleImm##input_imm as the immediate
// list, regardless of the lane type.
//
// Register input lists: kInputFloat16##input for the H forms,
// kInputFloat##input for 4S and kInputDouble##input for 2D.
//
// NOTE(review): the 2S test always uses kInputFloatBasic; the `input`
// parameter is not consulted for that form, unlike every other form here.
// This is preserved as found. Confirm whether it is intentional before
// changing it, because it determines the data the test runs on.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInputFloat16##input, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInputFloat16##input, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }
4021 
// Stamps out TEST(<mnemonic>_<form>_2OPIMM) for the 4H, 8H, 2S, 4S and 2D
// forms. Every test forwards to CALL_TEST_NEON_HELPER_2OPIMM with the same
// form passed twice.
//
// Register input lists: kInputFloat16##input (H forms), kInputFloat##input
// (4S) and kInputDouble##input (2D). Immediate lists follow the lane width:
// kInput16bitsImm##input_imm, kInput32bitsImm##input_imm and
// kInput64bitsImm##input_imm respectively.
//
// NOTE(review): the 2S test always uses kInputFloatBasic rather than
// kInputFloat##input, unlike the 4S test. This is preserved as found.
// Confirm whether it is intentional before changing it, because it determines
// the data the test runs on.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInputFloat16##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInputFloat16##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInput64bitsImm##input_imm); \
  }
4058 
// Stamps out TEST(<mnemonic>_H_2OPIMM), TEST(<mnemonic>_S_2OPIMM) and
// TEST(<mnemonic>_D_2OPIMM). Every test forwards to
// CALL_TEST_NEON_HELPER_2OPIMM with the same scalar format passed twice and
// an immediate list matching the lane width
// (kInput<16|32|64>bitsImm##input_imm).
//
// NOTE(review): only the D test honours the `input` parameter
// (kInputDouble##input). The H and S tests always use kInputFloat16Basic and
// kInputFloatBasic. This is preserved as found. Confirm whether it is
// intentional before changing it, because it determines the data the tests
// run on.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, H, kInputFloat16Basic, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloatBasic, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInput64bitsImm##input_imm); \
  }
4081 
// Stamps out TEST(<mnemonic>_<form>_2OPIMM) for the 4H, 8H, 2S, 4S and 2D
// forms. Every test forwards to CALL_TEST_NEON_HELPER_2OPIMM with the same
// form passed twice, using the register input list and the immediate list
// that match the lane width: kInput<N>bits##input and
// kInput<N>bitsImm##input_imm with N = 16 (H), 32 (S) or 64 (D).
#define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
4118 
// Stamps out TEST(<mnemonic>_D_2OPIMM), which forwards to
// CALL_TEST_NEON_HELPER_2OPIMM with the D format passed twice, the
// kInput64bits##input register list and the kInput64bitsImm##input_imm
// immediate list.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
4127 
// Stamps out the H, S and D scalar tests for a two-operand-with-immediate
// instruction. The H and S tests are written out here (kInput16bits* and
// kInput32bits* lists, same format passed twice); the D test is obtained by
// expanding DEFINE_TEST_NEON_2OPIMM_SCALAR_D with the same arguments.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
4144 
// Stamps out TEST(<mnemonic>_D_2OPIMM), which forwards to
// CALL_TEST_NEON_HELPER_2OPIMM with the D format passed twice, the
// kInputDouble##input register list and the kInputDoubleImm##input_imm
// immediate list.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }
4153 
// Stamps out the H, S and D scalar tests that pair a floating-point register
// input list with the kInputDoubleImm##input_imm immediate list. The H and S
// tests are written out here (kInputFloat16##input and kInputFloat##input,
// same format passed twice); the D test is obtained by expanding
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D with the same arguments.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, H, kInputFloat16##input, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
4170 
// Stamps out the B, H, S and D scalar tests for a two-operand-with-immediate
// instruction. The B test is written out here (kInput8bits##input and
// kInput8bitsImm##input_imm, B format passed twice); the remaining tests are
// obtained by expanding DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD with the same
// arguments.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
4180 
// Stamps out six tests for a two-operand-with-immediate instruction whose
// first format has wider lanes than its second:
//   - <mnemonic>  with 8H/8B, 4S/4H and 2D/2S, and
//   - <mnemonic>2 with 8H/16B, 4S/8H and 2D/4S.
// The tests are named TEST(<mnemonic>_<first>_2OPIMM) and
// TEST(<mnemonic>2_<first>_2OPIMM).
//
// Both the register input list (kInput<N>bits##input) and the immediate list
// (kInput<N>bitsImm##input_imm) are chosen by the lane width of the second
// format: N = 8, 16 or 32.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 8H, kInput16bits##input, \
        kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 2D, 4S, kInput32bits##input, \
        kInput32bitsImm##input_imm); \
  }
4224 
// Upper-case alias for CALL_TEST_NEON_HELPER_ByElement_Dot_Product: forwards
// all nine arguments unchanged and in the same order, wrapping the expansion
// in its own brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices, \
    vm_subvector_count) \
  { \
    CALL_TEST_NEON_HELPER_ByElement_Dot_Product( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices, \
        vm_subvector_count); \
  }
4245 
// Stamps out TEST(<mnemonic>_2S_8B_B) and TEST(<mnemonic>_4S_16B_B).
//
// Both tests forward to CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT with
// vmform B, the kInput32bits##input_d list for input_d, the kInput8bits*
// lists for input_n and input_m, kInputSIndices as the index list, and a
// vm_subvector_count of 4.
#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(mnemonic, input_d, input_n, \
                                               input_m) \
  TEST(mnemonic##_2S_8B_B) { \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT( \
        mnemonic, 2S, 8B, B, \
        kInput32bits##input_d, kInput8bits##input_n, kInput8bits##input_m, \
        kInputSIndices, 4); \
  } \
  TEST(mnemonic##_4S_16B_B) { \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT( \
        mnemonic, 4S, 16B, B, \
        kInput32bits##input_d, kInput8bits##input_n, kInput8bits##input_m, \
        kInputSIndices, 4); \
  }
4272 
// Upper-case alias for CALL_TEST_NEON_HELPER_ByElement: forwards all eight
// arguments unchanged and in the same order, wrapping the expansion in its
// own brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices); \
  }
4291 
// Stamps out the by-element tests TEST(<mnemonic>_4H_4H_H),
// TEST(<mnemonic>_8H_8H_H), TEST(<mnemonic>_2S_2S_S) and
// TEST(<mnemonic>_4S_4S_S).
//
// Every test forwards to CALL_TEST_NEON_HELPER_BYELEMENT with identical
// vdform and vnform. The H tests use the kInput16bits* lists with
// kInputHIndices; the S tests use the kInput32bits* lists with
// kInputSIndices.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4H, 4H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 8H, 8H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
4333 
// Stamps out the scalar by-element tests TEST(<mnemonic>_H_H_H) and
// TEST(<mnemonic>_S_S_S).
//
// Each test forwards to CALL_TEST_NEON_HELPER_BYELEMENT with the same scalar
// format for vdform, vnform and vmform. The H test uses the kInput16bits*
// lists with kInputHIndices; the S test uses the kInput32bits* lists with
// kInputSIndices.
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, H, H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
4355 
// Stamps out the by-element tests TEST(<mnemonic>_4H_4H_H),
// TEST(<mnemonic>_8H_8H_H), TEST(<mnemonic>_2S_2S_S),
// TEST(<mnemonic>_4S_4S_S) and TEST(<mnemonic>_2D_2D_D).
//
// Every test forwards to CALL_TEST_NEON_HELPER_BYELEMENT with identical
// vdform and vnform. Input and index lists by lane type:
//   H: kInputFloat16##<input_*> with kInputHIndices
//   S: kInputFloat##<input_*>   with kInputSIndices
//   D: kInputDouble##<input_*>  with kInputDIndices
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4H, 4H, H, \
        kInputFloat16##input_d, kInputFloat16##input_n, \
        kInputFloat16##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 8H, 8H, H, \
        kInputFloat16##input_d, kInputFloat16##input_n, \
        kInputFloat16##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2S, S, \
        kInputFloat##input_d, kInputFloat##input_n, kInputFloat##input_m, \
        kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4S, S, \
        kInputFloat##input_d, kInputFloat##input_n, kInputFloat##input_m, \
        kInputSIndices); \
  } \
  TEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2D, 2D, D, \
        kInputDouble##input_d, kInputDouble##input_n, kInputDouble##input_m, \
        kInputDIndices); \
  }
4407 
// Stamps out the by-element tests TEST(<mnemonic>_2S_2H_H) and
// TEST(<mnemonic>_4S_4H_H).
//
// Both tests forward to CALL_TEST_NEON_HELPER_BYELEMENT with vmform H, the
// kInputFloat16##input_n and kInputFloat16##input_m lists, and
// kInputHIndices.
//
// NOTE(review): `input_d` is accepted but never used; the input_d argument is
// always kInputFloatAccDestination. This is preserved as found. Confirm
// whether the parameter only exists to keep the signature parallel with the
// other *_BYELEMENT macros.
#define DEFINE_TEST_NEON_FHM_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2H, H, \
        kInputFloatAccDestination, \
        kInputFloat16##input_n, kInputFloat16##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4H, H, \
        kInputFloatAccDestination, \
        kInputFloat16##input_n, kInputFloat16##input_m, \
        kInputHIndices); \
  }
4429 
// Defines the scalar by-element tests for a floating-point `mnemonic`:
//   mnemonic##_H_H_H  - kInputFloat16* inputs, kInputHIndices lane indices.
//   mnemonic##_S_S_S  - kInputFloat* inputs,   kInputSIndices lane indices.
//   mnemonic##_D_D_D  - kInputDouble* inputs,  kInputDIndices lane indices.
//
// `inp_d`, `inp_n` and `inp_m` are suffixes pasted onto the input-array prefix
// of each test to select the destination, first source and second source
// input arrays respectively.
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_H_H_H) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    H,                                      \
                                    H,                                      \
                                    H,                                      \
                                    kInputFloat16##inp_d,                   \
                                    kInputFloat16##inp_n,                   \
                                    kInputFloat16##inp_m,                   \
                                    kInputHIndices);                        \
  }                                                                         \
  TEST(mnemonic##_S_S_S) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    S,                                      \
                                    S,                                      \
                                    S,                                      \
                                    kInputFloat##inp_d,                     \
                                    kInputFloat##inp_n,                     \
                                    kInputFloat##inp_m,                     \
                                    kInputSIndices);                        \
  }                                                                         \
  TEST(mnemonic##_D_D_D) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    D,                                      \
                                    D,                                      \
                                    D,                                      \
                                    kInputDouble##inp_d,                    \
                                    kInputDouble##inp_n,                    \
                                    kInputDouble##inp_m,                    \
                                    kInputDIndices);                        \
  }
4461 
4462 
// Defines the by-element tests for an integer `mnemonic` whose destination
// and source arrangements differ. Four tests are generated:
//   mnemonic##_4S_4H_H   - `mnemonic`,    32-bit destination, 16-bit sources.
//   mnemonic##2_4S_8H_H  - `mnemonic##2`, 32-bit destination, 16-bit sources.
//   mnemonic##_2D_2S_S   - `mnemonic`,    64-bit destination, 32-bit sources.
//   mnemonic##2_2D_4S_S  - `mnemonic##2`, 64-bit destination, 32-bit sources.
//
// `input_d`, `input_n` and `input_m` are suffixes pasted onto the
// kInput<N>bits prefixes to select the input arrays. Lane indices come from
// kInputHIndices for the H-element tests and kInputSIndices for the S-element
// tests.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) {                                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
                                    4S,                                      \
                                    4H,                                      \
                                    H,                                       \
                                    kInput32bits##input_d,                   \
                                    kInput16bits##input_n,                   \
                                    kInput16bits##input_m,                   \
                                    kInputHIndices);                         \
  }                                                                          \
  TEST(mnemonic##2_4S_8H_H) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
                                    4S,                                      \
                                    8H,                                      \
                                    H,                                       \
                                    kInput32bits##input_d,                   \
                                    kInput16bits##input_n,                   \
                                    kInput16bits##input_m,                   \
                                    kInputHIndices);                         \
  }                                                                          \
  TEST(mnemonic##_2D_2S_S) {                                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
                                    2D,                                      \
                                    2S,                                      \
                                    S,                                       \
                                    kInput64bits##input_d,                   \
                                    kInput32bits##input_n,                   \
                                    kInput32bits##input_m,                   \
                                    kInputSIndices);                         \
  }                                                                          \
  TEST(mnemonic##2_2D_4S_S) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
                                    2D,                                      \
                                    4S,                                      \
                                    S,                                       \
                                    kInput64bits##input_d,                   \
                                    kInput32bits##input_n,                   \
                                    kInput32bits##input_m,                   \
                                    kInputSIndices);                         \
  }
4504 
// Defines the scalar by-element tests for an integer `mnemonic` whose
// destination and source sizes differ:
//   mnemonic##_S_H_H - 32-bit destination input, 16-bit source inputs,
//                      kInputHIndices lane indices.
//   mnemonic##_D_S_S - 64-bit destination input, 32-bit source inputs,
//                      kInputSIndices lane indices.
//
// `input_d`, `input_n` and `input_m` are suffixes pasted onto the
// kInput<N>bits prefixes to select the input arrays.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic,   \
                                               input_d,    \
                                               input_n,    \
                                               input_m)    \
  TEST(mnemonic##_S_H_H) {                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
                                    S,                     \
                                    H,                     \
                                    H,                     \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices);       \
  }                                                        \
  TEST(mnemonic##_D_S_S) {                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
                                    D,                     \
                                    S,                     \
                                    S,                     \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices);       \
  }
4529 
4530 
// Convenience wrapper around CALL_TEST_NEON_HELPER_OpImmOpImm for
// instructions that take two operands, each paired with an immediate list.
//
// It passes &MacroAssembler::mnemonic and `mnemonic` itself, and supplies
// `variant` twice.
// NOTE(review): presumably `variant` is used as both the destination and the
// source vector format; confirm against CALL_TEST_NEON_HELPER_OpImmOpImm.
// The four input arguments are forwarded unchanged, in order.
#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                 \
                                      variant,                  \
                                      input_d,                  \
                                      input_imm1,               \
                                      input_n,                  \
                                      input_imm2)               \
  {                                                             \
    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \
                                     mnemonic,                  \
                                     variant,                   \
                                     variant,                   \
                                     input_d,                   \
                                     input_imm1,                \
                                     input_n,                   \
                                     input_imm2);               \
  }
4547 
// Defines four tests for a two-operand, two-immediate `mnemonic`, one per
// lane size:
//   mnemonic##_B - 16B variant, kInput8bits*  / kInput8bitsImm*  inputs.
//   mnemonic##_H - 8H variant,  kInput16bits* / kInput16bitsImm* inputs.
//   mnemonic##_S - 4S variant,  kInput32bits* / kInput32bitsImm* inputs.
//   mnemonic##_D - 2D variant,  kInput64bits* / kInput64bitsImm* inputs.
//
// `input_d` and `input_n` are suffixes pasted onto the kInput<N>bits prefix;
// `input_imm1` and `input_imm2` are suffixes pasted onto the
// kInput<N>bitsImm prefix.
#define DEFINE_TEST_NEON_2OP2IMM(mnemonic,                      \
                                 input_d,                       \
                                 input_imm1,                    \
                                 input_n,                       \
                                 input_imm2)                    \
  TEST(mnemonic##_B) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  16B,                          \
                                  kInput8bits##input_d,         \
                                  kInput8bitsImm##input_imm1,   \
                                  kInput8bits##input_n,         \
                                  kInput8bitsImm##input_imm2);  \
  }                                                             \
  TEST(mnemonic##_H) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  8H,                           \
                                  kInput16bits##input_d,        \
                                  kInput16bitsImm##input_imm1,  \
                                  kInput16bits##input_n,        \
                                  kInput16bitsImm##input_imm2); \
  }                                                             \
  TEST(mnemonic##_S) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  4S,                           \
                                  kInput32bits##input_d,        \
                                  kInput32bitsImm##input_imm1,  \
                                  kInput32bits##input_n,        \
                                  kInput32bitsImm##input_imm2); \
  }                                                             \
  TEST(mnemonic##_D) {                                          \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                  2D,                           \
                                  kInput64bits##input_d,        \
                                  kInput64bitsImm##input_imm1,  \
                                  kInput64bits##input_n,        \
                                  kInput64bitsImm##input_imm2); \
  }
4585 
4586 
// Advanced SIMD copy.
// INS pairs each of its two operands with a LaneCountFromZero immediate list;
// DUP takes a single LaneCountFromZero immediate list.
DEFINE_TEST_NEON_2OP2IMM(
    ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)


// Advanced SIMD scalar copy.
// Scalar form of DUP, using the same LaneCountFromZero immediate list.
DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4595 
4596 
// Advanced SIMD three same.
// Each line instantiates the tests for one mnemonic, with `Basic` selecting
// the input arrays.
// NOTE(review): the macro-name suffix (_NO2D, _HS, _FP, _8B_16B) presumably
// names the vector arrangements covered; confirm against the macro
// definitions earlier in this file.
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
// UDOT and SDOT are listed with this group but are instantiated through a
// 3DIFF macro rather than a 3SAME one.
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4679 
4680 
// Advanced SIMD scalar three same.
// Scalar counterparts of the three-same tests, one mnemonic per line, all
// using the `Basic` inputs.
// NOTE(review): the _D and _HS suffixes presumably restrict the test to the
// D-sized or H/S-sized scalar forms; confirm against the macro definitions.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4715 
4716 
// Advanced SIMD FHM instructions (FMLAL, FMLSL).
// These are oddballs: they are encoded under the 3SAME group but behave
// quite differently.
// The "2" variants (fmlal2, fmlsl2) are instantiated as separate mnemonics.
DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)
4724 
4725 
// Advanced SIMD three different.
// One mnemonic per line, all using the `Basic` inputs. The macro family
// (_LONG, _WIDE, _NARROW) follows the instruction's name suffix (l, w, hn).
// NOTE(review): _LONG_SD and _LONG_8H presumably restrict the arrangements
// tested; confirm against the macro definitions earlier in this file.
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)


// Advanced SIMD scalar three different.
// Only the saturating doubling multiplies are instantiated in scalar form.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4759 
4760 
4761 // Advanced SIMD scalar pairwise.
TEST(addp_SCALAR)4762 TEST(addp_SCALAR) {
4763   CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
4764 }
// Floating-point scalar pairwise operations, all using the `Basic` inputs.
// NOTE(review): the _SD suffix presumably means S- and D-sized results are
// covered; confirm against the macro definition.
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4770 
4771 
// Advanced SIMD shift by immediate.
// The last argument names the immediate list used for each mnemonic: right
// shifts use TypeWidth, left shifts use TypeWidthFromZero, and the
// fixed-point conversions use TypeWidthFromZeroToWidth.
// NOTE(review): presumably these lists span 1..width, 0..width-1 and 0..width
// respectively; confirm against the kInput*Imm* array definitions.
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4805 
4806 
// Advanced SIMD scalar shift by immediate.
// Scalar counterparts of the shift-by-immediate tests. As in the vector
// group, the last argument names the immediate list used for each mnemonic.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4836 
4837 
// Advanced SIMD two-register miscellaneous.
// One mnemonic per line. Arithmetic and logical operations use the `Basic`
// inputs; rounding and conversion operations use the `Conversions` inputs.
// Compare-against-zero forms are instantiated through the 2OPIMM macros with
// the `Zero` immediate list.
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
// SHLL has its own dedicated immediate list (SHLL).
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
4905 
4906 
// Advanced SIMD scalar two-register miscellaneous.
// Scalar counterparts of the two-register miscellaneous tests. As in the
// vector group, compare-against-zero forms use the `Zero` immediate list and
// conversions use the `Conversions` inputs.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
TEST(fcvtxn_SCALAR)4933 TEST(fcvtxn_SCALAR) {
4934   CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
4935 }
// Remaining scalar two-register miscellaneous tests: unsigned conversions,
// floating-point compares against zero, and FRSQRTE.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
4945 
4946 
// Advanced SIMD across lanes.
// Integer reductions use the ACROSS / ACROSS_LONG macros; floating-point
// reductions use ACROSS_FP. All use the `Basic` inputs.
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)


// Advanced SIMD permute.
// The permute instructions reuse the generic three-same test macro.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)
4968 
4969 
// Advanced SIMD vector x indexed element.
// Each macro takes three input-set names after the mnemonic (destination,
// first source, second source); every instantiation here uses `Basic` for all
// three.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)


// Advanced SIMD scalar x indexed element.
// Scalar counterparts of the by-element tests, again with `Basic` for all
// three input sets.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
5007 
5008 
// Advanced SIMD FHM instructions (FMLAL, FMLSL), by-element forms.
// The "2" variants (fmlal2, fmlsl2) are instantiated as separate mnemonics.
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
5013 
5014 
5015 #undef __
5016 #define __ masm->
5017 
5018 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) &&                 \
5019     defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
5020     (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
5021 
5022 // Generate a function that stores zero to a hard-coded address.
GenerateStoreZero(MacroAssembler* masm, int32_t* target)5023 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
5024   masm->Reset();
5025 
5026   UseScratchRegisterScope temps(masm);
5027   Register temp = temps.AcquireX();
5028   __ Mov(temp, reinterpret_cast<intptr_t>(target));
5029   __ Str(wzr, MemOperand(temp));
5030   __ Ret();
5031 
5032   masm->FinalizeCode();
5033   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5034 }
5035 
5036 
5037 // Generate a function that stores the `int32_t` argument to a hard-coded
5038 // address.
5039 // In this example and the other below, we use the `abi` object to retrieve
5040 // argument and return locations even though we could easily hard code them.
5041 // This mirrors how more generic code (e.g. templated) user would use these
5042 // mechanisms.
GenerateStoreInput(MacroAssembler* masm, int32_t* target)5043 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
5044   masm->Reset();
5045 
5046   ABI abi;
5047   Register input =
5048       Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
5049 
5050   UseScratchRegisterScope temps(masm);
5051   Register temp = temps.AcquireX();
5052   __ Mov(temp, reinterpret_cast<intptr_t>(target));
5053   __ Str(input, MemOperand(temp));
5054   __ Ret();
5055 
5056   masm->FinalizeCode();
5057   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5058 }
5059 
5060 
5061 // A minimal implementation of a `pow` function.
GeneratePow(MacroAssembler* masm, unsigned pow)5062 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
5063   masm->Reset();
5064 
5065   ABI abi;
5066   Register input =
5067       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5068   Register result =
5069       Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
5070   UseScratchRegisterScope temps(masm);
5071   Register temp = temps.AcquireX();
5072 
5073   __ Mov(temp, 1);
5074   for (unsigned i = 0; i < pow; i++) {
5075     __ Mul(temp, temp, input);
5076   }
5077   __ Mov(result, temp);
5078   __ Ret();
5079 
5080   masm->FinalizeCode();
5081   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5082 }
5083 
5084 
GenerateSum(MacroAssembler* masm)5085 Instruction* GenerateSum(MacroAssembler* masm) {
5086   masm->Reset();
5087 
5088   ABI abi;
5089   VRegister input_1 =
5090       VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
5091   Register input_2 =
5092       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5093   VRegister input_3 =
5094       VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
5095   VRegister result =
5096       VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
5097 
5098   UseScratchRegisterScope temps(masm);
5099   VRegister temp = temps.AcquireD();
5100 
5101   __ Fcvt(input_1.D(), input_1);
5102   __ Scvtf(temp, input_2);
5103   __ Fadd(temp, temp, input_1.D());
5104   __ Fadd(result, temp, input_3);
5105   __ Ret();
5106 
5107   masm->FinalizeCode();
5108   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5109 }
5110 
5111 
// Exercise `simulator.RunFrom` with generated routines of several signatures:
// no argument and no result, one integer argument, one integer argument with
// an integer result, and mixed FP/integer arguments with an FP result.
TEST(RunFrom) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  // `void f()`: the routine overwrites the sentinel with zero.
  int32_t stored = 0xbad;
  Instruction* store_zero = GenerateStoreZero(&masm, &stored);
  simulator.RunFrom(store_zero);
  VIXL_CHECK(stored == 0);

  // `void f(int32_t)`: the routine stores its argument.
  int32_t input = 0xf00d;
  Instruction* store_input = GenerateStoreInput(&masm, &stored);
  simulator.RunFrom<void, int32_t>(store_input, input);
  VIXL_CHECK(stored == 0xf00d);

  // `int64_t f(int64_t)`: powers 0, 1 and 10.
  const int64_t pow_0 =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
  VIXL_CHECK(pow_0 == 1);
  const int64_t pow_1 =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
  VIXL_CHECK(pow_1 == 123);
  const int64_t pow_10 =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
  VIXL_CHECK(pow_10 == 1024);

  // `double f(float, int64_t, double)`: several arguments passed in registers.
  Instruction* sum = GenerateSum(&masm);
  const double total =
      simulator.RunFrom<double, float, int64_t, double>(sum, 1.0, 2, 3.0);
  VIXL_CHECK(total == 6.0);
}
5143 #endif
5144 
5145 
5146 }  // namespace aarch64
5147 }  // namespace vixl
5148