1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <cfloat>
28 #include <cstdio>
29 #include <sstream>
30
31 #include "test-runner.h"
32 #include "test-utils.h"
33
34 #include "aarch64/cpu-features-auditor-aarch64.h"
35 #include "aarch64/macro-assembler-aarch64.h"
36 #include "aarch64/simulator-aarch64.h"
37 #include "aarch64/test-simulator-inputs-aarch64.h"
38 #include "aarch64/test-simulator-traces-aarch64.h"
39 #include "aarch64/test-utils-aarch64.h"
40
41 namespace vixl {
42 namespace aarch64 {
43
44 // ==== Simulator Tests ====
45 //
46 // These simulator tests check instruction behaviour against a trace taken from
47 // real AArch64 hardware. The same test code is used to generate the trace; the
48 // results are printed to stdout when the test is run with
49 // --generate_test_trace.
50 //
51 // The input lists and expected results are stored in test/traces. The expected
52 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
53 // test for a new instruction is described at the top of
54 // test-simulator-traces-aarch64.h.
55
// Shorthand so generated-code sequences read as `__ Mov(...)`, expanding to
// `masm.Mov(...)`.
#define __ masm.
// Prefix test names so all simulator tests can be selected as one group.
#define TEST(name) TEST_(AARCH64_SIM_##name)

// Default SETUP: request no optional CPU features beyond the baseline.
#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

// Simulator build: the generated code is decoded and run on the VIXL
// simulator, which can model any requested combination of CPU features.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());

#define START()                                                         \
  masm.Reset();                                                         \
  simulator.ResetState();                                               \
  __ PushCalleeSavedRegisters();                                        \
  /* The infrastructure code hasn't been covered at the moment, e.g. */ \
  /* prologue/epilogue. Suppress tagging mis-match exception before */  \
  /* this point. */                                                     \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                  \
    __ Hlt(DebugHltOpcode::kMTEActive);                                 \
  }                                                                     \
  if (Test::trace_reg()) {                                              \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_write()) {                                            \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_sim()) {                                              \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                 \
  }

#define END()                                          \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) { \
    __ Hlt(DebugHltOpcode::kMTEInactive);              \
  }                                                    \
  __ Trace(LOG_ALL, TRACE_DISABLE);                    \
  __ PopCalleeSavedRegisters();                        \
  __ Ret();                                            \
  masm.FinalizeCode()

#define TRY_RUN(skipped)                                                \
  DISASSEMBLE();                                                        \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
  /* The simulator can run every test. */                               \
  *skipped = false


#else  // VIXL_INCLUDE_SIMULATOR_AARCH64

// Native build: the generated code is executed directly on the host CPU.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()

#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()

#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()

#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively. */                                           \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of */     \
    /* AArch64LegacyBaseline is a stopgap. */                                 \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (this_machine.Has(requirements)) {                                     \
      masm.GetBuffer()->SetExecutable();                                      \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      masm.GetBuffer()->SetWritable();                                        \
      *skipped = false;                                                       \
    } else {                                                                  \
      std::stringstream os;                                                   \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from */    \
      /* tools/threaded_test.py. */                                           \
      os << "SKIPPED: Missing features: { ";                                  \
      os << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                        \
      *skipped = true;                                                        \
    }                                                                         \
  }


#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64


// Disassemble the generated code to stdout when requested on the command
// line (--disassemble).
#define DISASSEMBLE()                                             \
  if (Test::disassemble()) {                                      \
    PrintDisassembler disasm(stdout);                             \
    CodeBuffer* buffer = masm.GetBuffer();                        \
    Instruction* start = buffer->GetStartAddress<Instruction*>(); \
    Instruction* end = buffer->GetEndAddress<Instruction*>();     \
    disasm.DisassembleBuffer(start, end);                         \
  }

// The maximum number of errors to report in detail for each test.
static const unsigned kErrorReportLimit = 8;
168
169
170 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
171 // templated test functions.
rawbits_to_fp(uint32_t bits)172 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
173
rawbits_to_fp(uint64_t bits)174 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
175
176 // The rawbits_to_fp functions are only used for printing decimal values so we
177 // just approximate FP16 as double.
rawbits_to_fp(uint16_t bits)178 static double rawbits_to_fp(uint16_t bits) {
179 return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
180 }
181
182
// MacroAssembler member function pointers to pass to the test dispatchers.
// Each typedef matches the signature shared by a family of mnemonics, so a
// single dispatcher routine can exercise many different instructions.

// Scalar FP instruction with one source register.
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn);
// Scalar FP instruction with two source registers.
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm);
// Scalar FP instruction with three source registers (e.g. fused
// multiply-add forms).
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm,
                                                  const VRegister& fa);
// FP comparison of two registers; the result is delivered in the NZCV flags.
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
                                                  const VRegister& fm);
// FP comparison against an immediate value.
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
                                                      double value);
// FP to integer conversion.
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const VRegister& fn);
// FP to fixed-point conversion; `fbits` is the number of fraction bits.
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const VRegister& fn,
                                                      int fbits);
// Fixed-point to FP conversion; `fbits` is the number of fraction bits.
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
// consolidated into one routine.
// NEON instruction with one source vector register.
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn);
// NEON instruction with two source vector registers.
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn,
                                                    const VRegister& vm);
// NEON by-element instruction; `vm_index` selects the lane of vm to use.
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
                                                          const VRegister& vn,
                                                          const VRegister& vm,
                                                          int vm_index);
// NEON instruction taking two immediates where vd is both a source and the
// destination (update form).
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    const VRegister& vd, int imm1, const VRegister& vn, int imm2);
218
219 // This helps using the same typename for both the function pointer
220 // and the array of immediates passed to helper routines.
221 template <typename T>
222 class Test2OpImmediateNEONHelper_t {
223 public:
224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
225 const VRegister& vn,
226 T imm);
227 };
228
229
// Maximum number of hex characters required to represent values of either
// templated type.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  size_t wider = (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta);
  // Two hex digits per byte (8 bits / 4 bits-per-digit).
  return static_cast<unsigned>(wider * 2);
}
237
238
239 // Standard test dispatchers.
240
241
// Emit (and run) a loop that applies a one-source-operand FP instruction,
// selected by `helper`, to every element of the rawbits table at `inputs`,
// storing each raw result sequentially at `results`. `d_size` and `n_size`
// select the destination and source register widths (D, S or H). `*skipped`
// is set to false if the code actually ran; native runs may skip instead
// when the host lacks the required CPU features (see TRY_RUN).
static void Test1Op_Helper(Test1OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned d_size,
                           unsigned n_size,
                           bool* skipped) {
  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
              (d_size == kHRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  CPUFeatures features;
  features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  // For frint{32,64}{x,z} variants (FEAT_FRINTTS).
  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Scale factor (log2 of the element size) used when indexing the input
  // table, and the source register matching that element size.
  int n_index_shift;
  VRegister fd;
  VRegister fn;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
    fn = d1;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
    fn = s1;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
    fn = h1;
  }

  // Destination register width can differ from the source (conversions).
  if (d_size == kDRegSize) {
    fd = d0;
  } else if (d_size == kSRegSize) {
    fd = s0;
  } else {
    fd = h0;
  }


  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // The instruction under test must expand to exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn);
  }
  // Store the raw result and advance the output pointer.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
313
314
315 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
316 // rawbits representations of doubles or floats. This ensures that exact bit
317 // comparisons can be performed.
318 template <typename Tn, typename Td>
Test1Op(const char* name, Test1OpFPHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)319 static void Test1Op(const char* name,
320 Test1OpFPHelper_t helper,
321 const Tn inputs[],
322 unsigned inputs_length,
323 const Td expected[],
324 unsigned expected_length) {
325 VIXL_ASSERT(inputs_length > 0);
326
327 const unsigned results_length = inputs_length;
328 Td* results = new Td[results_length];
329
330 const unsigned d_bits = sizeof(Td) * 8;
331 const unsigned n_bits = sizeof(Tn) * 8;
332 bool skipped;
333
334 Test1Op_Helper(helper,
335 reinterpret_cast<uintptr_t>(inputs),
336 inputs_length,
337 reinterpret_cast<uintptr_t>(results),
338 d_bits,
339 n_bits,
340 &skipped);
341
342 if (Test::generate_test_trace()) {
343 // Print the results.
344 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
345 for (unsigned d = 0; d < results_length; d++) {
346 printf(" 0x%0*" PRIx64 ",\n",
347 d_bits / 4,
348 static_cast<uint64_t>(results[d]));
349 }
350 printf("};\n");
351 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
352 } else if (!skipped) {
353 // Check the results.
354 VIXL_CHECK(expected_length == results_length);
355 unsigned error_count = 0;
356 unsigned d = 0;
357 for (unsigned n = 0; n < inputs_length; n++, d++) {
358 if (results[d] != expected[d]) {
359 if (++error_count > kErrorReportLimit) continue;
360
361 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
362 name,
363 n_bits / 4,
364 static_cast<uint64_t>(inputs[n]),
365 name,
366 rawbits_to_fp(inputs[n]));
367 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
368 d_bits / 4,
369 static_cast<uint64_t>(expected[d]),
370 rawbits_to_fp(expected[d]));
371 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
372 d_bits / 4,
373 static_cast<uint64_t>(results[d]),
374 rawbits_to_fp(results[d]));
375 printf("\n");
376 }
377 }
378 VIXL_ASSERT(d == expected_length);
379 if (error_count > kErrorReportLimit) {
380 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
381 }
382 VIXL_CHECK(error_count == 0);
383 }
384 delete[] results;
385 }
386
387
// Emit (and run) a doubly-nested loop applying a two-source-operand FP
// instruction, selected by `helper`, to every (n, m) pair drawn from the
// rawbits table at `inputs`, storing each raw result sequentially at
// `results` (n-major order). `reg_size` selects the register width (D, S or
// H) used for all three operands. `*skipped` reports whether the generated
// code actually ran (see TRY_RUN).
static void Test2Op_Helper(Test2OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;

  bool double_op = reg_size == kDRegSize;
  bool float_op = reg_size == kSRegSize;
  // Scale factor (log2 of the element size) used when indexing the inputs.
  int index_shift;
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (float_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  VRegister fd;
  VRegister fn;
  VRegister fm;

  if (double_op) {
    fd = d0;
    fn = d1;
    fm = d2;
  } else if (float_op) {
    fd = s0;
    fn = s1;
    fm = s2;
  } else {
    fd = h0;
    fn = h1;
    fm = h2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // The instruction under test must expand to exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm);
  }
  // Store the raw result and advance the output pointer.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
467
468
469 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
470 // rawbits representations of doubles or floats. This ensures that exact bit
471 // comparisons can be performed.
472 template <typename T>
Test2Op(const char* name, Test2OpFPHelper_t helper, const T inputs[], unsigned inputs_length, const T expected[], unsigned expected_length)473 static void Test2Op(const char* name,
474 Test2OpFPHelper_t helper,
475 const T inputs[],
476 unsigned inputs_length,
477 const T expected[],
478 unsigned expected_length) {
479 VIXL_ASSERT(inputs_length > 0);
480
481 const unsigned results_length = inputs_length * inputs_length;
482 T* results = new T[results_length];
483
484 const unsigned bits = sizeof(T) * 8;
485 bool skipped;
486
487 Test2Op_Helper(helper,
488 reinterpret_cast<uintptr_t>(inputs),
489 inputs_length,
490 reinterpret_cast<uintptr_t>(results),
491 bits,
492 &skipped);
493
494 if (Test::generate_test_trace()) {
495 // Print the results.
496 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
497 for (unsigned d = 0; d < results_length; d++) {
498 printf(" 0x%0*" PRIx64 ",\n",
499 bits / 4,
500 static_cast<uint64_t>(results[d]));
501 }
502 printf("};\n");
503 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
504 } else if (!skipped) {
505 // Check the results.
506 VIXL_CHECK(expected_length == results_length);
507 unsigned error_count = 0;
508 unsigned d = 0;
509 for (unsigned n = 0; n < inputs_length; n++) {
510 for (unsigned m = 0; m < inputs_length; m++, d++) {
511 if (results[d] != expected[d]) {
512 if (++error_count > kErrorReportLimit) continue;
513
514 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
515 name,
516 bits / 4,
517 static_cast<uint64_t>(inputs[n]),
518 bits / 4,
519 static_cast<uint64_t>(inputs[m]),
520 name,
521 rawbits_to_fp(inputs[n]),
522 rawbits_to_fp(inputs[m]));
523 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
524 bits / 4,
525 static_cast<uint64_t>(expected[d]),
526 rawbits_to_fp(expected[d]));
527 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
528 bits / 4,
529 static_cast<uint64_t>(results[d]),
530 rawbits_to_fp(results[d]));
531 printf("\n");
532 }
533 }
534 }
535 VIXL_ASSERT(d == expected_length);
536 if (error_count > kErrorReportLimit) {
537 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
538 }
539 VIXL_CHECK(error_count == 0);
540 }
541 delete[] results;
542 }
543
544
// Emit (and run) a triply-nested loop applying a three-source-operand FP
// instruction (e.g. fused multiply-add), selected by `helper`, to every
// (n, m, a) triple drawn from the rawbits table at `inputs`, storing each raw
// result sequentially at `results` (n-major, a-minor order). `reg_size`
// selects the register width (D, S or H) for all four operands. `*skipped`
// reports whether the generated code actually ran (see TRY_RUN).
static void Test3Op_Helper(Test3OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m, loop_a;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register index_a = w5;

  bool double_op = reg_size == kDRegSize;
  bool single_op = reg_size == kSRegSize;
  // Scale factor (log2 of the element size) used when indexing the inputs.
  int index_shift;
  // Registers are constructed directly with the requested width here, rather
  // than choosing between pre-defined aliases as the other helpers do.
  VRegister fd(0, reg_size);
  VRegister fn(1, reg_size);
  VRegister fm(2, reg_size);
  VRegister fa(3, reg_size);
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (single_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  __ Mov(index_a, 0);
  __ Bind(&loop_a);
  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));

  {
    // The instruction under test must expand to exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm, fa);
  }
  // Store the raw result and advance the output pointer.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_a, index_a, 1);
  __ Cmp(index_a, inputs_length);
  __ B(lo, &loop_a);

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
619
620
621 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
622 // rawbits representations of doubles or floats. This ensures that exact bit
623 // comparisons can be performed.
624 template <typename T>
Test3Op(const char* name, Test3OpFPHelper_t helper, const T inputs[], unsigned inputs_length, const T expected[], unsigned expected_length)625 static void Test3Op(const char* name,
626 Test3OpFPHelper_t helper,
627 const T inputs[],
628 unsigned inputs_length,
629 const T expected[],
630 unsigned expected_length) {
631 VIXL_ASSERT(inputs_length > 0);
632
633 const unsigned results_length = inputs_length * inputs_length * inputs_length;
634 T* results = new T[results_length];
635
636 const unsigned bits = sizeof(T) * 8;
637 bool skipped;
638
639 Test3Op_Helper(helper,
640 reinterpret_cast<uintptr_t>(inputs),
641 inputs_length,
642 reinterpret_cast<uintptr_t>(results),
643 bits,
644 &skipped);
645
646 if (Test::generate_test_trace()) {
647 // Print the results.
648 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
649 for (unsigned d = 0; d < results_length; d++) {
650 printf(" 0x%0*" PRIx64 ",\n",
651 bits / 4,
652 static_cast<uint64_t>(results[d]));
653 }
654 printf("};\n");
655 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
656 } else if (!skipped) {
657 // Check the results.
658 VIXL_CHECK(expected_length == results_length);
659 unsigned error_count = 0;
660 unsigned d = 0;
661 for (unsigned n = 0; n < inputs_length; n++) {
662 for (unsigned m = 0; m < inputs_length; m++) {
663 for (unsigned a = 0; a < inputs_length; a++, d++) {
664 if (results[d] != expected[d]) {
665 if (++error_count > kErrorReportLimit) continue;
666
667 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
668 " (%s %g %g %g):\n",
669 name,
670 bits / 4,
671 static_cast<uint64_t>(inputs[n]),
672 bits / 4,
673 static_cast<uint64_t>(inputs[m]),
674 bits / 4,
675 static_cast<uint64_t>(inputs[a]),
676 name,
677 rawbits_to_fp(inputs[n]),
678 rawbits_to_fp(inputs[m]),
679 rawbits_to_fp(inputs[a]));
680 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
681 bits / 4,
682 static_cast<uint64_t>(expected[d]),
683 rawbits_to_fp(expected[d]));
684 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
685 bits / 4,
686 static_cast<uint64_t>(results[d]),
687 rawbits_to_fp(results[d]));
688 printf("\n");
689 }
690 }
691 }
692 }
693 VIXL_ASSERT(d == expected_length);
694 if (error_count > kErrorReportLimit) {
695 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
696 }
697 VIXL_CHECK(error_count == 0);
698 }
699 delete[] results;
700 }
701
702
// Emit (and run) a doubly-nested loop applying an FP compare instruction,
// selected by `helper`, to every (n, m) pair drawn from the rawbits table at
// `inputs`. After each compare, the NZCV flags are read back and stored as
// one byte per pair at `results` (n-major order). `reg_size` selects the
// register width (D or S). `*skipped` reports whether the code ran.
static void TestCmp_Helper(TestFPCmpHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register flags = x5;

  bool double_op = reg_size == kDRegSize;
  // Scale factor (log2 of the element size) used when indexing the inputs.
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  VRegister fn = double_op ? d1 : s1;
  VRegister fm = double_op ? d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // The instruction under test must expand to exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, fm);
  }
  // Extract the NZCV flags (PSTATE bits 31:28) and store them as one byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
762
763
764 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
765 // rawbits representations of doubles or floats. This ensures that exact bit
766 // comparisons can be performed.
767 template <typename T>
TestCmp(const char* name, TestFPCmpHelper_t helper, const T inputs[], unsigned inputs_length, const uint8_t expected[], unsigned expected_length)768 static void TestCmp(const char* name,
769 TestFPCmpHelper_t helper,
770 const T inputs[],
771 unsigned inputs_length,
772 const uint8_t expected[],
773 unsigned expected_length) {
774 VIXL_ASSERT(inputs_length > 0);
775
776 const unsigned results_length = inputs_length * inputs_length;
777 uint8_t* results = new uint8_t[results_length];
778
779 const unsigned bits = sizeof(T) * 8;
780 bool skipped;
781
782 TestCmp_Helper(helper,
783 reinterpret_cast<uintptr_t>(inputs),
784 inputs_length,
785 reinterpret_cast<uintptr_t>(results),
786 bits,
787 &skipped);
788
789 if (Test::generate_test_trace()) {
790 // Print the results.
791 printf("const uint8_t kExpected_%s[] = {\n", name);
792 for (unsigned d = 0; d < results_length; d++) {
793 // Each NZCV result only requires 4 bits.
794 VIXL_ASSERT((results[d] & 0xf) == results[d]);
795 printf(" 0x%" PRIx8 ",\n", results[d]);
796 }
797 printf("};\n");
798 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
799 } else if (!skipped) {
800 // Check the results.
801 VIXL_CHECK(expected_length == results_length);
802 unsigned error_count = 0;
803 unsigned d = 0;
804 for (unsigned n = 0; n < inputs_length; n++) {
805 for (unsigned m = 0; m < inputs_length; m++, d++) {
806 if (results[d] != expected[d]) {
807 if (++error_count > kErrorReportLimit) continue;
808
809 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
810 name,
811 bits / 4,
812 static_cast<uint64_t>(inputs[n]),
813 bits / 4,
814 static_cast<uint64_t>(inputs[m]),
815 name,
816 rawbits_to_fp(inputs[n]),
817 rawbits_to_fp(inputs[m]));
818 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
819 (expected[d] & 0x8) ? 'N' : 'n',
820 (expected[d] & 0x4) ? 'Z' : 'z',
821 (expected[d] & 0x2) ? 'C' : 'c',
822 (expected[d] & 0x1) ? 'V' : 'v',
823 expected[d]);
824 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
825 (results[d] & 0x8) ? 'N' : 'n',
826 (results[d] & 0x4) ? 'Z' : 'z',
827 (results[d] & 0x2) ? 'C' : 'c',
828 (results[d] & 0x1) ? 'V' : 'v',
829 results[d]);
830 printf("\n");
831 }
832 }
833 }
834 VIXL_ASSERT(d == expected_length);
835 if (error_count > kErrorReportLimit) {
836 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
837 }
838 VIXL_CHECK(error_count == 0);
839 }
840 delete[] results;
841 }
842
843
// Emit (and run) a loop applying an FP compare-with-immediate instruction,
// selected by `helper`, comparing every element of the rawbits table at
// `inputs` against the literal +0.0. After each compare, the NZCV flags are
// read back and stored as one byte per input at `results`. `reg_size`
// selects the register width (D or S). `*skipped` reports whether the code
// actually ran.
static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
                               uintptr_t inputs,
                               unsigned inputs_length,
                               uintptr_t results,
                               unsigned reg_size,
                               bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  // (Only loop_n is used here; loop_m is declared but never bound.)
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register flags = x4;

  bool double_op = reg_size == kDRegSize;
  // Scale factor (log2 of the element size) used when indexing the inputs.
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  VRegister fn = double_op ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  {
    // The instruction under test must expand to exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, 0.0);
  }
  // Extract the NZCV flags (PSTATE bits 31:28) and store them as one byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
893
894
895 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
896 // rawbits representations of doubles or floats. This ensures that exact bit
897 // comparisons can be performed.
898 template <typename T>
TestCmpZero(const char* name, TestFPCmpZeroHelper_t helper, const T inputs[], unsigned inputs_length, const uint8_t expected[], unsigned expected_length)899 static void TestCmpZero(const char* name,
900 TestFPCmpZeroHelper_t helper,
901 const T inputs[],
902 unsigned inputs_length,
903 const uint8_t expected[],
904 unsigned expected_length) {
905 VIXL_ASSERT(inputs_length > 0);
906
907 const unsigned results_length = inputs_length;
908 uint8_t* results = new uint8_t[results_length];
909
910 const unsigned bits = sizeof(T) * 8;
911 bool skipped;
912
913 TestCmpZero_Helper(helper,
914 reinterpret_cast<uintptr_t>(inputs),
915 inputs_length,
916 reinterpret_cast<uintptr_t>(results),
917 bits,
918 &skipped);
919
920 if (Test::generate_test_trace()) {
921 // Print the results.
922 printf("const uint8_t kExpected_%s[] = {\n", name);
923 for (unsigned d = 0; d < results_length; d++) {
924 // Each NZCV result only requires 4 bits.
925 VIXL_ASSERT((results[d] & 0xf) == results[d]);
926 printf(" 0x%" PRIx8 ",\n", results[d]);
927 }
928 printf("};\n");
929 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
930 } else if (!skipped) {
931 // Check the results.
932 VIXL_CHECK(expected_length == results_length);
933 unsigned error_count = 0;
934 unsigned d = 0;
935 for (unsigned n = 0; n < inputs_length; n++, d++) {
936 if (results[d] != expected[d]) {
937 if (++error_count > kErrorReportLimit) continue;
938
939 printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
940 name,
941 bits / 4,
942 static_cast<uint64_t>(inputs[n]),
943 bits / 4,
944 0,
945 name,
946 rawbits_to_fp(inputs[n]));
947 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
948 (expected[d] & 0x8) ? 'N' : 'n',
949 (expected[d] & 0x4) ? 'Z' : 'z',
950 (expected[d] & 0x2) ? 'C' : 'c',
951 (expected[d] & 0x1) ? 'V' : 'v',
952 expected[d]);
953 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
954 (results[d] & 0x8) ? 'N' : 'n',
955 (results[d] & 0x4) ? 'Z' : 'z',
956 (results[d] & 0x2) ? 'C' : 'c',
957 (results[d] & 0x1) ? 'V' : 'v',
958 results[d]);
959 printf("\n");
960 }
961 }
962 VIXL_ASSERT(d == expected_length);
963 if (error_count > kErrorReportLimit) {
964 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
965 }
966 VIXL_CHECK(error_count == 0);
967 }
968 delete[] results;
969 }
970
971
// Emit (and run) a loop applying an FP-to-fixed-point conversion instruction,
// selected by `helper`, to every element of the rawbits table at `inputs`.
// For each input, the conversion is emitted once for every fraction-bit count
// from 0 up to and including `d_size`, and each integer result is stored
// sequentially at `results`. `d_size` selects the integer destination width
// (X or W); `n_size` selects the FP source width (D, S or H). `*skipped`
// reports whether the generated code actually ran (see TRY_RUN).
static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
                                 uintptr_t inputs,
                                 unsigned inputs_length,
                                 uintptr_t results,
                                 unsigned d_size,
                                 unsigned n_size,
                                 bool* skipped) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Scale factor (log2 of the element size) used when indexing the inputs.
  int n_index_shift;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
  }

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  VRegister fn;
  if (n_size == kDRegSize) {
    fn = d1;
  } else if (n_size == kSRegSize) {
    fn = s1;
  } else {
    fn = h1;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  // Unlike the input loop, the fbits sweep is unrolled at code-generation
  // time: one conversion instruction is emitted per fraction-bit count.
  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    {
      // The instruction under test must expand to exactly one instruction.
      SingleEmissionCheckScope guard(&masm);
      (masm.*helper)(rd, fn, fbits);
    }
    // Store the integer result and advance the output pointer.
    __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1036
1037
// Emit and run a routine that applies 'helper' (an FP-to-integer conversion,
// e.g. Fcvtns) once to each of the 'inputs_length' raw-bits FP values at
// 'inputs', storing each result sequentially at 'results'.
// - d_size selects the integer destination register (X or W).
// - n_size selects the FP source register (D, S or H).
// *skipped is set by TRY_RUN when the generated code could not be executed
// (for example when a required CPU feature is unavailable).
static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
                               uintptr_t inputs,
                               unsigned inputs_length,
                               uintptr_t results,
                               unsigned d_size,
                               unsigned n_size,
                               bool* skipped) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  // kJSCVT is included so that Fjcvtzs-based helpers can be tested too.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kFPHalf,
                      CPUFeatures::kJSCVT);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // log2 of the input lane size in bytes, used to scale index_n when
  // addressing the inputs array.
  int n_index_shift;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
  }

  // Destination and source registers, sized as requested by the caller.
  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  VRegister fn;
  if (n_size == kDRegSize) {
    fn = d1;
  } else if (n_size == kSRegSize) {
    fn = s1;
  } else {
    fn = h1;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(rd, fn);
  }
  __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1102
1103
1104 // Test FP instructions.
1105 // - The inputs[] array should be an array of rawbits representations of
1106 // doubles or floats. This ensures that exact bit comparisons can be
1107 // performed.
1108 // - The expected[] array should be an array of signed integers.
1109 template <typename Tn, typename Td>
TestFPToS(const char* name, TestFPToIntHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1110 static void TestFPToS(const char* name,
1111 TestFPToIntHelper_t helper,
1112 const Tn inputs[],
1113 unsigned inputs_length,
1114 const Td expected[],
1115 unsigned expected_length) {
1116 VIXL_ASSERT(inputs_length > 0);
1117
1118 const unsigned results_length = inputs_length;
1119 Td* results = new Td[results_length];
1120
1121 const unsigned d_bits = sizeof(Td) * 8;
1122 const unsigned n_bits = sizeof(Tn) * 8;
1123 bool skipped;
1124
1125 TestFPToInt_Helper(helper,
1126 reinterpret_cast<uintptr_t>(inputs),
1127 inputs_length,
1128 reinterpret_cast<uintptr_t>(results),
1129 d_bits,
1130 n_bits,
1131 &skipped);
1132
1133 if (Test::generate_test_trace()) {
1134 // Print the results.
1135 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1136 // There is no simple C++ literal for INT*_MIN that doesn't produce
1137 // warnings, so we use an appropriate constant in that case instead.
1138 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1139 // the like) avoids warnings about comparing values with differing ranges.
1140 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1141 const int64_t int_d_min = -(int_d_max)-1;
1142 for (unsigned d = 0; d < results_length; d++) {
1143 if (results[d] == int_d_min) {
1144 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1145 } else {
1146 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1147 // trigger compiler warnings. To avoid these warnings, use an
1148 // appropriate macro to make the type explicit.
1149 int64_t result_int64 = static_cast<int64_t>(results[d]);
1150 if (result_int64 >= 0) {
1151 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1152 } else {
1153 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1154 }
1155 }
1156 }
1157 printf("};\n");
1158 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1159 } else if (!skipped) {
1160 // Check the results.
1161 VIXL_CHECK(expected_length == results_length);
1162 unsigned error_count = 0;
1163 unsigned d = 0;
1164 for (unsigned n = 0; n < inputs_length; n++, d++) {
1165 if (results[d] != expected[d]) {
1166 if (++error_count > kErrorReportLimit) continue;
1167
1168 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1169 name,
1170 n_bits / 4,
1171 static_cast<uint64_t>(inputs[n]),
1172 name,
1173 rawbits_to_fp(inputs[n]));
1174 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1175 d_bits / 4,
1176 static_cast<uint64_t>(expected[d]),
1177 static_cast<int64_t>(expected[d]));
1178 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1179 d_bits / 4,
1180 static_cast<uint64_t>(results[d]),
1181 static_cast<int64_t>(results[d]));
1182 printf("\n");
1183 }
1184 }
1185 VIXL_ASSERT(d == expected_length);
1186 if (error_count > kErrorReportLimit) {
1187 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1188 }
1189 VIXL_CHECK(error_count == 0);
1190 }
1191 delete[] results;
1192 }
1193
1194
1195 // Test FP instructions.
1196 // - The inputs[] array should be an array of rawbits representations of
1197 // doubles or floats. This ensures that exact bit comparisons can be
1198 // performed.
1199 // - The expected[] array should be an array of unsigned integers.
1200 template <typename Tn, typename Td>
TestFPToU(const char* name, TestFPToIntHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1201 static void TestFPToU(const char* name,
1202 TestFPToIntHelper_t helper,
1203 const Tn inputs[],
1204 unsigned inputs_length,
1205 const Td expected[],
1206 unsigned expected_length) {
1207 VIXL_ASSERT(inputs_length > 0);
1208
1209 const unsigned results_length = inputs_length;
1210 Td* results = new Td[results_length];
1211
1212 const unsigned d_bits = sizeof(Td) * 8;
1213 const unsigned n_bits = sizeof(Tn) * 8;
1214 bool skipped;
1215
1216 TestFPToInt_Helper(helper,
1217 reinterpret_cast<uintptr_t>(inputs),
1218 inputs_length,
1219 reinterpret_cast<uintptr_t>(results),
1220 d_bits,
1221 n_bits,
1222 &skipped);
1223
1224 if (Test::generate_test_trace()) {
1225 // Print the results.
1226 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1227 for (unsigned d = 0; d < results_length; d++) {
1228 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1229 }
1230 printf("};\n");
1231 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1232 } else if (!skipped) {
1233 // Check the results.
1234 VIXL_CHECK(expected_length == results_length);
1235 unsigned error_count = 0;
1236 unsigned d = 0;
1237 for (unsigned n = 0; n < inputs_length; n++, d++) {
1238 if (results[d] != expected[d]) {
1239 if (++error_count > kErrorReportLimit) continue;
1240
1241 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1242 name,
1243 n_bits / 4,
1244 static_cast<uint64_t>(inputs[n]),
1245 name,
1246 rawbits_to_fp(inputs[n]));
1247 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1248 d_bits / 4,
1249 static_cast<uint64_t>(expected[d]),
1250 static_cast<uint64_t>(expected[d]));
1251 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1252 d_bits / 4,
1253 static_cast<uint64_t>(results[d]),
1254 static_cast<uint64_t>(results[d]));
1255 printf("\n");
1256 }
1257 }
1258 VIXL_ASSERT(d == expected_length);
1259 if (error_count > kErrorReportLimit) {
1260 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1261 }
1262 VIXL_CHECK(error_count == 0);
1263 }
1264 delete[] results;
1265 }
1266
1267
1268 // Test FP instructions.
1269 // - The inputs[] array should be an array of rawbits representations of
1270 // doubles or floats. This ensures that exact bit comparisons can be
1271 // performed.
1272 // - The expected[] array should be an array of signed integers.
1273 template <typename Tn, typename Td>
TestFPToFixedS(const char* name, TestFPToFixedHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1274 static void TestFPToFixedS(const char* name,
1275 TestFPToFixedHelper_t helper,
1276 const Tn inputs[],
1277 unsigned inputs_length,
1278 const Td expected[],
1279 unsigned expected_length) {
1280 VIXL_ASSERT(inputs_length > 0);
1281
1282 const unsigned d_bits = sizeof(Td) * 8;
1283 const unsigned n_bits = sizeof(Tn) * 8;
1284
1285 const unsigned results_length = inputs_length * (d_bits + 1);
1286 Td* results = new Td[results_length];
1287
1288 bool skipped;
1289
1290 TestFPToFixed_Helper(helper,
1291 reinterpret_cast<uintptr_t>(inputs),
1292 inputs_length,
1293 reinterpret_cast<uintptr_t>(results),
1294 d_bits,
1295 n_bits,
1296 &skipped);
1297
1298 if (Test::generate_test_trace()) {
1299 // Print the results.
1300 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1301 // There is no simple C++ literal for INT*_MIN that doesn't produce
1302 // warnings, so we use an appropriate constant in that case instead.
1303 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1304 // the like) avoids warnings about comparing values with differing ranges.
1305 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1306 const int64_t int_d_min = -(int_d_max)-1;
1307 for (unsigned d = 0; d < results_length; d++) {
1308 if (results[d] == int_d_min) {
1309 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1310 } else {
1311 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1312 // trigger compiler warnings. To avoid these warnings, use an
1313 // appropriate macro to make the type explicit.
1314 int64_t result_int64 = static_cast<int64_t>(results[d]);
1315 if (result_int64 >= 0) {
1316 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1317 } else {
1318 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1319 }
1320 }
1321 }
1322 printf("};\n");
1323 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1324 } else if (!skipped) {
1325 // Check the results.
1326 VIXL_CHECK(expected_length == results_length);
1327 unsigned error_count = 0;
1328 unsigned d = 0;
1329 for (unsigned n = 0; n < inputs_length; n++) {
1330 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1331 if (results[d] != expected[d]) {
1332 if (++error_count > kErrorReportLimit) continue;
1333
1334 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1335 name,
1336 n_bits / 4,
1337 static_cast<uint64_t>(inputs[n]),
1338 fbits,
1339 name,
1340 rawbits_to_fp(inputs[n]),
1341 fbits);
1342 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1343 d_bits / 4,
1344 static_cast<uint64_t>(expected[d]),
1345 static_cast<int64_t>(expected[d]));
1346 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1347 d_bits / 4,
1348 static_cast<uint64_t>(results[d]),
1349 static_cast<int64_t>(results[d]));
1350 printf("\n");
1351 }
1352 }
1353 }
1354 VIXL_ASSERT(d == expected_length);
1355 if (error_count > kErrorReportLimit) {
1356 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1357 }
1358 VIXL_CHECK(error_count == 0);
1359 }
1360 delete[] results;
1361 }
1362
1363
1364 // Test FP instructions.
1365 // - The inputs[] array should be an array of rawbits representations of
1366 // doubles or floats. This ensures that exact bit comparisons can be
1367 // performed.
1368 // - The expected[] array should be an array of unsigned integers.
1369 template <typename Tn, typename Td>
TestFPToFixedU(const char* name, TestFPToFixedHelper_t helper, const Tn inputs[], unsigned inputs_length, const Td expected[], unsigned expected_length)1370 static void TestFPToFixedU(const char* name,
1371 TestFPToFixedHelper_t helper,
1372 const Tn inputs[],
1373 unsigned inputs_length,
1374 const Td expected[],
1375 unsigned expected_length) {
1376 VIXL_ASSERT(inputs_length > 0);
1377
1378 const unsigned d_bits = sizeof(Td) * 8;
1379 const unsigned n_bits = sizeof(Tn) * 8;
1380
1381 const unsigned results_length = inputs_length * (d_bits + 1);
1382 Td* results = new Td[results_length];
1383
1384 bool skipped;
1385
1386 TestFPToFixed_Helper(helper,
1387 reinterpret_cast<uintptr_t>(inputs),
1388 inputs_length,
1389 reinterpret_cast<uintptr_t>(results),
1390 d_bits,
1391 n_bits,
1392 &skipped);
1393
1394 if (Test::generate_test_trace()) {
1395 // Print the results.
1396 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1397 for (unsigned d = 0; d < results_length; d++) {
1398 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1399 }
1400 printf("};\n");
1401 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1402 } else if (!skipped) {
1403 // Check the results.
1404 VIXL_CHECK(expected_length == results_length);
1405 unsigned error_count = 0;
1406 unsigned d = 0;
1407 for (unsigned n = 0; n < inputs_length; n++) {
1408 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1409 if (results[d] != expected[d]) {
1410 if (++error_count > kErrorReportLimit) continue;
1411
1412 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1413 name,
1414 n_bits / 4,
1415 static_cast<uint64_t>(inputs[n]),
1416 fbits,
1417 name,
1418 rawbits_to_fp(inputs[n]),
1419 fbits);
1420 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1421 d_bits / 4,
1422 static_cast<uint64_t>(expected[d]),
1423 static_cast<uint64_t>(expected[d]));
1424 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1425 d_bits / 4,
1426 static_cast<uint64_t>(results[d]),
1427 static_cast<uint64_t>(results[d]));
1428 printf("\n");
1429 }
1430 }
1431 }
1432 VIXL_ASSERT(d == expected_length);
1433 if (error_count > kErrorReportLimit) {
1434 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1435 }
1436 VIXL_CHECK(error_count == 0);
1437 }
1438 delete[] results;
1439 }
1440
1441
1442 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1443
1444
// Emit and run a routine that applies the one-operand NEON instruction
// 'helper' to successive windows over the 'inputs_n_length' raw-bits lane
// values at 'inputs_n', storing the full destination register sequentially
// at 'results'. vd_form/vn_form describe the destination and source vector
// arrangements. *skipped is set by TRY_RUN when the generated code could not
// be executed.
static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
                               uintptr_t inputs_n,
                               unsigned inputs_n_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON,
                   CPUFeatures::kFP,
                   CPUFeatures::kRDM,
                   CPUFeatures::kNEONHalf);
  // For frint{32,64}{x,y} variants.
  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  // Pre-fill vn with the last 16 bytes of the inputs; the loop below then
  // shifts one new lane in per iteration, so vn always holds a window of
  // consecutive input lanes (wrapping around the end of the array).
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next input lane and shift it into the bottom of vn via Ext.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd_helper, vn_helper);
  }
  __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1531
1532
1533 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1534 // arrays of rawbit representation of input values. This ensures that
1535 // exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpNEON(const char* name,
                        Test1OpNEONHelper_t helper,
                        const Tn inputs_n[],
                        unsigned inputs_n_length,
                        const Td expected[],
                        unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One full destination vector is stored per input lane.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  bool skipped;

  Test1OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form,
                     vn_form,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      // First scan the whole output vector; report the vector as one error
      // if any lane mismatches.
      bool error_in_vector = false;

      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // Reconstruct which input lane fed each output lane: the helper
        // streams inputs through vn starting from the last 16 bytes of the
        // array, so the window for iteration n begins at this (wrapped)
        // index.
        const unsigned first_index_n =
            inputs_n_length - (16 / vn_lane_bytes) + n + 1;

        for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
             lane++) {
          unsigned output_index = (n * vd_lane_count) + lane;
          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

          // Mismatching lanes are flagged with '*'.
          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
                 " "
                 "| 0x%0*" PRIx64 "\n",
                 results[output_index] != expected[output_index] ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[output_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expected[output_index]));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}
1641
1642
1643 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1644 // where <V> is one of B, H, S or D registers.
1645 // e.g. saddlv H1, v0.8B
1646
1647 // TODO: Change tests to store all lanes of the resulting V register.
1648 // Some tests store all 128 bits of the resulting V register to
1649 // check the simulator's behaviour on the rest of the register.
1650 // This is better than storing the affected lanes only.
1651 // Change any tests such as the 'Across' template to do the same.
1652
// Emit and run a routine that applies the across-lanes NEON instruction
// 'helper' (e.g. saddlv) to successive windows over the 'inputs_n_length'
// raw-bits lane values at 'inputs_n'. The whole Q register holding the
// result is stored per iteration, so the caller can also check that the
// unused lanes were zeroed. *skipped is set by TRY_RUN when the generated
// code could not be executed.
static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_vector = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  // Test destructive operations by (arbitrarily) using the same register for
  // B and S lane sizes.
  bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);

  // Create two aliases for v0; the first is the destination for the tested
  // instruction, the second, the whole Q register to check the results.
  VRegister vd = VRegister(0, vd_bits);
  VRegister vdstr = VRegister(0, kQRegSize);

  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(3, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  // Pre-fill vn with the last full source vector of the inputs; the loop
  // below then shifts one new lane in per iteration, so vn always holds a
  // window of consecutive input lanes (wrapping around the end).
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next input lane and shift it into the bottom of vn via Ext.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  if (destructive) {
    // Copy the source into the destination register first, so the tested
    // instruction reads and writes the same register.
    __ Mov(vd_helper, vn_helper);
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vd_helper);
  } else {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vn_helper);
  }

  // Store the whole Q register so that zeroing of the unused lanes can be
  // verified by the caller.
  __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1740
1741 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1742 // arrays of rawbit representation of input values. This ensures that
1743 // exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpAcrossNEON(const char* name,
                              Test1OpNEONHelper_t helper,
                              const Tn inputs_n[],
                              unsigned inputs_n_length,
                              const Td expected[],
                              unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);

  // The helper stores the whole Q register per iteration, so allocate space
  // for all Q-register lanes even though only vd_lane_count hold the result.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lanes_per_q];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  bool skipped;

  Test1OpAcrossNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form,
                           vn_form,
                           &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lanes_per_q);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // Compare the active result lanes against the reference trace.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned expected_index = (n * vd_lane_count) + lane;
        unsigned results_index = (n * vd_lanes_per_q) + lane;

        if (results[results_index] != expected[expected_index]) {
          error_in_vector = true;
          break;
        }
      }

      // For across operations, the remaining lanes should be zero.
      for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
        unsigned results_index = (n * vd_lanes_per_q) + lane;
        if (results[results_index] != 0) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // TODO: In case of an error, all tests print out as many elements as
        //       there are lanes in the output or input vectors. This way
        //       the viewer can read all the values that were needed for the
        //       operation but the output contains also unnecessary values.
        //       These prints can be improved according to the arguments
        //       passed to test functions.
        //       This output for the 'Across' category has the required
        //       modifications.
        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
          // Lanes are printed most-significant first; the input index mirrors
          // the sliding window used by Test1OpAcrossNEON_Helper (wrapping
          // around the end of the inputs array).
          unsigned results_index =
              (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
          unsigned input_index_n =
              (inputs_n_length - vn_lane_count + n + 1 + lane) %
              inputs_n_length;

          Td expect = 0;
          if ((vn_lane_count - 1) == lane) {
            // This is the last lane to be printed, ie. the least-significant
            // lane, so use the expected value; any other lane should be zero.
            unsigned expected_index = n * vd_lane_count;
            expect = expected[expected_index];
          }
          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                 results[results_index] != expect ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[results_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expect));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}
1872
1873
1874 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1875
1876 // TODO: Iterate over inputs_d once the traces file is split.
1877
Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d, uintptr_t inputs_n, unsigned inputs_n_length, uintptr_t inputs_m, unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form, bool* skipped)1878 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1879 uintptr_t inputs_d,
1880 uintptr_t inputs_n,
1881 unsigned inputs_n_length,
1882 uintptr_t inputs_m,
1883 unsigned inputs_m_length,
1884 uintptr_t results,
1885 VectorFormat vd_form,
1886 VectorFormat vn_form,
1887 VectorFormat vm_form,
1888 bool* skipped) {
1889 VIXL_ASSERT(vd_form != kFormatUndefined);
1890 VIXL_ASSERT(vn_form != kFormatUndefined);
1891 VIXL_ASSERT(vm_form != kFormatUndefined);
1892
1893 CPUFeatures features;
1894 features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
1895 features.Combine(CPUFeatures::kFP);
1896 features.Combine(CPUFeatures::kRDM);
1897 features.Combine(CPUFeatures::kDotProduct);
1898 features.Combine(CPUFeatures::kFHM);
1899 SETUP_WITH_FEATURES(features);
1900 START();
1901
1902 // Roll up the loop to keep the code size down.
1903 Label loop_n, loop_m;
1904
1905 Register out = x0;
1906 Register inputs_n_base = x1;
1907 Register inputs_m_base = x2;
1908 Register inputs_d_base = x3;
1909 Register inputs_n_last_16bytes = x4;
1910 Register inputs_m_last_16bytes = x5;
1911 Register index_n = x6;
1912 Register index_m = x7;
1913
1914 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1915 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1916 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1917
1918 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1919 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1920 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1921 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1922 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1923
1924 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1925 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1926 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1927 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1928 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1929
1930
1931 // Always load and store 128 bits regardless of the format.
1932 VRegister vd = v0.V16B();
1933 VRegister vn = v1.V16B();
1934 VRegister vm = v2.V16B();
1935 VRegister vntmp = v3.V16B();
1936 VRegister vmtmp = v4.V16B();
1937 VRegister vres = v5.V16B();
1938
1939 // These will have the correct format for calling the 'helper'.
1940 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1941 VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1942 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1943
1944 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1945 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1946 VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1947
1948 __ Mov(out, results);
1949
1950 __ Mov(inputs_d_base, inputs_d);
1951
1952 __ Mov(inputs_n_base, inputs_n);
1953 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1954 __ Mov(inputs_m_base, inputs_m);
1955 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1956
1957 __ Ldr(vd, MemOperand(inputs_d_base));
1958 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1959 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1960
1961 __ Mov(index_n, 0);
1962 __ Bind(&loop_n);
1963
1964 __ Ldr(vntmp_single,
1965 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1966 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1967
1968 __ Mov(index_m, 0);
1969 __ Bind(&loop_m);
1970
1971 __ Ldr(vmtmp_single,
1972 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
1973 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1974
1975 __ Mov(vres, vd);
1976 {
1977 SingleEmissionCheckScope guard(&masm);
1978 (masm.*helper)(vres_helper, vn_helper, vm_helper);
1979 }
1980 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1981
1982 __ Add(index_m, index_m, 1);
1983 __ Cmp(index_m, inputs_m_length);
1984 __ B(lo, &loop_m);
1985
1986 __ Add(index_n, index_n, 1);
1987 __ Cmp(index_n, inputs_n_length);
1988 __ B(lo, &loop_n);
1989
1990 END();
1991 TRY_RUN(skipped);
1992 }
1993
1994
1995 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1996 // arrays of rawbit representation of input values. This ensures that
1997 // exact bit comparisons can be performed.
1998 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char* name, Test2OpNEONHelper_t helper, const Td inputs_d[], const Tn inputs_n[], unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form)1999 static void Test2OpNEON(const char* name,
2000 Test2OpNEONHelper_t helper,
2001 const Td inputs_d[],
2002 const Tn inputs_n[],
2003 unsigned inputs_n_length,
2004 const Tm inputs_m[],
2005 unsigned inputs_m_length,
2006 const Td expected[],
2007 unsigned expected_length,
2008 VectorFormat vd_form,
2009 VectorFormat vn_form,
2010 VectorFormat vm_form) {
2011 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2012
2013 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2014
2015 const unsigned results_length = inputs_n_length * inputs_m_length;
2016 Td* results = new Td[results_length * vd_lane_count];
2017 const unsigned lane_bit = sizeof(Td) * 8;
2018 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2019
2020 bool skipped;
2021
2022 Test2OpNEON_Helper(helper,
2023 reinterpret_cast<uintptr_t>(inputs_d),
2024 reinterpret_cast<uintptr_t>(inputs_n),
2025 inputs_n_length,
2026 reinterpret_cast<uintptr_t>(inputs_m),
2027 inputs_m_length,
2028 reinterpret_cast<uintptr_t>(results),
2029 vd_form,
2030 vn_form,
2031 vm_form,
2032 &skipped);
2033
2034 if (Test::generate_test_trace()) {
2035 // Print the results.
2036 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2037 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2038 printf(" ");
2039 // Output a separate result for each element of the result vector.
2040 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2041 unsigned index = lane + (iteration * vd_lane_count);
2042 printf(" 0x%0*" PRIx64 ",",
2043 lane_len_in_hex,
2044 static_cast<uint64_t>(results[index]));
2045 }
2046 printf("\n");
2047 }
2048
2049 printf("};\n");
2050 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2051 name,
2052 results_length);
2053 } else if (!skipped) {
2054 // Check the results.
2055 VIXL_CHECK(expected_length == results_length);
2056 unsigned error_count = 0;
2057 unsigned d = 0;
2058 const char* padding = " ";
2059 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2060 for (unsigned n = 0; n < inputs_n_length; n++) {
2061 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2062 bool error_in_vector = false;
2063
2064 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2065 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2066 (m * vd_lane_count) + lane;
2067
2068 if (results[output_index] != expected[output_index]) {
2069 error_in_vector = true;
2070 break;
2071 }
2072 }
2073
2074 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2075 printf("%s\n", name);
2076 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
2077 lane_len_in_hex + 1,
2078 padding,
2079 lane_len_in_hex + 1,
2080 padding,
2081 lane_len_in_hex + 1,
2082 padding,
2083 lane_len_in_hex + 1,
2084 padding);
2085
2086 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2087 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2088 (m * vd_lane_count) + lane;
2089 unsigned input_index_n =
2090 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2091 inputs_n_length;
2092 unsigned input_index_m =
2093 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2094 inputs_m_length;
2095
2096 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2097 " "
2098 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2099 results[output_index] != expected[output_index] ? '*' : ' ',
2100 lane_len_in_hex,
2101 static_cast<uint64_t>(inputs_d[lane]),
2102 lane_len_in_hex,
2103 static_cast<uint64_t>(inputs_n[input_index_n]),
2104 lane_len_in_hex,
2105 static_cast<uint64_t>(inputs_m[input_index_m]),
2106 lane_len_in_hex,
2107 static_cast<uint64_t>(results[output_index]),
2108 lane_len_in_hex,
2109 static_cast<uint64_t>(expected[output_index]));
2110 }
2111 }
2112 }
2113 }
2114 VIXL_ASSERT(d == expected_length);
2115 if (error_count > kErrorReportLimit) {
2116 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2117 }
2118 VIXL_CHECK(error_count == 0);
2119 }
2120 delete[] results;
2121 }
2122
2123
// ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
2125
TestByElementNEON_Helper(TestByElementNEONHelper_t helper, uintptr_t inputs_d, uintptr_t inputs_n, unsigned inputs_n_length, uintptr_t inputs_m, unsigned inputs_m_length, const int indices[], unsigned indices_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form, unsigned vm_subvector_count, bool* skipped)2126 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
2127 uintptr_t inputs_d,
2128 uintptr_t inputs_n,
2129 unsigned inputs_n_length,
2130 uintptr_t inputs_m,
2131 unsigned inputs_m_length,
2132 const int indices[],
2133 unsigned indices_length,
2134 uintptr_t results,
2135 VectorFormat vd_form,
2136 VectorFormat vn_form,
2137 VectorFormat vm_form,
2138 unsigned vm_subvector_count,
2139 bool* skipped) {
2140 VIXL_ASSERT(vd_form != kFormatUndefined);
2141 VIXL_ASSERT(vn_form != kFormatUndefined);
2142 VIXL_ASSERT(vm_form != kFormatUndefined);
2143 VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));
2144
2145 CPUFeatures features;
2146 features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
2147 features.Combine(CPUFeatures::kFP);
2148 features.Combine(CPUFeatures::kRDM);
2149 features.Combine(CPUFeatures::kDotProduct);
2150 features.Combine(CPUFeatures::kFHM);
2151 SETUP_WITH_FEATURES(features);
2152
2153 START();
2154
2155 // Roll up the loop to keep the code size down.
2156 Label loop_n, loop_m;
2157
2158 Register out = x0;
2159 Register inputs_n_base = x1;
2160 Register inputs_m_base = x2;
2161 Register inputs_d_base = x3;
2162 Register inputs_n_last_16bytes = x4;
2163 Register inputs_m_last_16bytes = x5;
2164 Register index_n = x6;
2165 Register index_m = x7;
2166
2167 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2168 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2169 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2170
2171 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2172 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2173 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2174 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2175 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2176
2177 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
2178 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
2179 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
2180 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2181 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2182
2183 VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);
2184
2185 // Always load and store 128 bits regardless of the format.
2186 VRegister vd = v0.V16B();
2187 VRegister vn = v1.V16B();
2188 VRegister vm = v2.V16B();
2189 VRegister vntmp = v3.V16B();
2190 VRegister vmtmp = v4.V16B();
2191 VRegister vres = v5.V16B();
2192
2193 // These will have the correct format for calling the 'helper'.
2194 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2195 VRegister vm_helper =
2196 VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
2197 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2198
2199 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2200 VRegister vntmp_single = VRegister(3, vn_lane_bits);
2201 VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2202
2203 __ Mov(out, results);
2204
2205 __ Mov(inputs_d_base, inputs_d);
2206
2207 __ Mov(inputs_n_base, inputs_n);
2208 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2209 __ Mov(inputs_m_base, inputs_m);
2210 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2211
2212 __ Ldr(vd, MemOperand(inputs_d_base));
2213 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2214 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2215
2216 __ Mov(index_n, 0);
2217 __ Bind(&loop_n);
2218
2219 __ Ldr(vntmp_single,
2220 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2221 __ Ext(vn, vn, vntmp, vn_lane_bytes);
2222
2223 __ Mov(index_m, 0);
2224 __ Bind(&loop_m);
2225
2226 __ Ldr(vmtmp_single,
2227 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2228 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2229
2230 __ Mov(vres, vd);
2231 {
2232 for (unsigned i = 0; i < indices_length; i++) {
2233 {
2234 SingleEmissionCheckScope guard(&masm);
2235 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
2236 }
2237 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2238 }
2239 }
2240
2241 __ Add(index_m, index_m, 1);
2242 __ Cmp(index_m, inputs_m_length);
2243 __ B(lo, &loop_m);
2244
2245 __ Add(index_n, index_n, 1);
2246 __ Cmp(index_n, inputs_n_length);
2247 __ B(lo, &loop_n);
2248
2249 END();
2250 TRY_RUN(skipped);
2251 }
2252
2253
2254 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2255 // arrays of rawbit representation of input values. This ensures that
2256 // exact bit comparisons can be performed.
2257 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char* name, TestByElementNEONHelper_t helper, const Td inputs_d[], const Tn inputs_n[], unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, const int indices[], unsigned indices_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form, unsigned vm_subvector_count = 1)2258 static void TestByElementNEON(const char* name,
2259 TestByElementNEONHelper_t helper,
2260 const Td inputs_d[],
2261 const Tn inputs_n[],
2262 unsigned inputs_n_length,
2263 const Tm inputs_m[],
2264 unsigned inputs_m_length,
2265 const int indices[],
2266 unsigned indices_length,
2267 const Td expected[],
2268 unsigned expected_length,
2269 VectorFormat vd_form,
2270 VectorFormat vn_form,
2271 VectorFormat vm_form,
2272 unsigned vm_subvector_count = 1) {
2273 VIXL_ASSERT(inputs_n_length > 0);
2274 VIXL_ASSERT(inputs_m_length > 0);
2275 VIXL_ASSERT(indices_length > 0);
2276
2277 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2278
2279 const unsigned results_length =
2280 inputs_n_length * inputs_m_length * indices_length;
2281 Td* results = new Td[results_length * vd_lane_count];
2282 const unsigned lane_bit = sizeof(Td) * 8;
2283 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2284
2285 bool skipped;
2286
2287 TestByElementNEON_Helper(helper,
2288 reinterpret_cast<uintptr_t>(inputs_d),
2289 reinterpret_cast<uintptr_t>(inputs_n),
2290 inputs_n_length,
2291 reinterpret_cast<uintptr_t>(inputs_m),
2292 inputs_m_length,
2293 indices,
2294 indices_length,
2295 reinterpret_cast<uintptr_t>(results),
2296 vd_form,
2297 vn_form,
2298 vm_form,
2299 vm_subvector_count,
2300 &skipped);
2301
2302 if (Test::generate_test_trace()) {
2303 // Print the results.
2304 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2305 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2306 printf(" ");
2307 // Output a separate result for each element of the result vector.
2308 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2309 unsigned index = lane + (iteration * vd_lane_count);
2310 printf(" 0x%0*" PRIx64 ",",
2311 lane_len_in_hex,
2312 static_cast<uint64_t>(results[index]));
2313 }
2314 printf("\n");
2315 }
2316
2317 printf("};\n");
2318 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2319 name,
2320 results_length);
2321 } else if (!skipped) {
2322 // Check the results.
2323 VIXL_CHECK(expected_length == results_length);
2324 unsigned error_count = 0;
2325 unsigned d = 0;
2326 const char* padding = " ";
2327 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2328 for (unsigned n = 0; n < inputs_n_length; n++) {
2329 for (unsigned m = 0; m < inputs_m_length; m++) {
2330 for (unsigned index = 0; index < indices_length; index++, d++) {
2331 bool error_in_vector = false;
2332
2333 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2334 unsigned output_index =
2335 (n * inputs_m_length * indices_length * vd_lane_count) +
2336 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2337 lane;
2338
2339 if (results[output_index] != expected[output_index]) {
2340 error_in_vector = true;
2341 break;
2342 }
2343 }
2344
2345 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2346 printf("%s\n", name);
2347 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2348 lane_len_in_hex + 1,
2349 padding,
2350 lane_len_in_hex + 1,
2351 padding,
2352 lane_len_in_hex + 1,
2353 padding,
2354 lane_len_in_hex + 1,
2355 padding);
2356
2357 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2358 unsigned output_index =
2359 (n * inputs_m_length * indices_length * vd_lane_count) +
2360 (m * indices_length * vd_lane_count) +
2361 (index * vd_lane_count) + lane;
2362 unsigned input_index_n =
2363 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2364 inputs_n_length;
2365 unsigned input_index_m =
2366 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2367 inputs_m_length;
2368
2369 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2370 " "
2371 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2372 results[output_index] != expected[output_index] ? '*'
2373 : ' ',
2374 lane_len_in_hex,
2375 static_cast<uint64_t>(inputs_d[lane]),
2376 lane_len_in_hex,
2377 static_cast<uint64_t>(inputs_n[input_index_n]),
2378 lane_len_in_hex,
2379 static_cast<uint64_t>(inputs_m[input_index_m]),
2380 indices[index],
2381 lane_len_in_hex,
2382 static_cast<uint64_t>(results[output_index]),
2383 lane_len_in_hex,
2384 static_cast<uint64_t>(expected[output_index]));
2385 }
2386 }
2387 }
2388 }
2389 }
2390 VIXL_ASSERT(d == expected_length);
2391 if (error_count > kErrorReportLimit) {
2392 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2393 }
2394 VIXL_CHECK(error_count == 0);
2395 }
2396 delete[] results;
2397 }
2398
2399
// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2401
2402
2403 template <typename Tm>
Test2OpImmNEON_Helper( typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, bool* skipped)2404 void Test2OpImmNEON_Helper(
2405 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2406 uintptr_t inputs_n,
2407 unsigned inputs_n_length,
2408 const Tm inputs_m[],
2409 unsigned inputs_m_length,
2410 uintptr_t results,
2411 VectorFormat vd_form,
2412 VectorFormat vn_form,
2413 bool* skipped) {
2414 VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2415
2416 SETUP_WITH_FEATURES(CPUFeatures::kNEON,
2417 CPUFeatures::kFP,
2418 CPUFeatures::kNEONHalf);
2419 START();
2420
2421 // Roll up the loop to keep the code size down.
2422 Label loop_n;
2423
2424 Register out = x0;
2425 Register inputs_n_base = x1;
2426 Register inputs_n_last_16bytes = x3;
2427 Register index_n = x5;
2428
2429 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2430 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2431 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2432
2433 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2434 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2435 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2436 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2437 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2438
2439
2440 // These will be either a D- or a Q-register form, with a single lane
2441 // (for use in scalar load and store operations).
2442 VRegister vd = VRegister(0, vd_bits);
2443 VRegister vn = v1.V16B();
2444 VRegister vntmp = v3.V16B();
2445
2446 // These will have the correct format for use when calling 'helper'.
2447 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2448 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2449
2450 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2451 VRegister vntmp_single = VRegister(3, vn_lane_bits);
2452
2453 __ Mov(out, results);
2454
2455 __ Mov(inputs_n_base, inputs_n);
2456 __ Mov(inputs_n_last_16bytes,
2457 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2458
2459 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2460
2461 __ Mov(index_n, 0);
2462 __ Bind(&loop_n);
2463
2464 __ Ldr(vntmp_single,
2465 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2466 __ Ext(vn, vn, vntmp, vn_lane_bytes);
2467
2468 // Set the destination to zero for tests such as '[r]shrn2'.
2469 // TODO: Setting the destination to values other than zero might be a better
2470 // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2471 __ Movi(vd.V16B(), 0);
2472
2473 {
2474 for (unsigned i = 0; i < inputs_m_length; i++) {
2475 {
2476 SingleEmissionCheckScope guard(&masm);
2477 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2478 }
2479 __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2480 }
2481 }
2482
2483 __ Add(index_n, index_n, 1);
2484 __ Cmp(index_n, inputs_n_length);
2485 __ B(lo, &loop_n);
2486
2487 END();
2488 TRY_RUN(skipped);
2489 }
2490
2491
2492 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2493 // arrays of rawbit representation of input values. This ensures that
2494 // exact bit comparisons can be performed.
2495 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON( const char* name, typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, const Tn inputs_n[], unsigned inputs_n_length, const Tm inputs_m[], unsigned inputs_m_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form)2496 static void Test2OpImmNEON(
2497 const char* name,
2498 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2499 const Tn inputs_n[],
2500 unsigned inputs_n_length,
2501 const Tm inputs_m[],
2502 unsigned inputs_m_length,
2503 const Td expected[],
2504 unsigned expected_length,
2505 VectorFormat vd_form,
2506 VectorFormat vn_form) {
2507 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2508
2509 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2510 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2511 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2512
2513 const unsigned results_length = inputs_n_length * inputs_m_length;
2514 Td* results = new Td[results_length * vd_lane_count];
2515 const unsigned lane_bit = sizeof(Td) * 8;
2516 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2517
2518 bool skipped;
2519
2520 Test2OpImmNEON_Helper(helper,
2521 reinterpret_cast<uintptr_t>(inputs_n),
2522 inputs_n_length,
2523 inputs_m,
2524 inputs_m_length,
2525 reinterpret_cast<uintptr_t>(results),
2526 vd_form,
2527 vn_form,
2528 &skipped);
2529
2530 if (Test::generate_test_trace()) {
2531 // Print the results.
2532 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2533 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2534 printf(" ");
2535 // Output a separate result for each element of the result vector.
2536 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2537 unsigned index = lane + (iteration * vd_lane_count);
2538 printf(" 0x%0*" PRIx64 ",",
2539 lane_len_in_hex,
2540 static_cast<uint64_t>(results[index]));
2541 }
2542 printf("\n");
2543 }
2544
2545 printf("};\n");
2546 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2547 name,
2548 results_length);
2549 } else if (!skipped) {
2550 // Check the results.
2551 VIXL_CHECK(expected_length == results_length);
2552 unsigned error_count = 0;
2553 unsigned d = 0;
2554 const char* padding = " ";
2555 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2556 for (unsigned n = 0; n < inputs_n_length; n++) {
2557 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2558 bool error_in_vector = false;
2559
2560 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2561 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2562 (m * vd_lane_count) + lane;
2563
2564 if (results[output_index] != expected[output_index]) {
2565 error_in_vector = true;
2566 break;
2567 }
2568 }
2569
2570 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2571 printf("%s\n", name);
2572 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2573 lane_len_in_hex + 1,
2574 padding,
2575 lane_len_in_hex,
2576 padding,
2577 lane_len_in_hex + 1,
2578 padding);
2579
2580 const unsigned first_index_n =
2581 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2582
2583 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2584 lane++) {
2585 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2586 (m * vd_lane_count) + lane;
2587 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2588 unsigned input_index_m = m;
2589
2590 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2591 " "
2592 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2593 results[output_index] != expected[output_index] ? '*' : ' ',
2594 lane_len_in_hex,
2595 static_cast<uint64_t>(inputs_n[input_index_n]),
2596 lane_len_in_hex,
2597 static_cast<uint64_t>(inputs_m[input_index_m]),
2598 lane_len_in_hex,
2599 static_cast<uint64_t>(results[output_index]),
2600 lane_len_in_hex,
2601 static_cast<uint64_t>(expected[output_index]));
2602 }
2603 }
2604 }
2605 }
2606 VIXL_ASSERT(d == expected_length);
2607 if (error_count > kErrorReportLimit) {
2608 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2609 }
2610 VIXL_CHECK(error_count == 0);
2611 }
2612 delete[] results;
2613 }
2614
2615
// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2617
2618
TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, uintptr_t inputs_d, const int inputs_imm1[], unsigned inputs_imm1_length, uintptr_t inputs_n, unsigned inputs_n_length, const int inputs_imm2[], unsigned inputs_imm2_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, bool* skipped)2619 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
2620 uintptr_t inputs_d,
2621 const int inputs_imm1[],
2622 unsigned inputs_imm1_length,
2623 uintptr_t inputs_n,
2624 unsigned inputs_n_length,
2625 const int inputs_imm2[],
2626 unsigned inputs_imm2_length,
2627 uintptr_t results,
2628 VectorFormat vd_form,
2629 VectorFormat vn_form,
2630 bool* skipped) {
2631 VIXL_ASSERT(vd_form != kFormatUndefined);
2632 VIXL_ASSERT(vn_form != kFormatUndefined);
2633
2634 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
2635 START();
2636
2637 // Roll up the loop to keep the code size down.
2638 Label loop_n;
2639
2640 Register out = x0;
2641 Register inputs_d_base = x1;
2642 Register inputs_n_base = x2;
2643 Register inputs_n_last_vector = x4;
2644 Register index_n = x6;
2645
2646 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2647 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2648 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2649
2650 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2651 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2652 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2653 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2654 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2655
2656
2657 // These will be either a D- or a Q-register form, with a single lane
2658 // (for use in scalar load and store operations).
2659 VRegister vd = VRegister(0, vd_bits);
2660 VRegister vn = VRegister(1, vn_bits);
2661 VRegister vntmp = VRegister(4, vn_bits);
2662 VRegister vres = VRegister(5, vn_bits);
2663
2664 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2665 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2666
2667 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2668 VRegister vntmp_single = VRegister(4, vn_lane_bits);
2669
2670 // Same registers for use in the 'ext' instructions.
2671 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2672 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2673
2674 __ Mov(out, results);
2675
2676 __ Mov(inputs_d_base, inputs_d);
2677
2678 __ Mov(inputs_n_base, inputs_n);
2679 __ Mov(inputs_n_last_vector,
2680 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2681
2682 __ Ldr(vd, MemOperand(inputs_d_base));
2683
2684 __ Ldr(vn, MemOperand(inputs_n_last_vector));
2685
2686 __ Mov(index_n, 0);
2687 __ Bind(&loop_n);
2688
2689 __ Ldr(vntmp_single,
2690 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2691 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2692
2693 {
2694 EmissionCheckScope guard(&masm,
2695 kInstructionSize * inputs_imm1_length *
2696 inputs_imm2_length * 3);
2697 for (unsigned i = 0; i < inputs_imm1_length; i++) {
2698 for (unsigned j = 0; j < inputs_imm2_length; j++) {
2699 __ Mov(vres, vd);
2700 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2701 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2702 }
2703 }
2704 }
2705
2706 __ Add(index_n, index_n, 1);
2707 __ Cmp(index_n, inputs_n_length);
2708 __ B(lo, &loop_n);
2709
2710 END();
2711 TRY_RUN(skipped);
2712 }
2713
2714
// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
2718 template <typename Td, typename Tn>
TestOpImmOpImmNEON(const char* name, TestOpImmOpImmVdUpdateNEONHelper_t helper, const Td inputs_d[], const int inputs_imm1[], unsigned inputs_imm1_length, const Tn inputs_n[], unsigned inputs_n_length, const int inputs_imm2[], unsigned inputs_imm2_length, const Td expected[], unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form)2719 static void TestOpImmOpImmNEON(const char* name,
2720 TestOpImmOpImmVdUpdateNEONHelper_t helper,
2721 const Td inputs_d[],
2722 const int inputs_imm1[],
2723 unsigned inputs_imm1_length,
2724 const Tn inputs_n[],
2725 unsigned inputs_n_length,
2726 const int inputs_imm2[],
2727 unsigned inputs_imm2_length,
2728 const Td expected[],
2729 unsigned expected_length,
2730 VectorFormat vd_form,
2731 VectorFormat vn_form) {
2732 VIXL_ASSERT(inputs_n_length > 0);
2733 VIXL_ASSERT(inputs_imm1_length > 0);
2734 VIXL_ASSERT(inputs_imm2_length > 0);
2735
2736 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2737
2738 const unsigned results_length =
2739 inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2740
2741 Td* results = new Td[results_length * vd_lane_count];
2742 const unsigned lane_bit = sizeof(Td) * 8;
2743 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2744
2745 bool skipped;
2746
2747 TestOpImmOpImmNEON_Helper(helper,
2748 reinterpret_cast<uintptr_t>(inputs_d),
2749 inputs_imm1,
2750 inputs_imm1_length,
2751 reinterpret_cast<uintptr_t>(inputs_n),
2752 inputs_n_length,
2753 inputs_imm2,
2754 inputs_imm2_length,
2755 reinterpret_cast<uintptr_t>(results),
2756 vd_form,
2757 vn_form,
2758 &skipped);
2759
2760 if (Test::generate_test_trace()) {
2761 // Print the results.
2762 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2763 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2764 printf(" ");
2765 // Output a separate result for each element of the result vector.
2766 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2767 unsigned index = lane + (iteration * vd_lane_count);
2768 printf(" 0x%0*" PRIx64 ",",
2769 lane_len_in_hex,
2770 static_cast<uint64_t>(results[index]));
2771 }
2772 printf("\n");
2773 }
2774
2775 printf("};\n");
2776 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2777 name,
2778 results_length);
2779 } else if (!skipped) {
2780 // Check the results.
2781 VIXL_CHECK(expected_length == results_length);
2782 unsigned error_count = 0;
2783 unsigned counted_length = 0;
2784 const char* padding = " ";
2785 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2786 for (unsigned n = 0; n < inputs_n_length; n++) {
2787 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2788 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2789 bool error_in_vector = false;
2790
2791 counted_length++;
2792
2793 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2794 unsigned output_index =
2795 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2796 (imm1 * inputs_imm2_length * vd_lane_count) +
2797 (imm2 * vd_lane_count) + lane;
2798
2799 if (results[output_index] != expected[output_index]) {
2800 error_in_vector = true;
2801 break;
2802 }
2803 }
2804
2805 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2806 printf("%s\n", name);
2807 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2808 lane_len_in_hex + 1,
2809 padding,
2810 lane_len_in_hex,
2811 padding,
2812 lane_len_in_hex + 1,
2813 padding,
2814 lane_len_in_hex,
2815 padding,
2816 lane_len_in_hex + 1,
2817 padding);
2818
2819 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2820 unsigned output_index =
2821 (n * inputs_imm1_length * inputs_imm2_length *
2822 vd_lane_count) +
2823 (imm1 * inputs_imm2_length * vd_lane_count) +
2824 (imm2 * vd_lane_count) + lane;
2825 unsigned input_index_n =
2826 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2827 inputs_n_length;
2828 unsigned input_index_imm1 = imm1;
2829 unsigned input_index_imm2 = imm2;
2830
2831 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2832 " "
2833 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2834 results[output_index] != expected[output_index] ? '*'
2835 : ' ',
2836 lane_len_in_hex,
2837 static_cast<uint64_t>(inputs_d[lane]),
2838 lane_len_in_hex,
2839 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2840 lane_len_in_hex,
2841 static_cast<uint64_t>(inputs_n[input_index_n]),
2842 lane_len_in_hex,
2843 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2844 lane_len_in_hex,
2845 static_cast<uint64_t>(results[output_index]),
2846 lane_len_in_hex,
2847 static_cast<uint64_t>(expected[output_index]));
2848 }
2849 }
2850 }
2851 }
2852 }
2853 VIXL_ASSERT(counted_length == expected_length);
2854 if (error_count > kErrorReportLimit) {
2855 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2856 }
2857 VIXL_CHECK(error_count == 0);
2858 }
2859 delete[] results;
2860 }
2861
2862
2863 // ==== Floating-point tests. ====
2864
2865
2866 // Standard floating-point test expansion for both double- and single-precision
2867 // operations.
// Turn a bare token into a string literal (used to build the test-trace name).
#define STRINGIFY(s) #s

// Invoke the Test<type> helper (Test1Op, Test2Op, Test3Op, TestCmp, ...) for a
// single <mnemonic>_<variant> combination: passes the raw input table with its
// element count, plus the matching precomputed expected-results table and
// count from test-simulator-traces-aarch64.h.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),    \
             &MacroAssembler::mnemonic,                     \
             input,                                         \
             sizeof(input) / sizeof(input[0]),              \
             kExpected_##mnemonic##_##variant,              \
             kExpectedCount_##mnemonic##_##variant)
2877
// Define TEST() cases for a floating-point instruction in double-precision
// ("_d") and single-precision ("_s") variants.
#define DEFINE_TEST_FP(mnemonic, type, input)                    \
  TEST(mnemonic##_d) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
  }                                                              \
  TEST(mnemonic##_s) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);  \
  }

// As DEFINE_TEST_FP, but additionally defines a half-precision ("_h") variant
// driven by the FP16 input table.
#define DEFINE_TEST_FP_FP16(mnemonic, type, input)                \
  TEST(mnemonic##_d) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
  }                                                               \
  TEST(mnemonic##_s) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
  }                                                               \
  TEST(mnemonic##_h) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input); \
  }
2896
2897
// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
// memory.
// Fused multiply-add family (three source operands).
DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)

// Two-operand arithmetic instructions.
DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)

// Single-operand instructions; the frint* rounding variants use the
// "Conversions" input set to exercise rounding-sensitive values.
DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
// frint32*/frint64* have no FP16 form, so only "_d"/"_s" are defined.
DEFINE_TEST_FP(frint32x, 1Op, Conversions)
DEFINE_TEST_FP(frint64x, 1Op, Conversions)
DEFINE_TEST_FP(frint32z, 1Op, Conversions)
DEFINE_TEST_FP(frint64z, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)
2933
// Scalar FP compare tests. The "z" variants compare against an immediate
// zero, so they use the CmpZero helper instead of Cmp.
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
2938
// FP precision conversion. Variant naming is <dest><source>: "sd" converts
// double inputs to single precision; "ds" converts single inputs to double.
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2941
// Define FP-to-integer conversion tests for every destination register size
// (X: 64-bit, W: 32-bit) and source precision (D: double, S: single, H: half).
// Variant naming is <dest><source>, e.g. "xd" = double to 64-bit integer.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)               \
  TEST(mnemonic##_xd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_xs) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_xh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \
  }                                                                \
  TEST(mnemonic##_wd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_ws) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_wh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \
  }
2961
// FP-to-integer conversions with the various rounding modes; the fcvtz*
// variants go through the fixed-point helpers.
DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)

// fjcvtzs (JavaScript convert) only exists as double to 32-bit ("wd").
#define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)           \
  TEST(mnemonic##_wd) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
  }

DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)

// TODO: Scvtf-fixed-point
// TODO: Scvtf-integer
// TODO: Ucvtf-fixed-point
// TODO: Ucvtf-integer

// TODO: Fccmp
// TODO: Fcsel
2985
2986
2987 // ==== NEON Tests. ====
2988
// One-operand NEON test: Vd <- op(Vn), with independently specified
// destination and source vector formats. The trace-table name only encodes
// the destination format.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)

// Across-lanes variant (vector reduced to a scalar); here the trace-table
// name encodes both the destination and the source formats.
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)   \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
                        vnform),                                             \
                    &MacroAssembler::mnemonic,                               \
                    input_n,                                                 \
                    (sizeof(input_n) / sizeof(input_n[0])),                  \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,         \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,    \
                    kFormat##vdform,                                         \
                    kFormat##vnform)
3009
// Two-operand NEON test: Vd <- op(Vn, Vm). input_d seeds the destination
// register (for accumulating instructions); input_n and input_m drive the two
// source registers.
#define CALL_TEST_NEON_HELPER_2Op(mnemonic,                  \
                                  vdform,                    \
                                  vnform,                    \
                                  vmform,                    \
                                  input_d,                   \
                                  input_n,                   \
                                  input_m)                   \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),     \
              &MacroAssembler::mnemonic,                     \
              input_d,                                       \
              input_n,                                       \
              (sizeof(input_n) / sizeof(input_n[0])),        \
              input_m,                                       \
              (sizeof(input_m) / sizeof(input_m[0])),        \
              kExpected_NEON_##mnemonic##_##vdform,          \
              kExpectedCount_NEON_##mnemonic##_##vdform,     \
              kFormat##vdform,                               \
              kFormat##vnform,                               \
              kFormat##vmform)

// Vector-and-immediate test: Vd <- op(Vn, #imm). The "_2OPIMM" suffix keeps
// its trace tables distinct from the plain two-operand form.
#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                          \
                                     vdform,                            \
                                     vnform,                            \
                                     input_n,                           \
                                     input_m)                           \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM",   \
                 &MacroAssembler::mnemonic,                             \
                 input_n,                                               \
                 (sizeof(input_n) / sizeof(input_n[0])),                \
                 input_m,                                               \
                 (sizeof(input_m) / sizeof(input_m[0])),                \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,         \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,    \
                 kFormat##vdform,                                       \
                 kFormat##vnform)
3045
// By-element test: Vd <- op(Vn, Vm[index]), iterating over the given list of
// lane indices. Trace-table names encode all three vector formats.
#define CALL_TEST_NEON_HELPER_ByElement(mnemonic,                         \
                                        vdform,                           \
                                        vnform,                           \
                                        vmform,                           \
                                        input_d,                          \
                                        input_n,                          \
                                        input_m,                          \
                                        indices)                          \
  TestByElementNEON(                                                      \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(            \
          vnform) "_" STRINGIFY(vmform),                                  \
      &MacroAssembler::mnemonic,                                          \
      input_d,                                                            \
      input_n,                                                            \
      (sizeof(input_n) / sizeof(input_n[0])),                             \
      input_m,                                                            \
      (sizeof(input_m) / sizeof(input_m[0])),                             \
      indices,                                                            \
      (sizeof(indices) / sizeof(indices[0])),                             \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,         \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,    \
      kFormat##vdform,                                                    \
      kFormat##vnform,                                                    \
      kFormat##vmform)

// As CALL_TEST_NEON_HELPER_ByElement, with an extra trailing
// vm_subvector_count argument for the dot-product-style instructions whose Vm
// index selects a sub-vector rather than a single lane.
#define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,             \
                                                    vdform,               \
                                                    vnform,               \
                                                    vmform,               \
                                                    input_d,              \
                                                    input_n,              \
                                                    input_m,              \
                                                    indices,              \
                                                    vm_subvector_count)   \
  TestByElementNEON(                                                      \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(            \
          vnform) "_" STRINGIFY(vmform),                                  \
      &MacroAssembler::mnemonic,                                          \
      input_d,                                                            \
      input_n,                                                            \
      (sizeof(input_n) / sizeof(input_n[0])),                             \
      input_m,                                                            \
      (sizeof(input_m) / sizeof(input_m[0])),                             \
      indices,                                                            \
      (sizeof(indices) / sizeof(indices[0])),                             \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,         \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,    \
      kFormat##vdform,                                                    \
      kFormat##vnform,                                                    \
      kFormat##vmform,                                                    \
      vm_subvector_count)
3097
// Test for the (register, imm, register, imm) shape (e.g. ins/element moves):
// 'helper' is the function that emits the instruction, since the operand
// order varies per mnemonic.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                     \
                                         mnemonic,                   \
                                         vdform,                     \
                                         vnform,                     \
                                         input_d,                    \
                                         input_imm1,                 \
                                         input_n,                    \
                                         input_imm2)                 \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),      \
                     helper,                                         \
                     input_d,                                        \
                     input_imm1,                                     \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])),   \
                     input_n,                                        \
                     (sizeof(input_n) / sizeof(input_n[0])),         \
                     input_imm2,                                     \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])),   \
                     kExpected_NEON_##mnemonic##_##vdform,           \
                     kExpectedCount_NEON_##mnemonic##_##vdform,      \
                     kFormat##vdform,                                \
                     kFormat##vnform)

// "Two registers, same format": a 1Op test where Vd and Vn share a format.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)
3122
// Per-lane-size 2SAME test definitions for integer vectors; composed below
// into the BH / NO2D / full / SD groupings.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)               \
  TEST(mnemonic##_8B) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);   \
  }                                                                  \
  TEST(mnemonic##_16B) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input);  \
  }

#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)                \
  TEST(mnemonic##_4H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input);  \
  }                                                                  \
  TEST(mnemonic##_8H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input);  \
  }

#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                \
  TEST(mnemonic##_2S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input);  \
  }                                                                  \
  TEST(mnemonic##_4S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input);  \
  }

// Byte and half-word arrangements only.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)   \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)

// All arrangements except 2D.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)         \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)

// All integer arrangements including 2D.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                       \
  TEST(mnemonic##_2D) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);  \
  }
// Word and double-word arrangements only.
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                   \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                      \
  TEST(mnemonic##_2D) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);  \
  }
3165
// 2SAME test definitions for floating-point vectors (S/D lanes), optionally
// extended with FP16 (H lanes), plus the scalar forms.
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                   \
  TEST(mnemonic##_2S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);   \
  }                                                                  \
  TEST(mnemonic##_4S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);   \
  }                                                                  \
  TEST(mnemonic##_2D) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input);  \
  }

#define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, input)               \
  DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                          \
  TEST(mnemonic##_4H) {                                               \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##input);  \
  }                                                                   \
  TEST(mnemonic##_8H) {                                               \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##input);  \
  }

// Scalar FP forms: H, S and D registers.
#define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, input)      \
  TEST(mnemonic##_H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##input); \
  }                                                                 \
  TEST(mnemonic##_S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);   \
  }                                                                 \
  TEST(mnemonic##_D) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input);  \
  }

// Scalar integer forms, one macro per register size.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)           \
  TEST(mnemonic##_B) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input);  \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)           \
  TEST(mnemonic##_H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)           \
  TEST(mnemonic##_S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)           \
  TEST(mnemonic##_D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
  }

// All scalar integer sizes.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)

// Scalar S and D sizes only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)        \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
3223
3224
// Across-lanes reductions: a vector source reduces to a single scalar
// destination register.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)

// Same-width reductions (e.g. min/max across lanes): destination lane size
// equals the source lane size.
#define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                        \
  TEST(mnemonic##_B_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_B_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
  }

// Widening reductions (e.g. addlv): destination is one lane size wider than
// the source lanes.
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                   \
  TEST(mnemonic##_H_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_H_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_D_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
  }

// Floating-point reductions: FP16 and single-precision sources.
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                       \
  TEST(mnemonic##_H_4H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##input);  \
  }                                                                       \
  TEST(mnemonic##_H_8H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##input);  \
  }                                                                       \
  TEST(mnemonic##_S_4S) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input);    \
  }
3272
// "Two registers, different formats": a 1Op test where Vd and Vn differ in
// lane size (lengthening or narrowing instructions).
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)

// Lengthening: each destination lane is twice the source lane width.
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                    \
  TEST(mnemonic##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_8H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_1D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  }                                                                     \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }

// Narrowing: destination lanes are half the source width; the "##2" tests
// cover the second-half (high) variants of the instructions.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input)                      \
  TEST(mnemonic##_8B) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input);     \
  }                                                                         \
  TEST(mnemonic##_4H) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input);     \
  }                                                                         \
  TEST(mnemonic##_2S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input);     \
  }                                                                         \
  TEST(mnemonic##2_16B) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
  }                                                                         \
  TEST(mnemonic##2_8H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input);  \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input);  \
  }

// FP lengthening (e.g. fcvtl): H->S and S->D conversions.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input)                     \
  TEST(mnemonic##_4S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input);    \
  }                                                                         \
  TEST(mnemonic##_2D) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input);      \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
  }                                                                         \
  TEST(mnemonic##2_2D) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input);   \
  }

// FP narrowing (e.g. fcvtn): S->H and D->S conversions.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input)                  \
  TEST(mnemonic##_4H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input);     \
  }                                                                        \
  TEST(mnemonic##_2S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
  }                                                                        \
  TEST(mnemonic##2_8H) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input);  \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }

// FP narrowing restricted to D->S (e.g. fcvtxn, which has no S->H form).
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input)               \
  TEST(mnemonic##_2S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }

// Scalar narrowing: source register one size up from the destination.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input)        \
  TEST(mnemonic##_B) {                                               \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input);\
  }                                                                  \
  TEST(mnemonic##_H) {                                               \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input);\
  }                                                                  \
  TEST(mnemonic##_S) {                                               \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input);\
  }

// Scalar result from a vector pair source (e.g. faddp scalar form).
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input)           \
  TEST(mnemonic##_S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input);  \
  }                                                                    \
  TEST(mnemonic##_D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
  }                                                                    \
  TEST(mnemonic##_H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##input);\
  }
3373
// "Three registers, same format": Vd, Vn and Vm all share one arrangement.
// The same input table feeds both source registers; the AccDestination table
// seeds Vd so accumulating forms are exercised.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  {                                                                       \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,                                   \
                              variant,                                    \
                              variant,                                    \
                              variant,                                    \
                              input_d,                                    \
                              input_nm,                                   \
                              input_nm);                                  \
  }

#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)   \
  TEST(mnemonic##_8B) {                                  \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                \
                                8B,                      \
                                kInput8bitsAccDestination, \
                                kInput8bits##input);     \
  }                                                      \
  TEST(mnemonic##_16B) {                                 \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                \
                                16B,                     \
                                kInput8bitsAccDestination, \
                                kInput8bits##input);     \
  }

#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)        \
  TEST(mnemonic##_4H) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                 \
                                4H,                       \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);     \
  }                                                       \
  TEST(mnemonic##_8H) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                 \
                                8H,                       \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);     \
  }                                                       \
  TEST(mnemonic##_2S) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                 \
                                2S,                       \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);     \
  }                                                       \
  TEST(mnemonic##_4S) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                 \
                                4S,                       \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);     \
  }
3424
// 3SAME groupings: all arrangements except 2D, then the full set, then the
// FP and scalar forms.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)     \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)

#define DEFINE_TEST_NEON_3SAME(mnemonic, input)           \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input)            \
  TEST(mnemonic##_2D) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                 \
                                2D,                       \
                                kInput64bitsAccDestination, \
                                kInput64bits##input);     \
  }

#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input)         \
  TEST(mnemonic##_4H) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                4H,                        \
                                kInputFloat16AccDestination, \
                                kInputFloat16##input);     \
  }                                                        \
  TEST(mnemonic##_8H) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                8H,                        \
                                kInputFloat16AccDestination, \
                                kInputFloat16##input);     \
  }                                                        \
  TEST(mnemonic##_2S) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                2S,                        \
                                kInputFloatAccDestination, \
                                kInputFloat##input);       \
  }                                                        \
  TEST(mnemonic##_4S) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                4S,                        \
                                kInputFloatAccDestination, \
                                kInputFloat##input);       \
  }                                                        \
  TEST(mnemonic##_2D) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                2D,                        \
                                kInputDoubleAccDestination, \
                                kInputDouble##input);      \
  }

#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)    \
  TEST(mnemonic##_D) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                D,                          \
                                kInput64bitsAccDestination, \
                                kInput64bits##input);       \
  }

#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input)   \
  TEST(mnemonic##_H) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                H,                          \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##_S) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                S,                          \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);       \
  }

#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input)      \
  TEST(mnemonic##_B) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                B,                          \
                                kInput8bitsAccDestination,  \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##_H) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                H,                          \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##_S) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                S,                          \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##_D) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                D,                          \
                                kInput64bitsAccDestination, \
                                kInput64bits##input);       \
  }

#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input)    \
  TEST(mnemonic##_H) {                                       \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
                                H,                           \
                                kInputFloat16AccDestination, \
                                kInputFloat16##input);       \
  }                                                          \
  TEST(mnemonic##_S) {                                       \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
                                S,                           \
                                kInputFloatAccDestination,   \
                                kInputFloat##input);         \
  }                                                          \
  TEST(mnemonic##_D) {                                       \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
                                D,                           \
                                kInputDoubleAccDestination,  \
                                kInputDouble##input);        \
  }
3537
// FP16 multiply-long (fmlal/fmlsl) tests: half-precision sources accumulate
// into single-precision lanes.
// NOTE(review): the input_d parameter is currently unused — the accumulator
// input is hard-coded to kInputFloatAccDestination. This macro also uses
// CALL_TEST_NEON_HELPER_3DIFF, defined below; that is fine because macro
// bodies are only expanded where DEFINE_TEST_NEON_FHM is invoked.
#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S) {                                           \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                         \
                                2S,                               \
                                2H,                               \
                                2H,                               \
                                kInputFloatAccDestination,        \
                                kInputFloat16##input_n,           \
                                kInputFloat16##input_m);          \
  }                                                               \
  TEST(mnemonic##_4S) {                                           \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                         \
                                4S,                               \
                                4H,                               \
                                4H,                               \
                                kInputFloatAccDestination,        \
                                kInputFloat16##input_n,           \
                                kInputFloat16##input_m);          \
  }

// "Three registers, different formats": fully independent Vd/Vn/Vm
// arrangements, forwarded to the generic two-operand helper.
#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic,  \
                                    vdform,    \
                                    vnform,    \
                                    vmform,    \
                                    input_d,   \
                                    input_n,   \
                                    input_m)   \
  {                                            \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,        \
                              vdform,          \
                              vnform,          \
                              vmform,          \
                              input_d,         \
                              input_n,         \
                              input_m);        \
  }
3574
// Lengthening three-register tests: destination lanes are twice the source
// width; the "##2" tests cover the second-half (high) instruction variants.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)     \
  TEST(mnemonic##_8H) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                8H,                         \
                                8B,                         \
                                8B,                         \
                                kInput16bitsAccDestination, \
                                kInput8bits##input,         \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##2_8H) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                8H,                         \
                                16B,                        \
                                16B,                        \
                                kInput16bitsAccDestination, \
                                kInput8bits##input,         \
                                kInput8bits##input);        \
  }

#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)     \
  TEST(mnemonic##_4S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                4S,                         \
                                4H,                         \
                                4H,                         \
                                kInput32bitsAccDestination, \
                                kInput16bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##2_4S) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                4S,                         \
                                8H,                         \
                                8H,                         \
                                kInput32bitsAccDestination, \
                                kInput16bits##input,        \
                                kInput16bits##input);       \
  }

#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)     \
  TEST(mnemonic##_2D) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                2D,                         \
                                2S,                         \
                                2S,                         \
                                kInput64bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##2_2D) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                2D,                         \
                                4S,                         \
                                4S,                         \
                                kInput64bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput32bits##input);       \
  }

// Composite groupings: S/D destinations, or all three widths.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)       \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)

#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)    \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)    \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)

// Scalar lengthening forms (e.g. sqdmlal S <- H x H, D <- S x S).
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                     \
                                S,                            \
                                H,                            \
                                H,                            \
                                kInput32bitsAccDestination,   \
                                kInput16bits##input,          \
                                kInput16bits##input);         \
  }

#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                     \
                                D,                            \
                                S,                            \
                                S,                            \
                                kInput64bitsAccDestination,   \
                                kInput32bits##input,          \
                                kInput32bits##input);         \
  }

#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input)        \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3669
// Widening three-register tests: Vd and Vn share the wide format while Vm is
// narrow (e.g. saddw); "##2" variants consume the high half of Vm.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input)        \
  TEST(mnemonic##_8H) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                8H,                         \
                                8H,                         \
                                8B,                         \
                                kInput16bitsAccDestination, \
                                kInput16bits##input,        \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##_4S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                4S,                         \
                                4S,                         \
                                4H,                         \
                                kInput32bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##_2D) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                2D,                         \
                                2D,                         \
                                2S,                         \
                                kInput64bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##2_8H) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                8H,                         \
                                8H,                         \
                                16B,                        \
                                kInput16bitsAccDestination, \
                                kInput16bits##input,        \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##2_4S) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                4S,                         \
                                4S,                         \
                                8H,                         \
                                kInput32bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##2_2D) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                2D,                         \
                                2D,                         \
                                4S,                         \
                                kInput64bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput32bits##input);       \
  }

// Narrowing three-register tests: wide sources produce half-width destination
// lanes (e.g. addhn); "##2" variants write the high half of Vd.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input)      \
  TEST(mnemonic##_8B) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                8B,                         \
                                8H,                         \
                                8H,                         \
                                kInput8bitsAccDestination,  \
                                kInput16bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##_4H) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                4H,                         \
                                4S,                         \
                                4S,                         \
                                kInput16bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##_2S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                2S,                         \
                                2D,                         \
                                2D,                         \
                                kInput32bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput64bits##input);       \
  }                                                         \
  TEST(mnemonic##2_16B) {                                   \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                16B,                        \
                                8H,                         \
                                8H,                         \
                                kInput8bitsAccDestination,  \
                                kInput16bits##input,        \
                                kInput16bits##input);       \
  }                                                         \
  TEST(mnemonic##2_8H) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                8H,                         \
                                4S,                         \
                                4S,                         \
                                kInput16bitsAccDestination, \
                                kInput32bits##input,        \
                                kInput32bits##input);       \
  }                                                         \
  TEST(mnemonic##2_4S) {                                    \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                \
                                4S,                         \
                                2D,                         \
                                2D,                         \
                                kInput32bitsAccDestination, \
                                kInput64bits##input,        \
                                kInput64bits##input);       \
  }

// Four-times widening (byte sources into word lanes, e.g. dot products).
#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \
  TEST(mnemonic##_2S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                2S,                         \
                                8B,                         \
                                8B,                         \
                                kInput32bitsAccDestination, \
                                kInput8bits##input,         \
                                kInput8bits##input);        \
  }                                                         \
  TEST(mnemonic##_4S) {                                     \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                   \
                                4S,                         \
                                16B,                        \
                                16B,                        \
                                kInput32bitsAccDestination, \
                                kInput8bits##input,         \
                                kInput8bits##input);        \
  }
3801
3802
// Thin wrapper that forwards to the CALL_TEST_NEON_HELPER_2OpImm helper for
// two-operand-plus-immediate instructions (destination form, source form,
// source input table, immediate input table).
#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                     vdform, \
                                     vnform, \
                                     input_n, \
                                     input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \
                                 vdform, \
                                 vnform, \
                                 input_n, \
                                 input_imm); \
  }
3815
// Defines tests for a vector two-operand-plus-immediate instruction across
// all same-size vector forms (8B/16B/4H/8H/2S/4S/2D), pairing each element
// size with its matching input and immediate tables.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8B, \
                                 8B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 16B, \
                                 16B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3866
// Like DEFINE_TEST_NEON_2OPIMM, but the source operand is a scalar lane
// (B/H/S/D) rather than a full vector form — used for copy-style
// instructions such as DUP (vector, element).
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8B, \
                                 B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 16B, \
                                 B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3917
// Defines tests for a narrowing shift-by-immediate instruction (e.g. SHRN):
// the source form is double the destination element width, the source input
// table matches the source width, and the immediate table matches the
// destination width. The mnemonic##2 tests cover the full-width destination
// forms.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8B, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 16B, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 8H, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 4S, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3961
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_NARROW: narrowing
// shift-by-immediate from a scalar source (H/S/D) to the half-width scalar
// destination (B/H/S).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 B, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3984
// Defines tests for the FP compare-against-zero instructions (FCMGT/FCMEQ/
// FCMLT with a zero operand). The "immediate" operand is always drawn from
// the double-precision immediate table, regardless of element size.
// NOTE(review): the 2S variant hard-codes kInputFloat##Basic instead of
// using kInputFloat##input like the 4S variant. All current users pass
// input=Basic, so this is benign today, but confirm intent before reusing
// this macro with a different input table.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInputFloat##Basic, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }
4021
// Defines tests for vector FP instructions that take an immediate (e.g.
// FCVTZS/FCVTZU with fixed-point fbits), across FP16, single and double
// element forms.
// NOTE(review): the 2S variant hard-codes kInputFloat##Basic while the 4S
// variant uses kInputFloat##input (users below pass input=Conversions).
// Presumably this matches the precomputed trace tables — confirm before
// "fixing", since changing the input set would invalidate the traces.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInputFloat16##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInputFloat16##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInputFloat##Basic, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInputFloat##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInputDouble##input, \
                                 kInput64bitsImm##input_imm); \
  }
4058
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_FP (H/S/D scalar forms).
// NOTE(review): the H and S variants hard-code the Basic input tables and
// ignore the `input` argument; only the D variant uses it. Presumably tied
// to the precomputed trace tables — confirm before changing.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 H, \
                                 kInputFloat16##Basic, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 S, \
                                 kInputFloat##Basic, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 D, \
                                 D, \
                                 kInputDouble##input, \
                                 kInput64bitsImm##input_imm); \
  }
4081
// Like DEFINE_TEST_NEON_2OPIMM but without the byte-sized forms: covers
// only H, S and D element forms (4H/8H/2S/4S/2D). Used for the fixed-point
// conversion instructions SCVTF/UCVTF below.
#define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
4118
// Defines the single D-scalar test for a two-operand-plus-immediate
// instruction; also reused as the tail of the _SCALAR_HSD variant below.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 D, \
                                 D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
4127
// Defines H, S and D scalar tests for a two-operand-plus-immediate
// instruction; delegates the D form to DEFINE_TEST_NEON_2OPIMM_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
4144
// FP variant of DEFINE_TEST_NEON_2OPIMM_SCALAR_D: double-precision scalar
// input and double-precision immediate tables.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 D, \
                                 D, \
                                 kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }
4153
// FP variant of DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD: H/S/D scalar forms with
// FP input tables and the double-precision immediate table; the D form is
// delegated to DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 S, \
                                 kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
4170
// Defines all scalar forms (B/H/S/D) for a two-operand-plus-immediate
// instruction: adds the B test and delegates H/S/D to
// DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 B, \
                                 B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
4180
// Defines tests for a lengthening shift-by-immediate instruction (e.g.
// SSHLL/USHLL/SHLL): the destination elements are double the source element
// width. The mnemonic##2 tests take the source from the full-width (quad)
// source forms.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 8H, \
                                 16B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 4S, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 2D, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
4224
// Thin wrapper that forwards to the by-element dot-product helper. The
// extra vm_subvector_count argument describes how the Vm lane is split into
// sub-elements for the dot product.
#define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                                    vdform, \
                                                    vnform, \
                                                    vmform, \
                                                    input_d, \
                                                    input_n, \
                                                    input_m, \
                                                    indices, \
                                                    vm_subvector_count) \
  { \
    CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic, \
                                                vdform, \
                                                vnform, \
                                                vmform, \
                                                input_d, \
                                                input_n, \
                                                input_m, \
                                                indices, \
                                                vm_subvector_count); \
  }
4245
// Defines by-element dot-product tests (SDOT/UDOT by element): S-sized
// destination lanes accumulated from byte sources, with an indexed S lane
// of Vm split into four byte sub-elements (vm_subvector_count == 4).
#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                               input_d, \
                                               input_n, \
                                               input_m) \
  TEST(mnemonic##_2S_8B_B) { \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                                2S, \
                                                8B, \
                                                B, \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n, \
                                                kInput8bits##input_m, \
                                                kInputSIndices, \
                                                4); \
  } \
  TEST(mnemonic##_4S_16B_B) { \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                                4S, \
                                                16B, \
                                                B, \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n, \
                                                kInput8bits##input_m, \
                                                kInputSIndices, \
                                                4); \
  }
4272
// Thin wrapper that forwards to the generic by-element helper (destination,
// source and indexed-lane forms, their input tables, and the lane indices
// to exercise).
#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                        vdform, \
                                        vnform, \
                                        vmform, \
                                        input_d, \
                                        input_n, \
                                        input_m, \
                                        indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic, \
                                    vdform, \
                                    vnform, \
                                    vmform, \
                                    input_d, \
                                    input_n, \
                                    input_m, \
                                    indices); \
  }
4291
// Defines integer by-element tests across the H and S element forms
// (4H/8H with an indexed H lane, 2S/4S with an indexed S lane).
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4H, \
                                    4H, \
                                    H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    8H, \
                                    8H, \
                                    H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2S, \
                                    2S, \
                                    S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4S, \
                                    S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4333
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT: H and S scalar forms
// with an indexed lane of the same size.
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    H, \
                                    H, \
                                    H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    S, \
                                    S, \
                                    S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4355
// Defines FP by-element tests across FP16 (4H/8H), single (2S/4S) and
// double (2D) element forms, each indexing a lane of matching size.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4H, \
                                    4H, \
                                    H, \
                                    kInputFloat16##input_d, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    8H, \
                                    8H, \
                                    H, \
                                    kInputFloat16##input_d, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2S, \
                                    2S, \
                                    S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4S, \
                                    S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2D, \
                                    2D, \
                                    D, \
                                    kInputDouble##input_d, \
                                    kInputDouble##input_n, \
                                    kInputDouble##input_m, \
                                    kInputDIndices); \
  }
4407
// Defines by-element tests for the FHM instructions (FMLAL/FMLSL by
// element): single-precision destination lanes fed from half-precision
// sources with an indexed H lane. The destination is always the FP
// accumulator input table.
// NOTE(review): the input_d parameter is accepted but unused — the
// destination table is hard-coded to kInputFloatAccDestination.
#define DEFINE_TEST_NEON_FHM_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2S, \
                                    2H, \
                                    H, \
                                    kInputFloatAccDestination, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4H, \
                                    H, \
                                    kInputFloatAccDestination, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  }
4429
// Scalar counterpart of DEFINE_TEST_NEON_FP_BYELEMENT: H, S and D scalar
// forms with an indexed lane of matching size.
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    H, \
                                    H, \
                                    H, \
                                    kInputFloat16##inp_d, \
                                    kInputFloat16##inp_n, \
                                    kInputFloat16##inp_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    S, \
                                    S, \
                                    S, \
                                    kInputFloat##inp_d, \
                                    kInputFloat##inp_n, \
                                    kInputFloat##inp_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_D_D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    D, \
                                    D, \
                                    D, \
                                    kInputDouble##inp_d, \
                                    kInputDouble##inp_n, \
                                    kInputDouble##inp_m, \
                                    kInputDIndices); \
  }
4461
4462
// Defines by-element tests for lengthening instructions (e.g. SMULL/SQDMULL
// by element): the destination elements are double the source width. The
// mnemonic##2 tests read the source from the full-width (quad) forms.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4H, \
                                    H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##2_4S_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
                                    4S, \
                                    8H, \
                                    H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_2D_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2D, \
                                    2S, \
                                    S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##2_2D_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
                                    2D, \
                                    4S, \
                                    S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4504
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT_DIFF: S-from-H and
// D-from-S scalar lengthening forms with an indexed lane of the source
// size.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, \
                                               input_d, \
                                               input_n, \
                                               input_m) \
  TEST(mnemonic##_S_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    S, \
                                    H, \
                                    H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_D_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    D, \
                                    S, \
                                    S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4529
4530
// Thin wrapper for instructions taking two register operands and two
// immediates (e.g. INS element-to-element); forwards to the OpImmOpImm
// helper, passing the MacroAssembler member-function pointer and using the
// same vector form for both operands.
#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                      variant, \
                                      input_d, \
                                      input_imm1, \
                                      input_n, \
                                      input_imm2) \
  { \
    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \
                                     mnemonic, \
                                     variant, \
                                     variant, \
                                     input_d, \
                                     input_imm1, \
                                     input_n, \
                                     input_imm2); \
  }
4547
// Defines two-operand/two-immediate tests for each element size. Note the
// test-name suffix is the element size (B/H/S/D) while the vector form
// passed to the helper is the corresponding full quad form (16B/8H/4S/2D).
#define DEFINE_TEST_NEON_2OP2IMM(mnemonic, \
                                 input_d, \
                                 input_imm1, \
                                 input_n, \
                                 input_imm2) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  16B, \
                                  kInput8bits##input_d, \
                                  kInput8bitsImm##input_imm1, \
                                  kInput8bits##input_n, \
                                  kInput8bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  8H, \
                                  kInput16bits##input_d, \
                                  kInput16bitsImm##input_imm1, \
                                  kInput16bits##input_n, \
                                  kInput16bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  4S, \
                                  kInput32bits##input_d, \
                                  kInput32bitsImm##input_imm1, \
                                  kInput32bits##input_n, \
                                  kInput32bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  2D, \
                                  kInput64bits##input_d, \
                                  kInput64bitsImm##input_imm1, \
                                  kInput64bits##input_n, \
                                  kInput64bitsImm##input_imm2); \
  }
4585
4586
// Advanced SIMD copy.
// INS (element) takes two lane indices; DUP (element) takes one.
DEFINE_TEST_NEON_2OP2IMM(
    ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)


// Advanced SIMD scalar copy.
DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4595
4596
// Advanced SIMD three same.
// One DEFINE_* invocation per instruction; the macro variant selects which
// vector forms the instruction supports (e.g. _NO2D excludes 2D, _HS covers
// only H/S element sizes, _8B_16B is for byte-only logical/polynomial ops).
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4679
4680
// Advanced SIMD scalar three same.
// Scalar-form equivalents of the vector tests above; _SCALAR_D covers only
// the D scalar form, _SCALAR_HS only H/S, _SCALAR all of B/H/S/D.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4715
4716
// Advanced SIMD FHM instructions (FMLAL, FMLSL).
// These are oddballs: they are encoded under the 3SAME group but behave
// quite differently (half-precision sources, single-precision accumulator).
DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)
4724
4725
// Advanced SIMD three different.
// _LONG: double-width destination; _WIDE: double-width destination and
// first source; _NARROW: half-width destination.
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
4753
4754
// Advanced SIMD scalar three different.
// Only the saturating doubling multiply family has scalar lengthening forms.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4759
4760
// Advanced SIMD scalar pairwise.
// ADDP (scalar) only exists as D-from-2D, so it gets a hand-written test
// rather than a DEFINE_* macro.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4770
4771
// Advanced SIMD shift by immediate.
// The immediate table selects the legal shift-amount range for each
// instruction: TypeWidth for right shifts, TypeWidthFromZero for left
// shifts, TypeWidthFromZeroToWidth for fixed-point conversion fbits.
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4805
4806
// Advanced SIMD scalar shift by immediate.
// Scalar-form equivalents of the vector shift-by-immediate tests above.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4836
4837
// Advanced SIMD two-register miscellaneous.
// Each DEFINE_TEST_NEON_* macro expands into simulator trace tests; the macro
// suffix (_NO2D, _8B_16B, _2S_4S, _BH, _FP, _FP16, ...) presumably restricts
// the vector arrangements tested to those the instruction actually supports —
// confirm against the macro definitions if extending this list.
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
// Integer compares against an immediate zero.
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
// FCVTZS (vector, integer) covered by FCVTZS (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
// FRINT32*/FRINT64* have no FP16 form, hence the plain _FP macro.
DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
// FCVTZU (vector, integer) covered by FCVTZU (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
4905
4906
// Advanced SIMD scalar two-register miscellaneous.
// Scalar counterparts of the vector tests above; _SCALAR_D variants presumably
// only exist in the D-sized scalar form — confirm against the macro
// definitions.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
// FCVTZS (vector, integer) covered by FCVTZS (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// FCVTXN is only tested here in its S-from-D scalar form (narrowing a double
// input, hence kInputDoubleConversions), so it does not fit the generic
// scalar DEFINE_TEST_NEON_* macros above; call the helper directly.
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
// Remaining scalar two-register miscellaneous (unsigned conversions and
// reciprocal-estimate instructions).
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
// FCVTZU (vector, integer) covered by FCVTZU (vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
4945
4946
// Advanced SIMD across lanes.
// Horizontal reductions: the _LONG variants produce a widened scalar result.
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4959
4960
// Advanced SIMD permute.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)


// Advanced SIMD vector x indexed element.
// The three `Basic` arguments presumably select the input sets for the
// destination, vector operand, and indexed element — confirm against the
// macro definitions.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)


// Advanced SIMD scalar x indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)


// FP16 multiply-accumulate-long by element (FEAT_FHM).
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
5013
5014
5015 #undef __
5016 #define __ masm->
5017
5018 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && \
5019 defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
5020 (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
5021
5022 // Generate a function that stores zero to a hard-coded address.
GenerateStoreZero(MacroAssembler* masm, int32_t* target)5023 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
5024 masm->Reset();
5025
5026 UseScratchRegisterScope temps(masm);
5027 Register temp = temps.AcquireX();
5028 __ Mov(temp, reinterpret_cast<intptr_t>(target));
5029 __ Str(wzr, MemOperand(temp));
5030 __ Ret();
5031
5032 masm->FinalizeCode();
5033 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5034 }
5035
5036
5037 // Generate a function that stores the `int32_t` argument to a hard-coded
5038 // address.
5039 // In this example and the other below, we use the `abi` object to retrieve
5040 // argument and return locations even though we could easily hard code them.
5041 // This mirrors how more generic code (e.g. templated) user would use these
5042 // mechanisms.
GenerateStoreInput(MacroAssembler* masm, int32_t* target)5043 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
5044 masm->Reset();
5045
5046 ABI abi;
5047 Register input =
5048 Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
5049
5050 UseScratchRegisterScope temps(masm);
5051 Register temp = temps.AcquireX();
5052 __ Mov(temp, reinterpret_cast<intptr_t>(target));
5053 __ Str(input, MemOperand(temp));
5054 __ Ret();
5055
5056 masm->FinalizeCode();
5057 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5058 }
5059
5060
5061 // A minimal implementation of a `pow` function.
GeneratePow(MacroAssembler* masm, unsigned pow)5062 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
5063 masm->Reset();
5064
5065 ABI abi;
5066 Register input =
5067 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5068 Register result =
5069 Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
5070 UseScratchRegisterScope temps(masm);
5071 Register temp = temps.AcquireX();
5072
5073 __ Mov(temp, 1);
5074 for (unsigned i = 0; i < pow; i++) {
5075 __ Mul(temp, temp, input);
5076 }
5077 __ Mov(result, temp);
5078 __ Ret();
5079
5080 masm->FinalizeCode();
5081 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5082 }
5083
5084
GenerateSum(MacroAssembler* masm)5085 Instruction* GenerateSum(MacroAssembler* masm) {
5086 masm->Reset();
5087
5088 ABI abi;
5089 VRegister input_1 =
5090 VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
5091 Register input_2 =
5092 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5093 VRegister input_3 =
5094 VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
5095 VRegister result =
5096 VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
5097
5098 UseScratchRegisterScope temps(masm);
5099 VRegister temp = temps.AcquireD();
5100
5101 __ Fcvt(input_1.D(), input_1);
5102 __ Scvtf(temp, input_2);
5103 __ Fadd(temp, temp, input_1.D());
5104 __ Fadd(result, temp, input_3);
5105 __ Ret();
5106
5107 masm->FinalizeCode();
5108 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5109 }
5110
5111
TEST(RunFrom) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  // A function returning `void` and taking no arguments.
  int32_t stored = 0xbad;
  simulator.RunFrom(GenerateStoreZero(&masm, &stored));
  VIXL_CHECK(stored == 0);

  // A function returning `void` and taking a single argument.
  int32_t input = 0xf00d;
  simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &stored), input);
  VIXL_CHECK(stored == 0xf00d);

  // A function taking one argument and returning a value.
  int64_t pow_result =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
  VIXL_CHECK(pow_result == 1);
  pow_result = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
  VIXL_CHECK(pow_result == 123);
  pow_result = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
  VIXL_CHECK(pow_result == 1024);

  // A function taking multiple arguments in registers.
  double sum = simulator.RunFrom<double, float, int64_t, double>(
      GenerateSum(&masm), 1.0, 2, 3.0);
  VIXL_CHECK(sum == 6.0);
}
5143 #endif
5144
5145
5146 } // namespace aarch64
5147 } // namespace vixl
5148