// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "test-utils-aarch64.h"

#include <cmath>
#include <queue>

#include "test-runner.h"

#include "../test/aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

#define __ masm->

namespace vixl {
namespace aarch64 {


// This value is a signalling NaN as FP64, and also as FP32 or FP16 (taking the
// least-significant bits).
const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f807c01));
const float kFP32SignallingNaN = RawbitsToFloat(0x7f807c01);
const Float16 kFP16SignallingNaN = RawbitsToFloat16(0x7c01);

// A similar value, but as a quiet NaN.
const double kFP64QuietNaN = RawbitsToDouble(UINT64_C(0x7ff800007fc07e01));
const float kFP32QuietNaN = RawbitsToFloat(0x7fc07e01);
const Float16 kFP16QuietNaN = RawbitsToFloat16(0x7e01);


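// The helpers below compare a computed value with an expected value, printing
// a diagnostic on mismatch. The unused RegisterDump* parameter keeps their
// signatures in line with the overloads further down that read the value from
// a register dump, so the tests' ASSERT_EQUAL_* style macros can dispatch to
// either form. A typical (sketched) use from a test, after dumping the state
// into a RegisterDump named `core`:
//   VIXL_CHECK(Equal64(UINT64_C(0x1234), &core, x0));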
bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
  if (result != expected) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
           expected,
           result);
  }

  return expected == result;
}


bool Equal64(uint64_t reference,
             const RegisterDump*,
             uint64_t result,
             ExpectedResult option) {
  switch (option) {
    case kExpectEqual:
      if (result != reference) {
        printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
               reference,
               result);
      }
      break;
    case kExpectNotEqual:
      if (result == reference) {
        printf("Expected a result not equal to 0x%016" PRIx64 "\n", reference);
      }
      break;
  }

  return reference == result;
}


bool Equal128(QRegisterValue expected,
              const RegisterDump*,
              QRegisterValue result) {
  if (!expected.Equals(result)) {
    printf("Expected 0x%016" PRIx64 "%016" PRIx64
           "\t "
           "Found 0x%016" PRIx64 "%016" PRIx64 "\n",
           expected.GetLane<uint64_t>(1),
           expected.GetLane<uint64_t>(0),
           result.GetLane<uint64_t>(1),
           result.GetLane<uint64_t>(0));
  }

  return expected.Equals(result);
}


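// The floating-point comparisons below are bit-exact (they compare raw bit
// patterns), so they distinguish NaN payloads and signed zeroes where a plain
// `==` comparison would not.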
bool EqualFP16(Float16 expected, const RegisterDump*, Float16 result) {
  uint16_t e_rawbits = Float16ToRawbits(expected);
  uint16_t r_rawbits = Float16ToRawbits(result);
  if (e_rawbits == r_rawbits) {
    return true;
  } else {
    if (IsNaN(expected) || IsZero(expected)) {
      printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n",
             e_rawbits,
             r_rawbits);
    } else {
      printf("Expected %.6f (16 bit): (0x%04" PRIx16
             ")\t "
             "Found %.6f (0x%04" PRIx16 ")\n",
             FPToFloat(expected, kIgnoreDefaultNaN),
             e_rawbits,
             FPToFloat(result, kIgnoreDefaultNaN),
             r_rawbits);
    }
    return false;
  }
}


bool EqualFP32(float expected, const RegisterDump*, float result) {
  if (FloatToRawbits(expected) == FloatToRawbits(result)) {
    return true;
  } else {
    if (IsNaN(expected) || (expected == 0.0)) {
      printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
             FloatToRawbits(expected),
             FloatToRawbits(result));
    } else {
      printf("Expected %.9f (0x%08" PRIx32
             ")\t "
             "Found %.9f (0x%08" PRIx32 ")\n",
             expected,
             FloatToRawbits(expected),
             result,
             FloatToRawbits(result));
    }
    return false;
  }
}


bool EqualFP64(double expected, const RegisterDump*, double result) {
  if (DoubleToRawbits(expected) == DoubleToRawbits(result)) {
    return true;
  }

  if (IsNaN(expected) || (expected == 0.0)) {
    printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
           DoubleToRawbits(expected),
           DoubleToRawbits(result));
  } else {
    printf("Expected %.17f (0x%016" PRIx64
           ")\t "
           "Found %.17f (0x%016" PRIx64 ")\n",
           expected,
           DoubleToRawbits(expected),
           result,
           DoubleToRawbits(result));
  }
  return false;
}


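// The overloads below read the value to compare from a RegisterDump. For W, S
// and H registers they also check that the upper bits of the containing X or D
// register were cleared.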
bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) {
  VIXL_ASSERT(reg.Is32Bits());
  // Retrieve the corresponding X register so we can check that the upper part
  // was properly cleared.
  int64_t result_x = core->xreg(reg.GetCode());
  if ((result_x & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n",
           expected,
           result_x);
    return false;
  }
  uint32_t result_w = core->wreg(reg.GetCode());
  return Equal32(expected, core, result_w);
}


bool Equal64(uint64_t reference,
             const RegisterDump* core,
             const Register& reg,
             ExpectedResult option) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return Equal64(reference, core, result, option);
}


bool NotEqual64(uint64_t reference,
                const RegisterDump* core,
                const Register& reg) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return NotEqual64(reference, core, result);
}


bool Equal128(uint64_t expected_h,
              uint64_t expected_l,
              const RegisterDump* core,
              const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is128Bits());
  QRegisterValue expected;
  expected.SetLane(0, expected_l);
  expected.SetLane(1, expected_h);
  QRegisterValue result = core->qreg(vreg.GetCode());
  return Equal128(expected, core, result);
}


bool EqualFP16(Float16 expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is16Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffffffff0000) != 0) {
    printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n",
           Float16ToRawbits(expected),
           FPToFloat(expected, kIgnoreDefaultNaN),
           result_64);
    return false;
  }
  return EqualFP16(expected, core, core->hreg(fpreg.GetCode()));
}


bool EqualFP32(float expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is32Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n",
           FloatToRawbits(expected),
           expected,
           result_64);
    return false;
  }

  return EqualFP32(expected, core, core->sreg(fpreg.GetCode()));
}


bool EqualFP64(double expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is64Bits());
  return EqualFP64(expected, core, core->dreg(fpreg.GetCode()));
}


bool Equal64(const Register& reg0,
             const RegisterDump* core,
             const Register& reg1,
             ExpectedResult option) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t reference = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return Equal64(reference, core, result, option);
}


bool NotEqual64(const Register& reg0,
                const RegisterDump* core,
                const Register& reg1) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t expected = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return NotEqual64(expected, core, result);
}


bool Equal64(uint64_t expected,
             const RegisterDump* core,
             const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is64Bits());
  uint64_t result = core->dreg_bits(vreg.GetCode());
  return Equal64(expected, core, result);
}


static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; }


static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; }


static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; }


static char FlagV(uint32_t flags) { return (flags & VFlag) ? 'V' : 'v'; }


bool EqualNzcv(uint32_t expected, uint32_t result) {
  VIXL_ASSERT((expected & ~NZCVFlag) == 0);
  VIXL_ASSERT((result & ~NZCVFlag) == 0);
  if (result != expected) {
    printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n",
           FlagN(expected),
           FlagZ(expected),
           FlagC(expected),
           FlagV(expected),
           FlagN(result),
           FlagZ(result),
           FlagC(result),
           FlagV(result));
    return false;
  }

  return true;
}


bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) {
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (a->xreg(i) != b->xreg(i)) {
      printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i,
             a->xreg(i),
             b->xreg(i));
      return false;
    }
  }

  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    uint64_t a_bits = a->dreg_bits(i);
    uint64_t b_bits = b->dreg_bits(i);
    if (a_bits != b_bits) {
      printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i,
             a_bits,
             b_bits);
      return false;
    }
  }

  return true;
}

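// Check a single lane of an SVE Z or P register against `expected`, printing
// the architectural register name and lane index on mismatch.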
bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const ZRegister& reg,
                  int lane) {
  unsigned lane_size = reg.GetLaneSizeInBits();
  // For convenience in the tests, we allow negative values to be passed into
  // `expected`, but truncate them to an appropriately-sized unsigned value for
  // the check. For example, in `EqualSVELane(-1, core, z0.VnB())`, the expected
  // value is truncated from 0xffffffffffffffff to 0xff before the comparison.
  VIXL_ASSERT(IsUintN(lane_size, expected) ||
              IsIntN(lane_size, RawbitsToInt64(expected)));
  expected &= GetUintMask(lane_size);

  uint64_t result = core->zreg_lane(reg.GetCode(), lane_size, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = lane_size / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(),
           lane,
           lane_size_in_hex_chars,
           expected,
           lane_size_in_hex_chars,
           result);
    return false;
  }
  return true;
}

bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const PRegister& reg,
                  int lane) {
  VIXL_ASSERT(reg.HasLaneSize());
  VIXL_ASSERT((reg.GetLaneSizeInBits() % kZRegBitsPerPRegBit) == 0);
  unsigned p_bits_per_lane = reg.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT(IsUintN(p_bits_per_lane, expected));
  expected &= GetUintMask(p_bits_per_lane);

  uint64_t result = core->preg_lane(reg.GetCode(), p_bits_per_lane, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = (p_bits_per_lane + 3) / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(),
           lane,
           lane_size_in_hex_chars,
           expected,
           lane_size_in_hex_chars,
           result);
    return false;
  }
  return true;
}

struct EqualMemoryChunk {
  typedef uint64_t RawChunk;

  uintptr_t address;
  RawChunk expected;
  RawChunk result;

  bool IsEqual() const { return expected == result; }
};

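// Compare `size_in_bytes` of memory. On mismatch, print the mismatching 64-bit
// chunks side by side, with a small amount of surrounding context.
// `zero_offset` marks the position within `result` that is reported as offset
// zero in the printed "result +/- N" column, so earlier addresses are shown as
// negative offsets.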
bool EqualMemory(const void* expected,
                 const void* result,
                 size_t size_in_bytes,
                 size_t zero_offset) {
  if (memcmp(expected, result, size_in_bytes) == 0) return true;

  // Read 64-bit chunks, and print them side-by-side if they don't match.

  // Remember the last few chunks, even if they matched, so we can print some
  // context. We don't want to print the whole buffer, because it could be huge.
  static const size_t kContextLines = 1;
  std::queue<EqualMemoryChunk> context;
  static const size_t kChunkSize = sizeof(EqualMemoryChunk::RawChunk);

  // This assumption keeps the logic simple, and is acceptable for our tests.
  VIXL_ASSERT((size_in_bytes % kChunkSize) == 0);

  const char* expected_it = reinterpret_cast<const char*>(expected);
  const char* result_it = reinterpret_cast<const char*>(result);

  // This is the first error, so print a header row.
  printf("  Address (of result)                  Expected           Result\n");

  // Always print some context at the start of the buffer.
  uintptr_t print_context_to =
      reinterpret_cast<uintptr_t>(result) + (kContextLines + 1) * kChunkSize;
  for (size_t i = 0; i < size_in_bytes; i += kChunkSize) {
    EqualMemoryChunk chunk;
    chunk.address = reinterpret_cast<uintptr_t>(result_it);
    memcpy(&chunk.expected, expected_it, kChunkSize);
    memcpy(&chunk.result, result_it, kChunkSize);

    while (context.size() > kContextLines) context.pop();
    context.push(chunk);

    // Print context after an error, and at the end of the buffer.
    if (!chunk.IsEqual() || ((i + kChunkSize) >= size_in_bytes)) {
      if (chunk.address > print_context_to) {
        // We aren't currently printing context, so separate this context from
        // the previous block.
        printf("...\n");
      }
      print_context_to = chunk.address + (kContextLines + 1) * kChunkSize;
    }

    // Print context (including the current line).
    while (!context.empty() && (context.front().address < print_context_to)) {
      uintptr_t address = context.front().address;
      uint64_t offset = address - reinterpret_cast<uintptr_t>(result);
      bool is_negative = (offset < zero_offset);
      printf("0x%016" PRIxPTR " (result %c %5" PRIu64 "): 0x%016" PRIx64
             " 0x%016" PRIx64 "\n",
             address,
             (is_negative ? '-' : '+'),
             (is_negative ? (zero_offset - offset) : (offset - zero_offset)),
             context.front().expected,
             context.front().result);
      context.pop();
    }

    expected_it += kChunkSize;
    result_it += kChunkSize;
  }

  return false;
}
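

// Populate the given arrays (any of which may be NULL) with registers drawn
// from `allowed`, and return the list of register codes that were assigned.
// `r` receives registers of `reg_size` bits, while `x` and `w` receive the X
// and W views of the same register codes.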
RegList PopulateRegisterArray(Register* w,
                              Register* x,
                              Register* r,
                              int reg_size,
                              int reg_count,
                              RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (r) {
        r[i] = Register(n, reg_size);
      }
      if (x) {
        x[i] = Register(n, kXRegSize);
      }
      if (w) {
        w[i] = Register(n, kWRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfRegisters) == reg_count);

  return list;
}


RegList PopulateVRegisterArray(VRegister* s,
                               VRegister* d,
                               VRegister* v,
                               int reg_size,
                               int reg_count,
                               RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfVRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (v) {
        v[i] = VRegister(n, reg_size);
      }
      if (d) {
        d[i] = VRegister(n, kDRegSize);
      }
      if (s) {
        s[i] = VRegister(n, kSRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfVRegisters) == reg_count);

  return list;
}


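// Overwrite every X register in `reg_list` with `value`. The literal is
// materialised once and then copied into the remaining registers; sp must not
// be in the list, and the zero register is skipped.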
void Clobber(MacroAssembler* masm, RegList reg_list, uint64_t const value) {
  Register first = NoReg;
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      Register xn(i, kXRegSize);
      // We should never write into sp here.
      VIXL_ASSERT(!xn.Is(sp));
      if (!xn.IsZero()) {
        if (!first.IsValid()) {
          // This is the first register we've hit, so construct the literal.
          __ Mov(xn, value);
          first = xn;
        } else {
          // We've already loaded the literal, so re-use the value already
          // loaded into the first register we hit.
          __ Mov(xn, first);
        }
      }
    }
  }
}


void ClobberFP(MacroAssembler* masm, RegList reg_list, double const value) {
  VRegister first = NoVReg;
  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      VRegister dn(i, kDRegSize);
      if (!first.IsValid()) {
        // This is the first register we've hit, so construct the literal.
        __ Fmov(dn, value);
        first = dn;
      } else {
        // We've already loaded the literal, so re-use the value already loaded
        // into the first register we hit.
        __ Fmov(dn, first);
      }
    }
  }
}


void Clobber(MacroAssembler* masm, CPURegList reg_list) {
  if (reg_list.GetType() == CPURegister::kRegister) {
    // This will always clobber X registers.
    Clobber(masm, reg_list.GetList());
  } else if (reg_list.GetType() == CPURegister::kVRegister) {
    // This will always clobber D registers.
    ClobberFP(masm, reg_list.GetList());
  } else {
    VIXL_UNIMPLEMENTED();
  }
}

// TODO: Once registers have sufficiently compatible interfaces, merge the two
// DumpRegisters templates.
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i);
    __ Str(reg, SVEMemOperand(dump));
    __ Add(dump, dump, reg.GetMaxSizeInBytes());
  }
}

template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset,
                          int reg_size_in_bytes) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i, reg_size_in_bytes * kBitsPerByte);
    __ Str(reg, MemOperand(dump));
    __ Add(dump, dump, reg_size_in_bytes);
  }
}

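// Emit code that records the current processor state (core registers, NEON/FP
// registers, SVE registers where available, plus sp and the NZCV flags) into
// this RegisterDump, so that tests can inspect it after the generated code has
// run.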
void RegisterDump::Dump(MacroAssembler* masm) {
  VIXL_ASSERT(__ StackPointer().Is(sp));

  dump_cpu_features_ = *masm->GetCPUFeatures();

  // We need some scratch registers, but we also need to dump them, so we have
  // to control exactly which registers are used, and dump them separately.
  CPURegList scratch_registers(x0, x1, x2, x3);

  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();
  __ PushCPURegList(scratch_registers);
  temps.Include(scratch_registers);

  Register dump_base = temps.AcquireX();
  Register tmp = temps.AcquireX();

  // Offsets into the dump_ structure.
  const int x_offset = offsetof(dump_t, x_);
  const int w_offset = offsetof(dump_t, w_);
  const int d_offset = offsetof(dump_t, d_);
  const int s_offset = offsetof(dump_t, s_);
  const int h_offset = offsetof(dump_t, h_);
  const int q_offset = offsetof(dump_t, q_);
  const int z_offset = offsetof(dump_t, z_);
  const int p_offset = offsetof(dump_t, p_);
  const int sp_offset = offsetof(dump_t, sp_);
  const int wsp_offset = offsetof(dump_t, wsp_);
  const int flags_offset = offsetof(dump_t, flags_);
  const int vl_offset = offsetof(dump_t, vl_);

  // Load the address where we will dump the state.
  __ Mov(dump_base, reinterpret_cast<uintptr_t>(&dump_));

  // Dump the stack pointer (sp and wsp).
  // The stack pointer cannot be stored directly; it needs to be moved into
  // another register first. Also, we pushed four X registers, so we need to
  // compensate here.
  __ Add(tmp, sp, 4 * kXRegSizeInBytes);
  __ Str(tmp, MemOperand(dump_base, sp_offset));
  __ Add(tmp.W(), wsp, 4 * kXRegSizeInBytes);
  __ Str(tmp.W(), MemOperand(dump_base, wsp_offset));

  // Dump core registers.
  DumpRegisters<Register>(masm, dump_base, x_offset, kXRegSizeInBytes);
  DumpRegisters<Register>(masm, dump_base, w_offset, kWRegSizeInBytes);

  // Dump NEON and FP registers.
  DumpRegisters<VRegister>(masm, dump_base, q_offset, kQRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, s_offset, kSRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, h_offset, kHRegSizeInBytes);

  // Dump SVE registers.
  if (CPUHas(CPUFeatures::kSVE)) {
    DumpRegisters<ZRegister>(masm, dump_base, z_offset);
    DumpRegisters<PRegister>(masm, dump_base, p_offset);

    // Record the vector length.
    __ Rdvl(tmp, kBitsPerByte);
    __ Str(tmp, MemOperand(dump_base, vl_offset));
  }

  // Dump the flags.
  __ Mrs(tmp, NZCV);
  __ Str(tmp, MemOperand(dump_base, flags_offset));

  // To dump the values we used as scratch registers, we need a new scratch
  // register. We can use any of the already dumped registers since we can
  // easily restore them.
  Register dump2_base = x10;
  VIXL_ASSERT(!scratch_registers.IncludesAliasOf(dump2_base));

  VIXL_ASSERT(scratch_registers.IncludesAliasOf(dump_base));

  // Ensure that we don't try to use the scratch registers again.
  temps.ExcludeAll();

  // Don't lose the dump_ address.
  __ Mov(dump2_base, dump_base);

  __ PopCPURegList(scratch_registers);

  while (!scratch_registers.IsEmpty()) {
    CPURegister reg = scratch_registers.PopLowestIndex();
    Register x = reg.X();
    Register w = reg.W();
    unsigned code = reg.GetCode();
    __ Str(x, MemOperand(dump2_base, x_offset + (code * kXRegSizeInBytes)));
    __ Str(w, MemOperand(dump2_base, w_offset + (code * kWRegSizeInBytes)));
  }

  // Finally, restore dump2_base.
  __ Ldr(dump2_base,
         MemOperand(dump2_base,
                    x_offset + (dump2_base.GetCode() * kXRegSizeInBytes)));

  completed_ = true;
}

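// Return the raw bit pattern of a signalling NaN of the requested width (16,
// 32 or 64 bits). For example, GetSignallingNan(kSRegSize) returns 0x7f807c01.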
uint64_t GetSignallingNan(int size_in_bits) {
  switch (size_in_bits) {
    case kHRegSize:
      return Float16ToRawbits(kFP16SignallingNaN);
    case kSRegSize:
      return FloatToRawbits(kFP32SignallingNaN);
    case kDRegSize:
      return DoubleToRawbits(kFP64SignallingNaN);
    default:
      VIXL_UNIMPLEMENTED();
      return 0;
  }
}

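// Return true if a test requiring `required` CPU features can run in this
// environment: always true under the simulator, otherwise decided from the
// features the OS reports. When a test is skipped, the message is printed only
// the first time, tracked via `queried_can_run`.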
bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
  bool log_if_missing = true;
  if (queried_can_run != NULL) {
    log_if_missing = !*queried_can_run;
    *queried_can_run = true;
  }

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  // The Simulator can run any test that VIXL can assemble.
  USE(required);
  USE(log_if_missing);
  return true;
#else
  CPUFeatures cpu = CPUFeatures::InferFromOS();
  // If InferFromOS fails, assume that basic features are present.
  if (cpu.HasNoFeatures()) cpu = CPUFeatures::AArch64LegacyBaseline();
  VIXL_ASSERT(cpu.Has(kInfrastructureCPUFeatures));

  if (cpu.Has(required)) return true;

  if (log_if_missing) {
    CPUFeatures missing = required.Without(cpu);
    // Note: This message needs to match REGEXP_MISSING_FEATURES from
    // tools/threaded_test.py.
    std::cout << "SKIPPED: Missing features: { " << missing << " }\n";
    std::cout << "This test requires the following features to run its "
                 "generated code on this CPU: "
              << required << "\n";
  }
  return false;
#endif
}

// Note that this function assumes p0, p1, p2 and p3 are set to all true in b-,
// h-, s- and d-lane sizes respectively, and that p4 and p5 are clobbered as
// temporary predicates.
template <typename T, size_t N>
void SetFpData(MacroAssembler* masm,
               int esize,
               const T (&values)[N],
               uint64_t lcg_mult) {
  uint64_t a = 0;
  uint64_t b = lcg_mult;
  // Set up p4 to select which element slots of each register to populate,
  // based on the floating-point type being initialised.
  __ Pfalse(p5.VnB());
  switch (esize) {
    case kHRegSize:
      a = Float16ToRawbits(Float16(1.5));
      // Pick a convenient number that lies within the normal half-precision
      // range.
      b = Float16ToRawbits(Float16(lcg_mult % 1024));
      // Step 1: Set fp16 numbers in the as-yet-undefined registers.
      //      p4< 15:0>: 0b0101010101010101
      // z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
      __ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
      break;
    case kSRegSize:
      a = FloatToRawbits(1.5);
      b = FloatToRawbits(lcg_mult);
      // Step 2: Set fp32 numbers in the registers, on top of the fp16 values
      // initialised previously.
      //      p4< 15:0>: 0b0000000100000001
      // z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
      __ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
      break;
    case kDRegSize:
      a = DoubleToRawbits(1.5);
      b = DoubleToRawbits(lcg_mult);
      // Step 3: Set fp64 numbers in the registers, on top of the fp16 and fp32
      // values initialised previously.
      //      p4< 15:0>: 0b0000000000000001
      // z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
      __ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }

  __ Dup(z30.WithLaneSize(esize), a);
  __ Dup(z31.WithLaneSize(esize), b);

  for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
    // As floating point operations on random values have a tendency to
    // converge on special-case numbers like NaNs, use normal floating point
    // values as the seed instead.
    InsrHelper(masm, z0.WithLaneSize(esize), values);
  }

  __ Fmla(z0.WithLaneSize(esize),
          p4.Merging(),
          z30.WithLaneSize(esize),
          z0.WithLaneSize(esize),
          z31.WithLaneSize(esize),
          FastNaNPropagation);

  for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
    __ Fmla(ZRegister(i).WithLaneSize(esize),
            p4.Merging(),
            z30.WithLaneSize(esize),
            ZRegister(i - 1).WithLaneSize(esize),
            z31.WithLaneSize(esize),
            FastNaNPropagation);
  }

  __ Fmul(z31.WithLaneSize(esize),
          p4.Merging(),
          z31.WithLaneSize(esize),
          z30.WithLaneSize(esize),
          FastNaNPropagation);
  __ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
}

// Set z0 - z31 to some normal floating point data.
void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
  // Initialise each Z register to a mixture of fp16/32/64 values with the
  // following pattern:
  // z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64 repeatedly throughout the
  // register.
  //
  // For example:
  // z{code}<2047:1920>: 0x{<      fp64      ><  fp32  ><fp16><fp16>}
  // ...
  // z{code}< 127:   0>: 0x{<      fp64      ><  fp32  ><fp16><fp16>}
  //
  // To produce this mixture, the initialisation steps must be called in the
  // following order.
  SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
  SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
  SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
}

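// Set the general purpose, SVE vector and predicate registers to reproducible
// pseudo-random data (integer or floating-point, depending on `input_set`),
// generated with a linear congruential generator, so that tests start from a
// well-defined machine state.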
void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
  USE(input_set);
  uint64_t lcg_mult = 6364136223846793005;

  // Set x0 - x30 to pseudo-random data.
  __ Mov(x29, 1);  // LCG increment.
  __ Mov(x30, lcg_mult);
  __ Mov(x0, 42);  // LCG seed.

  __ Cmn(x0, 0);  // Clear NZCV flags for later.

  __ Madd(x0, x0, x30, x29);  // First pseudo-random number.

  // Registers 1 - 29.
  for (unsigned i = 1; i < 30; i++) {
    __ Madd(XRegister(i), XRegister(i - 1), x30, x29);
  }
  __ Mul(x30, x29, x30);
  __ Add(x30, x30, 1);


  // Set first four predicate registers to true for increasing lane sizes.
  __ Ptrue(p0.VnB());
  __ Ptrue(p1.VnH());
  __ Ptrue(p2.VnS());
  __ Ptrue(p3.VnD());

  // Set z0 - z31 to pseudo-random data.
  if (input_set == kIntInputSet) {
    __ Dup(z30.VnD(), 1);
    __ Dup(z31.VnD(), lcg_mult);
    __ Index(z0.VnB(), -16, 13);  // LCG seeds.

    __ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
    for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
      __ Mla(ZRegister(i).VnD(),
             p0.Merging(),
             z30.VnD(),
             ZRegister(i - 1).VnD(),
             z31.VnD());
    }
    __ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
    __ Add(z31.VnD(), z31.VnD(), 1);

  } else {
    VIXL_ASSERT(input_set == kFpInputSet);
    InitialiseRegisterFp(masm, lcg_mult);
  }

  // Set remaining predicate registers based on earlier pseudo-random data.
  for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
    __ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
  }
  for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
    __ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Mov(PRegister(i), p0);
  }
  __ Ptrue(p0.VnB());

  // At this point, only sp and a few status registers are undefined. These
  // must be ignored when computing the state hash.
}

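// Compute a CRC32-based hash of the machine state set up above (general
// purpose registers, NZCV, and the full SVE vector and predicate registers)
// and store the 32-bit result to `dst`. The hash covers every vector bit, so
// it is only comparable between runs using the same vector length.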
void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
  // Use explicit registers, to avoid hash order varying if
  // UseScratchRegisterScope changes.
  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();
  Register t0 = w0;
  Register t1 = x1;

  // Compute hash of x0 - x30.
  __ Push(t0.X(), t1);
  __ Crc32x(t0, wzr, t0.X());
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (i == xzr.GetCode()) continue;   // Skip sp.
    if (t0.Is(WRegister(i))) continue;  // Skip t0, as it's already hashed.
    __ Crc32x(t0, t0, XRegister(i));
  }

  // Hash the status flags.
  __ Mrs(t1, NZCV);
  __ Crc32x(t0, t0, t1);

  // Acquire another temp, as integer registers have been hashed already.
  __ Push(x30, xzr);
  Register t2 = x30;

  // Compute hash of all bits in z0 - z31. This implies different hashes are
  // produced for machines of different vector length.
  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    __ Rdvl(t2, 1);
    __ Lsr(t2, t2, 4);
    Label vl_loop;
    __ Bind(&vl_loop);
    __ Umov(t1, VRegister(i).V2D(), 0);
    __ Crc32x(t0, t0, t1);
    __ Umov(t1, VRegister(i).V2D(), 1);
    __ Crc32x(t0, t0, t1);
    __ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);
    __ Sub(t2, t2, 1);
    __ Cbnz(t2, &vl_loop);
  }

  // Hash predicate registers. For simplicity, this writes the predicate
  // registers to a zero-initialised area of stack of the maximum size required
  // for P registers. It then computes a hash of that entire stack area.
  unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;

  // Zero claimed stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Push(xzr, xzr);
  }

  // Store all P registers to the stack.
  __ Mov(t1, sp);
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    __ Str(PRegister(i), SVEMemOperand(t1));
    __ Add(t1, t1, kPRegMaxSizeInBytes);
  }

  // Hash the entire stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Pop(t1, t2);
    __ Crc32x(t0, t0, t1);
    __ Crc32x(t0, t0, t2);
  }

  __ Mov(t1, reinterpret_cast<uint64_t>(dst));
  __ Str(t0, MemOperand(t1));

  __ Pop(xzr, x30);
  __ Pop(t1, t0.X());
}

}  // namespace aarch64
}  // namespace vixl