// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "../utils-vixl.h"

#include "cpu-aarch64.h"

namespace vixl {
namespace aarch64 {

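// Each ID register field below is identified by the least-significant bit of
// its position in the 64-bit register; most fields are four bits wide. The
// extra constructor arguments (where given) are either a signedness marker or
// a field width -- see the Field class in cpu-aarch64.h for the exact
// signature. AA64PFR0's FP and AdvSIMD fields are signed because a value of
// 0b1111 means "not implemented", which is why GetCPUFeatures() tests them
// with `>= 0` rather than `>= 1`.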
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);

const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);

const IDRegister::Field AA64MMFR0::kECV(60);

const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);

const IDRegister::Field AA64MMFR2::kAT(32);

const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);

const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);

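// Each ID register's GetCPUFeatures() maps its raw field values onto VIXL's
// CPUFeatures. ID register field values are broadly cumulative in the Arm
// architecture -- a value of N generally implies every feature defined for
// lower non-zero values -- hence the cascading `>=` checks below. The PAuth
// APA/API fields are a notable exception and are handled explicitly.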
CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
  return f;
}

CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
  return f;
}

CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
  return f;
}

CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);

  // Only one of these fields should be non-zero, but they have the same
  // encodings, so merge the logic.
  int apx = std::max(Get(kAPI), Get(kAPA));
  if (apx >= 1) {
    f.Combine(CPUFeatures::kPAuth);
    // APA (rather than API) indicates QARMA.
    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
  }

  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}

CPUFeatures AA64ISAR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
  return f;
}

CPUFeatures AA64MMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
  return f;
}

CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
  return f;
}

CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
  return f;
}

CPUFeatures AA64ZFR0::GetCPUFeatures() const {
  // This register is only available with SVE, but reads-as-zero in its absence,
  // so it's always safe to read it.
  CPUFeatures f;
  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
  return f;
}

CPUFeatures AA64SMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
  return f;
}

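// Extract a single ID register field as an integer, sign-extending it for
// signed fields. For example, with a raw AA64ISAR0 value of
// 0x0000000000000020, Get(kAES) reads bits [7:4] and returns 2, so both kAES
// and kPmull1Q are reported by AA64ISAR0::GetCPUFeatures() above.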
int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}

CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
  f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}

CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#ifdef VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
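  // The positions are assumed to follow the Linux kernel's AArch64 HWCAP and
  // HWCAP2 bit numbering (arch/arm64/include/uapi/asm/hwcap.h): bit i of
  // getauxval(AT_HWCAP) selects kFeatureBitsLow[i], and bit i of AT_HWCAP2
  // selects kFeatureBitsHigh[i].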
  static const CPUFeatures::Feature kFeatureBitsLow[] =
      {// Bits 0-7
       CPUFeatures::kFP,
       CPUFeatures::kNEON,
       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
       CPUFeatures::kAES,
       CPUFeatures::kPmull1Q,
       CPUFeatures::kSHA1,
       CPUFeatures::kSHA2,
       CPUFeatures::kCRC32,
       // Bits 8-15
       CPUFeatures::kAtomics,
       CPUFeatures::kFPHalf,
       CPUFeatures::kNEONHalf,
       CPUFeatures::kIDRegisterEmulation,
       CPUFeatures::kRDM,
       CPUFeatures::kJSCVT,
       CPUFeatures::kFcma,
       CPUFeatures::kRCpc,
       // Bits 16-23
       CPUFeatures::kDCPoP,
       CPUFeatures::kSHA3,
       CPUFeatures::kSM3,
       CPUFeatures::kSM4,
       CPUFeatures::kDotProduct,
       CPUFeatures::kSHA512,
       CPUFeatures::kSVE,
       CPUFeatures::kFHM,
       // Bits 24-31
       CPUFeatures::kDIT,
       CPUFeatures::kUSCAT,
       CPUFeatures::kRCpcImm,
       CPUFeatures::kFlagM,
       CPUFeatures::kSSBSControl,
       CPUFeatures::kSB,
       CPUFeatures::kPAuth,
       CPUFeatures::kPAuthGeneric};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);

  static const CPUFeatures::Feature kFeatureBitsHigh[] =
      {// Bits 0-7
       CPUFeatures::kDCCVADP,
       CPUFeatures::kSVE2,
       CPUFeatures::kSVEAES,
       CPUFeatures::kSVEPmull128,
       CPUFeatures::kSVEBitPerm,
       CPUFeatures::kSVESHA3,
       CPUFeatures::kSVESM4,
       CPUFeatures::kAXFlag,
       // Bits 8-15
       CPUFeatures::kFrintToFixedSizedInt,
       CPUFeatures::kSVEI8MM,
       CPUFeatures::kSVEF32MM,
       CPUFeatures::kSVEF64MM,
       CPUFeatures::kSVEBF16,
       CPUFeatures::kI8MM,
       CPUFeatures::kBF16,
       CPUFeatures::kDGH,
       // Bits 16-23
       CPUFeatures::kRNG,
       CPUFeatures::kBTI,
       CPUFeatures::kMTE,
       CPUFeatures::kECV,
       CPUFeatures::kAFP,
       CPUFeatures::kRPRES,
       CPUFeatures::kMTE3,
       CPUFeatures::kSME,
       // Bits 24-31
       CPUFeatures::kSMEi16i64,
       CPUFeatures::kSMEf64f64,
       CPUFeatures::kSMEi8i32,
       CPUFeatures::kSMEf16f32,
       CPUFeatures::kSMEb16f32,
       CPUFeatures::kSMEf32f32,
       CPUFeatures::kSMEfa64,
       CPUFeatures::kWFXT,
       // Bits 32-39
       CPUFeatures::kEBF16,
       CPUFeatures::kSVE_EBF16};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);

  auto combine_features = [&features](uint64_t hwcap,
                                      const CPUFeatures::Feature* feature_array,
                                      size_t features_size) {
    for (size_t i = 0; i < features_size; i++) {
      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
    }
  };

  uint64_t hwcap_low = getauxval(AT_HWCAP);
  uint64_t hwcap_high = getauxval(AT_HWCAP2);

  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));

  // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support.
  if (features.Has(CPUFeatures::kMTE)) {
    features.Combine(CPUFeatures::kMTEInstructions);
  }
#endif  // VIXL_USE_LINUX_HWCAP

  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}

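// Expand VIXL_AARCH64_ID_REG_LIST to define a Read<reg>() accessor for each ID
// register. On AArch64 hosts each accessor is a single `mrs`; on other hosts
// the accessors are unreachable stubs, since ID registers can only be queried
// when running natively.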
#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
  NAME CPU::Read##NAME() {                     \
    uint64_t value = 0;                        \
    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
    return NAME(value);                        \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
  NAME CPU::Read##NAME() {              \
    VIXL_UNREACHABLE();                 \
    return NAME(0);                     \
  }
#endif  // __aarch64__

VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG


// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;


// Currently computes I and D cache line size.
void CPU::SetUp() {
  uint32_t cache_type_register = GetCacheType();

  // The cache type register holds information about the caches, including the
  // I and D cache line sizes.
  static const int kDCacheLineSizeShift = 16;
  static const int kICacheLineSizeShift = 0;
  static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
  static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;

  // The cache type register encodes the I and D cache line sizes as log2 of
  // the number of four-byte words per line.
  uint32_t dcache_line_size_power_of_two =
      (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
  uint32_t icache_line_size_power_of_two =
      (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;

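  // For example, a DminLine encoding of 4 (16 four-byte words) gives
  // 4 << 4 = 64-byte D-cache lines, which is typical of recent Arm cores.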
  dcache_line_size_ = 4 << dcache_line_size_power_of_two;
  icache_line_size_ = 4 << icache_line_size_power_of_two;
}


uint32_t CPU::GetCacheType() {
#ifdef __aarch64__
  uint64_t cache_type_register;
  // Copy the content of the cache type register to a core register.
  __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
                       : [ctr] "=r"(cache_type_register));
  VIXL_ASSERT(IsUint32(cache_type_register));
  return static_cast<uint32_t>(cache_type_register);
#else
  // Returning 0 leads to the minimum encodable cache line size (four bytes),
  // which is fine since neither EnsureIAndDCacheCoherency nor the simulator
  // needs this information.
  return 0;
#endif
}


// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
  uint64_t vl;
  // To support compilers that don't understand `rdvl`, encode the instruction
  // directly and move the result manually. RDVL returns the immediate
  // multiplied by the vector length in bytes, so `rdvl x0, #8` yields the
  // length in bits.
  __asm__(
      "   .word 0x04bf5100\n"  // rdvl x0, #8
      "   mov %[vl], x0\n"
      : [vl] "=r"(vl)
      :
      : "x0");
  VIXL_ASSERT(vl <= INT_MAX);
  return static_cast<int>(vl);
#else
  VIXL_UNREACHABLE();
  return 0;
#endif
}


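// Clean the data cache and invalidate the instruction cache over
// [address, address + length) so that newly written code becomes visible to
// instruction fetch. A typical calling pattern, as a sketch (code_buffer,
// generated_code and size are illustrative names, not part of this API):
//
//   CPU::SetUp();
//   memcpy(code_buffer, generated_code, size);
//   CPU::EnsureIAndDCacheCoherency(code_buffer, size);
//   // ...branch to code_buffer...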
void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
  // Implement the cache synchronisation for all targets where AArch64 is the
  // host, even if we're building the simulator for an AArch64 host. This
  // allows for cases where the user wants to simulate code as well as run it
  // natively.

  if (length == 0) {
    return;
  }

  // The code below assumes user space cache operations are allowed.

  // Work out the line sizes for each cache, and use them to determine the
  // start addresses.
  uintptr_t start = reinterpret_cast<uintptr_t>(address);
  uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
  uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
  uintptr_t dline = start & ~(dsize - 1);
  uintptr_t iline = start & ~(isize - 1);

  // Cache line sizes are always a power of 2.
  VIXL_ASSERT(IsPowerOf2(dsize));
  VIXL_ASSERT(IsPowerOf2(isize));
  uintptr_t end = start + length;

  do {
    __asm__ __volatile__(
        // Clean each line of the D cache containing the target data.
        //
        // dc       : Data Cache maintenance
        //     c    : Clean
        //      va  : by (Virtual) Address
        //        u : to the point of Unification
        // The point of unification for a processor is the point by which the
        // instruction and data caches are guaranteed to see the same copy of a
        // memory location. See ARM DDI 0406B page B2-12 for more information.
        "   dc    cvau, %[dline]\n"
        :
        : [dline] "r"(dline)
        // This code does not write to memory, but the "memory" dependency
        // prevents GCC from reordering the code.
        : "memory");
    dline += dsize;
  } while (dline < end);

  __asm__ __volatile__(
      // Make sure that the data cache operations (above) complete before the
      // instruction cache operations (below).
      //
      // dsb      : Data Synchronisation Barrier
      //      ish : Inner SHareable domain
      //
      // The point of unification for an Inner Shareable shareability domain is
      // the point by which the instruction and data caches of all the
      // processors in that Inner Shareable shareability domain are guaranteed
      // to see the same copy of a memory location. See ARM DDI 0406B page
      // B2-12 for more information.
      "   dsb   ish\n"
      :
      :
      : "memory");

  do {
    __asm__ __volatile__(
        // Invalidate each line of the I cache containing the target data.
        //
        // ic      : Instruction Cache maintenance
        //    i    : Invalidate
        //     va  : by Address
        //       u : to the point of Unification
        "   ic   ivau, %[iline]\n"
        :
        : [iline] "r"(iline)
        : "memory");
    iline += isize;
  } while (iline < end);

  __asm__ __volatile__(
      // Make sure that the instruction cache operations (above) take effect
      // before the isb (below).
      "   dsb  ish\n"

      // Ensure that any instructions already in the pipeline are discarded and
      // reloaded from the new data.
      // isb : Instruction Synchronisation Barrier
      "   isb\n"
      :
      :
      : "memory");
#else
  // If the host isn't AArch64, we must be using the simulator, so this function
  // doesn't have to do anything.
  USE(address, length);
#endif
}


}  // namespace aarch64
}  // namespace vixl