1 // Copyright 2018, VIXL authors 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27 #ifndef VIXL_CPU_FEATURES_H 28 #define VIXL_CPU_FEATURES_H 29 30 #include <bitset> 31 #include <ostream> 32 33 #include "globals-vixl.h" 34 35 namespace vixl { 36 37 38 // VIXL aims to handle and detect all architectural features that are likely to 39 // influence code-generation decisions at EL0 (user-space). 40 // 41 // - There may be multiple VIXL feature flags for a given architectural 42 // extension. This occurs where the extension allow components to be 43 // implemented independently, or where kernel support is needed, and is likely 44 // to be fragmented. 45 // 46 // For example, Pointer Authentication (kPAuth*) has a separate feature flag 47 // for access to PACGA, and to indicate that the QARMA algorithm is 48 // implemented. 49 // 50 // - Conversely, some extensions have configuration options that do not affect 51 // EL0, so these are presented as a single VIXL feature. 52 // 53 // For example, the RAS extension (kRAS) has several variants, but the only 54 // feature relevant to VIXL is the addition of the ESB instruction so we only 55 // need a single flag. 56 // 57 // - VIXL offers separate flags for separate features even if they're 58 // architecturally linked. 59 // 60 // For example, the architecture requires kFPHalf and kNEONHalf to be equal, 61 // but they have separate hardware ID register fields so VIXL presents them as 62 // separate features. 63 // 64 // - VIXL can detect every feature for which it can generate code. 65 // 66 // - VIXL can detect some features for which it cannot generate code. 67 // 68 // The CPUFeatures::Feature enum — derived from the macro list below — is 69 // frequently extended. New features may be added to the list at any point, and 70 // no assumptions should be made about the numerical values assigned to each 71 // enum constant. The symbolic names can be considered to be stable. 72 // 73 // The debug descriptions are used only for debug output. The 'cpuinfo' strings 74 // are informative; VIXL does not use /proc/cpuinfo for feature detection. 75 76 // clang-format off 77 #define VIXL_CPU_FEATURE_LIST(V) \ 78 /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \ 79 /* registers, so that the detailed feature registers can be read */ \ 80 /* directly. */ \ 81 \ 82 /* Constant name Debug description Linux 'cpuinfo' string. */ \ 83 V(kIDRegisterEmulation, "ID register emulation", "cpuid") \ 84 \ 85 V(kFP, "FP", "fp") \ 86 V(kNEON, "NEON", "asimd") \ 87 V(kCRC32, "CRC32", "crc32") \ 88 V(kDGH, "DGH", "dgh") \ 89 /* Speculation control features. */ \ 90 V(kCSV2, "CSV2", NULL) \ 91 V(kSCXTNUM, "SCXTNUM", NULL) \ 92 V(kCSV3, "CSV3", NULL) \ 93 V(kSB, "SB", "sb") \ 94 V(kSPECRES, "SPECRES", NULL) \ 95 V(kSSBS, "SSBS", NULL) \ 96 V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \ 97 /* Cryptographic support instructions. */ \ 98 V(kAES, "AES", "aes") \ 99 V(kSHA1, "SHA1", "sha1") \ 100 V(kSHA2, "SHA2", "sha2") \ 101 /* A form of PMULL{2} with a 128-bit (1Q) result. */ \ 102 V(kPmull1Q, "Pmull1Q", "pmull") \ 103 /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc. */ \ 104 V(kAtomics, "Atomics", "atomics") \ 105 /* Limited ordering regions: LDLAR, STLLR and their variants. */ \ 106 V(kLORegions, "LORegions", NULL) \ 107 /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \ 108 V(kRDM, "RDM", "asimdrdm") \ 109 /* Scalable Vector Extension. */ \ 110 V(kSVE, "SVE", "sve") \ 111 V(kSVEF64MM, "SVE F64MM", "svef64mm") \ 112 V(kSVEF32MM, "SVE F32MM", "svef32mm") \ 113 V(kSVEI8MM, "SVE I8MM", "svei8imm") \ 114 V(kSVEBF16, "SVE BFloat16", "svebf16") \ 115 /* SDOT and UDOT support (in NEON). */ \ 116 V(kDotProduct, "DotProduct", "asimddp") \ 117 /* Int8 matrix multiplication (in NEON). */ \ 118 V(kI8MM, "NEON I8MM", "i8mm") \ 119 /* Half-precision (FP16) support for FP and NEON, respectively. */ \ 120 V(kFPHalf, "FPHalf", "fphp") \ 121 V(kNEONHalf, "NEONHalf", "asimdhp") \ 122 /* BFloat16 support (in both FP and NEON.) */ \ 123 V(kBF16, "FP/NEON BFloat 16", "bf16") \ 124 /* The RAS extension, including the ESB instruction. */ \ 125 V(kRAS, "RAS", NULL) \ 126 /* Data cache clean to the point of persistence: DC CVAP. */ \ 127 V(kDCPoP, "DCPoP", "dcpop") \ 128 /* Data cache clean to the point of deep persistence: DC CVADP. */ \ 129 V(kDCCVADP, "DCCVADP", "dcpodp") \ 130 /* Cryptographic support instructions. */ \ 131 V(kSHA3, "SHA3", "sha3") \ 132 V(kSHA512, "SHA512", "sha512") \ 133 V(kSM3, "SM3", "sm3") \ 134 V(kSM4, "SM4", "sm4") \ 135 /* Pointer authentication for addresses. */ \ 136 V(kPAuth, "PAuth", "paca") \ 137 /* Pointer authentication for addresses uses QARMA. */ \ 138 V(kPAuthQARMA, "PAuthQARMA", NULL) \ 139 /* Generic authentication (using the PACGA instruction). */ \ 140 V(kPAuthGeneric, "PAuthGeneric", "pacg") \ 141 /* Generic authentication uses QARMA. */ \ 142 V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \ 143 /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \ 144 V(kJSCVT, "JSCVT", "jscvt") \ 145 /* Complex number support for NEON: FCMLA and FCADD. */ \ 146 V(kFcma, "Fcma", "fcma") \ 147 /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \ 148 V(kRCpc, "RCpc", "lrcpc") \ 149 V(kRCpcImm, "RCpc (imm)", "ilrcpc") \ 150 /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \ 151 V(kFlagM, "FlagM", "flagm") \ 152 /* Unaligned single-copy atomicity. */ \ 153 V(kUSCAT, "USCAT", "uscat") \ 154 /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \ 155 V(kFHM, "FHM", "asimdfhm") \ 156 /* Data-independent timing (for selected instructions). */ \ 157 V(kDIT, "DIT", "dit") \ 158 /* Branch target identification. */ \ 159 V(kBTI, "BTI", "bti") \ 160 /* Flag manipulation instructions: {AX,XA}FLAG */ \ 161 V(kAXFlag, "AXFlag", "flagm2") \ 162 /* Random number generation extension, */ \ 163 V(kRNG, "RNG", "rng") \ 164 /* Floating-point round to {32,64}-bit integer. */ \ 165 V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \ 166 /* Memory Tagging Extension. */ \ 167 V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \ 168 V(kMTE, "MTE", NULL) \ 169 V(kMTE3, "MTE (asymmetric)", "mte3") \ 170 /* PAuth extensions. */ \ 171 V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \ 172 V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \ 173 V(kPAuthFPAC, "PAuth FPAC", NULL) \ 174 V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) \ 175 /* Scalable Vector Extension 2. */ \ 176 V(kSVE2, "SVE2", "sve2") \ 177 V(kSVESM4, "SVE SM4", "svesm4") \ 178 V(kSVESHA3, "SVE SHA3", "svesha3") \ 179 V(kSVEBitPerm, "SVE BitPerm", "svebitperm") \ 180 V(kSVEAES, "SVE AES", "sveaes") \ 181 V(kSVEPmull128, "SVE Pmull128", "svepmull") \ 182 /* Alternate floating-point behavior */ \ 183 V(kAFP, "AFP", "afp") \ 184 /* Enhanced Counter Virtualization */ \ 185 V(kECV, "ECV", "ecv") \ 186 /* Increased precision of Reciprocal Estimate and Square Root Estimate */ \ 187 V(kRPRES, "RPRES", "rpres") \ 188 /* Memory operation instructions, for memcpy, memset */ \ 189 V(kMOPS, "Memory ops", NULL) \ 190 /* Scalable Matrix Extension (SME) */ \ 191 V(kSME, "SME", "sme") \ 192 V(kSMEi16i64, "SME (i16i64)", "smei16i64") \ 193 V(kSMEf64f64, "SME (f64f64)", "smef64f64") \ 194 V(kSMEi8i32, "SME (i8i32)", "smei8i32") \ 195 V(kSMEf16f32, "SME (f16f32)", "smef16f32") \ 196 V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \ 197 V(kSMEf32f32, "SME (f32f32)", "smef32f32") \ 198 V(kSMEfa64, "SME (fa64)", "smefa64") \ 199 /* WFET and WFIT instruction support */ \ 200 V(kWFXT, "WFXT", "wfxt") \ 201 /* Extended BFloat16 instructions */ \ 202 V(kEBF16, "EBF16", "ebf16") \ 203 V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16") \ 204 V(kCSSC, "CSSC", "cssc") 205 // clang-format on 206 207 208 class CPUFeaturesConstIterator; 209 210 // A representation of the set of features known to be supported by the target 211 // device. Each feature is represented by a simple boolean flag. 212 // 213 // - When the Assembler is asked to assemble an instruction, it asserts (in 214 // debug mode) that the necessary features are available. 215 // 216 // - TODO: The MacroAssembler relies on the Assembler's assertions, but in 217 // some cases it may be useful for macros to generate a fall-back sequence 218 // in case features are not available. 219 // 220 // - The Simulator assumes by default that all features are available, but it 221 // is possible to configure it to fail if the simulated code uses features 222 // that are not enabled. 223 // 224 // The Simulator also offers pseudo-instructions to allow features to be 225 // enabled and disabled dynamically. This is useful when you want to ensure 226 // that some features are constrained to certain areas of code. 227 // 228 // - The base Disassembler knows nothing about CPU features, but the 229 // PrintDisassembler can be configured to annotate its output with warnings 230 // about unavailable features. The Simulator uses this feature when 231 // instruction trace is enabled. 232 // 233 // - The Decoder-based components -- the Simulator and PrintDisassembler -- 234 // rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of 235 // features actually encountered so that a large block of code can be 236 // examined (either directly or through simulation), and the required 237 // features analysed later. 238 // 239 // Expected usage: 240 // 241 // // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for 242 // // compatibility with older version of VIXL. 243 // MacroAssembler masm; 244 // 245 // // Generate code only for the current CPU. 246 // masm.SetCPUFeatures(CPUFeatures::InferFromOS()); 247 // 248 // // Turn off feature checking entirely. 249 // masm.SetCPUFeatures(CPUFeatures::All()); 250 // 251 // Feature set manipulation: 252 // 253 // CPUFeatures f; // The default constructor gives an empty set. 254 // // Individual features can be added (or removed). 255 // f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::AES); 256 // f.Remove(CPUFeatures::kNEON); 257 // 258 // // Some helpers exist for extensions that provide several features. 259 // f.Remove(CPUFeatures::All()); 260 // f.Combine(CPUFeatures::AArch64LegacyBaseline()); 261 // 262 // // Chained construction is also possible. 263 // CPUFeatures g = 264 // f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32); 265 // 266 // // Features can be queried. Where multiple features are given, they are 267 // // combined with logical AND. 268 // if (h.Has(CPUFeatures::kNEON)) { ... } 269 // if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... } 270 // if (h.Has(g)) { ... } 271 // // If the empty set is requested, the result is always 'true'. 272 // VIXL_ASSERT(h.Has(CPUFeatures())); 273 // 274 // // For debug and reporting purposes, features can be enumerated (or 275 // // printed directly): 276 // std::cout << CPUFeatures::kNEON; // Prints something like "NEON". 277 // std::cout << f; // Prints something like "FP, NEON, CRC32". 278 class CPUFeatures { 279 public: 280 // clang-format off 281 // Individual features. 282 // These should be treated as opaque tokens. User code should not rely on 283 // specific numeric values or ordering. 284 enum Feature { 285 // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that 286 // this class supports. 287 288 kNone = -1, 289 #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL, 290 VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE) 291 #undef VIXL_DECLARE_FEATURE 292 kNumberOfFeatures 293 }; 294 // clang-format on 295 296 // By default, construct with no features enabled. CPUFeatures()297 CPUFeatures() : features_{} {} 298 299 // Construct with some features already enabled. 300 template <typename T, typename... U> CPUFeatures(T first, U... others)301 CPUFeatures(T first, U... others) : features_{} { 302 Combine(first, others...); 303 } 304 305 // Construct with all features enabled. This can be used to disable feature 306 // checking: `Has(...)` returns true regardless of the argument. 307 static CPUFeatures All(); 308 309 // Construct an empty CPUFeatures. This is equivalent to the default 310 // constructor, but is provided for symmetry and convenience. None()311 static CPUFeatures None() { return CPUFeatures(); } 312 313 // The presence of these features was assumed by version of VIXL before this 314 // API was added, so using this set by default ensures API compatibility. AArch64LegacyBaseline()315 static CPUFeatures AArch64LegacyBaseline() { 316 return CPUFeatures(kFP, kNEON, kCRC32); 317 } 318 319 // Construct a new CPUFeatures object using ID registers. This assumes that 320 // kIDRegisterEmulation is present. 321 static CPUFeatures InferFromIDRegisters(); 322 323 enum QueryIDRegistersOption { 324 kDontQueryIDRegisters, 325 kQueryIDRegistersIfAvailable 326 }; 327 328 // Construct a new CPUFeatures object based on what the OS reports. 329 static CPUFeatures InferFromOS( 330 QueryIDRegistersOption option = kQueryIDRegistersIfAvailable); 331 332 // Combine another CPUFeatures object into this one. Features that already 333 // exist in this set are left unchanged. 334 void Combine(const CPUFeatures& other); 335 336 // Combine a specific feature into this set. If it already exists in the set, 337 // the set is left unchanged. 338 void Combine(Feature feature); 339 340 // Combine multiple features (or feature sets) into this set. 341 template <typename T, typename... U> Combine(T first, U... others)342 void Combine(T first, U... others) { 343 Combine(first); 344 Combine(others...); 345 } 346 347 // Remove features in another CPUFeatures object from this one. 348 void Remove(const CPUFeatures& other); 349 350 // Remove a specific feature from this set. This has no effect if the feature 351 // doesn't exist in the set. 352 void Remove(Feature feature0); 353 354 // Remove multiple features (or feature sets) from this set. 355 template <typename T, typename... U> Remove(T first, U... others)356 void Remove(T first, U... others) { 357 Remove(first); 358 Remove(others...); 359 } 360 361 // Chaining helpers for convenient construction by combining other CPUFeatures 362 // or individual Features. 363 template <typename... T> With(T.... others) const364 CPUFeatures With(T... others) const { 365 CPUFeatures f(*this); 366 f.Combine(others...); 367 return f; 368 } 369 370 template <typename... T> Without(T.... others) const371 CPUFeatures Without(T... others) const { 372 CPUFeatures f(*this); 373 f.Remove(others...); 374 return f; 375 } 376 377 // Test whether the `other` feature set is equal to or a subset of this one. 378 bool Has(const CPUFeatures& other) const; 379 380 // Test whether a single feature exists in this set. 381 // Note that `Has(kNone)` always returns true. 382 bool Has(Feature feature) const; 383 384 // Test whether all of the specified features exist in this set. 385 template <typename T, typename... U> Has(T first, U... others) const386 bool Has(T first, U... others) const { 387 return Has(first) && Has(others...); 388 } 389 390 // Return the number of enabled features. 391 size_t Count() const; HasNoFeatures() const392 bool HasNoFeatures() const { return Count() == 0; } 393 394 // Check for equivalence. operator ==(const CPUFeatures& other) const395 bool operator==(const CPUFeatures& other) const { 396 return Has(other) && other.Has(*this); 397 } operator !=(const CPUFeatures& other) const398 bool operator!=(const CPUFeatures& other) const { return !(*this == other); } 399 400 typedef CPUFeaturesConstIterator const_iterator; 401 402 const_iterator begin() const; 403 const_iterator end() const; 404 405 private: 406 // Each bit represents a feature. This set will be extended as needed. 407 std::bitset<kNumberOfFeatures> features_; 408 409 friend std::ostream& operator<<(std::ostream& os, 410 const vixl::CPUFeatures& features); 411 }; 412 413 std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature); 414 std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features); 415 416 // This is not a proper C++ iterator type, but it simulates enough of 417 // ForwardIterator that simple loops can be written. 418 class CPUFeaturesConstIterator { 419 public: CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL, CPUFeatures::Feature start = CPUFeatures::kNone)420 CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL, 421 CPUFeatures::Feature start = CPUFeatures::kNone) 422 : cpu_features_(cpu_features), feature_(start) { 423 VIXL_ASSERT(IsValid()); 424 } 425 426 bool operator==(const CPUFeaturesConstIterator& other) const; operator !=(const CPUFeaturesConstIterator& other) const427 bool operator!=(const CPUFeaturesConstIterator& other) const { 428 return !(*this == other); 429 } 430 CPUFeaturesConstIterator& operator++(); 431 CPUFeaturesConstIterator operator++(int); 432 operator *() const433 CPUFeatures::Feature operator*() const { 434 VIXL_ASSERT(IsValid()); 435 return feature_; 436 } 437 438 // For proper support of C++'s simplest "Iterator" concept, this class would 439 // have to define member types (such as CPUFeaturesIterator::pointer) to make 440 // it appear as if it iterates over Feature objects in memory. That is, we'd 441 // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator. 442 // This is at least partially possible -- the std::vector<bool> specialisation 443 // does something similar -- but it doesn't seem worthwhile for a 444 // special-purpose debug helper, so they are omitted here. 445 private: 446 const CPUFeatures* cpu_features_; 447 CPUFeatures::Feature feature_; 448 IsValid() const449 bool IsValid() const { 450 if (cpu_features_ == NULL) { 451 return feature_ == CPUFeatures::kNone; 452 } 453 return cpu_features_->Has(feature_); 454 } 455 }; 456 457 // A convenience scope for temporarily modifying a CPU features object. This 458 // allows features to be enabled for short sequences. 459 // 460 // Expected usage: 461 // 462 // { 463 // CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32); 464 // // This scope can now use CRC32, as well as anything else that was enabled 465 // // before the scope. 466 // 467 // ... 468 // 469 // // At the end of the scope, the original CPU features are restored. 470 // } 471 class CPUFeaturesScope { 472 public: 473 // Start a CPUFeaturesScope on any object that implements 474 // `CPUFeatures* GetCPUFeatures()`. 475 template <typename T> CPUFeaturesScope(T* cpu_features_wrapper)476 explicit CPUFeaturesScope(T* cpu_features_wrapper) 477 : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), 478 old_features_(*cpu_features_) {} 479 480 // Start a CPUFeaturesScope on any object that implements 481 // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled. 482 template <typename T, typename U, typename... V> CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)483 CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features) 484 : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), 485 old_features_(*cpu_features_) { 486 cpu_features_->Combine(first, features...); 487 } 488 ~CPUFeaturesScope()489 ~CPUFeaturesScope() { *cpu_features_ = old_features_; } 490 491 // For advanced usage, the CPUFeatures object can be accessed directly. 492 // The scope will restore the original state when it ends. 493 GetCPUFeatures() const494 CPUFeatures* GetCPUFeatures() const { return cpu_features_; } 495 SetCPUFeatures(const CPUFeatures& cpu_features)496 void SetCPUFeatures(const CPUFeatures& cpu_features) { 497 *cpu_features_ = cpu_features; 498 } 499 500 private: 501 CPUFeatures* const cpu_features_; 502 const CPUFeatures old_features_; 503 }; 504 505 506 } // namespace vixl 507 508 #endif // VIXL_CPU_FEATURES_H 509