1/************************************************************************** 2 * 3 * Copyright 2008 Dennis Smit 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * on the rights to use, copy, modify, merge, publish, distribute, sub 10 * license, and/or sell copies of the Software, and to permit persons to whom 11 * the Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 * USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 ***************************************************************************/ 26 27/** 28 * @file 29 * CPU feature detection. 30 * 31 * @author Dennis Smit 32 * @author Based on the work of Eric Anholt <anholt@FreeBSD.org> 33 */ 34 35#ifndef _UTIL_CPU_DETECT_H 36#define _UTIL_CPU_DETECT_H 37 38#include <stdbool.h> 39 40#include "pipe/p_config.h" 41#include "util/u_atomic.h" 42#include "util/u_thread.h" 43 44 45#ifdef __cplusplus 46extern "C" { 47#endif 48 49enum cpu_family { 50 CPU_UNKNOWN, 51 52 CPU_AMD_ZEN1_ZEN2, 53 CPU_AMD_ZEN_HYGON, 54 CPU_AMD_ZEN3, 55 CPU_AMD_ZEN_NEXT, 56 CPU_AMD_LAST, 57 58 CPU_S390X, 59}; 60 61typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32]; 62 63struct util_cpu_caps_t { 64 /** 65 * Initialized to 0 and set to non-zero with an atomic after the entire 66 * struct has been initialized. 67 */ 68 uint32_t detect_done; 69 70 /** 71 * Number of CPUs available to the process. 72 * 73 * This will be less than or equal to \c max_cpus. This is the number of 74 * CPUs that are online and available to the process. 75 */ 76 int16_t nr_cpus; 77 78 /** 79 * Maximum number of CPUs that can be online in the system. 80 * 81 * This will be greater than or equal to \c nr_cpus. This is the number of 82 * CPUs installed in the system. \c nr_cpus will be less if some CPUs are 83 * offline. 84 */ 85 int16_t max_cpus; 86 87 enum cpu_family family; 88 89 /* Feature flags */ 90 int x86_cpu_type; 91 unsigned cacheline; 92 93 unsigned has_intel:1; 94 unsigned has_tsc:1; 95 unsigned has_mmx:1; 96 unsigned has_mmx2:1; 97 unsigned has_sse:1; 98 unsigned has_sse2:1; 99 unsigned has_sse3:1; 100 unsigned has_ssse3:1; 101 unsigned has_sse4_1:1; 102 unsigned has_sse4_2:1; 103 unsigned has_popcnt:1; 104 unsigned has_avx:1; 105 unsigned has_avx2:1; 106 unsigned has_f16c:1; 107 unsigned has_fma:1; 108 unsigned has_3dnow:1; 109 unsigned has_3dnow_ext:1; 110 unsigned has_xop:1; 111 unsigned has_altivec:1; 112 unsigned has_vsx:1; 113 unsigned has_daz:1; 114 unsigned has_neon:1; 115 unsigned has_msa:1; 116 117 unsigned has_avx512f:1; 118 unsigned has_avx512dq:1; 119 unsigned has_avx512ifma:1; 120 unsigned has_avx512pf:1; 121 unsigned has_avx512er:1; 122 unsigned has_avx512cd:1; 123 unsigned has_avx512bw:1; 124 unsigned has_avx512vl:1; 125 unsigned has_avx512vbmi:1; 126 127 unsigned num_L3_caches; 128 unsigned num_cpu_mask_bits; 129 130 uint16_t cpu_to_L3[UTIL_MAX_CPUS]; 131 /* Affinity masks for each L3 cache. */ 132 util_affinity_mask *L3_affinity_mask; 133}; 134 135#define U_CPU_INVALID_L3 0xffff 136 137void util_cpu_detect(void); 138 139static inline ATTRIBUTE_CONST const struct util_cpu_caps_t * 140util_get_cpu_caps(void) 141{ 142 extern struct util_cpu_caps_t util_cpu_caps; 143 144 /* On most CPU architectures, an atomic read is simply a regular memory 145 * load instruction with some extra compiler magic to prevent code 146 * re-ordering around it. The perf impact of doing this check should be 147 * negligible in most cases. 148 * 149 * Also, even though it looks like a bit of a lie, we've declared this 150 * function with ATTRIBUTE_CONST. The GCC docs say: 151 * 152 * "Calls to functions whose return value is not affected by changes to 153 * the observable state of the program and that have no observable 154 * effects on such state other than to return a value may lend 155 * themselves to optimizations such as common subexpression elimination. 156 * Declaring such functions with the const attribute allows GCC to avoid 157 * emitting some calls in repeated invocations of the function with the 158 * same argument values." 159 * 160 * The word "observable" is important here. With the exception of a 161 * llvmpipe debug flag behind an environment variable and a few unit tests, 162 * all of which emulate worse CPUs, this function neither affects nor is 163 * affected by any "observable" state. It has its own internal state for 164 * sure, but that state is such that it appears to return exactly the same 165 * value with the same internal data every time. 166 */ 167 if (unlikely(!p_atomic_read(&util_cpu_caps.detect_done))) 168 util_cpu_detect(); 169 170 return &util_cpu_caps; 171} 172 173#ifdef __cplusplus 174} 175#endif 176 177 178#endif /* _UTIL_CPU_DETECT_H */ 179