1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2008 Dennis Smit 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 9bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 10bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 11bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 15bf215546Sopenharmony_ci * Software. 16bf215546Sopenharmony_ci * 17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20bf215546Sopenharmony_ci * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 
24bf215546Sopenharmony_ci * 25bf215546Sopenharmony_ci **************************************************************************/ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/** 28bf215546Sopenharmony_ci * @file 29bf215546Sopenharmony_ci * CPU feature detection. 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * @author Dennis Smit 32bf215546Sopenharmony_ci * @author Based on the work of Eric Anholt <anholt@FreeBSD.org> 33bf215546Sopenharmony_ci */ 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "pipe/p_config.h" 36bf215546Sopenharmony_ci#include "pipe/p_compiler.h" 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#include "util/u_debug.h" 39bf215546Sopenharmony_ci#include "u_cpu_detect.h" 40bf215546Sopenharmony_ci#include "u_math.h" 41bf215546Sopenharmony_ci#include "c11/threads.h" 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#include <stdio.h> 44bf215546Sopenharmony_ci#include <inttypes.h> 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci#if defined(PIPE_ARCH_PPC) 47bf215546Sopenharmony_ci#if defined(PIPE_OS_APPLE) 48bf215546Sopenharmony_ci#include <sys/sysctl.h> 49bf215546Sopenharmony_ci#else 50bf215546Sopenharmony_ci#include <signal.h> 51bf215546Sopenharmony_ci#include <setjmp.h> 52bf215546Sopenharmony_ci#endif 53bf215546Sopenharmony_ci#endif 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci#if defined(PIPE_OS_BSD) 56bf215546Sopenharmony_ci#include <sys/param.h> 57bf215546Sopenharmony_ci#include <sys/sysctl.h> 58bf215546Sopenharmony_ci#include <machine/cpu.h> 59bf215546Sopenharmony_ci#endif 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci#if defined(PIPE_OS_FREEBSD) 62bf215546Sopenharmony_ci#if __has_include(<sys/auxv.h>) 63bf215546Sopenharmony_ci#include <sys/auxv.h> 64bf215546Sopenharmony_ci#define HAVE_ELF_AUX_INFO 65bf215546Sopenharmony_ci#endif 66bf215546Sopenharmony_ci#endif 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci#if defined(PIPE_OS_LINUX) 69bf215546Sopenharmony_ci#include <signal.h> 
70bf215546Sopenharmony_ci#include <fcntl.h> 71bf215546Sopenharmony_ci#include <elf.h> 72bf215546Sopenharmony_ci#endif 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci#ifdef PIPE_OS_UNIX 75bf215546Sopenharmony_ci#include <unistd.h> 76bf215546Sopenharmony_ci#endif 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci#if defined(HAS_ANDROID_CPUFEATURES) 79bf215546Sopenharmony_ci#include <cpu-features.h> 80bf215546Sopenharmony_ci#endif 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci#if defined(PIPE_OS_WINDOWS) 83bf215546Sopenharmony_ci#include <windows.h> 84bf215546Sopenharmony_ci#if defined(PIPE_CC_MSVC) 85bf215546Sopenharmony_ci#include <intrin.h> 86bf215546Sopenharmony_ci#endif 87bf215546Sopenharmony_ci#endif 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci#if defined(HAS_SCHED_H) 90bf215546Sopenharmony_ci#include <sched.h> 91bf215546Sopenharmony_ci#endif 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ciDEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false) 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_cistruct util_cpu_caps_t util_cpu_caps; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 99bf215546Sopenharmony_cistatic int has_cpuid(void); 100bf215546Sopenharmony_ci#endif 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) && !defined(PIPE_OS_BSD) && !defined(PIPE_OS_LINUX) 104bf215546Sopenharmony_cistatic jmp_buf __lv_powerpc_jmpbuf; 105bf215546Sopenharmony_cistatic volatile sig_atomic_t __lv_powerpc_canjump = 0; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic void 108bf215546Sopenharmony_cisigill_handler(int sig) 109bf215546Sopenharmony_ci{ 110bf215546Sopenharmony_ci if (!__lv_powerpc_canjump) { 111bf215546Sopenharmony_ci signal (sig, SIG_DFL); 112bf215546Sopenharmony_ci raise (sig); 113bf215546Sopenharmony_ci } 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci 
__lv_powerpc_canjump = 0; 116bf215546Sopenharmony_ci longjmp(__lv_powerpc_jmpbuf, 1); 117bf215546Sopenharmony_ci} 118bf215546Sopenharmony_ci#endif 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci#if defined(PIPE_ARCH_PPC) 121bf215546Sopenharmony_cistatic void 122bf215546Sopenharmony_cicheck_os_altivec_support(void) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci#if defined(__ALTIVEC__) 125bf215546Sopenharmony_ci util_cpu_caps.has_altivec = 1; 126bf215546Sopenharmony_ci#endif 127bf215546Sopenharmony_ci#if defined(__VSX__) 128bf215546Sopenharmony_ci util_cpu_caps.has_vsx = 1; 129bf215546Sopenharmony_ci#endif 130bf215546Sopenharmony_ci#if defined(__ALTIVEC__) && defined(__VSX__) 131bf215546Sopenharmony_ci/* Do nothing */ 132bf215546Sopenharmony_ci#elif defined(PIPE_OS_APPLE) || defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) 133bf215546Sopenharmony_ci#ifdef HW_VECTORUNIT 134bf215546Sopenharmony_ci int sels[2] = {CTL_HW, HW_VECTORUNIT}; 135bf215546Sopenharmony_ci#else 136bf215546Sopenharmony_ci int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC}; 137bf215546Sopenharmony_ci#endif 138bf215546Sopenharmony_ci int has_vu = 0; 139bf215546Sopenharmony_ci size_t len = sizeof (has_vu); 140bf215546Sopenharmony_ci int err; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci err = sysctl(sels, 2, &has_vu, &len, NULL, 0); 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci if (err == 0) { 145bf215546Sopenharmony_ci if (has_vu != 0) { 146bf215546Sopenharmony_ci util_cpu_caps.has_altivec = 1; 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci#elif defined(PIPE_OS_FREEBSD) /* !PIPE_OS_APPLE && !PIPE_OS_NETBSD && !PIPE_OS_OPENBSD */ 150bf215546Sopenharmony_ci unsigned long hwcap = 0; 151bf215546Sopenharmony_ci#ifdef HAVE_ELF_AUX_INFO 152bf215546Sopenharmony_ci elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)); 153bf215546Sopenharmony_ci#else 154bf215546Sopenharmony_ci size_t len = sizeof(hwcap); 155bf215546Sopenharmony_ci 
sysctlbyname("hw.cpu_features", &hwcap, &len, NULL, 0); 156bf215546Sopenharmony_ci#endif 157bf215546Sopenharmony_ci if (hwcap & PPC_FEATURE_HAS_ALTIVEC) 158bf215546Sopenharmony_ci util_cpu_caps.has_altivec = 1; 159bf215546Sopenharmony_ci if (hwcap & PPC_FEATURE_HAS_VSX) 160bf215546Sopenharmony_ci util_cpu_caps.has_vsx = 1; 161bf215546Sopenharmony_ci#elif defined(PIPE_OS_LINUX) /* !PIPE_OS_FREEBSD */ 162bf215546Sopenharmony_ci#if defined(PIPE_ARCH_PPC_64) 163bf215546Sopenharmony_ci Elf64_auxv_t aux; 164bf215546Sopenharmony_ci#else 165bf215546Sopenharmony_ci Elf32_auxv_t aux; 166bf215546Sopenharmony_ci#endif 167bf215546Sopenharmony_ci int fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); 168bf215546Sopenharmony_ci if (fd >= 0) { 169bf215546Sopenharmony_ci while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { 170bf215546Sopenharmony_ci if (aux.a_type == AT_HWCAP) { 171bf215546Sopenharmony_ci char *env_vsx = getenv("GALLIVM_VSX"); 172bf215546Sopenharmony_ci uint64_t hwcap = aux.a_un.a_val; 173bf215546Sopenharmony_ci util_cpu_caps.has_altivec = (hwcap >> 28) & 1; 174bf215546Sopenharmony_ci if (!env_vsx || env_vsx[0] != '0') { 175bf215546Sopenharmony_ci util_cpu_caps.has_vsx = (hwcap >> 7) & 1; 176bf215546Sopenharmony_ci } 177bf215546Sopenharmony_ci break; 178bf215546Sopenharmony_ci } 179bf215546Sopenharmony_ci } 180bf215546Sopenharmony_ci close(fd); 181bf215546Sopenharmony_ci } 182bf215546Sopenharmony_ci#else /* !PIPE_OS_APPLE && !PIPE_OS_BSD && !PIPE_OS_LINUX */ 183bf215546Sopenharmony_ci /* not on Apple/Darwin or Linux, do it the brute-force way */ 184bf215546Sopenharmony_ci /* this is borrowed from the libmpeg2 library */ 185bf215546Sopenharmony_ci signal(SIGILL, sigill_handler); 186bf215546Sopenharmony_ci if (setjmp(__lv_powerpc_jmpbuf)) { 187bf215546Sopenharmony_ci signal(SIGILL, SIG_DFL); 188bf215546Sopenharmony_ci } else { 189bf215546Sopenharmony_ci boolean enable_altivec = TRUE; /* Default: enable if available, and if not overridden */ 
190bf215546Sopenharmony_ci boolean enable_vsx = TRUE; 191bf215546Sopenharmony_ci#ifdef DEBUG 192bf215546Sopenharmony_ci /* Disabling Altivec code generation is not the same as disabling VSX code generation, 193bf215546Sopenharmony_ci * which can be done simply by passing -mattr=-vsx to the LLVM compiler; cf. 194bf215546Sopenharmony_ci * lp_build_create_jit_compiler_for_module(). 195bf215546Sopenharmony_ci * If you want to disable Altivec code generation, the best place to do it is here. 196bf215546Sopenharmony_ci */ 197bf215546Sopenharmony_ci char *env_control = getenv("GALLIVM_ALTIVEC"); /* 1=enable (default); 0=disable */ 198bf215546Sopenharmony_ci if (env_control && env_control[0] == '0') { 199bf215546Sopenharmony_ci enable_altivec = FALSE; 200bf215546Sopenharmony_ci } 201bf215546Sopenharmony_ci#endif 202bf215546Sopenharmony_ci /* VSX instructions can be explicitly enabled/disabled via GALLIVM_VSX=1 or 0 */ 203bf215546Sopenharmony_ci char *env_vsx = getenv("GALLIVM_VSX"); 204bf215546Sopenharmony_ci if (env_vsx && env_vsx[0] == '0') { 205bf215546Sopenharmony_ci enable_vsx = FALSE; 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci if (enable_altivec) { 208bf215546Sopenharmony_ci __lv_powerpc_canjump = 1; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci __asm __volatile 211bf215546Sopenharmony_ci ("mtspr 256, %0\n\t" 212bf215546Sopenharmony_ci "vand %%v0, %%v0, %%v0" 213bf215546Sopenharmony_ci : 214bf215546Sopenharmony_ci : "r" (-1)); 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci util_cpu_caps.has_altivec = 1; 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci if (enable_vsx) { 219bf215546Sopenharmony_ci __asm __volatile("xxland %vs0, %vs0, %vs0"); 220bf215546Sopenharmony_ci util_cpu_caps.has_vsx = 1; 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci signal(SIGILL, SIG_DFL); 223bf215546Sopenharmony_ci } else { 224bf215546Sopenharmony_ci util_cpu_caps.has_altivec = 0; 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci } 
227bf215546Sopenharmony_ci#endif /* !PIPE_OS_APPLE && !PIPE_OS_LINUX */ 228bf215546Sopenharmony_ci} 229bf215546Sopenharmony_ci#endif /* PIPE_ARCH_PPC */ 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64) 233bf215546Sopenharmony_cistatic int has_cpuid(void) 234bf215546Sopenharmony_ci{ 235bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) 236bf215546Sopenharmony_ci#if defined(PIPE_OS_GCC) 237bf215546Sopenharmony_ci int a, c; 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci __asm __volatile 240bf215546Sopenharmony_ci ("pushf\n" 241bf215546Sopenharmony_ci "popl %0\n" 242bf215546Sopenharmony_ci "movl %0, %1\n" 243bf215546Sopenharmony_ci "xorl $0x200000, %0\n" 244bf215546Sopenharmony_ci "push %0\n" 245bf215546Sopenharmony_ci "popf\n" 246bf215546Sopenharmony_ci "pushf\n" 247bf215546Sopenharmony_ci "popl %0\n" 248bf215546Sopenharmony_ci : "=a" (a), "=c" (c) 249bf215546Sopenharmony_ci : 250bf215546Sopenharmony_ci : "cc"); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci return a != c; 253bf215546Sopenharmony_ci#else 254bf215546Sopenharmony_ci /* FIXME */ 255bf215546Sopenharmony_ci return 1; 256bf215546Sopenharmony_ci#endif 257bf215546Sopenharmony_ci#elif defined(PIPE_ARCH_X86_64) 258bf215546Sopenharmony_ci return 1; 259bf215546Sopenharmony_ci#else 260bf215546Sopenharmony_ci return 0; 261bf215546Sopenharmony_ci#endif 262bf215546Sopenharmony_ci} 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci/** 266bf215546Sopenharmony_ci * @sa cpuid.h included in gcc-4.3 onwards. 
267bf215546Sopenharmony_ci * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx 268bf215546Sopenharmony_ci */ 269bf215546Sopenharmony_cistatic inline void 270bf215546Sopenharmony_cicpuid(uint32_t ax, uint32_t *p) 271bf215546Sopenharmony_ci{ 272bf215546Sopenharmony_ci#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 273bf215546Sopenharmony_ci __asm __volatile ( 274bf215546Sopenharmony_ci "xchgl %%ebx, %1\n\t" 275bf215546Sopenharmony_ci "cpuid\n\t" 276bf215546Sopenharmony_ci "xchgl %%ebx, %1" 277bf215546Sopenharmony_ci : "=a" (p[0]), 278bf215546Sopenharmony_ci "=S" (p[1]), 279bf215546Sopenharmony_ci "=c" (p[2]), 280bf215546Sopenharmony_ci "=d" (p[3]) 281bf215546Sopenharmony_ci : "0" (ax) 282bf215546Sopenharmony_ci ); 283bf215546Sopenharmony_ci#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) 284bf215546Sopenharmony_ci __asm __volatile ( 285bf215546Sopenharmony_ci "cpuid\n\t" 286bf215546Sopenharmony_ci : "=a" (p[0]), 287bf215546Sopenharmony_ci "=b" (p[1]), 288bf215546Sopenharmony_ci "=c" (p[2]), 289bf215546Sopenharmony_ci "=d" (p[3]) 290bf215546Sopenharmony_ci : "0" (ax) 291bf215546Sopenharmony_ci ); 292bf215546Sopenharmony_ci#elif defined(PIPE_CC_MSVC) 293bf215546Sopenharmony_ci __cpuid(p, ax); 294bf215546Sopenharmony_ci#else 295bf215546Sopenharmony_ci p[0] = 0; 296bf215546Sopenharmony_ci p[1] = 0; 297bf215546Sopenharmony_ci p[2] = 0; 298bf215546Sopenharmony_ci p[3] = 0; 299bf215546Sopenharmony_ci#endif 300bf215546Sopenharmony_ci} 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci/** 303bf215546Sopenharmony_ci * @sa cpuid.h included in gcc-4.4 onwards. 
304bf215546Sopenharmony_ci * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx 305bf215546Sopenharmony_ci */ 306bf215546Sopenharmony_cistatic inline void 307bf215546Sopenharmony_cicpuid_count(uint32_t ax, uint32_t cx, uint32_t *p) 308bf215546Sopenharmony_ci{ 309bf215546Sopenharmony_ci#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 310bf215546Sopenharmony_ci __asm __volatile ( 311bf215546Sopenharmony_ci "xchgl %%ebx, %1\n\t" 312bf215546Sopenharmony_ci "cpuid\n\t" 313bf215546Sopenharmony_ci "xchgl %%ebx, %1" 314bf215546Sopenharmony_ci : "=a" (p[0]), 315bf215546Sopenharmony_ci "=S" (p[1]), 316bf215546Sopenharmony_ci "=c" (p[2]), 317bf215546Sopenharmony_ci "=d" (p[3]) 318bf215546Sopenharmony_ci : "0" (ax), "2" (cx) 319bf215546Sopenharmony_ci ); 320bf215546Sopenharmony_ci#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) 321bf215546Sopenharmony_ci __asm __volatile ( 322bf215546Sopenharmony_ci "cpuid\n\t" 323bf215546Sopenharmony_ci : "=a" (p[0]), 324bf215546Sopenharmony_ci "=b" (p[1]), 325bf215546Sopenharmony_ci "=c" (p[2]), 326bf215546Sopenharmony_ci "=d" (p[3]) 327bf215546Sopenharmony_ci : "0" (ax), "2" (cx) 328bf215546Sopenharmony_ci ); 329bf215546Sopenharmony_ci#elif defined(PIPE_CC_MSVC) 330bf215546Sopenharmony_ci __cpuidex(p, ax, cx); 331bf215546Sopenharmony_ci#else 332bf215546Sopenharmony_ci p[0] = 0; 333bf215546Sopenharmony_ci p[1] = 0; 334bf215546Sopenharmony_ci p[2] = 0; 335bf215546Sopenharmony_ci p[3] = 0; 336bf215546Sopenharmony_ci#endif 337bf215546Sopenharmony_ci} 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_cistatic inline uint64_t xgetbv(void) 341bf215546Sopenharmony_ci{ 342bf215546Sopenharmony_ci#if defined(PIPE_CC_GCC) 343bf215546Sopenharmony_ci uint32_t eax, edx; 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci __asm __volatile ( 346bf215546Sopenharmony_ci ".byte 0x0f, 0x01, 0xd0" // xgetbv isn't supported on gcc < 4.4 347bf215546Sopenharmony_ci : "=a"(eax), 348bf215546Sopenharmony_ci 
"=d"(edx) 349bf215546Sopenharmony_ci : "c"(0) 350bf215546Sopenharmony_ci ); 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci return ((uint64_t)edx << 32) | eax; 353bf215546Sopenharmony_ci#elif defined(PIPE_CC_MSVC) && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 354bf215546Sopenharmony_ci return _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 355bf215546Sopenharmony_ci#else 356bf215546Sopenharmony_ci return 0; 357bf215546Sopenharmony_ci#endif 358bf215546Sopenharmony_ci} 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) 362bf215546Sopenharmony_ciPIPE_ALIGN_STACK static inline boolean sse2_has_daz(void) 363bf215546Sopenharmony_ci{ 364bf215546Sopenharmony_ci alignas(16) struct { 365bf215546Sopenharmony_ci uint32_t pad1[7]; 366bf215546Sopenharmony_ci uint32_t mxcsr_mask; 367bf215546Sopenharmony_ci uint32_t pad2[128-8]; 368bf215546Sopenharmony_ci } fxarea; 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci fxarea.mxcsr_mask = 0; 371bf215546Sopenharmony_ci#if defined(PIPE_CC_GCC) 372bf215546Sopenharmony_ci __asm __volatile ("fxsave %0" : "+m" (fxarea)); 373bf215546Sopenharmony_ci#elif defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL) 374bf215546Sopenharmony_ci _fxsave(&fxarea); 375bf215546Sopenharmony_ci#else 376bf215546Sopenharmony_ci fxarea.mxcsr_mask = 0; 377bf215546Sopenharmony_ci#endif 378bf215546Sopenharmony_ci return !!(fxarea.mxcsr_mask & (1 << 6)); 379bf215546Sopenharmony_ci} 380bf215546Sopenharmony_ci#endif 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci#endif /* X86 or X86_64 */ 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci#if defined(PIPE_ARCH_ARM) 385bf215546Sopenharmony_cistatic void 386bf215546Sopenharmony_cicheck_os_arm_support(void) 387bf215546Sopenharmony_ci{ 388bf215546Sopenharmony_ci /* 389bf215546Sopenharmony_ci * On Android, the cpufeatures library is preferred way of checking 390bf215546Sopenharmony_ci * CPU capabilities. 
However, it is not available for standalone Mesa 391bf215546Sopenharmony_ci * builds, i.e. when Android build system (Android.mk-based) is not 392bf215546Sopenharmony_ci * used. Because of this we cannot use PIPE_OS_ANDROID here, but rather 393bf215546Sopenharmony_ci * have a separate macro that only gets enabled from respective Android.mk. 394bf215546Sopenharmony_ci */ 395bf215546Sopenharmony_ci#if defined(__ARM_NEON) || defined(__ARM_NEON__) 396bf215546Sopenharmony_ci util_cpu_caps.has_neon = 1; 397bf215546Sopenharmony_ci#elif defined(PIPE_OS_FREEBSD) && defined(HAVE_ELF_AUX_INFO) 398bf215546Sopenharmony_ci unsigned long hwcap = 0; 399bf215546Sopenharmony_ci elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)); 400bf215546Sopenharmony_ci if (hwcap & HWCAP_NEON) 401bf215546Sopenharmony_ci util_cpu_caps.has_neon = 1; 402bf215546Sopenharmony_ci#elif defined(HAS_ANDROID_CPUFEATURES) 403bf215546Sopenharmony_ci AndroidCpuFamily cpu_family = android_getCpuFamily(); 404bf215546Sopenharmony_ci uint64_t cpu_features = android_getCpuFeatures(); 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci if (cpu_family == ANDROID_CPU_FAMILY_ARM) { 407bf215546Sopenharmony_ci if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) 408bf215546Sopenharmony_ci util_cpu_caps.has_neon = 1; 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci#elif defined(PIPE_OS_LINUX) 411bf215546Sopenharmony_ci Elf32_auxv_t aux; 412bf215546Sopenharmony_ci int fd; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); 415bf215546Sopenharmony_ci if (fd >= 0) { 416bf215546Sopenharmony_ci while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) { 417bf215546Sopenharmony_ci if (aux.a_type == AT_HWCAP) { 418bf215546Sopenharmony_ci uint32_t hwcap = aux.a_un.a_val; 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci util_cpu_caps.has_neon = (hwcap >> 12) & 1; 421bf215546Sopenharmony_ci break; 422bf215546Sopenharmony_ci } 423bf215546Sopenharmony_ci } 
424bf215546Sopenharmony_ci close (fd); 425bf215546Sopenharmony_ci } 426bf215546Sopenharmony_ci#endif /* PIPE_OS_LINUX */ 427bf215546Sopenharmony_ci} 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci#elif defined(PIPE_ARCH_AARCH64) 430bf215546Sopenharmony_cistatic void 431bf215546Sopenharmony_cicheck_os_arm_support(void) 432bf215546Sopenharmony_ci{ 433bf215546Sopenharmony_ci util_cpu_caps.has_neon = true; 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci#endif /* PIPE_ARCH_ARM || PIPE_ARCH_AARCH64 */ 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci#if defined(PIPE_ARCH_MIPS64) 438bf215546Sopenharmony_cistatic void 439bf215546Sopenharmony_cicheck_os_mips64_support(void) 440bf215546Sopenharmony_ci{ 441bf215546Sopenharmony_ci#if defined(PIPE_OS_LINUX) 442bf215546Sopenharmony_ci Elf64_auxv_t aux; 443bf215546Sopenharmony_ci int fd; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); 446bf215546Sopenharmony_ci if (fd >= 0) { 447bf215546Sopenharmony_ci while (read(fd, &aux, sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { 448bf215546Sopenharmony_ci if (aux.a_type == AT_HWCAP) { 449bf215546Sopenharmony_ci uint64_t hwcap = aux.a_un.a_val; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci util_cpu_caps.has_msa = (hwcap >> 1) & 1; 452bf215546Sopenharmony_ci break; 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci } 455bf215546Sopenharmony_ci close (fd); 456bf215546Sopenharmony_ci } 457bf215546Sopenharmony_ci#endif /* PIPE_OS_LINUX */ 458bf215546Sopenharmony_ci} 459bf215546Sopenharmony_ci#endif /* PIPE_ARCH_MIPS64 */ 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_cistatic void 463bf215546Sopenharmony_ciget_cpu_topology(void) 464bf215546Sopenharmony_ci{ 465bf215546Sopenharmony_ci /* Default. This is OK if L3 is not present or there is only one. 
*/ 466bf215546Sopenharmony_ci util_cpu_caps.num_L3_caches = 1; 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci memset(util_cpu_caps.cpu_to_L3, 0xff, sizeof(util_cpu_caps.cpu_to_L3)); 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 471bf215546Sopenharmony_ci /* AMD Zen */ 472bf215546Sopenharmony_ci if (util_cpu_caps.family >= CPU_AMD_ZEN1_ZEN2 && 473bf215546Sopenharmony_ci util_cpu_caps.family < CPU_AMD_LAST) { 474bf215546Sopenharmony_ci uint32_t regs[4]; 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci uint32_t saved_mask[UTIL_MAX_CPUS / 32] = {0}; 477bf215546Sopenharmony_ci uint32_t mask[UTIL_MAX_CPUS / 32] = {0}; 478bf215546Sopenharmony_ci bool saved = false; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci uint32_t L3_found[UTIL_MAX_CPUS] = {0}; 481bf215546Sopenharmony_ci uint32_t num_L3_caches = 0; 482bf215546Sopenharmony_ci util_affinity_mask *L3_affinity_masks = NULL; 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci /* Query APIC IDs from each CPU core. 485bf215546Sopenharmony_ci * 486bf215546Sopenharmony_ci * An APIC ID is a logical ID of the CPU with respect to the cache 487bf215546Sopenharmony_ci * hierarchy, meaning that consecutive APIC IDs are neighbours in 488bf215546Sopenharmony_ci * the hierarchy, e.g. sharing the same cache. 489bf215546Sopenharmony_ci * 490bf215546Sopenharmony_ci * For example, CPU 0 can have APIC ID 0 and CPU 12 can have APIC ID 1, 491bf215546Sopenharmony_ci * which means that both CPU 0 and 12 are next to each other. 492bf215546Sopenharmony_ci * (e.g. they are 2 threads belonging to 1 SMT2 core) 493bf215546Sopenharmony_ci * 494bf215546Sopenharmony_ci * We need to find out which CPUs share the same L3 cache and they can 495bf215546Sopenharmony_ci * be all over the place. 496bf215546Sopenharmony_ci * 497bf215546Sopenharmony_ci * Querying the APIC ID can only be done by pinning the current thread 498bf215546Sopenharmony_ci * to each core. 
The original affinity mask is saved. 499bf215546Sopenharmony_ci * 500bf215546Sopenharmony_ci * Loop over all possible CPUs even though some may be offline. 501bf215546Sopenharmony_ci */ 502bf215546Sopenharmony_ci for (int16_t i = 0; i < util_cpu_caps.max_cpus && i < UTIL_MAX_CPUS; i++) { 503bf215546Sopenharmony_ci uint32_t cpu_bit = 1u << (i % 32); 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci mask[i / 32] = cpu_bit; 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci /* The assumption is that trying to bind the thread to a CPU that is 508bf215546Sopenharmony_ci * offline will fail. 509bf215546Sopenharmony_ci */ 510bf215546Sopenharmony_ci if (util_set_current_thread_affinity(mask, 511bf215546Sopenharmony_ci !saved ? saved_mask : NULL, 512bf215546Sopenharmony_ci util_cpu_caps.num_cpu_mask_bits)) { 513bf215546Sopenharmony_ci saved = true; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci /* Query the APIC ID of the current core. */ 516bf215546Sopenharmony_ci cpuid(0x00000001, regs); 517bf215546Sopenharmony_ci unsigned apic_id = regs[1] >> 24; 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci /* Query the total core count for the CPU */ 520bf215546Sopenharmony_ci uint32_t core_count = 1; 521bf215546Sopenharmony_ci if (regs[3] & (1 << 28)) 522bf215546Sopenharmony_ci core_count = (regs[1] >> 16) & 0xff; 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci core_count = util_next_power_of_two(core_count); 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci /* Query the L3 cache count. 
*/ 527bf215546Sopenharmony_ci cpuid_count(0x8000001D, 3, regs); 528bf215546Sopenharmony_ci unsigned cache_level = (regs[0] >> 5) & 0x7; 529bf215546Sopenharmony_ci unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci if (cache_level != 3) 532bf215546Sopenharmony_ci continue; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci unsigned local_core_id = apic_id & (core_count - 1); 535bf215546Sopenharmony_ci unsigned phys_id = (apic_id & ~(core_count - 1)) >> util_logbase2(core_count); 536bf215546Sopenharmony_ci unsigned local_l3_cache_index = local_core_id / util_next_power_of_two(cores_per_L3); 537bf215546Sopenharmony_ci#define L3_ID(p, i) (p << 16 | i << 1 | 1); 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci unsigned l3_id = L3_ID(phys_id, local_l3_cache_index); 540bf215546Sopenharmony_ci int idx = -1; 541bf215546Sopenharmony_ci for (unsigned c = 0; c < num_L3_caches; c++) { 542bf215546Sopenharmony_ci if (L3_found[c] == l3_id) { 543bf215546Sopenharmony_ci idx = c; 544bf215546Sopenharmony_ci break; 545bf215546Sopenharmony_ci } 546bf215546Sopenharmony_ci } 547bf215546Sopenharmony_ci if (idx == -1) { 548bf215546Sopenharmony_ci idx = num_L3_caches; 549bf215546Sopenharmony_ci L3_found[num_L3_caches++] = l3_id; 550bf215546Sopenharmony_ci L3_affinity_masks = realloc(L3_affinity_masks, sizeof(util_affinity_mask) * num_L3_caches); 551bf215546Sopenharmony_ci if (!L3_affinity_masks) 552bf215546Sopenharmony_ci return; 553bf215546Sopenharmony_ci memset(&L3_affinity_masks[num_L3_caches - 1], 0, sizeof(util_affinity_mask)); 554bf215546Sopenharmony_ci } 555bf215546Sopenharmony_ci util_cpu_caps.cpu_to_L3[i] = idx; 556bf215546Sopenharmony_ci L3_affinity_masks[idx][i / 32] |= cpu_bit; 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci mask[i / 32] = 0; 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci util_cpu_caps.num_L3_caches = num_L3_caches; 
563bf215546Sopenharmony_ci util_cpu_caps.L3_affinity_mask = L3_affinity_masks; 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci if (saved) { 566bf215546Sopenharmony_ci if (debug_get_option_dump_cpu()) { 567bf215546Sopenharmony_ci fprintf(stderr, "CPU <-> L3 cache mapping:\n"); 568bf215546Sopenharmony_ci for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) { 569bf215546Sopenharmony_ci fprintf(stderr, " - L3 %u mask = ", i); 570bf215546Sopenharmony_ci for (int j = util_cpu_caps.max_cpus - 1; j >= 0; j -= 32) 571bf215546Sopenharmony_ci fprintf(stderr, "%08x ", util_cpu_caps.L3_affinity_mask[i][j / 32]); 572bf215546Sopenharmony_ci fprintf(stderr, "\n"); 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci } 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci /* Restore the original affinity mask. */ 577bf215546Sopenharmony_ci util_set_current_thread_affinity(saved_mask, NULL, 578bf215546Sopenharmony_ci util_cpu_caps.num_cpu_mask_bits); 579bf215546Sopenharmony_ci } else { 580bf215546Sopenharmony_ci if (debug_get_option_dump_cpu()) 581bf215546Sopenharmony_ci fprintf(stderr, "Cannot set thread affinity for any thread.\n"); 582bf215546Sopenharmony_ci } 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci#endif 585bf215546Sopenharmony_ci} 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_cistatic void 588bf215546Sopenharmony_ciutil_cpu_detect_once(void) 589bf215546Sopenharmony_ci{ 590bf215546Sopenharmony_ci int available_cpus = 0; 591bf215546Sopenharmony_ci int total_cpus = 0; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci memset(&util_cpu_caps, 0, sizeof util_cpu_caps); 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci /* Count the number of CPUs in system */ 596bf215546Sopenharmony_ci#if defined(PIPE_OS_WINDOWS) 597bf215546Sopenharmony_ci { 598bf215546Sopenharmony_ci SYSTEM_INFO system_info; 599bf215546Sopenharmony_ci GetSystemInfo(&system_info); 600bf215546Sopenharmony_ci available_cpus = MAX2(1, system_info.dwNumberOfProcessors); 
601bf215546Sopenharmony_ci } 602bf215546Sopenharmony_ci#elif defined(PIPE_OS_UNIX) 603bf215546Sopenharmony_ci# if defined(HAS_SCHED_GETAFFINITY) 604bf215546Sopenharmony_ci { 605bf215546Sopenharmony_ci /* sched_setaffinity() can be used to further restrict the number of 606bf215546Sopenharmony_ci * CPUs on which the process can run. Use sched_getaffinity() to 607bf215546Sopenharmony_ci * determine the true number of available CPUs. 608bf215546Sopenharmony_ci * 609bf215546Sopenharmony_ci * FIXME: The Linux manual page for sched_getaffinity describes how this 610bf215546Sopenharmony_ci * simple implementation will fail with > 1024 CPUs, and we'll fall back 611bf215546Sopenharmony_ci * to the _SC_NPROCESSORS_ONLN path. Support for > 1024 CPUs can be 612bf215546Sopenharmony_ci * added to this path once someone has such a system for testing. 613bf215546Sopenharmony_ci */ 614bf215546Sopenharmony_ci cpu_set_t affin; 615bf215546Sopenharmony_ci if (sched_getaffinity(getpid(), sizeof(affin), &affin) == 0) 616bf215546Sopenharmony_ci available_cpus = CPU_COUNT(&affin); 617bf215546Sopenharmony_ci } 618bf215546Sopenharmony_ci# endif 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci /* Linux, FreeBSD, DragonFly, and Mac OS X should have 621bf215546Sopenharmony_ci * _SC_NOPROCESSORS_ONLN. NetBSD and OpenBSD should have HW_NCPUONLINE. 622bf215546Sopenharmony_ci * This is what FFmpeg uses on those platforms. 
623bf215546Sopenharmony_ci */ 624bf215546Sopenharmony_ci# if defined(PIPE_OS_BSD) && defined(HW_NCPUONLINE) 625bf215546Sopenharmony_ci if (available_cpus == 0) { 626bf215546Sopenharmony_ci const int mib[] = { CTL_HW, HW_NCPUONLINE }; 627bf215546Sopenharmony_ci int ncpu; 628bf215546Sopenharmony_ci size_t len = sizeof(ncpu); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci sysctl(mib, 2, &ncpu, &len, NULL, 0); 631bf215546Sopenharmony_ci available_cpus = ncpu; 632bf215546Sopenharmony_ci } 633bf215546Sopenharmony_ci# elif defined(_SC_NPROCESSORS_ONLN) 634bf215546Sopenharmony_ci if (available_cpus == 0) { 635bf215546Sopenharmony_ci available_cpus = sysconf(_SC_NPROCESSORS_ONLN); 636bf215546Sopenharmony_ci if (available_cpus == ~0) 637bf215546Sopenharmony_ci available_cpus = 1; 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci# elif defined(PIPE_OS_BSD) 640bf215546Sopenharmony_ci if (available_cpus == 0) { 641bf215546Sopenharmony_ci const int mib[] = { CTL_HW, HW_NCPU }; 642bf215546Sopenharmony_ci int ncpu; 643bf215546Sopenharmony_ci int len = sizeof(ncpu); 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci sysctl(mib, 2, &ncpu, &len, NULL, 0); 646bf215546Sopenharmony_ci available_cpus = ncpu; 647bf215546Sopenharmony_ci } 648bf215546Sopenharmony_ci# endif /* defined(PIPE_OS_BSD) */ 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci /* Determine the maximum number of CPUs configured in the system. This is 651bf215546Sopenharmony_ci * used to properly set num_cpu_mask_bits below. On BSDs that don't have 652bf215546Sopenharmony_ci * HW_NCPUONLINE, it was not clear whether HW_NCPU is the number of 653bf215546Sopenharmony_ci * configured or the number of online CPUs. For that reason, prefer the 654bf215546Sopenharmony_ci * _SC_NPROCESSORS_CONF path on all BSDs. 
655bf215546Sopenharmony_ci */ 656bf215546Sopenharmony_ci# if defined(_SC_NPROCESSORS_CONF) 657bf215546Sopenharmony_ci total_cpus = sysconf(_SC_NPROCESSORS_CONF); 658bf215546Sopenharmony_ci if (total_cpus == ~0) 659bf215546Sopenharmony_ci total_cpus = 1; 660bf215546Sopenharmony_ci# elif defined(PIPE_OS_BSD) 661bf215546Sopenharmony_ci { 662bf215546Sopenharmony_ci const int mib[] = { CTL_HW, HW_NCPU }; 663bf215546Sopenharmony_ci int ncpu; 664bf215546Sopenharmony_ci int len = sizeof(ncpu); 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci sysctl(mib, 2, &ncpu, &len, NULL, 0); 667bf215546Sopenharmony_ci total_cpus = ncpu; 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci# endif /* defined(PIPE_OS_BSD) */ 670bf215546Sopenharmony_ci#endif /* defined(PIPE_OS_UNIX) */ 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci util_cpu_caps.nr_cpus = MAX2(1, available_cpus); 673bf215546Sopenharmony_ci total_cpus = MAX2(total_cpus, util_cpu_caps.nr_cpus); 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci util_cpu_caps.max_cpus = total_cpus; 676bf215546Sopenharmony_ci util_cpu_caps.num_cpu_mask_bits = align(total_cpus, 32); 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci /* Make the fallback cacheline size nonzero so that it can be 679bf215546Sopenharmony_ci * safely passed to align(). 
680bf215546Sopenharmony_ci */ 681bf215546Sopenharmony_ci util_cpu_caps.cacheline = sizeof(void *); 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 684bf215546Sopenharmony_ci if (has_cpuid()) { 685bf215546Sopenharmony_ci uint32_t regs[4]; 686bf215546Sopenharmony_ci uint32_t regs2[4]; 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci util_cpu_caps.cacheline = 32; 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci /* Get max cpuid level */ 691bf215546Sopenharmony_ci cpuid(0x00000000, regs); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci if (regs[0] >= 0x00000001) { 694bf215546Sopenharmony_ci unsigned int cacheline; 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci cpuid (0x00000001, regs2); 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf; 699bf215546Sopenharmony_ci /* Add "extended family". */ 700bf215546Sopenharmony_ci if (util_cpu_caps.x86_cpu_type == 0xf) 701bf215546Sopenharmony_ci util_cpu_caps.x86_cpu_type += ((regs2[0] >> 20) & 0xff); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci switch (util_cpu_caps.x86_cpu_type) { 704bf215546Sopenharmony_ci case 0x17: 705bf215546Sopenharmony_ci util_cpu_caps.family = CPU_AMD_ZEN1_ZEN2; 706bf215546Sopenharmony_ci break; 707bf215546Sopenharmony_ci case 0x18: 708bf215546Sopenharmony_ci util_cpu_caps.family = CPU_AMD_ZEN_HYGON; 709bf215546Sopenharmony_ci break; 710bf215546Sopenharmony_ci case 0x19: 711bf215546Sopenharmony_ci util_cpu_caps.family = CPU_AMD_ZEN3; 712bf215546Sopenharmony_ci break; 713bf215546Sopenharmony_ci default: 714bf215546Sopenharmony_ci if (util_cpu_caps.x86_cpu_type > 0x19) 715bf215546Sopenharmony_ci util_cpu_caps.family = CPU_AMD_ZEN_NEXT; 716bf215546Sopenharmony_ci } 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci /* general feature flags */ 719bf215546Sopenharmony_ci util_cpu_caps.has_tsc = (regs2[3] >> 4) & 1; /* 0x0000010 */ 720bf215546Sopenharmony_ci 
util_cpu_caps.has_mmx = (regs2[3] >> 23) & 1; /* 0x0800000 */ 721bf215546Sopenharmony_ci util_cpu_caps.has_sse = (regs2[3] >> 25) & 1; /* 0x2000000 */ 722bf215546Sopenharmony_ci util_cpu_caps.has_sse2 = (regs2[3] >> 26) & 1; /* 0x4000000 */ 723bf215546Sopenharmony_ci util_cpu_caps.has_sse3 = (regs2[2] >> 0) & 1; /* 0x0000001 */ 724bf215546Sopenharmony_ci util_cpu_caps.has_ssse3 = (regs2[2] >> 9) & 1; /* 0x0000020 */ 725bf215546Sopenharmony_ci util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1; 726bf215546Sopenharmony_ci util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1; 727bf215546Sopenharmony_ci util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1; 728bf215546Sopenharmony_ci util_cpu_caps.has_avx = ((regs2[2] >> 28) & 1) && // AVX 729bf215546Sopenharmony_ci ((regs2[2] >> 27) & 1) && // OSXSAVE 730bf215546Sopenharmony_ci ((xgetbv() & 6) == 6); // XMM & YMM 731bf215546Sopenharmony_ci util_cpu_caps.has_f16c = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx; 732bf215546Sopenharmony_ci util_cpu_caps.has_fma = ((regs2[2] >> 12) & 1) && util_cpu_caps.has_avx; 733bf215546Sopenharmony_ci util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ 734bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86_64) 735bf215546Sopenharmony_ci util_cpu_caps.has_daz = 1; 736bf215546Sopenharmony_ci#else 737bf215546Sopenharmony_ci util_cpu_caps.has_daz = util_cpu_caps.has_sse3 || 738bf215546Sopenharmony_ci (util_cpu_caps.has_sse2 && sse2_has_daz()); 739bf215546Sopenharmony_ci#endif 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci cacheline = ((regs2[1] >> 8) & 0xFF) * 8; 742bf215546Sopenharmony_ci if (cacheline > 0) 743bf215546Sopenharmony_ci util_cpu_caps.cacheline = cacheline; 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) { 746bf215546Sopenharmony_ci uint32_t regs7[4]; 747bf215546Sopenharmony_ci cpuid_count(0x00000007, 0x00000000, regs7); 748bf215546Sopenharmony_ci util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1; 
749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci // check for avx512 752bf215546Sopenharmony_ci if (((regs2[2] >> 27) & 1) && // OSXSAVE 753bf215546Sopenharmony_ci (xgetbv() & (0x7 << 5)) && // OPMASK: upper-256 enabled by OS 754bf215546Sopenharmony_ci ((xgetbv() & 6) == 6)) { // XMM/YMM enabled by OS 755bf215546Sopenharmony_ci uint32_t regs3[4]; 756bf215546Sopenharmony_ci cpuid_count(0x00000007, 0x00000000, regs3); 757bf215546Sopenharmony_ci util_cpu_caps.has_avx512f = (regs3[1] >> 16) & 1; 758bf215546Sopenharmony_ci util_cpu_caps.has_avx512dq = (regs3[1] >> 17) & 1; 759bf215546Sopenharmony_ci util_cpu_caps.has_avx512ifma = (regs3[1] >> 21) & 1; 760bf215546Sopenharmony_ci util_cpu_caps.has_avx512pf = (regs3[1] >> 26) & 1; 761bf215546Sopenharmony_ci util_cpu_caps.has_avx512er = (regs3[1] >> 27) & 1; 762bf215546Sopenharmony_ci util_cpu_caps.has_avx512cd = (regs3[1] >> 28) & 1; 763bf215546Sopenharmony_ci util_cpu_caps.has_avx512bw = (regs3[1] >> 30) & 1; 764bf215546Sopenharmony_ci util_cpu_caps.has_avx512vl = (regs3[1] >> 31) & 1; 765bf215546Sopenharmony_ci util_cpu_caps.has_avx512vbmi = (regs3[2] >> 1) & 1; 766bf215546Sopenharmony_ci } 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) { 769bf215546Sopenharmony_ci /* GenuineIntel */ 770bf215546Sopenharmony_ci util_cpu_caps.has_intel = 1; 771bf215546Sopenharmony_ci } 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci cpuid(0x80000000, regs); 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci if (regs[0] >= 0x80000001) { 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci cpuid(0x80000001, regs2); 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci util_cpu_caps.has_mmx |= (regs2[3] >> 23) & 1; 780bf215546Sopenharmony_ci util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1; 781bf215546Sopenharmony_ci util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1; 782bf215546Sopenharmony_ci util_cpu_caps.has_3dnow_ext 
= (regs2[3] >> 30) & 1; 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci util_cpu_caps.has_xop = util_cpu_caps.has_avx && 785bf215546Sopenharmony_ci ((regs2[2] >> 11) & 1); 786bf215546Sopenharmony_ci } 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci if (regs[0] >= 0x80000006) { 789bf215546Sopenharmony_ci /* should we really do this if the clflush size above worked? */ 790bf215546Sopenharmony_ci unsigned int cacheline; 791bf215546Sopenharmony_ci cpuid(0x80000006, regs2); 792bf215546Sopenharmony_ci cacheline = regs2[2] & 0xFF; 793bf215546Sopenharmony_ci if (cacheline > 0) 794bf215546Sopenharmony_ci util_cpu_caps.cacheline = cacheline; 795bf215546Sopenharmony_ci } 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci if (!util_cpu_caps.has_sse) { 798bf215546Sopenharmony_ci util_cpu_caps.has_sse2 = 0; 799bf215546Sopenharmony_ci util_cpu_caps.has_sse3 = 0; 800bf215546Sopenharmony_ci util_cpu_caps.has_ssse3 = 0; 801bf215546Sopenharmony_ci util_cpu_caps.has_sse4_1 = 0; 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci } 804bf215546Sopenharmony_ci#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci#if defined(PIPE_ARCH_ARM) || defined(PIPE_ARCH_AARCH64) 807bf215546Sopenharmony_ci check_os_arm_support(); 808bf215546Sopenharmony_ci#endif 809bf215546Sopenharmony_ci 810bf215546Sopenharmony_ci#if defined(PIPE_ARCH_PPC) 811bf215546Sopenharmony_ci check_os_altivec_support(); 812bf215546Sopenharmony_ci#endif /* PIPE_ARCH_PPC */ 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci#if defined(PIPE_ARCH_MIPS64) 815bf215546Sopenharmony_ci check_os_mips64_support(); 816bf215546Sopenharmony_ci#endif /* PIPE_ARCH_MIPS64 */ 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci#if defined(PIPE_ARCH_S390) 819bf215546Sopenharmony_ci util_cpu_caps.family = CPU_S390X; 820bf215546Sopenharmony_ci#endif 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci get_cpu_topology(); 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci if 
(debug_get_option_dump_cpu()) { 825bf215546Sopenharmony_ci printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); 828bf215546Sopenharmony_ci printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); 831bf215546Sopenharmony_ci printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); 832bf215546Sopenharmony_ci printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); 833bf215546Sopenharmony_ci printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); 834bf215546Sopenharmony_ci printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); 835bf215546Sopenharmony_ci printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); 836bf215546Sopenharmony_ci printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); 837bf215546Sopenharmony_ci printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); 838bf215546Sopenharmony_ci printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2); 839bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx); 840bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2); 841bf215546Sopenharmony_ci printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c); 842bf215546Sopenharmony_ci printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt); 843bf215546Sopenharmony_ci printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); 844bf215546Sopenharmony_ci printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); 845bf215546Sopenharmony_ci printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop); 846bf215546Sopenharmony_ci printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); 847bf215546Sopenharmony_ci 
printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx); 848bf215546Sopenharmony_ci printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon); 849bf215546Sopenharmony_ci printf("util_cpu_caps.has_msa = %u\n", util_cpu_caps.has_msa); 850bf215546Sopenharmony_ci printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz); 851bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f); 852bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq); 853bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512ifma = %u\n", util_cpu_caps.has_avx512ifma); 854bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512pf = %u\n", util_cpu_caps.has_avx512pf); 855bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512er = %u\n", util_cpu_caps.has_avx512er); 856bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512cd = %u\n", util_cpu_caps.has_avx512cd); 857bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512bw = %u\n", util_cpu_caps.has_avx512bw); 858bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512vl = %u\n", util_cpu_caps.has_avx512vl); 859bf215546Sopenharmony_ci printf("util_cpu_caps.has_avx512vbmi = %u\n", util_cpu_caps.has_avx512vbmi); 860bf215546Sopenharmony_ci printf("util_cpu_caps.num_L3_caches = %u\n", util_cpu_caps.num_L3_caches); 861bf215546Sopenharmony_ci printf("util_cpu_caps.num_cpu_mask_bits = %u\n", util_cpu_caps.num_cpu_mask_bits); 862bf215546Sopenharmony_ci } 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci /* This must happen at the end as it's used to guard everything else */ 865bf215546Sopenharmony_ci p_atomic_set(&util_cpu_caps.detect_done, 1); 866bf215546Sopenharmony_ci} 867bf215546Sopenharmony_ci
/**
 * File-local once_flag, statically initialised with ONCE_FLAG_INIT.
 * Its only use in this part of the file is as the first argument of the
 * call_once() call in util_cpu_detect() below.
 */
868bf215546Sopenharmony_cistatic once_flag cpu_once_flag = ONCE_FLAG_INIT; 869bf215546Sopenharmony_ci
/**
 * Entry point for CPU feature detection.
 *
 * Takes no arguments and returns nothing.  It does no detection work
 * itself: it hands util_cpu_detect_once to call_once() together with
 * cpu_once_flag.  The detection routine's last statement (just above)
 * sets util_cpu_caps.detect_done to 1.
 *
 * NOTE(review): with standard C11 call_once() semantics the routine runs
 * at most once per process and repeated calls here are harmless; the
 * project ships its own c11/threads.h, so confirm that implementation
 * gives the same guarantee.
 */
870bf215546Sopenharmony_civoid 871bf215546Sopenharmony_ciutil_cpu_detect(void) 872bf215546Sopenharmony_ci{ 873bf215546Sopenharmony_ci call_once(&cpu_once_flag, 
util_cpu_detect_once); 874bf215546Sopenharmony_ci} 875