xref: /third_party/mesa3d/src/util/u_cpu_detect.h (revision bf215546)
1/**************************************************************************
2 *
3 * Copyright 2008 Dennis Smit
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
20 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 ***************************************************************************/
26
27/**
28 * @file
29 * CPU feature detection.
30 *
31 * @author Dennis Smit
32 * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
33 */
34
35#ifndef _UTIL_CPU_DETECT_H
36#define _UTIL_CPU_DETECT_H
37
38#include <stdbool.h>
39
40#include "pipe/p_config.h"
41#include "util/u_atomic.h"
42#include "util/u_thread.h"
43
44
45#ifdef __cplusplus
46extern "C" {
47#endif
48
/**
 * Coarse CPU family classification, stored in util_cpu_caps_t::family.
 *
 * Apart from the explicit zero base, the enumerators are numbered
 * implicitly, so inserting or reordering entries changes their values.
 */
enum cpu_family {
   /* Zero value; going by its name, used when no family was identified. */
   CPU_UNKNOWN = 0,

   /* AMD entries. */
   CPU_AMD_ZEN1_ZEN2,
   CPU_AMD_ZEN_HYGON,
   CPU_AMD_ZEN3,
   CPU_AMD_ZEN_NEXT,
   /* NOTE(review): presumably an end-of-range marker for the AMD entries
    * rather than a real family -- confirm against the users of this enum.
    */
   CPU_AMD_LAST,

   CPU_S390X,
};
60
61typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];
62
63struct util_cpu_caps_t {
64   /**
65    * Initialized to 0 and set to non-zero with an atomic after the entire
66    * struct has been initialized.
67    */
68   uint32_t detect_done;
69
70   /**
71    * Number of CPUs available to the process.
72    *
73    * This will be less than or equal to \c max_cpus.  This is the number of
74    * CPUs that are online and available to the process.
75    */
76   int16_t nr_cpus;
77
78   /**
79    * Maximum number of CPUs that can be online in the system.
80    *
81    * This will be greater than or equal to \c nr_cpus.  This is the number of
82    * CPUs installed in the system.  \c nr_cpus will be less if some CPUs are
83    * offline.
84    */
85   int16_t max_cpus;
86
87   enum cpu_family family;
88
89   /* Feature flags */
90   int x86_cpu_type;
91   unsigned cacheline;
92
93   unsigned has_intel:1;
94   unsigned has_tsc:1;
95   unsigned has_mmx:1;
96   unsigned has_mmx2:1;
97   unsigned has_sse:1;
98   unsigned has_sse2:1;
99   unsigned has_sse3:1;
100   unsigned has_ssse3:1;
101   unsigned has_sse4_1:1;
102   unsigned has_sse4_2:1;
103   unsigned has_popcnt:1;
104   unsigned has_avx:1;
105   unsigned has_avx2:1;
106   unsigned has_f16c:1;
107   unsigned has_fma:1;
108   unsigned has_3dnow:1;
109   unsigned has_3dnow_ext:1;
110   unsigned has_xop:1;
111   unsigned has_altivec:1;
112   unsigned has_vsx:1;
113   unsigned has_daz:1;
114   unsigned has_neon:1;
115   unsigned has_msa:1;
116
117   unsigned has_avx512f:1;
118   unsigned has_avx512dq:1;
119   unsigned has_avx512ifma:1;
120   unsigned has_avx512pf:1;
121   unsigned has_avx512er:1;
122   unsigned has_avx512cd:1;
123   unsigned has_avx512bw:1;
124   unsigned has_avx512vl:1;
125   unsigned has_avx512vbmi:1;
126
127   unsigned num_L3_caches;
128   unsigned num_cpu_mask_bits;
129
130   uint16_t cpu_to_L3[UTIL_MAX_CPUS];
131   /* Affinity masks for each L3 cache. */
132   util_affinity_mask *L3_affinity_mask;
133};
134
/* 0xffff, the largest value a uint16_t can hold.
 * NOTE(review): presumably the "no L3 cache known" marker for entries of
 * util_cpu_caps_t::cpu_to_L3 -- confirm in the detection code.
 */
#define U_CPU_INVALID_L3 0xffff
136
/**
 * Run CPU detection.
 *
 * Defined outside this header.  util_get_cpu_caps() calls it whenever
 * util_cpu_caps.detect_done still reads as zero, so most code should use
 * util_get_cpu_caps() rather than calling this directly.
 *
 * NOTE(review): expected to fill in util_cpu_caps and then set detect_done
 * to a non-zero value -- confirm in the implementation.
 */
void util_cpu_detect(void);
138
139static inline ATTRIBUTE_CONST const struct util_cpu_caps_t *
140util_get_cpu_caps(void)
141{
142   extern struct util_cpu_caps_t util_cpu_caps;
143
144   /* On most CPU architectures, an atomic read is simply a regular memory
145    * load instruction with some extra compiler magic to prevent code
146    * re-ordering around it.  The perf impact of doing this check should be
147    * negligible in most cases.
148    *
149    * Also, even though it looks like  a bit of a lie, we've declared this
150    * function with ATTRIBUTE_CONST.  The GCC docs say:
151    *
152    *    "Calls to functions whose return value is not affected by changes to
153    *    the observable state of the program and that have no observable
154    *    effects on such state other than to return a value may lend
155    *    themselves to optimizations such as common subexpression elimination.
156    *    Declaring such functions with the const attribute allows GCC to avoid
157    *    emitting some calls in repeated invocations of the function with the
158    *    same argument values."
159    *
160    * The word "observable" is important here.  With the exception of a
161    * llvmpipe debug flag behind an environment variable and a few unit tests,
162    * all of which emulate worse CPUs, this function neither affects nor is
163    * affected by any "observable" state.  It has its own internal state for
164    * sure, but that state is such that it appears to return exactly the same
165    * value with the same internal data every time.
166    */
167   if (unlikely(!p_atomic_read(&util_cpu_caps.detect_done)))
168      util_cpu_detect();
169
170   return &util_cpu_caps;
171}
172
173#ifdef __cplusplus
174}
175#endif
176
177
178#endif /* _UTIL_CPU_DETECT_H */
179