xref: /third_party/lzma/C/CpuArch.c (revision 370b324c)
1370b324cSopenharmony_ci/* CpuArch.c -- CPU specific code
2370b324cSopenharmony_ci2023-05-18 : Igor Pavlov : Public domain */
3370b324cSopenharmony_ci
4370b324cSopenharmony_ci#include "Precomp.h"
5370b324cSopenharmony_ci
6370b324cSopenharmony_ci// #include <stdio.h>
7370b324cSopenharmony_ci
8370b324cSopenharmony_ci#include "CpuArch.h"
9370b324cSopenharmony_ci
10370b324cSopenharmony_ci#ifdef MY_CPU_X86_OR_AMD64
11370b324cSopenharmony_ci
12370b324cSopenharmony_ci#undef NEED_CHECK_FOR_CPUID
13370b324cSopenharmony_ci#if !defined(MY_CPU_AMD64)
14370b324cSopenharmony_ci#define NEED_CHECK_FOR_CPUID
15370b324cSopenharmony_ci#endif
16370b324cSopenharmony_ci
17370b324cSopenharmony_ci/*
18370b324cSopenharmony_ci  cpuid instruction supports (subFunction) parameter in ECX,
19370b324cSopenharmony_ci  that is used only with some specific (function) parameter values.
20370b324cSopenharmony_ci  But we always use only (subFunction==0).
21370b324cSopenharmony_ci*/
22370b324cSopenharmony_ci/*
23370b324cSopenharmony_ci  __cpuid(): MSVC and GCC/CLANG use same function/macro name
24370b324cSopenharmony_ci             but parameters are different.
25370b324cSopenharmony_ci   We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
26370b324cSopenharmony_ci*/
27370b324cSopenharmony_ci
28370b324cSopenharmony_ci#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
29370b324cSopenharmony_ci    || defined(__clang__) /* && (__clang_major__ >= 10) */
30370b324cSopenharmony_ci
31370b324cSopenharmony_ci/* there were some CLANG/GCC compilers that had issues with
32370b324cSopenharmony_ci   rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
33370b324cSopenharmony_ci   compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
34370b324cSopenharmony_ci   The history of __cpuid() changes in CLANG/GCC:
35370b324cSopenharmony_ci   GCC:
36370b324cSopenharmony_ci     2007: it preserved ebx for (__PIC__ && __i386__)
37370b324cSopenharmony_ci     2013: it preserved rbx and ebx for __PIC__
38370b324cSopenharmony_ci     2014: it doesn't preserve rbx and ebx anymore
39370b324cSopenharmony_ci     we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
40370b324cSopenharmony_ci   CLANG:
41370b324cSopenharmony_ci     2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
42370b324cSopenharmony_ci   Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
43370b324cSopenharmony_ci   Do we need __PIC__ test for CLANG or we must care about rbx even if
44370b324cSopenharmony_ci   __PIC__ is not defined?
45370b324cSopenharmony_ci*/
46370b324cSopenharmony_ci
47370b324cSopenharmony_ci#define ASM_LN "\n"
48370b324cSopenharmony_ci
/* x86_cpuid_MACRO(p, func):
     runs the cpuid instruction with EAX = (func) and ECX = 0 (subFunction),
     then stores the resulting EAX, EBX, ECX, EDX into (p)[0], (p)[1], (p)[2], (p)[3].
   One of three variants is selected below.
   The first two (64-bit and 32-bit __PIC__ code for GCC < 5 or CLANG) copy
   rbx/ebx into a scratch register before cpuid and exchange it back afterwards,
   so rbx/ebx is never declared as an asm output.
   The last variant lets cpuid write ebx directly via the "=b" constraint. */
49370b324cSopenharmony_ci#if defined(MY_CPU_AMD64) && defined(__PIC__) \
50370b324cSopenharmony_ci    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
51370b324cSopenharmony_ci
52370b324cSopenharmony_ci#define x86_cpuid_MACRO(p, func) { \
53370b324cSopenharmony_ci  __asm__ __volatile__ ( \
54370b324cSopenharmony_ci    ASM_LN   "mov     %%rbx, %q1"  \
55370b324cSopenharmony_ci    ASM_LN   "cpuid"               \
56370b324cSopenharmony_ci    ASM_LN   "xchg    %%rbx, %q1"  \
57370b324cSopenharmony_ci    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
58370b324cSopenharmony_ci
59370b324cSopenharmony_ci  /* "=&r" selects free register. It can select even rbx, if that register is free.
60370b324cSopenharmony_ci     "=&D" for (RDI) also works, but the code can be larger with "=&D"
61370b324cSopenharmony_ci     "2"(0) means (subFunction = 0),
62370b324cSopenharmony_ci     2 is (zero-based) index in the output constraint list "=c" (ECX). */
63370b324cSopenharmony_ci
64370b324cSopenharmony_ci#elif defined(MY_CPU_X86) && defined(__PIC__) \
65370b324cSopenharmony_ci    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
66370b324cSopenharmony_ci
/* 32-bit variant of the same save/exchange sequence: uses ebx and the
   32-bit name (%k1) of the scratch register. */
67370b324cSopenharmony_ci#define x86_cpuid_MACRO(p, func) { \
68370b324cSopenharmony_ci  __asm__ __volatile__ ( \
69370b324cSopenharmony_ci    ASM_LN   "mov     %%ebx, %k1"  \
70370b324cSopenharmony_ci    ASM_LN   "cpuid"               \
71370b324cSopenharmony_ci    ASM_LN   "xchg    %%ebx, %k1"  \
72370b324cSopenharmony_ci    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
73370b324cSopenharmony_ci
74370b324cSopenharmony_ci#else
75370b324cSopenharmony_ci
/* Plain variant: no register juggling, ebx is a normal output ("=b"). */
76370b324cSopenharmony_ci#define x86_cpuid_MACRO(p, func) { \
77370b324cSopenharmony_ci  __asm__ __volatile__ ( \
78370b324cSopenharmony_ci    ASM_LN   "cpuid"               \
79370b324cSopenharmony_ci    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
80370b324cSopenharmony_ci
81370b324cSopenharmony_ci#endif
82370b324cSopenharmony_ci
83370b324cSopenharmony_ci
/* GCC/CLANG version.
   Executes cpuid for function number (func) with subFunction 0 and
   writes the returned EAX, EBX, ECX, EDX values to p[0], p[1], p[2], p[3].
   This function itself does not check that cpuid is supported. */
84370b324cSopenharmony_civoid Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
85370b324cSopenharmony_ci{
86370b324cSopenharmony_ci  x86_cpuid_MACRO(p, func)
87370b324cSopenharmony_ci}
88370b324cSopenharmony_ci
89370b324cSopenharmony_ci
/* GCC/CLANG version.
   Returns the EAX value reported by cpuid function 0 (p[0]).
   If NEED_CHECK_FOR_CPUID is defined (it is defined above for non-AMD64 builds),
   the function first tries to toggle bit 21 (EFALGS_CPUID_BIT) of the flags register
   and returns 0 if the bit did not change; callers treat 0 as "cpuid is not supported". */
90370b324cSopenharmony_ciZ7_NO_INLINE
91370b324cSopenharmony_ciUInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
92370b324cSopenharmony_ci{
93370b324cSopenharmony_ci #if defined(NEED_CHECK_FOR_CPUID)
94370b324cSopenharmony_ci  #define EFALGS_CPUID_BIT 21
95370b324cSopenharmony_ci  UInt32 a;
96370b324cSopenharmony_ci  __asm__ __volatile__ (
    // first pushf keeps a copy of the original flags on the stack until the final popf;
    // second pushf + pop loads the flags into %0
97370b324cSopenharmony_ci    ASM_LN   "pushf"
98370b324cSopenharmony_ci    ASM_LN   "pushf"
99370b324cSopenharmony_ci    ASM_LN   "pop     %0"
100370b324cSopenharmony_ci    // ASM_LN   "movl    %0, %1"
101370b324cSopenharmony_ci    // ASM_LN   "xorl    $0x200000, %0"
    // toggle the tested bit and write the changed value to the flags register
102370b324cSopenharmony_ci    ASM_LN   "btc     %1, %0"
103370b324cSopenharmony_ci    ASM_LN   "push    %0"
104370b324cSopenharmony_ci    ASM_LN   "popf"
    // read the flags back and xor them with the saved original copy:
    // a bit in %0 is set only if that flag bit really changed
105370b324cSopenharmony_ci    ASM_LN   "pushf"
106370b324cSopenharmony_ci    ASM_LN   "pop     %0"
107370b324cSopenharmony_ci    ASM_LN   "xorl    (%%esp), %0"
108370b324cSopenharmony_ci
    // restore the original flags and rebalance the stack
109370b324cSopenharmony_ci    ASM_LN   "popf"
110370b324cSopenharmony_ci    ASM_LN
111370b324cSopenharmony_ci    : "=&r" (a) // "=a"
112370b324cSopenharmony_ci    : "i" (EFALGS_CPUID_BIT)
113370b324cSopenharmony_ci    );
  // bit was not changeable: report "no cpuid"
114370b324cSopenharmony_ci  if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
115370b324cSopenharmony_ci    return 0;
116370b324cSopenharmony_ci #endif
117370b324cSopenharmony_ci  {
118370b324cSopenharmony_ci    UInt32 p[4];
119370b324cSopenharmony_ci    x86_cpuid_MACRO(p, 0)
120370b324cSopenharmony_ci    return p[0];
121370b324cSopenharmony_ci  }
122370b324cSopenharmony_ci}
123370b324cSopenharmony_ci
124370b324cSopenharmony_ci#undef ASM_LN
125370b324cSopenharmony_ci
126370b324cSopenharmony_ci#elif !defined(_MSC_VER)
127370b324cSopenharmony_ci
128370b324cSopenharmony_ci/*
129370b324cSopenharmony_ci// for gcc/clang and other: we can try to use __cpuid macro:
130370b324cSopenharmony_ci#include <cpuid.h>
131370b324cSopenharmony_civoid Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
132370b324cSopenharmony_ci{
133370b324cSopenharmony_ci  __cpuid(func, p[0], p[1], p[2], p[3]);
134370b324cSopenharmony_ci}
135370b324cSopenharmony_ciUInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
136370b324cSopenharmony_ci{
137370b324cSopenharmony_ci  return (UInt32)__get_cpuid_max(0, NULL);
138370b324cSopenharmony_ci}
139370b324cSopenharmony_ci*/
140370b324cSopenharmony_ci// for unsupported cpuid:
/* Stub used when the compiler is neither GCC/CLANG nor MSVC:
   cpuid is not executed, (func) is ignored and all four output values are set to 0. */
141370b324cSopenharmony_civoid Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
142370b324cSopenharmony_ci{
143370b324cSopenharmony_ci  UNUSED_VAR(func)
144370b324cSopenharmony_ci  p[0] = p[1] = p[2] = p[3] = 0;
145370b324cSopenharmony_ci}
/* Stub for the same "unsupported cpuid" configuration:
   always returns 0, which callers in this file treat as "cpuid is not supported". */
146370b324cSopenharmony_ciUInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
147370b324cSopenharmony_ci{
148370b324cSopenharmony_ci  return 0;
149370b324cSopenharmony_ci}
150370b324cSopenharmony_ci
151370b324cSopenharmony_ci#else // _MSC_VER
152370b324cSopenharmony_ci
153370b324cSopenharmony_ci#if !defined(MY_CPU_AMD64)
154370b324cSopenharmony_ci
/* MSVC 32-bit version, written as a naked function (the body supplies its own "ret").
   The result is left in EAX:
     - 0, if NEED_CHECK_FOR_CPUID is defined and bit 21 (EFALGS_CPUID_BIT)
       of the flags register could not be toggled;
     - otherwise the EAX value produced by cpuid with EAX = 0, ECX = 0.
   EBX is saved and restored around cpuid. */
155370b324cSopenharmony_ciUInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
156370b324cSopenharmony_ci{
157370b324cSopenharmony_ci  #if defined(NEED_CHECK_FOR_CPUID)
158370b324cSopenharmony_ci  #define EFALGS_CPUID_BIT 21
  // two copies of the flags: the lower one is kept as the original value,
  // the top one is modified in place on the stack
159370b324cSopenharmony_ci  __asm   pushfd
160370b324cSopenharmony_ci  __asm   pushfd
161370b324cSopenharmony_ci  /*
162370b324cSopenharmony_ci  __asm   pop     eax
163370b324cSopenharmony_ci  // __asm   mov     edx, eax
164370b324cSopenharmony_ci  __asm   btc     eax, EFALGS_CPUID_BIT
165370b324cSopenharmony_ci  __asm   push    eax
166370b324cSopenharmony_ci  */
167370b324cSopenharmony_ci  __asm   btc     dword ptr [esp], EFALGS_CPUID_BIT
168370b324cSopenharmony_ci  __asm   popfd
  // read the flags back and compare with the saved original copy
169370b324cSopenharmony_ci  __asm   pushfd
170370b324cSopenharmony_ci  __asm   pop     eax
171370b324cSopenharmony_ci  // __asm   xor     eax, edx
172370b324cSopenharmony_ci  __asm   xor     eax, [esp]
173370b324cSopenharmony_ci  // __asm   push    edx
  // restore the original flags
174370b324cSopenharmony_ci  __asm   popfd
  // eax == 0 here means the bit did not change: return 0
175370b324cSopenharmony_ci  __asm   and     eax, (1 shl EFALGS_CPUID_BIT)
176370b324cSopenharmony_ci  __asm   jz end_func
177370b324cSopenharmony_ci  #endif
178370b324cSopenharmony_ci  __asm   push    ebx
179370b324cSopenharmony_ci  __asm   xor     eax, eax    // func
180370b324cSopenharmony_ci  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
181370b324cSopenharmony_ci  __asm   cpuid
182370b324cSopenharmony_ci  __asm   pop     ebx
183370b324cSopenharmony_ci  #if defined(NEED_CHECK_FOR_CPUID)
184370b324cSopenharmony_ci  end_func:
185370b324cSopenharmony_ci  #endif
186370b324cSopenharmony_ci  __asm   ret 0
187370b324cSopenharmony_ci}
188370b324cSopenharmony_ci
/* MSVC 32-bit version, written as a naked function.
   On entry ECX holds (p) and EDX holds (func), as the register comments below note.
   Executes cpuid with EAX = func, ECX = 0 and stores EAX, EBX, ECX, EDX
   to p[0], p[1], p[2], p[3]. EBX and EDI are saved and restored.
   This function itself does not check that cpuid is supported. */
189370b324cSopenharmony_civoid __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
190370b324cSopenharmony_ci{
191370b324cSopenharmony_ci  UNUSED_VAR(p)
192370b324cSopenharmony_ci  UNUSED_VAR(func)
193370b324cSopenharmony_ci  __asm   push    ebx
194370b324cSopenharmony_ci  __asm   push    edi
  // cpuid overwrites ecx, so the (p) pointer is moved to edi first
195370b324cSopenharmony_ci  __asm   mov     edi, ecx    // p
196370b324cSopenharmony_ci  __asm   mov     eax, edx    // func
197370b324cSopenharmony_ci  __asm   xor     ecx, ecx    // subfunction (optional) for (func == 0)
198370b324cSopenharmony_ci  __asm   cpuid
199370b324cSopenharmony_ci  __asm   mov     [edi     ], eax
200370b324cSopenharmony_ci  __asm   mov     [edi +  4], ebx
201370b324cSopenharmony_ci  __asm   mov     [edi +  8], ecx
202370b324cSopenharmony_ci  __asm   mov     [edi + 12], edx
203370b324cSopenharmony_ci  __asm   pop     edi
204370b324cSopenharmony_ci  __asm   pop     ebx
205370b324cSopenharmony_ci  __asm   ret     0
206370b324cSopenharmony_ci}
207370b324cSopenharmony_ci
208370b324cSopenharmony_ci#else // MY_CPU_AMD64
209370b324cSopenharmony_ci
210370b324cSopenharmony_ci    #if _MSC_VER >= 1600
211370b324cSopenharmony_ci      #include <intrin.h>
212370b324cSopenharmony_ci      #define MY_cpuidex  __cpuidex
213370b324cSopenharmony_ci    #else
214370b324cSopenharmony_ci/*
215370b324cSopenharmony_ci __cpuid (func == (0 or 7)) requires subfunction number in ECX.
216370b324cSopenharmony_ci  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
217370b324cSopenharmony_ci   __cpuid() in new MSVC clears ECX.
218370b324cSopenharmony_ci   __cpuid() in old MSVC (14.00) x64 doesn't clear ECX
219370b324cSopenharmony_ci We still can use __cpuid for low (func) values that don't require ECX,
220370b324cSopenharmony_ci but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
221370b324cSopenharmony_ci So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
222370b324cSopenharmony_ci where ECX value is first parameter for FASTCALL / NO_INLINE func,
223370b324cSopenharmony_ci So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
224370b324cSopenharmony_ci old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
225370b324cSopenharmony_ci
226370b324cSopenharmony_ciDON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
227370b324cSopenharmony_ci*/
/* Replacement for __cpuidex() on MSVC older than 1600.
   (subFunction) is intentionally not used in the body: it is only passed as the
   first argument of this non-inlined FASTCALL function, and __cpuid() is then
   called with (func), writing the four result values to CPUInfo. */
228370b324cSopenharmony_cistatic
229370b324cSopenharmony_ciZ7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
230370b324cSopenharmony_ci{
231370b324cSopenharmony_ci  UNUSED_VAR(subFunction)
232370b324cSopenharmony_ci  __cpuid(CPUInfo, func);
233370b324cSopenharmony_ci}
234370b324cSopenharmony_ci      #define MY_cpuidex(info, func, func2)  MY_cpuidex_HACK(func2, func, info)
235370b324cSopenharmony_ci      #pragma message("======== MY_cpuidex_HACK WAS USED ========")
236370b324cSopenharmony_ci    #endif // _MSC_VER >= 1600
237370b324cSopenharmony_ci
/* MSVC 64-bit version.
   Calls MY_cpuidex() with function number (func) and subFunction 0;
   the four result values are written to p[0..3].
   NOTE(review): this code is in the "#else // MY_CPU_AMD64" branch of the enclosing #if,
   so the (!defined(MY_CPU_AMD64)) condition below looks like it is never true here - confirm. */
238370b324cSopenharmony_ci#if !defined(MY_CPU_AMD64)
239370b324cSopenharmony_ci/* inlining for __cpuid() in MSVC x86 (32-bit) produces big inefficient code,
240370b324cSopenharmony_ci   so we disable inlining here */
241370b324cSopenharmony_ciZ7_NO_INLINE
242370b324cSopenharmony_ci#endif
243370b324cSopenharmony_civoid Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
244370b324cSopenharmony_ci{
245370b324cSopenharmony_ci  MY_cpuidex((int *)p, (int)func, 0);
246370b324cSopenharmony_ci}
247370b324cSopenharmony_ci
/* MSVC 64-bit version.
   Calls MY_cpuidex() for function 0, subFunction 0 and returns the first
   result value (a[0]); the (int) value is converted to the UInt32 return type.
   No check for cpuid availability is done in this version. */
248370b324cSopenharmony_ciZ7_NO_INLINE
249370b324cSopenharmony_ciUInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
250370b324cSopenharmony_ci{
251370b324cSopenharmony_ci  int a[4];
252370b324cSopenharmony_ci  MY_cpuidex(a, 0, 0);
253370b324cSopenharmony_ci  return a[0];
254370b324cSopenharmony_ci}
255370b324cSopenharmony_ci
256370b324cSopenharmony_ci#endif // MY_CPU_AMD64
257370b324cSopenharmony_ci#endif // _MSC_VER
258370b324cSopenharmony_ci
/* CHECK_CPUID_IS_SUPPORTED:
     if NEED_CHECK_FOR_CPUID is defined, the macro makes the enclosing function
     return 0 when z7_x86_cpuid_GetMaxFunc() returns 0;
     otherwise the macro expands to nothing.
   NEED_CHECK_FOR_CPUID is undefined right after this point. */
259370b324cSopenharmony_ci#if defined(NEED_CHECK_FOR_CPUID)
260370b324cSopenharmony_ci#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
261370b324cSopenharmony_ci#else
262370b324cSopenharmony_ci#define CHECK_CPUID_IS_SUPPORTED
263370b324cSopenharmony_ci#endif
264370b324cSopenharmony_ci#undef NEED_CHECK_FOR_CPUID
265370b324cSopenharmony_ci
266370b324cSopenharmony_ci
/* Reads cpuid function 1 into p[0..3] (the caller must provide 4 items).
   Returns True on success.
   Returns 0 without writing to (p), if CHECK_CPUID_IS_SUPPORTED
   detects that cpuid is not available. */
267370b324cSopenharmony_cistatic
268370b324cSopenharmony_ciBoolInt x86cpuid_Func_1(UInt32 *p)
269370b324cSopenharmony_ci{
270370b324cSopenharmony_ci  CHECK_CPUID_IS_SUPPORTED
271370b324cSopenharmony_ci  z7_x86_cpuid(p, 1);
272370b324cSopenharmony_ci  return True;
273370b324cSopenharmony_ci}
274370b324cSopenharmony_ci
275370b324cSopenharmony_ci/*
276370b324cSopenharmony_cistatic const UInt32 kVendors[][1] =
277370b324cSopenharmony_ci{
278370b324cSopenharmony_ci  { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
279370b324cSopenharmony_ci  { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
280370b324cSopenharmony_ci  { 0x746E6543 }  // , 0x48727561, 0x736C7561 }
281370b324cSopenharmony_ci};
282370b324cSopenharmony_ci*/
283370b324cSopenharmony_ci
284370b324cSopenharmony_ci/*
285370b324cSopenharmony_citypedef struct
286370b324cSopenharmony_ci{
287370b324cSopenharmony_ci  UInt32 maxFunc;
288370b324cSopenharmony_ci  UInt32 vendor[3];
289370b324cSopenharmony_ci  UInt32 ver;
290370b324cSopenharmony_ci  UInt32 b;
291370b324cSopenharmony_ci  UInt32 c;
292370b324cSopenharmony_ci  UInt32 d;
293370b324cSopenharmony_ci} Cx86cpuid;
294370b324cSopenharmony_ci
295370b324cSopenharmony_cienum
296370b324cSopenharmony_ci{
297370b324cSopenharmony_ci  CPU_FIRM_INTEL,
298370b324cSopenharmony_ci  CPU_FIRM_AMD,
299370b324cSopenharmony_ci  CPU_FIRM_VIA
300370b324cSopenharmony_ci};
301370b324cSopenharmony_ciint x86cpuid_GetFirm(const Cx86cpuid *p);
302370b324cSopenharmony_ci#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
303370b324cSopenharmony_ci#define x86cpuid_ver_GetModel(ver)  (((ver >> 12) &  0xf0) | ((ver >> 4) & 0xf))
304370b324cSopenharmony_ci#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
305370b324cSopenharmony_ci
306370b324cSopenharmony_ciint x86cpuid_GetFirm(const Cx86cpuid *p)
307370b324cSopenharmony_ci{
308370b324cSopenharmony_ci  unsigned i;
309370b324cSopenharmony_ci  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
310370b324cSopenharmony_ci  {
311370b324cSopenharmony_ci    const UInt32 *v = kVendors[i];
312370b324cSopenharmony_ci    if (v[0] == p->vendor[0]
313370b324cSopenharmony_ci        // && v[1] == p->vendor[1]
314370b324cSopenharmony_ci        // && v[2] == p->vendor[2]
315370b324cSopenharmony_ci        )
316370b324cSopenharmony_ci      return (int)i;
317370b324cSopenharmony_ci  }
318370b324cSopenharmony_ci  return -1;
319370b324cSopenharmony_ci}
320370b324cSopenharmony_ci
321370b324cSopenharmony_ciBoolInt CPU_Is_InOrder()
322370b324cSopenharmony_ci{
323370b324cSopenharmony_ci  Cx86cpuid p;
324370b324cSopenharmony_ci  UInt32 family, model;
325370b324cSopenharmony_ci  if (!x86cpuid_CheckAndRead(&p))
326370b324cSopenharmony_ci    return True;
327370b324cSopenharmony_ci
328370b324cSopenharmony_ci  family = x86cpuid_ver_GetFamily(p.ver);
329370b324cSopenharmony_ci  model = x86cpuid_ver_GetModel(p.ver);
330370b324cSopenharmony_ci
331370b324cSopenharmony_ci  switch (x86cpuid_GetFirm(&p))
332370b324cSopenharmony_ci  {
333370b324cSopenharmony_ci    case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
334370b324cSopenharmony_ci        // In-Order Atom CPU
335370b324cSopenharmony_ci           model == 0x1C  // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
336370b324cSopenharmony_ci        || model == 0x26  // 45 nm, Z6xx
337370b324cSopenharmony_ci        || model == 0x27  // 32 nm, Z2460
338370b324cSopenharmony_ci        || model == 0x35  // 32 nm, Z2760
339370b324cSopenharmony_ci        || model == 0x36  // 32 nm, N2xxx, D2xxx
340370b324cSopenharmony_ci        )));
341370b324cSopenharmony_ci    case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
342370b324cSopenharmony_ci    case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
343370b324cSopenharmony_ci  }
344370b324cSopenharmony_ci  return False; // v23 : unknown processors are not In-Order
345370b324cSopenharmony_ci}
346370b324cSopenharmony_ci*/
347370b324cSopenharmony_ci
348370b324cSopenharmony_ci#ifdef _WIN32
349370b324cSopenharmony_ci#include "7zWindows.h"
350370b324cSopenharmony_ci#endif
351370b324cSopenharmony_ci
352370b324cSopenharmony_ci#if !defined(MY_CPU_AMD64) && defined(_WIN32)
353370b324cSopenharmony_ci
354370b324cSopenharmony_ci/* for legacy SSE ia32: there is no user-space cpu instruction to check
355370b324cSopenharmony_ci   that OS supports SSE register storing/restoring on context switches.
356370b324cSopenharmony_ci   So we need some OS-specific function to check that it's safe to use SSE registers.
357370b324cSopenharmony_ci*/
358370b324cSopenharmony_ci
/* Returns non-zero, if the major version of Windows
   (low byte of the GetVersion() result) is 5 or higher. */
359370b324cSopenharmony_ciZ7_FORCE_INLINE
360370b324cSopenharmony_cistatic BoolInt CPU_Sys_Is_SSE_Supported(void)
361370b324cSopenharmony_ci{
362370b324cSopenharmony_ci#ifdef _MSC_VER
363370b324cSopenharmony_ci  #pragma warning(push)
364370b324cSopenharmony_ci  #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
365370b324cSopenharmony_ci#endif
366370b324cSopenharmony_ci  /* low byte is major version of Windows
367370b324cSopenharmony_ci     We suppose that any Windows version since
368370b324cSopenharmony_ci     Windows2000 (major == 5) supports SSE registers */
369370b324cSopenharmony_ci  return (Byte)GetVersion() >= 5;
370370b324cSopenharmony_ci#if defined(_MSC_VER)
371370b324cSopenharmony_ci  #pragma warning(pop)
372370b324cSopenharmony_ci#endif
373370b324cSopenharmony_ci}
/* CHECK_SYS_SSE_SUPPORT:
     for 32-bit Windows builds the macro makes the enclosing function return False,
     if CPU_Sys_Is_SSE_Supported() returns 0.
     For all other builds the macro expands to nothing. */
374370b324cSopenharmony_ci#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
375370b324cSopenharmony_ci#else
376370b324cSopenharmony_ci#define CHECK_SYS_SSE_SUPPORT
377370b324cSopenharmony_ci#endif
378370b324cSopenharmony_ci
379370b324cSopenharmony_ci
380370b324cSopenharmony_ci#if !defined(MY_CPU_AMD64)
381370b324cSopenharmony_ci
/* Returns bit 15 of the EDX value (a[3]) of cpuid function 1.
   Returns 0, if x86cpuid_Func_1() fails. */
382370b324cSopenharmony_ciBoolInt CPU_IsSupported_CMOV(void)
383370b324cSopenharmony_ci{
384370b324cSopenharmony_ci  UInt32 a[4];
385370b324cSopenharmony_ci  if (!x86cpuid_Func_1(&a[0]))
386370b324cSopenharmony_ci    return 0;
387370b324cSopenharmony_ci  return (a[3] >> 15) & 1;
388370b324cSopenharmony_ci}
389370b324cSopenharmony_ci
/* Returns bit 25 of the EDX value (a[3]) of cpuid function 1.
   Returns 0 (False), if CHECK_SYS_SSE_SUPPORT rejects the OS
   or if x86cpuid_Func_1() fails. */
390370b324cSopenharmony_ciBoolInt CPU_IsSupported_SSE(void)
391370b324cSopenharmony_ci{
392370b324cSopenharmony_ci  UInt32 a[4];
393370b324cSopenharmony_ci  CHECK_SYS_SSE_SUPPORT
394370b324cSopenharmony_ci  if (!x86cpuid_Func_1(&a[0]))
395370b324cSopenharmony_ci    return 0;
396370b324cSopenharmony_ci  return (a[3] >> 25) & 1;
397370b324cSopenharmony_ci}
398370b324cSopenharmony_ci
/* Returns bit 26 of the EDX value (a[3]) of cpuid function 1.
   Returns 0 (False), if CHECK_SYS_SSE_SUPPORT rejects the OS
   or if x86cpuid_Func_1() fails. */
399370b324cSopenharmony_ciBoolInt CPU_IsSupported_SSE2(void)
400370b324cSopenharmony_ci{
401370b324cSopenharmony_ci  UInt32 a[4];
402370b324cSopenharmony_ci  CHECK_SYS_SSE_SUPPORT
403370b324cSopenharmony_ci  if (!x86cpuid_Func_1(&a[0]))
404370b324cSopenharmony_ci    return 0;
405370b324cSopenharmony_ci  return (a[3] >> 26) & 1;
406370b324cSopenharmony_ci}
407370b324cSopenharmony_ci
408370b324cSopenharmony_ci#endif
409370b324cSopenharmony_ci
410370b324cSopenharmony_ci
/* Returns the ECX value (a[2]) of cpuid function 1.
   Returns 0 (no feature bits set), if CHECK_SYS_SSE_SUPPORT rejects the OS
   or if x86cpuid_Func_1() fails. */
411370b324cSopenharmony_cistatic UInt32 x86cpuid_Func_1_ECX(void)
412370b324cSopenharmony_ci{
413370b324cSopenharmony_ci  UInt32 a[4];
414370b324cSopenharmony_ci  CHECK_SYS_SSE_SUPPORT
415370b324cSopenharmony_ci  if (!x86cpuid_Func_1(&a[0]))
416370b324cSopenharmony_ci    return 0;
417370b324cSopenharmony_ci  return a[2];
418370b324cSopenharmony_ci}
419370b324cSopenharmony_ci
/* Returns bit 25 of the value returned by x86cpuid_Func_1_ECX()
   (ECX of cpuid function 1, or 0 if it is not available). */
420370b324cSopenharmony_ciBoolInt CPU_IsSupported_AES(void)
421370b324cSopenharmony_ci{
422370b324cSopenharmony_ci  return (x86cpuid_Func_1_ECX() >> 25) & 1;
423370b324cSopenharmony_ci}
424370b324cSopenharmony_ci
/* Returns bit 9 of the value returned by x86cpuid_Func_1_ECX()
   (ECX of cpuid function 1, or 0 if it is not available). */
425370b324cSopenharmony_ciBoolInt CPU_IsSupported_SSSE3(void)
426370b324cSopenharmony_ci{
427370b324cSopenharmony_ci  return (x86cpuid_Func_1_ECX() >> 9) & 1;
428370b324cSopenharmony_ci}
429370b324cSopenharmony_ci
/* Returns bit 19 of the value returned by x86cpuid_Func_1_ECX()
   (ECX of cpuid function 1, or 0 if it is not available). */
430370b324cSopenharmony_ciBoolInt CPU_IsSupported_SSE41(void)
431370b324cSopenharmony_ci{
432370b324cSopenharmony_ci  return (x86cpuid_Func_1_ECX() >> 19) & 1;
433370b324cSopenharmony_ci}
434370b324cSopenharmony_ci
/* Returns bit 29 of the EBX value (d[1]) of cpuid function 7 (subFunction 0).
   Returns False, if CHECK_SYS_SSE_SUPPORT rejects the OS
   or if z7_x86_cpuid_GetMaxFunc() reports a value lower than 7. */
435370b324cSopenharmony_ciBoolInt CPU_IsSupported_SHA(void)
436370b324cSopenharmony_ci{
437370b324cSopenharmony_ci  CHECK_SYS_SSE_SUPPORT
438370b324cSopenharmony_ci
  // function 7 can be requested only if the reported max function is 7 or higher
439370b324cSopenharmony_ci  if (z7_x86_cpuid_GetMaxFunc() < 7)
440370b324cSopenharmony_ci    return False;
441370b324cSopenharmony_ci  {
442370b324cSopenharmony_ci    UInt32 d[4];
443370b324cSopenharmony_ci    z7_x86_cpuid(d, 7);
444370b324cSopenharmony_ci    return (d[1] >> 29) & 1;
445370b324cSopenharmony_ci  }
446370b324cSopenharmony_ci}
447370b324cSopenharmony_ci
448370b324cSopenharmony_ci/*
449370b324cSopenharmony_ciMSVC: _xgetbv() intrinsic is available since VS2010SP1.
450370b324cSopenharmony_ci   MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
451370b324cSopenharmony_ci   <immintrin.h> that we can use or check.
452370b324cSopenharmony_ci   For any 32-bit x86 we can use asm code in MSVC,
453370b324cSopenharmony_ci   but MSVC asm code is huge after compilation.
454370b324cSopenharmony_ci   So _xgetbv() is better
455370b324cSopenharmony_ci
456370b324cSopenharmony_ciICC: _xgetbv() intrinsic is available (in what version of ICC?)
457370b324cSopenharmony_ci   ICC defines (__GNUC__) and it supports gnu assembler
458370b324cSopenharmony_ci   also ICC supports MASM style code with -use-msasm switch.
459370b324cSopenharmony_ci   but ICC doesn't support __attribute__((__target__))
460370b324cSopenharmony_ci
461370b324cSopenharmony_ciGCC/CLANG 9:
462370b324cSopenharmony_ci  _xgetbv() is macro that works via __builtin_ia32_xgetbv()
463370b324cSopenharmony_ci  and we need __attribute__((__target__("xsave")).
464370b324cSopenharmony_ci  But with __target__("xsave") the function will be not
465370b324cSopenharmony_ci  inlined to function that has no __target__("xsave") attribute.
466370b324cSopenharmony_ci  If we want _xgetbv() call inlining, then we should use asm version
467370b324cSopenharmony_ci  instead of calling _xgetbv().
468370b324cSopenharmony_ci  Note: the intrinsic is broken before GCC 8.2:
469370b324cSopenharmony_ci    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
470370b324cSopenharmony_ci*/
471370b324cSopenharmony_ci
472370b324cSopenharmony_ci#if    defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
473370b324cSopenharmony_ci    || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219)  \
474370b324cSopenharmony_ci    || defined(__GNUC__) && (__GNUC__ >= 9) \
475370b324cSopenharmony_ci    || defined(__clang__) && (__clang_major__ >= 9)
476370b324cSopenharmony_ci// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
477370b324cSopenharmony_ci#if defined(__INTEL_COMPILER)
478370b324cSopenharmony_ci#define ATTRIB_XGETBV
479370b324cSopenharmony_ci#elif defined(__GNUC__) || defined(__clang__)
480370b324cSopenharmony_ci// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
481370b324cSopenharmony_ci// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
482370b324cSopenharmony_ci#else
483370b324cSopenharmony_ci#define ATTRIB_XGETBV
484370b324cSopenharmony_ci#endif
485370b324cSopenharmony_ci#endif
486370b324cSopenharmony_ci
487370b324cSopenharmony_ci#if defined(ATTRIB_XGETBV)
488370b324cSopenharmony_ci#include <immintrin.h>
489370b324cSopenharmony_ci#endif
490370b324cSopenharmony_ci
491370b324cSopenharmony_ci
492370b324cSopenharmony_ci// XFEATURE_ENABLED_MASK/XCR0
493370b324cSopenharmony_ci#define MY_XCR_XFEATURE_ENABLED_MASK 0
494370b324cSopenharmony_ci
/* Returns the 64-bit value of the extended control register selected by (num),
   as read with the XGETBV instruction (ECX = num, result in EDX:EAX).
   The implementation is selected at compile time:
     - ATTRIB_XGETBV is defined: the compiler's _xgetbv() / __builtin_ia32_xgetbv() is used;
     - GCC / CLANG / SunPro: inline asm ("xgetbv" mnemonic, or raw opcode bytes
       0x0f 0x01 0xd0 if the (__GNUC__) version test below fails);
     - 32-bit MSVC: __asm block with the same raw opcode bytes;
     - any other compiler: XGETBV is not executed, (num) is ignored and
       the constant with SSE (bit 1) and AVX (bit 2) bits is returned. */
495370b324cSopenharmony_ci#if defined(ATTRIB_XGETBV)
496370b324cSopenharmony_ciATTRIB_XGETBV
497370b324cSopenharmony_ci#endif
498370b324cSopenharmony_cistatic UInt64 x86_xgetbv_0(UInt32 num)
499370b324cSopenharmony_ci{
500370b324cSopenharmony_ci#if defined(ATTRIB_XGETBV)
501370b324cSopenharmony_ci  {
502370b324cSopenharmony_ci    return
503370b324cSopenharmony_ci      #if (defined(_MSC_VER))
504370b324cSopenharmony_ci        _xgetbv(num);
505370b324cSopenharmony_ci      #else
506370b324cSopenharmony_ci        __builtin_ia32_xgetbv(
507370b324cSopenharmony_ci          #if !defined(__clang__)
508370b324cSopenharmony_ci            (int)
509370b324cSopenharmony_ci          #endif
510370b324cSopenharmony_ci            num);
511370b324cSopenharmony_ci      #endif
512370b324cSopenharmony_ci  }
513370b324cSopenharmony_ci
514370b324cSopenharmony_ci#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
515370b324cSopenharmony_ci
516370b324cSopenharmony_ci  UInt32 a, d;
517370b324cSopenharmony_ci #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
518370b324cSopenharmony_ci  __asm__
519370b324cSopenharmony_ci  (
520370b324cSopenharmony_ci    "xgetbv"
521370b324cSopenharmony_ci    : "=a"(a), "=d"(d) : "c"(num) : "cc"
522370b324cSopenharmony_ci  );
523370b324cSopenharmony_ci #else // is old gcc
  // same instruction, written as raw opcode bytes instead of the "xgetbv" mnemonic
524370b324cSopenharmony_ci  __asm__
525370b324cSopenharmony_ci  (
526370b324cSopenharmony_ci    ".byte 0x0f, 0x01, 0xd0" "\n\t"
527370b324cSopenharmony_ci    : "=a"(a), "=d"(d) : "c"(num) : "cc"
528370b324cSopenharmony_ci  );
529370b324cSopenharmony_ci #endif
  // combine EDX (high 32 bits) and EAX (low 32 bits)
530370b324cSopenharmony_ci  return ((UInt64)d << 32) | a;
531370b324cSopenharmony_ci  // return a;
532370b324cSopenharmony_ci
533370b324cSopenharmony_ci#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
534370b324cSopenharmony_ci
535370b324cSopenharmony_ci  UInt32 a, d;
  // eax, edx, ecx are saved and restored around the raw xgetbv opcode bytes
536370b324cSopenharmony_ci  __asm {
537370b324cSopenharmony_ci    push eax
538370b324cSopenharmony_ci    push edx
539370b324cSopenharmony_ci    push ecx
540370b324cSopenharmony_ci    mov ecx, num;
541370b324cSopenharmony_ci    // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
542370b324cSopenharmony_ci    _emit 0x0f
543370b324cSopenharmony_ci    _emit 0x01
544370b324cSopenharmony_ci    _emit 0xd0
545370b324cSopenharmony_ci    mov a, eax
546370b324cSopenharmony_ci    mov d, edx
547370b324cSopenharmony_ci    pop ecx
548370b324cSopenharmony_ci    pop edx
549370b324cSopenharmony_ci    pop eax
550370b324cSopenharmony_ci  }
551370b324cSopenharmony_ci  return ((UInt64)d << 32) | a;
552370b324cSopenharmony_ci  // return a;
553370b324cSopenharmony_ci
554370b324cSopenharmony_ci#else // it's unknown compiler
555370b324cSopenharmony_ci  // #error "Need xgetbv function"
556370b324cSopenharmony_ci  UNUSED_VAR(num)
557370b324cSopenharmony_ci  // for MSVC-X64 we could call external function from external file.
558370b324cSopenharmony_ci  /* Actually we had checked OSXSAVE/AVX in cpuid before.
559370b324cSopenharmony_ci     So it's expected that OS supports at least AVX and below. */
560370b324cSopenharmony_ci  // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
561370b324cSopenharmony_ci  return
562370b324cSopenharmony_ci      // (1 << 0) |  // x87
563370b324cSopenharmony_ci        (1 << 1)   // SSE
564370b324cSopenharmony_ci      | (1 << 2);  // AVX
565370b324cSopenharmony_ci
566370b324cSopenharmony_ci#endif
567370b324cSopenharmony_ci}
568370b324cSopenharmony_ci
569370b324cSopenharmony_ci#ifdef _WIN32
570370b324cSopenharmony_ci/*
571370b324cSopenharmony_ci  Windows versions do not know about new ISA extensions that
572370b324cSopenharmony_ci  can be introduced later. We can still use such new extensions
573370b324cSopenharmony_ci  even if Windows doesn't report support for them.
574370b324cSopenharmony_ci  The exception is an ISA extension that changes the number or size
575370b324cSopenharmony_ci  of registers (SSE, AVX/XSAVE, AVX512): Windows must know about it.
576370b324cSopenharmony_ci  So it's enough to check
577370b324cSopenharmony_ci    MY_PF_AVX_INSTRUCTIONS_AVAILABLE
578370b324cSopenharmony_ci      instead of
579370b324cSopenharmony_ci    MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
580370b324cSopenharmony_ci*/
581370b324cSopenharmony_ci#define MY_PF_XSAVE_ENABLED                            17
582370b324cSopenharmony_ci// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE             36
583370b324cSopenharmony_ci// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE            37
584370b324cSopenharmony_ci// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE            38
585370b324cSopenharmony_ci// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE               39
586370b324cSopenharmony_ci// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE              40
587370b324cSopenharmony_ci// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE           41
588370b324cSopenharmony_ci#endif
589370b324cSopenharmony_ci
590370b324cSopenharmony_ciBoolInt CPU_IsSupported_AVX(void)
591370b324cSopenharmony_ci{
592370b324cSopenharmony_ci  #ifdef _WIN32
593370b324cSopenharmony_ci  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
594370b324cSopenharmony_ci    return False;
595370b324cSopenharmony_ci  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
596370b324cSopenharmony_ci     some latest Win10 revisions. But we need AVX in older Windows also.
597370b324cSopenharmony_ci     So we don't use the following check: */
598370b324cSopenharmony_ci  /*
599370b324cSopenharmony_ci  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
600370b324cSopenharmony_ci    return False;
601370b324cSopenharmony_ci  */
602370b324cSopenharmony_ci  #endif
603370b324cSopenharmony_ci
604370b324cSopenharmony_ci  /*
605370b324cSopenharmony_ci    OS must use new special XSAVE/XRSTOR instructions to save
606370b324cSopenharmony_ci    AVX registers when it required for context switching.
607370b324cSopenharmony_ci    At OS startup:
608370b324cSopenharmony_ci      OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
609370b324cSopenharmony_ci      Also OS sets bitmask in XCR0 register that defines what
610370b324cSopenharmony_ci      registers will be processed by XSAVE instruction:
611370b324cSopenharmony_ci        XCR0.x87[bit 0] - x87 registers and state
612370b324cSopenharmony_ci        XCR0.SSE[bit 1] - SSE registers and state
613370b324cSopenharmony_ci        XCR0.AVX[bit 2] - AVX registers and state
614370b324cSopenharmony_ci    CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
615370b324cSopenharmony_ci       So we can read that bit in user-space.
616370b324cSopenharmony_ci    XCR0 is available for reading in user-space by new XGETBV instruction.
617370b324cSopenharmony_ci  */
618370b324cSopenharmony_ci  {
619370b324cSopenharmony_ci    const UInt32 c = x86cpuid_Func_1_ECX();
620370b324cSopenharmony_ci    if (0 == (1
621370b324cSopenharmony_ci        & (c >> 28)   // AVX instructions are supported by hardware
622370b324cSopenharmony_ci        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
623370b324cSopenharmony_ci      return False;
624370b324cSopenharmony_ci  }
625370b324cSopenharmony_ci
626370b324cSopenharmony_ci  /* also we can check
627370b324cSopenharmony_ci     CPUID.1:ECX.XSAVE [bit 26] : that shows that
628370b324cSopenharmony_ci        XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
629370b324cSopenharmony_ci     But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
630370b324cSopenharmony_ci
631370b324cSopenharmony_ci  /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
632370b324cSopenharmony_ci     in most cases we expect that OS also will support storing/restoring
633370b324cSopenharmony_ci     for AVX and SSE states at least.
634370b324cSopenharmony_ci     But to be ensure for that we call user-space instruction
635370b324cSopenharmony_ci     XGETBV(0) to get XCR0 value that contains bitmask that defines
636370b324cSopenharmony_ci     what exact states(registers) OS have enabled for storing/restoring.
637370b324cSopenharmony_ci  */
638370b324cSopenharmony_ci
639370b324cSopenharmony_ci  {
640370b324cSopenharmony_ci    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
641370b324cSopenharmony_ci    // printf("\n=== XGetBV=%d\n", bm);
642370b324cSopenharmony_ci    return 1
643370b324cSopenharmony_ci        & (bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
644370b324cSopenharmony_ci        & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
645370b324cSopenharmony_ci  }
646370b324cSopenharmony_ci  // since Win7SP1: we can use GetEnabledXStateFeatures();
647370b324cSopenharmony_ci}
648370b324cSopenharmony_ci
649370b324cSopenharmony_ci
650370b324cSopenharmony_ciBoolInt CPU_IsSupported_AVX2(void)
651370b324cSopenharmony_ci{
652370b324cSopenharmony_ci  if (!CPU_IsSupported_AVX())
653370b324cSopenharmony_ci    return False;
654370b324cSopenharmony_ci  if (z7_x86_cpuid_GetMaxFunc() < 7)
655370b324cSopenharmony_ci    return False;
656370b324cSopenharmony_ci  {
657370b324cSopenharmony_ci    UInt32 d[4];
658370b324cSopenharmony_ci    z7_x86_cpuid(d, 7);
659370b324cSopenharmony_ci    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
660370b324cSopenharmony_ci    return 1
661370b324cSopenharmony_ci      & (d[1] >> 5); // avx2
662370b324cSopenharmony_ci  }
663370b324cSopenharmony_ci}
664370b324cSopenharmony_ci
665370b324cSopenharmony_ciBoolInt CPU_IsSupported_VAES_AVX2(void)
666370b324cSopenharmony_ci{
667370b324cSopenharmony_ci  if (!CPU_IsSupported_AVX())
668370b324cSopenharmony_ci    return False;
669370b324cSopenharmony_ci  if (z7_x86_cpuid_GetMaxFunc() < 7)
670370b324cSopenharmony_ci    return False;
671370b324cSopenharmony_ci  {
672370b324cSopenharmony_ci    UInt32 d[4];
673370b324cSopenharmony_ci    z7_x86_cpuid(d, 7);
674370b324cSopenharmony_ci    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
675370b324cSopenharmony_ci    return 1
676370b324cSopenharmony_ci      & (d[1] >> 5) // avx2
677370b324cSopenharmony_ci      // & (d[1] >> 31) // avx512vl
678370b324cSopenharmony_ci      & (d[2] >> 9); // vaes // VEX-256/EVEX
679370b324cSopenharmony_ci  }
680370b324cSopenharmony_ci}
681370b324cSopenharmony_ci
682370b324cSopenharmony_ciBoolInt CPU_IsSupported_PageGB(void)
683370b324cSopenharmony_ci{
684370b324cSopenharmony_ci  CHECK_CPUID_IS_SUPPORTED
685370b324cSopenharmony_ci  {
686370b324cSopenharmony_ci    UInt32 d[4];
687370b324cSopenharmony_ci    z7_x86_cpuid(d, 0x80000000);
688370b324cSopenharmony_ci    if (d[0] < 0x80000001)
689370b324cSopenharmony_ci      return False;
690370b324cSopenharmony_ci    z7_x86_cpuid(d, 0x80000001);
691370b324cSopenharmony_ci    return (d[3] >> 26) & 1;
692370b324cSopenharmony_ci  }
693370b324cSopenharmony_ci}
694370b324cSopenharmony_ci
695370b324cSopenharmony_ci
696370b324cSopenharmony_ci#elif defined(MY_CPU_ARM_OR_ARM64)
697370b324cSopenharmony_ci
698370b324cSopenharmony_ci#ifdef _WIN32
699370b324cSopenharmony_ci
700370b324cSopenharmony_ci#include "7zWindows.h"
701370b324cSopenharmony_ci
702370b324cSopenharmony_ciBoolInt CPU_IsSupported_CRC32(void)  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
703370b324cSopenharmony_ciBoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
704370b324cSopenharmony_ciBoolInt CPU_IsSupported_NEON(void)   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
705370b324cSopenharmony_ci
706370b324cSopenharmony_ci#else
707370b324cSopenharmony_ci
708370b324cSopenharmony_ci#if defined(__APPLE__)
709370b324cSopenharmony_ci
710370b324cSopenharmony_ci/*
711370b324cSopenharmony_ci#include <stdio.h>
712370b324cSopenharmony_ci#include <string.h>
713370b324cSopenharmony_cistatic void Print_sysctlbyname(const char *name)
714370b324cSopenharmony_ci{
715370b324cSopenharmony_ci  size_t bufSize = 256;
716370b324cSopenharmony_ci  char buf[256];
717370b324cSopenharmony_ci  int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
718370b324cSopenharmony_ci  {
719370b324cSopenharmony_ci    int i;
720370b324cSopenharmony_ci    printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
721370b324cSopenharmony_ci    for (i = 0; i < 20; i++)
722370b324cSopenharmony_ci      printf(" %2x", (unsigned)(Byte)buf[i]);
723370b324cSopenharmony_ci
724370b324cSopenharmony_ci  }
725370b324cSopenharmony_ci}
726370b324cSopenharmony_ci*/
727370b324cSopenharmony_ci/*
728370b324cSopenharmony_ci  Print_sysctlbyname("hw.pagesize");
729370b324cSopenharmony_ci  Print_sysctlbyname("machdep.cpu.brand_string");
730370b324cSopenharmony_ci*/
731370b324cSopenharmony_ci
732370b324cSopenharmony_cistatic BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
733370b324cSopenharmony_ci{
734370b324cSopenharmony_ci  UInt32 val = 0;
735370b324cSopenharmony_ci  if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
736370b324cSopenharmony_ci    return 1;
737370b324cSopenharmony_ci  return 0;
738370b324cSopenharmony_ci}
739370b324cSopenharmony_ci
740370b324cSopenharmony_ciBoolInt CPU_IsSupported_CRC32(void)
741370b324cSopenharmony_ci{
742370b324cSopenharmony_ci  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
743370b324cSopenharmony_ci}
744370b324cSopenharmony_ci
745370b324cSopenharmony_ciBoolInt CPU_IsSupported_NEON(void)
746370b324cSopenharmony_ci{
747370b324cSopenharmony_ci  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
748370b324cSopenharmony_ci}
749370b324cSopenharmony_ci
750370b324cSopenharmony_ci#ifdef MY_CPU_ARM64
751370b324cSopenharmony_ci#define APPLE_CRYPTO_SUPPORT_VAL 1
752370b324cSopenharmony_ci#else
753370b324cSopenharmony_ci#define APPLE_CRYPTO_SUPPORT_VAL 0
754370b324cSopenharmony_ci#endif
755370b324cSopenharmony_ci
756370b324cSopenharmony_ciBoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
757370b324cSopenharmony_ciBoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
758370b324cSopenharmony_ciBoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
759370b324cSopenharmony_ci
760370b324cSopenharmony_ci
761370b324cSopenharmony_ci#else // __APPLE__
762370b324cSopenharmony_ci
763370b324cSopenharmony_ci#include <sys/auxv.h>
764370b324cSopenharmony_ci
765370b324cSopenharmony_ci#define USE_HWCAP
766370b324cSopenharmony_ci
767370b324cSopenharmony_ci#ifdef USE_HWCAP
768370b324cSopenharmony_ci
769370b324cSopenharmony_ci#include <asm/hwcap.h>
770370b324cSopenharmony_ci
771370b324cSopenharmony_ci  #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
772370b324cSopenharmony_ci  BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP)  & (HWCAP_  ## name2)) ? 1 : 0; }
773370b324cSopenharmony_ci
774370b324cSopenharmony_ci#ifdef MY_CPU_ARM64
775370b324cSopenharmony_ci  #define MY_HWCAP_CHECK_FUNC(name) \
776370b324cSopenharmony_ci  MY_HWCAP_CHECK_FUNC_2(name, name)
777370b324cSopenharmony_ci  MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
778370b324cSopenharmony_ci// MY_HWCAP_CHECK_FUNC (ASIMD)
779370b324cSopenharmony_ci#elif defined(MY_CPU_ARM)
780370b324cSopenharmony_ci  #define MY_HWCAP_CHECK_FUNC(name) \
781370b324cSopenharmony_ci  BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
782370b324cSopenharmony_ci  MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
783370b324cSopenharmony_ci#endif
784370b324cSopenharmony_ci
785370b324cSopenharmony_ci#else // USE_HWCAP
786370b324cSopenharmony_ci
787370b324cSopenharmony_ci  #define MY_HWCAP_CHECK_FUNC(name) \
788370b324cSopenharmony_ci  BoolInt CPU_IsSupported_ ## name() { return 0; }
789370b324cSopenharmony_ci  MY_HWCAP_CHECK_FUNC(NEON)
790370b324cSopenharmony_ci
791370b324cSopenharmony_ci#endif // USE_HWCAP
792370b324cSopenharmony_ci
793370b324cSopenharmony_ciMY_HWCAP_CHECK_FUNC (CRC32)
794370b324cSopenharmony_ciMY_HWCAP_CHECK_FUNC (SHA1)
795370b324cSopenharmony_ciMY_HWCAP_CHECK_FUNC (SHA2)
796370b324cSopenharmony_ciMY_HWCAP_CHECK_FUNC (AES)
797370b324cSopenharmony_ci
798370b324cSopenharmony_ci#endif // __APPLE__
799370b324cSopenharmony_ci#endif // _WIN32
800370b324cSopenharmony_ci
801370b324cSopenharmony_ci#endif // MY_CPU_ARM_OR_ARM64
802370b324cSopenharmony_ci
803370b324cSopenharmony_ci
804370b324cSopenharmony_ci
805370b324cSopenharmony_ci#ifdef __APPLE__
806370b324cSopenharmony_ci
807370b324cSopenharmony_ci#include <sys/sysctl.h>
808370b324cSopenharmony_ci
/* Thin wrapper over sysctlbyname() for reading a value:
   (name), (buf) and (bufSize) are passed through unchanged,
   and no new value is supplied (NULL, 0).
   Returns the result of sysctlbyname(). */
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  const int res = sysctlbyname(name, buf, bufSize, NULL, 0);
  return res;
}
813370b324cSopenharmony_ci
814370b324cSopenharmony_ciint z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
815370b324cSopenharmony_ci{
816370b324cSopenharmony_ci  size_t bufSize = sizeof(*val);
817370b324cSopenharmony_ci  const int res = z7_sysctlbyname_Get(name, val, &bufSize);
818370b324cSopenharmony_ci  if (res == 0 && bufSize != sizeof(*val))
819370b324cSopenharmony_ci    return EFAULT;
820370b324cSopenharmony_ci  return res;
821370b324cSopenharmony_ci}
822370b324cSopenharmony_ci
823370b324cSopenharmony_ci#endif
824