1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0
2cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
3cc1dc7a3Sopenharmony_ci// Copyright 2020-2023 Arm Limited
4cc1dc7a3Sopenharmony_ci//
5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy
7cc1dc7a3Sopenharmony_ci// of the License at:
8cc1dc7a3Sopenharmony_ci//
9cc1dc7a3Sopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
10cc1dc7a3Sopenharmony_ci//
11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software
12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations
15cc1dc7a3Sopenharmony_ci// under the License.
16cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
17cc1dc7a3Sopenharmony_ci
18cc1dc7a3Sopenharmony_ci/**
19cc1dc7a3Sopenharmony_ci * @brief Platform-specific function implementations.
20cc1dc7a3Sopenharmony_ci *
21cc1dc7a3Sopenharmony_ci * This module contains the CLI entry point which also performs the role of
22cc1dc7a3Sopenharmony_ci * validating the host extended ISA support meets the needs of the tools.
23cc1dc7a3Sopenharmony_ci */
24cc1dc7a3Sopenharmony_ci
25cc1dc7a3Sopenharmony_ci#include <cstdio>
26cc1dc7a3Sopenharmony_ci
/**
 * @brief Entry point of the command line tool, declared here and called by main().
 *
 * @param argc   The number of arguments.
 * @param argv   The vector of arguments.
 *
 * @return 0 on success, non-zero otherwise.
 */
int astcenc_main(int argc, char **argv);
38cc1dc7a3Sopenharmony_ci
39cc1dc7a3Sopenharmony_ci#if (ASTCENC_SSE > 20)    || (ASTCENC_AVX > 0) || \
40cc1dc7a3Sopenharmony_ci    (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0)
41cc1dc7a3Sopenharmony_ci
/** Has detect_cpu_isa() run and populated the ISA trackers below? */
static bool g_init = false;

/** Does this CPU support SSE 4.1? Only meaningful once g_init is true. */
static bool g_cpu_has_sse41 = false;

/** Does this CPU support AVX2? Only meaningful once g_init is true. */
static bool g_cpu_has_avx2 = false;

/** Does this CPU support POPCNT? Only meaningful once g_init is true. */
static bool g_cpu_has_popcnt = false;

/** Does this CPU support F16C? Only meaningful once g_init is true. */
static bool g_cpu_has_f16c = false;
55cc1dc7a3Sopenharmony_ci
56cc1dc7a3Sopenharmony_ci/* ============================================================================
57cc1dc7a3Sopenharmony_ci   Platform code for Visual Studio
58cc1dc7a3Sopenharmony_ci============================================================================ */
59cc1dc7a3Sopenharmony_ci#if !defined(__clang__) && defined(_MSC_VER)
60cc1dc7a3Sopenharmony_ci#define WIN32_LEAN_AND_MEAN
61cc1dc7a3Sopenharmony_ci#include <windows.h>
62cc1dc7a3Sopenharmony_ci#include <intrin.h>
63cc1dc7a3Sopenharmony_ci
64cc1dc7a3Sopenharmony_ci/**
65cc1dc7a3Sopenharmony_ci * @brief Detect platform CPU ISA support and update global trackers.
66cc1dc7a3Sopenharmony_ci */
67cc1dc7a3Sopenharmony_cistatic void detect_cpu_isa()
68cc1dc7a3Sopenharmony_ci{
69cc1dc7a3Sopenharmony_ci	int data[4];
70cc1dc7a3Sopenharmony_ci
71cc1dc7a3Sopenharmony_ci	__cpuid(data, 0);
72cc1dc7a3Sopenharmony_ci	int num_id = data[0];
73cc1dc7a3Sopenharmony_ci
74cc1dc7a3Sopenharmony_ci	if (num_id >= 1)
75cc1dc7a3Sopenharmony_ci	{
76cc1dc7a3Sopenharmony_ci		__cpuidex(data, 1, 0);
77cc1dc7a3Sopenharmony_ci		// SSE41 = Bank 1, ECX, bit 19
78cc1dc7a3Sopenharmony_ci		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
79cc1dc7a3Sopenharmony_ci		// POPCNT = Bank 1, ECX, bit 23
80cc1dc7a3Sopenharmony_ci		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
81cc1dc7a3Sopenharmony_ci		// F16C = Bank 1, ECX, bit 29
82cc1dc7a3Sopenharmony_ci		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
83cc1dc7a3Sopenharmony_ci	}
84cc1dc7a3Sopenharmony_ci
85cc1dc7a3Sopenharmony_ci	if (num_id >= 7)
86cc1dc7a3Sopenharmony_ci	{
87cc1dc7a3Sopenharmony_ci		__cpuidex(data, 7, 0);
88cc1dc7a3Sopenharmony_ci		// AVX2 = Bank 7, EBX, bit 5
89cc1dc7a3Sopenharmony_ci		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
90cc1dc7a3Sopenharmony_ci	}
91cc1dc7a3Sopenharmony_ci
92cc1dc7a3Sopenharmony_ci	// Ensure state bits are updated before init flag is updated
93cc1dc7a3Sopenharmony_ci	MemoryBarrier();
94cc1dc7a3Sopenharmony_ci	g_init = true;
95cc1dc7a3Sopenharmony_ci}
96cc1dc7a3Sopenharmony_ci
97cc1dc7a3Sopenharmony_ci/* ============================================================================
98cc1dc7a3Sopenharmony_ci   Platform code for GCC and Clang
99cc1dc7a3Sopenharmony_ci============================================================================ */
100cc1dc7a3Sopenharmony_ci#else
101cc1dc7a3Sopenharmony_ci#include <cpuid.h>
102cc1dc7a3Sopenharmony_ci
103cc1dc7a3Sopenharmony_ci/**
104cc1dc7a3Sopenharmony_ci * @brief Detect platform CPU ISA support and update global trackers.
105cc1dc7a3Sopenharmony_ci */
106cc1dc7a3Sopenharmony_cistatic void detect_cpu_isa()
107cc1dc7a3Sopenharmony_ci{
108cc1dc7a3Sopenharmony_ci	unsigned int data[4];
109cc1dc7a3Sopenharmony_ci
110cc1dc7a3Sopenharmony_ci	if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3]))
111cc1dc7a3Sopenharmony_ci	{
112cc1dc7a3Sopenharmony_ci		// SSE41 = Bank 1, ECX, bit 19
113cc1dc7a3Sopenharmony_ci		g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
114cc1dc7a3Sopenharmony_ci		// POPCNT = Bank 1, ECX, bit 23
115cc1dc7a3Sopenharmony_ci		g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
116cc1dc7a3Sopenharmony_ci		// F16C = Bank 1, ECX, bit 29
117cc1dc7a3Sopenharmony_ci		g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
118cc1dc7a3Sopenharmony_ci	}
119cc1dc7a3Sopenharmony_ci
120cc1dc7a3Sopenharmony_ci	g_cpu_has_avx2 = 0;
121cc1dc7a3Sopenharmony_ci	if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3]))
122cc1dc7a3Sopenharmony_ci	{
123cc1dc7a3Sopenharmony_ci		// AVX2 = Bank 7, EBX, bit 5
124cc1dc7a3Sopenharmony_ci		g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
125cc1dc7a3Sopenharmony_ci	}
126cc1dc7a3Sopenharmony_ci
127cc1dc7a3Sopenharmony_ci	// Ensure state bits are updated before init flag is updated
128cc1dc7a3Sopenharmony_ci	__sync_synchronize();
129cc1dc7a3Sopenharmony_ci	g_init = true;
130cc1dc7a3Sopenharmony_ci}
131cc1dc7a3Sopenharmony_ci#endif
132cc1dc7a3Sopenharmony_ci
133cc1dc7a3Sopenharmony_ci#if ASTCENC_POPCNT > 0
134cc1dc7a3Sopenharmony_ci/**
135cc1dc7a3Sopenharmony_ci * @brief Run-time detection if the host CPU supports the POPCNT extension.
136cc1dc7a3Sopenharmony_ci *
137cc1dc7a3Sopenharmony_ci * @return @c true if supported, @c false if not.
138cc1dc7a3Sopenharmony_ci */
139cc1dc7a3Sopenharmony_cistatic bool cpu_supports_popcnt()
140cc1dc7a3Sopenharmony_ci{
141cc1dc7a3Sopenharmony_ci	if (!g_init)
142cc1dc7a3Sopenharmony_ci	{
143cc1dc7a3Sopenharmony_ci		detect_cpu_isa();
144cc1dc7a3Sopenharmony_ci	}
145cc1dc7a3Sopenharmony_ci
146cc1dc7a3Sopenharmony_ci	return g_cpu_has_popcnt;
147cc1dc7a3Sopenharmony_ci}
148cc1dc7a3Sopenharmony_ci#endif
149cc1dc7a3Sopenharmony_ci
150cc1dc7a3Sopenharmony_ci#if ASTCENC_F16C > 0
151cc1dc7a3Sopenharmony_ci/**
152cc1dc7a3Sopenharmony_ci * @brief Run-time detection if the host CPU supports F16C extension.
153cc1dc7a3Sopenharmony_ci *
154cc1dc7a3Sopenharmony_ci * @return @c true if supported, @c false if not.
155cc1dc7a3Sopenharmony_ci */
156cc1dc7a3Sopenharmony_cistatic bool cpu_supports_f16c()
157cc1dc7a3Sopenharmony_ci{
158cc1dc7a3Sopenharmony_ci	if (!g_init)
159cc1dc7a3Sopenharmony_ci	{
160cc1dc7a3Sopenharmony_ci		detect_cpu_isa();
161cc1dc7a3Sopenharmony_ci	}
162cc1dc7a3Sopenharmony_ci
163cc1dc7a3Sopenharmony_ci	return g_cpu_has_f16c;
164cc1dc7a3Sopenharmony_ci}
165cc1dc7a3Sopenharmony_ci#endif
166cc1dc7a3Sopenharmony_ci
167cc1dc7a3Sopenharmony_ci#if ASTCENC_SSE >= 41
168cc1dc7a3Sopenharmony_ci/**
169cc1dc7a3Sopenharmony_ci * @brief Run-time detection if the host CPU supports SSE 4.1 extension.
170cc1dc7a3Sopenharmony_ci *
171cc1dc7a3Sopenharmony_ci * @return @c true if supported, @c false if not.
172cc1dc7a3Sopenharmony_ci */
173cc1dc7a3Sopenharmony_cistatic bool cpu_supports_sse41()
174cc1dc7a3Sopenharmony_ci{
175cc1dc7a3Sopenharmony_ci	if (!g_init)
176cc1dc7a3Sopenharmony_ci	{
177cc1dc7a3Sopenharmony_ci		detect_cpu_isa();
178cc1dc7a3Sopenharmony_ci	}
179cc1dc7a3Sopenharmony_ci
180cc1dc7a3Sopenharmony_ci	return g_cpu_has_sse41;
181cc1dc7a3Sopenharmony_ci}
182cc1dc7a3Sopenharmony_ci#endif
183cc1dc7a3Sopenharmony_ci
184cc1dc7a3Sopenharmony_ci#if ASTCENC_AVX >= 2
185cc1dc7a3Sopenharmony_ci/**
186cc1dc7a3Sopenharmony_ci * @brief Run-time detection if the host CPU supports AVX 2 extension.
187cc1dc7a3Sopenharmony_ci *
188cc1dc7a3Sopenharmony_ci * @return @c true if supported, @c false if not.
189cc1dc7a3Sopenharmony_ci */
190cc1dc7a3Sopenharmony_cistatic bool cpu_supports_avx2()
191cc1dc7a3Sopenharmony_ci{
192cc1dc7a3Sopenharmony_ci	if (!g_init)
193cc1dc7a3Sopenharmony_ci	{
194cc1dc7a3Sopenharmony_ci		detect_cpu_isa();
195cc1dc7a3Sopenharmony_ci	}
196cc1dc7a3Sopenharmony_ci
197cc1dc7a3Sopenharmony_ci	return g_cpu_has_avx2;
198cc1dc7a3Sopenharmony_ci}
199cc1dc7a3Sopenharmony_ci#endif
200cc1dc7a3Sopenharmony_ci
/**
 * @brief Write a message to stderr exactly as given.
 *
 * The argument is emitted as plain text; despite its name it is never interpreted as a
 * printf-style format string.
 *
 * @param format   The NUL-terminated message to print.
 */
static inline void print_error(const char* format)
{
	fputs(format, stderr);
}
209cc1dc7a3Sopenharmony_ci
/**
 * @brief Check that the host CPU provides every ISA extension this build was compiled to use.
 *
 * The build-time ASTCENC_AVX, ASTCENC_F16C, ASTCENC_SSE and ASTCENC_POPCNT settings select which
 * run-time checks are compiled in. Checks run in the order AVX2, F16C, SSE4.1, POPCNT; the first
 * missing extension is reported on stderr and no further checks are made.
 *
 * @return Return @c true if all required extensions are present, @c false otherwise.
 */
static bool validate_cpu_isa()
{
	// Cleared by the first failing check, which also suppresses the remaining ones
	bool host_ok = true;

	#if ASTCENC_AVX >= 2
		if (host_ok && !cpu_supports_avx2())
		{
			print_error("ERROR: Host does not support AVX2 ISA extension\n");
			host_ok = false;
		}
	#endif

	#if ASTCENC_F16C >= 1
		if (host_ok && !cpu_supports_f16c())
		{
			print_error("ERROR: Host does not support F16C ISA extension\n");
			host_ok = false;
		}
	#endif

	#if ASTCENC_SSE >= 41
		if (host_ok && !cpu_supports_sse41())
		{
			print_error("ERROR: Host does not support SSE4.1 ISA extension\n");
			host_ok = false;
		}
	#endif

	#if ASTCENC_POPCNT >= 1
		if (host_ok && !cpu_supports_popcnt())
		{
			print_error("ERROR: Host does not support POPCNT ISA extension\n");
			host_ok = false;
		}
	#endif

	return host_ok;
}
255cc1dc7a3Sopenharmony_ci
256cc1dc7a3Sopenharmony_ci#else
257cc1dc7a3Sopenharmony_ci
/**
 * @brief Fallback validation for builds with no dynamic ISA availability to check.
 *
 * @return Always @c true.
 */
static bool validate_cpu_isa()
{
	return true;
}
263cc1dc7a3Sopenharmony_ci
264cc1dc7a3Sopenharmony_ci#endif
265cc1dc7a3Sopenharmony_ci
266cc1dc7a3Sopenharmony_ciint main(
267cc1dc7a3Sopenharmony_ci	int argc,
268cc1dc7a3Sopenharmony_ci	char **argv
269cc1dc7a3Sopenharmony_ci) {
270cc1dc7a3Sopenharmony_ci	if (!validate_cpu_isa())
271cc1dc7a3Sopenharmony_ci	{
272cc1dc7a3Sopenharmony_ci		return 1;
273cc1dc7a3Sopenharmony_ci	}
274cc1dc7a3Sopenharmony_ci
275cc1dc7a3Sopenharmony_ci	return astcenc_main(argc, argv);
276cc1dc7a3Sopenharmony_ci}
277