// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2020-2023 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------

/**
 * @brief Platform-specific function implementations.
 *
 * This module contains the CLI entry point which also performs the role of
 * validating the host extended ISA support meets the needs of the tools.
 */

#include <cstdio>

/**
 * @brief The main entry point.
 *
 * Declared here and defined elsewhere; main() forwards to it once the host
 * CPU has been validated.
 *
 * @param argc   The number of arguments.
 * @param argv   The vector of arguments.
 *
 * @return 0 on success, non-zero otherwise.
 */
int astcenc_main(
    int argc,
    char **argv);

#if (ASTCENC_SSE > 20) || (ASTCENC_AVX > 0) || \
    (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0)

/** Has detect_cpu_isa() run? The feature flags below are only valid once this is @c true. */
static bool g_init { false };

/** Does this CPU support SSE 4.1? Only valid once @c g_init is set. */
static bool g_cpu_has_sse41 { false };

/** Does this CPU support AVX2? Only valid once @c g_init is set. */
static bool g_cpu_has_avx2 { false };

/** Does this CPU support POPCNT? Only valid once @c g_init is set. */
static bool g_cpu_has_popcnt { false };

/** Does this CPU support F16C? Only valid once @c g_init is set. */
static bool g_cpu_has_f16c { false };

/* ============================================================================
   Platform code for Visual Studio
============================================================================ */
#if !defined(__clang__) && defined(_MSC_VER)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <intrin.h>

/**
 * @brief Detect platform CPU ISA support and update global trackers.
 *
 * Reads the highest supported CPUID leaf from leaf 0, then queries leaf 1 and
 * leaf 7 (sub-leaf 0) only if they are reported as available. Flags for
 * leaves that are not available keep their default value of @c false.
 */
static void detect_cpu_isa()
{
    int data[4];

    // Leaf 0, EAX holds the highest standard leaf this CPU implements
    __cpuid(data, 0);
    int num_id = data[0];

    if (num_id >= 1)
    {
        __cpuidex(data, 1, 0);
        // SSE41 = Bank 1, ECX, bit 19
        g_cpu_has_sse41 = (data[2] & (1 << 19)) != 0;
        // POPCNT = Bank 1, ECX, bit 23
        g_cpu_has_popcnt = (data[2] & (1 << 23)) != 0;
        // F16C = Bank 1, ECX, bit 29
        g_cpu_has_f16c = (data[2] & (1 << 29)) != 0;
    }

    if (num_id >= 7)
    {
        __cpuidex(data, 7, 0);
        // AVX2 = Bank 7, EBX, bit 5
        g_cpu_has_avx2 = (data[1] & (1 << 5)) != 0;
    }

    // Ensure state bits are updated before init flag is updated
    MemoryBarrier();
    g_init = true;
}

/* ============================================================================
   Platform code for GCC and Clang
============================================================================ */
#else
#include <cpuid.h>

/**
 * @brief Detect platform CPU ISA support and update global trackers.
 *
 * Queries CPUID leaf 1 and leaf 7 (sub-leaf 0) through __get_cpuid_count().
 * A flag is only updated when the corresponding query reports success;
 * otherwise it keeps its default value of @c false.
 */
static void detect_cpu_isa()
{
    unsigned int data[4];

    if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3]))
    {
        // SSE41 = Bank 1, ECX, bit 19
        g_cpu_has_sse41 = (data[2] & (1u << 19)) != 0;
        // POPCNT = Bank 1, ECX, bit 23
        g_cpu_has_popcnt = (data[2] & (1u << 23)) != 0;
        // F16C = Bank 1, ECX, bit 29
        g_cpu_has_f16c = (data[2] & (1u << 29)) != 0;
    }

    g_cpu_has_avx2 = false;
    if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3]))
    {
        // AVX2 = Bank 7, EBX, bit 5
        g_cpu_has_avx2 = (data[1] & (1u << 5)) != 0;
    }

    // Ensure state bits are updated before init flag is updated
    __sync_synchronize();
    g_init = true;
}
#endif

#if ASTCENC_POPCNT > 0
/**
 * @brief Run-time detection if the host CPU supports the POPCNT extension.
 *
 * Triggers CPU detection on first use.
 *
 * @return @c true if supported, @c false if not.
 */
static bool cpu_supports_popcnt()
{
    if (!g_init)
    {
        detect_cpu_isa();
    }

    return g_cpu_has_popcnt;
}
#endif

#if ASTCENC_F16C > 0
/**
 * @brief Run-time detection if the host CPU supports F16C extension.
 *
 * Triggers CPU detection on first use.
 *
 * @return @c true if supported, @c false if not.
 */
static bool cpu_supports_f16c()
{
    if (!g_init)
    {
        detect_cpu_isa();
    }

    return g_cpu_has_f16c;
}
#endif

#if ASTCENC_SSE >= 41
/**
 * @brief Run-time detection if the host CPU supports SSE 4.1 extension.
 *
 * Triggers CPU detection on first use.
 *
 * @return @c true if supported, @c false if not.
 */
static bool cpu_supports_sse41()
{
    if (!g_init)
    {
        detect_cpu_isa();
    }

    return g_cpu_has_sse41;
}
#endif

#if ASTCENC_AVX >= 2
/**
 * @brief Run-time detection if the host CPU supports AVX 2 extension.
 *
 * Triggers CPU detection on first use.
 *
 * @return @c true if supported, @c false if not.
 */
static bool cpu_supports_avx2()
{
    if (!g_init)
    {
        detect_cpu_isa();
    }

    return g_cpu_has_avx2;
}
#endif

/**
 * @brief Print a string to stderr.
 *
 * The string is written verbatim through a fixed "%s" format, so any '%'
 * characters it contains are not interpreted.
 *
 * @param format   The message to print.
 */
static inline void print_error(
    const char* format
) {
    fprintf(stderr, "%s", format);
}

/**
 * @brief Validate CPU ISA support meets the requirements of this build of the library.
 *
 * Each library build is statically compiled for a particular set of CPU ISA features, such as the
 * SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU
 * actually supports everything this build needs.
 *
 * Checks run in the order AVX2, F16C, SSE4.1, POPCNT; the first missing extension is reported on
 * stderr and stops validation.
 *
 * @return Return @c true if validated, @c false otherwise.
 */
static bool validate_cpu_isa()
{
    #if ASTCENC_AVX >= 2
        if (!cpu_supports_avx2())
        {
            print_error("ERROR: Host does not support AVX2 ISA extension\n");
            return false;
        }
    #endif

    #if ASTCENC_F16C >= 1
        if (!cpu_supports_f16c())
        {
            print_error("ERROR: Host does not support F16C ISA extension\n");
            return false;
        }
    #endif

    #if ASTCENC_SSE >= 41
        if (!cpu_supports_sse41())
        {
            print_error("ERROR: Host does not support SSE4.1 ISA extension\n");
            return false;
        }
    #endif

    #if ASTCENC_POPCNT >= 1
        if (!cpu_supports_popcnt())
        {
            print_error("ERROR: Host does not support POPCNT ISA extension\n");
            return false;
        }
    #endif

    return true;
}

#else

// Fallback for cases with no dynamic ISA availability
static bool validate_cpu_isa()
{
    return true;
}

#endif

/**
 * @brief Process entry point.
 *
 * Validates the host CPU against the ISA extensions this build was compiled
 * for, then hands control to astcenc_main().
 *
 * @param argc   The number of arguments.
 * @param argv   The vector of arguments.
 *
 * @return 1 if the host CPU fails validation, otherwise the value returned by
 *         astcenc_main().
 */
int main(
    int argc,
    char **argv
) {
    if (!validate_cpu_isa())
    {
        return 1;
    }

    return astcenc_main(argc, argv);
}