1// SPDX-License-Identifier: Apache-2.0 2// ---------------------------------------------------------------------------- 3// Copyright 2011-2023 Arm Limited 4// 5// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6// use this file except in compliance with the License. You may obtain a copy 7// of the License at: 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14// License for the specific language governing permissions and limitations 15// under the License. 16// ---------------------------------------------------------------------------- 17 18/** 19 * @brief Platform-specific function implementations. 20 * 21 * This module contains functions with strongly OS-dependent implementations: 22 * 23 * * CPU count queries 24 * * Threading 25 * * Time 26 * 27 * In addition to the basic thread abstraction (which is native pthreads on 28 * all platforms, except Windows where it is an emulation of pthreads), a 29 * utility function to create N threads and wait for them to complete a batch 30 * task has also been provided. 31 */ 32 33#include "astcenccli_internal.h" 34 35/* ============================================================================ 36 Platform code for Windows using the Win32 APIs. 37============================================================================ */ 38#if defined(_WIN32) && !defined(__CYGWIN__) 39 40#define WIN32_LEAN_AND_MEAN 41#include <windows.h> 42 43/** @brief Alias pthread_t to one of the internal Windows types. */ 44typedef HANDLE pthread_t; 45 46/** @brief Alias pthread_attr_t to one of the internal Windows types. */ 47typedef int pthread_attr_t; 48 49/** 50 * @brief Proxy Windows @c CreateThread underneath a pthreads-like wrapper. 51 */ 52static int pthread_create( 53 pthread_t* thread, 54 const pthread_attr_t* attribs, 55 void* (*threadfunc)(void*), 56 void* thread_arg 57) { 58 static_cast<void>(attribs); 59 LPTHREAD_START_ROUTINE func = reinterpret_cast<LPTHREAD_START_ROUTINE>(threadfunc); 60 *thread = CreateThread(nullptr, 0, func, thread_arg, 0, nullptr); 61 62 // Ensure we return 0 on success, non-zero on error 63 if (*thread == NULL) 64 { 65 return 1; 66 } 67 68 return 0; 69} 70 71/** 72 * @brief Manually set CPU group and thread affinity. 73 * 74 * This is needed on Windows 10 or older to allow benefit from large core count 75 * systems with more than 64 logical CPUs. The assignment is skipped on systems 76 * with a single processor group, as it is not necessary. 77 */ 78static void set_group_affinity( 79 pthread_t thread, 80 int thread_index 81) { 82 // Skip thread assignment for hardware with a single CPU group 83 int group_count = GetActiveProcessorGroupCount(); 84 if (group_count == 1) 85 { 86 return; 87 } 88 89 // Ensure we have a valid assign if user creates more threads than cores 90 int assign_index = thread_index % get_cpu_count(); 91 int assign_group { 0 }; 92 int assign_group_cpu_count { 0 }; 93 94 // Determine which core group and core in the group to use for this thread 95 int group_cpu_count_sum { 0 }; 96 for (int group = 0; group < group_count; group++) 97 { 98 int group_cpu_count = static_cast<int>(GetMaximumProcessorCount(group)); 99 group_cpu_count_sum += group_cpu_count; 100 101 if (assign_index < group_cpu_count_sum) 102 { 103 assign_group = group; 104 assign_group_cpu_count = group_cpu_count; 105 break; 106 } 107 } 108 109 // Set the affinity to the assigned group, and all supported cores 110 GROUP_AFFINITY affinity {}; 111 affinity.Mask = (1 << assign_group_cpu_count) - 1; 112 affinity.Group = assign_group; 113 SetThreadGroupAffinity(thread, &affinity, nullptr); 114} 115 116/** 117 * @brief Proxy Windows @c WaitForSingleObject underneath a pthreads-like wrapper. 118 */ 119static int pthread_join( 120 pthread_t thread, 121 void** value 122) { 123 static_cast<void>(value); 124 WaitForSingleObject(thread, INFINITE); 125 return 0; 126} 127 128/* See header for documentation */ 129int get_cpu_count() 130{ 131 DWORD cpu_count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); 132 return static_cast<int>(cpu_count); 133} 134 135/* See header for documentation */ 136double get_time() 137{ 138 FILETIME tv; 139 GetSystemTimePreciseAsFileTime(&tv); 140 unsigned long long ticks = tv.dwHighDateTime; 141 ticks = (ticks << 32) | tv.dwLowDateTime; 142 return static_cast<double>(ticks) / 1.0e7; 143} 144 145/* ============================================================================ 146 Platform code for an platform using POSIX APIs. 147============================================================================ */ 148#else 149 150#include <pthread.h> 151#include <sys/time.h> 152#include <unistd.h> 153 154/* See header for documentation */ 155int get_cpu_count() 156{ 157 return static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN)); 158} 159 160/* See header for documentation */ 161double get_time() 162{ 163 timeval tv; 164 gettimeofday(&tv, 0); 165 return static_cast<double>(tv.tv_sec) + static_cast<double>(tv.tv_usec) * 1.0e-6; 166} 167 168#endif 169 170/** 171 * @brief Worker thread helper payload for launch_threads. 172 */ 173struct launch_desc 174{ 175 /** @brief The native thread handle. */ 176 pthread_t thread_handle; 177 /** @brief The total number of threads in the thread pool. */ 178 int thread_count; 179 /** @brief The thread index in the thread pool. */ 180 int thread_id; 181 /** @brief The user thread function to execute. */ 182 void (*func)(int, int, void*); 183 /** @brief The user thread payload. */ 184 void* payload; 185}; 186 187/** 188 * @brief Helper function to translate thread entry points. 189 * 190 * Convert a (void*) thread entry to an (int, void*) thread entry, where the 191 * integer contains the thread ID in the thread pool. 192 * 193 * @param p The thread launch helper payload. 194 */ 195static void* launch_threads_helper( 196 void *p 197) { 198 launch_desc* ltd = reinterpret_cast<launch_desc*>(p); 199 ltd->func(ltd->thread_count, ltd->thread_id, ltd->payload); 200 return nullptr; 201} 202 203/* See header for documentation */ 204void launch_threads( 205 const char* operation, 206 int thread_count, 207 void (*func)(int, int, void*), 208 void *payload 209) { 210 // Directly execute single threaded workloads on this thread 211 if (thread_count <= 1) 212 { 213 func(1, 0, payload); 214 return; 215 } 216 217 // Otherwise spawn worker threads 218 launch_desc *thread_descs = new launch_desc[thread_count]; 219 int actual_thread_count { 0 }; 220 221 for (int i = 0; i < thread_count; i++) 222 { 223 thread_descs[actual_thread_count].thread_count = thread_count; 224 thread_descs[actual_thread_count].thread_id = actual_thread_count; 225 thread_descs[actual_thread_count].payload = payload; 226 thread_descs[actual_thread_count].func = func; 227 228 // Handle pthread_create failing by simply using fewer threads 229 int error = pthread_create( 230 &(thread_descs[actual_thread_count].thread_handle), 231 nullptr, 232 launch_threads_helper, 233 reinterpret_cast<void*>(thread_descs + actual_thread_count)); 234 235 // Track how many threads we actually created 236 if (!error) 237 { 238 // Windows needs explicit thread assignment to handle large core count systems 239 #if defined(_WIN32) && !defined(__CYGWIN__) 240 set_group_affinity( 241 thread_descs[actual_thread_count].thread_handle, 242 actual_thread_count); 243 #endif 244 245 actual_thread_count++; 246 } 247 } 248 249 // If we did not create thread_count threads then emit a warning 250 if (actual_thread_count != thread_count) 251 { 252 int log_count = actual_thread_count == 0 ? 1 : actual_thread_count; 253 const char* log_s = log_count == 1 ? "" : "s"; 254 printf("WARNING: %s using %d thread%s due to thread creation error\n\n", 255 operation, log_count, log_s); 256 } 257 258 // If we managed to spawn any threads wait for them to complete 259 if (actual_thread_count != 0) 260 { 261 for (int i = 0; i < actual_thread_count; i++) 262 { 263 pthread_join(thread_descs[i].thread_handle, nullptr); 264 } 265 } 266 // Else fall back to using this thread 267 else 268 { 269 func(1, 0, payload); 270 } 271 272 delete[] thread_descs; 273} 274