1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0 2cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 3cc1dc7a3Sopenharmony_ci// Copyright 2011-2023 Arm Limited 4cc1dc7a3Sopenharmony_ci// 5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy 7cc1dc7a3Sopenharmony_ci// of the License at: 8cc1dc7a3Sopenharmony_ci// 9cc1dc7a3Sopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 10cc1dc7a3Sopenharmony_ci// 11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software 12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations 15cc1dc7a3Sopenharmony_ci// under the License. 16cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 17cc1dc7a3Sopenharmony_ci 18cc1dc7a3Sopenharmony_ci/** 19cc1dc7a3Sopenharmony_ci * @brief Platform-specific function implementations. 20cc1dc7a3Sopenharmony_ci * 21cc1dc7a3Sopenharmony_ci * This module contains functions with strongly OS-dependent implementations: 22cc1dc7a3Sopenharmony_ci * 23cc1dc7a3Sopenharmony_ci * * CPU count queries 24cc1dc7a3Sopenharmony_ci * * Threading 25cc1dc7a3Sopenharmony_ci * * Time 26cc1dc7a3Sopenharmony_ci * 27cc1dc7a3Sopenharmony_ci * In addition to the basic thread abstraction (which is native pthreads on 28cc1dc7a3Sopenharmony_ci * all platforms, except Windows where it is an emulation of pthreads), a 29cc1dc7a3Sopenharmony_ci * utility function to create N threads and wait for them to complete a batch 30cc1dc7a3Sopenharmony_ci * task has also been provided. 31cc1dc7a3Sopenharmony_ci */ 32cc1dc7a3Sopenharmony_ci 33cc1dc7a3Sopenharmony_ci#include "astcenccli_internal.h" 34cc1dc7a3Sopenharmony_ci 35cc1dc7a3Sopenharmony_ci/* ============================================================================ 36cc1dc7a3Sopenharmony_ci Platform code for Windows using the Win32 APIs. 37cc1dc7a3Sopenharmony_ci============================================================================ */ 38cc1dc7a3Sopenharmony_ci#if defined(_WIN32) && !defined(__CYGWIN__) 39cc1dc7a3Sopenharmony_ci 40cc1dc7a3Sopenharmony_ci#define WIN32_LEAN_AND_MEAN 41cc1dc7a3Sopenharmony_ci#include <windows.h> 42cc1dc7a3Sopenharmony_ci 43cc1dc7a3Sopenharmony_ci/** @brief Alias pthread_t to one of the internal Windows types. */ 44cc1dc7a3Sopenharmony_citypedef HANDLE pthread_t; 45cc1dc7a3Sopenharmony_ci 46cc1dc7a3Sopenharmony_ci/** @brief Alias pthread_attr_t to one of the internal Windows types. */ 47cc1dc7a3Sopenharmony_citypedef int pthread_attr_t; 48cc1dc7a3Sopenharmony_ci 49cc1dc7a3Sopenharmony_ci/** 50cc1dc7a3Sopenharmony_ci * @brief Proxy Windows @c CreateThread underneath a pthreads-like wrapper. 51cc1dc7a3Sopenharmony_ci */ 52cc1dc7a3Sopenharmony_cistatic int pthread_create( 53cc1dc7a3Sopenharmony_ci pthread_t* thread, 54cc1dc7a3Sopenharmony_ci const pthread_attr_t* attribs, 55cc1dc7a3Sopenharmony_ci void* (*threadfunc)(void*), 56cc1dc7a3Sopenharmony_ci void* thread_arg 57cc1dc7a3Sopenharmony_ci) { 58cc1dc7a3Sopenharmony_ci static_cast<void>(attribs); 59cc1dc7a3Sopenharmony_ci LPTHREAD_START_ROUTINE func = reinterpret_cast<LPTHREAD_START_ROUTINE>(threadfunc); 60cc1dc7a3Sopenharmony_ci *thread = CreateThread(nullptr, 0, func, thread_arg, 0, nullptr); 61cc1dc7a3Sopenharmony_ci 62cc1dc7a3Sopenharmony_ci // Ensure we return 0 on success, non-zero on error 63cc1dc7a3Sopenharmony_ci if (*thread == NULL) 64cc1dc7a3Sopenharmony_ci { 65cc1dc7a3Sopenharmony_ci return 1; 66cc1dc7a3Sopenharmony_ci } 67cc1dc7a3Sopenharmony_ci 68cc1dc7a3Sopenharmony_ci return 0; 69cc1dc7a3Sopenharmony_ci} 70cc1dc7a3Sopenharmony_ci 71cc1dc7a3Sopenharmony_ci/** 72cc1dc7a3Sopenharmony_ci * @brief Manually set CPU group and thread affinity. 73cc1dc7a3Sopenharmony_ci * 74cc1dc7a3Sopenharmony_ci * This is needed on Windows 10 or older to allow benefit from large core count 75cc1dc7a3Sopenharmony_ci * systems with more than 64 logical CPUs. The assignment is skipped on systems 76cc1dc7a3Sopenharmony_ci * with a single processor group, as it is not necessary. 77cc1dc7a3Sopenharmony_ci */ 78cc1dc7a3Sopenharmony_cistatic void set_group_affinity( 79cc1dc7a3Sopenharmony_ci pthread_t thread, 80cc1dc7a3Sopenharmony_ci int thread_index 81cc1dc7a3Sopenharmony_ci) { 82cc1dc7a3Sopenharmony_ci // Skip thread assignment for hardware with a single CPU group 83cc1dc7a3Sopenharmony_ci int group_count = GetActiveProcessorGroupCount(); 84cc1dc7a3Sopenharmony_ci if (group_count == 1) 85cc1dc7a3Sopenharmony_ci { 86cc1dc7a3Sopenharmony_ci return; 87cc1dc7a3Sopenharmony_ci } 88cc1dc7a3Sopenharmony_ci 89cc1dc7a3Sopenharmony_ci // Ensure we have a valid assign if user creates more threads than cores 90cc1dc7a3Sopenharmony_ci int assign_index = thread_index % get_cpu_count(); 91cc1dc7a3Sopenharmony_ci int assign_group { 0 }; 92cc1dc7a3Sopenharmony_ci int assign_group_cpu_count { 0 }; 93cc1dc7a3Sopenharmony_ci 94cc1dc7a3Sopenharmony_ci // Determine which core group and core in the group to use for this thread 95cc1dc7a3Sopenharmony_ci int group_cpu_count_sum { 0 }; 96cc1dc7a3Sopenharmony_ci for (int group = 0; group < group_count; group++) 97cc1dc7a3Sopenharmony_ci { 98cc1dc7a3Sopenharmony_ci int group_cpu_count = static_cast<int>(GetMaximumProcessorCount(group)); 99cc1dc7a3Sopenharmony_ci group_cpu_count_sum += group_cpu_count; 100cc1dc7a3Sopenharmony_ci 101cc1dc7a3Sopenharmony_ci if (assign_index < group_cpu_count_sum) 102cc1dc7a3Sopenharmony_ci { 103cc1dc7a3Sopenharmony_ci assign_group = group; 104cc1dc7a3Sopenharmony_ci assign_group_cpu_count = group_cpu_count; 105cc1dc7a3Sopenharmony_ci break; 106cc1dc7a3Sopenharmony_ci } 107cc1dc7a3Sopenharmony_ci } 108cc1dc7a3Sopenharmony_ci 109cc1dc7a3Sopenharmony_ci // Set the affinity to the assigned group, and all supported cores 110cc1dc7a3Sopenharmony_ci GROUP_AFFINITY affinity {}; 111cc1dc7a3Sopenharmony_ci affinity.Mask = (1 << assign_group_cpu_count) - 1; 112cc1dc7a3Sopenharmony_ci affinity.Group = assign_group; 113cc1dc7a3Sopenharmony_ci SetThreadGroupAffinity(thread, &affinity, nullptr); 114cc1dc7a3Sopenharmony_ci} 115cc1dc7a3Sopenharmony_ci 116cc1dc7a3Sopenharmony_ci/** 117cc1dc7a3Sopenharmony_ci * @brief Proxy Windows @c WaitForSingleObject underneath a pthreads-like wrapper. 118cc1dc7a3Sopenharmony_ci */ 119cc1dc7a3Sopenharmony_cistatic int pthread_join( 120cc1dc7a3Sopenharmony_ci pthread_t thread, 121cc1dc7a3Sopenharmony_ci void** value 122cc1dc7a3Sopenharmony_ci) { 123cc1dc7a3Sopenharmony_ci static_cast<void>(value); 124cc1dc7a3Sopenharmony_ci WaitForSingleObject(thread, INFINITE); 125cc1dc7a3Sopenharmony_ci return 0; 126cc1dc7a3Sopenharmony_ci} 127cc1dc7a3Sopenharmony_ci 128cc1dc7a3Sopenharmony_ci/* See header for documentation */ 129cc1dc7a3Sopenharmony_ciint get_cpu_count() 130cc1dc7a3Sopenharmony_ci{ 131cc1dc7a3Sopenharmony_ci DWORD cpu_count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); 132cc1dc7a3Sopenharmony_ci return static_cast<int>(cpu_count); 133cc1dc7a3Sopenharmony_ci} 134cc1dc7a3Sopenharmony_ci 135cc1dc7a3Sopenharmony_ci/* See header for documentation */ 136cc1dc7a3Sopenharmony_cidouble get_time() 137cc1dc7a3Sopenharmony_ci{ 138cc1dc7a3Sopenharmony_ci FILETIME tv; 139cc1dc7a3Sopenharmony_ci GetSystemTimePreciseAsFileTime(&tv); 140cc1dc7a3Sopenharmony_ci unsigned long long ticks = tv.dwHighDateTime; 141cc1dc7a3Sopenharmony_ci ticks = (ticks << 32) | tv.dwLowDateTime; 142cc1dc7a3Sopenharmony_ci return static_cast<double>(ticks) / 1.0e7; 143cc1dc7a3Sopenharmony_ci} 144cc1dc7a3Sopenharmony_ci 145cc1dc7a3Sopenharmony_ci/* ============================================================================ 146cc1dc7a3Sopenharmony_ci Platform code for an platform using POSIX APIs. 147cc1dc7a3Sopenharmony_ci============================================================================ */ 148cc1dc7a3Sopenharmony_ci#else 149cc1dc7a3Sopenharmony_ci 150cc1dc7a3Sopenharmony_ci#include <pthread.h> 151cc1dc7a3Sopenharmony_ci#include <sys/time.h> 152cc1dc7a3Sopenharmony_ci#include <unistd.h> 153cc1dc7a3Sopenharmony_ci 154cc1dc7a3Sopenharmony_ci/* See header for documentation */ 155cc1dc7a3Sopenharmony_ciint get_cpu_count() 156cc1dc7a3Sopenharmony_ci{ 157cc1dc7a3Sopenharmony_ci return static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN)); 158cc1dc7a3Sopenharmony_ci} 159cc1dc7a3Sopenharmony_ci 160cc1dc7a3Sopenharmony_ci/* See header for documentation */ 161cc1dc7a3Sopenharmony_cidouble get_time() 162cc1dc7a3Sopenharmony_ci{ 163cc1dc7a3Sopenharmony_ci timeval tv; 164cc1dc7a3Sopenharmony_ci gettimeofday(&tv, 0); 165cc1dc7a3Sopenharmony_ci return static_cast<double>(tv.tv_sec) + static_cast<double>(tv.tv_usec) * 1.0e-6; 166cc1dc7a3Sopenharmony_ci} 167cc1dc7a3Sopenharmony_ci 168cc1dc7a3Sopenharmony_ci#endif 169cc1dc7a3Sopenharmony_ci 170cc1dc7a3Sopenharmony_ci/** 171cc1dc7a3Sopenharmony_ci * @brief Worker thread helper payload for launch_threads. 172cc1dc7a3Sopenharmony_ci */ 173cc1dc7a3Sopenharmony_cistruct launch_desc 174cc1dc7a3Sopenharmony_ci{ 175cc1dc7a3Sopenharmony_ci /** @brief The native thread handle. */ 176cc1dc7a3Sopenharmony_ci pthread_t thread_handle; 177cc1dc7a3Sopenharmony_ci /** @brief The total number of threads in the thread pool. */ 178cc1dc7a3Sopenharmony_ci int thread_count; 179cc1dc7a3Sopenharmony_ci /** @brief The thread index in the thread pool. */ 180cc1dc7a3Sopenharmony_ci int thread_id; 181cc1dc7a3Sopenharmony_ci /** @brief The user thread function to execute. */ 182cc1dc7a3Sopenharmony_ci void (*func)(int, int, void*); 183cc1dc7a3Sopenharmony_ci /** @brief The user thread payload. */ 184cc1dc7a3Sopenharmony_ci void* payload; 185cc1dc7a3Sopenharmony_ci}; 186cc1dc7a3Sopenharmony_ci 187cc1dc7a3Sopenharmony_ci/** 188cc1dc7a3Sopenharmony_ci * @brief Helper function to translate thread entry points. 189cc1dc7a3Sopenharmony_ci * 190cc1dc7a3Sopenharmony_ci * Convert a (void*) thread entry to an (int, void*) thread entry, where the 191cc1dc7a3Sopenharmony_ci * integer contains the thread ID in the thread pool. 192cc1dc7a3Sopenharmony_ci * 193cc1dc7a3Sopenharmony_ci * @param p The thread launch helper payload. 194cc1dc7a3Sopenharmony_ci */ 195cc1dc7a3Sopenharmony_cistatic void* launch_threads_helper( 196cc1dc7a3Sopenharmony_ci void *p 197cc1dc7a3Sopenharmony_ci) { 198cc1dc7a3Sopenharmony_ci launch_desc* ltd = reinterpret_cast<launch_desc*>(p); 199cc1dc7a3Sopenharmony_ci ltd->func(ltd->thread_count, ltd->thread_id, ltd->payload); 200cc1dc7a3Sopenharmony_ci return nullptr; 201cc1dc7a3Sopenharmony_ci} 202cc1dc7a3Sopenharmony_ci 203cc1dc7a3Sopenharmony_ci/* See header for documentation */ 204cc1dc7a3Sopenharmony_civoid launch_threads( 205cc1dc7a3Sopenharmony_ci const char* operation, 206cc1dc7a3Sopenharmony_ci int thread_count, 207cc1dc7a3Sopenharmony_ci void (*func)(int, int, void*), 208cc1dc7a3Sopenharmony_ci void *payload 209cc1dc7a3Sopenharmony_ci) { 210cc1dc7a3Sopenharmony_ci // Directly execute single threaded workloads on this thread 211cc1dc7a3Sopenharmony_ci if (thread_count <= 1) 212cc1dc7a3Sopenharmony_ci { 213cc1dc7a3Sopenharmony_ci func(1, 0, payload); 214cc1dc7a3Sopenharmony_ci return; 215cc1dc7a3Sopenharmony_ci } 216cc1dc7a3Sopenharmony_ci 217cc1dc7a3Sopenharmony_ci // Otherwise spawn worker threads 218cc1dc7a3Sopenharmony_ci launch_desc *thread_descs = new launch_desc[thread_count]; 219cc1dc7a3Sopenharmony_ci int actual_thread_count { 0 }; 220cc1dc7a3Sopenharmony_ci 221cc1dc7a3Sopenharmony_ci for (int i = 0; i < thread_count; i++) 222cc1dc7a3Sopenharmony_ci { 223cc1dc7a3Sopenharmony_ci thread_descs[actual_thread_count].thread_count = thread_count; 224cc1dc7a3Sopenharmony_ci thread_descs[actual_thread_count].thread_id = actual_thread_count; 225cc1dc7a3Sopenharmony_ci thread_descs[actual_thread_count].payload = payload; 226cc1dc7a3Sopenharmony_ci thread_descs[actual_thread_count].func = func; 227cc1dc7a3Sopenharmony_ci 228cc1dc7a3Sopenharmony_ci // Handle pthread_create failing by simply using fewer threads 229cc1dc7a3Sopenharmony_ci int error = pthread_create( 230cc1dc7a3Sopenharmony_ci &(thread_descs[actual_thread_count].thread_handle), 231cc1dc7a3Sopenharmony_ci nullptr, 232cc1dc7a3Sopenharmony_ci launch_threads_helper, 233cc1dc7a3Sopenharmony_ci reinterpret_cast<void*>(thread_descs + actual_thread_count)); 234cc1dc7a3Sopenharmony_ci 235cc1dc7a3Sopenharmony_ci // Track how many threads we actually created 236cc1dc7a3Sopenharmony_ci if (!error) 237cc1dc7a3Sopenharmony_ci { 238cc1dc7a3Sopenharmony_ci // Windows needs explicit thread assignment to handle large core count systems 239cc1dc7a3Sopenharmony_ci #if defined(_WIN32) && !defined(__CYGWIN__) 240cc1dc7a3Sopenharmony_ci set_group_affinity( 241cc1dc7a3Sopenharmony_ci thread_descs[actual_thread_count].thread_handle, 242cc1dc7a3Sopenharmony_ci actual_thread_count); 243cc1dc7a3Sopenharmony_ci #endif 244cc1dc7a3Sopenharmony_ci 245cc1dc7a3Sopenharmony_ci actual_thread_count++; 246cc1dc7a3Sopenharmony_ci } 247cc1dc7a3Sopenharmony_ci } 248cc1dc7a3Sopenharmony_ci 249cc1dc7a3Sopenharmony_ci // If we did not create thread_count threads then emit a warning 250cc1dc7a3Sopenharmony_ci if (actual_thread_count != thread_count) 251cc1dc7a3Sopenharmony_ci { 252cc1dc7a3Sopenharmony_ci int log_count = actual_thread_count == 0 ? 1 : actual_thread_count; 253cc1dc7a3Sopenharmony_ci const char* log_s = log_count == 1 ? "" : "s"; 254cc1dc7a3Sopenharmony_ci printf("WARNING: %s using %d thread%s due to thread creation error\n\n", 255cc1dc7a3Sopenharmony_ci operation, log_count, log_s); 256cc1dc7a3Sopenharmony_ci } 257cc1dc7a3Sopenharmony_ci 258cc1dc7a3Sopenharmony_ci // If we managed to spawn any threads wait for them to complete 259cc1dc7a3Sopenharmony_ci if (actual_thread_count != 0) 260cc1dc7a3Sopenharmony_ci { 261cc1dc7a3Sopenharmony_ci for (int i = 0; i < actual_thread_count; i++) 262cc1dc7a3Sopenharmony_ci { 263cc1dc7a3Sopenharmony_ci pthread_join(thread_descs[i].thread_handle, nullptr); 264cc1dc7a3Sopenharmony_ci } 265cc1dc7a3Sopenharmony_ci } 266cc1dc7a3Sopenharmony_ci // Else fall back to using this thread 267cc1dc7a3Sopenharmony_ci else 268cc1dc7a3Sopenharmony_ci { 269cc1dc7a3Sopenharmony_ci func(1, 0, payload); 270cc1dc7a3Sopenharmony_ci } 271cc1dc7a3Sopenharmony_ci 272cc1dc7a3Sopenharmony_ci delete[] thread_descs; 273cc1dc7a3Sopenharmony_ci} 274