1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2023 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9//     http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief Platform-specific function implementations.
20 *
21 * This module contains functions with strongly OS-dependent implementations:
22 *
23 *  * CPU count queries
24 *  * Threading
25 *  * Time
26 *
27 * In addition to the basic thread abstraction (which is native pthreads on
28 * all platforms, except Windows where it is an emulation of pthreads), a
29 * utility function to create N threads and wait for them to complete a batch
30 * task has also been provided.
31 */
32
33#include "astcenccli_internal.h"
34
35/* ============================================================================
36   Platform code for Windows using the Win32 APIs.
37============================================================================ */
38#if defined(_WIN32) && !defined(__CYGWIN__)
39
40#define WIN32_LEAN_AND_MEAN
41#include <windows.h>
42
43/** @brief Alias pthread_t to one of the internal Windows types. */
44typedef HANDLE pthread_t;
45
46/** @brief Alias pthread_attr_t to one of the internal Windows types. */
47typedef int pthread_attr_t;
48
49/**
50 * @brief Proxy Windows @c CreateThread underneath a pthreads-like wrapper.
51 */
52static int pthread_create(
53	pthread_t* thread,
54	const pthread_attr_t* attribs,
55	void* (*threadfunc)(void*),
56	void* thread_arg
57) {
58	static_cast<void>(attribs);
59	LPTHREAD_START_ROUTINE func = reinterpret_cast<LPTHREAD_START_ROUTINE>(threadfunc);
60	*thread = CreateThread(nullptr, 0, func, thread_arg, 0, nullptr);
61
62	// Ensure we return 0 on success, non-zero on error
63	if (*thread == NULL)
64	{
65		return 1;
66	}
67
68	return 0;
69}
70
71/**
72 * @brief Manually set CPU group and thread affinity.
73 *
74 * This is needed on Windows 10 or older to allow benefit from large core count
75 * systems with more than 64 logical CPUs. The assignment is skipped on systems
76 * with a single processor group, as it is not necessary.
77 */
78static void set_group_affinity(
79	pthread_t thread,
80	int thread_index
81) {
82	// Skip thread assignment for hardware with a single CPU group
83	int group_count = GetActiveProcessorGroupCount();
84	if (group_count == 1)
85	{
86		return;
87	}
88
89	// Ensure we have a valid assign if user creates more threads than cores
90	int assign_index = thread_index % get_cpu_count();
91	int assign_group { 0 };
92	int assign_group_cpu_count { 0 };
93
94	// Determine which core group and core in the group to use for this thread
95	int group_cpu_count_sum { 0 };
96	for (int group = 0; group < group_count; group++)
97	{
98		int group_cpu_count = static_cast<int>(GetMaximumProcessorCount(group));
99		group_cpu_count_sum += group_cpu_count;
100
101		if (assign_index < group_cpu_count_sum)
102		{
103			assign_group = group;
104			assign_group_cpu_count = group_cpu_count;
105			break;
106		}
107	}
108
109	// Set the affinity to the assigned group, and all supported cores
110	GROUP_AFFINITY affinity {};
111	affinity.Mask = (1 << assign_group_cpu_count) - 1;
112	affinity.Group = assign_group;
113	SetThreadGroupAffinity(thread, &affinity, nullptr);
114}
115
116/**
117 * @brief Proxy Windows @c WaitForSingleObject underneath a pthreads-like wrapper.
118 */
119static int pthread_join(
120	pthread_t thread,
121	void** value
122) {
123	static_cast<void>(value);
124	WaitForSingleObject(thread, INFINITE);
125	return 0;
126}
127
128/* See header for documentation */
129int get_cpu_count()
130{
131	DWORD cpu_count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
132	return static_cast<int>(cpu_count);
133}
134
135/* See header for documentation */
136double get_time()
137{
138	FILETIME tv;
139	GetSystemTimePreciseAsFileTime(&tv);
140	unsigned long long ticks = tv.dwHighDateTime;
141	ticks = (ticks << 32) | tv.dwLowDateTime;
142	return static_cast<double>(ticks) / 1.0e7;
143}
144
145/* ============================================================================
   Platform code for a platform using POSIX APIs.
147============================================================================ */
148#else
149
150#include <pthread.h>
151#include <sys/time.h>
152#include <unistd.h>
153
/* See header for documentation */
int get_cpu_count()
{
	// sysconf returns -1 if the query is unsupported or fails; report a single
	// CPU in that case so callers never see a zero or negative count
	long online_count = sysconf(_SC_NPROCESSORS_ONLN);
	if (online_count < 1)
	{
		return 1;
	}

	return static_cast<int>(online_count);
}
159
/* See header for documentation */
double get_time()
{
	timeval now;
	gettimeofday(&now, nullptr);

	// Combine whole seconds with the microsecond remainder
	const double whole_seconds = static_cast<double>(now.tv_sec);
	const double microseconds = static_cast<double>(now.tv_usec);
	return whole_seconds + microseconds * 1.0e-6;
}
167
168#endif
169
/**
 * @brief Per-worker launch record used by launch_threads.
 *
 * One record is filled in for each worker thread and handed to that thread as
 * its start argument.
 */
struct launch_desc
{
	/** @brief The signature of the user thread function. */
	using worker_func = void (*)(int, int, void*);

	/** @brief The native handle of the worker thread. */
	pthread_t thread_handle;
	/** @brief The thread pool size passed to the user function. */
	int thread_count;
	/** @brief The index of this worker in the thread pool. */
	int thread_id;
	/** @brief The user thread function this worker runs. */
	worker_func func;
	/** @brief The opaque user payload passed to the user function. */
	void* payload;
};
186
187/**
188 * @brief Helper function to translate thread entry points.
189 *
190 * Convert a (void*) thread entry to an (int, void*) thread entry, where the
191 * integer contains the thread ID in the thread pool.
192 *
193 * @param p The thread launch helper payload.
194 */
195static void* launch_threads_helper(
196	void *p
197) {
198	launch_desc* ltd = reinterpret_cast<launch_desc*>(p);
199	ltd->func(ltd->thread_count, ltd->thread_id, ltd->payload);
200	return nullptr;
201}
202
203/* See header for documentation */
204void launch_threads(
205	const char* operation,
206	int thread_count,
207	void (*func)(int, int, void*),
208	void *payload
209) {
210	// Directly execute single threaded workloads on this thread
211	if (thread_count <= 1)
212	{
213		func(1, 0, payload);
214		return;
215	}
216
217	// Otherwise spawn worker threads
218	launch_desc *thread_descs = new launch_desc[thread_count];
219	int actual_thread_count { 0 };
220
221	for (int i = 0; i < thread_count; i++)
222	{
223		thread_descs[actual_thread_count].thread_count = thread_count;
224		thread_descs[actual_thread_count].thread_id = actual_thread_count;
225		thread_descs[actual_thread_count].payload = payload;
226		thread_descs[actual_thread_count].func = func;
227
228		// Handle pthread_create failing by simply using fewer threads
229		int error = pthread_create(
230			&(thread_descs[actual_thread_count].thread_handle),
231			nullptr,
232			launch_threads_helper,
233			reinterpret_cast<void*>(thread_descs + actual_thread_count));
234
235		// Track how many threads we actually created
236		if (!error)
237		{
238			// Windows needs explicit thread assignment to handle large core count systems
239			#if defined(_WIN32) && !defined(__CYGWIN__)
240				set_group_affinity(
241					thread_descs[actual_thread_count].thread_handle,
242					actual_thread_count);
243			#endif
244
245			actual_thread_count++;
246		}
247	}
248
249	// If we did not create thread_count threads then emit a warning
250	if (actual_thread_count != thread_count)
251	{
252		int log_count = actual_thread_count == 0 ? 1 : actual_thread_count;
253		const char* log_s = log_count == 1 ? "" : "s";
254		printf("WARNING: %s using %d thread%s due to thread creation error\n\n",
255		       operation, log_count, log_s);
256	}
257
258	// If we managed to spawn any threads wait for them to complete
259	if (actual_thread_count != 0)
260	{
261		for (int i = 0; i < actual_thread_count; i++)
262		{
263			pthread_join(thread_descs[i].thread_handle, nullptr);
264		}
265	}
266	// Else fall back to using this thread
267	else
268	{
269		func(1, 0, payload);
270	}
271
272	delete[] thread_descs;
273}
274