1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0
2cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
3cc1dc7a3Sopenharmony_ci// Copyright 2011-2023 Arm Limited
4cc1dc7a3Sopenharmony_ci//
5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy
7cc1dc7a3Sopenharmony_ci// of the License at:
8cc1dc7a3Sopenharmony_ci//
9cc1dc7a3Sopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
10cc1dc7a3Sopenharmony_ci//
11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software
12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations
15cc1dc7a3Sopenharmony_ci// under the License.
16cc1dc7a3Sopenharmony_ci// ----------------------------------------------------------------------------
17cc1dc7a3Sopenharmony_ci
18cc1dc7a3Sopenharmony_ci/**
19cc1dc7a3Sopenharmony_ci * @brief Platform-specific function implementations.
20cc1dc7a3Sopenharmony_ci *
21cc1dc7a3Sopenharmony_ci * This module contains functions with strongly OS-dependent implementations:
22cc1dc7a3Sopenharmony_ci *
23cc1dc7a3Sopenharmony_ci *  * CPU count queries
24cc1dc7a3Sopenharmony_ci *  * Threading
25cc1dc7a3Sopenharmony_ci *  * Time
26cc1dc7a3Sopenharmony_ci *
27cc1dc7a3Sopenharmony_ci * In addition to the basic thread abstraction (which is native pthreads on
28cc1dc7a3Sopenharmony_ci * all platforms, except Windows where it is an emulation of pthreads), a
29cc1dc7a3Sopenharmony_ci * utility function to create N threads and wait for them to complete a batch
30cc1dc7a3Sopenharmony_ci * task has also been provided.
31cc1dc7a3Sopenharmony_ci */
32cc1dc7a3Sopenharmony_ci
33cc1dc7a3Sopenharmony_ci#include "astcenccli_internal.h"
34cc1dc7a3Sopenharmony_ci
35cc1dc7a3Sopenharmony_ci/* ============================================================================
36cc1dc7a3Sopenharmony_ci   Platform code for Windows using the Win32 APIs.
37cc1dc7a3Sopenharmony_ci============================================================================ */
38cc1dc7a3Sopenharmony_ci#if defined(_WIN32) && !defined(__CYGWIN__)
39cc1dc7a3Sopenharmony_ci
40cc1dc7a3Sopenharmony_ci#define WIN32_LEAN_AND_MEAN
41cc1dc7a3Sopenharmony_ci#include <windows.h>
42cc1dc7a3Sopenharmony_ci
43cc1dc7a3Sopenharmony_ci/** @brief Alias pthread_t to one of the internal Windows types. */
44cc1dc7a3Sopenharmony_citypedef HANDLE pthread_t;
45cc1dc7a3Sopenharmony_ci
46cc1dc7a3Sopenharmony_ci/** @brief Alias pthread_attr_t to one of the internal Windows types. */
47cc1dc7a3Sopenharmony_citypedef int pthread_attr_t;
48cc1dc7a3Sopenharmony_ci
49cc1dc7a3Sopenharmony_ci/**
50cc1dc7a3Sopenharmony_ci * @brief Proxy Windows @c CreateThread underneath a pthreads-like wrapper.
51cc1dc7a3Sopenharmony_ci */
52cc1dc7a3Sopenharmony_cistatic int pthread_create(
53cc1dc7a3Sopenharmony_ci	pthread_t* thread,
54cc1dc7a3Sopenharmony_ci	const pthread_attr_t* attribs,
55cc1dc7a3Sopenharmony_ci	void* (*threadfunc)(void*),
56cc1dc7a3Sopenharmony_ci	void* thread_arg
57cc1dc7a3Sopenharmony_ci) {
58cc1dc7a3Sopenharmony_ci	static_cast<void>(attribs);
59cc1dc7a3Sopenharmony_ci	LPTHREAD_START_ROUTINE func = reinterpret_cast<LPTHREAD_START_ROUTINE>(threadfunc);
60cc1dc7a3Sopenharmony_ci	*thread = CreateThread(nullptr, 0, func, thread_arg, 0, nullptr);
61cc1dc7a3Sopenharmony_ci
62cc1dc7a3Sopenharmony_ci	// Ensure we return 0 on success, non-zero on error
63cc1dc7a3Sopenharmony_ci	if (*thread == NULL)
64cc1dc7a3Sopenharmony_ci	{
65cc1dc7a3Sopenharmony_ci		return 1;
66cc1dc7a3Sopenharmony_ci	}
67cc1dc7a3Sopenharmony_ci
68cc1dc7a3Sopenharmony_ci	return 0;
69cc1dc7a3Sopenharmony_ci}
70cc1dc7a3Sopenharmony_ci
71cc1dc7a3Sopenharmony_ci/**
72cc1dc7a3Sopenharmony_ci * @brief Manually set CPU group and thread affinity.
73cc1dc7a3Sopenharmony_ci *
74cc1dc7a3Sopenharmony_ci * This is needed on Windows 10 or older to allow benefit from large core count
75cc1dc7a3Sopenharmony_ci * systems with more than 64 logical CPUs. The assignment is skipped on systems
76cc1dc7a3Sopenharmony_ci * with a single processor group, as it is not necessary.
77cc1dc7a3Sopenharmony_ci */
78cc1dc7a3Sopenharmony_cistatic void set_group_affinity(
79cc1dc7a3Sopenharmony_ci	pthread_t thread,
80cc1dc7a3Sopenharmony_ci	int thread_index
81cc1dc7a3Sopenharmony_ci) {
82cc1dc7a3Sopenharmony_ci	// Skip thread assignment for hardware with a single CPU group
83cc1dc7a3Sopenharmony_ci	int group_count = GetActiveProcessorGroupCount();
84cc1dc7a3Sopenharmony_ci	if (group_count == 1)
85cc1dc7a3Sopenharmony_ci	{
86cc1dc7a3Sopenharmony_ci		return;
87cc1dc7a3Sopenharmony_ci	}
88cc1dc7a3Sopenharmony_ci
89cc1dc7a3Sopenharmony_ci	// Ensure we have a valid assign if user creates more threads than cores
90cc1dc7a3Sopenharmony_ci	int assign_index = thread_index % get_cpu_count();
91cc1dc7a3Sopenharmony_ci	int assign_group { 0 };
92cc1dc7a3Sopenharmony_ci	int assign_group_cpu_count { 0 };
93cc1dc7a3Sopenharmony_ci
94cc1dc7a3Sopenharmony_ci	// Determine which core group and core in the group to use for this thread
95cc1dc7a3Sopenharmony_ci	int group_cpu_count_sum { 0 };
96cc1dc7a3Sopenharmony_ci	for (int group = 0; group < group_count; group++)
97cc1dc7a3Sopenharmony_ci	{
98cc1dc7a3Sopenharmony_ci		int group_cpu_count = static_cast<int>(GetMaximumProcessorCount(group));
99cc1dc7a3Sopenharmony_ci		group_cpu_count_sum += group_cpu_count;
100cc1dc7a3Sopenharmony_ci
101cc1dc7a3Sopenharmony_ci		if (assign_index < group_cpu_count_sum)
102cc1dc7a3Sopenharmony_ci		{
103cc1dc7a3Sopenharmony_ci			assign_group = group;
104cc1dc7a3Sopenharmony_ci			assign_group_cpu_count = group_cpu_count;
105cc1dc7a3Sopenharmony_ci			break;
106cc1dc7a3Sopenharmony_ci		}
107cc1dc7a3Sopenharmony_ci	}
108cc1dc7a3Sopenharmony_ci
109cc1dc7a3Sopenharmony_ci	// Set the affinity to the assigned group, and all supported cores
110cc1dc7a3Sopenharmony_ci	GROUP_AFFINITY affinity {};
111cc1dc7a3Sopenharmony_ci	affinity.Mask = (1 << assign_group_cpu_count) - 1;
112cc1dc7a3Sopenharmony_ci	affinity.Group = assign_group;
113cc1dc7a3Sopenharmony_ci	SetThreadGroupAffinity(thread, &affinity, nullptr);
114cc1dc7a3Sopenharmony_ci}
115cc1dc7a3Sopenharmony_ci
116cc1dc7a3Sopenharmony_ci/**
117cc1dc7a3Sopenharmony_ci * @brief Proxy Windows @c WaitForSingleObject underneath a pthreads-like wrapper.
118cc1dc7a3Sopenharmony_ci */
119cc1dc7a3Sopenharmony_cistatic int pthread_join(
120cc1dc7a3Sopenharmony_ci	pthread_t thread,
121cc1dc7a3Sopenharmony_ci	void** value
122cc1dc7a3Sopenharmony_ci) {
123cc1dc7a3Sopenharmony_ci	static_cast<void>(value);
124cc1dc7a3Sopenharmony_ci	WaitForSingleObject(thread, INFINITE);
125cc1dc7a3Sopenharmony_ci	return 0;
126cc1dc7a3Sopenharmony_ci}
127cc1dc7a3Sopenharmony_ci
128cc1dc7a3Sopenharmony_ci/* See header for documentation */
129cc1dc7a3Sopenharmony_ciint get_cpu_count()
130cc1dc7a3Sopenharmony_ci{
131cc1dc7a3Sopenharmony_ci	DWORD cpu_count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
132cc1dc7a3Sopenharmony_ci	return static_cast<int>(cpu_count);
133cc1dc7a3Sopenharmony_ci}
134cc1dc7a3Sopenharmony_ci
135cc1dc7a3Sopenharmony_ci/* See header for documentation */
136cc1dc7a3Sopenharmony_cidouble get_time()
137cc1dc7a3Sopenharmony_ci{
138cc1dc7a3Sopenharmony_ci	FILETIME tv;
139cc1dc7a3Sopenharmony_ci	GetSystemTimePreciseAsFileTime(&tv);
140cc1dc7a3Sopenharmony_ci	unsigned long long ticks = tv.dwHighDateTime;
141cc1dc7a3Sopenharmony_ci	ticks = (ticks << 32) | tv.dwLowDateTime;
142cc1dc7a3Sopenharmony_ci	return static_cast<double>(ticks) / 1.0e7;
143cc1dc7a3Sopenharmony_ci}
144cc1dc7a3Sopenharmony_ci
/* ============================================================================
   Platform code for a platform using POSIX APIs.
============================================================================ */
148cc1dc7a3Sopenharmony_ci#else
149cc1dc7a3Sopenharmony_ci
150cc1dc7a3Sopenharmony_ci#include <pthread.h>
151cc1dc7a3Sopenharmony_ci#include <sys/time.h>
152cc1dc7a3Sopenharmony_ci#include <unistd.h>
153cc1dc7a3Sopenharmony_ci
/**
 * @brief Get the number of processors currently online.
 *
 * See header for the public contract.
 *
 * @return The online processor count, or 1 if the query fails.
 */
int get_cpu_count()
{
	long online_count = sysconf(_SC_NPROCESSORS_ONLN);

	// sysconf reports -1 on error; never hand callers a non-positive CPU count
	if (online_count < 1)
	{
		return 1;
	}

	return static_cast<int>(online_count);
}
159cc1dc7a3Sopenharmony_ci
/**
 * @brief Get the current wall-clock time in seconds, as a double.
 *
 * See header for the public contract.
 *
 * @return Whole seconds plus the microsecond fraction reported by @c gettimeofday.
 */
double get_time()
{
	struct timeval now;
	gettimeofday(&now, nullptr);

	const double whole_seconds = static_cast<double>(now.tv_sec);
	const double microseconds = static_cast<double>(now.tv_usec);
	return whole_seconds + microseconds * 1.0e-6;
}
167cc1dc7a3Sopenharmony_ci
168cc1dc7a3Sopenharmony_ci#endif
169cc1dc7a3Sopenharmony_ci
/**
 * @brief Worker thread helper payload for launch_threads.
 *
 * One instance exists per worker thread. All members have defined defaults so
 * that an entry which was never handed to a thread holds no indeterminate
 * values.
 */
struct launch_desc
{
	/** @brief The native thread handle. */
	pthread_t thread_handle {};

	/** @brief The total number of threads in the thread pool. */
	int thread_count { 0 };

	/** @brief The thread index in the thread pool. */
	int thread_id { 0 };

	/** @brief The user thread function to execute; takes (thread_count, thread_id, payload). */
	void (*func)(int, int, void*) { nullptr };

	/** @brief The user thread payload. */
	void* payload { nullptr };
};
186cc1dc7a3Sopenharmony_ci
187cc1dc7a3Sopenharmony_ci/**
188cc1dc7a3Sopenharmony_ci * @brief Helper function to translate thread entry points.
189cc1dc7a3Sopenharmony_ci *
190cc1dc7a3Sopenharmony_ci * Convert a (void*) thread entry to an (int, void*) thread entry, where the
191cc1dc7a3Sopenharmony_ci * integer contains the thread ID in the thread pool.
192cc1dc7a3Sopenharmony_ci *
193cc1dc7a3Sopenharmony_ci * @param p The thread launch helper payload.
194cc1dc7a3Sopenharmony_ci */
195cc1dc7a3Sopenharmony_cistatic void* launch_threads_helper(
196cc1dc7a3Sopenharmony_ci	void *p
197cc1dc7a3Sopenharmony_ci) {
198cc1dc7a3Sopenharmony_ci	launch_desc* ltd = reinterpret_cast<launch_desc*>(p);
199cc1dc7a3Sopenharmony_ci	ltd->func(ltd->thread_count, ltd->thread_id, ltd->payload);
200cc1dc7a3Sopenharmony_ci	return nullptr;
201cc1dc7a3Sopenharmony_ci}
202cc1dc7a3Sopenharmony_ci
203cc1dc7a3Sopenharmony_ci/* See header for documentation */
204cc1dc7a3Sopenharmony_civoid launch_threads(
205cc1dc7a3Sopenharmony_ci	const char* operation,
206cc1dc7a3Sopenharmony_ci	int thread_count,
207cc1dc7a3Sopenharmony_ci	void (*func)(int, int, void*),
208cc1dc7a3Sopenharmony_ci	void *payload
209cc1dc7a3Sopenharmony_ci) {
210cc1dc7a3Sopenharmony_ci	// Directly execute single threaded workloads on this thread
211cc1dc7a3Sopenharmony_ci	if (thread_count <= 1)
212cc1dc7a3Sopenharmony_ci	{
213cc1dc7a3Sopenharmony_ci		func(1, 0, payload);
214cc1dc7a3Sopenharmony_ci		return;
215cc1dc7a3Sopenharmony_ci	}
216cc1dc7a3Sopenharmony_ci
217cc1dc7a3Sopenharmony_ci	// Otherwise spawn worker threads
218cc1dc7a3Sopenharmony_ci	launch_desc *thread_descs = new launch_desc[thread_count];
219cc1dc7a3Sopenharmony_ci	int actual_thread_count { 0 };
220cc1dc7a3Sopenharmony_ci
221cc1dc7a3Sopenharmony_ci	for (int i = 0; i < thread_count; i++)
222cc1dc7a3Sopenharmony_ci	{
223cc1dc7a3Sopenharmony_ci		thread_descs[actual_thread_count].thread_count = thread_count;
224cc1dc7a3Sopenharmony_ci		thread_descs[actual_thread_count].thread_id = actual_thread_count;
225cc1dc7a3Sopenharmony_ci		thread_descs[actual_thread_count].payload = payload;
226cc1dc7a3Sopenharmony_ci		thread_descs[actual_thread_count].func = func;
227cc1dc7a3Sopenharmony_ci
228cc1dc7a3Sopenharmony_ci		// Handle pthread_create failing by simply using fewer threads
229cc1dc7a3Sopenharmony_ci		int error = pthread_create(
230cc1dc7a3Sopenharmony_ci			&(thread_descs[actual_thread_count].thread_handle),
231cc1dc7a3Sopenharmony_ci			nullptr,
232cc1dc7a3Sopenharmony_ci			launch_threads_helper,
233cc1dc7a3Sopenharmony_ci			reinterpret_cast<void*>(thread_descs + actual_thread_count));
234cc1dc7a3Sopenharmony_ci
235cc1dc7a3Sopenharmony_ci		// Track how many threads we actually created
236cc1dc7a3Sopenharmony_ci		if (!error)
237cc1dc7a3Sopenharmony_ci		{
238cc1dc7a3Sopenharmony_ci			// Windows needs explicit thread assignment to handle large core count systems
239cc1dc7a3Sopenharmony_ci			#if defined(_WIN32) && !defined(__CYGWIN__)
240cc1dc7a3Sopenharmony_ci				set_group_affinity(
241cc1dc7a3Sopenharmony_ci					thread_descs[actual_thread_count].thread_handle,
242cc1dc7a3Sopenharmony_ci					actual_thread_count);
243cc1dc7a3Sopenharmony_ci			#endif
244cc1dc7a3Sopenharmony_ci
245cc1dc7a3Sopenharmony_ci			actual_thread_count++;
246cc1dc7a3Sopenharmony_ci		}
247cc1dc7a3Sopenharmony_ci	}
248cc1dc7a3Sopenharmony_ci
249cc1dc7a3Sopenharmony_ci	// If we did not create thread_count threads then emit a warning
250cc1dc7a3Sopenharmony_ci	if (actual_thread_count != thread_count)
251cc1dc7a3Sopenharmony_ci	{
252cc1dc7a3Sopenharmony_ci		int log_count = actual_thread_count == 0 ? 1 : actual_thread_count;
253cc1dc7a3Sopenharmony_ci		const char* log_s = log_count == 1 ? "" : "s";
254cc1dc7a3Sopenharmony_ci		printf("WARNING: %s using %d thread%s due to thread creation error\n\n",
255cc1dc7a3Sopenharmony_ci		       operation, log_count, log_s);
256cc1dc7a3Sopenharmony_ci	}
257cc1dc7a3Sopenharmony_ci
258cc1dc7a3Sopenharmony_ci	// If we managed to spawn any threads wait for them to complete
259cc1dc7a3Sopenharmony_ci	if (actual_thread_count != 0)
260cc1dc7a3Sopenharmony_ci	{
261cc1dc7a3Sopenharmony_ci		for (int i = 0; i < actual_thread_count; i++)
262cc1dc7a3Sopenharmony_ci		{
263cc1dc7a3Sopenharmony_ci			pthread_join(thread_descs[i].thread_handle, nullptr);
264cc1dc7a3Sopenharmony_ci		}
265cc1dc7a3Sopenharmony_ci	}
266cc1dc7a3Sopenharmony_ci	// Else fall back to using this thread
267cc1dc7a3Sopenharmony_ci	else
268cc1dc7a3Sopenharmony_ci	{
269cc1dc7a3Sopenharmony_ci		func(1, 0, payload);
270cc1dc7a3Sopenharmony_ci	}
271cc1dc7a3Sopenharmony_ci
272cc1dc7a3Sopenharmony_ci	delete[] thread_descs;
273cc1dc7a3Sopenharmony_ci}
274