18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
68c2ecf20Sopenharmony_ci *
 * This program is particularly useful to measure the latency of nthread wakeups
 * in non-error situations: all waiters are queued and every wake call wakes up
 * one or more tasks, and thus the waitqueue is never empty.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci/* For the CLR_() macros */
138c2ecf20Sopenharmony_ci#include <string.h>
148c2ecf20Sopenharmony_ci#include <pthread.h>
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include <signal.h>
178c2ecf20Sopenharmony_ci#include "../util/stat.h"
188c2ecf20Sopenharmony_ci#include <subcmd/parse-options.h>
198c2ecf20Sopenharmony_ci#include <linux/compiler.h>
208c2ecf20Sopenharmony_ci#include <linux/kernel.h>
218c2ecf20Sopenharmony_ci#include <linux/time64.h>
228c2ecf20Sopenharmony_ci#include <errno.h>
238c2ecf20Sopenharmony_ci#include <internal/cpumap.h>
248c2ecf20Sopenharmony_ci#include <perf/cpumap.h>
258c2ecf20Sopenharmony_ci#include "bench.h"
268c2ecf20Sopenharmony_ci#include "futex.h"
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci#include <err.h>
298c2ecf20Sopenharmony_ci#include <stdlib.h>
308c2ecf20Sopenharmony_ci#include <sys/time.h>
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci/* all threads will block on the same futex */
338c2ecf20Sopenharmony_cistatic u_int32_t futex1 = 0;
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci/*
368c2ecf20Sopenharmony_ci * How many wakeups to do at a time.
378c2ecf20Sopenharmony_ci * Default to 1 in order to make the kernel work more.
388c2ecf20Sopenharmony_ci */
398c2ecf20Sopenharmony_cistatic unsigned int nwakes = 1;
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_cipthread_t *worker;
428c2ecf20Sopenharmony_cistatic bool done = false, silent = false, fshared = false;
438c2ecf20Sopenharmony_cistatic pthread_mutex_t thread_lock;
448c2ecf20Sopenharmony_cistatic pthread_cond_t thread_parent, thread_worker;
458c2ecf20Sopenharmony_cistatic struct stats waketime_stats, wakeup_stats;
468c2ecf20Sopenharmony_cistatic unsigned int threads_starting, nthreads = 0;
478c2ecf20Sopenharmony_cistatic int futex_flag = 0;
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_cistatic const struct option options[] = {
508c2ecf20Sopenharmony_ci	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
518c2ecf20Sopenharmony_ci	OPT_UINTEGER('w', "nwakes",  &nwakes,   "Specify amount of threads to wake at once"),
528c2ecf20Sopenharmony_ci	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
538c2ecf20Sopenharmony_ci	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
548c2ecf20Sopenharmony_ci	OPT_END()
558c2ecf20Sopenharmony_ci};
568c2ecf20Sopenharmony_ci
/* NULL-terminated usage text passed to parse_options()/usage_with_options(). */
static const char * const bench_futex_wake_usage[] = {
	"perf bench futex wake <options>",
	NULL,
};
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_cistatic void *workerfn(void *arg __maybe_unused)
638c2ecf20Sopenharmony_ci{
648c2ecf20Sopenharmony_ci	pthread_mutex_lock(&thread_lock);
658c2ecf20Sopenharmony_ci	threads_starting--;
668c2ecf20Sopenharmony_ci	if (!threads_starting)
678c2ecf20Sopenharmony_ci		pthread_cond_signal(&thread_parent);
688c2ecf20Sopenharmony_ci	pthread_cond_wait(&thread_worker, &thread_lock);
698c2ecf20Sopenharmony_ci	pthread_mutex_unlock(&thread_lock);
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	while (1) {
728c2ecf20Sopenharmony_ci		if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
738c2ecf20Sopenharmony_ci			break;
748c2ecf20Sopenharmony_ci	}
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	pthread_exit(NULL);
778c2ecf20Sopenharmony_ci	return NULL;
788c2ecf20Sopenharmony_ci}
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_cistatic void print_summary(void)
818c2ecf20Sopenharmony_ci{
828c2ecf20Sopenharmony_ci	double waketime_avg = avg_stats(&waketime_stats);
838c2ecf20Sopenharmony_ci	double waketime_stddev = stddev_stats(&waketime_stats);
848c2ecf20Sopenharmony_ci	unsigned int wakeup_avg = avg_stats(&wakeup_stats);
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
878c2ecf20Sopenharmony_ci	       wakeup_avg,
888c2ecf20Sopenharmony_ci	       nthreads,
898c2ecf20Sopenharmony_ci	       waketime_avg / USEC_PER_MSEC,
908c2ecf20Sopenharmony_ci	       rel_stddev_stats(waketime_stddev, waketime_avg));
918c2ecf20Sopenharmony_ci}
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_cistatic void block_threads(pthread_t *w,
948c2ecf20Sopenharmony_ci			  pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
958c2ecf20Sopenharmony_ci{
968c2ecf20Sopenharmony_ci	cpu_set_t cpuset;
978c2ecf20Sopenharmony_ci	unsigned int i;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	threads_starting = nthreads;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	/* create and block all threads */
1028c2ecf20Sopenharmony_ci	for (i = 0; i < nthreads; i++) {
1038c2ecf20Sopenharmony_ci		CPU_ZERO(&cpuset);
1048c2ecf20Sopenharmony_ci		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
1078c2ecf20Sopenharmony_ci			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
1108c2ecf20Sopenharmony_ci			err(EXIT_FAILURE, "pthread_create");
1118c2ecf20Sopenharmony_ci	}
1128c2ecf20Sopenharmony_ci}
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_cistatic void toggle_done(int sig __maybe_unused,
1158c2ecf20Sopenharmony_ci			siginfo_t *info __maybe_unused,
1168c2ecf20Sopenharmony_ci			void *uc __maybe_unused)
1178c2ecf20Sopenharmony_ci{
1188c2ecf20Sopenharmony_ci	done = true;
1198c2ecf20Sopenharmony_ci}
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ciint bench_futex_wake(int argc, const char **argv)
1228c2ecf20Sopenharmony_ci{
1238c2ecf20Sopenharmony_ci	int ret = 0;
1248c2ecf20Sopenharmony_ci	unsigned int i, j;
1258c2ecf20Sopenharmony_ci	struct sigaction act;
1268c2ecf20Sopenharmony_ci	pthread_attr_t thread_attr;
1278c2ecf20Sopenharmony_ci	struct perf_cpu_map *cpu;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
1308c2ecf20Sopenharmony_ci	if (argc) {
1318c2ecf20Sopenharmony_ci		usage_with_options(bench_futex_wake_usage, options);
1328c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
1338c2ecf20Sopenharmony_ci	}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	cpu = perf_cpu_map__new(NULL);
1368c2ecf20Sopenharmony_ci	if (!cpu)
1378c2ecf20Sopenharmony_ci		err(EXIT_FAILURE, "calloc");
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	memset(&act, 0, sizeof(act));
1408c2ecf20Sopenharmony_ci	sigfillset(&act.sa_mask);
1418c2ecf20Sopenharmony_ci	act.sa_sigaction = toggle_done;
1428c2ecf20Sopenharmony_ci	sigaction(SIGINT, &act, NULL);
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci	if (!nthreads)
1458c2ecf20Sopenharmony_ci		nthreads = cpu->nr;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	worker = calloc(nthreads, sizeof(*worker));
1488c2ecf20Sopenharmony_ci	if (!worker)
1498c2ecf20Sopenharmony_ci		err(EXIT_FAILURE, "calloc");
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	if (!fshared)
1528c2ecf20Sopenharmony_ci		futex_flag = FUTEX_PRIVATE_FLAG;
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
1558c2ecf20Sopenharmony_ci	       "waking up %d at a time.\n\n",
1568c2ecf20Sopenharmony_ci	       getpid(), nthreads, fshared ? "shared":"private",  &futex1, nwakes);
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	init_stats(&wakeup_stats);
1598c2ecf20Sopenharmony_ci	init_stats(&waketime_stats);
1608c2ecf20Sopenharmony_ci	pthread_attr_init(&thread_attr);
1618c2ecf20Sopenharmony_ci	pthread_mutex_init(&thread_lock, NULL);
1628c2ecf20Sopenharmony_ci	pthread_cond_init(&thread_parent, NULL);
1638c2ecf20Sopenharmony_ci	pthread_cond_init(&thread_worker, NULL);
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	for (j = 0; j < bench_repeat && !done; j++) {
1668c2ecf20Sopenharmony_ci		unsigned int nwoken = 0;
1678c2ecf20Sopenharmony_ci		struct timeval start, end, runtime;
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci		/* create, launch & block all threads */
1708c2ecf20Sopenharmony_ci		block_threads(worker, thread_attr, cpu);
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci		/* make sure all threads are already blocked */
1738c2ecf20Sopenharmony_ci		pthread_mutex_lock(&thread_lock);
1748c2ecf20Sopenharmony_ci		while (threads_starting)
1758c2ecf20Sopenharmony_ci			pthread_cond_wait(&thread_parent, &thread_lock);
1768c2ecf20Sopenharmony_ci		pthread_cond_broadcast(&thread_worker);
1778c2ecf20Sopenharmony_ci		pthread_mutex_unlock(&thread_lock);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci		usleep(100000);
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci		/* Ok, all threads are patiently blocked, start waking folks up */
1828c2ecf20Sopenharmony_ci		gettimeofday(&start, NULL);
1838c2ecf20Sopenharmony_ci		while (nwoken != nthreads)
1848c2ecf20Sopenharmony_ci			nwoken += futex_wake(&futex1, nwakes, futex_flag);
1858c2ecf20Sopenharmony_ci		gettimeofday(&end, NULL);
1868c2ecf20Sopenharmony_ci		timersub(&end, &start, &runtime);
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci		update_stats(&wakeup_stats, nwoken);
1898c2ecf20Sopenharmony_ci		update_stats(&waketime_stats, runtime.tv_usec);
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci		if (!silent) {
1928c2ecf20Sopenharmony_ci			printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
1938c2ecf20Sopenharmony_ci			       j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
1948c2ecf20Sopenharmony_ci		}
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci		for (i = 0; i < nthreads; i++) {
1978c2ecf20Sopenharmony_ci			ret = pthread_join(worker[i], NULL);
1988c2ecf20Sopenharmony_ci			if (ret)
1998c2ecf20Sopenharmony_ci				err(EXIT_FAILURE, "pthread_join");
2008c2ecf20Sopenharmony_ci		}
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	}
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	/* cleanup & report results */
2058c2ecf20Sopenharmony_ci	pthread_cond_destroy(&thread_parent);
2068c2ecf20Sopenharmony_ci	pthread_cond_destroy(&thread_worker);
2078c2ecf20Sopenharmony_ci	pthread_mutex_destroy(&thread_lock);
2088c2ecf20Sopenharmony_ci	pthread_attr_destroy(&thread_attr);
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	print_summary();
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	free(worker);
2138c2ecf20Sopenharmony_ci	perf_cpu_map__put(cpu);
2148c2ecf20Sopenharmony_ci	return ret;
2158c2ecf20Sopenharmony_ci}
216