18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * futex-requeue: Block a bunch of threads on futex1 and requeue them
68c2ecf20Sopenharmony_ci *                on futex2, N at a time.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * This program is particularly useful to measure the latency of nthread
98c2ecf20Sopenharmony_ci * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
/* For the CPU_*() macros (CPU_ZERO, CPU_SET) */
138c2ecf20Sopenharmony_ci#include <string.h>
148c2ecf20Sopenharmony_ci#include <pthread.h>
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include <signal.h>
178c2ecf20Sopenharmony_ci#include "../util/stat.h"
188c2ecf20Sopenharmony_ci#include <subcmd/parse-options.h>
198c2ecf20Sopenharmony_ci#include <linux/compiler.h>
208c2ecf20Sopenharmony_ci#include <linux/kernel.h>
218c2ecf20Sopenharmony_ci#include <linux/time64.h>
228c2ecf20Sopenharmony_ci#include <errno.h>
238c2ecf20Sopenharmony_ci#include <internal/cpumap.h>
248c2ecf20Sopenharmony_ci#include <perf/cpumap.h>
258c2ecf20Sopenharmony_ci#include "bench.h"
268c2ecf20Sopenharmony_ci#include "futex.h"
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci#include <err.h>
298c2ecf20Sopenharmony_ci#include <stdlib.h>
308c2ecf20Sopenharmony_ci#include <sys/time.h>
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_cistatic u_int32_t futex1 = 0, futex2 = 0;
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci/*
358c2ecf20Sopenharmony_ci * How many tasks to requeue at a time.
368c2ecf20Sopenharmony_ci * Default to 1 in order to make the kernel work more.
378c2ecf20Sopenharmony_ci */
388c2ecf20Sopenharmony_cistatic unsigned int nrequeue = 1;
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_cistatic pthread_t *worker;
418c2ecf20Sopenharmony_cistatic bool done = false, silent = false, fshared = false;
428c2ecf20Sopenharmony_cistatic pthread_mutex_t thread_lock;
438c2ecf20Sopenharmony_cistatic pthread_cond_t thread_parent, thread_worker;
448c2ecf20Sopenharmony_cistatic struct stats requeuetime_stats, requeued_stats;
458c2ecf20Sopenharmony_cistatic unsigned int threads_starting, nthreads = 0;
468c2ecf20Sopenharmony_cistatic int futex_flag = 0;
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_cistatic const struct option options[] = {
498c2ecf20Sopenharmony_ci	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
508c2ecf20Sopenharmony_ci	OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
518c2ecf20Sopenharmony_ci	OPT_BOOLEAN( 's', "silent",   &silent,   "Silent mode: do not display data/details"),
528c2ecf20Sopenharmony_ci	OPT_BOOLEAN( 'S', "shared",   &fshared,  "Use shared futexes instead of private ones"),
538c2ecf20Sopenharmony_ci	OPT_END()
548c2ecf20Sopenharmony_ci};
558c2ecf20Sopenharmony_ci
/* NULL-terminated usage text handed to parse_options()/usage_with_options(). */
static const char * const bench_futex_requeue_usage[] = {
	"perf bench futex requeue <options>", NULL
};
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_cistatic void print_summary(void)
628c2ecf20Sopenharmony_ci{
638c2ecf20Sopenharmony_ci	double requeuetime_avg = avg_stats(&requeuetime_stats);
648c2ecf20Sopenharmony_ci	double requeuetime_stddev = stddev_stats(&requeuetime_stats);
658c2ecf20Sopenharmony_ci	unsigned int requeued_avg = avg_stats(&requeued_stats);
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
688c2ecf20Sopenharmony_ci	       requeued_avg,
698c2ecf20Sopenharmony_ci	       nthreads,
708c2ecf20Sopenharmony_ci	       requeuetime_avg / USEC_PER_MSEC,
718c2ecf20Sopenharmony_ci	       rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
728c2ecf20Sopenharmony_ci}
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_cistatic void *workerfn(void *arg __maybe_unused)
758c2ecf20Sopenharmony_ci{
768c2ecf20Sopenharmony_ci	pthread_mutex_lock(&thread_lock);
778c2ecf20Sopenharmony_ci	threads_starting--;
788c2ecf20Sopenharmony_ci	if (!threads_starting)
798c2ecf20Sopenharmony_ci		pthread_cond_signal(&thread_parent);
808c2ecf20Sopenharmony_ci	pthread_cond_wait(&thread_worker, &thread_lock);
818c2ecf20Sopenharmony_ci	pthread_mutex_unlock(&thread_lock);
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci	futex_wait(&futex1, 0, NULL, futex_flag);
848c2ecf20Sopenharmony_ci	return NULL;
858c2ecf20Sopenharmony_ci}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_cistatic void block_threads(pthread_t *w,
888c2ecf20Sopenharmony_ci			  pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
898c2ecf20Sopenharmony_ci{
908c2ecf20Sopenharmony_ci	cpu_set_t cpuset;
918c2ecf20Sopenharmony_ci	unsigned int i;
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	threads_starting = nthreads;
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	/* create and block all threads */
968c2ecf20Sopenharmony_ci	for (i = 0; i < nthreads; i++) {
978c2ecf20Sopenharmony_ci		CPU_ZERO(&cpuset);
988c2ecf20Sopenharmony_ci		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
1018c2ecf20Sopenharmony_ci			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
1048c2ecf20Sopenharmony_ci			err(EXIT_FAILURE, "pthread_create");
1058c2ecf20Sopenharmony_ci	}
1068c2ecf20Sopenharmony_ci}
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_cistatic void toggle_done(int sig __maybe_unused,
1098c2ecf20Sopenharmony_ci			siginfo_t *info __maybe_unused,
1108c2ecf20Sopenharmony_ci			void *uc __maybe_unused)
1118c2ecf20Sopenharmony_ci{
1128c2ecf20Sopenharmony_ci	done = true;
1138c2ecf20Sopenharmony_ci}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ciint bench_futex_requeue(int argc, const char **argv)
1168c2ecf20Sopenharmony_ci{
1178c2ecf20Sopenharmony_ci	int ret = 0;
1188c2ecf20Sopenharmony_ci	unsigned int i, j;
1198c2ecf20Sopenharmony_ci	struct sigaction act;
1208c2ecf20Sopenharmony_ci	pthread_attr_t thread_attr;
1218c2ecf20Sopenharmony_ci	struct perf_cpu_map *cpu;
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
1248c2ecf20Sopenharmony_ci	if (argc)
1258c2ecf20Sopenharmony_ci		goto err;
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	cpu = perf_cpu_map__new(NULL);
1288c2ecf20Sopenharmony_ci	if (!cpu)
1298c2ecf20Sopenharmony_ci		err(EXIT_FAILURE, "cpu_map__new");
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci	memset(&act, 0, sizeof(act));
1328c2ecf20Sopenharmony_ci	sigfillset(&act.sa_mask);
1338c2ecf20Sopenharmony_ci	act.sa_sigaction = toggle_done;
1348c2ecf20Sopenharmony_ci	sigaction(SIGINT, &act, NULL);
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	if (!nthreads)
1378c2ecf20Sopenharmony_ci		nthreads = cpu->nr;
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	worker = calloc(nthreads, sizeof(*worker));
1408c2ecf20Sopenharmony_ci	if (!worker)
1418c2ecf20Sopenharmony_ci		err(EXIT_FAILURE, "calloc");
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	if (!fshared)
1448c2ecf20Sopenharmony_ci		futex_flag = FUTEX_PRIVATE_FLAG;
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	if (nrequeue > nthreads)
1478c2ecf20Sopenharmony_ci		nrequeue = nthreads;
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
1508c2ecf20Sopenharmony_ci	       "%d at a time.\n\n",  getpid(), nthreads,
1518c2ecf20Sopenharmony_ci	       fshared ? "shared":"private", &futex1, &futex2, nrequeue);
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	init_stats(&requeued_stats);
1548c2ecf20Sopenharmony_ci	init_stats(&requeuetime_stats);
1558c2ecf20Sopenharmony_ci	pthread_attr_init(&thread_attr);
1568c2ecf20Sopenharmony_ci	pthread_mutex_init(&thread_lock, NULL);
1578c2ecf20Sopenharmony_ci	pthread_cond_init(&thread_parent, NULL);
1588c2ecf20Sopenharmony_ci	pthread_cond_init(&thread_worker, NULL);
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	for (j = 0; j < bench_repeat && !done; j++) {
1618c2ecf20Sopenharmony_ci		unsigned int nrequeued = 0;
1628c2ecf20Sopenharmony_ci		struct timeval start, end, runtime;
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci		/* create, launch & block all threads */
1658c2ecf20Sopenharmony_ci		block_threads(worker, thread_attr, cpu);
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci		/* make sure all threads are already blocked */
1688c2ecf20Sopenharmony_ci		pthread_mutex_lock(&thread_lock);
1698c2ecf20Sopenharmony_ci		while (threads_starting)
1708c2ecf20Sopenharmony_ci			pthread_cond_wait(&thread_parent, &thread_lock);
1718c2ecf20Sopenharmony_ci		pthread_cond_broadcast(&thread_worker);
1728c2ecf20Sopenharmony_ci		pthread_mutex_unlock(&thread_lock);
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci		usleep(100000);
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci		/* Ok, all threads are patiently blocked, start requeueing */
1778c2ecf20Sopenharmony_ci		gettimeofday(&start, NULL);
1788c2ecf20Sopenharmony_ci		while (nrequeued < nthreads) {
1798c2ecf20Sopenharmony_ci			/*
1808c2ecf20Sopenharmony_ci			 * Do not wakeup any tasks blocked on futex1, allowing
1818c2ecf20Sopenharmony_ci			 * us to really measure futex_wait functionality.
1828c2ecf20Sopenharmony_ci			 */
1838c2ecf20Sopenharmony_ci			nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
1848c2ecf20Sopenharmony_ci						       nrequeue, futex_flag);
1858c2ecf20Sopenharmony_ci		}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci		gettimeofday(&end, NULL);
1888c2ecf20Sopenharmony_ci		timersub(&end, &start, &runtime);
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci		update_stats(&requeued_stats, nrequeued);
1918c2ecf20Sopenharmony_ci		update_stats(&requeuetime_stats, runtime.tv_usec);
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci		if (!silent) {
1948c2ecf20Sopenharmony_ci			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
1958c2ecf20Sopenharmony_ci			       j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
1968c2ecf20Sopenharmony_ci		}
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci		/* everybody should be blocked on futex2, wake'em up */
1998c2ecf20Sopenharmony_ci		nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
2008c2ecf20Sopenharmony_ci		if (nthreads != nrequeued)
2018c2ecf20Sopenharmony_ci			warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci		for (i = 0; i < nthreads; i++) {
2048c2ecf20Sopenharmony_ci			ret = pthread_join(worker[i], NULL);
2058c2ecf20Sopenharmony_ci			if (ret)
2068c2ecf20Sopenharmony_ci				err(EXIT_FAILURE, "pthread_join");
2078c2ecf20Sopenharmony_ci		}
2088c2ecf20Sopenharmony_ci	}
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	/* cleanup & report results */
2118c2ecf20Sopenharmony_ci	pthread_cond_destroy(&thread_parent);
2128c2ecf20Sopenharmony_ci	pthread_cond_destroy(&thread_worker);
2138c2ecf20Sopenharmony_ci	pthread_mutex_destroy(&thread_lock);
2148c2ecf20Sopenharmony_ci	pthread_attr_destroy(&thread_attr);
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	print_summary();
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	free(worker);
2198c2ecf20Sopenharmony_ci	perf_cpu_map__put(cpu);
2208c2ecf20Sopenharmony_ci	return ret;
2218c2ecf20Sopenharmony_cierr:
2228c2ecf20Sopenharmony_ci	usage_with_options(bench_futex_requeue_usage, options);
2238c2ecf20Sopenharmony_ci	exit(EXIT_FAILURE);
2248c2ecf20Sopenharmony_ci}
225