18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * This program is particularly useful for measuring the kernel's futex hash
88c2ecf20Sopenharmony_ci * table/function implementation. In order for it to make sense, use with as
98c2ecf20Sopenharmony_ci * many threads and futexes as possible.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
/* For the CPU_*() macros */
138c2ecf20Sopenharmony_ci#include <string.h>
148c2ecf20Sopenharmony_ci#include <pthread.h>
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include <errno.h>
178c2ecf20Sopenharmony_ci#include <signal.h>
188c2ecf20Sopenharmony_ci#include <stdlib.h>
198c2ecf20Sopenharmony_ci#include <linux/compiler.h>
208c2ecf20Sopenharmony_ci#include <linux/kernel.h>
218c2ecf20Sopenharmony_ci#include <linux/zalloc.h>
228c2ecf20Sopenharmony_ci#include <sys/time.h>
238c2ecf20Sopenharmony_ci#include <internal/cpumap.h>
248c2ecf20Sopenharmony_ci#include <perf/cpumap.h>
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci#include "../util/stat.h"
278c2ecf20Sopenharmony_ci#include <subcmd/parse-options.h>
288c2ecf20Sopenharmony_ci#include "bench.h"
298c2ecf20Sopenharmony_ci#include "futex.h"
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci#include <err.h>
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_cistatic unsigned int nthreads = 0;
348c2ecf20Sopenharmony_cistatic unsigned int nsecs    = 10;
358c2ecf20Sopenharmony_ci/* amount of futexes per thread */
368c2ecf20Sopenharmony_cistatic unsigned int nfutexes = 1024;
378c2ecf20Sopenharmony_cistatic bool fshared = false, done = false, silent = false;
388c2ecf20Sopenharmony_cistatic int futex_flag = 0;
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_cistruct timeval bench__start, bench__end, bench__runtime;
418c2ecf20Sopenharmony_cistatic pthread_mutex_t thread_lock;
428c2ecf20Sopenharmony_cistatic unsigned int threads_starting;
438c2ecf20Sopenharmony_cistatic struct stats throughput_stats;
448c2ecf20Sopenharmony_cistatic pthread_cond_t thread_parent, thread_worker;
458c2ecf20Sopenharmony_ci
/* Per-thread bookkeeping for one benchmark worker. */
struct worker {
	/* index of this worker, used when printing per-thread results */
	int tid;
	/* array of futex words this worker operates on */
	u_int32_t *futex;
	/* pthread handle, joined by the parent at the end of the run */
	pthread_t thread;
	/* number of futex calls issued by this worker */
	unsigned long ops;
};
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_cistatic const struct option options[] = {
548c2ecf20Sopenharmony_ci	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
558c2ecf20Sopenharmony_ci	OPT_UINTEGER('r', "runtime", &nsecs,    "Specify runtime (in seconds)"),
568c2ecf20Sopenharmony_ci	OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
578c2ecf20Sopenharmony_ci	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
588c2ecf20Sopenharmony_ci	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
598c2ecf20Sopenharmony_ci	OPT_END()
608c2ecf20Sopenharmony_ci};
618c2ecf20Sopenharmony_ci
/* NULL-terminated usage lines handed to the option parser. */
static const char * const bench_futex_hash_usage[] = {
	"perf bench futex hash <options>",
	NULL,
};
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_cistatic void *workerfn(void *arg)
688c2ecf20Sopenharmony_ci{
698c2ecf20Sopenharmony_ci	int ret;
708c2ecf20Sopenharmony_ci	struct worker *w = (struct worker *) arg;
718c2ecf20Sopenharmony_ci	unsigned int i;
728c2ecf20Sopenharmony_ci	unsigned long ops = w->ops; /* avoid cacheline bouncing */
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci	pthread_mutex_lock(&thread_lock);
758c2ecf20Sopenharmony_ci	threads_starting--;
768c2ecf20Sopenharmony_ci	if (!threads_starting)
778c2ecf20Sopenharmony_ci		pthread_cond_signal(&thread_parent);
788c2ecf20Sopenharmony_ci	pthread_cond_wait(&thread_worker, &thread_lock);
798c2ecf20Sopenharmony_ci	pthread_mutex_unlock(&thread_lock);
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci	do {
828c2ecf20Sopenharmony_ci		for (i = 0; i < nfutexes; i++, ops++) {
838c2ecf20Sopenharmony_ci			/*
848c2ecf20Sopenharmony_ci			 * We want the futex calls to fail in order to stress
858c2ecf20Sopenharmony_ci			 * the hashing of uaddr and not measure other steps,
868c2ecf20Sopenharmony_ci			 * such as internal waitqueue handling, thus enlarging
878c2ecf20Sopenharmony_ci			 * the critical region protected by hb->lock.
888c2ecf20Sopenharmony_ci			 */
898c2ecf20Sopenharmony_ci			ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
908c2ecf20Sopenharmony_ci			if (!silent &&
918c2ecf20Sopenharmony_ci			    (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
928c2ecf20Sopenharmony_ci				warn("Non-expected futex return call");
938c2ecf20Sopenharmony_ci		}
948c2ecf20Sopenharmony_ci	}  while (!done);
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci	w->ops = ops;
978c2ecf20Sopenharmony_ci	return NULL;
988c2ecf20Sopenharmony_ci}
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_cistatic void toggle_done(int sig __maybe_unused,
1018c2ecf20Sopenharmony_ci			siginfo_t *info __maybe_unused,
1028c2ecf20Sopenharmony_ci			void *uc __maybe_unused)
1038c2ecf20Sopenharmony_ci{
1048c2ecf20Sopenharmony_ci	/* inform all threads that we're done for the day */
1058c2ecf20Sopenharmony_ci	done = true;
1068c2ecf20Sopenharmony_ci	gettimeofday(&bench__end, NULL);
1078c2ecf20Sopenharmony_ci	timersub(&bench__end, &bench__start, &bench__runtime);
1088c2ecf20Sopenharmony_ci}
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_cistatic void print_summary(void)
1118c2ecf20Sopenharmony_ci{
1128c2ecf20Sopenharmony_ci	unsigned long avg = avg_stats(&throughput_stats);
1138c2ecf20Sopenharmony_ci	double stddev = stddev_stats(&throughput_stats);
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
1168c2ecf20Sopenharmony_ci	       !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
1178c2ecf20Sopenharmony_ci	       (int)bench__runtime.tv_sec);
1188c2ecf20Sopenharmony_ci}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ciint bench_futex_hash(int argc, const char **argv)
1218c2ecf20Sopenharmony_ci{
1228c2ecf20Sopenharmony_ci	int ret = 0;
1238c2ecf20Sopenharmony_ci	cpu_set_t cpuset;
1248c2ecf20Sopenharmony_ci	struct sigaction act;
1258c2ecf20Sopenharmony_ci	unsigned int i;
1268c2ecf20Sopenharmony_ci	pthread_attr_t thread_attr;
1278c2ecf20Sopenharmony_ci	struct worker *worker = NULL;
1288c2ecf20Sopenharmony_ci	struct perf_cpu_map *cpu;
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
1318c2ecf20Sopenharmony_ci	if (argc) {
1328c2ecf20Sopenharmony_ci		usage_with_options(bench_futex_hash_usage, options);
1338c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
1348c2ecf20Sopenharmony_ci	}
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	cpu = perf_cpu_map__new(NULL);
1378c2ecf20Sopenharmony_ci	if (!cpu)
1388c2ecf20Sopenharmony_ci		goto errmem;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	memset(&act, 0, sizeof(act));
1418c2ecf20Sopenharmony_ci	sigfillset(&act.sa_mask);
1428c2ecf20Sopenharmony_ci	act.sa_sigaction = toggle_done;
1438c2ecf20Sopenharmony_ci	sigaction(SIGINT, &act, NULL);
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	if (!nthreads) /* default to the number of CPUs */
1468c2ecf20Sopenharmony_ci		nthreads = cpu->nr;
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	worker = calloc(nthreads, sizeof(*worker));
1498c2ecf20Sopenharmony_ci	if (!worker)
1508c2ecf20Sopenharmony_ci		goto errmem;
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	if (!fshared)
1538c2ecf20Sopenharmony_ci		futex_flag = FUTEX_PRIVATE_FLAG;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
1568c2ecf20Sopenharmony_ci	       getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	init_stats(&throughput_stats);
1598c2ecf20Sopenharmony_ci	pthread_mutex_init(&thread_lock, NULL);
1608c2ecf20Sopenharmony_ci	pthread_cond_init(&thread_parent, NULL);
1618c2ecf20Sopenharmony_ci	pthread_cond_init(&thread_worker, NULL);
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	threads_starting = nthreads;
1648c2ecf20Sopenharmony_ci	pthread_attr_init(&thread_attr);
1658c2ecf20Sopenharmony_ci	gettimeofday(&bench__start, NULL);
1668c2ecf20Sopenharmony_ci	for (i = 0; i < nthreads; i++) {
1678c2ecf20Sopenharmony_ci		worker[i].tid = i;
1688c2ecf20Sopenharmony_ci		worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
1698c2ecf20Sopenharmony_ci		if (!worker[i].futex)
1708c2ecf20Sopenharmony_ci			goto errmem;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci		CPU_ZERO(&cpuset);
1738c2ecf20Sopenharmony_ci		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
1768c2ecf20Sopenharmony_ci		if (ret)
1778c2ecf20Sopenharmony_ci			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci		ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
1808c2ecf20Sopenharmony_ci				     (void *)(struct worker *) &worker[i]);
1818c2ecf20Sopenharmony_ci		if (ret)
1828c2ecf20Sopenharmony_ci			err(EXIT_FAILURE, "pthread_create");
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	}
1858c2ecf20Sopenharmony_ci	pthread_attr_destroy(&thread_attr);
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	pthread_mutex_lock(&thread_lock);
1888c2ecf20Sopenharmony_ci	while (threads_starting)
1898c2ecf20Sopenharmony_ci		pthread_cond_wait(&thread_parent, &thread_lock);
1908c2ecf20Sopenharmony_ci	pthread_cond_broadcast(&thread_worker);
1918c2ecf20Sopenharmony_ci	pthread_mutex_unlock(&thread_lock);
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	sleep(nsecs);
1948c2ecf20Sopenharmony_ci	toggle_done(0, NULL, NULL);
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	for (i = 0; i < nthreads; i++) {
1978c2ecf20Sopenharmony_ci		ret = pthread_join(worker[i].thread, NULL);
1988c2ecf20Sopenharmony_ci		if (ret)
1998c2ecf20Sopenharmony_ci			err(EXIT_FAILURE, "pthread_join");
2008c2ecf20Sopenharmony_ci	}
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	/* cleanup & report results */
2038c2ecf20Sopenharmony_ci	pthread_cond_destroy(&thread_parent);
2048c2ecf20Sopenharmony_ci	pthread_cond_destroy(&thread_worker);
2058c2ecf20Sopenharmony_ci	pthread_mutex_destroy(&thread_lock);
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	for (i = 0; i < nthreads; i++) {
2088c2ecf20Sopenharmony_ci		unsigned long t = bench__runtime.tv_sec > 0 ?
2098c2ecf20Sopenharmony_ci			worker[i].ops / bench__runtime.tv_sec : 0;
2108c2ecf20Sopenharmony_ci		update_stats(&throughput_stats, t);
2118c2ecf20Sopenharmony_ci		if (!silent) {
2128c2ecf20Sopenharmony_ci			if (nfutexes == 1)
2138c2ecf20Sopenharmony_ci				printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
2148c2ecf20Sopenharmony_ci				       worker[i].tid, &worker[i].futex[0], t);
2158c2ecf20Sopenharmony_ci			else
2168c2ecf20Sopenharmony_ci				printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
2178c2ecf20Sopenharmony_ci				       worker[i].tid, &worker[i].futex[0],
2188c2ecf20Sopenharmony_ci				       &worker[i].futex[nfutexes-1], t);
2198c2ecf20Sopenharmony_ci		}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci		zfree(&worker[i].futex);
2228c2ecf20Sopenharmony_ci	}
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	print_summary();
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	free(worker);
2278c2ecf20Sopenharmony_ci	free(cpu);
2288c2ecf20Sopenharmony_ci	return ret;
2298c2ecf20Sopenharmony_cierrmem:
2308c2ecf20Sopenharmony_ci	err(EXIT_FAILURE, "calloc");
2318c2ecf20Sopenharmony_ci}
232