162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2015 Davidlohr Bueso. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Block a bunch of threads and let parallel waker threads wakeup an 662306a36Sopenharmony_ci * equal amount of them. The program output reflects the avg latency 762306a36Sopenharmony_ci * for each individual thread to service its share of work. Ultimately 862306a36Sopenharmony_ci * it can be used to measure futex_wake() changes. 962306a36Sopenharmony_ci */ 1062306a36Sopenharmony_ci#include "bench.h" 1162306a36Sopenharmony_ci#include <linux/compiler.h> 1262306a36Sopenharmony_ci#include "../util/debug.h" 1362306a36Sopenharmony_ci#include "../util/mutex.h" 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#ifndef HAVE_PTHREAD_BARRIER 1662306a36Sopenharmony_ciint bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused) 1762306a36Sopenharmony_ci{ 1862306a36Sopenharmony_ci pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__); 1962306a36Sopenharmony_ci return 0; 2062306a36Sopenharmony_ci} 2162306a36Sopenharmony_ci#else /* HAVE_PTHREAD_BARRIER */ 2262306a36Sopenharmony_ci/* For the CLR_() macros */ 2362306a36Sopenharmony_ci#include <string.h> 2462306a36Sopenharmony_ci#include <pthread.h> 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#include <signal.h> 2762306a36Sopenharmony_ci#include "../util/stat.h" 2862306a36Sopenharmony_ci#include <subcmd/parse-options.h> 2962306a36Sopenharmony_ci#include <linux/kernel.h> 3062306a36Sopenharmony_ci#include <linux/time64.h> 3162306a36Sopenharmony_ci#include <errno.h> 3262306a36Sopenharmony_ci#include "futex.h" 3362306a36Sopenharmony_ci#include <perf/cpumap.h> 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci#include <err.h> 3662306a36Sopenharmony_ci#include <stdlib.h> 3762306a36Sopenharmony_ci#include <sys/time.h> 3862306a36Sopenharmony_ci#include <sys/mman.h> 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cistruct thread_data { 4162306a36Sopenharmony_ci pthread_t worker; 4262306a36Sopenharmony_ci unsigned int nwoken; 4362306a36Sopenharmony_ci struct timeval runtime; 4462306a36Sopenharmony_ci}; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic unsigned int nwakes = 1; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci/* all threads will block on the same futex -- hash bucket chaos ;) */ 4962306a36Sopenharmony_cistatic u_int32_t futex = 0; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic pthread_t *blocked_worker; 5262306a36Sopenharmony_cistatic bool done = false; 5362306a36Sopenharmony_cistatic struct mutex thread_lock; 5462306a36Sopenharmony_cistatic struct cond thread_parent, thread_worker; 5562306a36Sopenharmony_cistatic pthread_barrier_t barrier; 5662306a36Sopenharmony_cistatic struct stats waketime_stats, wakeup_stats; 5762306a36Sopenharmony_cistatic unsigned int threads_starting; 5862306a36Sopenharmony_cistatic int futex_flag = 0; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_cistatic struct bench_futex_parameters params; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic const struct option options[] = { 6362306a36Sopenharmony_ci OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), 6462306a36Sopenharmony_ci OPT_UINTEGER('w', "nwakers", ¶ms.nwakes, "Specify amount of waking threads"), 6562306a36Sopenharmony_ci OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), 6662306a36Sopenharmony_ci OPT_BOOLEAN( 'S', "shared", ¶ms.fshared, "Use shared futexes instead of private ones"), 6762306a36Sopenharmony_ci OPT_BOOLEAN( 'm', "mlockall", ¶ms.mlockall, "Lock all current and future memory"), 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci OPT_END() 7062306a36Sopenharmony_ci}; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cistatic const char * const bench_futex_wake_parallel_usage[] = { 7362306a36Sopenharmony_ci "perf bench futex wake-parallel <options>", 7462306a36Sopenharmony_ci NULL 7562306a36Sopenharmony_ci}; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_cistatic void *waking_workerfn(void *arg) 7862306a36Sopenharmony_ci{ 7962306a36Sopenharmony_ci struct thread_data *waker = (struct thread_data *) arg; 8062306a36Sopenharmony_ci struct timeval start, end; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci pthread_barrier_wait(&barrier); 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci gettimeofday(&start, NULL); 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci waker->nwoken = futex_wake(&futex, nwakes, futex_flag); 8762306a36Sopenharmony_ci if (waker->nwoken != nwakes) 8862306a36Sopenharmony_ci warnx("couldn't wakeup all tasks (%d/%d)", 8962306a36Sopenharmony_ci waker->nwoken, nwakes); 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci gettimeofday(&end, NULL); 9262306a36Sopenharmony_ci timersub(&end, &start, &waker->runtime); 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci pthread_exit(NULL); 9562306a36Sopenharmony_ci return NULL; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic void wakeup_threads(struct thread_data *td) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci unsigned int i; 10162306a36Sopenharmony_ci pthread_attr_t thread_attr; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci pthread_attr_init(&thread_attr); 10462306a36Sopenharmony_ci pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci pthread_barrier_init(&barrier, NULL, params.nwakes + 1); 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci /* create and block all threads */ 10962306a36Sopenharmony_ci for (i = 0; i < params.nwakes; i++) { 11062306a36Sopenharmony_ci /* 11162306a36Sopenharmony_ci * Thread creation order will impact per-thread latency 11262306a36Sopenharmony_ci * as it will affect the order to acquire the hb spinlock. 11362306a36Sopenharmony_ci * For now let the scheduler decide. 11462306a36Sopenharmony_ci */ 11562306a36Sopenharmony_ci if (pthread_create(&td[i].worker, &thread_attr, 11662306a36Sopenharmony_ci waking_workerfn, (void *)&td[i])) 11762306a36Sopenharmony_ci err(EXIT_FAILURE, "pthread_create"); 11862306a36Sopenharmony_ci } 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci pthread_barrier_wait(&barrier); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci for (i = 0; i < params.nwakes; i++) 12362306a36Sopenharmony_ci if (pthread_join(td[i].worker, NULL)) 12462306a36Sopenharmony_ci err(EXIT_FAILURE, "pthread_join"); 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci pthread_barrier_destroy(&barrier); 12762306a36Sopenharmony_ci pthread_attr_destroy(&thread_attr); 12862306a36Sopenharmony_ci} 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_cistatic void *blocked_workerfn(void *arg __maybe_unused) 13162306a36Sopenharmony_ci{ 13262306a36Sopenharmony_ci mutex_lock(&thread_lock); 13362306a36Sopenharmony_ci threads_starting--; 13462306a36Sopenharmony_ci if (!threads_starting) 13562306a36Sopenharmony_ci cond_signal(&thread_parent); 13662306a36Sopenharmony_ci cond_wait(&thread_worker, &thread_lock); 13762306a36Sopenharmony_ci mutex_unlock(&thread_lock); 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci while (1) { /* handle spurious wakeups */ 14062306a36Sopenharmony_ci if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR) 14162306a36Sopenharmony_ci break; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci pthread_exit(NULL); 14562306a36Sopenharmony_ci return NULL; 14662306a36Sopenharmony_ci} 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_cistatic void block_threads(pthread_t *w, struct perf_cpu_map *cpu) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci cpu_set_t *cpuset; 15162306a36Sopenharmony_ci unsigned int i; 15262306a36Sopenharmony_ci int nrcpus = perf_cpu_map__nr(cpu); 15362306a36Sopenharmony_ci size_t size; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci threads_starting = params.nthreads; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci cpuset = CPU_ALLOC(nrcpus); 15862306a36Sopenharmony_ci BUG_ON(!cpuset); 15962306a36Sopenharmony_ci size = CPU_ALLOC_SIZE(nrcpus); 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci /* create and block all threads */ 16262306a36Sopenharmony_ci for (i = 0; i < params.nthreads; i++) { 16362306a36Sopenharmony_ci pthread_attr_t thread_attr; 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci pthread_attr_init(&thread_attr); 16662306a36Sopenharmony_ci CPU_ZERO_S(size, cpuset); 16762306a36Sopenharmony_ci CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { 17062306a36Sopenharmony_ci CPU_FREE(cpuset); 17162306a36Sopenharmony_ci err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); 17262306a36Sopenharmony_ci } 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) { 17562306a36Sopenharmony_ci CPU_FREE(cpuset); 17662306a36Sopenharmony_ci err(EXIT_FAILURE, "pthread_create"); 17762306a36Sopenharmony_ci } 17862306a36Sopenharmony_ci pthread_attr_destroy(&thread_attr); 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci CPU_FREE(cpuset); 18162306a36Sopenharmony_ci} 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_cistatic void print_run(struct thread_data *waking_worker, unsigned int run_num) 18462306a36Sopenharmony_ci{ 18562306a36Sopenharmony_ci unsigned int i, wakeup_avg; 18662306a36Sopenharmony_ci double waketime_avg, waketime_stddev; 18762306a36Sopenharmony_ci struct stats __waketime_stats, __wakeup_stats; 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci init_stats(&__wakeup_stats); 19062306a36Sopenharmony_ci init_stats(&__waketime_stats); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci for (i = 0; i < params.nwakes; i++) { 19362306a36Sopenharmony_ci update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec); 19462306a36Sopenharmony_ci update_stats(&__wakeup_stats, waking_worker[i].nwoken); 19562306a36Sopenharmony_ci } 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci waketime_avg = avg_stats(&__waketime_stats); 19862306a36Sopenharmony_ci waketime_stddev = stddev_stats(&__waketime_stats); 19962306a36Sopenharmony_ci wakeup_avg = avg_stats(&__wakeup_stats); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) " 20262306a36Sopenharmony_ci "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg, 20362306a36Sopenharmony_ci params.nthreads, waketime_avg / USEC_PER_MSEC, 20462306a36Sopenharmony_ci rel_stddev_stats(waketime_stddev, waketime_avg)); 20562306a36Sopenharmony_ci} 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_cistatic void print_summary(void) 20862306a36Sopenharmony_ci{ 20962306a36Sopenharmony_ci unsigned int wakeup_avg; 21062306a36Sopenharmony_ci double waketime_avg, waketime_stddev; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci waketime_avg = avg_stats(&waketime_stats); 21362306a36Sopenharmony_ci waketime_stddev = stddev_stats(&waketime_stats); 21462306a36Sopenharmony_ci wakeup_avg = avg_stats(&wakeup_stats); 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n", 21762306a36Sopenharmony_ci wakeup_avg, 21862306a36Sopenharmony_ci params.nthreads, 21962306a36Sopenharmony_ci waketime_avg / USEC_PER_MSEC, 22062306a36Sopenharmony_ci rel_stddev_stats(waketime_stddev, waketime_avg)); 22162306a36Sopenharmony_ci} 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic void do_run_stats(struct thread_data *waking_worker) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci unsigned int i; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci for (i = 0; i < params.nwakes; i++) { 22962306a36Sopenharmony_ci update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec); 23062306a36Sopenharmony_ci update_stats(&wakeup_stats, waking_worker[i].nwoken); 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci} 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_cistatic void toggle_done(int sig __maybe_unused, 23662306a36Sopenharmony_ci siginfo_t *info __maybe_unused, 23762306a36Sopenharmony_ci void *uc __maybe_unused) 23862306a36Sopenharmony_ci{ 23962306a36Sopenharmony_ci done = true; 24062306a36Sopenharmony_ci} 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ciint bench_futex_wake_parallel(int argc, const char **argv) 24362306a36Sopenharmony_ci{ 24462306a36Sopenharmony_ci int ret = 0; 24562306a36Sopenharmony_ci unsigned int i, j; 24662306a36Sopenharmony_ci struct sigaction act; 24762306a36Sopenharmony_ci struct thread_data *waking_worker; 24862306a36Sopenharmony_ci struct perf_cpu_map *cpu; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci argc = parse_options(argc, argv, options, 25162306a36Sopenharmony_ci bench_futex_wake_parallel_usage, 0); 25262306a36Sopenharmony_ci if (argc) { 25362306a36Sopenharmony_ci usage_with_options(bench_futex_wake_parallel_usage, options); 25462306a36Sopenharmony_ci exit(EXIT_FAILURE); 25562306a36Sopenharmony_ci } 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci memset(&act, 0, sizeof(act)); 25862306a36Sopenharmony_ci sigfillset(&act.sa_mask); 25962306a36Sopenharmony_ci act.sa_sigaction = toggle_done; 26062306a36Sopenharmony_ci sigaction(SIGINT, &act, NULL); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci if (params.mlockall) { 26362306a36Sopenharmony_ci if (mlockall(MCL_CURRENT | MCL_FUTURE)) 26462306a36Sopenharmony_ci err(EXIT_FAILURE, "mlockall"); 26562306a36Sopenharmony_ci } 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci cpu = perf_cpu_map__new(NULL); 26862306a36Sopenharmony_ci if (!cpu) 26962306a36Sopenharmony_ci err(EXIT_FAILURE, "calloc"); 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci if (!params.nthreads) 27262306a36Sopenharmony_ci params.nthreads = perf_cpu_map__nr(cpu); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci /* some sanity checks */ 27562306a36Sopenharmony_ci if (params.nwakes > params.nthreads || 27662306a36Sopenharmony_ci !params.nwakes) 27762306a36Sopenharmony_ci params.nwakes = params.nthreads; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci if (params.nthreads % params.nwakes) 28062306a36Sopenharmony_ci errx(EXIT_FAILURE, "Must be perfectly divisible"); 28162306a36Sopenharmony_ci /* 28262306a36Sopenharmony_ci * Each thread will wakeup nwakes tasks in 28362306a36Sopenharmony_ci * a single futex_wait call. 28462306a36Sopenharmony_ci */ 28562306a36Sopenharmony_ci nwakes = params.nthreads/params.nwakes; 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci blocked_worker = calloc(params.nthreads, sizeof(*blocked_worker)); 28862306a36Sopenharmony_ci if (!blocked_worker) 28962306a36Sopenharmony_ci err(EXIT_FAILURE, "calloc"); 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci if (!params.fshared) 29262306a36Sopenharmony_ci futex_flag = FUTEX_PRIVATE_FLAG; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci printf("Run summary [PID %d]: blocking on %d threads (at [%s] " 29562306a36Sopenharmony_ci "futex %p), %d threads waking up %d at a time.\n\n", 29662306a36Sopenharmony_ci getpid(), params.nthreads, params.fshared ? "shared":"private", 29762306a36Sopenharmony_ci &futex, params.nwakes, nwakes); 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci init_stats(&wakeup_stats); 30062306a36Sopenharmony_ci init_stats(&waketime_stats); 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci mutex_init(&thread_lock); 30362306a36Sopenharmony_ci cond_init(&thread_parent); 30462306a36Sopenharmony_ci cond_init(&thread_worker); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci for (j = 0; j < bench_repeat && !done; j++) { 30762306a36Sopenharmony_ci waking_worker = calloc(params.nwakes, sizeof(*waking_worker)); 30862306a36Sopenharmony_ci if (!waking_worker) 30962306a36Sopenharmony_ci err(EXIT_FAILURE, "calloc"); 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci /* create, launch & block all threads */ 31262306a36Sopenharmony_ci block_threads(blocked_worker, cpu); 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci /* make sure all threads are already blocked */ 31562306a36Sopenharmony_ci mutex_lock(&thread_lock); 31662306a36Sopenharmony_ci while (threads_starting) 31762306a36Sopenharmony_ci cond_wait(&thread_parent, &thread_lock); 31862306a36Sopenharmony_ci cond_broadcast(&thread_worker); 31962306a36Sopenharmony_ci mutex_unlock(&thread_lock); 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci usleep(100000); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci /* Ok, all threads are patiently blocked, start waking folks up */ 32462306a36Sopenharmony_ci wakeup_threads(waking_worker); 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci for (i = 0; i < params.nthreads; i++) { 32762306a36Sopenharmony_ci ret = pthread_join(blocked_worker[i], NULL); 32862306a36Sopenharmony_ci if (ret) 32962306a36Sopenharmony_ci err(EXIT_FAILURE, "pthread_join"); 33062306a36Sopenharmony_ci } 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci do_run_stats(waking_worker); 33362306a36Sopenharmony_ci if (!params.silent) 33462306a36Sopenharmony_ci print_run(waking_worker, j); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci free(waking_worker); 33762306a36Sopenharmony_ci } 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci /* cleanup & report results */ 34062306a36Sopenharmony_ci cond_destroy(&thread_parent); 34162306a36Sopenharmony_ci cond_destroy(&thread_worker); 34262306a36Sopenharmony_ci mutex_destroy(&thread_lock); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci print_summary(); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci free(blocked_worker); 34762306a36Sopenharmony_ci perf_cpu_map__put(cpu); 34862306a36Sopenharmony_ci return ret; 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci#endif /* HAVE_PTHREAD_BARRIER */ 351