18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2015 Davidlohr Bueso. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Block a bunch of threads and let parallel waker threads wakeup an 68c2ecf20Sopenharmony_ci * equal amount of them. The program output reflects the avg latency 78c2ecf20Sopenharmony_ci * for each individual thread to service its share of work. Ultimately 88c2ecf20Sopenharmony_ci * it can be used to measure futex_wake() changes. 98c2ecf20Sopenharmony_ci */ 108c2ecf20Sopenharmony_ci#include "bench.h" 118c2ecf20Sopenharmony_ci#include <linux/compiler.h> 128c2ecf20Sopenharmony_ci#include "../util/debug.h" 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#ifndef HAVE_PTHREAD_BARRIER 158c2ecf20Sopenharmony_ciint bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused) 168c2ecf20Sopenharmony_ci{ 178c2ecf20Sopenharmony_ci pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__); 188c2ecf20Sopenharmony_ci return 0; 198c2ecf20Sopenharmony_ci} 208c2ecf20Sopenharmony_ci#else /* HAVE_PTHREAD_BARRIER */ 218c2ecf20Sopenharmony_ci/* For the CLR_() macros */ 228c2ecf20Sopenharmony_ci#include <string.h> 238c2ecf20Sopenharmony_ci#include <pthread.h> 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#include <signal.h> 268c2ecf20Sopenharmony_ci#include "../util/stat.h" 278c2ecf20Sopenharmony_ci#include <subcmd/parse-options.h> 288c2ecf20Sopenharmony_ci#include <linux/kernel.h> 298c2ecf20Sopenharmony_ci#include <linux/time64.h> 308c2ecf20Sopenharmony_ci#include <errno.h> 318c2ecf20Sopenharmony_ci#include "futex.h" 328c2ecf20Sopenharmony_ci#include <internal/cpumap.h> 338c2ecf20Sopenharmony_ci#include <perf/cpumap.h> 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci#include <err.h> 368c2ecf20Sopenharmony_ci#include <stdlib.h> 378c2ecf20Sopenharmony_ci#include <sys/time.h> 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_cistruct thread_data { 408c2ecf20Sopenharmony_ci pthread_t worker; 418c2ecf20Sopenharmony_ci unsigned int nwoken; 428c2ecf20Sopenharmony_ci struct timeval runtime; 438c2ecf20Sopenharmony_ci}; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cistatic unsigned int nwakes = 1; 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci/* all threads will block on the same futex -- hash bucket chaos ;) */ 488c2ecf20Sopenharmony_cistatic u_int32_t futex = 0; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_cistatic pthread_t *blocked_worker; 518c2ecf20Sopenharmony_cistatic bool done = false, silent = false, fshared = false; 528c2ecf20Sopenharmony_cistatic unsigned int nblocked_threads = 0, nwaking_threads = 0; 538c2ecf20Sopenharmony_cistatic pthread_mutex_t thread_lock; 548c2ecf20Sopenharmony_cistatic pthread_cond_t thread_parent, thread_worker; 558c2ecf20Sopenharmony_cistatic pthread_barrier_t barrier; 568c2ecf20Sopenharmony_cistatic struct stats waketime_stats, wakeup_stats; 578c2ecf20Sopenharmony_cistatic unsigned int threads_starting; 588c2ecf20Sopenharmony_cistatic int futex_flag = 0; 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_cistatic const struct option options[] = { 618c2ecf20Sopenharmony_ci OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"), 628c2ecf20Sopenharmony_ci OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"), 638c2ecf20Sopenharmony_ci OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), 648c2ecf20Sopenharmony_ci OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), 658c2ecf20Sopenharmony_ci OPT_END() 668c2ecf20Sopenharmony_ci}; 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_cistatic const char * const bench_futex_wake_parallel_usage[] = { 698c2ecf20Sopenharmony_ci "perf bench futex wake-parallel <options>", 708c2ecf20Sopenharmony_ci NULL 718c2ecf20Sopenharmony_ci}; 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_cistatic void *waking_workerfn(void *arg) 748c2ecf20Sopenharmony_ci{ 758c2ecf20Sopenharmony_ci struct thread_data *waker = (struct thread_data *) arg; 768c2ecf20Sopenharmony_ci struct timeval start, end; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci pthread_barrier_wait(&barrier); 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci gettimeofday(&start, NULL); 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci waker->nwoken = futex_wake(&futex, nwakes, futex_flag); 838c2ecf20Sopenharmony_ci if (waker->nwoken != nwakes) 848c2ecf20Sopenharmony_ci warnx("couldn't wakeup all tasks (%d/%d)", 858c2ecf20Sopenharmony_ci waker->nwoken, nwakes); 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci gettimeofday(&end, NULL); 888c2ecf20Sopenharmony_ci timersub(&end, &start, &waker->runtime); 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci pthread_exit(NULL); 918c2ecf20Sopenharmony_ci return NULL; 928c2ecf20Sopenharmony_ci} 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_cistatic void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) 958c2ecf20Sopenharmony_ci{ 968c2ecf20Sopenharmony_ci unsigned int i; 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci pthread_barrier_init(&barrier, NULL, nwaking_threads + 1); 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci /* create and block all threads */ 1038c2ecf20Sopenharmony_ci for (i = 0; i < nwaking_threads; i++) { 1048c2ecf20Sopenharmony_ci /* 1058c2ecf20Sopenharmony_ci * Thread creation order will impact per-thread latency 1068c2ecf20Sopenharmony_ci * as it will affect the order to acquire the hb spinlock. 1078c2ecf20Sopenharmony_ci * For now let the scheduler decide. 1088c2ecf20Sopenharmony_ci */ 1098c2ecf20Sopenharmony_ci if (pthread_create(&td[i].worker, &thread_attr, 1108c2ecf20Sopenharmony_ci waking_workerfn, (void *)&td[i])) 1118c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "pthread_create"); 1128c2ecf20Sopenharmony_ci } 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci pthread_barrier_wait(&barrier); 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci for (i = 0; i < nwaking_threads; i++) 1178c2ecf20Sopenharmony_ci if (pthread_join(td[i].worker, NULL)) 1188c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "pthread_join"); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci pthread_barrier_destroy(&barrier); 1218c2ecf20Sopenharmony_ci} 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_cistatic void *blocked_workerfn(void *arg __maybe_unused) 1248c2ecf20Sopenharmony_ci{ 1258c2ecf20Sopenharmony_ci pthread_mutex_lock(&thread_lock); 1268c2ecf20Sopenharmony_ci threads_starting--; 1278c2ecf20Sopenharmony_ci if (!threads_starting) 1288c2ecf20Sopenharmony_ci pthread_cond_signal(&thread_parent); 1298c2ecf20Sopenharmony_ci pthread_cond_wait(&thread_worker, &thread_lock); 1308c2ecf20Sopenharmony_ci pthread_mutex_unlock(&thread_lock); 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci while (1) { /* handle spurious wakeups */ 1338c2ecf20Sopenharmony_ci if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR) 1348c2ecf20Sopenharmony_ci break; 1358c2ecf20Sopenharmony_ci } 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci pthread_exit(NULL); 1388c2ecf20Sopenharmony_ci return NULL; 1398c2ecf20Sopenharmony_ci} 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_cistatic void block_threads(pthread_t *w, pthread_attr_t thread_attr, 1428c2ecf20Sopenharmony_ci struct perf_cpu_map *cpu) 1438c2ecf20Sopenharmony_ci{ 1448c2ecf20Sopenharmony_ci cpu_set_t cpuset; 1458c2ecf20Sopenharmony_ci unsigned int i; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci threads_starting = nblocked_threads; 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci /* create and block all threads */ 1508c2ecf20Sopenharmony_ci for (i = 0; i < nblocked_threads; i++) { 1518c2ecf20Sopenharmony_ci CPU_ZERO(&cpuset); 1528c2ecf20Sopenharmony_ci CPU_SET(cpu->map[i % cpu->nr], &cpuset); 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) 1558c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) 1588c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "pthread_create"); 1598c2ecf20Sopenharmony_ci } 1608c2ecf20Sopenharmony_ci} 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_cistatic void print_run(struct thread_data *waking_worker, unsigned int run_num) 1638c2ecf20Sopenharmony_ci{ 1648c2ecf20Sopenharmony_ci unsigned int i, wakeup_avg; 1658c2ecf20Sopenharmony_ci double waketime_avg, waketime_stddev; 1668c2ecf20Sopenharmony_ci struct stats __waketime_stats, __wakeup_stats; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci init_stats(&__wakeup_stats); 1698c2ecf20Sopenharmony_ci init_stats(&__waketime_stats); 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci for (i = 0; i < nwaking_threads; i++) { 1728c2ecf20Sopenharmony_ci update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec); 1738c2ecf20Sopenharmony_ci update_stats(&__wakeup_stats, waking_worker[i].nwoken); 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci waketime_avg = avg_stats(&__waketime_stats); 1778c2ecf20Sopenharmony_ci waketime_stddev = stddev_stats(&__waketime_stats); 1788c2ecf20Sopenharmony_ci wakeup_avg = avg_stats(&__wakeup_stats); 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) " 1818c2ecf20Sopenharmony_ci "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg, 1828c2ecf20Sopenharmony_ci nblocked_threads, waketime_avg / USEC_PER_MSEC, 1838c2ecf20Sopenharmony_ci rel_stddev_stats(waketime_stddev, waketime_avg)); 1848c2ecf20Sopenharmony_ci} 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_cistatic void print_summary(void) 1878c2ecf20Sopenharmony_ci{ 1888c2ecf20Sopenharmony_ci unsigned int wakeup_avg; 1898c2ecf20Sopenharmony_ci double waketime_avg, waketime_stddev; 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci waketime_avg = avg_stats(&waketime_stats); 1928c2ecf20Sopenharmony_ci waketime_stddev = stddev_stats(&waketime_stats); 1938c2ecf20Sopenharmony_ci wakeup_avg = avg_stats(&wakeup_stats); 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n", 1968c2ecf20Sopenharmony_ci wakeup_avg, 1978c2ecf20Sopenharmony_ci nblocked_threads, 1988c2ecf20Sopenharmony_ci waketime_avg / USEC_PER_MSEC, 1998c2ecf20Sopenharmony_ci rel_stddev_stats(waketime_stddev, waketime_avg)); 2008c2ecf20Sopenharmony_ci} 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_cistatic void do_run_stats(struct thread_data *waking_worker) 2048c2ecf20Sopenharmony_ci{ 2058c2ecf20Sopenharmony_ci unsigned int i; 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci for (i = 0; i < nwaking_threads; i++) { 2088c2ecf20Sopenharmony_ci update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec); 2098c2ecf20Sopenharmony_ci update_stats(&wakeup_stats, waking_worker[i].nwoken); 2108c2ecf20Sopenharmony_ci } 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci} 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_cistatic void toggle_done(int sig __maybe_unused, 2158c2ecf20Sopenharmony_ci siginfo_t *info __maybe_unused, 2168c2ecf20Sopenharmony_ci void *uc __maybe_unused) 2178c2ecf20Sopenharmony_ci{ 2188c2ecf20Sopenharmony_ci done = true; 2198c2ecf20Sopenharmony_ci} 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ciint bench_futex_wake_parallel(int argc, const char **argv) 2228c2ecf20Sopenharmony_ci{ 2238c2ecf20Sopenharmony_ci int ret = 0; 2248c2ecf20Sopenharmony_ci unsigned int i, j; 2258c2ecf20Sopenharmony_ci struct sigaction act; 2268c2ecf20Sopenharmony_ci pthread_attr_t thread_attr; 2278c2ecf20Sopenharmony_ci struct thread_data *waking_worker; 2288c2ecf20Sopenharmony_ci struct perf_cpu_map *cpu; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci argc = parse_options(argc, argv, options, 2318c2ecf20Sopenharmony_ci bench_futex_wake_parallel_usage, 0); 2328c2ecf20Sopenharmony_ci if (argc) { 2338c2ecf20Sopenharmony_ci usage_with_options(bench_futex_wake_parallel_usage, options); 2348c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 2358c2ecf20Sopenharmony_ci } 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci memset(&act, 0, sizeof(act)); 2388c2ecf20Sopenharmony_ci sigfillset(&act.sa_mask); 2398c2ecf20Sopenharmony_ci act.sa_sigaction = toggle_done; 2408c2ecf20Sopenharmony_ci sigaction(SIGINT, &act, NULL); 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci cpu = perf_cpu_map__new(NULL); 2438c2ecf20Sopenharmony_ci if (!cpu) 2448c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "calloc"); 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci if (!nblocked_threads) 2478c2ecf20Sopenharmony_ci nblocked_threads = cpu->nr; 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci /* some sanity checks */ 2508c2ecf20Sopenharmony_ci if (nwaking_threads > nblocked_threads || !nwaking_threads) 2518c2ecf20Sopenharmony_ci nwaking_threads = nblocked_threads; 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci if (nblocked_threads % nwaking_threads) 2548c2ecf20Sopenharmony_ci errx(EXIT_FAILURE, "Must be perfectly divisible"); 2558c2ecf20Sopenharmony_ci /* 2568c2ecf20Sopenharmony_ci * Each thread will wakeup nwakes tasks in 2578c2ecf20Sopenharmony_ci * a single futex_wait call. 2588c2ecf20Sopenharmony_ci */ 2598c2ecf20Sopenharmony_ci nwakes = nblocked_threads/nwaking_threads; 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker)); 2628c2ecf20Sopenharmony_ci if (!blocked_worker) 2638c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "calloc"); 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci if (!fshared) 2668c2ecf20Sopenharmony_ci futex_flag = FUTEX_PRIVATE_FLAG; 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci printf("Run summary [PID %d]: blocking on %d threads (at [%s] " 2698c2ecf20Sopenharmony_ci "futex %p), %d threads waking up %d at a time.\n\n", 2708c2ecf20Sopenharmony_ci getpid(), nblocked_threads, fshared ? "shared":"private", 2718c2ecf20Sopenharmony_ci &futex, nwaking_threads, nwakes); 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci init_stats(&wakeup_stats); 2748c2ecf20Sopenharmony_ci init_stats(&waketime_stats); 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci pthread_attr_init(&thread_attr); 2778c2ecf20Sopenharmony_ci pthread_mutex_init(&thread_lock, NULL); 2788c2ecf20Sopenharmony_ci pthread_cond_init(&thread_parent, NULL); 2798c2ecf20Sopenharmony_ci pthread_cond_init(&thread_worker, NULL); 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci for (j = 0; j < bench_repeat && !done; j++) { 2828c2ecf20Sopenharmony_ci waking_worker = calloc(nwaking_threads, sizeof(*waking_worker)); 2838c2ecf20Sopenharmony_ci if (!waking_worker) 2848c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "calloc"); 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci /* create, launch & block all threads */ 2878c2ecf20Sopenharmony_ci block_threads(blocked_worker, thread_attr, cpu); 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci /* make sure all threads are already blocked */ 2908c2ecf20Sopenharmony_ci pthread_mutex_lock(&thread_lock); 2918c2ecf20Sopenharmony_ci while (threads_starting) 2928c2ecf20Sopenharmony_ci pthread_cond_wait(&thread_parent, &thread_lock); 2938c2ecf20Sopenharmony_ci pthread_cond_broadcast(&thread_worker); 2948c2ecf20Sopenharmony_ci pthread_mutex_unlock(&thread_lock); 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci usleep(100000); 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci /* Ok, all threads are patiently blocked, start waking folks up */ 2998c2ecf20Sopenharmony_ci wakeup_threads(waking_worker, thread_attr); 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci for (i = 0; i < nblocked_threads; i++) { 3028c2ecf20Sopenharmony_ci ret = pthread_join(blocked_worker[i], NULL); 3038c2ecf20Sopenharmony_ci if (ret) 3048c2ecf20Sopenharmony_ci err(EXIT_FAILURE, "pthread_join"); 3058c2ecf20Sopenharmony_ci } 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_ci do_run_stats(waking_worker); 3088c2ecf20Sopenharmony_ci if (!silent) 3098c2ecf20Sopenharmony_ci print_run(waking_worker, j); 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci free(waking_worker); 3128c2ecf20Sopenharmony_ci } 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci /* cleanup & report results */ 3158c2ecf20Sopenharmony_ci pthread_cond_destroy(&thread_parent); 3168c2ecf20Sopenharmony_ci pthread_cond_destroy(&thread_worker); 3178c2ecf20Sopenharmony_ci pthread_mutex_destroy(&thread_lock); 3188c2ecf20Sopenharmony_ci pthread_attr_destroy(&thread_attr); 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci print_summary(); 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci free(blocked_worker); 3238c2ecf20Sopenharmony_ci perf_cpu_map__put(cpu); 3248c2ecf20Sopenharmony_ci return ret; 3258c2ecf20Sopenharmony_ci} 3268c2ecf20Sopenharmony_ci#endif /* HAVE_PTHREAD_BARRIER */ 327