18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Benchmark synthesis of perf events such as at the start of a 'perf 48c2ecf20Sopenharmony_ci * record'. Synthesis is done on the current process and the 'dummy' event 58c2ecf20Sopenharmony_ci * handlers are invoked that support dump_trace but otherwise do nothing. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Copyright 2019 Google LLC. 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci#include <stdio.h> 108c2ecf20Sopenharmony_ci#include "bench.h" 118c2ecf20Sopenharmony_ci#include "../util/debug.h" 128c2ecf20Sopenharmony_ci#include "../util/session.h" 138c2ecf20Sopenharmony_ci#include "../util/stat.h" 148c2ecf20Sopenharmony_ci#include "../util/synthetic-events.h" 158c2ecf20Sopenharmony_ci#include "../util/target.h" 168c2ecf20Sopenharmony_ci#include "../util/thread_map.h" 178c2ecf20Sopenharmony_ci#include "../util/tool.h" 188c2ecf20Sopenharmony_ci#include "../util/util.h" 198c2ecf20Sopenharmony_ci#include <linux/atomic.h> 208c2ecf20Sopenharmony_ci#include <linux/err.h> 218c2ecf20Sopenharmony_ci#include <linux/time64.h> 228c2ecf20Sopenharmony_ci#include <subcmd/parse-options.h> 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_cistatic unsigned int min_threads = 1; 258c2ecf20Sopenharmony_cistatic unsigned int max_threads = UINT_MAX; 268c2ecf20Sopenharmony_cistatic unsigned int single_iterations = 10000; 278c2ecf20Sopenharmony_cistatic unsigned int multi_iterations = 10; 288c2ecf20Sopenharmony_cistatic bool run_st; 298c2ecf20Sopenharmony_cistatic bool run_mt; 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_cistatic const struct option options[] = { 328c2ecf20Sopenharmony_ci OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"), 338c2ecf20Sopenharmony_ci OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"), 348c2ecf20Sopenharmony_ci OPT_UINTEGER('m', "min-threads", &min_threads, 358c2ecf20Sopenharmony_ci "Minimum number of threads in multithreaded bench"), 368c2ecf20Sopenharmony_ci OPT_UINTEGER('M', "max-threads", &max_threads, 378c2ecf20Sopenharmony_ci "Maximum number of threads in multithreaded bench"), 388c2ecf20Sopenharmony_ci OPT_UINTEGER('i', "single-iterations", &single_iterations, 398c2ecf20Sopenharmony_ci "Number of iterations used to compute single-threaded average"), 408c2ecf20Sopenharmony_ci OPT_UINTEGER('I', "multi-iterations", &multi_iterations, 418c2ecf20Sopenharmony_ci "Number of iterations used to compute multi-threaded average"), 428c2ecf20Sopenharmony_ci OPT_END() 438c2ecf20Sopenharmony_ci}; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cistatic const char *const bench_usage[] = { 468c2ecf20Sopenharmony_ci "perf bench internals synthesize <options>", 478c2ecf20Sopenharmony_ci NULL 488c2ecf20Sopenharmony_ci}; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_cistatic atomic_t event_count; 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_cistatic int process_synthesized_event(struct perf_tool *tool __maybe_unused, 538c2ecf20Sopenharmony_ci union perf_event *event __maybe_unused, 548c2ecf20Sopenharmony_ci struct perf_sample *sample __maybe_unused, 558c2ecf20Sopenharmony_ci struct machine *machine __maybe_unused) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci atomic_inc(&event_count); 588c2ecf20Sopenharmony_ci return 0; 598c2ecf20Sopenharmony_ci} 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_cistatic int do_run_single_threaded(struct perf_session *session, 628c2ecf20Sopenharmony_ci struct perf_thread_map *threads, 638c2ecf20Sopenharmony_ci struct target *target, bool data_mmap) 648c2ecf20Sopenharmony_ci{ 658c2ecf20Sopenharmony_ci const unsigned int nr_threads_synthesize = 1; 668c2ecf20Sopenharmony_ci struct timeval start, end, diff; 678c2ecf20Sopenharmony_ci u64 runtime_us; 688c2ecf20Sopenharmony_ci unsigned int i; 698c2ecf20Sopenharmony_ci double time_average, time_stddev, event_average, event_stddev; 708c2ecf20Sopenharmony_ci int err; 718c2ecf20Sopenharmony_ci struct stats time_stats, event_stats; 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci init_stats(&time_stats); 748c2ecf20Sopenharmony_ci init_stats(&event_stats); 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci for (i = 0; i < single_iterations; i++) { 778c2ecf20Sopenharmony_ci atomic_set(&event_count, 0); 788c2ecf20Sopenharmony_ci gettimeofday(&start, NULL); 798c2ecf20Sopenharmony_ci err = __machine__synthesize_threads(&session->machines.host, 808c2ecf20Sopenharmony_ci NULL, 818c2ecf20Sopenharmony_ci target, threads, 828c2ecf20Sopenharmony_ci process_synthesized_event, 838c2ecf20Sopenharmony_ci data_mmap, 848c2ecf20Sopenharmony_ci nr_threads_synthesize); 858c2ecf20Sopenharmony_ci if (err) 868c2ecf20Sopenharmony_ci return err; 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci gettimeofday(&end, NULL); 898c2ecf20Sopenharmony_ci timersub(&end, &start, &diff); 908c2ecf20Sopenharmony_ci runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 918c2ecf20Sopenharmony_ci update_stats(&time_stats, runtime_us); 928c2ecf20Sopenharmony_ci update_stats(&event_stats, atomic_read(&event_count)); 938c2ecf20Sopenharmony_ci } 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci time_average = avg_stats(&time_stats); 968c2ecf20Sopenharmony_ci time_stddev = stddev_stats(&time_stats); 978c2ecf20Sopenharmony_ci printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n", 988c2ecf20Sopenharmony_ci data_mmap ? "data " : "", time_average, time_stddev); 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci event_average = avg_stats(&event_stats); 1018c2ecf20Sopenharmony_ci event_stddev = stddev_stats(&event_stats); 1028c2ecf20Sopenharmony_ci printf(" Average num. events: %.3f (+- %.3f)\n", 1038c2ecf20Sopenharmony_ci event_average, event_stddev); 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci printf(" Average time per event %.3f usec\n", 1068c2ecf20Sopenharmony_ci time_average / event_average); 1078c2ecf20Sopenharmony_ci return 0; 1088c2ecf20Sopenharmony_ci} 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_cistatic int run_single_threaded(void) 1118c2ecf20Sopenharmony_ci{ 1128c2ecf20Sopenharmony_ci struct perf_session *session; 1138c2ecf20Sopenharmony_ci struct target target = { 1148c2ecf20Sopenharmony_ci .pid = "self", 1158c2ecf20Sopenharmony_ci }; 1168c2ecf20Sopenharmony_ci struct perf_thread_map *threads; 1178c2ecf20Sopenharmony_ci int err; 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci perf_set_singlethreaded(); 1208c2ecf20Sopenharmony_ci session = perf_session__new(NULL, false, NULL); 1218c2ecf20Sopenharmony_ci if (IS_ERR(session)) { 1228c2ecf20Sopenharmony_ci pr_err("Session creation failed.\n"); 1238c2ecf20Sopenharmony_ci return PTR_ERR(session); 1248c2ecf20Sopenharmony_ci } 1258c2ecf20Sopenharmony_ci threads = thread_map__new_by_pid(getpid()); 1268c2ecf20Sopenharmony_ci if (!threads) { 1278c2ecf20Sopenharmony_ci pr_err("Thread map creation failed.\n"); 1288c2ecf20Sopenharmony_ci err = -ENOMEM; 1298c2ecf20Sopenharmony_ci goto err_out; 1308c2ecf20Sopenharmony_ci } 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci puts( 1338c2ecf20Sopenharmony_ci"Computing performance of single threaded perf event synthesis by\n" 1348c2ecf20Sopenharmony_ci"synthesizing events on the perf process itself:"); 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci err = do_run_single_threaded(session, threads, &target, false); 1378c2ecf20Sopenharmony_ci if (err) 1388c2ecf20Sopenharmony_ci goto err_out; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci err = do_run_single_threaded(session, threads, &target, true); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_cierr_out: 1438c2ecf20Sopenharmony_ci if (threads) 1448c2ecf20Sopenharmony_ci perf_thread_map__put(threads); 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci perf_session__delete(session); 1478c2ecf20Sopenharmony_ci return err; 1488c2ecf20Sopenharmony_ci} 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_cistatic int do_run_multi_threaded(struct target *target, 1518c2ecf20Sopenharmony_ci unsigned int nr_threads_synthesize) 1528c2ecf20Sopenharmony_ci{ 1538c2ecf20Sopenharmony_ci struct timeval start, end, diff; 1548c2ecf20Sopenharmony_ci u64 runtime_us; 1558c2ecf20Sopenharmony_ci unsigned int i; 1568c2ecf20Sopenharmony_ci double time_average, time_stddev, event_average, event_stddev; 1578c2ecf20Sopenharmony_ci int err; 1588c2ecf20Sopenharmony_ci struct stats time_stats, event_stats; 1598c2ecf20Sopenharmony_ci struct perf_session *session; 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci init_stats(&time_stats); 1628c2ecf20Sopenharmony_ci init_stats(&event_stats); 1638c2ecf20Sopenharmony_ci for (i = 0; i < multi_iterations; i++) { 1648c2ecf20Sopenharmony_ci session = perf_session__new(NULL, false, NULL); 1658c2ecf20Sopenharmony_ci if (IS_ERR(session)) 1668c2ecf20Sopenharmony_ci return PTR_ERR(session); 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci atomic_set(&event_count, 0); 1698c2ecf20Sopenharmony_ci gettimeofday(&start, NULL); 1708c2ecf20Sopenharmony_ci err = __machine__synthesize_threads(&session->machines.host, 1718c2ecf20Sopenharmony_ci NULL, 1728c2ecf20Sopenharmony_ci target, NULL, 1738c2ecf20Sopenharmony_ci process_synthesized_event, 1748c2ecf20Sopenharmony_ci false, 1758c2ecf20Sopenharmony_ci nr_threads_synthesize); 1768c2ecf20Sopenharmony_ci if (err) { 1778c2ecf20Sopenharmony_ci perf_session__delete(session); 1788c2ecf20Sopenharmony_ci return err; 1798c2ecf20Sopenharmony_ci } 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci gettimeofday(&end, NULL); 1828c2ecf20Sopenharmony_ci timersub(&end, &start, &diff); 1838c2ecf20Sopenharmony_ci runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 1848c2ecf20Sopenharmony_ci update_stats(&time_stats, runtime_us); 1858c2ecf20Sopenharmony_ci update_stats(&event_stats, atomic_read(&event_count)); 1868c2ecf20Sopenharmony_ci perf_session__delete(session); 1878c2ecf20Sopenharmony_ci } 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci time_average = avg_stats(&time_stats); 1908c2ecf20Sopenharmony_ci time_stddev = stddev_stats(&time_stats); 1918c2ecf20Sopenharmony_ci printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n", 1928c2ecf20Sopenharmony_ci time_average, time_stddev); 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci event_average = avg_stats(&event_stats); 1958c2ecf20Sopenharmony_ci event_stddev = stddev_stats(&event_stats); 1968c2ecf20Sopenharmony_ci printf(" Average num. events: %.3f (+- %.3f)\n", 1978c2ecf20Sopenharmony_ci event_average, event_stddev); 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci printf(" Average time per event %.3f usec\n", 2008c2ecf20Sopenharmony_ci time_average / event_average); 2018c2ecf20Sopenharmony_ci return 0; 2028c2ecf20Sopenharmony_ci} 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_cistatic int run_multi_threaded(void) 2058c2ecf20Sopenharmony_ci{ 2068c2ecf20Sopenharmony_ci struct target target = { 2078c2ecf20Sopenharmony_ci .cpu_list = "0" 2088c2ecf20Sopenharmony_ci }; 2098c2ecf20Sopenharmony_ci unsigned int nr_threads_synthesize; 2108c2ecf20Sopenharmony_ci int err; 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci if (max_threads == UINT_MAX) 2138c2ecf20Sopenharmony_ci max_threads = sysconf(_SC_NPROCESSORS_ONLN); 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci puts( 2168c2ecf20Sopenharmony_ci"Computing performance of multi threaded perf event synthesis by\n" 2178c2ecf20Sopenharmony_ci"synthesizing events on CPU 0:"); 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci for (nr_threads_synthesize = min_threads; 2208c2ecf20Sopenharmony_ci nr_threads_synthesize <= max_threads; 2218c2ecf20Sopenharmony_ci nr_threads_synthesize++) { 2228c2ecf20Sopenharmony_ci if (nr_threads_synthesize == 1) 2238c2ecf20Sopenharmony_ci perf_set_singlethreaded(); 2248c2ecf20Sopenharmony_ci else 2258c2ecf20Sopenharmony_ci perf_set_multithreaded(); 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci printf(" Number of synthesis threads: %u\n", 2288c2ecf20Sopenharmony_ci nr_threads_synthesize); 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci err = do_run_multi_threaded(&target, nr_threads_synthesize); 2318c2ecf20Sopenharmony_ci if (err) 2328c2ecf20Sopenharmony_ci return err; 2338c2ecf20Sopenharmony_ci } 2348c2ecf20Sopenharmony_ci perf_set_singlethreaded(); 2358c2ecf20Sopenharmony_ci return 0; 2368c2ecf20Sopenharmony_ci} 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ciint bench_synthesize(int argc, const char **argv) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci int err = 0; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci argc = parse_options(argc, argv, options, bench_usage, 0); 2438c2ecf20Sopenharmony_ci if (argc) { 2448c2ecf20Sopenharmony_ci usage_with_options(bench_usage, options); 2458c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 2468c2ecf20Sopenharmony_ci } 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci /* 2498c2ecf20Sopenharmony_ci * If neither single threaded or multi-threaded are specified, default 2508c2ecf20Sopenharmony_ci * to running just single threaded. 2518c2ecf20Sopenharmony_ci */ 2528c2ecf20Sopenharmony_ci if (!run_st && !run_mt) 2538c2ecf20Sopenharmony_ci run_st = true; 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci if (run_st) 2568c2ecf20Sopenharmony_ci err = run_single_threaded(); 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci if (!err && run_mt) 2598c2ecf20Sopenharmony_ci err = run_multi_threaded(); 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci return err; 2628c2ecf20Sopenharmony_ci} 263