18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Benchmark synthesis of perf events such as at the start of a 'perf
48c2ecf20Sopenharmony_ci * record'. Synthesis is done on the current process and the 'dummy' event
58c2ecf20Sopenharmony_ci * handlers are invoked that support dump_trace but otherwise do nothing.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Copyright 2019 Google LLC.
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci#include <stdio.h>
108c2ecf20Sopenharmony_ci#include "bench.h"
118c2ecf20Sopenharmony_ci#include "../util/debug.h"
128c2ecf20Sopenharmony_ci#include "../util/session.h"
138c2ecf20Sopenharmony_ci#include "../util/stat.h"
148c2ecf20Sopenharmony_ci#include "../util/synthetic-events.h"
158c2ecf20Sopenharmony_ci#include "../util/target.h"
168c2ecf20Sopenharmony_ci#include "../util/thread_map.h"
178c2ecf20Sopenharmony_ci#include "../util/tool.h"
188c2ecf20Sopenharmony_ci#include "../util/util.h"
198c2ecf20Sopenharmony_ci#include <linux/atomic.h>
208c2ecf20Sopenharmony_ci#include <linux/err.h>
218c2ecf20Sopenharmony_ci#include <linux/time64.h>
228c2ecf20Sopenharmony_ci#include <subcmd/parse-options.h>
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_cistatic unsigned int min_threads = 1;
258c2ecf20Sopenharmony_cistatic unsigned int max_threads = UINT_MAX;
268c2ecf20Sopenharmony_cistatic unsigned int single_iterations = 10000;
278c2ecf20Sopenharmony_cistatic unsigned int multi_iterations = 10;
288c2ecf20Sopenharmony_cistatic bool run_st;
298c2ecf20Sopenharmony_cistatic bool run_mt;
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_cistatic const struct option options[] = {
328c2ecf20Sopenharmony_ci	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
338c2ecf20Sopenharmony_ci	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
348c2ecf20Sopenharmony_ci	OPT_UINTEGER('m', "min-threads", &min_threads,
358c2ecf20Sopenharmony_ci		"Minimum number of threads in multithreaded bench"),
368c2ecf20Sopenharmony_ci	OPT_UINTEGER('M', "max-threads", &max_threads,
378c2ecf20Sopenharmony_ci		"Maximum number of threads in multithreaded bench"),
388c2ecf20Sopenharmony_ci	OPT_UINTEGER('i', "single-iterations", &single_iterations,
398c2ecf20Sopenharmony_ci		"Number of iterations used to compute single-threaded average"),
408c2ecf20Sopenharmony_ci	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
418c2ecf20Sopenharmony_ci		"Number of iterations used to compute multi-threaded average"),
428c2ecf20Sopenharmony_ci	OPT_END()
438c2ecf20Sopenharmony_ci};
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_cistatic const char *const bench_usage[] = {
468c2ecf20Sopenharmony_ci	"perf bench internals synthesize <options>",
478c2ecf20Sopenharmony_ci	NULL
488c2ecf20Sopenharmony_ci};
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_cistatic atomic_t event_count;
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_cistatic int process_synthesized_event(struct perf_tool *tool __maybe_unused,
538c2ecf20Sopenharmony_ci				     union perf_event *event __maybe_unused,
548c2ecf20Sopenharmony_ci				     struct perf_sample *sample __maybe_unused,
558c2ecf20Sopenharmony_ci				     struct machine *machine __maybe_unused)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	atomic_inc(&event_count);
588c2ecf20Sopenharmony_ci	return 0;
598c2ecf20Sopenharmony_ci}
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_cistatic int do_run_single_threaded(struct perf_session *session,
628c2ecf20Sopenharmony_ci				struct perf_thread_map *threads,
638c2ecf20Sopenharmony_ci				struct target *target, bool data_mmap)
648c2ecf20Sopenharmony_ci{
658c2ecf20Sopenharmony_ci	const unsigned int nr_threads_synthesize = 1;
668c2ecf20Sopenharmony_ci	struct timeval start, end, diff;
678c2ecf20Sopenharmony_ci	u64 runtime_us;
688c2ecf20Sopenharmony_ci	unsigned int i;
698c2ecf20Sopenharmony_ci	double time_average, time_stddev, event_average, event_stddev;
708c2ecf20Sopenharmony_ci	int err;
718c2ecf20Sopenharmony_ci	struct stats time_stats, event_stats;
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	init_stats(&time_stats);
748c2ecf20Sopenharmony_ci	init_stats(&event_stats);
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	for (i = 0; i < single_iterations; i++) {
778c2ecf20Sopenharmony_ci		atomic_set(&event_count, 0);
788c2ecf20Sopenharmony_ci		gettimeofday(&start, NULL);
798c2ecf20Sopenharmony_ci		err = __machine__synthesize_threads(&session->machines.host,
808c2ecf20Sopenharmony_ci						NULL,
818c2ecf20Sopenharmony_ci						target, threads,
828c2ecf20Sopenharmony_ci						process_synthesized_event,
838c2ecf20Sopenharmony_ci						data_mmap,
848c2ecf20Sopenharmony_ci						nr_threads_synthesize);
858c2ecf20Sopenharmony_ci		if (err)
868c2ecf20Sopenharmony_ci			return err;
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci		gettimeofday(&end, NULL);
898c2ecf20Sopenharmony_ci		timersub(&end, &start, &diff);
908c2ecf20Sopenharmony_ci		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
918c2ecf20Sopenharmony_ci		update_stats(&time_stats, runtime_us);
928c2ecf20Sopenharmony_ci		update_stats(&event_stats, atomic_read(&event_count));
938c2ecf20Sopenharmony_ci	}
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	time_average = avg_stats(&time_stats);
968c2ecf20Sopenharmony_ci	time_stddev = stddev_stats(&time_stats);
978c2ecf20Sopenharmony_ci	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
988c2ecf20Sopenharmony_ci		data_mmap ? "data " : "", time_average, time_stddev);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	event_average = avg_stats(&event_stats);
1018c2ecf20Sopenharmony_ci	event_stddev = stddev_stats(&event_stats);
1028c2ecf20Sopenharmony_ci	printf("  Average num. events: %.3f (+- %.3f)\n",
1038c2ecf20Sopenharmony_ci		event_average, event_stddev);
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci	printf("  Average time per event %.3f usec\n",
1068c2ecf20Sopenharmony_ci		time_average / event_average);
1078c2ecf20Sopenharmony_ci	return 0;
1088c2ecf20Sopenharmony_ci}
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_cistatic int run_single_threaded(void)
1118c2ecf20Sopenharmony_ci{
1128c2ecf20Sopenharmony_ci	struct perf_session *session;
1138c2ecf20Sopenharmony_ci	struct target target = {
1148c2ecf20Sopenharmony_ci		.pid = "self",
1158c2ecf20Sopenharmony_ci	};
1168c2ecf20Sopenharmony_ci	struct perf_thread_map *threads;
1178c2ecf20Sopenharmony_ci	int err;
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	perf_set_singlethreaded();
1208c2ecf20Sopenharmony_ci	session = perf_session__new(NULL, false, NULL);
1218c2ecf20Sopenharmony_ci	if (IS_ERR(session)) {
1228c2ecf20Sopenharmony_ci		pr_err("Session creation failed.\n");
1238c2ecf20Sopenharmony_ci		return PTR_ERR(session);
1248c2ecf20Sopenharmony_ci	}
1258c2ecf20Sopenharmony_ci	threads = thread_map__new_by_pid(getpid());
1268c2ecf20Sopenharmony_ci	if (!threads) {
1278c2ecf20Sopenharmony_ci		pr_err("Thread map creation failed.\n");
1288c2ecf20Sopenharmony_ci		err = -ENOMEM;
1298c2ecf20Sopenharmony_ci		goto err_out;
1308c2ecf20Sopenharmony_ci	}
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	puts(
1338c2ecf20Sopenharmony_ci"Computing performance of single threaded perf event synthesis by\n"
1348c2ecf20Sopenharmony_ci"synthesizing events on the perf process itself:");
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	err = do_run_single_threaded(session, threads, &target, false);
1378c2ecf20Sopenharmony_ci	if (err)
1388c2ecf20Sopenharmony_ci		goto err_out;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	err = do_run_single_threaded(session, threads, &target, true);
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_cierr_out:
1438c2ecf20Sopenharmony_ci	if (threads)
1448c2ecf20Sopenharmony_ci		perf_thread_map__put(threads);
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	perf_session__delete(session);
1478c2ecf20Sopenharmony_ci	return err;
1488c2ecf20Sopenharmony_ci}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_cistatic int do_run_multi_threaded(struct target *target,
1518c2ecf20Sopenharmony_ci				unsigned int nr_threads_synthesize)
1528c2ecf20Sopenharmony_ci{
1538c2ecf20Sopenharmony_ci	struct timeval start, end, diff;
1548c2ecf20Sopenharmony_ci	u64 runtime_us;
1558c2ecf20Sopenharmony_ci	unsigned int i;
1568c2ecf20Sopenharmony_ci	double time_average, time_stddev, event_average, event_stddev;
1578c2ecf20Sopenharmony_ci	int err;
1588c2ecf20Sopenharmony_ci	struct stats time_stats, event_stats;
1598c2ecf20Sopenharmony_ci	struct perf_session *session;
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	init_stats(&time_stats);
1628c2ecf20Sopenharmony_ci	init_stats(&event_stats);
1638c2ecf20Sopenharmony_ci	for (i = 0; i < multi_iterations; i++) {
1648c2ecf20Sopenharmony_ci		session = perf_session__new(NULL, false, NULL);
1658c2ecf20Sopenharmony_ci		if (IS_ERR(session))
1668c2ecf20Sopenharmony_ci			return PTR_ERR(session);
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci		atomic_set(&event_count, 0);
1698c2ecf20Sopenharmony_ci		gettimeofday(&start, NULL);
1708c2ecf20Sopenharmony_ci		err = __machine__synthesize_threads(&session->machines.host,
1718c2ecf20Sopenharmony_ci						NULL,
1728c2ecf20Sopenharmony_ci						target, NULL,
1738c2ecf20Sopenharmony_ci						process_synthesized_event,
1748c2ecf20Sopenharmony_ci						false,
1758c2ecf20Sopenharmony_ci						nr_threads_synthesize);
1768c2ecf20Sopenharmony_ci		if (err) {
1778c2ecf20Sopenharmony_ci			perf_session__delete(session);
1788c2ecf20Sopenharmony_ci			return err;
1798c2ecf20Sopenharmony_ci		}
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci		gettimeofday(&end, NULL);
1828c2ecf20Sopenharmony_ci		timersub(&end, &start, &diff);
1838c2ecf20Sopenharmony_ci		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
1848c2ecf20Sopenharmony_ci		update_stats(&time_stats, runtime_us);
1858c2ecf20Sopenharmony_ci		update_stats(&event_stats, atomic_read(&event_count));
1868c2ecf20Sopenharmony_ci		perf_session__delete(session);
1878c2ecf20Sopenharmony_ci	}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	time_average = avg_stats(&time_stats);
1908c2ecf20Sopenharmony_ci	time_stddev = stddev_stats(&time_stats);
1918c2ecf20Sopenharmony_ci	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
1928c2ecf20Sopenharmony_ci		time_average, time_stddev);
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	event_average = avg_stats(&event_stats);
1958c2ecf20Sopenharmony_ci	event_stddev = stddev_stats(&event_stats);
1968c2ecf20Sopenharmony_ci	printf("    Average num. events: %.3f (+- %.3f)\n",
1978c2ecf20Sopenharmony_ci		event_average, event_stddev);
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	printf("    Average time per event %.3f usec\n",
2008c2ecf20Sopenharmony_ci		time_average / event_average);
2018c2ecf20Sopenharmony_ci	return 0;
2028c2ecf20Sopenharmony_ci}
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_cistatic int run_multi_threaded(void)
2058c2ecf20Sopenharmony_ci{
2068c2ecf20Sopenharmony_ci	struct target target = {
2078c2ecf20Sopenharmony_ci		.cpu_list = "0"
2088c2ecf20Sopenharmony_ci	};
2098c2ecf20Sopenharmony_ci	unsigned int nr_threads_synthesize;
2108c2ecf20Sopenharmony_ci	int err;
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	if (max_threads == UINT_MAX)
2138c2ecf20Sopenharmony_ci		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	puts(
2168c2ecf20Sopenharmony_ci"Computing performance of multi threaded perf event synthesis by\n"
2178c2ecf20Sopenharmony_ci"synthesizing events on CPU 0:");
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	for (nr_threads_synthesize = min_threads;
2208c2ecf20Sopenharmony_ci	     nr_threads_synthesize <= max_threads;
2218c2ecf20Sopenharmony_ci	     nr_threads_synthesize++) {
2228c2ecf20Sopenharmony_ci		if (nr_threads_synthesize == 1)
2238c2ecf20Sopenharmony_ci			perf_set_singlethreaded();
2248c2ecf20Sopenharmony_ci		else
2258c2ecf20Sopenharmony_ci			perf_set_multithreaded();
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci		printf("  Number of synthesis threads: %u\n",
2288c2ecf20Sopenharmony_ci			nr_threads_synthesize);
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci		err = do_run_multi_threaded(&target, nr_threads_synthesize);
2318c2ecf20Sopenharmony_ci		if (err)
2328c2ecf20Sopenharmony_ci			return err;
2338c2ecf20Sopenharmony_ci	}
2348c2ecf20Sopenharmony_ci	perf_set_singlethreaded();
2358c2ecf20Sopenharmony_ci	return 0;
2368c2ecf20Sopenharmony_ci}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ciint bench_synthesize(int argc, const char **argv)
2398c2ecf20Sopenharmony_ci{
2408c2ecf20Sopenharmony_ci	int err = 0;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	argc = parse_options(argc, argv, options, bench_usage, 0);
2438c2ecf20Sopenharmony_ci	if (argc) {
2448c2ecf20Sopenharmony_ci		usage_with_options(bench_usage, options);
2458c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
2468c2ecf20Sopenharmony_ci	}
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	/*
2498c2ecf20Sopenharmony_ci	 * If neither single threaded or multi-threaded are specified, default
2508c2ecf20Sopenharmony_ci	 * to running just single threaded.
2518c2ecf20Sopenharmony_ci	 */
2528c2ecf20Sopenharmony_ci	if (!run_st && !run_mt)
2538c2ecf20Sopenharmony_ci		run_st = true;
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	if (run_st)
2568c2ecf20Sopenharmony_ci		err = run_single_threaded();
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	if (!err && run_mt)
2598c2ecf20Sopenharmony_ci		err = run_multi_threaded();
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	return err;
2628c2ecf20Sopenharmony_ci}
263