162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#include <subcmd/parse-options.h>
462306a36Sopenharmony_ci#include <linux/hw_breakpoint.h>
562306a36Sopenharmony_ci#include <linux/perf_event.h>
662306a36Sopenharmony_ci#include <linux/time64.h>
762306a36Sopenharmony_ci#include <sys/syscall.h>
862306a36Sopenharmony_ci#include <sys/ioctl.h>
962306a36Sopenharmony_ci#include <sys/time.h>
1062306a36Sopenharmony_ci#include <pthread.h>
1162306a36Sopenharmony_ci#include <stddef.h>
1262306a36Sopenharmony_ci#include <stdlib.h>
1362306a36Sopenharmony_ci#include <unistd.h>
1462306a36Sopenharmony_ci#include <stdio.h>
1562306a36Sopenharmony_ci#include <errno.h>
1662306a36Sopenharmony_ci#include "bench.h"
1762306a36Sopenharmony_ci#include "futex.h"
1862306a36Sopenharmony_ci
/*
 * Tunables for "perf bench breakpoint thread".
 * Every field defaults to 1 and is bound to a command-line option in
 * thread_options[].
 */
struct {
	unsigned int nbreakpoints;	/* -b / --breakpoints */
	unsigned int nparallel;		/* -p / --parallelism */
	unsigned int nthreads;		/* -t / --threads */
} thread_params = {
	.nbreakpoints	= 1,
	.nparallel	= 1,
	.nthreads	= 1,
};
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_cistatic const struct option thread_options[] = {
3062306a36Sopenharmony_ci	OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints,
3162306a36Sopenharmony_ci		"Specify amount of breakpoints"),
3262306a36Sopenharmony_ci	OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"),
3362306a36Sopenharmony_ci	OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"),
3462306a36Sopenharmony_ci	OPT_END()
3562306a36Sopenharmony_ci};
3662306a36Sopenharmony_ci
/* NULL-terminated usage strings for "perf bench breakpoint thread". */
static const char * const thread_usage[] = {
	"perf bench breakpoint thread <options>",
	NULL,
};
4162306a36Sopenharmony_ci
/* One breakpoint used by the "thread" benchmark. */
struct breakpoint {
	int fd;		/* result of breakpoint_setup(&watched): event fd or negative errno */
	char watched;	/* byte whose address is handed to breakpoint_setup() */
};
4662306a36Sopenharmony_ci
/*
 * Open a perf breakpoint event on the single byte at @addr.
 *
 * The event is of type PERF_TYPE_BREAKPOINT with HW_BREAKPOINT_RW /
 * HW_BREAKPOINT_LEN_1, has inherit, exclude_kernel and exclude_hv set, and
 * is requested through SYS_perf_event_open with the arguments
 * (pid = 0, cpu = -1, group_fd = -1, flags = 0).
 *
 * Returns the new file descriptor on success, or -errno on failure.
 */
static int breakpoint_setup(void *addr)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_BREAKPOINT,
		.size		= sizeof(attr),
		.inherit	= 1,
		.exclude_kernel	= 1,
		.exclude_hv	= 1,
		.bp_addr	= (unsigned long)addr,
		.bp_type	= HW_BREAKPOINT_RW,
		.bp_len		= HW_BREAKPOINT_LEN_1,
	};
	int fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);

	/* Fold the failure reason into the return value so callers can test for -ENODEV. */
	return fd < 0 ? -errno : fd;
}
6762306a36Sopenharmony_ci
/*
 * Thread body for a "passive" thread.
 *
 * @arg points to an unsigned int flag. The thread keeps calling
 * futex_wait() on that word (with expected value 0) until an atomic load
 * of the flag yields non-zero. Always returns NULL.
 */
static void *passive_thread(void *arg)
{
	unsigned int *flag = arg;

	for (;;) {
		if (__atomic_load_n(flag, __ATOMIC_RELAXED))
			break;
		/* The flag is re-checked after every return from futex_wait(). */
		futex_wait(flag, 0, NULL, 0);
	}
	return NULL;
}
7662306a36Sopenharmony_ci
/*
 * Thread body for an "active" thread.
 *
 * @arg points to an unsigned int flag; the thread spins on an atomic load
 * of that flag until it reads non-zero. Always returns NULL.
 */
static void *active_thread(void *arg)
{
	unsigned int *flag = arg;

	for (;;) {
		if (__atomic_load_n(flag, __ATOMIC_RELAXED))
			return NULL;
	}
}
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_cistatic void *breakpoint_thread(void *arg)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	unsigned int i, done;
8862306a36Sopenharmony_ci	int *repeat = (int *)arg;
8962306a36Sopenharmony_ci	pthread_t *threads;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	threads = calloc(thread_params.nthreads, sizeof(threads[0]));
9262306a36Sopenharmony_ci	if (!threads)
9362306a36Sopenharmony_ci		exit((perror("calloc"), EXIT_FAILURE));
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) {
9662306a36Sopenharmony_ci		done = 0;
9762306a36Sopenharmony_ci		for (i = 0; i < thread_params.nthreads; i++) {
9862306a36Sopenharmony_ci			if (pthread_create(&threads[i], NULL, passive_thread, &done))
9962306a36Sopenharmony_ci				exit((perror("pthread_create"), EXIT_FAILURE));
10062306a36Sopenharmony_ci		}
10162306a36Sopenharmony_ci		__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
10262306a36Sopenharmony_ci		futex_wake(&done, thread_params.nthreads, 0);
10362306a36Sopenharmony_ci		for (i = 0; i < thread_params.nthreads; i++)
10462306a36Sopenharmony_ci			pthread_join(threads[i], NULL);
10562306a36Sopenharmony_ci	}
10662306a36Sopenharmony_ci	free(threads);
10762306a36Sopenharmony_ci	return NULL;
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci// The benchmark creates nbreakpoints inheritable breakpoints,
11162306a36Sopenharmony_ci// then starts nparallel threads which create and join bench_repeat batches of nthreads threads.
11262306a36Sopenharmony_ciint bench_breakpoint_thread(int argc, const char **argv)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	unsigned int i, result_usec;
11562306a36Sopenharmony_ci	int repeat = bench_repeat;
11662306a36Sopenharmony_ci	struct breakpoint *breakpoints;
11762306a36Sopenharmony_ci	pthread_t *parallel;
11862306a36Sopenharmony_ci	struct timeval start, stop, diff;
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	if (parse_options(argc, argv, thread_options, thread_usage, 0)) {
12162306a36Sopenharmony_ci		usage_with_options(thread_usage, thread_options);
12262306a36Sopenharmony_ci		exit(EXIT_FAILURE);
12362306a36Sopenharmony_ci	}
12462306a36Sopenharmony_ci	breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0]));
12562306a36Sopenharmony_ci	parallel = calloc(thread_params.nparallel, sizeof(parallel[0]));
12662306a36Sopenharmony_ci	if (!breakpoints || !parallel)
12762306a36Sopenharmony_ci		exit((perror("calloc"), EXIT_FAILURE));
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	for (i = 0; i < thread_params.nbreakpoints; i++) {
13062306a36Sopenharmony_ci		breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci		if (breakpoints[i].fd < 0) {
13362306a36Sopenharmony_ci			if (breakpoints[i].fd == -ENODEV) {
13462306a36Sopenharmony_ci				printf("Skipping perf bench breakpoint thread: No hardware support\n");
13562306a36Sopenharmony_ci				return 0;
13662306a36Sopenharmony_ci			}
13762306a36Sopenharmony_ci			exit((perror("perf_event_open"), EXIT_FAILURE));
13862306a36Sopenharmony_ci		}
13962306a36Sopenharmony_ci	}
14062306a36Sopenharmony_ci	gettimeofday(&start, NULL);
14162306a36Sopenharmony_ci	for (i = 0; i < thread_params.nparallel; i++) {
14262306a36Sopenharmony_ci		if (pthread_create(&parallel[i], NULL, breakpoint_thread, &repeat))
14362306a36Sopenharmony_ci			exit((perror("pthread_create"), EXIT_FAILURE));
14462306a36Sopenharmony_ci	}
14562306a36Sopenharmony_ci	for (i = 0; i < thread_params.nparallel; i++)
14662306a36Sopenharmony_ci		pthread_join(parallel[i], NULL);
14762306a36Sopenharmony_ci	gettimeofday(&stop, NULL);
14862306a36Sopenharmony_ci	timersub(&stop, &start, &diff);
14962306a36Sopenharmony_ci	for (i = 0; i < thread_params.nbreakpoints; i++)
15062306a36Sopenharmony_ci		close(breakpoints[i].fd);
15162306a36Sopenharmony_ci	free(parallel);
15262306a36Sopenharmony_ci	free(breakpoints);
15362306a36Sopenharmony_ci	switch (bench_format) {
15462306a36Sopenharmony_ci	case BENCH_FORMAT_DEFAULT:
15562306a36Sopenharmony_ci		printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n",
15662306a36Sopenharmony_ci			bench_repeat, thread_params.nbreakpoints, thread_params.nparallel);
15762306a36Sopenharmony_ci		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
15862306a36Sopenharmony_ci			(long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
15962306a36Sopenharmony_ci		result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
16062306a36Sopenharmony_ci		printf(" %14lf usecs/op\n",
16162306a36Sopenharmony_ci			(double)result_usec / bench_repeat / thread_params.nthreads);
16262306a36Sopenharmony_ci		printf(" %14lf usecs/op/cpu\n",
16362306a36Sopenharmony_ci			(double)result_usec / bench_repeat /
16462306a36Sopenharmony_ci			thread_params.nthreads * thread_params.nparallel);
16562306a36Sopenharmony_ci		break;
16662306a36Sopenharmony_ci	case BENCH_FORMAT_SIMPLE:
16762306a36Sopenharmony_ci		printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
16862306a36Sopenharmony_ci		break;
16962306a36Sopenharmony_ci	default:
17062306a36Sopenharmony_ci		fprintf(stderr, "Unknown format: %d\n", bench_format);
17162306a36Sopenharmony_ci		exit(EXIT_FAILURE);
17262306a36Sopenharmony_ci	}
17362306a36Sopenharmony_ci	return 0;
17462306a36Sopenharmony_ci}
17562306a36Sopenharmony_ci
/*
 * Tunables for "perf bench breakpoint enable".
 * Both counts default to 0 and are bound to command-line options in
 * enable_options[].
 */
struct {
	unsigned int npassive;	/* -p / --passive */
	unsigned int nactive;	/* -a / --active */
} enable_params = {
	.npassive	= 0,
	.nactive	= 0,
};
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cistatic const struct option enable_options[] = {
18562306a36Sopenharmony_ci	OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"),
18662306a36Sopenharmony_ci	OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"),
18762306a36Sopenharmony_ci	OPT_END()
18862306a36Sopenharmony_ci};
18962306a36Sopenharmony_ci
/* NULL-terminated usage strings for "perf bench breakpoint enable". */
static const char * const enable_usage[] = {
	"perf bench breakpoint enable <options>",
	NULL,
};
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci// The benchmark creates an inheritable breakpoint,
19662306a36Sopenharmony_ci// then starts npassive threads that block and nactive threads that actively spin
19762306a36Sopenharmony_ci// and then disables and enables the breakpoint bench_repeat times.
19862306a36Sopenharmony_ciint bench_breakpoint_enable(int argc, const char **argv)
19962306a36Sopenharmony_ci{
20062306a36Sopenharmony_ci	unsigned int i, nthreads, result_usec, done = 0;
20162306a36Sopenharmony_ci	char watched;
20262306a36Sopenharmony_ci	int fd;
20362306a36Sopenharmony_ci	pthread_t *threads;
20462306a36Sopenharmony_ci	struct timeval start, stop, diff;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	if (parse_options(argc, argv, enable_options, enable_usage, 0)) {
20762306a36Sopenharmony_ci		usage_with_options(enable_usage, enable_options);
20862306a36Sopenharmony_ci		exit(EXIT_FAILURE);
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci	fd = breakpoint_setup(&watched);
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	if (fd < 0) {
21362306a36Sopenharmony_ci		if (fd == -ENODEV) {
21462306a36Sopenharmony_ci			printf("Skipping perf bench breakpoint enable: No hardware support\n");
21562306a36Sopenharmony_ci			return 0;
21662306a36Sopenharmony_ci		}
21762306a36Sopenharmony_ci		exit((perror("perf_event_open"), EXIT_FAILURE));
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci	nthreads = enable_params.npassive + enable_params.nactive;
22062306a36Sopenharmony_ci	threads = calloc(nthreads, sizeof(threads[0]));
22162306a36Sopenharmony_ci	if (!threads)
22262306a36Sopenharmony_ci		exit((perror("calloc"), EXIT_FAILURE));
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	for (i = 0; i < nthreads; i++) {
22562306a36Sopenharmony_ci		if (pthread_create(&threads[i], NULL,
22662306a36Sopenharmony_ci			i < enable_params.npassive ? passive_thread : active_thread, &done))
22762306a36Sopenharmony_ci			exit((perror("pthread_create"), EXIT_FAILURE));
22862306a36Sopenharmony_ci	}
22962306a36Sopenharmony_ci	usleep(10000);  // let the threads block
23062306a36Sopenharmony_ci	gettimeofday(&start, NULL);
23162306a36Sopenharmony_ci	for (i = 0; i < bench_repeat; i++) {
23262306a36Sopenharmony_ci		if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
23362306a36Sopenharmony_ci			exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE));
23462306a36Sopenharmony_ci		if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0))
23562306a36Sopenharmony_ci			exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE));
23662306a36Sopenharmony_ci	}
23762306a36Sopenharmony_ci	gettimeofday(&stop, NULL);
23862306a36Sopenharmony_ci	timersub(&stop, &start, &diff);
23962306a36Sopenharmony_ci	__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
24062306a36Sopenharmony_ci	futex_wake(&done, enable_params.npassive, 0);
24162306a36Sopenharmony_ci	for (i = 0; i < nthreads; i++)
24262306a36Sopenharmony_ci		pthread_join(threads[i], NULL);
24362306a36Sopenharmony_ci	free(threads);
24462306a36Sopenharmony_ci	close(fd);
24562306a36Sopenharmony_ci	switch (bench_format) {
24662306a36Sopenharmony_ci	case BENCH_FORMAT_DEFAULT:
24762306a36Sopenharmony_ci		printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n",
24862306a36Sopenharmony_ci			bench_repeat, enable_params.npassive, enable_params.nactive);
24962306a36Sopenharmony_ci		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
25062306a36Sopenharmony_ci			(long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
25162306a36Sopenharmony_ci		result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
25262306a36Sopenharmony_ci		printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat);
25362306a36Sopenharmony_ci		break;
25462306a36Sopenharmony_ci	case BENCH_FORMAT_SIMPLE:
25562306a36Sopenharmony_ci		printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
25662306a36Sopenharmony_ci		break;
25762306a36Sopenharmony_ci	default:
25862306a36Sopenharmony_ci		fprintf(stderr, "Unknown format: %d\n", bench_format);
25962306a36Sopenharmony_ci		exit(EXIT_FAILURE);
26062306a36Sopenharmony_ci	}
26162306a36Sopenharmony_ci	return 0;
26262306a36Sopenharmony_ci}
263