162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include <subcmd/parse-options.h> 462306a36Sopenharmony_ci#include <linux/hw_breakpoint.h> 562306a36Sopenharmony_ci#include <linux/perf_event.h> 662306a36Sopenharmony_ci#include <linux/time64.h> 762306a36Sopenharmony_ci#include <sys/syscall.h> 862306a36Sopenharmony_ci#include <sys/ioctl.h> 962306a36Sopenharmony_ci#include <sys/time.h> 1062306a36Sopenharmony_ci#include <pthread.h> 1162306a36Sopenharmony_ci#include <stddef.h> 1262306a36Sopenharmony_ci#include <stdlib.h> 1362306a36Sopenharmony_ci#include <unistd.h> 1462306a36Sopenharmony_ci#include <stdio.h> 1562306a36Sopenharmony_ci#include <errno.h> 1662306a36Sopenharmony_ci#include "bench.h" 1762306a36Sopenharmony_ci#include "futex.h" 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_cistruct { 2062306a36Sopenharmony_ci unsigned int nbreakpoints; 2162306a36Sopenharmony_ci unsigned int nparallel; 2262306a36Sopenharmony_ci unsigned int nthreads; 2362306a36Sopenharmony_ci} thread_params = { 2462306a36Sopenharmony_ci .nbreakpoints = 1, 2562306a36Sopenharmony_ci .nparallel = 1, 2662306a36Sopenharmony_ci .nthreads = 1, 2762306a36Sopenharmony_ci}; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_cistatic const struct option thread_options[] = { 3062306a36Sopenharmony_ci OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints, 3162306a36Sopenharmony_ci "Specify amount of breakpoints"), 3262306a36Sopenharmony_ci OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"), 3362306a36Sopenharmony_ci OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"), 3462306a36Sopenharmony_ci OPT_END() 3562306a36Sopenharmony_ci}; 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_cistatic const char * const thread_usage[] = { 3862306a36Sopenharmony_ci "perf bench breakpoint thread <options>", 3962306a36Sopenharmony_ci NULL 4062306a36Sopenharmony_ci}; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_cistruct breakpoint { 4362306a36Sopenharmony_ci int fd; 4462306a36Sopenharmony_ci char watched; 4562306a36Sopenharmony_ci}; 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_cistatic int breakpoint_setup(void *addr) 4862306a36Sopenharmony_ci{ 4962306a36Sopenharmony_ci struct perf_event_attr attr = { .size = 0, }; 5062306a36Sopenharmony_ci int fd; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci attr.type = PERF_TYPE_BREAKPOINT; 5362306a36Sopenharmony_ci attr.size = sizeof(attr); 5462306a36Sopenharmony_ci attr.inherit = 1; 5562306a36Sopenharmony_ci attr.exclude_kernel = 1; 5662306a36Sopenharmony_ci attr.exclude_hv = 1; 5762306a36Sopenharmony_ci attr.bp_addr = (unsigned long)addr; 5862306a36Sopenharmony_ci attr.bp_type = HW_BREAKPOINT_RW; 5962306a36Sopenharmony_ci attr.bp_len = HW_BREAKPOINT_LEN_1; 6062306a36Sopenharmony_ci fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (fd < 0) 6362306a36Sopenharmony_ci fd = -errno; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci return fd; 6662306a36Sopenharmony_ci} 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_cistatic void *passive_thread(void *arg) 6962306a36Sopenharmony_ci{ 7062306a36Sopenharmony_ci unsigned int *done = (unsigned int *)arg; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci while (!__atomic_load_n(done, __ATOMIC_RELAXED)) 7362306a36Sopenharmony_ci futex_wait(done, 0, NULL, 0); 7462306a36Sopenharmony_ci return NULL; 7562306a36Sopenharmony_ci} 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_cistatic void *active_thread(void *arg) 7862306a36Sopenharmony_ci{ 7962306a36Sopenharmony_ci unsigned int *done = (unsigned int *)arg; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci while (!__atomic_load_n(done, __ATOMIC_RELAXED)); 8262306a36Sopenharmony_ci return NULL; 8362306a36Sopenharmony_ci} 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_cistatic void *breakpoint_thread(void *arg) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci unsigned int i, done; 8862306a36Sopenharmony_ci int *repeat = (int *)arg; 8962306a36Sopenharmony_ci pthread_t *threads; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci threads = calloc(thread_params.nthreads, sizeof(threads[0])); 9262306a36Sopenharmony_ci if (!threads) 9362306a36Sopenharmony_ci exit((perror("calloc"), EXIT_FAILURE)); 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) { 9662306a36Sopenharmony_ci done = 0; 9762306a36Sopenharmony_ci for (i = 0; i < thread_params.nthreads; i++) { 9862306a36Sopenharmony_ci if (pthread_create(&threads[i], NULL, passive_thread, &done)) 9962306a36Sopenharmony_ci exit((perror("pthread_create"), EXIT_FAILURE)); 10062306a36Sopenharmony_ci } 10162306a36Sopenharmony_ci __atomic_store_n(&done, 1, __ATOMIC_RELAXED); 10262306a36Sopenharmony_ci futex_wake(&done, thread_params.nthreads, 0); 10362306a36Sopenharmony_ci for (i = 0; i < thread_params.nthreads; i++) 10462306a36Sopenharmony_ci pthread_join(threads[i], NULL); 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci free(threads); 10762306a36Sopenharmony_ci return NULL; 10862306a36Sopenharmony_ci} 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci// The benchmark creates nbreakpoints inheritable breakpoints, 11162306a36Sopenharmony_ci// then starts nparallel threads which create and join bench_repeat batches of nthreads threads. 11262306a36Sopenharmony_ciint bench_breakpoint_thread(int argc, const char **argv) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci unsigned int i, result_usec; 11562306a36Sopenharmony_ci int repeat = bench_repeat; 11662306a36Sopenharmony_ci struct breakpoint *breakpoints; 11762306a36Sopenharmony_ci pthread_t *parallel; 11862306a36Sopenharmony_ci struct timeval start, stop, diff; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci if (parse_options(argc, argv, thread_options, thread_usage, 0)) { 12162306a36Sopenharmony_ci usage_with_options(thread_usage, thread_options); 12262306a36Sopenharmony_ci exit(EXIT_FAILURE); 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0])); 12562306a36Sopenharmony_ci parallel = calloc(thread_params.nparallel, sizeof(parallel[0])); 12662306a36Sopenharmony_ci if (!breakpoints || !parallel) 12762306a36Sopenharmony_ci exit((perror("calloc"), EXIT_FAILURE)); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci for (i = 0; i < thread_params.nbreakpoints; i++) { 13062306a36Sopenharmony_ci breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched); 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci if (breakpoints[i].fd < 0) { 13362306a36Sopenharmony_ci if (breakpoints[i].fd == -ENODEV) { 13462306a36Sopenharmony_ci printf("Skipping perf bench breakpoint thread: No hardware support\n"); 13562306a36Sopenharmony_ci return 0; 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci exit((perror("perf_event_open"), EXIT_FAILURE)); 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci } 14062306a36Sopenharmony_ci gettimeofday(&start, NULL); 14162306a36Sopenharmony_ci for (i = 0; i < thread_params.nparallel; i++) { 14262306a36Sopenharmony_ci if (pthread_create(¶llel[i], NULL, breakpoint_thread, &repeat)) 14362306a36Sopenharmony_ci exit((perror("pthread_create"), EXIT_FAILURE)); 14462306a36Sopenharmony_ci } 14562306a36Sopenharmony_ci for (i = 0; i < thread_params.nparallel; i++) 14662306a36Sopenharmony_ci pthread_join(parallel[i], NULL); 14762306a36Sopenharmony_ci gettimeofday(&stop, NULL); 14862306a36Sopenharmony_ci timersub(&stop, &start, &diff); 14962306a36Sopenharmony_ci for (i = 0; i < thread_params.nbreakpoints; i++) 15062306a36Sopenharmony_ci close(breakpoints[i].fd); 15162306a36Sopenharmony_ci free(parallel); 15262306a36Sopenharmony_ci free(breakpoints); 15362306a36Sopenharmony_ci switch (bench_format) { 15462306a36Sopenharmony_ci case BENCH_FORMAT_DEFAULT: 15562306a36Sopenharmony_ci printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n", 15662306a36Sopenharmony_ci bench_repeat, thread_params.nbreakpoints, thread_params.nparallel); 15762306a36Sopenharmony_ci printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", 15862306a36Sopenharmony_ci (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 15962306a36Sopenharmony_ci result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 16062306a36Sopenharmony_ci printf(" %14lf usecs/op\n", 16162306a36Sopenharmony_ci (double)result_usec / bench_repeat / thread_params.nthreads); 16262306a36Sopenharmony_ci printf(" %14lf usecs/op/cpu\n", 16362306a36Sopenharmony_ci (double)result_usec / bench_repeat / 16462306a36Sopenharmony_ci thread_params.nthreads * thread_params.nparallel); 16562306a36Sopenharmony_ci break; 16662306a36Sopenharmony_ci case BENCH_FORMAT_SIMPLE: 16762306a36Sopenharmony_ci printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 16862306a36Sopenharmony_ci break; 16962306a36Sopenharmony_ci default: 17062306a36Sopenharmony_ci fprintf(stderr, "Unknown format: %d\n", bench_format); 17162306a36Sopenharmony_ci exit(EXIT_FAILURE); 17262306a36Sopenharmony_ci } 17362306a36Sopenharmony_ci return 0; 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_cistruct { 17762306a36Sopenharmony_ci unsigned int npassive; 17862306a36Sopenharmony_ci unsigned int nactive; 17962306a36Sopenharmony_ci} enable_params = { 18062306a36Sopenharmony_ci .nactive = 0, 18162306a36Sopenharmony_ci .npassive = 0, 18262306a36Sopenharmony_ci}; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_cistatic const struct option enable_options[] = { 18562306a36Sopenharmony_ci OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"), 18662306a36Sopenharmony_ci OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"), 18762306a36Sopenharmony_ci OPT_END() 18862306a36Sopenharmony_ci}; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_cistatic const char * const enable_usage[] = { 19162306a36Sopenharmony_ci "perf bench breakpoint enable <options>", 19262306a36Sopenharmony_ci NULL 19362306a36Sopenharmony_ci}; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci// The benchmark creates an inheritable breakpoint, 19662306a36Sopenharmony_ci// then starts npassive threads that block and nactive threads that actively spin 19762306a36Sopenharmony_ci// and then disables and enables the breakpoint bench_repeat times. 19862306a36Sopenharmony_ciint bench_breakpoint_enable(int argc, const char **argv) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci unsigned int i, nthreads, result_usec, done = 0; 20162306a36Sopenharmony_ci char watched; 20262306a36Sopenharmony_ci int fd; 20362306a36Sopenharmony_ci pthread_t *threads; 20462306a36Sopenharmony_ci struct timeval start, stop, diff; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci if (parse_options(argc, argv, enable_options, enable_usage, 0)) { 20762306a36Sopenharmony_ci usage_with_options(enable_usage, enable_options); 20862306a36Sopenharmony_ci exit(EXIT_FAILURE); 20962306a36Sopenharmony_ci } 21062306a36Sopenharmony_ci fd = breakpoint_setup(&watched); 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci if (fd < 0) { 21362306a36Sopenharmony_ci if (fd == -ENODEV) { 21462306a36Sopenharmony_ci printf("Skipping perf bench breakpoint enable: No hardware support\n"); 21562306a36Sopenharmony_ci return 0; 21662306a36Sopenharmony_ci } 21762306a36Sopenharmony_ci exit((perror("perf_event_open"), EXIT_FAILURE)); 21862306a36Sopenharmony_ci } 21962306a36Sopenharmony_ci nthreads = enable_params.npassive + enable_params.nactive; 22062306a36Sopenharmony_ci threads = calloc(nthreads, sizeof(threads[0])); 22162306a36Sopenharmony_ci if (!threads) 22262306a36Sopenharmony_ci exit((perror("calloc"), EXIT_FAILURE)); 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci for (i = 0; i < nthreads; i++) { 22562306a36Sopenharmony_ci if (pthread_create(&threads[i], NULL, 22662306a36Sopenharmony_ci i < enable_params.npassive ? passive_thread : active_thread, &done)) 22762306a36Sopenharmony_ci exit((perror("pthread_create"), EXIT_FAILURE)); 22862306a36Sopenharmony_ci } 22962306a36Sopenharmony_ci usleep(10000); // let the threads block 23062306a36Sopenharmony_ci gettimeofday(&start, NULL); 23162306a36Sopenharmony_ci for (i = 0; i < bench_repeat; i++) { 23262306a36Sopenharmony_ci if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0)) 23362306a36Sopenharmony_ci exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE)); 23462306a36Sopenharmony_ci if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) 23562306a36Sopenharmony_ci exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE)); 23662306a36Sopenharmony_ci } 23762306a36Sopenharmony_ci gettimeofday(&stop, NULL); 23862306a36Sopenharmony_ci timersub(&stop, &start, &diff); 23962306a36Sopenharmony_ci __atomic_store_n(&done, 1, __ATOMIC_RELAXED); 24062306a36Sopenharmony_ci futex_wake(&done, enable_params.npassive, 0); 24162306a36Sopenharmony_ci for (i = 0; i < nthreads; i++) 24262306a36Sopenharmony_ci pthread_join(threads[i], NULL); 24362306a36Sopenharmony_ci free(threads); 24462306a36Sopenharmony_ci close(fd); 24562306a36Sopenharmony_ci switch (bench_format) { 24662306a36Sopenharmony_ci case BENCH_FORMAT_DEFAULT: 24762306a36Sopenharmony_ci printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n", 24862306a36Sopenharmony_ci bench_repeat, enable_params.npassive, enable_params.nactive); 24962306a36Sopenharmony_ci printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", 25062306a36Sopenharmony_ci (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 25162306a36Sopenharmony_ci result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 25262306a36Sopenharmony_ci printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat); 25362306a36Sopenharmony_ci break; 25462306a36Sopenharmony_ci case BENCH_FORMAT_SIMPLE: 25562306a36Sopenharmony_ci printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 25662306a36Sopenharmony_ci break; 25762306a36Sopenharmony_ci default: 25862306a36Sopenharmony_ci fprintf(stderr, "Unknown format: %d\n", bench_format); 25962306a36Sopenharmony_ci exit(EXIT_FAILURE); 26062306a36Sopenharmony_ci } 26162306a36Sopenharmony_ci return 0; 26262306a36Sopenharmony_ci} 263