1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2020 Facebook */
3#define _GNU_SOURCE
4#include <argp.h>
5#include <linux/compiler.h>
6#include <sys/time.h>
7#include <sched.h>
8#include <fcntl.h>
9#include <pthread.h>
10#include <sys/sysinfo.h>
11#include <sys/resource.h>
12#include <signal.h>
13#include "bench.h"
14#include "testing_helpers.h"
15
16struct env env = {
17	.warmup_sec = 1,
18	.duration_sec = 5,
19	.affinity = false,
20	.consumer_cnt = 1,
21	.producer_cnt = 1,
22};
23
24static int libbpf_print_fn(enum libbpf_print_level level,
25		    const char *format, va_list args)
26{
27	if (level == LIBBPF_DEBUG && !env.verbose)
28		return 0;
29	return vfprintf(stderr, format, args);
30}
31
/* Request RLIM_INFINITY for both the soft and the hard RLIMIT_MEMLOCK limit.
 *
 * Returns whatever setrlimit() returned.
 */
static int bump_memlock_rlimit(void)
{
	struct rlimit unlimited;

	unlimited.rlim_cur = RLIM_INFINITY;
	unlimited.rlim_max = RLIM_INFINITY;

	return setrlimit(RLIMIT_MEMLOCK, &unlimited);
}
41
42void setup_libbpf()
43{
44	int err;
45
46	libbpf_set_print(libbpf_print_fn);
47
48	err = bump_memlock_rlimit();
49	if (err)
50		fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
51}
52
53void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
54{
55	double hits_per_sec, drops_per_sec;
56	double hits_per_prod;
57
58	hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
59	hits_per_prod = hits_per_sec / env.producer_cnt;
60	drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
61
62	printf("Iter %3d (%7.3lfus): ",
63	       iter, (delta_ns - 1000000000) / 1000.0);
64
65	printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
66	       hits_per_sec, hits_per_prod, drops_per_sec);
67}
68
69void hits_drops_report_final(struct bench_res res[], int res_cnt)
70{
71	int i;
72	double hits_mean = 0.0, drops_mean = 0.0;
73	double hits_stddev = 0.0, drops_stddev = 0.0;
74
75	for (i = 0; i < res_cnt; i++) {
76		hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
77		drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
78	}
79
80	if (res_cnt > 1)  {
81		for (i = 0; i < res_cnt; i++) {
82			hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
83				       (hits_mean - res[i].hits / 1000000.0) /
84				       (res_cnt - 1.0);
85			drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
86					(drops_mean - res[i].drops / 1000000.0) /
87					(res_cnt - 1.0);
88		}
89		hits_stddev = sqrt(hits_stddev);
90		drops_stddev = sqrt(drops_stddev);
91	}
92	printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
93	       hits_mean, hits_stddev, hits_mean / env.producer_cnt);
94	printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
95	       drops_mean, drops_stddev);
96}
97
/* Version string and bug-report address; glibc's argp picks these globals
 * up by name.
 */
const char *argp_program_version = "benchmark";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";

/* Help text, passed to argp as the parser's .doc string. */
const char argp_program_doc[] =
	"benchmark    Generic benchmarking framework.\n"
	"\n"
	"This tool runs benchmarks.\n"
	"\n"
	"USAGE: benchmark <bench-name>\n"
	"\n"
	"EXAMPLES:\n"
	"    # run 'count-local' benchmark with 1 producer and 1 consumer\n"
	"    benchmark count-local\n"
	"    # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n"
	"    benchmark -p16 -c8 -a count-local\n";
112
/* Keys for the long-only options. Their values lie well above the character
 * codes used as keys by the short options.
 */
enum {
	ARG_PROD_AFFINITY_SET = 1000,
	ARG_CONS_AFFINITY_SET,	/* 1001 */
};
117
118static const struct argp_option opts[] = {
119	{ "list", 'l', NULL, 0, "List available benchmarks"},
120	{ "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
121	{ "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
122	{ "producers", 'p', "NUM", 0, "Number of producer threads"},
123	{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
124	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
125	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
126	{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
127	  "Set of CPUs for producer threads; implies --affinity"},
128	{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
129	  "Set of CPUs for consumer threads; implies --affinity"},
130	{},
131};
132
/* argp parser for the ring buffer benchmark options; not defined in this
 * file.
 */
extern struct argp bench_ringbufs_argp;

/* Child parsers attached to the top-level argp parser; the empty entry
 * terminates the list.
 */
static const struct argp_child bench_parsers[] = {
	{
		.argp = &bench_ringbufs_argp,
		.flags = 0,
		.header = "Ring buffers benchmark",
		.group = 0,
	},
	{},
};
139
140static error_t parse_arg(int key, char *arg, struct argp_state *state)
141{
142	static int pos_args;
143
144	switch (key) {
145	case 'v':
146		env.verbose = true;
147		break;
148	case 'l':
149		env.list = true;
150		break;
151	case 'd':
152		env.duration_sec = strtol(arg, NULL, 10);
153		if (env.duration_sec <= 0) {
154			fprintf(stderr, "Invalid duration: %s\n", arg);
155			argp_usage(state);
156		}
157		break;
158	case 'w':
159		env.warmup_sec = strtol(arg, NULL, 10);
160		if (env.warmup_sec <= 0) {
161			fprintf(stderr, "Invalid warm-up duration: %s\n", arg);
162			argp_usage(state);
163		}
164		break;
165	case 'p':
166		env.producer_cnt = strtol(arg, NULL, 10);
167		if (env.producer_cnt <= 0) {
168			fprintf(stderr, "Invalid producer count: %s\n", arg);
169			argp_usage(state);
170		}
171		break;
172	case 'c':
173		env.consumer_cnt = strtol(arg, NULL, 10);
174		if (env.consumer_cnt <= 0) {
175			fprintf(stderr, "Invalid consumer count: %s\n", arg);
176			argp_usage(state);
177		}
178		break;
179	case 'a':
180		env.affinity = true;
181		break;
182	case ARG_PROD_AFFINITY_SET:
183		env.affinity = true;
184		if (parse_num_list(arg, &env.prod_cpus.cpus,
185				   &env.prod_cpus.cpus_len)) {
186			fprintf(stderr, "Invalid format of CPU set for producers.");
187			argp_usage(state);
188		}
189		break;
190	case ARG_CONS_AFFINITY_SET:
191		env.affinity = true;
192		if (parse_num_list(arg, &env.cons_cpus.cpus,
193				   &env.cons_cpus.cpus_len)) {
194			fprintf(stderr, "Invalid format of CPU set for consumers.");
195			argp_usage(state);
196		}
197		break;
198	case ARGP_KEY_ARG:
199		if (pos_args++) {
200			fprintf(stderr,
201				"Unrecognized positional argument: %s\n", arg);
202			argp_usage(state);
203		}
204		env.bench_name = strdup(arg);
205		break;
206	default:
207		return ARGP_ERR_UNKNOWN;
208	}
209	return 0;
210}
211
212static void parse_cmdline_args(int argc, char **argv)
213{
214	static const struct argp argp = {
215		.options = opts,
216		.parser = parse_arg,
217		.doc = argp_program_doc,
218		.children = bench_parsers,
219	};
220	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
221		exit(1);
222	if (!env.list && !env.bench_name) {
223		argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
224		exit(1);
225	}
226}
227
228static void collect_measurements(long delta_ns);
229
230static __u64 last_time_ns;
231static void sigalarm_handler(int signo)
232{
233	long new_time_ns = get_time_ns();
234	long delta_ns = new_time_ns - last_time_ns;
235
236	collect_measurements(delta_ns);
237
238	last_time_ns = new_time_ns;
239}
240
241/* set up periodic 1-second timer */
242static void setup_timer()
243{
244	static struct sigaction sigalarm_action = {
245		.sa_handler = sigalarm_handler,
246	};
247	struct itimerval timer_settings = {};
248	int err;
249
250	last_time_ns = get_time_ns();
251	err = sigaction(SIGALRM, &sigalarm_action, NULL);
252	if (err < 0) {
253		fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno);
254		exit(1);
255	}
256	timer_settings.it_interval.tv_sec = 1;
257	timer_settings.it_value.tv_sec = 1;
258	err = setitimer(ITIMER_REAL, &timer_settings, NULL);
259	if (err < 0) {
260		fprintf(stderr, "failed to arm interval timer: %d\n", -errno);
261		exit(1);
262	}
263}
264
/* Pin @thread to the single CPU @cpu.
 *
 * Exits with status 1 if the affinity cannot be set.
 */
static void set_thread_affinity(pthread_t thread, int cpu)
{
	cpu_set_t cpuset;
	int err;

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	/* pthread functions return the error number directly and do not set
	 * errno, so the return value is what has to be reported
	 */
	err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
	if (err) {
		fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
			cpu, -err);
		exit(1);
	}
}
277
278static int next_cpu(struct cpu_set *cpu_set)
279{
280	if (cpu_set->cpus) {
281		int i;
282
283		/* find next available CPU */
284		for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) {
285			if (cpu_set->cpus[i]) {
286				cpu_set->next_cpu = i + 1;
287				return i;
288			}
289		}
290		fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i);
291		exit(1);
292	}
293
294	return cpu_set->next_cpu++;
295}
296
/* Run-time state of the benchmark being executed. */
struct bench_state {
	/* number of samples stored in results so far */
	int res_cnt;
	/* one entry per collected sample */
	struct bench_res *results;
	/* handles of the consumer threads */
	pthread_t *consumers;
	/* handles of the producer threads */
	pthread_t *producers;
};

static struct bench_state state;
303
/* The benchmark selected by name in setup_benchmark(); NULL until then. */
const struct bench *bench = NULL;

/* Benchmark descriptors; none of them is defined in this file. */
extern const struct bench bench_count_global;
extern const struct bench bench_count_local;

extern const struct bench bench_rename_base;
extern const struct bench bench_rename_kprobe;
extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;

extern const struct bench bench_trig_base;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;

extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
extern const struct bench bench_pb_custom;

/* All known benchmarks, in the order they are listed and searched. */
static const struct bench *benchs[] = {
	&bench_count_global,
	&bench_count_local,

	&bench_rename_base,
	&bench_rename_kprobe,
	&bench_rename_kretprobe,
	&bench_rename_rawtp,
	&bench_rename_fentry,
	&bench_rename_fexit,

	&bench_trig_base,
	&bench_trig_tp,
	&bench_trig_rawtp,
	&bench_trig_kprobe,
	&bench_trig_fentry,
	&bench_trig_fentry_sleep,
	&bench_trig_fmodret,

	&bench_rb_libbpf,
	&bench_rb_custom,
	&bench_pb_libbpf,
	&bench_pb_custom,
};
347
348static void setup_benchmark()
349{
350	int i, err;
351
352	if (!env.bench_name) {
353		fprintf(stderr, "benchmark name is not specified\n");
354		exit(1);
355	}
356
357	for (i = 0; i < ARRAY_SIZE(benchs); i++) {
358		if (strcmp(benchs[i]->name, env.bench_name) == 0) {
359			bench = benchs[i];
360			break;
361		}
362	}
363	if (!bench) {
364		fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
365		exit(1);
366	}
367
368	printf("Setting up benchmark '%s'...\n", bench->name);
369
370	state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
371	state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
372	state.results = calloc(env.duration_sec + env.warmup_sec + 2,
373			       sizeof(*state.results));
374	if (!state.producers || !state.consumers || !state.results)
375		exit(1);
376
377	if (bench->validate)
378		bench->validate();
379	if (bench->setup)
380		bench->setup();
381
382	for (i = 0; i < env.consumer_cnt; i++) {
383		err = pthread_create(&state.consumers[i], NULL,
384				     bench->consumer_thread, (void *)(long)i);
385		if (err) {
386			fprintf(stderr, "failed to create consumer thread #%d: %d\n",
387				i, -errno);
388			exit(1);
389		}
390		if (env.affinity)
391			set_thread_affinity(state.consumers[i],
392					    next_cpu(&env.cons_cpus));
393	}
394
395	/* unless explicit producer CPU list is specified, continue after
396	 * last consumer CPU
397	 */
398	if (!env.prod_cpus.cpus)
399		env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
400
401	for (i = 0; i < env.producer_cnt; i++) {
402		err = pthread_create(&state.producers[i], NULL,
403				     bench->producer_thread, (void *)(long)i);
404		if (err) {
405			fprintf(stderr, "failed to create producer thread #%d: %d\n",
406				i, -errno);
407			exit(1);
408		}
409		if (env.affinity)
410			set_thread_affinity(state.producers[i],
411					    next_cpu(&env.prod_cpus));
412	}
413
414	printf("Benchmark '%s' started.\n", bench->name);
415}
416
417static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
418static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER;
419
420static void collect_measurements(long delta_ns) {
421	int iter = state.res_cnt++;
422	struct bench_res *res = &state.results[iter];
423
424	bench->measure(res);
425
426	if (bench->report_progress)
427		bench->report_progress(iter, res, delta_ns);
428
429	if (iter == env.duration_sec + env.warmup_sec) {
430		pthread_mutex_lock(&bench_done_mtx);
431		pthread_cond_signal(&bench_done);
432		pthread_mutex_unlock(&bench_done_mtx);
433	}
434}
435
436int main(int argc, char **argv)
437{
438	parse_cmdline_args(argc, argv);
439
440	if (env.list) {
441		int i;
442
443		printf("Available benchmarks:\n");
444		for (i = 0; i < ARRAY_SIZE(benchs); i++) {
445			printf("- %s\n", benchs[i]->name);
446		}
447		return 0;
448	}
449
450	setup_benchmark();
451
452	setup_timer();
453
454	pthread_mutex_lock(&bench_done_mtx);
455	pthread_cond_wait(&bench_done, &bench_done_mtx);
456	pthread_mutex_unlock(&bench_done_mtx);
457
458	if (bench->report_final)
459		/* skip first sample */
460		bench->report_final(state.results + env.warmup_sec,
461				    state.res_cnt - env.warmup_sec);
462
463	return 0;
464}
465