// SPDX-License-Identifier: GPL-2.0
/*
 * mem-memcpy.c
 *
 * Simple memcpy() and memset() benchmarks
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include "debug.h"
1162306a36Sopenharmony_ci#include "../perf-sys.h"
1262306a36Sopenharmony_ci#include <subcmd/parse-options.h>
1362306a36Sopenharmony_ci#include "../util/header.h"
1462306a36Sopenharmony_ci#include "../util/cloexec.h"
1562306a36Sopenharmony_ci#include "../util/string2.h"
1662306a36Sopenharmony_ci#include "bench.h"
1762306a36Sopenharmony_ci#include "mem-memcpy-arch.h"
1862306a36Sopenharmony_ci#include "mem-memset-arch.h"
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include <stdio.h>
2162306a36Sopenharmony_ci#include <stdlib.h>
2262306a36Sopenharmony_ci#include <string.h>
2362306a36Sopenharmony_ci#include <unistd.h>
2462306a36Sopenharmony_ci#include <sys/time.h>
2562306a36Sopenharmony_ci#include <errno.h>
2662306a36Sopenharmony_ci#include <linux/time64.h>
2762306a36Sopenharmony_ci#include <linux/zalloc.h>
2862306a36Sopenharmony_ci
/* Step between successive throughput units when printing (bytes, KB, MB, GB). */
#define K 1024

/*
 * Benchmark settings.  The initial values are the defaults; the command
 * line option table in this file stores user-supplied values here.
 */

/* Buffer size as typed by the user, e.g. "1MB". */
static const char	*size_str	= "1MB";

/* Name of the function to benchmark, or "all". */
static const char	*function_str	= "all";

/* How many times the measured call is repeated. */
static int		nr_loops	= 1;

/* Measure with the cycles event instead of gettimeofday(). */
static bool		use_cycles;

/* File descriptor of the cycles event, set by init_cycles(). */
static int		cycles_fd;
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic const struct option options[] = {
3862306a36Sopenharmony_ci	OPT_STRING('s', "size", &size_str, "1MB",
3962306a36Sopenharmony_ci		    "Specify the size of the memory buffers. "
4062306a36Sopenharmony_ci		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	OPT_STRING('f', "function", &function_str, "all",
4362306a36Sopenharmony_ci		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	OPT_INTEGER('l', "nr_loops", &nr_loops,
4662306a36Sopenharmony_ci		    "Specify the number of loops to run. (default: 1)"),
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci	OPT_BOOLEAN('c', "cycles", &use_cycles,
4962306a36Sopenharmony_ci		    "Use a cycles event instead of gettimeofday() to measure performance"),
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	OPT_END()
5262306a36Sopenharmony_ci};
5362306a36Sopenharmony_ci
/* Signature of a memcpy()-compatible routine. */
typedef void *(*memcpy_t)(void *, const void *, size_t);

/* Signature of a memset()-compatible routine. */
typedef void *(*memset_t)(void *, int, size_t);

/*
 * One benchmarkable implementation.
 *
 * Tables of these are terminated by an entry whose name is NULL.
 */
struct function {
	/* Name matched against the --function option. */
	const char *name;

	/* Human readable description, printed with the results. */
	const char *desc;

	/* The routine itself; which member is valid depends on the table. */
	union {
		memcpy_t memcpy;
		memset_t memset;
	} fn;
};
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_cistatic struct perf_event_attr cycle_attr = {
6762306a36Sopenharmony_ci	.type		= PERF_TYPE_HARDWARE,
6862306a36Sopenharmony_ci	.config		= PERF_COUNT_HW_CPU_CYCLES
6962306a36Sopenharmony_ci};
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_cistatic int init_cycles(void)
7262306a36Sopenharmony_ci{
7362306a36Sopenharmony_ci	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	if (cycles_fd < 0 && errno == ENOSYS) {
7662306a36Sopenharmony_ci		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
7762306a36Sopenharmony_ci		return -1;
7862306a36Sopenharmony_ci	}
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	return cycles_fd;
8162306a36Sopenharmony_ci}
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_cistatic u64 get_cycles(void)
8462306a36Sopenharmony_ci{
8562306a36Sopenharmony_ci	int ret;
8662306a36Sopenharmony_ci	u64 clk;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	ret = read(cycles_fd, &clk, sizeof(u64));
8962306a36Sopenharmony_ci	BUG_ON(ret != sizeof(u64));
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	return clk;
9262306a36Sopenharmony_ci}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_cistatic double timeval2double(struct timeval *ts)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
9762306a36Sopenharmony_ci}
9862306a36Sopenharmony_ci
/*
 * Print a throughput value, given in bytes per second, scaled to
 * bytes/sec, KB/sec, MB/sec or GB/sec depending on its magnitude
 * (each unit is K times the previous one).
 *
 * The argument is converted to double and evaluated exactly once, so it
 * may be an arbitrary expression.
 */
#define print_bps(x) do {						\
		double print_bps_val_ = (x);				\
									\
		if (print_bps_val_ < K)					\
			printf(" %14lf bytes/sec\n", print_bps_val_);	\
		else if (print_bps_val_ < K * K)			\
			printf(" %14lf KB/sec\n", print_bps_val_ / K);	\
		else if (print_bps_val_ < K * K * K)			\
			printf(" %14lf MB/sec\n",			\
			       print_bps_val_ / K / K);			\
		else							\
			printf(" %14lf GB/sec\n",			\
			       print_bps_val_ / K / K / K);		\
	} while (0)
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_cistruct bench_mem_info {
11162306a36Sopenharmony_ci	const struct function *functions;
11262306a36Sopenharmony_ci	u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
11362306a36Sopenharmony_ci	double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
11462306a36Sopenharmony_ci	const char *const *usage;
11562306a36Sopenharmony_ci	bool alloc_src;
11662306a36Sopenharmony_ci};
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_cistatic void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	const struct function *r = &info->functions[r_idx];
12162306a36Sopenharmony_ci	double result_bps = 0.0;
12262306a36Sopenharmony_ci	u64 result_cycles = 0;
12362306a36Sopenharmony_ci	void *src = NULL, *dst = zalloc(size);
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	printf("# function '%s' (%s)\n", r->name, r->desc);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (dst == NULL)
12862306a36Sopenharmony_ci		goto out_alloc_failed;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	if (info->alloc_src) {
13162306a36Sopenharmony_ci		src = zalloc(size);
13262306a36Sopenharmony_ci		if (src == NULL)
13362306a36Sopenharmony_ci			goto out_alloc_failed;
13462306a36Sopenharmony_ci	}
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	if (bench_format == BENCH_FORMAT_DEFAULT)
13762306a36Sopenharmony_ci		printf("# Copying %s bytes ...\n\n", size_str);
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	if (use_cycles) {
14062306a36Sopenharmony_ci		result_cycles = info->do_cycles(r, size, src, dst);
14162306a36Sopenharmony_ci	} else {
14262306a36Sopenharmony_ci		result_bps = info->do_gettimeofday(r, size, src, dst);
14362306a36Sopenharmony_ci	}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	switch (bench_format) {
14662306a36Sopenharmony_ci	case BENCH_FORMAT_DEFAULT:
14762306a36Sopenharmony_ci		if (use_cycles) {
14862306a36Sopenharmony_ci			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
14962306a36Sopenharmony_ci		} else {
15062306a36Sopenharmony_ci			print_bps(result_bps);
15162306a36Sopenharmony_ci		}
15262306a36Sopenharmony_ci		break;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	case BENCH_FORMAT_SIMPLE:
15562306a36Sopenharmony_ci		if (use_cycles) {
15662306a36Sopenharmony_ci			printf("%lf\n", (double)result_cycles/size_total);
15762306a36Sopenharmony_ci		} else {
15862306a36Sopenharmony_ci			printf("%lf\n", result_bps);
15962306a36Sopenharmony_ci		}
16062306a36Sopenharmony_ci		break;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	default:
16362306a36Sopenharmony_ci		BUG_ON(1);
16462306a36Sopenharmony_ci		break;
16562306a36Sopenharmony_ci	}
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ciout_free:
16862306a36Sopenharmony_ci	free(src);
16962306a36Sopenharmony_ci	free(dst);
17062306a36Sopenharmony_ci	return;
17162306a36Sopenharmony_ciout_alloc_failed:
17262306a36Sopenharmony_ci	printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
17362306a36Sopenharmony_ci	goto out_free;
17462306a36Sopenharmony_ci}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_cistatic int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
17762306a36Sopenharmony_ci{
17862306a36Sopenharmony_ci	int i;
17962306a36Sopenharmony_ci	size_t size;
18062306a36Sopenharmony_ci	double size_total;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	argc = parse_options(argc, argv, options, info->usage, 0);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	if (use_cycles) {
18562306a36Sopenharmony_ci		i = init_cycles();
18662306a36Sopenharmony_ci		if (i < 0) {
18762306a36Sopenharmony_ci			fprintf(stderr, "Failed to open cycles counter\n");
18862306a36Sopenharmony_ci			return i;
18962306a36Sopenharmony_ci		}
19062306a36Sopenharmony_ci	}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	size = (size_t)perf_atoll((char *)size_str);
19362306a36Sopenharmony_ci	size_total = (double)size * nr_loops;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	if ((s64)size <= 0) {
19662306a36Sopenharmony_ci		fprintf(stderr, "Invalid size:%s\n", size_str);
19762306a36Sopenharmony_ci		return 1;
19862306a36Sopenharmony_ci	}
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	if (!strncmp(function_str, "all", 3)) {
20162306a36Sopenharmony_ci		for (i = 0; info->functions[i].name; i++)
20262306a36Sopenharmony_ci			__bench_mem_function(info, i, size, size_total);
20362306a36Sopenharmony_ci		return 0;
20462306a36Sopenharmony_ci	}
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	for (i = 0; info->functions[i].name; i++) {
20762306a36Sopenharmony_ci		if (!strcmp(info->functions[i].name, function_str))
20862306a36Sopenharmony_ci			break;
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci	if (!info->functions[i].name) {
21162306a36Sopenharmony_ci		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
21262306a36Sopenharmony_ci			printf("Unknown function: %s\n", function_str);
21362306a36Sopenharmony_ci		printf("Available functions:\n");
21462306a36Sopenharmony_ci		for (i = 0; info->functions[i].name; i++) {
21562306a36Sopenharmony_ci			printf("\t%s ... %s\n",
21662306a36Sopenharmony_ci			       info->functions[i].name, info->functions[i].desc);
21762306a36Sopenharmony_ci		}
21862306a36Sopenharmony_ci		return 1;
21962306a36Sopenharmony_ci	}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	__bench_mem_function(info, i, size, size_total);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	return 0;
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_cistatic void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
22962306a36Sopenharmony_ci	memset(src, 0, size);
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	/*
23262306a36Sopenharmony_ci	 * We prefault the freshly allocated memory range here,
23362306a36Sopenharmony_ci	 * to not measure page fault overhead:
23462306a36Sopenharmony_ci	 */
23562306a36Sopenharmony_ci	fn(dst, src, size);
23662306a36Sopenharmony_ci}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_cistatic u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
23962306a36Sopenharmony_ci{
24062306a36Sopenharmony_ci	u64 cycle_start = 0ULL, cycle_end = 0ULL;
24162306a36Sopenharmony_ci	memcpy_t fn = r->fn.memcpy;
24262306a36Sopenharmony_ci	int i;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	memcpy_prefault(fn, size, src, dst);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	cycle_start = get_cycles();
24762306a36Sopenharmony_ci	for (i = 0; i < nr_loops; ++i)
24862306a36Sopenharmony_ci		fn(dst, src, size);
24962306a36Sopenharmony_ci	cycle_end = get_cycles();
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	return cycle_end - cycle_start;
25262306a36Sopenharmony_ci}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_cistatic double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
25562306a36Sopenharmony_ci{
25662306a36Sopenharmony_ci	struct timeval tv_start, tv_end, tv_diff;
25762306a36Sopenharmony_ci	memcpy_t fn = r->fn.memcpy;
25862306a36Sopenharmony_ci	int i;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	memcpy_prefault(fn, size, src, dst);
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	BUG_ON(gettimeofday(&tv_start, NULL));
26362306a36Sopenharmony_ci	for (i = 0; i < nr_loops; ++i)
26462306a36Sopenharmony_ci		fn(dst, src, size);
26562306a36Sopenharmony_ci	BUG_ON(gettimeofday(&tv_end, NULL));
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	timersub(&tv_end, &tv_start, &tv_diff);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
27062306a36Sopenharmony_ci}
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_cistruct function memcpy_functions[] = {
27362306a36Sopenharmony_ci	{ .name		= "default",
27462306a36Sopenharmony_ci	  .desc		= "Default memcpy() provided by glibc",
27562306a36Sopenharmony_ci	  .fn.memcpy	= memcpy },
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci#ifdef HAVE_ARCH_X86_64_SUPPORT
27862306a36Sopenharmony_ci# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
27962306a36Sopenharmony_ci# include "mem-memcpy-x86-64-asm-def.h"
28062306a36Sopenharmony_ci# undef MEMCPY_FN
28162306a36Sopenharmony_ci#endif
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	{ .name = NULL, }
28462306a36Sopenharmony_ci};
28562306a36Sopenharmony_ci
/* NULL-terminated usage lines for the memcpy benchmark. */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL,
};
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ciint bench_mem_memcpy(int argc, const char **argv)
29262306a36Sopenharmony_ci{
29362306a36Sopenharmony_ci	struct bench_mem_info info = {
29462306a36Sopenharmony_ci		.functions		= memcpy_functions,
29562306a36Sopenharmony_ci		.do_cycles		= do_memcpy_cycles,
29662306a36Sopenharmony_ci		.do_gettimeofday	= do_memcpy_gettimeofday,
29762306a36Sopenharmony_ci		.usage			= bench_mem_memcpy_usage,
29862306a36Sopenharmony_ci		.alloc_src              = true,
29962306a36Sopenharmony_ci	};
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	return bench_mem_common(argc, argv, &info);
30262306a36Sopenharmony_ci}
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_cistatic u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
30562306a36Sopenharmony_ci{
30662306a36Sopenharmony_ci	u64 cycle_start = 0ULL, cycle_end = 0ULL;
30762306a36Sopenharmony_ci	memset_t fn = r->fn.memset;
30862306a36Sopenharmony_ci	int i;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	/*
31162306a36Sopenharmony_ci	 * We prefault the freshly allocated memory range here,
31262306a36Sopenharmony_ci	 * to not measure page fault overhead:
31362306a36Sopenharmony_ci	 */
31462306a36Sopenharmony_ci	fn(dst, -1, size);
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	cycle_start = get_cycles();
31762306a36Sopenharmony_ci	for (i = 0; i < nr_loops; ++i)
31862306a36Sopenharmony_ci		fn(dst, i, size);
31962306a36Sopenharmony_ci	cycle_end = get_cycles();
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	return cycle_end - cycle_start;
32262306a36Sopenharmony_ci}
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_cistatic double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
32562306a36Sopenharmony_ci{
32662306a36Sopenharmony_ci	struct timeval tv_start, tv_end, tv_diff;
32762306a36Sopenharmony_ci	memset_t fn = r->fn.memset;
32862306a36Sopenharmony_ci	int i;
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci	/*
33162306a36Sopenharmony_ci	 * We prefault the freshly allocated memory range here,
33262306a36Sopenharmony_ci	 * to not measure page fault overhead:
33362306a36Sopenharmony_ci	 */
33462306a36Sopenharmony_ci	fn(dst, -1, size);
33562306a36Sopenharmony_ci
33662306a36Sopenharmony_ci	BUG_ON(gettimeofday(&tv_start, NULL));
33762306a36Sopenharmony_ci	for (i = 0; i < nr_loops; ++i)
33862306a36Sopenharmony_ci		fn(dst, i, size);
33962306a36Sopenharmony_ci	BUG_ON(gettimeofday(&tv_end, NULL));
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	timersub(&tv_end, &tv_start, &tv_diff);
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
34462306a36Sopenharmony_ci}
34562306a36Sopenharmony_ci
/* NULL-terminated usage lines for the memset benchmark. */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>",
	NULL,
};
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_cistatic const struct function memset_functions[] = {
35262306a36Sopenharmony_ci	{ .name		= "default",
35362306a36Sopenharmony_ci	  .desc		= "Default memset() provided by glibc",
35462306a36Sopenharmony_ci	  .fn.memset	= memset },
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci#ifdef HAVE_ARCH_X86_64_SUPPORT
35762306a36Sopenharmony_ci# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
35862306a36Sopenharmony_ci# include "mem-memset-x86-64-asm-def.h"
35962306a36Sopenharmony_ci# undef MEMSET_FN
36062306a36Sopenharmony_ci#endif
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	{ .name = NULL, }
36362306a36Sopenharmony_ci};
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ciint bench_mem_memset(int argc, const char **argv)
36662306a36Sopenharmony_ci{
36762306a36Sopenharmony_ci	struct bench_mem_info info = {
36862306a36Sopenharmony_ci		.functions		= memset_functions,
36962306a36Sopenharmony_ci		.do_cycles		= do_memset_cycles,
37062306a36Sopenharmony_ci		.do_gettimeofday	= do_memset_gettimeofday,
37162306a36Sopenharmony_ci		.usage			= bench_mem_memset_usage,
37262306a36Sopenharmony_ci	};
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	return bench_mem_common(argc, argv, &info);
37562306a36Sopenharmony_ci}
376