162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * mem-memcpy.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Simple memcpy() and memset() benchmarks 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include "debug.h" 1162306a36Sopenharmony_ci#include "../perf-sys.h" 1262306a36Sopenharmony_ci#include <subcmd/parse-options.h> 1362306a36Sopenharmony_ci#include "../util/header.h" 1462306a36Sopenharmony_ci#include "../util/cloexec.h" 1562306a36Sopenharmony_ci#include "../util/string2.h" 1662306a36Sopenharmony_ci#include "bench.h" 1762306a36Sopenharmony_ci#include "mem-memcpy-arch.h" 1862306a36Sopenharmony_ci#include "mem-memset-arch.h" 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <stdio.h> 2162306a36Sopenharmony_ci#include <stdlib.h> 2262306a36Sopenharmony_ci#include <string.h> 2362306a36Sopenharmony_ci#include <unistd.h> 2462306a36Sopenharmony_ci#include <sys/time.h> 2562306a36Sopenharmony_ci#include <errno.h> 2662306a36Sopenharmony_ci#include <linux/time64.h> 2762306a36Sopenharmony_ci#include <linux/zalloc.h> 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#define K 1024 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_cistatic const char *size_str = "1MB"; 3262306a36Sopenharmony_cistatic const char *function_str = "all"; 3362306a36Sopenharmony_cistatic int nr_loops = 1; 3462306a36Sopenharmony_cistatic bool use_cycles; 3562306a36Sopenharmony_cistatic int cycles_fd; 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_cistatic const struct option options[] = { 3862306a36Sopenharmony_ci OPT_STRING('s', "size", &size_str, "1MB", 3962306a36Sopenharmony_ci "Specify the size of the memory buffers. " 4062306a36Sopenharmony_ci "Available units: B, KB, MB, GB and TB (case insensitive)"), 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci OPT_STRING('f', "function", &function_str, "all", 4362306a36Sopenharmony_ci "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci OPT_INTEGER('l', "nr_loops", &nr_loops, 4662306a36Sopenharmony_ci "Specify the number of loops to run. (default: 1)"), 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci OPT_BOOLEAN('c', "cycles", &use_cycles, 4962306a36Sopenharmony_ci "Use a cycles event instead of gettimeofday() to measure performance"), 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci OPT_END() 5262306a36Sopenharmony_ci}; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_citypedef void *(*memcpy_t)(void *, const void *, size_t); 5562306a36Sopenharmony_citypedef void *(*memset_t)(void *, int, size_t); 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_cistruct function { 5862306a36Sopenharmony_ci const char *name; 5962306a36Sopenharmony_ci const char *desc; 6062306a36Sopenharmony_ci union { 6162306a36Sopenharmony_ci memcpy_t memcpy; 6262306a36Sopenharmony_ci memset_t memset; 6362306a36Sopenharmony_ci } fn; 6462306a36Sopenharmony_ci}; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cistatic struct perf_event_attr cycle_attr = { 6762306a36Sopenharmony_ci .type = PERF_TYPE_HARDWARE, 6862306a36Sopenharmony_ci .config = PERF_COUNT_HW_CPU_CYCLES 6962306a36Sopenharmony_ci}; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic int init_cycles(void) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci if (cycles_fd < 0 && errno == ENOSYS) { 7662306a36Sopenharmony_ci pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 7762306a36Sopenharmony_ci return -1; 7862306a36Sopenharmony_ci } 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci return cycles_fd; 8162306a36Sopenharmony_ci} 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_cistatic u64 get_cycles(void) 8462306a36Sopenharmony_ci{ 8562306a36Sopenharmony_ci int ret; 8662306a36Sopenharmony_ci u64 clk; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci ret = read(cycles_fd, &clk, sizeof(u64)); 8962306a36Sopenharmony_ci BUG_ON(ret != sizeof(u64)); 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci return clk; 9262306a36Sopenharmony_ci} 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_cistatic double timeval2double(struct timeval *ts) 9562306a36Sopenharmony_ci{ 9662306a36Sopenharmony_ci return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; 9762306a36Sopenharmony_ci} 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci#define print_bps(x) do { \ 10062306a36Sopenharmony_ci if (x < K) \ 10162306a36Sopenharmony_ci printf(" %14lf bytes/sec\n", x); \ 10262306a36Sopenharmony_ci else if (x < K * K) \ 10362306a36Sopenharmony_ci printf(" %14lfd KB/sec\n", x / K); \ 10462306a36Sopenharmony_ci else if (x < K * K * K) \ 10562306a36Sopenharmony_ci printf(" %14lf MB/sec\n", x / K / K); \ 10662306a36Sopenharmony_ci else \ 10762306a36Sopenharmony_ci printf(" %14lf GB/sec\n", x / K / K / K); \ 10862306a36Sopenharmony_ci } while (0) 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_cistruct bench_mem_info { 11162306a36Sopenharmony_ci const struct function *functions; 11262306a36Sopenharmony_ci u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); 11362306a36Sopenharmony_ci double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); 11462306a36Sopenharmony_ci const char *const *usage; 11562306a36Sopenharmony_ci bool alloc_src; 11662306a36Sopenharmony_ci}; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci const struct function *r = &info->functions[r_idx]; 12162306a36Sopenharmony_ci double result_bps = 0.0; 12262306a36Sopenharmony_ci u64 result_cycles = 0; 12362306a36Sopenharmony_ci void *src = NULL, *dst = zalloc(size); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci printf("# function '%s' (%s)\n", r->name, r->desc); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (dst == NULL) 12862306a36Sopenharmony_ci goto out_alloc_failed; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci if (info->alloc_src) { 13162306a36Sopenharmony_ci src = zalloc(size); 13262306a36Sopenharmony_ci if (src == NULL) 13362306a36Sopenharmony_ci goto out_alloc_failed; 13462306a36Sopenharmony_ci } 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci if (bench_format == BENCH_FORMAT_DEFAULT) 13762306a36Sopenharmony_ci printf("# Copying %s bytes ...\n\n", size_str); 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci if (use_cycles) { 14062306a36Sopenharmony_ci result_cycles = info->do_cycles(r, size, src, dst); 14162306a36Sopenharmony_ci } else { 14262306a36Sopenharmony_ci result_bps = info->do_gettimeofday(r, size, src, dst); 14362306a36Sopenharmony_ci } 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci switch (bench_format) { 14662306a36Sopenharmony_ci case BENCH_FORMAT_DEFAULT: 14762306a36Sopenharmony_ci if (use_cycles) { 14862306a36Sopenharmony_ci printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 14962306a36Sopenharmony_ci } else { 15062306a36Sopenharmony_ci print_bps(result_bps); 15162306a36Sopenharmony_ci } 15262306a36Sopenharmony_ci break; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci case BENCH_FORMAT_SIMPLE: 15562306a36Sopenharmony_ci if (use_cycles) { 15662306a36Sopenharmony_ci printf("%lf\n", (double)result_cycles/size_total); 15762306a36Sopenharmony_ci } else { 15862306a36Sopenharmony_ci printf("%lf\n", result_bps); 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci break; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci default: 16362306a36Sopenharmony_ci BUG_ON(1); 16462306a36Sopenharmony_ci break; 16562306a36Sopenharmony_ci } 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ciout_free: 16862306a36Sopenharmony_ci free(src); 16962306a36Sopenharmony_ci free(dst); 17062306a36Sopenharmony_ci return; 17162306a36Sopenharmony_ciout_alloc_failed: 17262306a36Sopenharmony_ci printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); 17362306a36Sopenharmony_ci goto out_free; 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_cistatic int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 17762306a36Sopenharmony_ci{ 17862306a36Sopenharmony_ci int i; 17962306a36Sopenharmony_ci size_t size; 18062306a36Sopenharmony_ci double size_total; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci argc = parse_options(argc, argv, options, info->usage, 0); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci if (use_cycles) { 18562306a36Sopenharmony_ci i = init_cycles(); 18662306a36Sopenharmony_ci if (i < 0) { 18762306a36Sopenharmony_ci fprintf(stderr, "Failed to open cycles counter\n"); 18862306a36Sopenharmony_ci return i; 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci } 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci size = (size_t)perf_atoll((char *)size_str); 19362306a36Sopenharmony_ci size_total = (double)size * nr_loops; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci if ((s64)size <= 0) { 19662306a36Sopenharmony_ci fprintf(stderr, "Invalid size:%s\n", size_str); 19762306a36Sopenharmony_ci return 1; 19862306a36Sopenharmony_ci } 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci if (!strncmp(function_str, "all", 3)) { 20162306a36Sopenharmony_ci for (i = 0; info->functions[i].name; i++) 20262306a36Sopenharmony_ci __bench_mem_function(info, i, size, size_total); 20362306a36Sopenharmony_ci return 0; 20462306a36Sopenharmony_ci } 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci for (i = 0; info->functions[i].name; i++) { 20762306a36Sopenharmony_ci if (!strcmp(info->functions[i].name, function_str)) 20862306a36Sopenharmony_ci break; 20962306a36Sopenharmony_ci } 21062306a36Sopenharmony_ci if (!info->functions[i].name) { 21162306a36Sopenharmony_ci if (strcmp(function_str, "help") && strcmp(function_str, "h")) 21262306a36Sopenharmony_ci printf("Unknown function: %s\n", function_str); 21362306a36Sopenharmony_ci printf("Available functions:\n"); 21462306a36Sopenharmony_ci for (i = 0; info->functions[i].name; i++) { 21562306a36Sopenharmony_ci printf("\t%s ... %s\n", 21662306a36Sopenharmony_ci info->functions[i].name, info->functions[i].desc); 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci return 1; 21962306a36Sopenharmony_ci } 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci __bench_mem_function(info, i, size, size_total); 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci return 0; 22462306a36Sopenharmony_ci} 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_cistatic void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 22962306a36Sopenharmony_ci memset(src, 0, size); 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci /* 23262306a36Sopenharmony_ci * We prefault the freshly allocated memory range here, 23362306a36Sopenharmony_ci * to not measure page fault overhead: 23462306a36Sopenharmony_ci */ 23562306a36Sopenharmony_ci fn(dst, src, size); 23662306a36Sopenharmony_ci} 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_cistatic u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) 23962306a36Sopenharmony_ci{ 24062306a36Sopenharmony_ci u64 cycle_start = 0ULL, cycle_end = 0ULL; 24162306a36Sopenharmony_ci memcpy_t fn = r->fn.memcpy; 24262306a36Sopenharmony_ci int i; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci memcpy_prefault(fn, size, src, dst); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci cycle_start = get_cycles(); 24762306a36Sopenharmony_ci for (i = 0; i < nr_loops; ++i) 24862306a36Sopenharmony_ci fn(dst, src, size); 24962306a36Sopenharmony_ci cycle_end = get_cycles(); 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci return cycle_end - cycle_start; 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_cistatic double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci struct timeval tv_start, tv_end, tv_diff; 25762306a36Sopenharmony_ci memcpy_t fn = r->fn.memcpy; 25862306a36Sopenharmony_ci int i; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci memcpy_prefault(fn, size, src, dst); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci BUG_ON(gettimeofday(&tv_start, NULL)); 26362306a36Sopenharmony_ci for (i = 0; i < nr_loops; ++i) 26462306a36Sopenharmony_ci fn(dst, src, size); 26562306a36Sopenharmony_ci BUG_ON(gettimeofday(&tv_end, NULL)); 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci timersub(&tv_end, &tv_start, &tv_diff); 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 27062306a36Sopenharmony_ci} 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_cistruct function memcpy_functions[] = { 27362306a36Sopenharmony_ci { .name = "default", 27462306a36Sopenharmony_ci .desc = "Default memcpy() provided by glibc", 27562306a36Sopenharmony_ci .fn.memcpy = memcpy }, 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci#ifdef HAVE_ARCH_X86_64_SUPPORT 27862306a36Sopenharmony_ci# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 27962306a36Sopenharmony_ci# include "mem-memcpy-x86-64-asm-def.h" 28062306a36Sopenharmony_ci# undef MEMCPY_FN 28162306a36Sopenharmony_ci#endif 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci { .name = NULL, } 28462306a36Sopenharmony_ci}; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_cistatic const char * const bench_mem_memcpy_usage[] = { 28762306a36Sopenharmony_ci "perf bench mem memcpy <options>", 28862306a36Sopenharmony_ci NULL 28962306a36Sopenharmony_ci}; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ciint bench_mem_memcpy(int argc, const char **argv) 29262306a36Sopenharmony_ci{ 29362306a36Sopenharmony_ci struct bench_mem_info info = { 29462306a36Sopenharmony_ci .functions = memcpy_functions, 29562306a36Sopenharmony_ci .do_cycles = do_memcpy_cycles, 29662306a36Sopenharmony_ci .do_gettimeofday = do_memcpy_gettimeofday, 29762306a36Sopenharmony_ci .usage = bench_mem_memcpy_usage, 29862306a36Sopenharmony_ci .alloc_src = true, 29962306a36Sopenharmony_ci }; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci return bench_mem_common(argc, argv, &info); 30262306a36Sopenharmony_ci} 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_cistatic u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 30562306a36Sopenharmony_ci{ 30662306a36Sopenharmony_ci u64 cycle_start = 0ULL, cycle_end = 0ULL; 30762306a36Sopenharmony_ci memset_t fn = r->fn.memset; 30862306a36Sopenharmony_ci int i; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci /* 31162306a36Sopenharmony_ci * We prefault the freshly allocated memory range here, 31262306a36Sopenharmony_ci * to not measure page fault overhead: 31362306a36Sopenharmony_ci */ 31462306a36Sopenharmony_ci fn(dst, -1, size); 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci cycle_start = get_cycles(); 31762306a36Sopenharmony_ci for (i = 0; i < nr_loops; ++i) 31862306a36Sopenharmony_ci fn(dst, i, size); 31962306a36Sopenharmony_ci cycle_end = get_cycles(); 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci return cycle_end - cycle_start; 32262306a36Sopenharmony_ci} 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_cistatic double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 32562306a36Sopenharmony_ci{ 32662306a36Sopenharmony_ci struct timeval tv_start, tv_end, tv_diff; 32762306a36Sopenharmony_ci memset_t fn = r->fn.memset; 32862306a36Sopenharmony_ci int i; 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci /* 33162306a36Sopenharmony_ci * We prefault the freshly allocated memory range here, 33262306a36Sopenharmony_ci * to not measure page fault overhead: 33362306a36Sopenharmony_ci */ 33462306a36Sopenharmony_ci fn(dst, -1, size); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci BUG_ON(gettimeofday(&tv_start, NULL)); 33762306a36Sopenharmony_ci for (i = 0; i < nr_loops; ++i) 33862306a36Sopenharmony_ci fn(dst, i, size); 33962306a36Sopenharmony_ci BUG_ON(gettimeofday(&tv_end, NULL)); 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci timersub(&tv_end, &tv_start, &tv_diff); 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 34462306a36Sopenharmony_ci} 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_cistatic const char * const bench_mem_memset_usage[] = { 34762306a36Sopenharmony_ci "perf bench mem memset <options>", 34862306a36Sopenharmony_ci NULL 34962306a36Sopenharmony_ci}; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_cistatic const struct function memset_functions[] = { 35262306a36Sopenharmony_ci { .name = "default", 35362306a36Sopenharmony_ci .desc = "Default memset() provided by glibc", 35462306a36Sopenharmony_ci .fn.memset = memset }, 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci#ifdef HAVE_ARCH_X86_64_SUPPORT 35762306a36Sopenharmony_ci# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 35862306a36Sopenharmony_ci# include "mem-memset-x86-64-asm-def.h" 35962306a36Sopenharmony_ci# undef MEMSET_FN 36062306a36Sopenharmony_ci#endif 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci { .name = NULL, } 36362306a36Sopenharmony_ci}; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ciint bench_mem_memset(int argc, const char **argv) 36662306a36Sopenharmony_ci{ 36762306a36Sopenharmony_ci struct bench_mem_info info = { 36862306a36Sopenharmony_ci .functions = memset_functions, 36962306a36Sopenharmony_ci .do_cycles = do_memset_cycles, 37062306a36Sopenharmony_ci .do_gettimeofday = do_memset_gettimeofday, 37162306a36Sopenharmony_ci .usage = bench_mem_memset_usage, 37262306a36Sopenharmony_ci }; 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci return bench_mem_common(argc, argv, &info); 37562306a36Sopenharmony_ci} 376