18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * numa.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * numa: Simulate NUMA-sensitive workload and measure their NUMA performance 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <inttypes.h> 98c2ecf20Sopenharmony_ci/* For the CLR_() macros */ 108c2ecf20Sopenharmony_ci#include <pthread.h> 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include <subcmd/parse-options.h> 138c2ecf20Sopenharmony_ci#include "../util/cloexec.h" 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#include "bench.h" 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include <errno.h> 188c2ecf20Sopenharmony_ci#include <sched.h> 198c2ecf20Sopenharmony_ci#include <stdio.h> 208c2ecf20Sopenharmony_ci#include <assert.h> 218c2ecf20Sopenharmony_ci#include <malloc.h> 228c2ecf20Sopenharmony_ci#include <signal.h> 238c2ecf20Sopenharmony_ci#include <stdlib.h> 248c2ecf20Sopenharmony_ci#include <string.h> 258c2ecf20Sopenharmony_ci#include <unistd.h> 268c2ecf20Sopenharmony_ci#include <sys/mman.h> 278c2ecf20Sopenharmony_ci#include <sys/time.h> 288c2ecf20Sopenharmony_ci#include <sys/resource.h> 298c2ecf20Sopenharmony_ci#include <sys/wait.h> 308c2ecf20Sopenharmony_ci#include <sys/prctl.h> 318c2ecf20Sopenharmony_ci#include <sys/types.h> 328c2ecf20Sopenharmony_ci#include <linux/kernel.h> 338c2ecf20Sopenharmony_ci#include <linux/time64.h> 348c2ecf20Sopenharmony_ci#include <linux/numa.h> 358c2ecf20Sopenharmony_ci#include <linux/zalloc.h> 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci#include <numa.h> 388c2ecf20Sopenharmony_ci#include <numaif.h> 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci#ifndef RUSAGE_THREAD 418c2ecf20Sopenharmony_ci# define RUSAGE_THREAD 1 428c2ecf20Sopenharmony_ci#endif 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci/* 458c2ecf20Sopenharmony_ci * Regular printout to the terminal, supressed if -q is specified: 468c2ecf20Sopenharmony_ci */ 478c2ecf20Sopenharmony_ci#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci/* 508c2ecf20Sopenharmony_ci * Debug printf: 518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_ci#undef dprintf 538c2ecf20Sopenharmony_ci#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0) 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_cistruct thread_data { 568c2ecf20Sopenharmony_ci int curr_cpu; 578c2ecf20Sopenharmony_ci cpu_set_t bind_cpumask; 588c2ecf20Sopenharmony_ci int bind_node; 598c2ecf20Sopenharmony_ci u8 *process_data; 608c2ecf20Sopenharmony_ci int process_nr; 618c2ecf20Sopenharmony_ci int thread_nr; 628c2ecf20Sopenharmony_ci int task_nr; 638c2ecf20Sopenharmony_ci unsigned int loops_done; 648c2ecf20Sopenharmony_ci u64 val; 658c2ecf20Sopenharmony_ci u64 runtime_ns; 668c2ecf20Sopenharmony_ci u64 system_time_ns; 678c2ecf20Sopenharmony_ci u64 user_time_ns; 688c2ecf20Sopenharmony_ci double speed_gbs; 698c2ecf20Sopenharmony_ci pthread_mutex_t *process_lock; 708c2ecf20Sopenharmony_ci}; 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci/* Parameters set by options: */ 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_cistruct params { 758c2ecf20Sopenharmony_ci /* Startup synchronization: */ 768c2ecf20Sopenharmony_ci bool serialize_startup; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci /* Task hierarchy: */ 798c2ecf20Sopenharmony_ci int nr_proc; 808c2ecf20Sopenharmony_ci int nr_threads; 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci /* Working set sizes: */ 838c2ecf20Sopenharmony_ci const char *mb_global_str; 848c2ecf20Sopenharmony_ci const char *mb_proc_str; 858c2ecf20Sopenharmony_ci const char *mb_proc_locked_str; 868c2ecf20Sopenharmony_ci const char *mb_thread_str; 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci double mb_global; 898c2ecf20Sopenharmony_ci double mb_proc; 908c2ecf20Sopenharmony_ci double mb_proc_locked; 918c2ecf20Sopenharmony_ci double mb_thread; 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci /* Access patterns to the working set: */ 948c2ecf20Sopenharmony_ci bool data_reads; 958c2ecf20Sopenharmony_ci bool data_writes; 968c2ecf20Sopenharmony_ci bool data_backwards; 978c2ecf20Sopenharmony_ci bool data_zero_memset; 988c2ecf20Sopenharmony_ci bool data_rand_walk; 998c2ecf20Sopenharmony_ci u32 nr_loops; 1008c2ecf20Sopenharmony_ci u32 nr_secs; 1018c2ecf20Sopenharmony_ci u32 sleep_usecs; 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci /* Working set initialization: */ 1048c2ecf20Sopenharmony_ci bool init_zero; 1058c2ecf20Sopenharmony_ci bool init_random; 1068c2ecf20Sopenharmony_ci bool init_cpu0; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci /* Misc options: */ 1098c2ecf20Sopenharmony_ci int show_details; 1108c2ecf20Sopenharmony_ci int run_all; 1118c2ecf20Sopenharmony_ci int thp; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci long bytes_global; 1148c2ecf20Sopenharmony_ci long bytes_process; 1158c2ecf20Sopenharmony_ci long bytes_process_locked; 1168c2ecf20Sopenharmony_ci long bytes_thread; 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci int nr_tasks; 1198c2ecf20Sopenharmony_ci bool show_quiet; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci bool show_convergence; 1228c2ecf20Sopenharmony_ci bool measure_convergence; 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci int perturb_secs; 1258c2ecf20Sopenharmony_ci int nr_cpus; 1268c2ecf20Sopenharmony_ci int nr_nodes; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci /* Affinity options -C and -N: */ 1298c2ecf20Sopenharmony_ci char *cpu_list_str; 1308c2ecf20Sopenharmony_ci char *node_list_str; 1318c2ecf20Sopenharmony_ci}; 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci/* Global, read-writable area, accessible to all processes and threads: */ 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_cistruct global_info { 1378c2ecf20Sopenharmony_ci u8 *data; 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci pthread_mutex_t startup_mutex; 1408c2ecf20Sopenharmony_ci pthread_cond_t startup_cond; 1418c2ecf20Sopenharmony_ci int nr_tasks_started; 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci pthread_mutex_t start_work_mutex; 1448c2ecf20Sopenharmony_ci pthread_cond_t start_work_cond; 1458c2ecf20Sopenharmony_ci int nr_tasks_working; 1468c2ecf20Sopenharmony_ci bool start_work; 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci pthread_mutex_t stop_work_mutex; 1498c2ecf20Sopenharmony_ci u64 bytes_done; 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci struct thread_data *threads; 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci /* Convergence latency measurement: */ 1548c2ecf20Sopenharmony_ci bool all_converged; 1558c2ecf20Sopenharmony_ci bool stop_work; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci int print_once; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci struct params p; 1608c2ecf20Sopenharmony_ci}; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_cistatic struct global_info *g = NULL; 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_cistatic int parse_cpus_opt(const struct option *opt, const char *arg, int unset); 1658c2ecf20Sopenharmony_cistatic int parse_nodes_opt(const struct option *opt, const char *arg, int unset); 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_cistruct params p0; 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_cistatic const struct option options[] = { 1708c2ecf20Sopenharmony_ci OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"), 1718c2ecf20Sopenharmony_ci OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"), 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"), 1748c2ecf20Sopenharmony_ci OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"), 1758c2ecf20Sopenharmony_ci OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"), 1768c2ecf20Sopenharmony_ci OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"), 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run (default: unlimited)"), 1798c2ecf20Sopenharmony_ci OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), 1808c2ecf20Sopenharmony_ci OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"), 1838c2ecf20Sopenharmony_ci OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), 1848c2ecf20Sopenharmony_ci OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), 1858c2ecf20Sopenharmony_ci OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), 1868c2ecf20Sopenharmony_ci OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"), 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"), 1908c2ecf20Sopenharmony_ci OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"), 1918c2ecf20Sopenharmony_ci OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"), 1928c2ecf20Sopenharmony_ci OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"), 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"), 1958c2ecf20Sopenharmony_ci OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"), 1968c2ecf20Sopenharmony_ci OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), 1978c2ecf20Sopenharmony_ci OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " 1988c2ecf20Sopenharmony_ci "convergence is reached when each process (all its threads) is running on a single NUMA node."), 1998c2ecf20Sopenharmony_ci OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), 2008c2ecf20Sopenharmony_ci OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), 2018c2ecf20Sopenharmony_ci OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci /* Special option string parsing callbacks: */ 2048c2ecf20Sopenharmony_ci OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]", 2058c2ecf20Sopenharmony_ci "bind the first N tasks to these specific cpus (the rest is unbound)", 2068c2ecf20Sopenharmony_ci parse_cpus_opt), 2078c2ecf20Sopenharmony_ci OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]", 2088c2ecf20Sopenharmony_ci "bind the first N tasks to these specific memory nodes (the rest is unbound)", 2098c2ecf20Sopenharmony_ci parse_nodes_opt), 2108c2ecf20Sopenharmony_ci OPT_END() 2118c2ecf20Sopenharmony_ci}; 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_cistatic const char * const bench_numa_usage[] = { 2148c2ecf20Sopenharmony_ci "perf bench numa <options>", 2158c2ecf20Sopenharmony_ci NULL 2168c2ecf20Sopenharmony_ci}; 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_cistatic const char * const numa_usage[] = { 2198c2ecf20Sopenharmony_ci "perf bench numa mem [<options>]", 2208c2ecf20Sopenharmony_ci NULL 2218c2ecf20Sopenharmony_ci}; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci/* 2248c2ecf20Sopenharmony_ci * To get number of numa nodes present. 2258c2ecf20Sopenharmony_ci */ 2268c2ecf20Sopenharmony_cistatic int nr_numa_nodes(void) 2278c2ecf20Sopenharmony_ci{ 2288c2ecf20Sopenharmony_ci int i, nr_nodes = 0; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci for (i = 0; i < g->p.nr_nodes; i++) { 2318c2ecf20Sopenharmony_ci if (numa_bitmask_isbitset(numa_nodes_ptr, i)) 2328c2ecf20Sopenharmony_ci nr_nodes++; 2338c2ecf20Sopenharmony_ci } 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_ci return nr_nodes; 2368c2ecf20Sopenharmony_ci} 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci/* 2398c2ecf20Sopenharmony_ci * To check if given numa node is present. 2408c2ecf20Sopenharmony_ci */ 2418c2ecf20Sopenharmony_cistatic int is_node_present(int node) 2428c2ecf20Sopenharmony_ci{ 2438c2ecf20Sopenharmony_ci return numa_bitmask_isbitset(numa_nodes_ptr, node); 2448c2ecf20Sopenharmony_ci} 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci/* 2478c2ecf20Sopenharmony_ci * To check given numa node has cpus. 2488c2ecf20Sopenharmony_ci */ 2498c2ecf20Sopenharmony_cistatic bool node_has_cpus(int node) 2508c2ecf20Sopenharmony_ci{ 2518c2ecf20Sopenharmony_ci struct bitmask *cpumask = numa_allocate_cpumask(); 2528c2ecf20Sopenharmony_ci bool ret = false; /* fall back to nocpus */ 2538c2ecf20Sopenharmony_ci int cpu; 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci BUG_ON(!cpumask); 2568c2ecf20Sopenharmony_ci if (!numa_node_to_cpus(node, cpumask)) { 2578c2ecf20Sopenharmony_ci for (cpu = 0; cpu < (int)cpumask->size; cpu++) { 2588c2ecf20Sopenharmony_ci if (numa_bitmask_isbitset(cpumask, cpu)) { 2598c2ecf20Sopenharmony_ci ret = true; 2608c2ecf20Sopenharmony_ci break; 2618c2ecf20Sopenharmony_ci } 2628c2ecf20Sopenharmony_ci } 2638c2ecf20Sopenharmony_ci } 2648c2ecf20Sopenharmony_ci numa_free_cpumask(cpumask); 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci return ret; 2678c2ecf20Sopenharmony_ci} 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_cistatic cpu_set_t bind_to_cpu(int target_cpu) 2708c2ecf20Sopenharmony_ci{ 2718c2ecf20Sopenharmony_ci cpu_set_t orig_mask, mask; 2728c2ecf20Sopenharmony_ci int ret; 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); 2758c2ecf20Sopenharmony_ci BUG_ON(ret); 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci CPU_ZERO(&mask); 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci if (target_cpu == -1) { 2808c2ecf20Sopenharmony_ci int cpu; 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 2838c2ecf20Sopenharmony_ci CPU_SET(cpu, &mask); 2848c2ecf20Sopenharmony_ci } else { 2858c2ecf20Sopenharmony_ci BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus); 2868c2ecf20Sopenharmony_ci CPU_SET(target_cpu, &mask); 2878c2ecf20Sopenharmony_ci } 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci ret = sched_setaffinity(0, sizeof(mask), &mask); 2908c2ecf20Sopenharmony_ci BUG_ON(ret); 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci return orig_mask; 2938c2ecf20Sopenharmony_ci} 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_cistatic cpu_set_t bind_to_node(int target_node) 2968c2ecf20Sopenharmony_ci{ 2978c2ecf20Sopenharmony_ci cpu_set_t orig_mask, mask; 2988c2ecf20Sopenharmony_ci int cpu; 2998c2ecf20Sopenharmony_ci int ret; 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); 3028c2ecf20Sopenharmony_ci BUG_ON(ret); 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci CPU_ZERO(&mask); 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci if (target_node == NUMA_NO_NODE) { 3078c2ecf20Sopenharmony_ci for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 3088c2ecf20Sopenharmony_ci CPU_SET(cpu, &mask); 3098c2ecf20Sopenharmony_ci } else { 3108c2ecf20Sopenharmony_ci struct bitmask *cpumask = numa_allocate_cpumask(); 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci BUG_ON(!cpumask); 3138c2ecf20Sopenharmony_ci if (!numa_node_to_cpus(target_node, cpumask)) { 3148c2ecf20Sopenharmony_ci for (cpu = 0; cpu < (int)cpumask->size; cpu++) { 3158c2ecf20Sopenharmony_ci if (numa_bitmask_isbitset(cpumask, cpu)) 3168c2ecf20Sopenharmony_ci CPU_SET(cpu, &mask); 3178c2ecf20Sopenharmony_ci } 3188c2ecf20Sopenharmony_ci } 3198c2ecf20Sopenharmony_ci numa_free_cpumask(cpumask); 3208c2ecf20Sopenharmony_ci } 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci ret = sched_setaffinity(0, sizeof(mask), &mask); 3238c2ecf20Sopenharmony_ci BUG_ON(ret); 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci return orig_mask; 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_cistatic void bind_to_cpumask(cpu_set_t mask) 3298c2ecf20Sopenharmony_ci{ 3308c2ecf20Sopenharmony_ci int ret; 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci ret = sched_setaffinity(0, sizeof(mask), &mask); 3338c2ecf20Sopenharmony_ci BUG_ON(ret); 3348c2ecf20Sopenharmony_ci} 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_cistatic void mempol_restore(void) 3378c2ecf20Sopenharmony_ci{ 3388c2ecf20Sopenharmony_ci int ret; 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci BUG_ON(ret); 3438c2ecf20Sopenharmony_ci} 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_cistatic void bind_to_memnode(int node) 3468c2ecf20Sopenharmony_ci{ 3478c2ecf20Sopenharmony_ci unsigned long nodemask; 3488c2ecf20Sopenharmony_ci int ret; 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci if (node == NUMA_NO_NODE) 3518c2ecf20Sopenharmony_ci return; 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); 3548c2ecf20Sopenharmony_ci nodemask = 1L << node; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); 3578c2ecf20Sopenharmony_ci dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret); 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci BUG_ON(ret); 3608c2ecf20Sopenharmony_ci} 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci#define HPSIZE (2*1024*1024) 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci#define set_taskname(fmt...) \ 3658c2ecf20Sopenharmony_cido { \ 3668c2ecf20Sopenharmony_ci char name[20]; \ 3678c2ecf20Sopenharmony_ci \ 3688c2ecf20Sopenharmony_ci snprintf(name, 20, fmt); \ 3698c2ecf20Sopenharmony_ci prctl(PR_SET_NAME, name); \ 3708c2ecf20Sopenharmony_ci} while (0) 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_cistatic u8 *alloc_data(ssize_t bytes0, int map_flags, 3738c2ecf20Sopenharmony_ci int init_zero, int init_cpu0, int thp, int init_random) 3748c2ecf20Sopenharmony_ci{ 3758c2ecf20Sopenharmony_ci cpu_set_t orig_mask; 3768c2ecf20Sopenharmony_ci ssize_t bytes; 3778c2ecf20Sopenharmony_ci u8 *buf; 3788c2ecf20Sopenharmony_ci int ret; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci if (!bytes0) 3818c2ecf20Sopenharmony_ci return NULL; 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci /* Allocate and initialize all memory on CPU#0: */ 3848c2ecf20Sopenharmony_ci if (init_cpu0) { 3858c2ecf20Sopenharmony_ci int node = numa_node_of_cpu(0); 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci orig_mask = bind_to_node(node); 3888c2ecf20Sopenharmony_ci bind_to_memnode(node); 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci bytes = bytes0 + HPSIZE; 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0); 3948c2ecf20Sopenharmony_ci BUG_ON(buf == (void *)-1); 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci if (map_flags == MAP_PRIVATE) { 3978c2ecf20Sopenharmony_ci if (thp > 0) { 3988c2ecf20Sopenharmony_ci ret = madvise(buf, bytes, MADV_HUGEPAGE); 3998c2ecf20Sopenharmony_ci if (ret && !g->print_once) { 4008c2ecf20Sopenharmony_ci g->print_once = 1; 4018c2ecf20Sopenharmony_ci printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n"); 4028c2ecf20Sopenharmony_ci } 4038c2ecf20Sopenharmony_ci } 4048c2ecf20Sopenharmony_ci if (thp < 0) { 4058c2ecf20Sopenharmony_ci ret = madvise(buf, bytes, MADV_NOHUGEPAGE); 4068c2ecf20Sopenharmony_ci if (ret && !g->print_once) { 4078c2ecf20Sopenharmony_ci g->print_once = 1; 4088c2ecf20Sopenharmony_ci printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n"); 4098c2ecf20Sopenharmony_ci } 4108c2ecf20Sopenharmony_ci } 4118c2ecf20Sopenharmony_ci } 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci if (init_zero) { 4148c2ecf20Sopenharmony_ci bzero(buf, bytes); 4158c2ecf20Sopenharmony_ci } else { 4168c2ecf20Sopenharmony_ci /* Initialize random contents, different in each word: */ 4178c2ecf20Sopenharmony_ci if (init_random) { 4188c2ecf20Sopenharmony_ci u64 *wbuf = (void *)buf; 4198c2ecf20Sopenharmony_ci long off = rand(); 4208c2ecf20Sopenharmony_ci long i; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci for (i = 0; i < bytes/8; i++) 4238c2ecf20Sopenharmony_ci wbuf[i] = i + off; 4248c2ecf20Sopenharmony_ci } 4258c2ecf20Sopenharmony_ci } 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci /* Align to 2MB boundary: */ 4288c2ecf20Sopenharmony_ci buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1)); 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci /* Restore affinity: */ 4318c2ecf20Sopenharmony_ci if (init_cpu0) { 4328c2ecf20Sopenharmony_ci bind_to_cpumask(orig_mask); 4338c2ecf20Sopenharmony_ci mempol_restore(); 4348c2ecf20Sopenharmony_ci } 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci return buf; 4378c2ecf20Sopenharmony_ci} 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_cistatic void free_data(void *data, ssize_t bytes) 4408c2ecf20Sopenharmony_ci{ 4418c2ecf20Sopenharmony_ci int ret; 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci if (!data) 4448c2ecf20Sopenharmony_ci return; 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_ci ret = munmap(data, bytes); 4478c2ecf20Sopenharmony_ci BUG_ON(ret); 4488c2ecf20Sopenharmony_ci} 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci/* 4518c2ecf20Sopenharmony_ci * Create a shared memory buffer that can be shared between processes, zeroed: 4528c2ecf20Sopenharmony_ci */ 4538c2ecf20Sopenharmony_cistatic void * zalloc_shared_data(ssize_t bytes) 4548c2ecf20Sopenharmony_ci{ 4558c2ecf20Sopenharmony_ci return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random); 4568c2ecf20Sopenharmony_ci} 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci/* 4598c2ecf20Sopenharmony_ci * Create a shared memory buffer that can be shared between processes: 4608c2ecf20Sopenharmony_ci */ 4618c2ecf20Sopenharmony_cistatic void * setup_shared_data(ssize_t bytes) 4628c2ecf20Sopenharmony_ci{ 4638c2ecf20Sopenharmony_ci return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); 4648c2ecf20Sopenharmony_ci} 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_ci/* 4678c2ecf20Sopenharmony_ci * Allocate process-local memory - this will either be shared between 4688c2ecf20Sopenharmony_ci * threads of this process, or only be accessed by this thread: 4698c2ecf20Sopenharmony_ci */ 4708c2ecf20Sopenharmony_cistatic void * setup_private_data(ssize_t bytes) 4718c2ecf20Sopenharmony_ci{ 4728c2ecf20Sopenharmony_ci return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); 4738c2ecf20Sopenharmony_ci} 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci/* 4768c2ecf20Sopenharmony_ci * Return a process-shared (global) mutex: 4778c2ecf20Sopenharmony_ci */ 4788c2ecf20Sopenharmony_cistatic void init_global_mutex(pthread_mutex_t *mutex) 4798c2ecf20Sopenharmony_ci{ 4808c2ecf20Sopenharmony_ci pthread_mutexattr_t attr; 4818c2ecf20Sopenharmony_ci 4828c2ecf20Sopenharmony_ci pthread_mutexattr_init(&attr); 4838c2ecf20Sopenharmony_ci pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); 4848c2ecf20Sopenharmony_ci pthread_mutex_init(mutex, &attr); 4858c2ecf20Sopenharmony_ci} 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci/* 4888c2ecf20Sopenharmony_ci * Return a process-shared (global) condition variable: 4898c2ecf20Sopenharmony_ci */ 4908c2ecf20Sopenharmony_cistatic void init_global_cond(pthread_cond_t *cond) 4918c2ecf20Sopenharmony_ci{ 4928c2ecf20Sopenharmony_ci pthread_condattr_t attr; 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci pthread_condattr_init(&attr); 4958c2ecf20Sopenharmony_ci pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); 4968c2ecf20Sopenharmony_ci pthread_cond_init(cond, &attr); 4978c2ecf20Sopenharmony_ci} 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_cistatic int parse_cpu_list(const char *arg) 5008c2ecf20Sopenharmony_ci{ 5018c2ecf20Sopenharmony_ci p0.cpu_list_str = strdup(arg); 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci dprintf("got CPU list: {%s}\n", p0.cpu_list_str); 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_ci return 0; 5068c2ecf20Sopenharmony_ci} 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_cistatic int parse_setup_cpu_list(void) 5098c2ecf20Sopenharmony_ci{ 5108c2ecf20Sopenharmony_ci struct thread_data *td; 5118c2ecf20Sopenharmony_ci char *str0, *str; 5128c2ecf20Sopenharmony_ci int t; 5138c2ecf20Sopenharmony_ci 5148c2ecf20Sopenharmony_ci if (!g->p.cpu_list_str) 5158c2ecf20Sopenharmony_ci return 0; 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci str0 = str = strdup(g->p.cpu_list_str); 5208c2ecf20Sopenharmony_ci t = 0; 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_ci BUG_ON(!str); 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci tprintf("# binding tasks to CPUs:\n"); 5258c2ecf20Sopenharmony_ci tprintf("# "); 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci while (true) { 5288c2ecf20Sopenharmony_ci int bind_cpu, bind_cpu_0, bind_cpu_1; 5298c2ecf20Sopenharmony_ci char *tok, *tok_end, *tok_step, *tok_len, *tok_mul; 5308c2ecf20Sopenharmony_ci int bind_len; 5318c2ecf20Sopenharmony_ci int step; 5328c2ecf20Sopenharmony_ci int mul; 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci tok = strsep(&str, ","); 5358c2ecf20Sopenharmony_ci if (!tok) 5368c2ecf20Sopenharmony_ci break; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci tok_end = strstr(tok, "-"); 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); 5418c2ecf20Sopenharmony_ci if (!tok_end) { 5428c2ecf20Sopenharmony_ci /* Single CPU specified: */ 5438c2ecf20Sopenharmony_ci bind_cpu_0 = bind_cpu_1 = atol(tok); 5448c2ecf20Sopenharmony_ci } else { 5458c2ecf20Sopenharmony_ci /* CPU range specified (for example: "5-11"): */ 5468c2ecf20Sopenharmony_ci bind_cpu_0 = atol(tok); 5478c2ecf20Sopenharmony_ci bind_cpu_1 = atol(tok_end + 1); 5488c2ecf20Sopenharmony_ci } 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_ci step = 1; 5518c2ecf20Sopenharmony_ci tok_step = strstr(tok, "#"); 5528c2ecf20Sopenharmony_ci if (tok_step) { 5538c2ecf20Sopenharmony_ci step = atol(tok_step + 1); 5548c2ecf20Sopenharmony_ci BUG_ON(step <= 0 || step >= g->p.nr_cpus); 5558c2ecf20Sopenharmony_ci } 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci /* 5588c2ecf20Sopenharmony_ci * Mask length. 5598c2ecf20Sopenharmony_ci * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4', 5608c2ecf20Sopenharmony_ci * where the _4 means the next 4 CPUs are allowed. 5618c2ecf20Sopenharmony_ci */ 5628c2ecf20Sopenharmony_ci bind_len = 1; 5638c2ecf20Sopenharmony_ci tok_len = strstr(tok, "_"); 5648c2ecf20Sopenharmony_ci if (tok_len) { 5658c2ecf20Sopenharmony_ci bind_len = atol(tok_len + 1); 5668c2ecf20Sopenharmony_ci BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus); 5678c2ecf20Sopenharmony_ci } 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ 5708c2ecf20Sopenharmony_ci mul = 1; 5718c2ecf20Sopenharmony_ci tok_mul = strstr(tok, "x"); 5728c2ecf20Sopenharmony_ci if (tok_mul) { 5738c2ecf20Sopenharmony_ci mul = atol(tok_mul + 1); 5748c2ecf20Sopenharmony_ci BUG_ON(mul <= 0); 5758c2ecf20Sopenharmony_ci } 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_ci dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) { 5808c2ecf20Sopenharmony_ci printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus); 5818c2ecf20Sopenharmony_ci return -1; 5828c2ecf20Sopenharmony_ci } 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); 5858c2ecf20Sopenharmony_ci BUG_ON(bind_cpu_0 > bind_cpu_1); 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { 5888c2ecf20Sopenharmony_ci int i; 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci for (i = 0; i < mul; i++) { 5918c2ecf20Sopenharmony_ci int cpu; 5928c2ecf20Sopenharmony_ci 5938c2ecf20Sopenharmony_ci if (t >= g->p.nr_tasks) { 5948c2ecf20Sopenharmony_ci printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu); 5958c2ecf20Sopenharmony_ci goto out; 5968c2ecf20Sopenharmony_ci } 5978c2ecf20Sopenharmony_ci td = g->threads + t; 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci if (t) 6008c2ecf20Sopenharmony_ci tprintf(","); 6018c2ecf20Sopenharmony_ci if (bind_len > 1) { 6028c2ecf20Sopenharmony_ci tprintf("%2d/%d", bind_cpu, bind_len); 6038c2ecf20Sopenharmony_ci } else { 6048c2ecf20Sopenharmony_ci tprintf("%2d", bind_cpu); 6058c2ecf20Sopenharmony_ci } 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci CPU_ZERO(&td->bind_cpumask); 6088c2ecf20Sopenharmony_ci for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { 6098c2ecf20Sopenharmony_ci BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); 6108c2ecf20Sopenharmony_ci CPU_SET(cpu, &td->bind_cpumask); 6118c2ecf20Sopenharmony_ci } 6128c2ecf20Sopenharmony_ci t++; 6138c2ecf20Sopenharmony_ci } 6148c2ecf20Sopenharmony_ci } 6158c2ecf20Sopenharmony_ci } 6168c2ecf20Sopenharmony_ciout: 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci tprintf("\n"); 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci if (t < g->p.nr_tasks) 6218c2ecf20Sopenharmony_ci printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci free(str0); 6248c2ecf20Sopenharmony_ci return 0; 6258c2ecf20Sopenharmony_ci} 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_cistatic int parse_cpus_opt(const struct option *opt __maybe_unused, 6288c2ecf20Sopenharmony_ci const char *arg, int unset __maybe_unused) 6298c2ecf20Sopenharmony_ci{ 6308c2ecf20Sopenharmony_ci if (!arg) 6318c2ecf20Sopenharmony_ci return -1; 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci return parse_cpu_list(arg); 6348c2ecf20Sopenharmony_ci} 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_cistatic int parse_node_list(const char *arg) 6378c2ecf20Sopenharmony_ci{ 6388c2ecf20Sopenharmony_ci p0.node_list_str = strdup(arg); 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci dprintf("got NODE list: {%s}\n", p0.node_list_str); 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci return 0; 6438c2ecf20Sopenharmony_ci} 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_cistatic int parse_setup_node_list(void) 6468c2ecf20Sopenharmony_ci{ 6478c2ecf20Sopenharmony_ci struct thread_data *td; 6488c2ecf20Sopenharmony_ci char *str0, *str; 6498c2ecf20Sopenharmony_ci int t; 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci if (!g->p.node_list_str) 6528c2ecf20Sopenharmony_ci return 0; 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_ci dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci str0 = str = strdup(g->p.node_list_str); 6578c2ecf20Sopenharmony_ci t = 0; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci BUG_ON(!str); 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci tprintf("# binding tasks to NODEs:\n"); 6628c2ecf20Sopenharmony_ci tprintf("# "); 6638c2ecf20Sopenharmony_ci 6648c2ecf20Sopenharmony_ci while (true) { 6658c2ecf20Sopenharmony_ci int bind_node, bind_node_0, bind_node_1; 6668c2ecf20Sopenharmony_ci char *tok, *tok_end, *tok_step, *tok_mul; 6678c2ecf20Sopenharmony_ci int step; 6688c2ecf20Sopenharmony_ci int mul; 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci tok = strsep(&str, ","); 6718c2ecf20Sopenharmony_ci if (!tok) 6728c2ecf20Sopenharmony_ci break; 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_ci tok_end = strstr(tok, "-"); 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); 6778c2ecf20Sopenharmony_ci if (!tok_end) { 6788c2ecf20Sopenharmony_ci /* Single NODE specified: */ 6798c2ecf20Sopenharmony_ci bind_node_0 = bind_node_1 = atol(tok); 6808c2ecf20Sopenharmony_ci } else { 6818c2ecf20Sopenharmony_ci /* NODE range specified (for example: "5-11"): */ 6828c2ecf20Sopenharmony_ci bind_node_0 = atol(tok); 6838c2ecf20Sopenharmony_ci bind_node_1 = atol(tok_end + 1); 6848c2ecf20Sopenharmony_ci } 6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_ci step = 1; 6878c2ecf20Sopenharmony_ci tok_step = strstr(tok, "#"); 6888c2ecf20Sopenharmony_ci if (tok_step) { 6898c2ecf20Sopenharmony_ci step = atol(tok_step + 1); 6908c2ecf20Sopenharmony_ci BUG_ON(step <= 0 || step >= g->p.nr_nodes); 6918c2ecf20Sopenharmony_ci } 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ 6948c2ecf20Sopenharmony_ci mul = 1; 6958c2ecf20Sopenharmony_ci tok_mul = strstr(tok, "x"); 6968c2ecf20Sopenharmony_ci if (tok_mul) { 6978c2ecf20Sopenharmony_ci mul = atol(tok_mul + 1); 6988c2ecf20Sopenharmony_ci BUG_ON(mul <= 0); 6998c2ecf20Sopenharmony_ci } 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step); 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) { 7048c2ecf20Sopenharmony_ci printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes); 7058c2ecf20Sopenharmony_ci return -1; 7068c2ecf20Sopenharmony_ci } 7078c2ecf20Sopenharmony_ci 7088c2ecf20Sopenharmony_ci BUG_ON(bind_node_0 < 0 || bind_node_1 < 0); 7098c2ecf20Sopenharmony_ci BUG_ON(bind_node_0 > bind_node_1); 7108c2ecf20Sopenharmony_ci 7118c2ecf20Sopenharmony_ci for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) { 7128c2ecf20Sopenharmony_ci int i; 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_ci for (i = 0; i < mul; i++) { 7158c2ecf20Sopenharmony_ci if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) { 7168c2ecf20Sopenharmony_ci printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node); 7178c2ecf20Sopenharmony_ci goto out; 7188c2ecf20Sopenharmony_ci } 7198c2ecf20Sopenharmony_ci td = g->threads + t; 7208c2ecf20Sopenharmony_ci 7218c2ecf20Sopenharmony_ci if (!t) 7228c2ecf20Sopenharmony_ci tprintf(" %2d", bind_node); 7238c2ecf20Sopenharmony_ci else 7248c2ecf20Sopenharmony_ci tprintf(",%2d", bind_node); 7258c2ecf20Sopenharmony_ci 7268c2ecf20Sopenharmony_ci td->bind_node = bind_node; 7278c2ecf20Sopenharmony_ci t++; 7288c2ecf20Sopenharmony_ci } 7298c2ecf20Sopenharmony_ci } 7308c2ecf20Sopenharmony_ci } 7318c2ecf20Sopenharmony_ciout: 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_ci tprintf("\n"); 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci if (t < g->p.nr_tasks) 7368c2ecf20Sopenharmony_ci printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t); 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci free(str0); 7398c2ecf20Sopenharmony_ci return 0; 7408c2ecf20Sopenharmony_ci} 7418c2ecf20Sopenharmony_ci 7428c2ecf20Sopenharmony_cistatic int parse_nodes_opt(const struct option *opt __maybe_unused, 7438c2ecf20Sopenharmony_ci const char *arg, int unset __maybe_unused) 7448c2ecf20Sopenharmony_ci{ 7458c2ecf20Sopenharmony_ci if (!arg) 7468c2ecf20Sopenharmony_ci return -1; 7478c2ecf20Sopenharmony_ci 7488c2ecf20Sopenharmony_ci return parse_node_list(arg); 7498c2ecf20Sopenharmony_ci} 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_ci#define BIT(x) (1ul << x) 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_cistatic inline uint32_t lfsr_32(uint32_t lfsr) 7548c2ecf20Sopenharmony_ci{ 7558c2ecf20Sopenharmony_ci const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31); 7568c2ecf20Sopenharmony_ci return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps); 7578c2ecf20Sopenharmony_ci} 7588c2ecf20Sopenharmony_ci 7598c2ecf20Sopenharmony_ci/* 7608c2ecf20Sopenharmony_ci * Make sure there's real data dependency to RAM (when read 7618c2ecf20Sopenharmony_ci * accesses are enabled), so the compiler, the CPU and the 7628c2ecf20Sopenharmony_ci * kernel (KSM, zero page, etc.) cannot optimize away RAM 7638c2ecf20Sopenharmony_ci * accesses: 7648c2ecf20Sopenharmony_ci */ 7658c2ecf20Sopenharmony_cistatic inline u64 access_data(u64 *data, u64 val) 7668c2ecf20Sopenharmony_ci{ 7678c2ecf20Sopenharmony_ci if (g->p.data_reads) 7688c2ecf20Sopenharmony_ci val += *data; 7698c2ecf20Sopenharmony_ci if (g->p.data_writes) 7708c2ecf20Sopenharmony_ci *data = val + 1; 7718c2ecf20Sopenharmony_ci return val; 7728c2ecf20Sopenharmony_ci} 7738c2ecf20Sopenharmony_ci 7748c2ecf20Sopenharmony_ci/* 7758c2ecf20Sopenharmony_ci * The worker process does two types of work, a forwards going 7768c2ecf20Sopenharmony_ci * loop and a backwards going loop. 7778c2ecf20Sopenharmony_ci * 7788c2ecf20Sopenharmony_ci * We do this so that on multiprocessor systems we do not create 7798c2ecf20Sopenharmony_ci * a 'train' of processing, with highly synchronized processes, 7808c2ecf20Sopenharmony_ci * skewing the whole benchmark. 7818c2ecf20Sopenharmony_ci */ 7828c2ecf20Sopenharmony_cistatic u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val) 7838c2ecf20Sopenharmony_ci{ 7848c2ecf20Sopenharmony_ci long words = bytes/sizeof(u64); 7858c2ecf20Sopenharmony_ci u64 *data = (void *)__data; 7868c2ecf20Sopenharmony_ci long chunk_0, chunk_1; 7878c2ecf20Sopenharmony_ci u64 *d0, *d, *d1; 7888c2ecf20Sopenharmony_ci long off; 7898c2ecf20Sopenharmony_ci long i; 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_ci BUG_ON(!data && words); 7928c2ecf20Sopenharmony_ci BUG_ON(data && !words); 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci if (!data) 7958c2ecf20Sopenharmony_ci return val; 7968c2ecf20Sopenharmony_ci 7978c2ecf20Sopenharmony_ci /* Very simple memset() work variant: */ 7988c2ecf20Sopenharmony_ci if (g->p.data_zero_memset && !g->p.data_rand_walk) { 7998c2ecf20Sopenharmony_ci bzero(data, bytes); 8008c2ecf20Sopenharmony_ci return val; 8018c2ecf20Sopenharmony_ci } 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci /* Spread out by PID/TID nr and by loop nr: */ 8048c2ecf20Sopenharmony_ci chunk_0 = words/nr_max; 8058c2ecf20Sopenharmony_ci chunk_1 = words/g->p.nr_loops; 8068c2ecf20Sopenharmony_ci off = nr*chunk_0 + loop*chunk_1; 8078c2ecf20Sopenharmony_ci 8088c2ecf20Sopenharmony_ci while (off >= words) 8098c2ecf20Sopenharmony_ci off -= words; 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci if (g->p.data_rand_walk) { 8128c2ecf20Sopenharmony_ci u32 lfsr = nr + loop + val; 8138c2ecf20Sopenharmony_ci int j; 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci for (i = 0; i < words/1024; i++) { 8168c2ecf20Sopenharmony_ci long start, end; 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci lfsr = lfsr_32(lfsr); 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ci start = lfsr % words; 8218c2ecf20Sopenharmony_ci end = min(start + 1024, words-1); 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci if (g->p.data_zero_memset) { 8248c2ecf20Sopenharmony_ci bzero(data + start, (end-start) * sizeof(u64)); 8258c2ecf20Sopenharmony_ci } else { 8268c2ecf20Sopenharmony_ci for (j = start; j < end; j++) 8278c2ecf20Sopenharmony_ci val = access_data(data + j, val); 8288c2ecf20Sopenharmony_ci } 8298c2ecf20Sopenharmony_ci } 8308c2ecf20Sopenharmony_ci } else if (!g->p.data_backwards || (nr + loop) & 1) { 8318c2ecf20Sopenharmony_ci /* Process data forwards: */ 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci d0 = data + off; 8348c2ecf20Sopenharmony_ci d = data + off + 1; 8358c2ecf20Sopenharmony_ci d1 = data + words; 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_ci for (;;) { 8388c2ecf20Sopenharmony_ci if (unlikely(d >= d1)) 8398c2ecf20Sopenharmony_ci d = data; 8408c2ecf20Sopenharmony_ci if (unlikely(d == d0)) 8418c2ecf20Sopenharmony_ci break; 8428c2ecf20Sopenharmony_ci 8438c2ecf20Sopenharmony_ci val = access_data(d, val); 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_ci d++; 8468c2ecf20Sopenharmony_ci } 8478c2ecf20Sopenharmony_ci } else { 8488c2ecf20Sopenharmony_ci /* Process data backwards: */ 8498c2ecf20Sopenharmony_ci 8508c2ecf20Sopenharmony_ci d0 = data + off; 8518c2ecf20Sopenharmony_ci d = data + off - 1; 8528c2ecf20Sopenharmony_ci d1 = data + words; 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci for (;;) { 8558c2ecf20Sopenharmony_ci if (unlikely(d < data)) 8568c2ecf20Sopenharmony_ci d = data + words-1; 8578c2ecf20Sopenharmony_ci if (unlikely(d == d0)) 8588c2ecf20Sopenharmony_ci break; 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci val = access_data(d, val); 8618c2ecf20Sopenharmony_ci 8628c2ecf20Sopenharmony_ci d--; 8638c2ecf20Sopenharmony_ci } 8648c2ecf20Sopenharmony_ci } 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci return val; 8678c2ecf20Sopenharmony_ci} 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_cistatic void update_curr_cpu(int task_nr, unsigned long bytes_worked) 8708c2ecf20Sopenharmony_ci{ 8718c2ecf20Sopenharmony_ci unsigned int cpu; 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci cpu = sched_getcpu(); 8748c2ecf20Sopenharmony_ci 8758c2ecf20Sopenharmony_ci g->threads[task_nr].curr_cpu = cpu; 8768c2ecf20Sopenharmony_ci prctl(0, bytes_worked); 8778c2ecf20Sopenharmony_ci} 8788c2ecf20Sopenharmony_ci 8798c2ecf20Sopenharmony_ci#define MAX_NR_NODES 64 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci/* 8828c2ecf20Sopenharmony_ci * Count the number of nodes a process's threads 8838c2ecf20Sopenharmony_ci * are spread out on. 8848c2ecf20Sopenharmony_ci * 8858c2ecf20Sopenharmony_ci * A count of 1 means that the process is compressed 8868c2ecf20Sopenharmony_ci * to a single node. A count of g->p.nr_nodes means it's 8878c2ecf20Sopenharmony_ci * spread out on the whole system. 8888c2ecf20Sopenharmony_ci */ 8898c2ecf20Sopenharmony_cistatic int count_process_nodes(int process_nr) 8908c2ecf20Sopenharmony_ci{ 8918c2ecf20Sopenharmony_ci char node_present[MAX_NR_NODES] = { 0, }; 8928c2ecf20Sopenharmony_ci int nodes; 8938c2ecf20Sopenharmony_ci int n, t; 8948c2ecf20Sopenharmony_ci 8958c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 8968c2ecf20Sopenharmony_ci struct thread_data *td; 8978c2ecf20Sopenharmony_ci int task_nr; 8988c2ecf20Sopenharmony_ci int node; 8998c2ecf20Sopenharmony_ci 9008c2ecf20Sopenharmony_ci task_nr = process_nr*g->p.nr_threads + t; 9018c2ecf20Sopenharmony_ci td = g->threads + task_nr; 9028c2ecf20Sopenharmony_ci 9038c2ecf20Sopenharmony_ci node = numa_node_of_cpu(td->curr_cpu); 9048c2ecf20Sopenharmony_ci if (node < 0) /* curr_cpu was likely still -1 */ 9058c2ecf20Sopenharmony_ci return 0; 9068c2ecf20Sopenharmony_ci 9078c2ecf20Sopenharmony_ci node_present[node] = 1; 9088c2ecf20Sopenharmony_ci } 9098c2ecf20Sopenharmony_ci 9108c2ecf20Sopenharmony_ci nodes = 0; 9118c2ecf20Sopenharmony_ci 9128c2ecf20Sopenharmony_ci for (n = 0; n < MAX_NR_NODES; n++) 9138c2ecf20Sopenharmony_ci nodes += node_present[n]; 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ci return nodes; 9168c2ecf20Sopenharmony_ci} 9178c2ecf20Sopenharmony_ci 9188c2ecf20Sopenharmony_ci/* 9198c2ecf20Sopenharmony_ci * Count the number of distinct process-threads a node contains. 9208c2ecf20Sopenharmony_ci * 9218c2ecf20Sopenharmony_ci * A count of 1 means that the node contains only a single 9228c2ecf20Sopenharmony_ci * process. If all nodes on the system contain at most one 9238c2ecf20Sopenharmony_ci * process then we are well-converged. 9248c2ecf20Sopenharmony_ci */ 9258c2ecf20Sopenharmony_cistatic int count_node_processes(int node) 9268c2ecf20Sopenharmony_ci{ 9278c2ecf20Sopenharmony_ci int processes = 0; 9288c2ecf20Sopenharmony_ci int t, p; 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_ci for (p = 0; p < g->p.nr_proc; p++) { 9318c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 9328c2ecf20Sopenharmony_ci struct thread_data *td; 9338c2ecf20Sopenharmony_ci int task_nr; 9348c2ecf20Sopenharmony_ci int n; 9358c2ecf20Sopenharmony_ci 9368c2ecf20Sopenharmony_ci task_nr = p*g->p.nr_threads + t; 9378c2ecf20Sopenharmony_ci td = g->threads + task_nr; 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci n = numa_node_of_cpu(td->curr_cpu); 9408c2ecf20Sopenharmony_ci if (n == node) { 9418c2ecf20Sopenharmony_ci processes++; 9428c2ecf20Sopenharmony_ci break; 9438c2ecf20Sopenharmony_ci } 9448c2ecf20Sopenharmony_ci } 9458c2ecf20Sopenharmony_ci } 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci return processes; 9488c2ecf20Sopenharmony_ci} 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_cistatic void calc_convergence_compression(int *strong) 9518c2ecf20Sopenharmony_ci{ 9528c2ecf20Sopenharmony_ci unsigned int nodes_min, nodes_max; 9538c2ecf20Sopenharmony_ci int p; 9548c2ecf20Sopenharmony_ci 9558c2ecf20Sopenharmony_ci nodes_min = -1; 9568c2ecf20Sopenharmony_ci nodes_max = 0; 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_ci for (p = 0; p < g->p.nr_proc; p++) { 9598c2ecf20Sopenharmony_ci unsigned int nodes = count_process_nodes(p); 9608c2ecf20Sopenharmony_ci 9618c2ecf20Sopenharmony_ci if (!nodes) { 9628c2ecf20Sopenharmony_ci *strong = 0; 9638c2ecf20Sopenharmony_ci return; 9648c2ecf20Sopenharmony_ci } 9658c2ecf20Sopenharmony_ci 9668c2ecf20Sopenharmony_ci nodes_min = min(nodes, nodes_min); 9678c2ecf20Sopenharmony_ci nodes_max = max(nodes, nodes_max); 9688c2ecf20Sopenharmony_ci } 9698c2ecf20Sopenharmony_ci 9708c2ecf20Sopenharmony_ci /* Strong convergence: all threads compress on a single node: */ 9718c2ecf20Sopenharmony_ci if (nodes_min == 1 && nodes_max == 1) { 9728c2ecf20Sopenharmony_ci *strong = 1; 9738c2ecf20Sopenharmony_ci } else { 9748c2ecf20Sopenharmony_ci *strong = 0; 9758c2ecf20Sopenharmony_ci tprintf(" {%d-%d}", nodes_min, nodes_max); 9768c2ecf20Sopenharmony_ci } 9778c2ecf20Sopenharmony_ci} 9788c2ecf20Sopenharmony_ci 9798c2ecf20Sopenharmony_cistatic void calc_convergence(double runtime_ns_max, double *convergence) 9808c2ecf20Sopenharmony_ci{ 9818c2ecf20Sopenharmony_ci unsigned int loops_done_min, loops_done_max; 9828c2ecf20Sopenharmony_ci int process_groups; 9838c2ecf20Sopenharmony_ci int nodes[MAX_NR_NODES]; 9848c2ecf20Sopenharmony_ci int distance; 9858c2ecf20Sopenharmony_ci int nr_min; 9868c2ecf20Sopenharmony_ci int nr_max; 9878c2ecf20Sopenharmony_ci int strong; 9888c2ecf20Sopenharmony_ci int sum; 9898c2ecf20Sopenharmony_ci int nr; 9908c2ecf20Sopenharmony_ci int node; 9918c2ecf20Sopenharmony_ci int cpu; 9928c2ecf20Sopenharmony_ci int t; 9938c2ecf20Sopenharmony_ci 9948c2ecf20Sopenharmony_ci if (!g->p.show_convergence && !g->p.measure_convergence) 9958c2ecf20Sopenharmony_ci return; 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_ci for (node = 0; node < g->p.nr_nodes; node++) 9988c2ecf20Sopenharmony_ci nodes[node] = 0; 9998c2ecf20Sopenharmony_ci 10008c2ecf20Sopenharmony_ci loops_done_min = -1; 10018c2ecf20Sopenharmony_ci loops_done_max = 0; 10028c2ecf20Sopenharmony_ci 10038c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_tasks; t++) { 10048c2ecf20Sopenharmony_ci struct thread_data *td = g->threads + t; 10058c2ecf20Sopenharmony_ci unsigned int loops_done; 10068c2ecf20Sopenharmony_ci 10078c2ecf20Sopenharmony_ci cpu = td->curr_cpu; 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ci /* Not all threads have written it yet: */ 10108c2ecf20Sopenharmony_ci if (cpu < 0) 10118c2ecf20Sopenharmony_ci continue; 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ci node = numa_node_of_cpu(cpu); 10148c2ecf20Sopenharmony_ci 10158c2ecf20Sopenharmony_ci nodes[node]++; 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ci loops_done = td->loops_done; 10188c2ecf20Sopenharmony_ci loops_done_min = min(loops_done, loops_done_min); 10198c2ecf20Sopenharmony_ci loops_done_max = max(loops_done, loops_done_max); 10208c2ecf20Sopenharmony_ci } 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_ci nr_max = 0; 10238c2ecf20Sopenharmony_ci nr_min = g->p.nr_tasks; 10248c2ecf20Sopenharmony_ci sum = 0; 10258c2ecf20Sopenharmony_ci 10268c2ecf20Sopenharmony_ci for (node = 0; node < g->p.nr_nodes; node++) { 10278c2ecf20Sopenharmony_ci if (!is_node_present(node)) 10288c2ecf20Sopenharmony_ci continue; 10298c2ecf20Sopenharmony_ci nr = nodes[node]; 10308c2ecf20Sopenharmony_ci nr_min = min(nr, nr_min); 10318c2ecf20Sopenharmony_ci nr_max = max(nr, nr_max); 10328c2ecf20Sopenharmony_ci sum += nr; 10338c2ecf20Sopenharmony_ci } 10348c2ecf20Sopenharmony_ci BUG_ON(nr_min > nr_max); 10358c2ecf20Sopenharmony_ci 10368c2ecf20Sopenharmony_ci BUG_ON(sum > g->p.nr_tasks); 10378c2ecf20Sopenharmony_ci 10388c2ecf20Sopenharmony_ci if (0 && (sum < g->p.nr_tasks)) 10398c2ecf20Sopenharmony_ci return; 10408c2ecf20Sopenharmony_ci 10418c2ecf20Sopenharmony_ci /* 10428c2ecf20Sopenharmony_ci * Count the number of distinct process groups present 10438c2ecf20Sopenharmony_ci * on nodes - when we are converged this will decrease 10448c2ecf20Sopenharmony_ci * to g->p.nr_proc: 10458c2ecf20Sopenharmony_ci */ 10468c2ecf20Sopenharmony_ci process_groups = 0; 10478c2ecf20Sopenharmony_ci 10488c2ecf20Sopenharmony_ci for (node = 0; node < g->p.nr_nodes; node++) { 10498c2ecf20Sopenharmony_ci int processes; 10508c2ecf20Sopenharmony_ci 10518c2ecf20Sopenharmony_ci if (!is_node_present(node)) 10528c2ecf20Sopenharmony_ci continue; 10538c2ecf20Sopenharmony_ci processes = count_node_processes(node); 10548c2ecf20Sopenharmony_ci nr = nodes[node]; 10558c2ecf20Sopenharmony_ci tprintf(" %2d/%-2d", nr, processes); 10568c2ecf20Sopenharmony_ci 10578c2ecf20Sopenharmony_ci process_groups += processes; 10588c2ecf20Sopenharmony_ci } 10598c2ecf20Sopenharmony_ci 10608c2ecf20Sopenharmony_ci distance = nr_max - nr_min; 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_ci tprintf(" [%2d/%-2d]", distance, process_groups); 10638c2ecf20Sopenharmony_ci 10648c2ecf20Sopenharmony_ci tprintf(" l:%3d-%-3d (%3d)", 10658c2ecf20Sopenharmony_ci loops_done_min, loops_done_max, loops_done_max-loops_done_min); 10668c2ecf20Sopenharmony_ci 10678c2ecf20Sopenharmony_ci if (loops_done_min && loops_done_max) { 10688c2ecf20Sopenharmony_ci double skew = 1.0 - (double)loops_done_min/loops_done_max; 10698c2ecf20Sopenharmony_ci 10708c2ecf20Sopenharmony_ci tprintf(" [%4.1f%%]", skew * 100.0); 10718c2ecf20Sopenharmony_ci } 10728c2ecf20Sopenharmony_ci 10738c2ecf20Sopenharmony_ci calc_convergence_compression(&strong); 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci if (strong && process_groups == g->p.nr_proc) { 10768c2ecf20Sopenharmony_ci if (!*convergence) { 10778c2ecf20Sopenharmony_ci *convergence = runtime_ns_max; 10788c2ecf20Sopenharmony_ci tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC); 10798c2ecf20Sopenharmony_ci if (g->p.measure_convergence) { 10808c2ecf20Sopenharmony_ci g->all_converged = true; 10818c2ecf20Sopenharmony_ci g->stop_work = true; 10828c2ecf20Sopenharmony_ci } 10838c2ecf20Sopenharmony_ci } 10848c2ecf20Sopenharmony_ci } else { 10858c2ecf20Sopenharmony_ci if (*convergence) { 10868c2ecf20Sopenharmony_ci tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC); 10878c2ecf20Sopenharmony_ci *convergence = 0; 10888c2ecf20Sopenharmony_ci } 10898c2ecf20Sopenharmony_ci tprintf("\n"); 10908c2ecf20Sopenharmony_ci } 10918c2ecf20Sopenharmony_ci} 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_cistatic void show_summary(double runtime_ns_max, int l, double *convergence) 10948c2ecf20Sopenharmony_ci{ 10958c2ecf20Sopenharmony_ci tprintf("\r # %5.1f%% [%.1f mins]", 10968c2ecf20Sopenharmony_ci (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0); 10978c2ecf20Sopenharmony_ci 10988c2ecf20Sopenharmony_ci calc_convergence(runtime_ns_max, convergence); 10998c2ecf20Sopenharmony_ci 11008c2ecf20Sopenharmony_ci if (g->p.show_details >= 0) 11018c2ecf20Sopenharmony_ci fflush(stdout); 11028c2ecf20Sopenharmony_ci} 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_cistatic void *worker_thread(void *__tdata) 11058c2ecf20Sopenharmony_ci{ 11068c2ecf20Sopenharmony_ci struct thread_data *td = __tdata; 11078c2ecf20Sopenharmony_ci struct timeval start0, start, stop, diff; 11088c2ecf20Sopenharmony_ci int process_nr = td->process_nr; 11098c2ecf20Sopenharmony_ci int thread_nr = td->thread_nr; 11108c2ecf20Sopenharmony_ci unsigned long last_perturbance; 11118c2ecf20Sopenharmony_ci int task_nr = td->task_nr; 11128c2ecf20Sopenharmony_ci int details = g->p.show_details; 11138c2ecf20Sopenharmony_ci int first_task, last_task; 11148c2ecf20Sopenharmony_ci double convergence = 0; 11158c2ecf20Sopenharmony_ci u64 val = td->val; 11168c2ecf20Sopenharmony_ci double runtime_ns_max; 11178c2ecf20Sopenharmony_ci u8 *global_data; 11188c2ecf20Sopenharmony_ci u8 *process_data; 11198c2ecf20Sopenharmony_ci u8 *thread_data; 11208c2ecf20Sopenharmony_ci u64 bytes_done, secs; 11218c2ecf20Sopenharmony_ci long work_done; 11228c2ecf20Sopenharmony_ci u32 l; 11238c2ecf20Sopenharmony_ci struct rusage rusage; 11248c2ecf20Sopenharmony_ci 11258c2ecf20Sopenharmony_ci bind_to_cpumask(td->bind_cpumask); 11268c2ecf20Sopenharmony_ci bind_to_memnode(td->bind_node); 11278c2ecf20Sopenharmony_ci 11288c2ecf20Sopenharmony_ci set_taskname("thread %d/%d", process_nr, thread_nr); 11298c2ecf20Sopenharmony_ci 11308c2ecf20Sopenharmony_ci global_data = g->data; 11318c2ecf20Sopenharmony_ci process_data = td->process_data; 11328c2ecf20Sopenharmony_ci thread_data = setup_private_data(g->p.bytes_thread); 11338c2ecf20Sopenharmony_ci 11348c2ecf20Sopenharmony_ci bytes_done = 0; 11358c2ecf20Sopenharmony_ci 11368c2ecf20Sopenharmony_ci last_task = 0; 11378c2ecf20Sopenharmony_ci if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1) 11388c2ecf20Sopenharmony_ci last_task = 1; 11398c2ecf20Sopenharmony_ci 11408c2ecf20Sopenharmony_ci first_task = 0; 11418c2ecf20Sopenharmony_ci if (process_nr == 0 && thread_nr == 0) 11428c2ecf20Sopenharmony_ci first_task = 1; 11438c2ecf20Sopenharmony_ci 11448c2ecf20Sopenharmony_ci if (details >= 2) { 11458c2ecf20Sopenharmony_ci printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n", 11468c2ecf20Sopenharmony_ci process_nr, thread_nr, global_data, process_data, thread_data); 11478c2ecf20Sopenharmony_ci } 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ci if (g->p.serialize_startup) { 11508c2ecf20Sopenharmony_ci pthread_mutex_lock(&g->startup_mutex); 11518c2ecf20Sopenharmony_ci g->nr_tasks_started++; 11528c2ecf20Sopenharmony_ci /* The last thread wakes the main process. */ 11538c2ecf20Sopenharmony_ci if (g->nr_tasks_started == g->p.nr_tasks) 11548c2ecf20Sopenharmony_ci pthread_cond_signal(&g->startup_cond); 11558c2ecf20Sopenharmony_ci 11568c2ecf20Sopenharmony_ci pthread_mutex_unlock(&g->startup_mutex); 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci /* Here we will wait for the main process to start us all at once: */ 11598c2ecf20Sopenharmony_ci pthread_mutex_lock(&g->start_work_mutex); 11608c2ecf20Sopenharmony_ci g->start_work = false; 11618c2ecf20Sopenharmony_ci g->nr_tasks_working++; 11628c2ecf20Sopenharmony_ci while (!g->start_work) 11638c2ecf20Sopenharmony_ci pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex); 11648c2ecf20Sopenharmony_ci 11658c2ecf20Sopenharmony_ci pthread_mutex_unlock(&g->start_work_mutex); 11668c2ecf20Sopenharmony_ci } 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_ci gettimeofday(&start0, NULL); 11698c2ecf20Sopenharmony_ci 11708c2ecf20Sopenharmony_ci start = stop = start0; 11718c2ecf20Sopenharmony_ci last_perturbance = start.tv_sec; 11728c2ecf20Sopenharmony_ci 11738c2ecf20Sopenharmony_ci for (l = 0; l < g->p.nr_loops; l++) { 11748c2ecf20Sopenharmony_ci start = stop; 11758c2ecf20Sopenharmony_ci 11768c2ecf20Sopenharmony_ci if (g->stop_work) 11778c2ecf20Sopenharmony_ci break; 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val); 11808c2ecf20Sopenharmony_ci val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val); 11818c2ecf20Sopenharmony_ci val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val); 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci if (g->p.sleep_usecs) { 11848c2ecf20Sopenharmony_ci pthread_mutex_lock(td->process_lock); 11858c2ecf20Sopenharmony_ci usleep(g->p.sleep_usecs); 11868c2ecf20Sopenharmony_ci pthread_mutex_unlock(td->process_lock); 11878c2ecf20Sopenharmony_ci } 11888c2ecf20Sopenharmony_ci /* 11898c2ecf20Sopenharmony_ci * Amount of work to be done under a process-global lock: 11908c2ecf20Sopenharmony_ci */ 11918c2ecf20Sopenharmony_ci if (g->p.bytes_process_locked) { 11928c2ecf20Sopenharmony_ci pthread_mutex_lock(td->process_lock); 11938c2ecf20Sopenharmony_ci val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val); 11948c2ecf20Sopenharmony_ci pthread_mutex_unlock(td->process_lock); 11958c2ecf20Sopenharmony_ci } 11968c2ecf20Sopenharmony_ci 11978c2ecf20Sopenharmony_ci work_done = g->p.bytes_global + g->p.bytes_process + 11988c2ecf20Sopenharmony_ci g->p.bytes_process_locked + g->p.bytes_thread; 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci update_curr_cpu(task_nr, work_done); 12018c2ecf20Sopenharmony_ci bytes_done += work_done; 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs) 12048c2ecf20Sopenharmony_ci continue; 12058c2ecf20Sopenharmony_ci 12068c2ecf20Sopenharmony_ci td->loops_done = l; 12078c2ecf20Sopenharmony_ci 12088c2ecf20Sopenharmony_ci gettimeofday(&stop, NULL); 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci /* Check whether our max runtime timed out: */ 12118c2ecf20Sopenharmony_ci if (g->p.nr_secs) { 12128c2ecf20Sopenharmony_ci timersub(&stop, &start0, &diff); 12138c2ecf20Sopenharmony_ci if ((u32)diff.tv_sec >= g->p.nr_secs) { 12148c2ecf20Sopenharmony_ci g->stop_work = true; 12158c2ecf20Sopenharmony_ci break; 12168c2ecf20Sopenharmony_ci } 12178c2ecf20Sopenharmony_ci } 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci /* Update the summary at most once per second: */ 12208c2ecf20Sopenharmony_ci if (start.tv_sec == stop.tv_sec) 12218c2ecf20Sopenharmony_ci continue; 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci /* 12248c2ecf20Sopenharmony_ci * Perturb the first task's equilibrium every g->p.perturb_secs seconds, 12258c2ecf20Sopenharmony_ci * by migrating to CPU#0: 12268c2ecf20Sopenharmony_ci */ 12278c2ecf20Sopenharmony_ci if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) { 12288c2ecf20Sopenharmony_ci cpu_set_t orig_mask; 12298c2ecf20Sopenharmony_ci int target_cpu; 12308c2ecf20Sopenharmony_ci int this_cpu; 12318c2ecf20Sopenharmony_ci 12328c2ecf20Sopenharmony_ci last_perturbance = stop.tv_sec; 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci /* 12358c2ecf20Sopenharmony_ci * Depending on where we are running, move into 12368c2ecf20Sopenharmony_ci * the other half of the system, to create some 12378c2ecf20Sopenharmony_ci * real disturbance: 12388c2ecf20Sopenharmony_ci */ 12398c2ecf20Sopenharmony_ci this_cpu = g->threads[task_nr].curr_cpu; 12408c2ecf20Sopenharmony_ci if (this_cpu < g->p.nr_cpus/2) 12418c2ecf20Sopenharmony_ci target_cpu = g->p.nr_cpus-1; 12428c2ecf20Sopenharmony_ci else 12438c2ecf20Sopenharmony_ci target_cpu = 0; 12448c2ecf20Sopenharmony_ci 12458c2ecf20Sopenharmony_ci orig_mask = bind_to_cpu(target_cpu); 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_ci /* Here we are running on the target CPU already */ 12488c2ecf20Sopenharmony_ci if (details >= 1) 12498c2ecf20Sopenharmony_ci printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu); 12508c2ecf20Sopenharmony_ci 12518c2ecf20Sopenharmony_ci bind_to_cpumask(orig_mask); 12528c2ecf20Sopenharmony_ci } 12538c2ecf20Sopenharmony_ci 12548c2ecf20Sopenharmony_ci if (details >= 3) { 12558c2ecf20Sopenharmony_ci timersub(&stop, &start, &diff); 12568c2ecf20Sopenharmony_ci runtime_ns_max = diff.tv_sec * NSEC_PER_SEC; 12578c2ecf20Sopenharmony_ci runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci if (details >= 0) { 12608c2ecf20Sopenharmony_ci printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n", 12618c2ecf20Sopenharmony_ci process_nr, thread_nr, runtime_ns_max / bytes_done, val); 12628c2ecf20Sopenharmony_ci } 12638c2ecf20Sopenharmony_ci fflush(stdout); 12648c2ecf20Sopenharmony_ci } 12658c2ecf20Sopenharmony_ci if (!last_task) 12668c2ecf20Sopenharmony_ci continue; 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_ci timersub(&stop, &start0, &diff); 12698c2ecf20Sopenharmony_ci runtime_ns_max = diff.tv_sec * NSEC_PER_SEC; 12708c2ecf20Sopenharmony_ci runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; 12718c2ecf20Sopenharmony_ci 12728c2ecf20Sopenharmony_ci show_summary(runtime_ns_max, l, &convergence); 12738c2ecf20Sopenharmony_ci } 12748c2ecf20Sopenharmony_ci 12758c2ecf20Sopenharmony_ci gettimeofday(&stop, NULL); 12768c2ecf20Sopenharmony_ci timersub(&stop, &start0, &diff); 12778c2ecf20Sopenharmony_ci td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; 12788c2ecf20Sopenharmony_ci td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; 12798c2ecf20Sopenharmony_ci secs = td->runtime_ns / NSEC_PER_SEC; 12808c2ecf20Sopenharmony_ci td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0; 12818c2ecf20Sopenharmony_ci 12828c2ecf20Sopenharmony_ci getrusage(RUSAGE_THREAD, &rusage); 12838c2ecf20Sopenharmony_ci td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; 12848c2ecf20Sopenharmony_ci td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC; 12858c2ecf20Sopenharmony_ci td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC; 12868c2ecf20Sopenharmony_ci td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC; 12878c2ecf20Sopenharmony_ci 12888c2ecf20Sopenharmony_ci free_data(thread_data, g->p.bytes_thread); 12898c2ecf20Sopenharmony_ci 12908c2ecf20Sopenharmony_ci pthread_mutex_lock(&g->stop_work_mutex); 12918c2ecf20Sopenharmony_ci g->bytes_done += bytes_done; 12928c2ecf20Sopenharmony_ci pthread_mutex_unlock(&g->stop_work_mutex); 12938c2ecf20Sopenharmony_ci 12948c2ecf20Sopenharmony_ci return NULL; 12958c2ecf20Sopenharmony_ci} 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci/* 12988c2ecf20Sopenharmony_ci * A worker process starts a couple of threads: 12998c2ecf20Sopenharmony_ci */ 13008c2ecf20Sopenharmony_cistatic void worker_process(int process_nr) 13018c2ecf20Sopenharmony_ci{ 13028c2ecf20Sopenharmony_ci pthread_mutex_t process_lock; 13038c2ecf20Sopenharmony_ci struct thread_data *td; 13048c2ecf20Sopenharmony_ci pthread_t *pthreads; 13058c2ecf20Sopenharmony_ci u8 *process_data; 13068c2ecf20Sopenharmony_ci int task_nr; 13078c2ecf20Sopenharmony_ci int ret; 13088c2ecf20Sopenharmony_ci int t; 13098c2ecf20Sopenharmony_ci 13108c2ecf20Sopenharmony_ci pthread_mutex_init(&process_lock, NULL); 13118c2ecf20Sopenharmony_ci set_taskname("process %d", process_nr); 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci /* 13148c2ecf20Sopenharmony_ci * Pick up the memory policy and the CPU binding of our first thread, 13158c2ecf20Sopenharmony_ci * so that we initialize memory accordingly: 13168c2ecf20Sopenharmony_ci */ 13178c2ecf20Sopenharmony_ci task_nr = process_nr*g->p.nr_threads; 13188c2ecf20Sopenharmony_ci td = g->threads + task_nr; 13198c2ecf20Sopenharmony_ci 13208c2ecf20Sopenharmony_ci bind_to_memnode(td->bind_node); 13218c2ecf20Sopenharmony_ci bind_to_cpumask(td->bind_cpumask); 13228c2ecf20Sopenharmony_ci 13238c2ecf20Sopenharmony_ci pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t)); 13248c2ecf20Sopenharmony_ci process_data = setup_private_data(g->p.bytes_process); 13258c2ecf20Sopenharmony_ci 13268c2ecf20Sopenharmony_ci if (g->p.show_details >= 3) { 13278c2ecf20Sopenharmony_ci printf(" # process %2d global mem: %p, process mem: %p\n", 13288c2ecf20Sopenharmony_ci process_nr, g->data, process_data); 13298c2ecf20Sopenharmony_ci } 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 13328c2ecf20Sopenharmony_ci task_nr = process_nr*g->p.nr_threads + t; 13338c2ecf20Sopenharmony_ci td = g->threads + task_nr; 13348c2ecf20Sopenharmony_ci 13358c2ecf20Sopenharmony_ci td->process_data = process_data; 13368c2ecf20Sopenharmony_ci td->process_nr = process_nr; 13378c2ecf20Sopenharmony_ci td->thread_nr = t; 13388c2ecf20Sopenharmony_ci td->task_nr = task_nr; 13398c2ecf20Sopenharmony_ci td->val = rand(); 13408c2ecf20Sopenharmony_ci td->curr_cpu = -1; 13418c2ecf20Sopenharmony_ci td->process_lock = &process_lock; 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_ci ret = pthread_create(pthreads + t, NULL, worker_thread, td); 13448c2ecf20Sopenharmony_ci BUG_ON(ret); 13458c2ecf20Sopenharmony_ci } 13468c2ecf20Sopenharmony_ci 13478c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 13488c2ecf20Sopenharmony_ci ret = pthread_join(pthreads[t], NULL); 13498c2ecf20Sopenharmony_ci BUG_ON(ret); 13508c2ecf20Sopenharmony_ci } 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci free_data(process_data, g->p.bytes_process); 13538c2ecf20Sopenharmony_ci free(pthreads); 13548c2ecf20Sopenharmony_ci} 13558c2ecf20Sopenharmony_ci 13568c2ecf20Sopenharmony_cistatic void print_summary(void) 13578c2ecf20Sopenharmony_ci{ 13588c2ecf20Sopenharmony_ci if (g->p.show_details < 0) 13598c2ecf20Sopenharmony_ci return; 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci printf("\n ###\n"); 13628c2ecf20Sopenharmony_ci printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", 13638c2ecf20Sopenharmony_ci g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus); 13648c2ecf20Sopenharmony_ci printf(" # %5dx %5ldMB global shared mem operations\n", 13658c2ecf20Sopenharmony_ci g->p.nr_loops, g->p.bytes_global/1024/1024); 13668c2ecf20Sopenharmony_ci printf(" # %5dx %5ldMB process shared mem operations\n", 13678c2ecf20Sopenharmony_ci g->p.nr_loops, g->p.bytes_process/1024/1024); 13688c2ecf20Sopenharmony_ci printf(" # %5dx %5ldMB thread local mem operations\n", 13698c2ecf20Sopenharmony_ci g->p.nr_loops, g->p.bytes_thread/1024/1024); 13708c2ecf20Sopenharmony_ci 13718c2ecf20Sopenharmony_ci printf(" ###\n"); 13728c2ecf20Sopenharmony_ci 13738c2ecf20Sopenharmony_ci printf("\n ###\n"); fflush(stdout); 13748c2ecf20Sopenharmony_ci} 13758c2ecf20Sopenharmony_ci 13768c2ecf20Sopenharmony_cistatic void init_thread_data(void) 13778c2ecf20Sopenharmony_ci{ 13788c2ecf20Sopenharmony_ci ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; 13798c2ecf20Sopenharmony_ci int t; 13808c2ecf20Sopenharmony_ci 13818c2ecf20Sopenharmony_ci g->threads = zalloc_shared_data(size); 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_tasks; t++) { 13848c2ecf20Sopenharmony_ci struct thread_data *td = g->threads + t; 13858c2ecf20Sopenharmony_ci int cpu; 13868c2ecf20Sopenharmony_ci 13878c2ecf20Sopenharmony_ci /* Allow all nodes by default: */ 13888c2ecf20Sopenharmony_ci td->bind_node = NUMA_NO_NODE; 13898c2ecf20Sopenharmony_ci 13908c2ecf20Sopenharmony_ci /* Allow all CPUs by default: */ 13918c2ecf20Sopenharmony_ci CPU_ZERO(&td->bind_cpumask); 13928c2ecf20Sopenharmony_ci for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 13938c2ecf20Sopenharmony_ci CPU_SET(cpu, &td->bind_cpumask); 13948c2ecf20Sopenharmony_ci } 13958c2ecf20Sopenharmony_ci} 13968c2ecf20Sopenharmony_ci 13978c2ecf20Sopenharmony_cistatic void deinit_thread_data(void) 13988c2ecf20Sopenharmony_ci{ 13998c2ecf20Sopenharmony_ci ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; 14008c2ecf20Sopenharmony_ci 14018c2ecf20Sopenharmony_ci free_data(g->threads, size); 14028c2ecf20Sopenharmony_ci} 14038c2ecf20Sopenharmony_ci 14048c2ecf20Sopenharmony_cistatic int init(void) 14058c2ecf20Sopenharmony_ci{ 14068c2ecf20Sopenharmony_ci g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0); 14078c2ecf20Sopenharmony_ci 14088c2ecf20Sopenharmony_ci /* Copy over options: */ 14098c2ecf20Sopenharmony_ci g->p = p0; 14108c2ecf20Sopenharmony_ci 14118c2ecf20Sopenharmony_ci g->p.nr_cpus = numa_num_configured_cpus(); 14128c2ecf20Sopenharmony_ci 14138c2ecf20Sopenharmony_ci g->p.nr_nodes = numa_max_node() + 1; 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ci /* char array in count_process_nodes(): */ 14168c2ecf20Sopenharmony_ci BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0); 14178c2ecf20Sopenharmony_ci 14188c2ecf20Sopenharmony_ci if (g->p.show_quiet && !g->p.show_details) 14198c2ecf20Sopenharmony_ci g->p.show_details = -1; 14208c2ecf20Sopenharmony_ci 14218c2ecf20Sopenharmony_ci /* Some memory should be specified: */ 14228c2ecf20Sopenharmony_ci if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str) 14238c2ecf20Sopenharmony_ci return -1; 14248c2ecf20Sopenharmony_ci 14258c2ecf20Sopenharmony_ci if (g->p.mb_global_str) { 14268c2ecf20Sopenharmony_ci g->p.mb_global = atof(g->p.mb_global_str); 14278c2ecf20Sopenharmony_ci BUG_ON(g->p.mb_global < 0); 14288c2ecf20Sopenharmony_ci } 14298c2ecf20Sopenharmony_ci 14308c2ecf20Sopenharmony_ci if (g->p.mb_proc_str) { 14318c2ecf20Sopenharmony_ci g->p.mb_proc = atof(g->p.mb_proc_str); 14328c2ecf20Sopenharmony_ci BUG_ON(g->p.mb_proc < 0); 14338c2ecf20Sopenharmony_ci } 14348c2ecf20Sopenharmony_ci 14358c2ecf20Sopenharmony_ci if (g->p.mb_proc_locked_str) { 14368c2ecf20Sopenharmony_ci g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str); 14378c2ecf20Sopenharmony_ci BUG_ON(g->p.mb_proc_locked < 0); 14388c2ecf20Sopenharmony_ci BUG_ON(g->p.mb_proc_locked > g->p.mb_proc); 14398c2ecf20Sopenharmony_ci } 14408c2ecf20Sopenharmony_ci 14418c2ecf20Sopenharmony_ci if (g->p.mb_thread_str) { 14428c2ecf20Sopenharmony_ci g->p.mb_thread = atof(g->p.mb_thread_str); 14438c2ecf20Sopenharmony_ci BUG_ON(g->p.mb_thread < 0); 14448c2ecf20Sopenharmony_ci } 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci BUG_ON(g->p.nr_threads <= 0); 14478c2ecf20Sopenharmony_ci BUG_ON(g->p.nr_proc <= 0); 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_ci g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads; 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_ci g->p.bytes_global = g->p.mb_global *1024L*1024L; 14528c2ecf20Sopenharmony_ci g->p.bytes_process = g->p.mb_proc *1024L*1024L; 14538c2ecf20Sopenharmony_ci g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L; 14548c2ecf20Sopenharmony_ci g->p.bytes_thread = g->p.mb_thread *1024L*1024L; 14558c2ecf20Sopenharmony_ci 14568c2ecf20Sopenharmony_ci g->data = setup_shared_data(g->p.bytes_global); 14578c2ecf20Sopenharmony_ci 14588c2ecf20Sopenharmony_ci /* Startup serialization: */ 14598c2ecf20Sopenharmony_ci init_global_mutex(&g->start_work_mutex); 14608c2ecf20Sopenharmony_ci init_global_cond(&g->start_work_cond); 14618c2ecf20Sopenharmony_ci init_global_mutex(&g->startup_mutex); 14628c2ecf20Sopenharmony_ci init_global_cond(&g->startup_cond); 14638c2ecf20Sopenharmony_ci init_global_mutex(&g->stop_work_mutex); 14648c2ecf20Sopenharmony_ci 14658c2ecf20Sopenharmony_ci init_thread_data(); 14668c2ecf20Sopenharmony_ci 14678c2ecf20Sopenharmony_ci tprintf("#\n"); 14688c2ecf20Sopenharmony_ci if (parse_setup_cpu_list() || parse_setup_node_list()) 14698c2ecf20Sopenharmony_ci return -1; 14708c2ecf20Sopenharmony_ci tprintf("#\n"); 14718c2ecf20Sopenharmony_ci 14728c2ecf20Sopenharmony_ci print_summary(); 14738c2ecf20Sopenharmony_ci 14748c2ecf20Sopenharmony_ci return 0; 14758c2ecf20Sopenharmony_ci} 14768c2ecf20Sopenharmony_ci 14778c2ecf20Sopenharmony_cistatic void deinit(void) 14788c2ecf20Sopenharmony_ci{ 14798c2ecf20Sopenharmony_ci free_data(g->data, g->p.bytes_global); 14808c2ecf20Sopenharmony_ci g->data = NULL; 14818c2ecf20Sopenharmony_ci 14828c2ecf20Sopenharmony_ci deinit_thread_data(); 14838c2ecf20Sopenharmony_ci 14848c2ecf20Sopenharmony_ci free_data(g, sizeof(*g)); 14858c2ecf20Sopenharmony_ci g = NULL; 14868c2ecf20Sopenharmony_ci} 14878c2ecf20Sopenharmony_ci 14888c2ecf20Sopenharmony_ci/* 14898c2ecf20Sopenharmony_ci * Print a short or long result, depending on the verbosity setting: 14908c2ecf20Sopenharmony_ci */ 14918c2ecf20Sopenharmony_cistatic void print_res(const char *name, double val, 14928c2ecf20Sopenharmony_ci const char *txt_unit, const char *txt_short, const char *txt_long) 14938c2ecf20Sopenharmony_ci{ 14948c2ecf20Sopenharmony_ci if (!name) 14958c2ecf20Sopenharmony_ci name = "main,"; 14968c2ecf20Sopenharmony_ci 14978c2ecf20Sopenharmony_ci if (!g->p.show_quiet) 14988c2ecf20Sopenharmony_ci printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); 14998c2ecf20Sopenharmony_ci else 15008c2ecf20Sopenharmony_ci printf(" %14.3f %s\n", val, txt_long); 15018c2ecf20Sopenharmony_ci} 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_cistatic int __bench_numa(const char *name) 15048c2ecf20Sopenharmony_ci{ 15058c2ecf20Sopenharmony_ci struct timeval start, stop, diff; 15068c2ecf20Sopenharmony_ci u64 runtime_ns_min, runtime_ns_sum; 15078c2ecf20Sopenharmony_ci pid_t *pids, pid, wpid; 15088c2ecf20Sopenharmony_ci double delta_runtime; 15098c2ecf20Sopenharmony_ci double runtime_avg; 15108c2ecf20Sopenharmony_ci double runtime_sec_max; 15118c2ecf20Sopenharmony_ci double runtime_sec_min; 15128c2ecf20Sopenharmony_ci int wait_stat; 15138c2ecf20Sopenharmony_ci double bytes; 15148c2ecf20Sopenharmony_ci int i, t, p; 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci if (init()) 15178c2ecf20Sopenharmony_ci return -1; 15188c2ecf20Sopenharmony_ci 15198c2ecf20Sopenharmony_ci pids = zalloc(g->p.nr_proc * sizeof(*pids)); 15208c2ecf20Sopenharmony_ci pid = -1; 15218c2ecf20Sopenharmony_ci 15228c2ecf20Sopenharmony_ci if (g->p.serialize_startup) { 15238c2ecf20Sopenharmony_ci tprintf(" #\n"); 15248c2ecf20Sopenharmony_ci tprintf(" # Startup synchronization: ..."); fflush(stdout); 15258c2ecf20Sopenharmony_ci } 15268c2ecf20Sopenharmony_ci 15278c2ecf20Sopenharmony_ci gettimeofday(&start, NULL); 15288c2ecf20Sopenharmony_ci 15298c2ecf20Sopenharmony_ci for (i = 0; i < g->p.nr_proc; i++) { 15308c2ecf20Sopenharmony_ci pid = fork(); 15318c2ecf20Sopenharmony_ci dprintf(" # process %2d: PID %d\n", i, pid); 15328c2ecf20Sopenharmony_ci 15338c2ecf20Sopenharmony_ci BUG_ON(pid < 0); 15348c2ecf20Sopenharmony_ci if (!pid) { 15358c2ecf20Sopenharmony_ci /* Child process: */ 15368c2ecf20Sopenharmony_ci worker_process(i); 15378c2ecf20Sopenharmony_ci 15388c2ecf20Sopenharmony_ci exit(0); 15398c2ecf20Sopenharmony_ci } 15408c2ecf20Sopenharmony_ci pids[i] = pid; 15418c2ecf20Sopenharmony_ci 15428c2ecf20Sopenharmony_ci } 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_ci if (g->p.serialize_startup) { 15458c2ecf20Sopenharmony_ci bool threads_ready = false; 15468c2ecf20Sopenharmony_ci double startup_sec; 15478c2ecf20Sopenharmony_ci 15488c2ecf20Sopenharmony_ci /* 15498c2ecf20Sopenharmony_ci * Wait for all the threads to start up. The last thread will 15508c2ecf20Sopenharmony_ci * signal this process. 15518c2ecf20Sopenharmony_ci */ 15528c2ecf20Sopenharmony_ci pthread_mutex_lock(&g->startup_mutex); 15538c2ecf20Sopenharmony_ci while (g->nr_tasks_started != g->p.nr_tasks) 15548c2ecf20Sopenharmony_ci pthread_cond_wait(&g->startup_cond, &g->startup_mutex); 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_ci pthread_mutex_unlock(&g->startup_mutex); 15578c2ecf20Sopenharmony_ci 15588c2ecf20Sopenharmony_ci /* Wait for all threads to be at the start_work_cond. */ 15598c2ecf20Sopenharmony_ci while (!threads_ready) { 15608c2ecf20Sopenharmony_ci pthread_mutex_lock(&g->start_work_mutex); 15618c2ecf20Sopenharmony_ci threads_ready = (g->nr_tasks_working == g->p.nr_tasks); 15628c2ecf20Sopenharmony_ci pthread_mutex_unlock(&g->start_work_mutex); 15638c2ecf20Sopenharmony_ci if (!threads_ready) 15648c2ecf20Sopenharmony_ci usleep(1); 15658c2ecf20Sopenharmony_ci } 15668c2ecf20Sopenharmony_ci 15678c2ecf20Sopenharmony_ci gettimeofday(&stop, NULL); 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci timersub(&stop, &start, &diff); 15708c2ecf20Sopenharmony_ci 15718c2ecf20Sopenharmony_ci startup_sec = diff.tv_sec * NSEC_PER_SEC; 15728c2ecf20Sopenharmony_ci startup_sec += diff.tv_usec * NSEC_PER_USEC; 15738c2ecf20Sopenharmony_ci startup_sec /= NSEC_PER_SEC; 15748c2ecf20Sopenharmony_ci 15758c2ecf20Sopenharmony_ci tprintf(" threads initialized in %.6f seconds.\n", startup_sec); 15768c2ecf20Sopenharmony_ci tprintf(" #\n"); 15778c2ecf20Sopenharmony_ci 15788c2ecf20Sopenharmony_ci start = stop; 15798c2ecf20Sopenharmony_ci /* Start all threads running. */ 15808c2ecf20Sopenharmony_ci pthread_mutex_lock(&g->start_work_mutex); 15818c2ecf20Sopenharmony_ci g->start_work = true; 15828c2ecf20Sopenharmony_ci pthread_mutex_unlock(&g->start_work_mutex); 15838c2ecf20Sopenharmony_ci pthread_cond_broadcast(&g->start_work_cond); 15848c2ecf20Sopenharmony_ci } else { 15858c2ecf20Sopenharmony_ci gettimeofday(&start, NULL); 15868c2ecf20Sopenharmony_ci } 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_ci /* Parent process: */ 15898c2ecf20Sopenharmony_ci 15908c2ecf20Sopenharmony_ci 15918c2ecf20Sopenharmony_ci for (i = 0; i < g->p.nr_proc; i++) { 15928c2ecf20Sopenharmony_ci wpid = waitpid(pids[i], &wait_stat, 0); 15938c2ecf20Sopenharmony_ci BUG_ON(wpid < 0); 15948c2ecf20Sopenharmony_ci BUG_ON(!WIFEXITED(wait_stat)); 15958c2ecf20Sopenharmony_ci 15968c2ecf20Sopenharmony_ci } 15978c2ecf20Sopenharmony_ci 15988c2ecf20Sopenharmony_ci runtime_ns_sum = 0; 15998c2ecf20Sopenharmony_ci runtime_ns_min = -1LL; 16008c2ecf20Sopenharmony_ci 16018c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_tasks; t++) { 16028c2ecf20Sopenharmony_ci u64 thread_runtime_ns = g->threads[t].runtime_ns; 16038c2ecf20Sopenharmony_ci 16048c2ecf20Sopenharmony_ci runtime_ns_sum += thread_runtime_ns; 16058c2ecf20Sopenharmony_ci runtime_ns_min = min(thread_runtime_ns, runtime_ns_min); 16068c2ecf20Sopenharmony_ci } 16078c2ecf20Sopenharmony_ci 16088c2ecf20Sopenharmony_ci gettimeofday(&stop, NULL); 16098c2ecf20Sopenharmony_ci timersub(&stop, &start, &diff); 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ci BUG_ON(bench_format != BENCH_FORMAT_DEFAULT); 16128c2ecf20Sopenharmony_ci 16138c2ecf20Sopenharmony_ci tprintf("\n ###\n"); 16148c2ecf20Sopenharmony_ci tprintf("\n"); 16158c2ecf20Sopenharmony_ci 16168c2ecf20Sopenharmony_ci runtime_sec_max = diff.tv_sec * NSEC_PER_SEC; 16178c2ecf20Sopenharmony_ci runtime_sec_max += diff.tv_usec * NSEC_PER_USEC; 16188c2ecf20Sopenharmony_ci runtime_sec_max /= NSEC_PER_SEC; 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci runtime_sec_min = runtime_ns_min / NSEC_PER_SEC; 16218c2ecf20Sopenharmony_ci 16228c2ecf20Sopenharmony_ci bytes = g->bytes_done; 16238c2ecf20Sopenharmony_ci runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC; 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_ci if (g->p.measure_convergence) { 16268c2ecf20Sopenharmony_ci print_res(name, runtime_sec_max, 16278c2ecf20Sopenharmony_ci "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge"); 16288c2ecf20Sopenharmony_ci } 16298c2ecf20Sopenharmony_ci 16308c2ecf20Sopenharmony_ci print_res(name, runtime_sec_max, 16318c2ecf20Sopenharmony_ci "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime"); 16328c2ecf20Sopenharmony_ci 16338c2ecf20Sopenharmony_ci print_res(name, runtime_sec_min, 16348c2ecf20Sopenharmony_ci "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime"); 16358c2ecf20Sopenharmony_ci 16368c2ecf20Sopenharmony_ci print_res(name, runtime_avg, 16378c2ecf20Sopenharmony_ci "secs,", "runtime-avg/thread", "secs average thread-runtime"); 16388c2ecf20Sopenharmony_ci 16398c2ecf20Sopenharmony_ci delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0; 16408c2ecf20Sopenharmony_ci print_res(name, delta_runtime / runtime_sec_max * 100.0, 16418c2ecf20Sopenharmony_ci "%,", "spread-runtime/thread", "% difference between max/avg runtime"); 16428c2ecf20Sopenharmony_ci 16438c2ecf20Sopenharmony_ci print_res(name, bytes / g->p.nr_tasks / 1e9, 16448c2ecf20Sopenharmony_ci "GB,", "data/thread", "GB data processed, per thread"); 16458c2ecf20Sopenharmony_ci 16468c2ecf20Sopenharmony_ci print_res(name, bytes / 1e9, 16478c2ecf20Sopenharmony_ci "GB,", "data-total", "GB data processed, total"); 16488c2ecf20Sopenharmony_ci 16498c2ecf20Sopenharmony_ci print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks), 16508c2ecf20Sopenharmony_ci "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime"); 16518c2ecf20Sopenharmony_ci 16528c2ecf20Sopenharmony_ci print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max, 16538c2ecf20Sopenharmony_ci "GB/sec,", "thread-speed", "GB/sec/thread speed"); 16548c2ecf20Sopenharmony_ci 16558c2ecf20Sopenharmony_ci print_res(name, bytes / runtime_sec_max / 1e9, 16568c2ecf20Sopenharmony_ci "GB/sec,", "total-speed", "GB/sec total speed"); 16578c2ecf20Sopenharmony_ci 16588c2ecf20Sopenharmony_ci if (g->p.show_details >= 2) { 16598c2ecf20Sopenharmony_ci char tname[14 + 2 * 11 + 1]; 16608c2ecf20Sopenharmony_ci struct thread_data *td; 16618c2ecf20Sopenharmony_ci for (p = 0; p < g->p.nr_proc; p++) { 16628c2ecf20Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 16638c2ecf20Sopenharmony_ci memset(tname, 0, sizeof(tname)); 16648c2ecf20Sopenharmony_ci td = g->threads + p*g->p.nr_threads + t; 16658c2ecf20Sopenharmony_ci snprintf(tname, sizeof(tname), "process%d:thread%d", p, t); 16668c2ecf20Sopenharmony_ci print_res(tname, td->speed_gbs, 16678c2ecf20Sopenharmony_ci "GB/sec", "thread-speed", "GB/sec/thread speed"); 16688c2ecf20Sopenharmony_ci print_res(tname, td->system_time_ns / NSEC_PER_SEC, 16698c2ecf20Sopenharmony_ci "secs", "thread-system-time", "system CPU time/thread"); 16708c2ecf20Sopenharmony_ci print_res(tname, td->user_time_ns / NSEC_PER_SEC, 16718c2ecf20Sopenharmony_ci "secs", "thread-user-time", "user CPU time/thread"); 16728c2ecf20Sopenharmony_ci } 16738c2ecf20Sopenharmony_ci } 16748c2ecf20Sopenharmony_ci } 16758c2ecf20Sopenharmony_ci 16768c2ecf20Sopenharmony_ci free(pids); 16778c2ecf20Sopenharmony_ci 16788c2ecf20Sopenharmony_ci deinit(); 16798c2ecf20Sopenharmony_ci 16808c2ecf20Sopenharmony_ci return 0; 16818c2ecf20Sopenharmony_ci} 16828c2ecf20Sopenharmony_ci 16838c2ecf20Sopenharmony_ci#define MAX_ARGS 50 16848c2ecf20Sopenharmony_ci 16858c2ecf20Sopenharmony_cistatic int command_size(const char **argv) 16868c2ecf20Sopenharmony_ci{ 16878c2ecf20Sopenharmony_ci int size = 0; 16888c2ecf20Sopenharmony_ci 16898c2ecf20Sopenharmony_ci while (*argv) { 16908c2ecf20Sopenharmony_ci size++; 16918c2ecf20Sopenharmony_ci argv++; 16928c2ecf20Sopenharmony_ci } 16938c2ecf20Sopenharmony_ci 16948c2ecf20Sopenharmony_ci BUG_ON(size >= MAX_ARGS); 16958c2ecf20Sopenharmony_ci 16968c2ecf20Sopenharmony_ci return size; 16978c2ecf20Sopenharmony_ci} 16988c2ecf20Sopenharmony_ci 16998c2ecf20Sopenharmony_cistatic void init_params(struct params *p, const char *name, int argc, const char **argv) 17008c2ecf20Sopenharmony_ci{ 17018c2ecf20Sopenharmony_ci int i; 17028c2ecf20Sopenharmony_ci 17038c2ecf20Sopenharmony_ci printf("\n # Running %s \"perf bench numa", name); 17048c2ecf20Sopenharmony_ci 17058c2ecf20Sopenharmony_ci for (i = 0; i < argc; i++) 17068c2ecf20Sopenharmony_ci printf(" %s", argv[i]); 17078c2ecf20Sopenharmony_ci 17088c2ecf20Sopenharmony_ci printf("\"\n"); 17098c2ecf20Sopenharmony_ci 17108c2ecf20Sopenharmony_ci memset(p, 0, sizeof(*p)); 17118c2ecf20Sopenharmony_ci 17128c2ecf20Sopenharmony_ci /* Initialize nonzero defaults: */ 17138c2ecf20Sopenharmony_ci 17148c2ecf20Sopenharmony_ci p->serialize_startup = 1; 17158c2ecf20Sopenharmony_ci p->data_reads = true; 17168c2ecf20Sopenharmony_ci p->data_writes = true; 17178c2ecf20Sopenharmony_ci p->data_backwards = true; 17188c2ecf20Sopenharmony_ci p->data_rand_walk = true; 17198c2ecf20Sopenharmony_ci p->nr_loops = -1; 17208c2ecf20Sopenharmony_ci p->init_random = true; 17218c2ecf20Sopenharmony_ci p->mb_global_str = "1"; 17228c2ecf20Sopenharmony_ci p->nr_proc = 1; 17238c2ecf20Sopenharmony_ci p->nr_threads = 1; 17248c2ecf20Sopenharmony_ci p->nr_secs = 5; 17258c2ecf20Sopenharmony_ci p->run_all = argc == 1; 17268c2ecf20Sopenharmony_ci} 17278c2ecf20Sopenharmony_ci 17288c2ecf20Sopenharmony_cistatic int run_bench_numa(const char *name, const char **argv) 17298c2ecf20Sopenharmony_ci{ 17308c2ecf20Sopenharmony_ci int argc = command_size(argv); 17318c2ecf20Sopenharmony_ci 17328c2ecf20Sopenharmony_ci init_params(&p0, name, argc, argv); 17338c2ecf20Sopenharmony_ci argc = parse_options(argc, argv, options, bench_numa_usage, 0); 17348c2ecf20Sopenharmony_ci if (argc) 17358c2ecf20Sopenharmony_ci goto err; 17368c2ecf20Sopenharmony_ci 17378c2ecf20Sopenharmony_ci if (__bench_numa(name)) 17388c2ecf20Sopenharmony_ci goto err; 17398c2ecf20Sopenharmony_ci 17408c2ecf20Sopenharmony_ci return 0; 17418c2ecf20Sopenharmony_ci 17428c2ecf20Sopenharmony_cierr: 17438c2ecf20Sopenharmony_ci return -1; 17448c2ecf20Sopenharmony_ci} 17458c2ecf20Sopenharmony_ci 17468c2ecf20Sopenharmony_ci#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk" 17478c2ecf20Sopenharmony_ci#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1" 17488c2ecf20Sopenharmony_ci 17498c2ecf20Sopenharmony_ci#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1" 17508c2ecf20Sopenharmony_ci#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1" 17518c2ecf20Sopenharmony_ci 17528c2ecf20Sopenharmony_ci#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1" 17538c2ecf20Sopenharmony_ci#define OPT_BW_NOTHP OPT_BW, "--thp", "-1" 17548c2ecf20Sopenharmony_ci 17558c2ecf20Sopenharmony_ci/* 17568c2ecf20Sopenharmony_ci * The built-in test-suite executed by "perf bench numa -a". 17578c2ecf20Sopenharmony_ci * 17588c2ecf20Sopenharmony_ci * (A minimum of 4 nodes and 16 GB of RAM is recommended.) 17598c2ecf20Sopenharmony_ci */ 17608c2ecf20Sopenharmony_cistatic const char *tests[][MAX_ARGS] = { 17618c2ecf20Sopenharmony_ci /* Basic single-stream NUMA bandwidth measurements: */ 17628c2ecf20Sopenharmony_ci { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", 17638c2ecf20Sopenharmony_ci "-C" , "0", "-M", "0", OPT_BW_RAM }, 17648c2ecf20Sopenharmony_ci { "RAM-bw-local-NOTHP,", 17658c2ecf20Sopenharmony_ci "mem", "-p", "1", "-t", "1", "-P", "1024", 17668c2ecf20Sopenharmony_ci "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, 17678c2ecf20Sopenharmony_ci { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", 17688c2ecf20Sopenharmony_ci "-C" , "0", "-M", "1", OPT_BW_RAM }, 17698c2ecf20Sopenharmony_ci 17708c2ecf20Sopenharmony_ci /* 2-stream NUMA bandwidth measurements: */ 17718c2ecf20Sopenharmony_ci { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", 17728c2ecf20Sopenharmony_ci "-C", "0,2", "-M", "0x2", OPT_BW_RAM }, 17738c2ecf20Sopenharmony_ci { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", 17748c2ecf20Sopenharmony_ci "-C", "0,2", "-M", "1x2", OPT_BW_RAM }, 17758c2ecf20Sopenharmony_ci 17768c2ecf20Sopenharmony_ci /* Cross-stream NUMA bandwidth measurement: */ 17778c2ecf20Sopenharmony_ci { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024", 17788c2ecf20Sopenharmony_ci "-C", "0,8", "-M", "1,0", OPT_BW_RAM }, 17798c2ecf20Sopenharmony_ci 17808c2ecf20Sopenharmony_ci /* Convergence latency measurements: */ 17818c2ecf20Sopenharmony_ci { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, 17828c2ecf20Sopenharmony_ci { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, 17838c2ecf20Sopenharmony_ci { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, 17848c2ecf20Sopenharmony_ci { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV }, 17858c2ecf20Sopenharmony_ci { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, 17868c2ecf20Sopenharmony_ci { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, 17878c2ecf20Sopenharmony_ci { " 4x4-convergence-NOTHP,", 17888c2ecf20Sopenharmony_ci "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, 17898c2ecf20Sopenharmony_ci { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV }, 17908c2ecf20Sopenharmony_ci { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV }, 17918c2ecf20Sopenharmony_ci { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV }, 17928c2ecf20Sopenharmony_ci { " 8x4-convergence-NOTHP,", 17938c2ecf20Sopenharmony_ci "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, 17948c2ecf20Sopenharmony_ci { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV }, 17958c2ecf20Sopenharmony_ci { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV }, 17968c2ecf20Sopenharmony_ci { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV }, 17978c2ecf20Sopenharmony_ci { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV }, 17988c2ecf20Sopenharmony_ci { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV }, 17998c2ecf20Sopenharmony_ci 18008c2ecf20Sopenharmony_ci /* Various NUMA process/thread layout bandwidth measurements: */ 18018c2ecf20Sopenharmony_ci { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW }, 18028c2ecf20Sopenharmony_ci { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW }, 18038c2ecf20Sopenharmony_ci { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW }, 18048c2ecf20Sopenharmony_ci { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW }, 18058c2ecf20Sopenharmony_ci { " 8x1-bw-process-NOTHP,", 18068c2ecf20Sopenharmony_ci "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, 18078c2ecf20Sopenharmony_ci { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, 18088c2ecf20Sopenharmony_ci 18098c2ecf20Sopenharmony_ci { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, 18108c2ecf20Sopenharmony_ci { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, 18118c2ecf20Sopenharmony_ci { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, 18128c2ecf20Sopenharmony_ci { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, 18158c2ecf20Sopenharmony_ci { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, 18168c2ecf20Sopenharmony_ci { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, 18178c2ecf20Sopenharmony_ci { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, 18188c2ecf20Sopenharmony_ci { " 4x8-bw-process-NOTHP,", 18198c2ecf20Sopenharmony_ci "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, 18208c2ecf20Sopenharmony_ci { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, 18218c2ecf20Sopenharmony_ci { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, 18228c2ecf20Sopenharmony_ci 18238c2ecf20Sopenharmony_ci { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, 18248c2ecf20Sopenharmony_ci { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, 18258c2ecf20Sopenharmony_ci 18268c2ecf20Sopenharmony_ci { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, 18278c2ecf20Sopenharmony_ci { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, 18288c2ecf20Sopenharmony_ci { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, 18298c2ecf20Sopenharmony_ci { "numa01-bw-thread-NOTHP,", 18308c2ecf20Sopenharmony_ci "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP }, 18318c2ecf20Sopenharmony_ci}; 18328c2ecf20Sopenharmony_ci 18338c2ecf20Sopenharmony_cistatic int bench_all(void) 18348c2ecf20Sopenharmony_ci{ 18358c2ecf20Sopenharmony_ci int nr = ARRAY_SIZE(tests); 18368c2ecf20Sopenharmony_ci int ret; 18378c2ecf20Sopenharmony_ci int i; 18388c2ecf20Sopenharmony_ci 18398c2ecf20Sopenharmony_ci ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'"); 18408c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 18418c2ecf20Sopenharmony_ci 18428c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 18438c2ecf20Sopenharmony_ci run_bench_numa(tests[i][0], tests[i] + 1); 18448c2ecf20Sopenharmony_ci } 18458c2ecf20Sopenharmony_ci 18468c2ecf20Sopenharmony_ci printf("\n"); 18478c2ecf20Sopenharmony_ci 18488c2ecf20Sopenharmony_ci return 0; 18498c2ecf20Sopenharmony_ci} 18508c2ecf20Sopenharmony_ci 18518c2ecf20Sopenharmony_ciint bench_numa(int argc, const char **argv) 18528c2ecf20Sopenharmony_ci{ 18538c2ecf20Sopenharmony_ci init_params(&p0, "main,", argc, argv); 18548c2ecf20Sopenharmony_ci argc = parse_options(argc, argv, options, bench_numa_usage, 0); 18558c2ecf20Sopenharmony_ci if (argc) 18568c2ecf20Sopenharmony_ci goto err; 18578c2ecf20Sopenharmony_ci 18588c2ecf20Sopenharmony_ci if (p0.run_all) 18598c2ecf20Sopenharmony_ci return bench_all(); 18608c2ecf20Sopenharmony_ci 18618c2ecf20Sopenharmony_ci if (__bench_numa(NULL)) 18628c2ecf20Sopenharmony_ci goto err; 18638c2ecf20Sopenharmony_ci 18648c2ecf20Sopenharmony_ci return 0; 18658c2ecf20Sopenharmony_ci 18668c2ecf20Sopenharmony_cierr: 18678c2ecf20Sopenharmony_ci usage_with_options(numa_usage, options); 18688c2ecf20Sopenharmony_ci return -1; 18698c2ecf20Sopenharmony_ci} 1870