162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * numa.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * numa: Simulate NUMA-sensitive workload and measure their NUMA performance 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <inttypes.h> 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <subcmd/parse-options.h> 1162306a36Sopenharmony_ci#include "../util/cloexec.h" 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include "bench.h" 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#include <errno.h> 1662306a36Sopenharmony_ci#include <sched.h> 1762306a36Sopenharmony_ci#include <stdio.h> 1862306a36Sopenharmony_ci#include <assert.h> 1962306a36Sopenharmony_ci#include <debug.h> 2062306a36Sopenharmony_ci#include <malloc.h> 2162306a36Sopenharmony_ci#include <signal.h> 2262306a36Sopenharmony_ci#include <stdlib.h> 2362306a36Sopenharmony_ci#include <string.h> 2462306a36Sopenharmony_ci#include <unistd.h> 2562306a36Sopenharmony_ci#include <sys/mman.h> 2662306a36Sopenharmony_ci#include <sys/time.h> 2762306a36Sopenharmony_ci#include <sys/resource.h> 2862306a36Sopenharmony_ci#include <sys/wait.h> 2962306a36Sopenharmony_ci#include <sys/prctl.h> 3062306a36Sopenharmony_ci#include <sys/types.h> 3162306a36Sopenharmony_ci#include <linux/kernel.h> 3262306a36Sopenharmony_ci#include <linux/time64.h> 3362306a36Sopenharmony_ci#include <linux/numa.h> 3462306a36Sopenharmony_ci#include <linux/zalloc.h> 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci#include "../util/header.h" 3762306a36Sopenharmony_ci#include "../util/mutex.h" 3862306a36Sopenharmony_ci#include <numa.h> 3962306a36Sopenharmony_ci#include <numaif.h> 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#ifndef RUSAGE_THREAD 4262306a36Sopenharmony_ci# define RUSAGE_THREAD 1 4362306a36Sopenharmony_ci#endif 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci/* 4662306a36Sopenharmony_ci * Regular printout to the terminal, suppressed if -q is specified: 4762306a36Sopenharmony_ci */ 4862306a36Sopenharmony_ci#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci/* 5162306a36Sopenharmony_ci * Debug printf: 5262306a36Sopenharmony_ci */ 5362306a36Sopenharmony_ci#undef dprintf 5462306a36Sopenharmony_ci#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0) 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cistruct thread_data { 5762306a36Sopenharmony_ci int curr_cpu; 5862306a36Sopenharmony_ci cpu_set_t *bind_cpumask; 5962306a36Sopenharmony_ci int bind_node; 6062306a36Sopenharmony_ci u8 *process_data; 6162306a36Sopenharmony_ci int process_nr; 6262306a36Sopenharmony_ci int thread_nr; 6362306a36Sopenharmony_ci int task_nr; 6462306a36Sopenharmony_ci unsigned int loops_done; 6562306a36Sopenharmony_ci u64 val; 6662306a36Sopenharmony_ci u64 runtime_ns; 6762306a36Sopenharmony_ci u64 system_time_ns; 6862306a36Sopenharmony_ci u64 user_time_ns; 6962306a36Sopenharmony_ci double speed_gbs; 7062306a36Sopenharmony_ci struct mutex *process_lock; 7162306a36Sopenharmony_ci}; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci/* Parameters set by options: */ 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_cistruct params { 7662306a36Sopenharmony_ci /* Startup synchronization: */ 7762306a36Sopenharmony_ci bool serialize_startup; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci /* Task hierarchy: */ 8062306a36Sopenharmony_ci int nr_proc; 8162306a36Sopenharmony_ci int nr_threads; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci /* Working set sizes: */ 8462306a36Sopenharmony_ci const char *mb_global_str; 8562306a36Sopenharmony_ci const char *mb_proc_str; 8662306a36Sopenharmony_ci const char *mb_proc_locked_str; 8762306a36Sopenharmony_ci const char *mb_thread_str; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci double mb_global; 9062306a36Sopenharmony_ci double mb_proc; 9162306a36Sopenharmony_ci double mb_proc_locked; 9262306a36Sopenharmony_ci double mb_thread; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci /* Access patterns to the working set: */ 9562306a36Sopenharmony_ci bool data_reads; 9662306a36Sopenharmony_ci bool data_writes; 9762306a36Sopenharmony_ci bool data_backwards; 9862306a36Sopenharmony_ci bool data_zero_memset; 9962306a36Sopenharmony_ci bool data_rand_walk; 10062306a36Sopenharmony_ci u32 nr_loops; 10162306a36Sopenharmony_ci u32 nr_secs; 10262306a36Sopenharmony_ci u32 sleep_usecs; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci /* Working set initialization: */ 10562306a36Sopenharmony_ci bool init_zero; 10662306a36Sopenharmony_ci bool init_random; 10762306a36Sopenharmony_ci bool init_cpu0; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci /* Misc options: */ 11062306a36Sopenharmony_ci int show_details; 11162306a36Sopenharmony_ci int run_all; 11262306a36Sopenharmony_ci int thp; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci long bytes_global; 11562306a36Sopenharmony_ci long bytes_process; 11662306a36Sopenharmony_ci long bytes_process_locked; 11762306a36Sopenharmony_ci long bytes_thread; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci int nr_tasks; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci bool show_convergence; 12262306a36Sopenharmony_ci bool measure_convergence; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci int perturb_secs; 12562306a36Sopenharmony_ci int nr_cpus; 12662306a36Sopenharmony_ci int nr_nodes; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci /* Affinity options -C and -N: */ 12962306a36Sopenharmony_ci char *cpu_list_str; 13062306a36Sopenharmony_ci char *node_list_str; 13162306a36Sopenharmony_ci}; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci/* Global, read-writable area, accessible to all processes and threads: */ 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_cistruct global_info { 13762306a36Sopenharmony_ci u8 *data; 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci struct mutex startup_mutex; 14062306a36Sopenharmony_ci struct cond startup_cond; 14162306a36Sopenharmony_ci int nr_tasks_started; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci struct mutex start_work_mutex; 14462306a36Sopenharmony_ci struct cond start_work_cond; 14562306a36Sopenharmony_ci int nr_tasks_working; 14662306a36Sopenharmony_ci bool start_work; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci struct mutex stop_work_mutex; 14962306a36Sopenharmony_ci u64 bytes_done; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci struct thread_data *threads; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci /* Convergence latency measurement: */ 15462306a36Sopenharmony_ci bool all_converged; 15562306a36Sopenharmony_ci bool stop_work; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci int print_once; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci struct params p; 16062306a36Sopenharmony_ci}; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_cistatic struct global_info *g = NULL; 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_cistatic int parse_cpus_opt(const struct option *opt, const char *arg, int unset); 16562306a36Sopenharmony_cistatic int parse_nodes_opt(const struct option *opt, const char *arg, int unset); 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_cistruct params p0; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_cistatic const struct option options[] = { 17062306a36Sopenharmony_ci OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"), 17162306a36Sopenharmony_ci OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"), 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"), 17462306a36Sopenharmony_ci OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"), 17562306a36Sopenharmony_ci OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"), 17662306a36Sopenharmony_ci OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"), 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run (default: unlimited)"), 17962306a36Sopenharmony_ci OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), 18062306a36Sopenharmony_ci OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"), 18362306a36Sopenharmony_ci OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), 18462306a36Sopenharmony_ci OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), 18562306a36Sopenharmony_ci OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), 18662306a36Sopenharmony_ci OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"), 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"), 19062306a36Sopenharmony_ci OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"), 19162306a36Sopenharmony_ci OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"), 19262306a36Sopenharmony_ci OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"), 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"), 19562306a36Sopenharmony_ci OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"), 19662306a36Sopenharmony_ci OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), 19762306a36Sopenharmony_ci OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " 19862306a36Sopenharmony_ci "convergence is reached when each process (all its threads) is running on a single NUMA node."), 19962306a36Sopenharmony_ci OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), 20062306a36Sopenharmony_ci OPT_BOOLEAN('q', "quiet" , &quiet, 20162306a36Sopenharmony_ci "quiet mode (do not show any warnings or messages)"), 20262306a36Sopenharmony_ci OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci /* Special option string parsing callbacks: */ 20562306a36Sopenharmony_ci OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]", 20662306a36Sopenharmony_ci "bind the first N tasks to these specific cpus (the rest is unbound)", 20762306a36Sopenharmony_ci parse_cpus_opt), 20862306a36Sopenharmony_ci OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]", 20962306a36Sopenharmony_ci "bind the first N tasks to these specific memory nodes (the rest is unbound)", 21062306a36Sopenharmony_ci parse_nodes_opt), 21162306a36Sopenharmony_ci OPT_END() 21262306a36Sopenharmony_ci}; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_cistatic const char * const bench_numa_usage[] = { 21562306a36Sopenharmony_ci "perf bench numa <options>", 21662306a36Sopenharmony_ci NULL 21762306a36Sopenharmony_ci}; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_cistatic const char * const numa_usage[] = { 22062306a36Sopenharmony_ci "perf bench numa mem [<options>]", 22162306a36Sopenharmony_ci NULL 22262306a36Sopenharmony_ci}; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci/* 22562306a36Sopenharmony_ci * To get number of numa nodes present. 22662306a36Sopenharmony_ci */ 22762306a36Sopenharmony_cistatic int nr_numa_nodes(void) 22862306a36Sopenharmony_ci{ 22962306a36Sopenharmony_ci int i, nr_nodes = 0; 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci for (i = 0; i < g->p.nr_nodes; i++) { 23262306a36Sopenharmony_ci if (numa_bitmask_isbitset(numa_nodes_ptr, i)) 23362306a36Sopenharmony_ci nr_nodes++; 23462306a36Sopenharmony_ci } 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci return nr_nodes; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* 24062306a36Sopenharmony_ci * To check if given numa node is present. 24162306a36Sopenharmony_ci */ 24262306a36Sopenharmony_cistatic int is_node_present(int node) 24362306a36Sopenharmony_ci{ 24462306a36Sopenharmony_ci return numa_bitmask_isbitset(numa_nodes_ptr, node); 24562306a36Sopenharmony_ci} 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci/* 24862306a36Sopenharmony_ci * To check given numa node has cpus. 24962306a36Sopenharmony_ci */ 25062306a36Sopenharmony_cistatic bool node_has_cpus(int node) 25162306a36Sopenharmony_ci{ 25262306a36Sopenharmony_ci struct bitmask *cpumask = numa_allocate_cpumask(); 25362306a36Sopenharmony_ci bool ret = false; /* fall back to nocpus */ 25462306a36Sopenharmony_ci int cpu; 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci BUG_ON(!cpumask); 25762306a36Sopenharmony_ci if (!numa_node_to_cpus(node, cpumask)) { 25862306a36Sopenharmony_ci for (cpu = 0; cpu < (int)cpumask->size; cpu++) { 25962306a36Sopenharmony_ci if (numa_bitmask_isbitset(cpumask, cpu)) { 26062306a36Sopenharmony_ci ret = true; 26162306a36Sopenharmony_ci break; 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci } 26462306a36Sopenharmony_ci } 26562306a36Sopenharmony_ci numa_free_cpumask(cpumask); 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci return ret; 26862306a36Sopenharmony_ci} 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_cistatic cpu_set_t *bind_to_cpu(int target_cpu) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci int nrcpus = numa_num_possible_cpus(); 27362306a36Sopenharmony_ci cpu_set_t *orig_mask, *mask; 27462306a36Sopenharmony_ci size_t size; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci orig_mask = CPU_ALLOC(nrcpus); 27762306a36Sopenharmony_ci BUG_ON(!orig_mask); 27862306a36Sopenharmony_ci size = CPU_ALLOC_SIZE(nrcpus); 27962306a36Sopenharmony_ci CPU_ZERO_S(size, orig_mask); 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci if (sched_getaffinity(0, size, orig_mask)) 28262306a36Sopenharmony_ci goto err_out; 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci mask = CPU_ALLOC(nrcpus); 28562306a36Sopenharmony_ci if (!mask) 28662306a36Sopenharmony_ci goto err_out; 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci CPU_ZERO_S(size, mask); 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci if (target_cpu == -1) { 29162306a36Sopenharmony_ci int cpu; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 29462306a36Sopenharmony_ci CPU_SET_S(cpu, size, mask); 29562306a36Sopenharmony_ci } else { 29662306a36Sopenharmony_ci if (target_cpu < 0 || target_cpu >= g->p.nr_cpus) 29762306a36Sopenharmony_ci goto err; 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci CPU_SET_S(target_cpu, size, mask); 30062306a36Sopenharmony_ci } 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci if (sched_setaffinity(0, size, mask)) 30362306a36Sopenharmony_ci goto err; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci return orig_mask; 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_cierr: 30862306a36Sopenharmony_ci CPU_FREE(mask); 30962306a36Sopenharmony_cierr_out: 31062306a36Sopenharmony_ci CPU_FREE(orig_mask); 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci /* BUG_ON due to failure in allocation of orig_mask/mask */ 31362306a36Sopenharmony_ci BUG_ON(-1); 31462306a36Sopenharmony_ci return NULL; 31562306a36Sopenharmony_ci} 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_cistatic cpu_set_t *bind_to_node(int target_node) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci int nrcpus = numa_num_possible_cpus(); 32062306a36Sopenharmony_ci size_t size; 32162306a36Sopenharmony_ci cpu_set_t *orig_mask, *mask; 32262306a36Sopenharmony_ci int cpu; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci orig_mask = CPU_ALLOC(nrcpus); 32562306a36Sopenharmony_ci BUG_ON(!orig_mask); 32662306a36Sopenharmony_ci size = CPU_ALLOC_SIZE(nrcpus); 32762306a36Sopenharmony_ci CPU_ZERO_S(size, orig_mask); 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci if (sched_getaffinity(0, size, orig_mask)) 33062306a36Sopenharmony_ci goto err_out; 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci mask = CPU_ALLOC(nrcpus); 33362306a36Sopenharmony_ci if (!mask) 33462306a36Sopenharmony_ci goto err_out; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci CPU_ZERO_S(size, mask); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci if (target_node == NUMA_NO_NODE) { 33962306a36Sopenharmony_ci for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 34062306a36Sopenharmony_ci CPU_SET_S(cpu, size, mask); 34162306a36Sopenharmony_ci } else { 34262306a36Sopenharmony_ci struct bitmask *cpumask = numa_allocate_cpumask(); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci if (!cpumask) 34562306a36Sopenharmony_ci goto err; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci if (!numa_node_to_cpus(target_node, cpumask)) { 34862306a36Sopenharmony_ci for (cpu = 0; cpu < (int)cpumask->size; cpu++) { 34962306a36Sopenharmony_ci if (numa_bitmask_isbitset(cpumask, cpu)) 35062306a36Sopenharmony_ci CPU_SET_S(cpu, size, mask); 35162306a36Sopenharmony_ci } 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci numa_free_cpumask(cpumask); 35462306a36Sopenharmony_ci } 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci if (sched_setaffinity(0, size, mask)) 35762306a36Sopenharmony_ci goto err; 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci return orig_mask; 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_cierr: 36262306a36Sopenharmony_ci CPU_FREE(mask); 36362306a36Sopenharmony_cierr_out: 36462306a36Sopenharmony_ci CPU_FREE(orig_mask); 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci /* BUG_ON due to failure in allocation of orig_mask/mask */ 36762306a36Sopenharmony_ci BUG_ON(-1); 36862306a36Sopenharmony_ci return NULL; 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_cistatic void bind_to_cpumask(cpu_set_t *mask) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci int ret; 37462306a36Sopenharmony_ci size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus()); 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci ret = sched_setaffinity(0, size, mask); 37762306a36Sopenharmony_ci if (ret) { 37862306a36Sopenharmony_ci CPU_FREE(mask); 37962306a36Sopenharmony_ci BUG_ON(ret); 38062306a36Sopenharmony_ci } 38162306a36Sopenharmony_ci} 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_cistatic void mempol_restore(void) 38462306a36Sopenharmony_ci{ 38562306a36Sopenharmony_ci int ret; 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1); 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci BUG_ON(ret); 39062306a36Sopenharmony_ci} 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_cistatic void bind_to_memnode(int node) 39362306a36Sopenharmony_ci{ 39462306a36Sopenharmony_ci struct bitmask *node_mask; 39562306a36Sopenharmony_ci int ret; 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci if (node == NUMA_NO_NODE) 39862306a36Sopenharmony_ci return; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci node_mask = numa_allocate_nodemask(); 40162306a36Sopenharmony_ci BUG_ON(!node_mask); 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci numa_bitmask_clearall(node_mask); 40462306a36Sopenharmony_ci numa_bitmask_setbit(node_mask, node); 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1); 40762306a36Sopenharmony_ci dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret); 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci numa_bitmask_free(node_mask); 41062306a36Sopenharmony_ci BUG_ON(ret); 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci#define HPSIZE (2*1024*1024) 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci#define set_taskname(fmt...) \ 41662306a36Sopenharmony_cido { \ 41762306a36Sopenharmony_ci char name[20]; \ 41862306a36Sopenharmony_ci \ 41962306a36Sopenharmony_ci snprintf(name, 20, fmt); \ 42062306a36Sopenharmony_ci prctl(PR_SET_NAME, name); \ 42162306a36Sopenharmony_ci} while (0) 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_cistatic u8 *alloc_data(ssize_t bytes0, int map_flags, 42462306a36Sopenharmony_ci int init_zero, int init_cpu0, int thp, int init_random) 42562306a36Sopenharmony_ci{ 42662306a36Sopenharmony_ci cpu_set_t *orig_mask = NULL; 42762306a36Sopenharmony_ci ssize_t bytes; 42862306a36Sopenharmony_ci u8 *buf; 42962306a36Sopenharmony_ci int ret; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci if (!bytes0) 43262306a36Sopenharmony_ci return NULL; 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci /* Allocate and initialize all memory on CPU#0: */ 43562306a36Sopenharmony_ci if (init_cpu0) { 43662306a36Sopenharmony_ci int node = numa_node_of_cpu(0); 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci orig_mask = bind_to_node(node); 43962306a36Sopenharmony_ci bind_to_memnode(node); 44062306a36Sopenharmony_ci } 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci bytes = bytes0 + HPSIZE; 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0); 44562306a36Sopenharmony_ci BUG_ON(buf == (void *)-1); 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci if (map_flags == MAP_PRIVATE) { 44862306a36Sopenharmony_ci if (thp > 0) { 44962306a36Sopenharmony_ci ret = madvise(buf, bytes, MADV_HUGEPAGE); 45062306a36Sopenharmony_ci if (ret && !g->print_once) { 45162306a36Sopenharmony_ci g->print_once = 1; 45262306a36Sopenharmony_ci printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n"); 45362306a36Sopenharmony_ci } 45462306a36Sopenharmony_ci } 45562306a36Sopenharmony_ci if (thp < 0) { 45662306a36Sopenharmony_ci ret = madvise(buf, bytes, MADV_NOHUGEPAGE); 45762306a36Sopenharmony_ci if (ret && !g->print_once) { 45862306a36Sopenharmony_ci g->print_once = 1; 45962306a36Sopenharmony_ci printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n"); 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci } 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci if (init_zero) { 46562306a36Sopenharmony_ci bzero(buf, bytes); 46662306a36Sopenharmony_ci } else { 46762306a36Sopenharmony_ci /* Initialize random contents, different in each word: */ 46862306a36Sopenharmony_ci if (init_random) { 46962306a36Sopenharmony_ci u64 *wbuf = (void *)buf; 47062306a36Sopenharmony_ci long off = rand(); 47162306a36Sopenharmony_ci long i; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci for (i = 0; i < bytes/8; i++) 47462306a36Sopenharmony_ci wbuf[i] = i + off; 47562306a36Sopenharmony_ci } 47662306a36Sopenharmony_ci } 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci /* Align to 2MB boundary: */ 47962306a36Sopenharmony_ci buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1)); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci /* Restore affinity: */ 48262306a36Sopenharmony_ci if (init_cpu0) { 48362306a36Sopenharmony_ci bind_to_cpumask(orig_mask); 48462306a36Sopenharmony_ci CPU_FREE(orig_mask); 48562306a36Sopenharmony_ci mempol_restore(); 48662306a36Sopenharmony_ci } 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci return buf; 48962306a36Sopenharmony_ci} 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_cistatic void free_data(void *data, ssize_t bytes) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci int ret; 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci if (!data) 49662306a36Sopenharmony_ci return; 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci ret = munmap(data, bytes); 49962306a36Sopenharmony_ci BUG_ON(ret); 50062306a36Sopenharmony_ci} 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci/* 50362306a36Sopenharmony_ci * Create a shared memory buffer that can be shared between processes, zeroed: 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_cistatic void * zalloc_shared_data(ssize_t bytes) 50662306a36Sopenharmony_ci{ 50762306a36Sopenharmony_ci return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random); 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci/* 51162306a36Sopenharmony_ci * Create a shared memory buffer that can be shared between processes: 51262306a36Sopenharmony_ci */ 51362306a36Sopenharmony_cistatic void * setup_shared_data(ssize_t bytes) 51462306a36Sopenharmony_ci{ 51562306a36Sopenharmony_ci return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); 51662306a36Sopenharmony_ci} 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci/* 51962306a36Sopenharmony_ci * Allocate process-local memory - this will either be shared between 52062306a36Sopenharmony_ci * threads of this process, or only be accessed by this thread: 52162306a36Sopenharmony_ci */ 52262306a36Sopenharmony_cistatic void * setup_private_data(ssize_t bytes) 52362306a36Sopenharmony_ci{ 52462306a36Sopenharmony_ci return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); 52562306a36Sopenharmony_ci} 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_cistatic int parse_cpu_list(const char *arg) 52862306a36Sopenharmony_ci{ 52962306a36Sopenharmony_ci p0.cpu_list_str = strdup(arg); 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci dprintf("got CPU list: {%s}\n", p0.cpu_list_str); 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci return 0; 53462306a36Sopenharmony_ci} 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_cistatic int parse_setup_cpu_list(void) 53762306a36Sopenharmony_ci{ 53862306a36Sopenharmony_ci struct thread_data *td; 53962306a36Sopenharmony_ci char *str0, *str; 54062306a36Sopenharmony_ci int t; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci if (!g->p.cpu_list_str) 54362306a36Sopenharmony_ci return 0; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci str0 = str = strdup(g->p.cpu_list_str); 54862306a36Sopenharmony_ci t = 0; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci BUG_ON(!str); 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci tprintf("# binding tasks to CPUs:\n"); 55362306a36Sopenharmony_ci tprintf("# "); 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci while (true) { 55662306a36Sopenharmony_ci int bind_cpu, bind_cpu_0, bind_cpu_1; 55762306a36Sopenharmony_ci char *tok, *tok_end, *tok_step, *tok_len, *tok_mul; 55862306a36Sopenharmony_ci int bind_len; 55962306a36Sopenharmony_ci int step; 56062306a36Sopenharmony_ci int mul; 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci tok = strsep(&str, ","); 56362306a36Sopenharmony_ci if (!tok) 56462306a36Sopenharmony_ci break; 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci tok_end = strstr(tok, "-"); 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); 56962306a36Sopenharmony_ci if (!tok_end) { 57062306a36Sopenharmony_ci /* Single CPU specified: */ 57162306a36Sopenharmony_ci bind_cpu_0 = bind_cpu_1 = atol(tok); 57262306a36Sopenharmony_ci } else { 57362306a36Sopenharmony_ci /* CPU range specified (for example: "5-11"): */ 57462306a36Sopenharmony_ci bind_cpu_0 = atol(tok); 57562306a36Sopenharmony_ci bind_cpu_1 = atol(tok_end + 1); 57662306a36Sopenharmony_ci } 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci step = 1; 57962306a36Sopenharmony_ci tok_step = strstr(tok, "#"); 58062306a36Sopenharmony_ci if (tok_step) { 58162306a36Sopenharmony_ci step = atol(tok_step + 1); 58262306a36Sopenharmony_ci BUG_ON(step <= 0 || step >= g->p.nr_cpus); 58362306a36Sopenharmony_ci } 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci /* 58662306a36Sopenharmony_ci * Mask length. 58762306a36Sopenharmony_ci * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4', 58862306a36Sopenharmony_ci * where the _4 means the next 4 CPUs are allowed. 58962306a36Sopenharmony_ci */ 59062306a36Sopenharmony_ci bind_len = 1; 59162306a36Sopenharmony_ci tok_len = strstr(tok, "_"); 59262306a36Sopenharmony_ci if (tok_len) { 59362306a36Sopenharmony_ci bind_len = atol(tok_len + 1); 59462306a36Sopenharmony_ci BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus); 59562306a36Sopenharmony_ci } 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ 59862306a36Sopenharmony_ci mul = 1; 59962306a36Sopenharmony_ci tok_mul = strstr(tok, "x"); 60062306a36Sopenharmony_ci if (tok_mul) { 60162306a36Sopenharmony_ci mul = atol(tok_mul + 1); 60262306a36Sopenharmony_ci BUG_ON(mul <= 0); 60362306a36Sopenharmony_ci } 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) { 60862306a36Sopenharmony_ci printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus); 60962306a36Sopenharmony_ci return -1; 61062306a36Sopenharmony_ci } 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) { 61362306a36Sopenharmony_ci printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n"); 61462306a36Sopenharmony_ci return -1; 61562306a36Sopenharmony_ci } 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); 61862306a36Sopenharmony_ci BUG_ON(bind_cpu_0 > bind_cpu_1); 61962306a36Sopenharmony_ci 62062306a36Sopenharmony_ci for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { 62162306a36Sopenharmony_ci size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus); 62262306a36Sopenharmony_ci int i; 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci for (i = 0; i < mul; i++) { 62562306a36Sopenharmony_ci int cpu; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci if (t >= g->p.nr_tasks) { 62862306a36Sopenharmony_ci printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu); 62962306a36Sopenharmony_ci goto out; 63062306a36Sopenharmony_ci } 63162306a36Sopenharmony_ci td = g->threads + t; 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci if (t) 63462306a36Sopenharmony_ci tprintf(","); 63562306a36Sopenharmony_ci if (bind_len > 1) { 63662306a36Sopenharmony_ci tprintf("%2d/%d", bind_cpu, bind_len); 63762306a36Sopenharmony_ci } else { 63862306a36Sopenharmony_ci tprintf("%2d", bind_cpu); 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); 64262306a36Sopenharmony_ci BUG_ON(!td->bind_cpumask); 64362306a36Sopenharmony_ci CPU_ZERO_S(size, td->bind_cpumask); 64462306a36Sopenharmony_ci for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { 64562306a36Sopenharmony_ci if (cpu < 0 || cpu >= g->p.nr_cpus) { 64662306a36Sopenharmony_ci CPU_FREE(td->bind_cpumask); 64762306a36Sopenharmony_ci BUG_ON(-1); 64862306a36Sopenharmony_ci } 64962306a36Sopenharmony_ci CPU_SET_S(cpu, size, td->bind_cpumask); 65062306a36Sopenharmony_ci } 65162306a36Sopenharmony_ci t++; 65262306a36Sopenharmony_ci } 65362306a36Sopenharmony_ci } 65462306a36Sopenharmony_ci } 65562306a36Sopenharmony_ciout: 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci tprintf("\n"); 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci if (t < g->p.nr_tasks) 66062306a36Sopenharmony_ci printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci free(str0); 66362306a36Sopenharmony_ci return 0; 66462306a36Sopenharmony_ci} 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_cistatic int parse_cpus_opt(const struct option *opt __maybe_unused, 66762306a36Sopenharmony_ci const char *arg, int unset __maybe_unused) 66862306a36Sopenharmony_ci{ 66962306a36Sopenharmony_ci if (!arg) 67062306a36Sopenharmony_ci return -1; 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci return parse_cpu_list(arg); 67362306a36Sopenharmony_ci} 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_cistatic int parse_node_list(const char *arg) 67662306a36Sopenharmony_ci{ 67762306a36Sopenharmony_ci p0.node_list_str = strdup(arg); 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci dprintf("got NODE list: {%s}\n", p0.node_list_str); 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci return 0; 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_cistatic int parse_setup_node_list(void) 68562306a36Sopenharmony_ci{ 68662306a36Sopenharmony_ci struct thread_data *td; 68762306a36Sopenharmony_ci char *str0, *str; 68862306a36Sopenharmony_ci int t; 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci if (!g->p.node_list_str) 69162306a36Sopenharmony_ci return 0; 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci str0 = str = strdup(g->p.node_list_str); 69662306a36Sopenharmony_ci t = 0; 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci BUG_ON(!str); 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci tprintf("# binding tasks to NODEs:\n"); 70162306a36Sopenharmony_ci tprintf("# "); 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci while (true) { 70462306a36Sopenharmony_ci int bind_node, bind_node_0, bind_node_1; 70562306a36Sopenharmony_ci char *tok, *tok_end, *tok_step, *tok_mul; 70662306a36Sopenharmony_ci int step; 70762306a36Sopenharmony_ci int mul; 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci tok = strsep(&str, ","); 71062306a36Sopenharmony_ci if (!tok) 71162306a36Sopenharmony_ci break; 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci tok_end = strstr(tok, "-"); 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); 71662306a36Sopenharmony_ci if (!tok_end) { 71762306a36Sopenharmony_ci /* Single NODE specified: */ 71862306a36Sopenharmony_ci bind_node_0 = bind_node_1 = atol(tok); 71962306a36Sopenharmony_ci } else { 72062306a36Sopenharmony_ci /* NODE range specified (for example: "5-11"): */ 72162306a36Sopenharmony_ci bind_node_0 = atol(tok); 72262306a36Sopenharmony_ci bind_node_1 = atol(tok_end + 1); 72362306a36Sopenharmony_ci } 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci step = 1; 72662306a36Sopenharmony_ci tok_step = strstr(tok, "#"); 72762306a36Sopenharmony_ci if (tok_step) { 72862306a36Sopenharmony_ci step = atol(tok_step + 1); 72962306a36Sopenharmony_ci BUG_ON(step <= 0 || step >= g->p.nr_nodes); 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ 73362306a36Sopenharmony_ci mul = 1; 73462306a36Sopenharmony_ci tok_mul = strstr(tok, "x"); 73562306a36Sopenharmony_ci if (tok_mul) { 73662306a36Sopenharmony_ci mul = atol(tok_mul + 1); 73762306a36Sopenharmony_ci BUG_ON(mul <= 0); 73862306a36Sopenharmony_ci } 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step); 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) { 74362306a36Sopenharmony_ci printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes); 74462306a36Sopenharmony_ci return -1; 74562306a36Sopenharmony_ci } 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci BUG_ON(bind_node_0 < 0 || bind_node_1 < 0); 74862306a36Sopenharmony_ci BUG_ON(bind_node_0 > bind_node_1); 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) { 75162306a36Sopenharmony_ci int i; 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci for (i = 0; i < mul; i++) { 75462306a36Sopenharmony_ci if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) { 75562306a36Sopenharmony_ci printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node); 75662306a36Sopenharmony_ci goto out; 75762306a36Sopenharmony_ci } 75862306a36Sopenharmony_ci td = g->threads + t; 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ci if (!t) 76162306a36Sopenharmony_ci tprintf(" %2d", bind_node); 76262306a36Sopenharmony_ci else 76362306a36Sopenharmony_ci tprintf(",%2d", bind_node); 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci td->bind_node = bind_node; 76662306a36Sopenharmony_ci t++; 76762306a36Sopenharmony_ci } 76862306a36Sopenharmony_ci } 76962306a36Sopenharmony_ci } 77062306a36Sopenharmony_ciout: 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci tprintf("\n"); 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci if (t < g->p.nr_tasks) 77562306a36Sopenharmony_ci printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t); 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci free(str0); 77862306a36Sopenharmony_ci return 0; 77962306a36Sopenharmony_ci} 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_cistatic int parse_nodes_opt(const struct option *opt __maybe_unused, 78262306a36Sopenharmony_ci const char *arg, int unset __maybe_unused) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci if (!arg) 78562306a36Sopenharmony_ci return -1; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci return parse_node_list(arg); 78862306a36Sopenharmony_ci} 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_cistatic inline uint32_t lfsr_32(uint32_t lfsr) 79162306a36Sopenharmony_ci{ 79262306a36Sopenharmony_ci const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31); 79362306a36Sopenharmony_ci return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps); 79462306a36Sopenharmony_ci} 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci/* 79762306a36Sopenharmony_ci * Make sure there's real data dependency to RAM (when read 79862306a36Sopenharmony_ci * accesses are enabled), so the compiler, the CPU and the 79962306a36Sopenharmony_ci * kernel (KSM, zero page, etc.) cannot optimize away RAM 80062306a36Sopenharmony_ci * accesses: 80162306a36Sopenharmony_ci */ 80262306a36Sopenharmony_cistatic inline u64 access_data(u64 *data, u64 val) 80362306a36Sopenharmony_ci{ 80462306a36Sopenharmony_ci if (g->p.data_reads) 80562306a36Sopenharmony_ci val += *data; 80662306a36Sopenharmony_ci if (g->p.data_writes) 80762306a36Sopenharmony_ci *data = val + 1; 80862306a36Sopenharmony_ci return val; 80962306a36Sopenharmony_ci} 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci/* 81262306a36Sopenharmony_ci * The worker process does two types of work, a forwards going 81362306a36Sopenharmony_ci * loop and a backwards going loop. 81462306a36Sopenharmony_ci * 81562306a36Sopenharmony_ci * We do this so that on multiprocessor systems we do not create 81662306a36Sopenharmony_ci * a 'train' of processing, with highly synchronized processes, 81762306a36Sopenharmony_ci * skewing the whole benchmark. 81862306a36Sopenharmony_ci */ 81962306a36Sopenharmony_cistatic u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val) 82062306a36Sopenharmony_ci{ 82162306a36Sopenharmony_ci long words = bytes/sizeof(u64); 82262306a36Sopenharmony_ci u64 *data = (void *)__data; 82362306a36Sopenharmony_ci long chunk_0, chunk_1; 82462306a36Sopenharmony_ci u64 *d0, *d, *d1; 82562306a36Sopenharmony_ci long off; 82662306a36Sopenharmony_ci long i; 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci BUG_ON(!data && words); 82962306a36Sopenharmony_ci BUG_ON(data && !words); 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci if (!data) 83262306a36Sopenharmony_ci return val; 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci /* Very simple memset() work variant: */ 83562306a36Sopenharmony_ci if (g->p.data_zero_memset && !g->p.data_rand_walk) { 83662306a36Sopenharmony_ci bzero(data, bytes); 83762306a36Sopenharmony_ci return val; 83862306a36Sopenharmony_ci } 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci /* Spread out by PID/TID nr and by loop nr: */ 84162306a36Sopenharmony_ci chunk_0 = words/nr_max; 84262306a36Sopenharmony_ci chunk_1 = words/g->p.nr_loops; 84362306a36Sopenharmony_ci off = nr*chunk_0 + loop*chunk_1; 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_ci while (off >= words) 84662306a36Sopenharmony_ci off -= words; 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci if (g->p.data_rand_walk) { 84962306a36Sopenharmony_ci u32 lfsr = nr + loop + val; 85062306a36Sopenharmony_ci long j; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci for (i = 0; i < words/1024; i++) { 85362306a36Sopenharmony_ci long start, end; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci lfsr = lfsr_32(lfsr); 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci start = lfsr % words; 85862306a36Sopenharmony_ci end = min(start + 1024, words-1); 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci if (g->p.data_zero_memset) { 86162306a36Sopenharmony_ci bzero(data + start, (end-start) * sizeof(u64)); 86262306a36Sopenharmony_ci } else { 86362306a36Sopenharmony_ci for (j = start; j < end; j++) 86462306a36Sopenharmony_ci val = access_data(data + j, val); 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci } 86762306a36Sopenharmony_ci } else if (!g->p.data_backwards || (nr + loop) & 1) { 86862306a36Sopenharmony_ci /* Process data forwards: */ 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci d0 = data + off; 87162306a36Sopenharmony_ci d = data + off + 1; 87262306a36Sopenharmony_ci d1 = data + words; 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci for (;;) { 87562306a36Sopenharmony_ci if (unlikely(d >= d1)) 87662306a36Sopenharmony_ci d = data; 87762306a36Sopenharmony_ci if (unlikely(d == d0)) 87862306a36Sopenharmony_ci break; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci val = access_data(d, val); 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci d++; 88362306a36Sopenharmony_ci } 88462306a36Sopenharmony_ci } else { 88562306a36Sopenharmony_ci /* Process data backwards: */ 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci d0 = data + off; 88862306a36Sopenharmony_ci d = data + off - 1; 88962306a36Sopenharmony_ci d1 = data + words; 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci for (;;) { 89262306a36Sopenharmony_ci if (unlikely(d < data)) 89362306a36Sopenharmony_ci d = data + words-1; 89462306a36Sopenharmony_ci if (unlikely(d == d0)) 89562306a36Sopenharmony_ci break; 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci val = access_data(d, val); 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci d--; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci } 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci return val; 90462306a36Sopenharmony_ci} 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_cistatic void update_curr_cpu(int task_nr, unsigned long bytes_worked) 90762306a36Sopenharmony_ci{ 90862306a36Sopenharmony_ci unsigned int cpu; 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci cpu = sched_getcpu(); 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci g->threads[task_nr].curr_cpu = cpu; 91362306a36Sopenharmony_ci prctl(0, bytes_worked); 91462306a36Sopenharmony_ci} 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci/* 91762306a36Sopenharmony_ci * Count the number of nodes a process's threads 91862306a36Sopenharmony_ci * are spread out on. 91962306a36Sopenharmony_ci * 92062306a36Sopenharmony_ci * A count of 1 means that the process is compressed 92162306a36Sopenharmony_ci * to a single node. A count of g->p.nr_nodes means it's 92262306a36Sopenharmony_ci * spread out on the whole system. 92362306a36Sopenharmony_ci */ 92462306a36Sopenharmony_cistatic int count_process_nodes(int process_nr) 92562306a36Sopenharmony_ci{ 92662306a36Sopenharmony_ci char *node_present; 92762306a36Sopenharmony_ci int nodes; 92862306a36Sopenharmony_ci int n, t; 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci node_present = (char *)malloc(g->p.nr_nodes * sizeof(char)); 93162306a36Sopenharmony_ci BUG_ON(!node_present); 93262306a36Sopenharmony_ci for (nodes = 0; nodes < g->p.nr_nodes; nodes++) 93362306a36Sopenharmony_ci node_present[nodes] = 0; 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 93662306a36Sopenharmony_ci struct thread_data *td; 93762306a36Sopenharmony_ci int task_nr; 93862306a36Sopenharmony_ci int node; 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_ci task_nr = process_nr*g->p.nr_threads + t; 94162306a36Sopenharmony_ci td = g->threads + task_nr; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci node = numa_node_of_cpu(td->curr_cpu); 94462306a36Sopenharmony_ci if (node < 0) /* curr_cpu was likely still -1 */ { 94562306a36Sopenharmony_ci free(node_present); 94662306a36Sopenharmony_ci return 0; 94762306a36Sopenharmony_ci } 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci node_present[node] = 1; 95062306a36Sopenharmony_ci } 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci nodes = 0; 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci for (n = 0; n < g->p.nr_nodes; n++) 95562306a36Sopenharmony_ci nodes += node_present[n]; 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci free(node_present); 95862306a36Sopenharmony_ci return nodes; 95962306a36Sopenharmony_ci} 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci/* 96262306a36Sopenharmony_ci * Count the number of distinct process-threads a node contains. 96362306a36Sopenharmony_ci * 96462306a36Sopenharmony_ci * A count of 1 means that the node contains only a single 96562306a36Sopenharmony_ci * process. If all nodes on the system contain at most one 96662306a36Sopenharmony_ci * process then we are well-converged. 96762306a36Sopenharmony_ci */ 96862306a36Sopenharmony_cistatic int count_node_processes(int node) 96962306a36Sopenharmony_ci{ 97062306a36Sopenharmony_ci int processes = 0; 97162306a36Sopenharmony_ci int t, p; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci for (p = 0; p < g->p.nr_proc; p++) { 97462306a36Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 97562306a36Sopenharmony_ci struct thread_data *td; 97662306a36Sopenharmony_ci int task_nr; 97762306a36Sopenharmony_ci int n; 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci task_nr = p*g->p.nr_threads + t; 98062306a36Sopenharmony_ci td = g->threads + task_nr; 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci n = numa_node_of_cpu(td->curr_cpu); 98362306a36Sopenharmony_ci if (n == node) { 98462306a36Sopenharmony_ci processes++; 98562306a36Sopenharmony_ci break; 98662306a36Sopenharmony_ci } 98762306a36Sopenharmony_ci } 98862306a36Sopenharmony_ci } 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci return processes; 99162306a36Sopenharmony_ci} 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_cistatic void calc_convergence_compression(int *strong) 99462306a36Sopenharmony_ci{ 99562306a36Sopenharmony_ci unsigned int nodes_min, nodes_max; 99662306a36Sopenharmony_ci int p; 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci nodes_min = -1; 99962306a36Sopenharmony_ci nodes_max = 0; 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_ci for (p = 0; p < g->p.nr_proc; p++) { 100262306a36Sopenharmony_ci unsigned int nodes = count_process_nodes(p); 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci if (!nodes) { 100562306a36Sopenharmony_ci *strong = 0; 100662306a36Sopenharmony_ci return; 100762306a36Sopenharmony_ci } 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci nodes_min = min(nodes, nodes_min); 101062306a36Sopenharmony_ci nodes_max = max(nodes, nodes_max); 101162306a36Sopenharmony_ci } 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci /* Strong convergence: all threads compress on a single node: */ 101462306a36Sopenharmony_ci if (nodes_min == 1 && nodes_max == 1) { 101562306a36Sopenharmony_ci *strong = 1; 101662306a36Sopenharmony_ci } else { 101762306a36Sopenharmony_ci *strong = 0; 101862306a36Sopenharmony_ci tprintf(" {%d-%d}", nodes_min, nodes_max); 101962306a36Sopenharmony_ci } 102062306a36Sopenharmony_ci} 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_cistatic void calc_convergence(double runtime_ns_max, double *convergence) 102362306a36Sopenharmony_ci{ 102462306a36Sopenharmony_ci unsigned int loops_done_min, loops_done_max; 102562306a36Sopenharmony_ci int process_groups; 102662306a36Sopenharmony_ci int *nodes; 102762306a36Sopenharmony_ci int distance; 102862306a36Sopenharmony_ci int nr_min; 102962306a36Sopenharmony_ci int nr_max; 103062306a36Sopenharmony_ci int strong; 103162306a36Sopenharmony_ci int sum; 103262306a36Sopenharmony_ci int nr; 103362306a36Sopenharmony_ci int node; 103462306a36Sopenharmony_ci int cpu; 103562306a36Sopenharmony_ci int t; 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci if (!g->p.show_convergence && !g->p.measure_convergence) 103862306a36Sopenharmony_ci return; 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci nodes = (int *)malloc(g->p.nr_nodes * sizeof(int)); 104162306a36Sopenharmony_ci BUG_ON(!nodes); 104262306a36Sopenharmony_ci for (node = 0; node < g->p.nr_nodes; node++) 104362306a36Sopenharmony_ci nodes[node] = 0; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci loops_done_min = -1; 104662306a36Sopenharmony_ci loops_done_max = 0; 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci for (t = 0; t < g->p.nr_tasks; t++) { 104962306a36Sopenharmony_ci struct thread_data *td = g->threads + t; 105062306a36Sopenharmony_ci unsigned int loops_done; 105162306a36Sopenharmony_ci 105262306a36Sopenharmony_ci cpu = td->curr_cpu; 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci /* Not all threads have written it yet: */ 105562306a36Sopenharmony_ci if (cpu < 0) 105662306a36Sopenharmony_ci continue; 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci node = numa_node_of_cpu(cpu); 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci nodes[node]++; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci loops_done = td->loops_done; 106362306a36Sopenharmony_ci loops_done_min = min(loops_done, loops_done_min); 106462306a36Sopenharmony_ci loops_done_max = max(loops_done, loops_done_max); 106562306a36Sopenharmony_ci } 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci nr_max = 0; 106862306a36Sopenharmony_ci nr_min = g->p.nr_tasks; 106962306a36Sopenharmony_ci sum = 0; 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_ci for (node = 0; node < g->p.nr_nodes; node++) { 107262306a36Sopenharmony_ci if (!is_node_present(node)) 107362306a36Sopenharmony_ci continue; 107462306a36Sopenharmony_ci nr = nodes[node]; 107562306a36Sopenharmony_ci nr_min = min(nr, nr_min); 107662306a36Sopenharmony_ci nr_max = max(nr, nr_max); 107762306a36Sopenharmony_ci sum += nr; 107862306a36Sopenharmony_ci } 107962306a36Sopenharmony_ci BUG_ON(nr_min > nr_max); 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci BUG_ON(sum > g->p.nr_tasks); 108262306a36Sopenharmony_ci 108362306a36Sopenharmony_ci if (0 && (sum < g->p.nr_tasks)) { 108462306a36Sopenharmony_ci free(nodes); 108562306a36Sopenharmony_ci return; 108662306a36Sopenharmony_ci } 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci /* 108962306a36Sopenharmony_ci * Count the number of distinct process groups present 109062306a36Sopenharmony_ci * on nodes - when we are converged this will decrease 109162306a36Sopenharmony_ci * to g->p.nr_proc: 109262306a36Sopenharmony_ci */ 109362306a36Sopenharmony_ci process_groups = 0; 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci for (node = 0; node < g->p.nr_nodes; node++) { 109662306a36Sopenharmony_ci int processes; 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci if (!is_node_present(node)) 109962306a36Sopenharmony_ci continue; 110062306a36Sopenharmony_ci processes = count_node_processes(node); 110162306a36Sopenharmony_ci nr = nodes[node]; 110262306a36Sopenharmony_ci tprintf(" %2d/%-2d", nr, processes); 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci process_groups += processes; 110562306a36Sopenharmony_ci } 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci distance = nr_max - nr_min; 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci tprintf(" [%2d/%-2d]", distance, process_groups); 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci tprintf(" l:%3d-%-3d (%3d)", 111262306a36Sopenharmony_ci loops_done_min, loops_done_max, loops_done_max-loops_done_min); 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_ci if (loops_done_min && loops_done_max) { 111562306a36Sopenharmony_ci double skew = 1.0 - (double)loops_done_min/loops_done_max; 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci tprintf(" [%4.1f%%]", skew * 100.0); 111862306a36Sopenharmony_ci } 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_ci calc_convergence_compression(&strong); 112162306a36Sopenharmony_ci 112262306a36Sopenharmony_ci if (strong && process_groups == g->p.nr_proc) { 112362306a36Sopenharmony_ci if (!*convergence) { 112462306a36Sopenharmony_ci *convergence = runtime_ns_max; 112562306a36Sopenharmony_ci tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC); 112662306a36Sopenharmony_ci if (g->p.measure_convergence) { 112762306a36Sopenharmony_ci g->all_converged = true; 112862306a36Sopenharmony_ci g->stop_work = true; 112962306a36Sopenharmony_ci } 113062306a36Sopenharmony_ci } 113162306a36Sopenharmony_ci } else { 113262306a36Sopenharmony_ci if (*convergence) { 113362306a36Sopenharmony_ci tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC); 113462306a36Sopenharmony_ci *convergence = 0; 113562306a36Sopenharmony_ci } 113662306a36Sopenharmony_ci tprintf("\n"); 113762306a36Sopenharmony_ci } 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci free(nodes); 114062306a36Sopenharmony_ci} 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_cistatic void show_summary(double runtime_ns_max, int l, double *convergence) 114362306a36Sopenharmony_ci{ 114462306a36Sopenharmony_ci tprintf("\r # %5.1f%% [%.1f mins]", 114562306a36Sopenharmony_ci (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0); 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci calc_convergence(runtime_ns_max, convergence); 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci if (g->p.show_details >= 0) 115062306a36Sopenharmony_ci fflush(stdout); 115162306a36Sopenharmony_ci} 115262306a36Sopenharmony_ci 115362306a36Sopenharmony_cistatic void *worker_thread(void *__tdata) 115462306a36Sopenharmony_ci{ 115562306a36Sopenharmony_ci struct thread_data *td = __tdata; 115662306a36Sopenharmony_ci struct timeval start0, start, stop, diff; 115762306a36Sopenharmony_ci int process_nr = td->process_nr; 115862306a36Sopenharmony_ci int thread_nr = td->thread_nr; 115962306a36Sopenharmony_ci unsigned long last_perturbance; 116062306a36Sopenharmony_ci int task_nr = td->task_nr; 116162306a36Sopenharmony_ci int details = g->p.show_details; 116262306a36Sopenharmony_ci int first_task, last_task; 116362306a36Sopenharmony_ci double convergence = 0; 116462306a36Sopenharmony_ci u64 val = td->val; 116562306a36Sopenharmony_ci double runtime_ns_max; 116662306a36Sopenharmony_ci u8 *global_data; 116762306a36Sopenharmony_ci u8 *process_data; 116862306a36Sopenharmony_ci u8 *thread_data; 116962306a36Sopenharmony_ci u64 bytes_done, secs; 117062306a36Sopenharmony_ci long work_done; 117162306a36Sopenharmony_ci u32 l; 117262306a36Sopenharmony_ci struct rusage rusage; 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_ci bind_to_cpumask(td->bind_cpumask); 117562306a36Sopenharmony_ci bind_to_memnode(td->bind_node); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci set_taskname("thread %d/%d", process_nr, thread_nr); 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci global_data = g->data; 118062306a36Sopenharmony_ci process_data = td->process_data; 118162306a36Sopenharmony_ci thread_data = setup_private_data(g->p.bytes_thread); 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci bytes_done = 0; 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci last_task = 0; 118662306a36Sopenharmony_ci if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1) 118762306a36Sopenharmony_ci last_task = 1; 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci first_task = 0; 119062306a36Sopenharmony_ci if (process_nr == 0 && thread_nr == 0) 119162306a36Sopenharmony_ci first_task = 1; 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci if (details >= 2) { 119462306a36Sopenharmony_ci printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n", 119562306a36Sopenharmony_ci process_nr, thread_nr, global_data, process_data, thread_data); 119662306a36Sopenharmony_ci } 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci if (g->p.serialize_startup) { 119962306a36Sopenharmony_ci mutex_lock(&g->startup_mutex); 120062306a36Sopenharmony_ci g->nr_tasks_started++; 120162306a36Sopenharmony_ci /* The last thread wakes the main process. */ 120262306a36Sopenharmony_ci if (g->nr_tasks_started == g->p.nr_tasks) 120362306a36Sopenharmony_ci cond_signal(&g->startup_cond); 120462306a36Sopenharmony_ci 120562306a36Sopenharmony_ci mutex_unlock(&g->startup_mutex); 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci /* Here we will wait for the main process to start us all at once: */ 120862306a36Sopenharmony_ci mutex_lock(&g->start_work_mutex); 120962306a36Sopenharmony_ci g->start_work = false; 121062306a36Sopenharmony_ci g->nr_tasks_working++; 121162306a36Sopenharmony_ci while (!g->start_work) 121262306a36Sopenharmony_ci cond_wait(&g->start_work_cond, &g->start_work_mutex); 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci mutex_unlock(&g->start_work_mutex); 121562306a36Sopenharmony_ci } 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci gettimeofday(&start0, NULL); 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci start = stop = start0; 122062306a36Sopenharmony_ci last_perturbance = start.tv_sec; 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci for (l = 0; l < g->p.nr_loops; l++) { 122362306a36Sopenharmony_ci start = stop; 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci if (g->stop_work) 122662306a36Sopenharmony_ci break; 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val); 122962306a36Sopenharmony_ci val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val); 123062306a36Sopenharmony_ci val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val); 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci if (g->p.sleep_usecs) { 123362306a36Sopenharmony_ci mutex_lock(td->process_lock); 123462306a36Sopenharmony_ci usleep(g->p.sleep_usecs); 123562306a36Sopenharmony_ci mutex_unlock(td->process_lock); 123662306a36Sopenharmony_ci } 123762306a36Sopenharmony_ci /* 123862306a36Sopenharmony_ci * Amount of work to be done under a process-global lock: 123962306a36Sopenharmony_ci */ 124062306a36Sopenharmony_ci if (g->p.bytes_process_locked) { 124162306a36Sopenharmony_ci mutex_lock(td->process_lock); 124262306a36Sopenharmony_ci val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val); 124362306a36Sopenharmony_ci mutex_unlock(td->process_lock); 124462306a36Sopenharmony_ci } 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci work_done = g->p.bytes_global + g->p.bytes_process + 124762306a36Sopenharmony_ci g->p.bytes_process_locked + g->p.bytes_thread; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci update_curr_cpu(task_nr, work_done); 125062306a36Sopenharmony_ci bytes_done += work_done; 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs) 125362306a36Sopenharmony_ci continue; 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_ci td->loops_done = l; 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci gettimeofday(&stop, NULL); 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci /* Check whether our max runtime timed out: */ 126062306a36Sopenharmony_ci if (g->p.nr_secs) { 126162306a36Sopenharmony_ci timersub(&stop, &start0, &diff); 126262306a36Sopenharmony_ci if ((u32)diff.tv_sec >= g->p.nr_secs) { 126362306a36Sopenharmony_ci g->stop_work = true; 126462306a36Sopenharmony_ci break; 126562306a36Sopenharmony_ci } 126662306a36Sopenharmony_ci } 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci /* Update the summary at most once per second: */ 126962306a36Sopenharmony_ci if (start.tv_sec == stop.tv_sec) 127062306a36Sopenharmony_ci continue; 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci /* 127362306a36Sopenharmony_ci * Perturb the first task's equilibrium every g->p.perturb_secs seconds, 127462306a36Sopenharmony_ci * by migrating to CPU#0: 127562306a36Sopenharmony_ci */ 127662306a36Sopenharmony_ci if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) { 127762306a36Sopenharmony_ci cpu_set_t *orig_mask; 127862306a36Sopenharmony_ci int target_cpu; 127962306a36Sopenharmony_ci int this_cpu; 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci last_perturbance = stop.tv_sec; 128262306a36Sopenharmony_ci 128362306a36Sopenharmony_ci /* 128462306a36Sopenharmony_ci * Depending on where we are running, move into 128562306a36Sopenharmony_ci * the other half of the system, to create some 128662306a36Sopenharmony_ci * real disturbance: 128762306a36Sopenharmony_ci */ 128862306a36Sopenharmony_ci this_cpu = g->threads[task_nr].curr_cpu; 128962306a36Sopenharmony_ci if (this_cpu < g->p.nr_cpus/2) 129062306a36Sopenharmony_ci target_cpu = g->p.nr_cpus-1; 129162306a36Sopenharmony_ci else 129262306a36Sopenharmony_ci target_cpu = 0; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci orig_mask = bind_to_cpu(target_cpu); 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci /* Here we are running on the target CPU already */ 129762306a36Sopenharmony_ci if (details >= 1) 129862306a36Sopenharmony_ci printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu); 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci bind_to_cpumask(orig_mask); 130162306a36Sopenharmony_ci CPU_FREE(orig_mask); 130262306a36Sopenharmony_ci } 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci if (details >= 3) { 130562306a36Sopenharmony_ci timersub(&stop, &start, &diff); 130662306a36Sopenharmony_ci runtime_ns_max = diff.tv_sec * NSEC_PER_SEC; 130762306a36Sopenharmony_ci runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci if (details >= 0) { 131062306a36Sopenharmony_ci printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n", 131162306a36Sopenharmony_ci process_nr, thread_nr, runtime_ns_max / bytes_done, val); 131262306a36Sopenharmony_ci } 131362306a36Sopenharmony_ci fflush(stdout); 131462306a36Sopenharmony_ci } 131562306a36Sopenharmony_ci if (!last_task) 131662306a36Sopenharmony_ci continue; 131762306a36Sopenharmony_ci 131862306a36Sopenharmony_ci timersub(&stop, &start0, &diff); 131962306a36Sopenharmony_ci runtime_ns_max = diff.tv_sec * NSEC_PER_SEC; 132062306a36Sopenharmony_ci runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_ci show_summary(runtime_ns_max, l, &convergence); 132362306a36Sopenharmony_ci } 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci gettimeofday(&stop, NULL); 132662306a36Sopenharmony_ci timersub(&stop, &start0, &diff); 132762306a36Sopenharmony_ci td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; 132862306a36Sopenharmony_ci td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; 132962306a36Sopenharmony_ci secs = td->runtime_ns / NSEC_PER_SEC; 133062306a36Sopenharmony_ci td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0; 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci getrusage(RUSAGE_THREAD, &rusage); 133362306a36Sopenharmony_ci td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; 133462306a36Sopenharmony_ci td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC; 133562306a36Sopenharmony_ci td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC; 133662306a36Sopenharmony_ci td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC; 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci free_data(thread_data, g->p.bytes_thread); 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci mutex_lock(&g->stop_work_mutex); 134162306a36Sopenharmony_ci g->bytes_done += bytes_done; 134262306a36Sopenharmony_ci mutex_unlock(&g->stop_work_mutex); 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci return NULL; 134562306a36Sopenharmony_ci} 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci/* 134862306a36Sopenharmony_ci * A worker process starts a couple of threads: 134962306a36Sopenharmony_ci */ 135062306a36Sopenharmony_cistatic void worker_process(int process_nr) 135162306a36Sopenharmony_ci{ 135262306a36Sopenharmony_ci struct mutex process_lock; 135362306a36Sopenharmony_ci struct thread_data *td; 135462306a36Sopenharmony_ci pthread_t *pthreads; 135562306a36Sopenharmony_ci u8 *process_data; 135662306a36Sopenharmony_ci int task_nr; 135762306a36Sopenharmony_ci int ret; 135862306a36Sopenharmony_ci int t; 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci mutex_init(&process_lock); 136162306a36Sopenharmony_ci set_taskname("process %d", process_nr); 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci /* 136462306a36Sopenharmony_ci * Pick up the memory policy and the CPU binding of our first thread, 136562306a36Sopenharmony_ci * so that we initialize memory accordingly: 136662306a36Sopenharmony_ci */ 136762306a36Sopenharmony_ci task_nr = process_nr*g->p.nr_threads; 136862306a36Sopenharmony_ci td = g->threads + task_nr; 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci bind_to_memnode(td->bind_node); 137162306a36Sopenharmony_ci bind_to_cpumask(td->bind_cpumask); 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t)); 137462306a36Sopenharmony_ci process_data = setup_private_data(g->p.bytes_process); 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci if (g->p.show_details >= 3) { 137762306a36Sopenharmony_ci printf(" # process %2d global mem: %p, process mem: %p\n", 137862306a36Sopenharmony_ci process_nr, g->data, process_data); 137962306a36Sopenharmony_ci } 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 138262306a36Sopenharmony_ci task_nr = process_nr*g->p.nr_threads + t; 138362306a36Sopenharmony_ci td = g->threads + task_nr; 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci td->process_data = process_data; 138662306a36Sopenharmony_ci td->process_nr = process_nr; 138762306a36Sopenharmony_ci td->thread_nr = t; 138862306a36Sopenharmony_ci td->task_nr = task_nr; 138962306a36Sopenharmony_ci td->val = rand(); 139062306a36Sopenharmony_ci td->curr_cpu = -1; 139162306a36Sopenharmony_ci td->process_lock = &process_lock; 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci ret = pthread_create(pthreads + t, NULL, worker_thread, td); 139462306a36Sopenharmony_ci BUG_ON(ret); 139562306a36Sopenharmony_ci } 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 139862306a36Sopenharmony_ci ret = pthread_join(pthreads[t], NULL); 139962306a36Sopenharmony_ci BUG_ON(ret); 140062306a36Sopenharmony_ci } 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci free_data(process_data, g->p.bytes_process); 140362306a36Sopenharmony_ci free(pthreads); 140462306a36Sopenharmony_ci} 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_cistatic void print_summary(void) 140762306a36Sopenharmony_ci{ 140862306a36Sopenharmony_ci if (g->p.show_details < 0) 140962306a36Sopenharmony_ci return; 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci printf("\n ###\n"); 141262306a36Sopenharmony_ci printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", 141362306a36Sopenharmony_ci g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus); 141462306a36Sopenharmony_ci printf(" # %5dx %5ldMB global shared mem operations\n", 141562306a36Sopenharmony_ci g->p.nr_loops, g->p.bytes_global/1024/1024); 141662306a36Sopenharmony_ci printf(" # %5dx %5ldMB process shared mem operations\n", 141762306a36Sopenharmony_ci g->p.nr_loops, g->p.bytes_process/1024/1024); 141862306a36Sopenharmony_ci printf(" # %5dx %5ldMB thread local mem operations\n", 141962306a36Sopenharmony_ci g->p.nr_loops, g->p.bytes_thread/1024/1024); 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci printf(" ###\n"); 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci printf("\n ###\n"); fflush(stdout); 142462306a36Sopenharmony_ci} 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_cistatic void init_thread_data(void) 142762306a36Sopenharmony_ci{ 142862306a36Sopenharmony_ci ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; 142962306a36Sopenharmony_ci int t; 143062306a36Sopenharmony_ci 143162306a36Sopenharmony_ci g->threads = zalloc_shared_data(size); 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_ci for (t = 0; t < g->p.nr_tasks; t++) { 143462306a36Sopenharmony_ci struct thread_data *td = g->threads + t; 143562306a36Sopenharmony_ci size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus); 143662306a36Sopenharmony_ci int cpu; 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_ci /* Allow all nodes by default: */ 143962306a36Sopenharmony_ci td->bind_node = NUMA_NO_NODE; 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_ci /* Allow all CPUs by default: */ 144262306a36Sopenharmony_ci td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); 144362306a36Sopenharmony_ci BUG_ON(!td->bind_cpumask); 144462306a36Sopenharmony_ci CPU_ZERO_S(cpuset_size, td->bind_cpumask); 144562306a36Sopenharmony_ci for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 144662306a36Sopenharmony_ci CPU_SET_S(cpu, cpuset_size, td->bind_cpumask); 144762306a36Sopenharmony_ci } 144862306a36Sopenharmony_ci} 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_cistatic void deinit_thread_data(void) 145162306a36Sopenharmony_ci{ 145262306a36Sopenharmony_ci ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; 145362306a36Sopenharmony_ci int t; 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_ci /* Free the bind_cpumask allocated for thread_data */ 145662306a36Sopenharmony_ci for (t = 0; t < g->p.nr_tasks; t++) { 145762306a36Sopenharmony_ci struct thread_data *td = g->threads + t; 145862306a36Sopenharmony_ci CPU_FREE(td->bind_cpumask); 145962306a36Sopenharmony_ci } 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci free_data(g->threads, size); 146262306a36Sopenharmony_ci} 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_cistatic int init(void) 146562306a36Sopenharmony_ci{ 146662306a36Sopenharmony_ci g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0); 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci /* Copy over options: */ 146962306a36Sopenharmony_ci g->p = p0; 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_ci g->p.nr_cpus = numa_num_configured_cpus(); 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci g->p.nr_nodes = numa_max_node() + 1; 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci /* char array in count_process_nodes(): */ 147662306a36Sopenharmony_ci BUG_ON(g->p.nr_nodes < 0); 147762306a36Sopenharmony_ci 147862306a36Sopenharmony_ci if (quiet && !g->p.show_details) 147962306a36Sopenharmony_ci g->p.show_details = -1; 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_ci /* Some memory should be specified: */ 148262306a36Sopenharmony_ci if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str) 148362306a36Sopenharmony_ci return -1; 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci if (g->p.mb_global_str) { 148662306a36Sopenharmony_ci g->p.mb_global = atof(g->p.mb_global_str); 148762306a36Sopenharmony_ci BUG_ON(g->p.mb_global < 0); 148862306a36Sopenharmony_ci } 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci if (g->p.mb_proc_str) { 149162306a36Sopenharmony_ci g->p.mb_proc = atof(g->p.mb_proc_str); 149262306a36Sopenharmony_ci BUG_ON(g->p.mb_proc < 0); 149362306a36Sopenharmony_ci } 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci if (g->p.mb_proc_locked_str) { 149662306a36Sopenharmony_ci g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str); 149762306a36Sopenharmony_ci BUG_ON(g->p.mb_proc_locked < 0); 149862306a36Sopenharmony_ci BUG_ON(g->p.mb_proc_locked > g->p.mb_proc); 149962306a36Sopenharmony_ci } 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci if (g->p.mb_thread_str) { 150262306a36Sopenharmony_ci g->p.mb_thread = atof(g->p.mb_thread_str); 150362306a36Sopenharmony_ci BUG_ON(g->p.mb_thread < 0); 150462306a36Sopenharmony_ci } 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci BUG_ON(g->p.nr_threads <= 0); 150762306a36Sopenharmony_ci BUG_ON(g->p.nr_proc <= 0); 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads; 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_ci g->p.bytes_global = g->p.mb_global *1024L*1024L; 151262306a36Sopenharmony_ci g->p.bytes_process = g->p.mb_proc *1024L*1024L; 151362306a36Sopenharmony_ci g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L; 151462306a36Sopenharmony_ci g->p.bytes_thread = g->p.mb_thread *1024L*1024L; 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci g->data = setup_shared_data(g->p.bytes_global); 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci /* Startup serialization: */ 151962306a36Sopenharmony_ci mutex_init_pshared(&g->start_work_mutex); 152062306a36Sopenharmony_ci cond_init_pshared(&g->start_work_cond); 152162306a36Sopenharmony_ci mutex_init_pshared(&g->startup_mutex); 152262306a36Sopenharmony_ci cond_init_pshared(&g->startup_cond); 152362306a36Sopenharmony_ci mutex_init_pshared(&g->stop_work_mutex); 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_ci init_thread_data(); 152662306a36Sopenharmony_ci 152762306a36Sopenharmony_ci tprintf("#\n"); 152862306a36Sopenharmony_ci if (parse_setup_cpu_list() || parse_setup_node_list()) 152962306a36Sopenharmony_ci return -1; 153062306a36Sopenharmony_ci tprintf("#\n"); 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci print_summary(); 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_ci return 0; 153562306a36Sopenharmony_ci} 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_cistatic void deinit(void) 153862306a36Sopenharmony_ci{ 153962306a36Sopenharmony_ci free_data(g->data, g->p.bytes_global); 154062306a36Sopenharmony_ci g->data = NULL; 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci deinit_thread_data(); 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci free_data(g, sizeof(*g)); 154562306a36Sopenharmony_ci g = NULL; 154662306a36Sopenharmony_ci} 154762306a36Sopenharmony_ci 154862306a36Sopenharmony_ci/* 154962306a36Sopenharmony_ci * Print a short or long result, depending on the verbosity setting: 155062306a36Sopenharmony_ci */ 155162306a36Sopenharmony_cistatic void print_res(const char *name, double val, 155262306a36Sopenharmony_ci const char *txt_unit, const char *txt_short, const char *txt_long) 155362306a36Sopenharmony_ci{ 155462306a36Sopenharmony_ci if (!name) 155562306a36Sopenharmony_ci name = "main,"; 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci if (!quiet) 155862306a36Sopenharmony_ci printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); 155962306a36Sopenharmony_ci else 156062306a36Sopenharmony_ci printf(" %14.3f %s\n", val, txt_long); 156162306a36Sopenharmony_ci} 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_cistatic int __bench_numa(const char *name) 156462306a36Sopenharmony_ci{ 156562306a36Sopenharmony_ci struct timeval start, stop, diff; 156662306a36Sopenharmony_ci u64 runtime_ns_min, runtime_ns_sum; 156762306a36Sopenharmony_ci pid_t *pids, pid, wpid; 156862306a36Sopenharmony_ci double delta_runtime; 156962306a36Sopenharmony_ci double runtime_avg; 157062306a36Sopenharmony_ci double runtime_sec_max; 157162306a36Sopenharmony_ci double runtime_sec_min; 157262306a36Sopenharmony_ci int wait_stat; 157362306a36Sopenharmony_ci double bytes; 157462306a36Sopenharmony_ci int i, t, p; 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci if (init()) 157762306a36Sopenharmony_ci return -1; 157862306a36Sopenharmony_ci 157962306a36Sopenharmony_ci pids = zalloc(g->p.nr_proc * sizeof(*pids)); 158062306a36Sopenharmony_ci pid = -1; 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ci if (g->p.serialize_startup) { 158362306a36Sopenharmony_ci tprintf(" #\n"); 158462306a36Sopenharmony_ci tprintf(" # Startup synchronization: ..."); fflush(stdout); 158562306a36Sopenharmony_ci } 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci gettimeofday(&start, NULL); 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_ci for (i = 0; i < g->p.nr_proc; i++) { 159062306a36Sopenharmony_ci pid = fork(); 159162306a36Sopenharmony_ci dprintf(" # process %2d: PID %d\n", i, pid); 159262306a36Sopenharmony_ci 159362306a36Sopenharmony_ci BUG_ON(pid < 0); 159462306a36Sopenharmony_ci if (!pid) { 159562306a36Sopenharmony_ci /* Child process: */ 159662306a36Sopenharmony_ci worker_process(i); 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci exit(0); 159962306a36Sopenharmony_ci } 160062306a36Sopenharmony_ci pids[i] = pid; 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci } 160362306a36Sopenharmony_ci 160462306a36Sopenharmony_ci if (g->p.serialize_startup) { 160562306a36Sopenharmony_ci bool threads_ready = false; 160662306a36Sopenharmony_ci double startup_sec; 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci /* 160962306a36Sopenharmony_ci * Wait for all the threads to start up. The last thread will 161062306a36Sopenharmony_ci * signal this process. 161162306a36Sopenharmony_ci */ 161262306a36Sopenharmony_ci mutex_lock(&g->startup_mutex); 161362306a36Sopenharmony_ci while (g->nr_tasks_started != g->p.nr_tasks) 161462306a36Sopenharmony_ci cond_wait(&g->startup_cond, &g->startup_mutex); 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci mutex_unlock(&g->startup_mutex); 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_ci /* Wait for all threads to be at the start_work_cond. */ 161962306a36Sopenharmony_ci while (!threads_ready) { 162062306a36Sopenharmony_ci mutex_lock(&g->start_work_mutex); 162162306a36Sopenharmony_ci threads_ready = (g->nr_tasks_working == g->p.nr_tasks); 162262306a36Sopenharmony_ci mutex_unlock(&g->start_work_mutex); 162362306a36Sopenharmony_ci if (!threads_ready) 162462306a36Sopenharmony_ci usleep(1); 162562306a36Sopenharmony_ci } 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci gettimeofday(&stop, NULL); 162862306a36Sopenharmony_ci 162962306a36Sopenharmony_ci timersub(&stop, &start, &diff); 163062306a36Sopenharmony_ci 163162306a36Sopenharmony_ci startup_sec = diff.tv_sec * NSEC_PER_SEC; 163262306a36Sopenharmony_ci startup_sec += diff.tv_usec * NSEC_PER_USEC; 163362306a36Sopenharmony_ci startup_sec /= NSEC_PER_SEC; 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci tprintf(" threads initialized in %.6f seconds.\n", startup_sec); 163662306a36Sopenharmony_ci tprintf(" #\n"); 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci start = stop; 163962306a36Sopenharmony_ci /* Start all threads running. */ 164062306a36Sopenharmony_ci mutex_lock(&g->start_work_mutex); 164162306a36Sopenharmony_ci g->start_work = true; 164262306a36Sopenharmony_ci mutex_unlock(&g->start_work_mutex); 164362306a36Sopenharmony_ci cond_broadcast(&g->start_work_cond); 164462306a36Sopenharmony_ci } else { 164562306a36Sopenharmony_ci gettimeofday(&start, NULL); 164662306a36Sopenharmony_ci } 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_ci /* Parent process: */ 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci for (i = 0; i < g->p.nr_proc; i++) { 165262306a36Sopenharmony_ci wpid = waitpid(pids[i], &wait_stat, 0); 165362306a36Sopenharmony_ci BUG_ON(wpid < 0); 165462306a36Sopenharmony_ci BUG_ON(!WIFEXITED(wait_stat)); 165562306a36Sopenharmony_ci 165662306a36Sopenharmony_ci } 165762306a36Sopenharmony_ci 165862306a36Sopenharmony_ci runtime_ns_sum = 0; 165962306a36Sopenharmony_ci runtime_ns_min = -1LL; 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_ci for (t = 0; t < g->p.nr_tasks; t++) { 166262306a36Sopenharmony_ci u64 thread_runtime_ns = g->threads[t].runtime_ns; 166362306a36Sopenharmony_ci 166462306a36Sopenharmony_ci runtime_ns_sum += thread_runtime_ns; 166562306a36Sopenharmony_ci runtime_ns_min = min(thread_runtime_ns, runtime_ns_min); 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ci 166862306a36Sopenharmony_ci gettimeofday(&stop, NULL); 166962306a36Sopenharmony_ci timersub(&stop, &start, &diff); 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci BUG_ON(bench_format != BENCH_FORMAT_DEFAULT); 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_ci tprintf("\n ###\n"); 167462306a36Sopenharmony_ci tprintf("\n"); 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci runtime_sec_max = diff.tv_sec * NSEC_PER_SEC; 167762306a36Sopenharmony_ci runtime_sec_max += diff.tv_usec * NSEC_PER_USEC; 167862306a36Sopenharmony_ci runtime_sec_max /= NSEC_PER_SEC; 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci runtime_sec_min = runtime_ns_min / NSEC_PER_SEC; 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_ci bytes = g->bytes_done; 168362306a36Sopenharmony_ci runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC; 168462306a36Sopenharmony_ci 168562306a36Sopenharmony_ci if (g->p.measure_convergence) { 168662306a36Sopenharmony_ci print_res(name, runtime_sec_max, 168762306a36Sopenharmony_ci "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge"); 168862306a36Sopenharmony_ci } 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci print_res(name, runtime_sec_max, 169162306a36Sopenharmony_ci "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime"); 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_ci print_res(name, runtime_sec_min, 169462306a36Sopenharmony_ci "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime"); 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_ci print_res(name, runtime_avg, 169762306a36Sopenharmony_ci "secs,", "runtime-avg/thread", "secs average thread-runtime"); 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_ci delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0; 170062306a36Sopenharmony_ci print_res(name, delta_runtime / runtime_sec_max * 100.0, 170162306a36Sopenharmony_ci "%,", "spread-runtime/thread", "% difference between max/avg runtime"); 170262306a36Sopenharmony_ci 170362306a36Sopenharmony_ci print_res(name, bytes / g->p.nr_tasks / 1e9, 170462306a36Sopenharmony_ci "GB,", "data/thread", "GB data processed, per thread"); 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci print_res(name, bytes / 1e9, 170762306a36Sopenharmony_ci "GB,", "data-total", "GB data processed, total"); 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks), 171062306a36Sopenharmony_ci "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime"); 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max, 171362306a36Sopenharmony_ci "GB/sec,", "thread-speed", "GB/sec/thread speed"); 171462306a36Sopenharmony_ci 171562306a36Sopenharmony_ci print_res(name, bytes / runtime_sec_max / 1e9, 171662306a36Sopenharmony_ci "GB/sec,", "total-speed", "GB/sec total speed"); 171762306a36Sopenharmony_ci 171862306a36Sopenharmony_ci if (g->p.show_details >= 2) { 171962306a36Sopenharmony_ci char tname[14 + 2 * 11 + 1]; 172062306a36Sopenharmony_ci struct thread_data *td; 172162306a36Sopenharmony_ci for (p = 0; p < g->p.nr_proc; p++) { 172262306a36Sopenharmony_ci for (t = 0; t < g->p.nr_threads; t++) { 172362306a36Sopenharmony_ci memset(tname, 0, sizeof(tname)); 172462306a36Sopenharmony_ci td = g->threads + p*g->p.nr_threads + t; 172562306a36Sopenharmony_ci snprintf(tname, sizeof(tname), "process%d:thread%d", p, t); 172662306a36Sopenharmony_ci print_res(tname, td->speed_gbs, 172762306a36Sopenharmony_ci "GB/sec", "thread-speed", "GB/sec/thread speed"); 172862306a36Sopenharmony_ci print_res(tname, td->system_time_ns / NSEC_PER_SEC, 172962306a36Sopenharmony_ci "secs", "thread-system-time", "system CPU time/thread"); 173062306a36Sopenharmony_ci print_res(tname, td->user_time_ns / NSEC_PER_SEC, 173162306a36Sopenharmony_ci "secs", "thread-user-time", "user CPU time/thread"); 173262306a36Sopenharmony_ci } 173362306a36Sopenharmony_ci } 173462306a36Sopenharmony_ci } 173562306a36Sopenharmony_ci 173662306a36Sopenharmony_ci free(pids); 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci deinit(); 173962306a36Sopenharmony_ci 174062306a36Sopenharmony_ci return 0; 174162306a36Sopenharmony_ci} 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_ci#define MAX_ARGS 50 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_cistatic int command_size(const char **argv) 174662306a36Sopenharmony_ci{ 174762306a36Sopenharmony_ci int size = 0; 174862306a36Sopenharmony_ci 174962306a36Sopenharmony_ci while (*argv) { 175062306a36Sopenharmony_ci size++; 175162306a36Sopenharmony_ci argv++; 175262306a36Sopenharmony_ci } 175362306a36Sopenharmony_ci 175462306a36Sopenharmony_ci BUG_ON(size >= MAX_ARGS); 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci return size; 175762306a36Sopenharmony_ci} 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_cistatic void init_params(struct params *p, const char *name, int argc, const char **argv) 176062306a36Sopenharmony_ci{ 176162306a36Sopenharmony_ci int i; 176262306a36Sopenharmony_ci 176362306a36Sopenharmony_ci printf("\n # Running %s \"perf bench numa", name); 176462306a36Sopenharmony_ci 176562306a36Sopenharmony_ci for (i = 0; i < argc; i++) 176662306a36Sopenharmony_ci printf(" %s", argv[i]); 176762306a36Sopenharmony_ci 176862306a36Sopenharmony_ci printf("\"\n"); 176962306a36Sopenharmony_ci 177062306a36Sopenharmony_ci memset(p, 0, sizeof(*p)); 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci /* Initialize nonzero defaults: */ 177362306a36Sopenharmony_ci 177462306a36Sopenharmony_ci p->serialize_startup = 1; 177562306a36Sopenharmony_ci p->data_reads = true; 177662306a36Sopenharmony_ci p->data_writes = true; 177762306a36Sopenharmony_ci p->data_backwards = true; 177862306a36Sopenharmony_ci p->data_rand_walk = true; 177962306a36Sopenharmony_ci p->nr_loops = -1; 178062306a36Sopenharmony_ci p->init_random = true; 178162306a36Sopenharmony_ci p->mb_global_str = "1"; 178262306a36Sopenharmony_ci p->nr_proc = 1; 178362306a36Sopenharmony_ci p->nr_threads = 1; 178462306a36Sopenharmony_ci p->nr_secs = 5; 178562306a36Sopenharmony_ci p->run_all = argc == 1; 178662306a36Sopenharmony_ci} 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_cistatic int run_bench_numa(const char *name, const char **argv) 178962306a36Sopenharmony_ci{ 179062306a36Sopenharmony_ci int argc = command_size(argv); 179162306a36Sopenharmony_ci 179262306a36Sopenharmony_ci init_params(&p0, name, argc, argv); 179362306a36Sopenharmony_ci argc = parse_options(argc, argv, options, bench_numa_usage, 0); 179462306a36Sopenharmony_ci if (argc) 179562306a36Sopenharmony_ci goto err; 179662306a36Sopenharmony_ci 179762306a36Sopenharmony_ci if (__bench_numa(name)) 179862306a36Sopenharmony_ci goto err; 179962306a36Sopenharmony_ci 180062306a36Sopenharmony_ci return 0; 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_cierr: 180362306a36Sopenharmony_ci return -1; 180462306a36Sopenharmony_ci} 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk" 180762306a36Sopenharmony_ci#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1" 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_ci#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1" 181062306a36Sopenharmony_ci#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1" 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1" 181362306a36Sopenharmony_ci#define OPT_BW_NOTHP OPT_BW, "--thp", "-1" 181462306a36Sopenharmony_ci 181562306a36Sopenharmony_ci/* 181662306a36Sopenharmony_ci * The built-in test-suite executed by "perf bench numa -a". 181762306a36Sopenharmony_ci * 181862306a36Sopenharmony_ci * (A minimum of 4 nodes and 16 GB of RAM is recommended.) 181962306a36Sopenharmony_ci */ 182062306a36Sopenharmony_cistatic const char *tests[][MAX_ARGS] = { 182162306a36Sopenharmony_ci /* Basic single-stream NUMA bandwidth measurements: */ 182262306a36Sopenharmony_ci { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", 182362306a36Sopenharmony_ci "-C" , "0", "-M", "0", OPT_BW_RAM }, 182462306a36Sopenharmony_ci { "RAM-bw-local-NOTHP,", 182562306a36Sopenharmony_ci "mem", "-p", "1", "-t", "1", "-P", "1024", 182662306a36Sopenharmony_ci "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, 182762306a36Sopenharmony_ci { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", 182862306a36Sopenharmony_ci "-C" , "0", "-M", "1", OPT_BW_RAM }, 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci /* 2-stream NUMA bandwidth measurements: */ 183162306a36Sopenharmony_ci { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", 183262306a36Sopenharmony_ci "-C", "0,2", "-M", "0x2", OPT_BW_RAM }, 183362306a36Sopenharmony_ci { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", 183462306a36Sopenharmony_ci "-C", "0,2", "-M", "1x2", OPT_BW_RAM }, 183562306a36Sopenharmony_ci 183662306a36Sopenharmony_ci /* Cross-stream NUMA bandwidth measurement: */ 183762306a36Sopenharmony_ci { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024", 183862306a36Sopenharmony_ci "-C", "0,8", "-M", "1,0", OPT_BW_RAM }, 183962306a36Sopenharmony_ci 184062306a36Sopenharmony_ci /* Convergence latency measurements: */ 184162306a36Sopenharmony_ci { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, 184262306a36Sopenharmony_ci { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, 184362306a36Sopenharmony_ci { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, 184462306a36Sopenharmony_ci { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV }, 184562306a36Sopenharmony_ci { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, 184662306a36Sopenharmony_ci { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, 184762306a36Sopenharmony_ci { " 4x4-convergence-NOTHP,", 184862306a36Sopenharmony_ci "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, 184962306a36Sopenharmony_ci { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV }, 185062306a36Sopenharmony_ci { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV }, 185162306a36Sopenharmony_ci { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV }, 185262306a36Sopenharmony_ci { " 8x4-convergence-NOTHP,", 185362306a36Sopenharmony_ci "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, 185462306a36Sopenharmony_ci { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV }, 185562306a36Sopenharmony_ci { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV }, 185662306a36Sopenharmony_ci { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV }, 185762306a36Sopenharmony_ci { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV }, 185862306a36Sopenharmony_ci { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV }, 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci /* Various NUMA process/thread layout bandwidth measurements: */ 186162306a36Sopenharmony_ci { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW }, 186262306a36Sopenharmony_ci { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW }, 186362306a36Sopenharmony_ci { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW }, 186462306a36Sopenharmony_ci { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW }, 186562306a36Sopenharmony_ci { " 8x1-bw-process-NOTHP,", 186662306a36Sopenharmony_ci "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, 186762306a36Sopenharmony_ci { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, 186862306a36Sopenharmony_ci 186962306a36Sopenharmony_ci { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, 187062306a36Sopenharmony_ci { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, 187162306a36Sopenharmony_ci { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, 187262306a36Sopenharmony_ci { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, 187562306a36Sopenharmony_ci { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, 187662306a36Sopenharmony_ci { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, 187762306a36Sopenharmony_ci { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, 187862306a36Sopenharmony_ci { " 4x8-bw-process-NOTHP,", 187962306a36Sopenharmony_ci "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, 188062306a36Sopenharmony_ci { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, 188162306a36Sopenharmony_ci { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, 188262306a36Sopenharmony_ci 188362306a36Sopenharmony_ci { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, 188462306a36Sopenharmony_ci { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, 188762306a36Sopenharmony_ci { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, 188862306a36Sopenharmony_ci { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, 188962306a36Sopenharmony_ci { "numa01-bw-thread-NOTHP,", 189062306a36Sopenharmony_ci "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP }, 189162306a36Sopenharmony_ci}; 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_cistatic int bench_all(void) 189462306a36Sopenharmony_ci{ 189562306a36Sopenharmony_ci int nr = ARRAY_SIZE(tests); 189662306a36Sopenharmony_ci int ret; 189762306a36Sopenharmony_ci int i; 189862306a36Sopenharmony_ci 189962306a36Sopenharmony_ci ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'"); 190062306a36Sopenharmony_ci BUG_ON(ret < 0); 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 190362306a36Sopenharmony_ci run_bench_numa(tests[i][0], tests[i] + 1); 190462306a36Sopenharmony_ci } 190562306a36Sopenharmony_ci 190662306a36Sopenharmony_ci printf("\n"); 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ci return 0; 190962306a36Sopenharmony_ci} 191062306a36Sopenharmony_ci 191162306a36Sopenharmony_ciint bench_numa(int argc, const char **argv) 191262306a36Sopenharmony_ci{ 191362306a36Sopenharmony_ci init_params(&p0, "main,", argc, argv); 191462306a36Sopenharmony_ci argc = parse_options(argc, argv, options, bench_numa_usage, 0); 191562306a36Sopenharmony_ci if (argc) 191662306a36Sopenharmony_ci goto err; 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci if (p0.run_all) 191962306a36Sopenharmony_ci return bench_all(); 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci if (__bench_numa(NULL)) 192262306a36Sopenharmony_ci goto err; 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci return 0; 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_cierr: 192762306a36Sopenharmony_ci usage_with_options(numa_usage, options); 192862306a36Sopenharmony_ci return -1; 192962306a36Sopenharmony_ci} 1930