1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Copyright (c) International Business Machines Corp., 2007, 2008 4 * 5 * Authors: Darren Hart <dvhltc@us.ibm.com> 6 * Dinakar Guniguntala <dino@in.ibm.com> 7 */ 8/*\ 9 * [Description] 10 * 11 * Compare running sequential matrix multiplication routines 12 * to running them in parallel to judge multiprocessor 13 * performance 14 */ 15 16#include <stdio.h> 17#include <stdlib.h> 18#include <math.h> 19#include <librttest.h> 20#include <libstats.h> 21 22#define MAX_CPUS 8192 23#define PRIO 43 24#define MATRIX_SIZE 100 25#define DEF_OPS 8 /* the higher the number, the more CPU intensive */ 26 /* (and therefore SMP performance goes up) */ 27#define PASS_CRITERIA 0.75 /* Avg concurrent time * pass criteria < avg seq time - */ 28 /* for every addition of a cpu */ 29#define ITERATIONS 128 30#define HIST_BUCKETS 100 31 32#define THREAD_WAIT 1 33#define THREAD_WORK 2 34#define THREAD_DONE 3 35 36#define THREAD_SLEEP 1 * NS_PER_US 37 38static int ops = DEF_OPS; 39static int numcpus; 40static float criteria; 41static int *tids; 42static int online_cpu_id = -1; 43static int iterations = ITERATIONS; 44static int iterations_percpu; 45 46stats_container_t sdat, cdat, *curdat; 47stats_container_t shist, chist; 48static pthread_barrier_t mult_start; 49 50struct matrices { 51 double A[MATRIX_SIZE][MATRIX_SIZE]; 52 double B[MATRIX_SIZE][MATRIX_SIZE]; 53 double C[MATRIX_SIZE][MATRIX_SIZE]; 54}; 55 56static void usage(void) 57{ 58 rt_help(); 59 printf("matrix_mult specific options:\n"); 60 printf 61 (" -l# #: number of multiplications per iteration (load)\n"); 62 printf(" -i# #: number of iterations\n"); 63} 64 65static int parse_args(int c, char *v) 66{ 67 int handled = 1; 68 switch (c) { 69 case 'i': 70 iterations = atoi(v); 71 break; 72 case 'l': 73 ops = atoi(v); 74 break; 75 case 'h': 76 usage(); 77 exit(0); 78 default: 79 handled = 0; 80 break; 81 } 82 return handled; 83} 84 85static void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE], 86 double B[MATRIX_SIZE][MATRIX_SIZE]) 87{ 88 int i, j; 89 for (i = 0; i < MATRIX_SIZE; i++) { 90 for (j = 0; j < MATRIX_SIZE; j++) { 91 A[i][j] = (double)(i * j); 92 B[i][j] = (double)((i * j) % 10); 93 } 94 } 95} 96 97static void matrix_mult(struct matrices *matrices) 98{ 99 int i, j, k; 100 101 matrix_init(matrices->A, matrices->B); 102 for (i = 0; i < MATRIX_SIZE; i++) { 103 int i_m = MATRIX_SIZE - i - 1; 104 for (j = 0; j < MATRIX_SIZE; j++) { 105 double sum = matrices->A[i_m][j] * matrices->B[j][i]; 106 for (k = 0; k < MATRIX_SIZE; k++) 107 sum += matrices->A[i_m][k] * matrices->B[k][j]; 108 matrices->C[i][j] = sum; 109 } 110 } 111} 112 113static void matrix_mult_record(struct matrices *matrices, int index) 114{ 115 nsec_t start, end, delta; 116 int i; 117 118 start = rt_gettime(); 119 for (i = 0; i < ops; i++) 120 matrix_mult(matrices); 121 end = rt_gettime(); 122 delta = (long)((end - start) / NS_PER_US); 123 curdat->records[index].x = index; 124 curdat->records[index].y = delta; 125} 126 127static int set_affinity(void) 128{ 129 static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER; 130 cpu_set_t mask; 131 int cpuid; 132 133 pthread_mutex_lock(&mutex_cpu); 134 do { 135 ++online_cpu_id; 136 CPU_ZERO(&mask); 137 CPU_SET(online_cpu_id, &mask); 138 139 if (!sched_setaffinity(0, sizeof(mask), &mask)) { 140 cpuid = online_cpu_id; /* Save this value before unlocking mutex */ 141 pthread_mutex_unlock(&mutex_cpu); 142 return cpuid; 143 } 144 } while (online_cpu_id < MAX_CPUS); 145 pthread_mutex_unlock(&mutex_cpu); 146 return -1; 147} 148 149static void *concurrent_thread(void *thread) 150{ 151 struct thread *t = (struct thread *)thread; 152 struct matrices *matrices = (struct matrices *) t->arg; 153 int thread_id = (intptr_t) t->id; 154 int cpuid; 155 int i; 156 int index; 157 158 cpuid = set_affinity(); 159 if (cpuid == -1) { 160 fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id); 161 exit(1); 162 } 163 164 index = iterations_percpu * thread_id; /* To avoid stats overlapping */ 165 pthread_barrier_wait(&mult_start); 166 for (i = 0; i < iterations_percpu; i++) 167 matrix_mult_record(matrices, index++); 168 169 return NULL; 170} 171 172static int main_thread(void) 173{ 174 int ret, i, j; 175 nsec_t start, end; 176 long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0; 177 float savg, cavg; 178 int cpuid; 179 struct matrices *matrices[numcpus]; 180 181 for (i = 0; i < numcpus; ++i) 182 matrices[i] = malloc(sizeof(struct matrices)); 183 184 if (stats_container_init(&sdat, iterations) || 185 stats_container_init(&shist, HIST_BUCKETS) || 186 stats_container_init(&cdat, iterations) || 187 stats_container_init(&chist, HIST_BUCKETS) 188 ) { 189 fprintf(stderr, "Cannot init stats container\n"); 190 exit(1); 191 } 192 193 tids = calloc(numcpus, sizeof(int)); 194 if (!tids) { 195 perror("malloc"); 196 exit(1); 197 } 198 199 cpuid = set_affinity(); 200 if (cpuid == -1) { 201 fprintf(stderr, "Main thread: Can't set affinity.\n"); 202 exit(1); 203 } 204 205 /* run matrix mult operation sequentially */ 206 curdat = &sdat; 207 curdat->index = iterations - 1; 208 printf("\nRunning sequential operations\n"); 209 start = rt_gettime(); 210 for (i = 0; i < iterations; i++) 211 matrix_mult_record(matrices[0], i); 212 end = rt_gettime(); 213 delta = (long)((end - start) / NS_PER_US); 214 215 savg = delta / iterations; /* don't use the stats record, use the total time recorded */ 216 smin = stats_min(&sdat); 217 smax = stats_max(&sdat); 218 219 printf("Min: %ld us\n", smin); 220 printf("Max: %ld us\n", smax); 221 printf("Avg: %.4f us\n", savg); 222 printf("StdDev: %.4f us\n", stats_stddev(&sdat)); 223 224 if (stats_hist(&shist, &sdat) || 225 stats_container_save("sequential", 226 "Matrix Multiplication Sequential Execution Runtime Scatter Plot", 227 "Iteration", "Runtime (us)", &sdat, "points") 228 || stats_container_save("sequential_hist", 229 "Matrix Multiplicatoin Sequential Execution Runtime Histogram", 230 "Runtime (us)", "Samples", &shist, "steps") 231 ) { 232 fprintf(stderr, 233 "Warning: could not save sequential mults stats\n"); 234 } 235 236 pthread_barrier_init(&mult_start, NULL, numcpus + 1); 237 set_priority(PRIO); 238 curdat = &cdat; 239 curdat->index = iterations - 1; 240 online_cpu_id = -1; /* Redispatch cpus */ 241 /* Create numcpus-1 concurrent threads */ 242 for (j = 0; j < numcpus; j++) { 243 tids[j] = create_fifo_thread(concurrent_thread, matrices[j], PRIO); 244 if (tids[j] == -1) { 245 printf 246 ("Thread creation failed (max threads exceeded?)\n"); 247 exit(1); 248 } 249 } 250 251 /* run matrix mult operation concurrently */ 252 printf("\nRunning concurrent operations\n"); 253 pthread_barrier_wait(&mult_start); 254 start = rt_gettime(); 255 join_threads(); 256 end = rt_gettime(); 257 258 delta = (long)((end - start) / NS_PER_US); 259 260 cavg = delta / iterations; /* don't use the stats record, use the total time recorded */ 261 cmin = stats_min(&cdat); 262 cmax = stats_max(&cdat); 263 264 printf("Min: %ld us\n", cmin); 265 printf("Max: %ld us\n", cmax); 266 printf("Avg: %.4f us\n", cavg); 267 printf("StdDev: %.4f us\n", stats_stddev(&cdat)); 268 269 if (stats_hist(&chist, &cdat) || 270 stats_container_save("concurrent", 271 "Matrix Multiplication Concurrent Execution Runtime Scatter Plot", 272 "Iteration", "Runtime (us)", &cdat, "points") 273 || stats_container_save("concurrent_hist", 274 "Matrix Multiplication Concurrent Execution Runtime Histogram", 275 "Iteration", "Runtime (us)", &chist, 276 "steps") 277 ) { 278 fprintf(stderr, 279 "Warning: could not save concurrent mults stats\n"); 280 } 281 282 printf("\nConcurrent Multipliers:\n"); 283 printf("Min: %.4f\n", (float)smin / cmin); 284 printf("Max: %.4f\n", (float)smax / cmax); 285 printf("Avg: %.4f\n", (float)savg / cavg); 286 287 ret = 1; 288 if (savg > (cavg * criteria)) 289 ret = 0; 290 printf 291 ("\nCriteria: %.2f * average concurrent time < average sequential time\n", 292 criteria); 293 printf("Result: %s\n", ret ? "FAIL" : "PASS"); 294 295 for (i = 0; i < numcpus; i++) 296 free(matrices[i]); 297 298 return ret; 299} 300 301int main(int argc, char *argv[]) 302{ 303 setup(); 304 pass_criteria = PASS_CRITERIA; 305 rt_init("l:i:h", parse_args, argc, argv); 306 numcpus = sysconf(_SC_NPROCESSORS_ONLN); 307 /* the minimum avg concurrent multiplier to pass */ 308 criteria = pass_criteria * numcpus; 309 int new_iterations, ret; 310 311 if (iterations <= 0) { 312 fprintf(stderr, "iterations must be greater than zero\n"); 313 exit(1); 314 } 315 316 printf("\n---------------------------------------\n"); 317 printf("Matrix Multiplication (SMP Performance)\n"); 318 printf("---------------------------------------\n\n"); 319 320 /* Line below rounds up iterations to a multiple of numcpus. 321 * Without this, having iterations not a mutiple of numcpus causes 322 * stats to segfault (overflow stats array). 323 */ 324 new_iterations = (int)((iterations + numcpus - 1) / numcpus) * numcpus; 325 if (new_iterations != iterations) 326 printf 327 ("Rounding up iterations value to nearest multiple of total online CPUs\n"); 328 329 iterations = new_iterations; 330 iterations_percpu = iterations / numcpus; 331 332 printf("Running %d iterations\n", iterations); 333 printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE); 334 printf("Calculations per iteration: %d\n", ops); 335 printf("Number of CPUs: %u\n", numcpus); 336 337 set_priority(PRIO); 338 ret = main_thread(); 339 340 return ret; 341} 342