1f08c3bdfSopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 2f08c3bdfSopenharmony_ci/* 3f08c3bdfSopenharmony_ci * Copyright (c) International Business Machines Corp., 2007, 2008 4f08c3bdfSopenharmony_ci * 5f08c3bdfSopenharmony_ci * Authors: Darren Hart <dvhltc@us.ibm.com> 6f08c3bdfSopenharmony_ci * Dinakar Guniguntala <dino@in.ibm.com> 7f08c3bdfSopenharmony_ci */ 8f08c3bdfSopenharmony_ci/*\ 9f08c3bdfSopenharmony_ci * [Description] 10f08c3bdfSopenharmony_ci * 11f08c3bdfSopenharmony_ci * Compare running sequential matrix multiplication routines 12f08c3bdfSopenharmony_ci * to running them in parallel to judge multiprocessor 13f08c3bdfSopenharmony_ci * performance 14f08c3bdfSopenharmony_ci */ 15f08c3bdfSopenharmony_ci 16f08c3bdfSopenharmony_ci#include <stdio.h> 17f08c3bdfSopenharmony_ci#include <stdlib.h> 18f08c3bdfSopenharmony_ci#include <math.h> 19f08c3bdfSopenharmony_ci#include <librttest.h> 20f08c3bdfSopenharmony_ci#include <libstats.h> 21f08c3bdfSopenharmony_ci 22f08c3bdfSopenharmony_ci#define MAX_CPUS 8192 23f08c3bdfSopenharmony_ci#define PRIO 43 24f08c3bdfSopenharmony_ci#define MATRIX_SIZE 100 25f08c3bdfSopenharmony_ci#define DEF_OPS 8 /* the higher the number, the more CPU intensive */ 26f08c3bdfSopenharmony_ci /* (and therefore SMP performance goes up) */ 27f08c3bdfSopenharmony_ci#define PASS_CRITERIA 0.75 /* Avg concurrent time * pass criteria < avg seq time - */ 28f08c3bdfSopenharmony_ci /* for every addition of a cpu */ 29f08c3bdfSopenharmony_ci#define ITERATIONS 128 30f08c3bdfSopenharmony_ci#define HIST_BUCKETS 100 31f08c3bdfSopenharmony_ci 32f08c3bdfSopenharmony_ci#define THREAD_WAIT 1 33f08c3bdfSopenharmony_ci#define THREAD_WORK 2 34f08c3bdfSopenharmony_ci#define THREAD_DONE 3 35f08c3bdfSopenharmony_ci 36f08c3bdfSopenharmony_ci#define THREAD_SLEEP 1 * NS_PER_US 37f08c3bdfSopenharmony_ci 38f08c3bdfSopenharmony_cistatic int ops = DEF_OPS; 39f08c3bdfSopenharmony_cistatic int numcpus; 40f08c3bdfSopenharmony_cistatic float criteria; 41f08c3bdfSopenharmony_cistatic int *tids; 42f08c3bdfSopenharmony_cistatic int online_cpu_id = -1; 43f08c3bdfSopenharmony_cistatic int iterations = ITERATIONS; 44f08c3bdfSopenharmony_cistatic int iterations_percpu; 45f08c3bdfSopenharmony_ci 46f08c3bdfSopenharmony_cistats_container_t sdat, cdat, *curdat; 47f08c3bdfSopenharmony_cistats_container_t shist, chist; 48f08c3bdfSopenharmony_cistatic pthread_barrier_t mult_start; 49f08c3bdfSopenharmony_ci 50f08c3bdfSopenharmony_cistruct matrices { 51f08c3bdfSopenharmony_ci double A[MATRIX_SIZE][MATRIX_SIZE]; 52f08c3bdfSopenharmony_ci double B[MATRIX_SIZE][MATRIX_SIZE]; 53f08c3bdfSopenharmony_ci double C[MATRIX_SIZE][MATRIX_SIZE]; 54f08c3bdfSopenharmony_ci}; 55f08c3bdfSopenharmony_ci 56f08c3bdfSopenharmony_cistatic void usage(void) 57f08c3bdfSopenharmony_ci{ 58f08c3bdfSopenharmony_ci rt_help(); 59f08c3bdfSopenharmony_ci printf("matrix_mult specific options:\n"); 60f08c3bdfSopenharmony_ci printf 61f08c3bdfSopenharmony_ci (" -l# #: number of multiplications per iteration (load)\n"); 62f08c3bdfSopenharmony_ci printf(" -i# #: number of iterations\n"); 63f08c3bdfSopenharmony_ci} 64f08c3bdfSopenharmony_ci 65f08c3bdfSopenharmony_cistatic int parse_args(int c, char *v) 66f08c3bdfSopenharmony_ci{ 67f08c3bdfSopenharmony_ci int handled = 1; 68f08c3bdfSopenharmony_ci switch (c) { 69f08c3bdfSopenharmony_ci case 'i': 70f08c3bdfSopenharmony_ci iterations = atoi(v); 71f08c3bdfSopenharmony_ci break; 72f08c3bdfSopenharmony_ci case 'l': 73f08c3bdfSopenharmony_ci ops = atoi(v); 74f08c3bdfSopenharmony_ci break; 75f08c3bdfSopenharmony_ci case 'h': 76f08c3bdfSopenharmony_ci usage(); 77f08c3bdfSopenharmony_ci exit(0); 78f08c3bdfSopenharmony_ci default: 79f08c3bdfSopenharmony_ci handled = 0; 80f08c3bdfSopenharmony_ci break; 81f08c3bdfSopenharmony_ci } 82f08c3bdfSopenharmony_ci return handled; 83f08c3bdfSopenharmony_ci} 84f08c3bdfSopenharmony_ci 85f08c3bdfSopenharmony_cistatic void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE], 86f08c3bdfSopenharmony_ci double B[MATRIX_SIZE][MATRIX_SIZE]) 87f08c3bdfSopenharmony_ci{ 88f08c3bdfSopenharmony_ci int i, j; 89f08c3bdfSopenharmony_ci for (i = 0; i < MATRIX_SIZE; i++) { 90f08c3bdfSopenharmony_ci for (j = 0; j < MATRIX_SIZE; j++) { 91f08c3bdfSopenharmony_ci A[i][j] = (double)(i * j); 92f08c3bdfSopenharmony_ci B[i][j] = (double)((i * j) % 10); 93f08c3bdfSopenharmony_ci } 94f08c3bdfSopenharmony_ci } 95f08c3bdfSopenharmony_ci} 96f08c3bdfSopenharmony_ci 97f08c3bdfSopenharmony_cistatic void matrix_mult(struct matrices *matrices) 98f08c3bdfSopenharmony_ci{ 99f08c3bdfSopenharmony_ci int i, j, k; 100f08c3bdfSopenharmony_ci 101f08c3bdfSopenharmony_ci matrix_init(matrices->A, matrices->B); 102f08c3bdfSopenharmony_ci for (i = 0; i < MATRIX_SIZE; i++) { 103f08c3bdfSopenharmony_ci int i_m = MATRIX_SIZE - i - 1; 104f08c3bdfSopenharmony_ci for (j = 0; j < MATRIX_SIZE; j++) { 105f08c3bdfSopenharmony_ci double sum = matrices->A[i_m][j] * matrices->B[j][i]; 106f08c3bdfSopenharmony_ci for (k = 0; k < MATRIX_SIZE; k++) 107f08c3bdfSopenharmony_ci sum += matrices->A[i_m][k] * matrices->B[k][j]; 108f08c3bdfSopenharmony_ci matrices->C[i][j] = sum; 109f08c3bdfSopenharmony_ci } 110f08c3bdfSopenharmony_ci } 111f08c3bdfSopenharmony_ci} 112f08c3bdfSopenharmony_ci 113f08c3bdfSopenharmony_cistatic void matrix_mult_record(struct matrices *matrices, int index) 114f08c3bdfSopenharmony_ci{ 115f08c3bdfSopenharmony_ci nsec_t start, end, delta; 116f08c3bdfSopenharmony_ci int i; 117f08c3bdfSopenharmony_ci 118f08c3bdfSopenharmony_ci start = rt_gettime(); 119f08c3bdfSopenharmony_ci for (i = 0; i < ops; i++) 120f08c3bdfSopenharmony_ci matrix_mult(matrices); 121f08c3bdfSopenharmony_ci end = rt_gettime(); 122f08c3bdfSopenharmony_ci delta = (long)((end - start) / NS_PER_US); 123f08c3bdfSopenharmony_ci curdat->records[index].x = index; 124f08c3bdfSopenharmony_ci curdat->records[index].y = delta; 125f08c3bdfSopenharmony_ci} 126f08c3bdfSopenharmony_ci 127f08c3bdfSopenharmony_cistatic int set_affinity(void) 128f08c3bdfSopenharmony_ci{ 129f08c3bdfSopenharmony_ci static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER; 130f08c3bdfSopenharmony_ci cpu_set_t mask; 131f08c3bdfSopenharmony_ci int cpuid; 132f08c3bdfSopenharmony_ci 133f08c3bdfSopenharmony_ci pthread_mutex_lock(&mutex_cpu); 134f08c3bdfSopenharmony_ci do { 135f08c3bdfSopenharmony_ci ++online_cpu_id; 136f08c3bdfSopenharmony_ci CPU_ZERO(&mask); 137f08c3bdfSopenharmony_ci CPU_SET(online_cpu_id, &mask); 138f08c3bdfSopenharmony_ci 139f08c3bdfSopenharmony_ci if (!sched_setaffinity(0, sizeof(mask), &mask)) { 140f08c3bdfSopenharmony_ci cpuid = online_cpu_id; /* Save this value before unlocking mutex */ 141f08c3bdfSopenharmony_ci pthread_mutex_unlock(&mutex_cpu); 142f08c3bdfSopenharmony_ci return cpuid; 143f08c3bdfSopenharmony_ci } 144f08c3bdfSopenharmony_ci } while (online_cpu_id < MAX_CPUS); 145f08c3bdfSopenharmony_ci pthread_mutex_unlock(&mutex_cpu); 146f08c3bdfSopenharmony_ci return -1; 147f08c3bdfSopenharmony_ci} 148f08c3bdfSopenharmony_ci 149f08c3bdfSopenharmony_cistatic void *concurrent_thread(void *thread) 150f08c3bdfSopenharmony_ci{ 151f08c3bdfSopenharmony_ci struct thread *t = (struct thread *)thread; 152f08c3bdfSopenharmony_ci struct matrices *matrices = (struct matrices *) t->arg; 153f08c3bdfSopenharmony_ci int thread_id = (intptr_t) t->id; 154f08c3bdfSopenharmony_ci int cpuid; 155f08c3bdfSopenharmony_ci int i; 156f08c3bdfSopenharmony_ci int index; 157f08c3bdfSopenharmony_ci 158f08c3bdfSopenharmony_ci cpuid = set_affinity(); 159f08c3bdfSopenharmony_ci if (cpuid == -1) { 160f08c3bdfSopenharmony_ci fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id); 161f08c3bdfSopenharmony_ci exit(1); 162f08c3bdfSopenharmony_ci } 163f08c3bdfSopenharmony_ci 164f08c3bdfSopenharmony_ci index = iterations_percpu * thread_id; /* To avoid stats overlapping */ 165f08c3bdfSopenharmony_ci pthread_barrier_wait(&mult_start); 166f08c3bdfSopenharmony_ci for (i = 0; i < iterations_percpu; i++) 167f08c3bdfSopenharmony_ci matrix_mult_record(matrices, index++); 168f08c3bdfSopenharmony_ci 169f08c3bdfSopenharmony_ci return NULL; 170f08c3bdfSopenharmony_ci} 171f08c3bdfSopenharmony_ci 172f08c3bdfSopenharmony_cistatic int main_thread(void) 173f08c3bdfSopenharmony_ci{ 174f08c3bdfSopenharmony_ci int ret, i, j; 175f08c3bdfSopenharmony_ci nsec_t start, end; 176f08c3bdfSopenharmony_ci long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0; 177f08c3bdfSopenharmony_ci float savg, cavg; 178f08c3bdfSopenharmony_ci int cpuid; 179f08c3bdfSopenharmony_ci struct matrices *matrices[numcpus]; 180f08c3bdfSopenharmony_ci 181f08c3bdfSopenharmony_ci for (i = 0; i < numcpus; ++i) 182f08c3bdfSopenharmony_ci matrices[i] = malloc(sizeof(struct matrices)); 183f08c3bdfSopenharmony_ci 184f08c3bdfSopenharmony_ci if (stats_container_init(&sdat, iterations) || 185f08c3bdfSopenharmony_ci stats_container_init(&shist, HIST_BUCKETS) || 186f08c3bdfSopenharmony_ci stats_container_init(&cdat, iterations) || 187f08c3bdfSopenharmony_ci stats_container_init(&chist, HIST_BUCKETS) 188f08c3bdfSopenharmony_ci ) { 189f08c3bdfSopenharmony_ci fprintf(stderr, "Cannot init stats container\n"); 190f08c3bdfSopenharmony_ci exit(1); 191f08c3bdfSopenharmony_ci } 192f08c3bdfSopenharmony_ci 193f08c3bdfSopenharmony_ci tids = calloc(numcpus, sizeof(int)); 194f08c3bdfSopenharmony_ci if (!tids) { 195f08c3bdfSopenharmony_ci perror("malloc"); 196f08c3bdfSopenharmony_ci exit(1); 197f08c3bdfSopenharmony_ci } 198f08c3bdfSopenharmony_ci 199f08c3bdfSopenharmony_ci cpuid = set_affinity(); 200f08c3bdfSopenharmony_ci if (cpuid == -1) { 201f08c3bdfSopenharmony_ci fprintf(stderr, "Main thread: Can't set affinity.\n"); 202f08c3bdfSopenharmony_ci exit(1); 203f08c3bdfSopenharmony_ci } 204f08c3bdfSopenharmony_ci 205f08c3bdfSopenharmony_ci /* run matrix mult operation sequentially */ 206f08c3bdfSopenharmony_ci curdat = &sdat; 207f08c3bdfSopenharmony_ci curdat->index = iterations - 1; 208f08c3bdfSopenharmony_ci printf("\nRunning sequential operations\n"); 209f08c3bdfSopenharmony_ci start = rt_gettime(); 210f08c3bdfSopenharmony_ci for (i = 0; i < iterations; i++) 211f08c3bdfSopenharmony_ci matrix_mult_record(matrices[0], i); 212f08c3bdfSopenharmony_ci end = rt_gettime(); 213f08c3bdfSopenharmony_ci delta = (long)((end - start) / NS_PER_US); 214f08c3bdfSopenharmony_ci 215f08c3bdfSopenharmony_ci savg = delta / iterations; /* don't use the stats record, use the total time recorded */ 216f08c3bdfSopenharmony_ci smin = stats_min(&sdat); 217f08c3bdfSopenharmony_ci smax = stats_max(&sdat); 218f08c3bdfSopenharmony_ci 219f08c3bdfSopenharmony_ci printf("Min: %ld us\n", smin); 220f08c3bdfSopenharmony_ci printf("Max: %ld us\n", smax); 221f08c3bdfSopenharmony_ci printf("Avg: %.4f us\n", savg); 222f08c3bdfSopenharmony_ci printf("StdDev: %.4f us\n", stats_stddev(&sdat)); 223f08c3bdfSopenharmony_ci 224f08c3bdfSopenharmony_ci if (stats_hist(&shist, &sdat) || 225f08c3bdfSopenharmony_ci stats_container_save("sequential", 226f08c3bdfSopenharmony_ci "Matrix Multiplication Sequential Execution Runtime Scatter Plot", 227f08c3bdfSopenharmony_ci "Iteration", "Runtime (us)", &sdat, "points") 228f08c3bdfSopenharmony_ci || stats_container_save("sequential_hist", 229f08c3bdfSopenharmony_ci "Matrix Multiplicatoin Sequential Execution Runtime Histogram", 230f08c3bdfSopenharmony_ci "Runtime (us)", "Samples", &shist, "steps") 231f08c3bdfSopenharmony_ci ) { 232f08c3bdfSopenharmony_ci fprintf(stderr, 233f08c3bdfSopenharmony_ci "Warning: could not save sequential mults stats\n"); 234f08c3bdfSopenharmony_ci } 235f08c3bdfSopenharmony_ci 236f08c3bdfSopenharmony_ci pthread_barrier_init(&mult_start, NULL, numcpus + 1); 237f08c3bdfSopenharmony_ci set_priority(PRIO); 238f08c3bdfSopenharmony_ci curdat = &cdat; 239f08c3bdfSopenharmony_ci curdat->index = iterations - 1; 240f08c3bdfSopenharmony_ci online_cpu_id = -1; /* Redispatch cpus */ 241f08c3bdfSopenharmony_ci /* Create numcpus-1 concurrent threads */ 242f08c3bdfSopenharmony_ci for (j = 0; j < numcpus; j++) { 243f08c3bdfSopenharmony_ci tids[j] = create_fifo_thread(concurrent_thread, matrices[j], PRIO); 244f08c3bdfSopenharmony_ci if (tids[j] == -1) { 245f08c3bdfSopenharmony_ci printf 246f08c3bdfSopenharmony_ci ("Thread creation failed (max threads exceeded?)\n"); 247f08c3bdfSopenharmony_ci exit(1); 248f08c3bdfSopenharmony_ci } 249f08c3bdfSopenharmony_ci } 250f08c3bdfSopenharmony_ci 251f08c3bdfSopenharmony_ci /* run matrix mult operation concurrently */ 252f08c3bdfSopenharmony_ci printf("\nRunning concurrent operations\n"); 253f08c3bdfSopenharmony_ci pthread_barrier_wait(&mult_start); 254f08c3bdfSopenharmony_ci start = rt_gettime(); 255f08c3bdfSopenharmony_ci join_threads(); 256f08c3bdfSopenharmony_ci end = rt_gettime(); 257f08c3bdfSopenharmony_ci 258f08c3bdfSopenharmony_ci delta = (long)((end - start) / NS_PER_US); 259f08c3bdfSopenharmony_ci 260f08c3bdfSopenharmony_ci cavg = delta / iterations; /* don't use the stats record, use the total time recorded */ 261f08c3bdfSopenharmony_ci cmin = stats_min(&cdat); 262f08c3bdfSopenharmony_ci cmax = stats_max(&cdat); 263f08c3bdfSopenharmony_ci 264f08c3bdfSopenharmony_ci printf("Min: %ld us\n", cmin); 265f08c3bdfSopenharmony_ci printf("Max: %ld us\n", cmax); 266f08c3bdfSopenharmony_ci printf("Avg: %.4f us\n", cavg); 267f08c3bdfSopenharmony_ci printf("StdDev: %.4f us\n", stats_stddev(&cdat)); 268f08c3bdfSopenharmony_ci 269f08c3bdfSopenharmony_ci if (stats_hist(&chist, &cdat) || 270f08c3bdfSopenharmony_ci stats_container_save("concurrent", 271f08c3bdfSopenharmony_ci "Matrix Multiplication Concurrent Execution Runtime Scatter Plot", 272f08c3bdfSopenharmony_ci "Iteration", "Runtime (us)", &cdat, "points") 273f08c3bdfSopenharmony_ci || stats_container_save("concurrent_hist", 274f08c3bdfSopenharmony_ci "Matrix Multiplication Concurrent Execution Runtime Histogram", 275f08c3bdfSopenharmony_ci "Iteration", "Runtime (us)", &chist, 276f08c3bdfSopenharmony_ci "steps") 277f08c3bdfSopenharmony_ci ) { 278f08c3bdfSopenharmony_ci fprintf(stderr, 279f08c3bdfSopenharmony_ci "Warning: could not save concurrent mults stats\n"); 280f08c3bdfSopenharmony_ci } 281f08c3bdfSopenharmony_ci 282f08c3bdfSopenharmony_ci printf("\nConcurrent Multipliers:\n"); 283f08c3bdfSopenharmony_ci printf("Min: %.4f\n", (float)smin / cmin); 284f08c3bdfSopenharmony_ci printf("Max: %.4f\n", (float)smax / cmax); 285f08c3bdfSopenharmony_ci printf("Avg: %.4f\n", (float)savg / cavg); 286f08c3bdfSopenharmony_ci 287f08c3bdfSopenharmony_ci ret = 1; 288f08c3bdfSopenharmony_ci if (savg > (cavg * criteria)) 289f08c3bdfSopenharmony_ci ret = 0; 290f08c3bdfSopenharmony_ci printf 291f08c3bdfSopenharmony_ci ("\nCriteria: %.2f * average concurrent time < average sequential time\n", 292f08c3bdfSopenharmony_ci criteria); 293f08c3bdfSopenharmony_ci printf("Result: %s\n", ret ? "FAIL" : "PASS"); 294f08c3bdfSopenharmony_ci 295f08c3bdfSopenharmony_ci for (i = 0; i < numcpus; i++) 296f08c3bdfSopenharmony_ci free(matrices[i]); 297f08c3bdfSopenharmony_ci 298f08c3bdfSopenharmony_ci return ret; 299f08c3bdfSopenharmony_ci} 300f08c3bdfSopenharmony_ci 301f08c3bdfSopenharmony_ciint main(int argc, char *argv[]) 302f08c3bdfSopenharmony_ci{ 303f08c3bdfSopenharmony_ci setup(); 304f08c3bdfSopenharmony_ci pass_criteria = PASS_CRITERIA; 305f08c3bdfSopenharmony_ci rt_init("l:i:h", parse_args, argc, argv); 306f08c3bdfSopenharmony_ci numcpus = sysconf(_SC_NPROCESSORS_ONLN); 307f08c3bdfSopenharmony_ci /* the minimum avg concurrent multiplier to pass */ 308f08c3bdfSopenharmony_ci criteria = pass_criteria * numcpus; 309f08c3bdfSopenharmony_ci int new_iterations, ret; 310f08c3bdfSopenharmony_ci 311f08c3bdfSopenharmony_ci if (iterations <= 0) { 312f08c3bdfSopenharmony_ci fprintf(stderr, "iterations must be greater than zero\n"); 313f08c3bdfSopenharmony_ci exit(1); 314f08c3bdfSopenharmony_ci } 315f08c3bdfSopenharmony_ci 316f08c3bdfSopenharmony_ci printf("\n---------------------------------------\n"); 317f08c3bdfSopenharmony_ci printf("Matrix Multiplication (SMP Performance)\n"); 318f08c3bdfSopenharmony_ci printf("---------------------------------------\n\n"); 319f08c3bdfSopenharmony_ci 320f08c3bdfSopenharmony_ci /* Line below rounds up iterations to a multiple of numcpus. 321f08c3bdfSopenharmony_ci * Without this, having iterations not a mutiple of numcpus causes 322f08c3bdfSopenharmony_ci * stats to segfault (overflow stats array). 323f08c3bdfSopenharmony_ci */ 324f08c3bdfSopenharmony_ci new_iterations = (int)((iterations + numcpus - 1) / numcpus) * numcpus; 325f08c3bdfSopenharmony_ci if (new_iterations != iterations) 326f08c3bdfSopenharmony_ci printf 327f08c3bdfSopenharmony_ci ("Rounding up iterations value to nearest multiple of total online CPUs\n"); 328f08c3bdfSopenharmony_ci 329f08c3bdfSopenharmony_ci iterations = new_iterations; 330f08c3bdfSopenharmony_ci iterations_percpu = iterations / numcpus; 331f08c3bdfSopenharmony_ci 332f08c3bdfSopenharmony_ci printf("Running %d iterations\n", iterations); 333f08c3bdfSopenharmony_ci printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE); 334f08c3bdfSopenharmony_ci printf("Calculations per iteration: %d\n", ops); 335f08c3bdfSopenharmony_ci printf("Number of CPUs: %u\n", numcpus); 336f08c3bdfSopenharmony_ci 337f08c3bdfSopenharmony_ci set_priority(PRIO); 338f08c3bdfSopenharmony_ci ret = main_thread(); 339f08c3bdfSopenharmony_ci 340f08c3bdfSopenharmony_ci return ret; 341f08c3bdfSopenharmony_ci} 342