1// SPDX-License-Identifier: LGPL-2.1 2#define _GNU_SOURCE 3#include <assert.h> 4#include <linux/membarrier.h> 5#include <pthread.h> 6#include <sched.h> 7#include <stdatomic.h> 8#include <stdint.h> 9#include <stdio.h> 10#include <stdlib.h> 11#include <string.h> 12#include <syscall.h> 13#include <unistd.h> 14#include <poll.h> 15#include <sys/types.h> 16#include <signal.h> 17#include <errno.h> 18#include <stddef.h> 19 20static inline pid_t rseq_gettid(void) 21{ 22 return syscall(__NR_gettid); 23} 24 25#define NR_INJECT 9 26static int loop_cnt[NR_INJECT + 1]; 27 28static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); 29static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); 30static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); 31static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); 32static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); 33static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); 34 35static int opt_modulo, verbose; 36 37static int opt_yield, opt_signal, opt_sleep, 38 opt_disable_rseq, opt_threads = 200, 39 opt_disable_mod = 0, opt_test = 's', opt_mb = 0; 40 41#ifndef RSEQ_SKIP_FASTPATH 42static long long opt_reps = 5000; 43#else 44static long long opt_reps = 100; 45#endif 46 47static __thread __attribute__((tls_model("initial-exec"))) 48unsigned int signals_delivered; 49 50#ifndef BENCHMARK 51 52static __thread __attribute__((tls_model("initial-exec"), unused)) 53unsigned int yield_mod_cnt, nr_abort; 54 55#define printf_verbose(fmt, ...) \ 56 do { \ 57 if (verbose) \ 58 printf(fmt, ## __VA_ARGS__); \ 59 } while (0) 60 61#ifdef __i386__ 62 63#define INJECT_ASM_REG "eax" 64 65#define RSEQ_INJECT_CLOBBER \ 66 , INJECT_ASM_REG 67 68#define RSEQ_INJECT_ASM(n) \ 69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ 70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 71 "jz 333f\n\t" \ 72 "222:\n\t" \ 73 "dec %%" INJECT_ASM_REG "\n\t" \ 74 "jnz 222b\n\t" \ 75 "333:\n\t" 76 77#elif defined(__x86_64__) 78 79#define INJECT_ASM_REG_P "rax" 80#define INJECT_ASM_REG "eax" 81 82#define RSEQ_INJECT_CLOBBER \ 83 , INJECT_ASM_REG_P \ 84 , INJECT_ASM_REG 85 86#define RSEQ_INJECT_ASM(n) \ 87 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ 88 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ 89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 90 "jz 333f\n\t" \ 91 "222:\n\t" \ 92 "dec %%" INJECT_ASM_REG "\n\t" \ 93 "jnz 222b\n\t" \ 94 "333:\n\t" 95 96#elif defined(__s390__) 97 98#define RSEQ_INJECT_INPUT \ 99 , [loop_cnt_1]"m"(loop_cnt[1]) \ 100 , [loop_cnt_2]"m"(loop_cnt[2]) \ 101 , [loop_cnt_3]"m"(loop_cnt[3]) \ 102 , [loop_cnt_4]"m"(loop_cnt[4]) \ 103 , [loop_cnt_5]"m"(loop_cnt[5]) \ 104 , [loop_cnt_6]"m"(loop_cnt[6]) 105 106#define INJECT_ASM_REG "r12" 107 108#define RSEQ_INJECT_CLOBBER \ 109 , INJECT_ASM_REG 110 111#define RSEQ_INJECT_ASM(n) \ 112 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 113 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ 114 "je 333f\n\t" \ 115 "222:\n\t" \ 116 "ahi %%" INJECT_ASM_REG ", -1\n\t" \ 117 "jnz 222b\n\t" \ 118 "333:\n\t" 119 120#elif defined(__ARMEL__) 121 122#define RSEQ_INJECT_INPUT \ 123 , [loop_cnt_1]"m"(loop_cnt[1]) \ 124 , [loop_cnt_2]"m"(loop_cnt[2]) \ 125 , [loop_cnt_3]"m"(loop_cnt[3]) \ 126 , [loop_cnt_4]"m"(loop_cnt[4]) \ 127 , [loop_cnt_5]"m"(loop_cnt[5]) \ 128 , [loop_cnt_6]"m"(loop_cnt[6]) 129 130#define INJECT_ASM_REG "r4" 131 132#define RSEQ_INJECT_CLOBBER \ 133 , INJECT_ASM_REG 134 135#define RSEQ_INJECT_ASM(n) \ 136 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 137 "cmp " INJECT_ASM_REG ", #0\n\t" \ 138 "beq 333f\n\t" \ 139 "222:\n\t" \ 140 "subs " INJECT_ASM_REG ", #1\n\t" \ 141 "bne 222b\n\t" \ 142 "333:\n\t" 143 144#elif defined(__AARCH64EL__) 145 146#define RSEQ_INJECT_INPUT \ 147 , [loop_cnt_1] "Qo" (loop_cnt[1]) \ 148 , [loop_cnt_2] "Qo" (loop_cnt[2]) \ 149 , [loop_cnt_3] "Qo" (loop_cnt[3]) \ 150 , [loop_cnt_4] "Qo" (loop_cnt[4]) \ 151 , [loop_cnt_5] "Qo" (loop_cnt[5]) \ 152 , [loop_cnt_6] "Qo" (loop_cnt[6]) 153 154#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32 155 156#define RSEQ_INJECT_ASM(n) \ 157 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \ 158 " cbz " INJECT_ASM_REG ", 333f\n" \ 159 "222:\n" \ 160 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \ 161 " cbnz " INJECT_ASM_REG ", 222b\n" \ 162 "333:\n" 163 164#elif defined(__PPC__) 165 166#define RSEQ_INJECT_INPUT \ 167 , [loop_cnt_1]"m"(loop_cnt[1]) \ 168 , [loop_cnt_2]"m"(loop_cnt[2]) \ 169 , [loop_cnt_3]"m"(loop_cnt[3]) \ 170 , [loop_cnt_4]"m"(loop_cnt[4]) \ 171 , [loop_cnt_5]"m"(loop_cnt[5]) \ 172 , [loop_cnt_6]"m"(loop_cnt[6]) 173 174#define INJECT_ASM_REG "r18" 175 176#define RSEQ_INJECT_CLOBBER \ 177 , INJECT_ASM_REG 178 179#define RSEQ_INJECT_ASM(n) \ 180 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 181 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ 182 "beq 333f\n\t" \ 183 "222:\n\t" \ 184 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ 185 "bne 222b\n\t" \ 186 "333:\n\t" 187 188#elif defined(__mips__) 189 190#define RSEQ_INJECT_INPUT \ 191 , [loop_cnt_1]"m"(loop_cnt[1]) \ 192 , [loop_cnt_2]"m"(loop_cnt[2]) \ 193 , [loop_cnt_3]"m"(loop_cnt[3]) \ 194 , [loop_cnt_4]"m"(loop_cnt[4]) \ 195 , [loop_cnt_5]"m"(loop_cnt[5]) \ 196 , [loop_cnt_6]"m"(loop_cnt[6]) 197 198#define INJECT_ASM_REG "$5" 199 200#define RSEQ_INJECT_CLOBBER \ 201 , INJECT_ASM_REG 202 203#define RSEQ_INJECT_ASM(n) \ 204 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 205 "beqz " INJECT_ASM_REG ", 333f\n\t" \ 206 "222:\n\t" \ 207 "addiu " INJECT_ASM_REG ", -1\n\t" \ 208 "bnez " INJECT_ASM_REG ", 222b\n\t" \ 209 "333:\n\t" 210 211#else 212#error unsupported target 213#endif 214 215#define RSEQ_INJECT_FAILED \ 216 nr_abort++; 217 218#define RSEQ_INJECT_C(n) \ 219{ \ 220 int loc_i, loc_nr_loops = loop_cnt[n]; \ 221 \ 222 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ 223 rseq_barrier(); \ 224 } \ 225 if (loc_nr_loops == -1 && opt_modulo) { \ 226 if (yield_mod_cnt == opt_modulo - 1) { \ 227 if (opt_sleep > 0) \ 228 poll(NULL, 0, opt_sleep); \ 229 if (opt_yield) \ 230 sched_yield(); \ 231 if (opt_signal) \ 232 raise(SIGUSR1); \ 233 yield_mod_cnt = 0; \ 234 } else { \ 235 yield_mod_cnt++; \ 236 } \ 237 } \ 238} 239 240#else 241 242#define printf_verbose(fmt, ...) 243 244#endif /* BENCHMARK */ 245 246#include "rseq.h" 247 248struct percpu_lock_entry { 249 intptr_t v; 250} __attribute__((aligned(128))); 251 252struct percpu_lock { 253 struct percpu_lock_entry c[CPU_SETSIZE]; 254}; 255 256struct test_data_entry { 257 intptr_t count; 258} __attribute__((aligned(128))); 259 260struct spinlock_test_data { 261 struct percpu_lock lock; 262 struct test_data_entry c[CPU_SETSIZE]; 263}; 264 265struct spinlock_thread_test_data { 266 struct spinlock_test_data *data; 267 long long reps; 268 int reg; 269}; 270 271struct inc_test_data { 272 struct test_data_entry c[CPU_SETSIZE]; 273}; 274 275struct inc_thread_test_data { 276 struct inc_test_data *data; 277 long long reps; 278 int reg; 279}; 280 281struct percpu_list_node { 282 intptr_t data; 283 struct percpu_list_node *next; 284}; 285 286struct percpu_list_entry { 287 struct percpu_list_node *head; 288} __attribute__((aligned(128))); 289 290struct percpu_list { 291 struct percpu_list_entry c[CPU_SETSIZE]; 292}; 293 294#define BUFFER_ITEM_PER_CPU 100 295 296struct percpu_buffer_node { 297 intptr_t data; 298}; 299 300struct percpu_buffer_entry { 301 intptr_t offset; 302 intptr_t buflen; 303 struct percpu_buffer_node **array; 304} __attribute__((aligned(128))); 305 306struct percpu_buffer { 307 struct percpu_buffer_entry c[CPU_SETSIZE]; 308}; 309 310#define MEMCPY_BUFFER_ITEM_PER_CPU 100 311 312struct percpu_memcpy_buffer_node { 313 intptr_t data1; 314 uint64_t data2; 315}; 316 317struct percpu_memcpy_buffer_entry { 318 intptr_t offset; 319 intptr_t buflen; 320 struct percpu_memcpy_buffer_node *array; 321} __attribute__((aligned(128))); 322 323struct percpu_memcpy_buffer { 324 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; 325}; 326 327/* A simple percpu spinlock. Grabs lock on current cpu. */ 328static int rseq_this_cpu_lock(struct percpu_lock *lock) 329{ 330 int cpu; 331 332 for (;;) { 333 int ret; 334 335 cpu = rseq_cpu_start(); 336 ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 337 0, 1, cpu); 338 if (rseq_likely(!ret)) 339 break; 340 /* Retry if comparison fails or rseq aborts. */ 341 } 342 /* 343 * Acquire semantic when taking lock after control dependency. 344 * Matches rseq_smp_store_release(). 345 */ 346 rseq_smp_acquire__after_ctrl_dep(); 347 return cpu; 348} 349 350static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 351{ 352 assert(lock->c[cpu].v == 1); 353 /* 354 * Release lock, with release semantic. Matches 355 * rseq_smp_acquire__after_ctrl_dep(). 356 */ 357 rseq_smp_store_release(&lock->c[cpu].v, 0); 358} 359 360void *test_percpu_spinlock_thread(void *arg) 361{ 362 struct spinlock_thread_test_data *thread_data = arg; 363 struct spinlock_test_data *data = thread_data->data; 364 long long i, reps; 365 366 if (!opt_disable_rseq && thread_data->reg && 367 rseq_register_current_thread()) 368 abort(); 369 reps = thread_data->reps; 370 for (i = 0; i < reps; i++) { 371 int cpu = rseq_this_cpu_lock(&data->lock); 372 data->c[cpu].count++; 373 rseq_percpu_unlock(&data->lock, cpu); 374#ifndef BENCHMARK 375 if (i != 0 && !(i % (reps / 10))) 376 printf_verbose("tid %d: count %lld\n", 377 (int) rseq_gettid(), i); 378#endif 379 } 380 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 381 (int) rseq_gettid(), nr_abort, signals_delivered); 382 if (!opt_disable_rseq && thread_data->reg && 383 rseq_unregister_current_thread()) 384 abort(); 385 return NULL; 386} 387 388/* 389 * A simple test which implements a sharded counter using a per-cpu 390 * lock. Obviously real applications might prefer to simply use a 391 * per-cpu increment; however, this is reasonable for a test and the 392 * lock can be extended to synchronize more complicated operations. 393 */ 394void test_percpu_spinlock(void) 395{ 396 const int num_threads = opt_threads; 397 int i, ret; 398 uint64_t sum; 399 pthread_t test_threads[num_threads]; 400 struct spinlock_test_data data; 401 struct spinlock_thread_test_data thread_data[num_threads]; 402 403 memset(&data, 0, sizeof(data)); 404 for (i = 0; i < num_threads; i++) { 405 thread_data[i].reps = opt_reps; 406 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 407 thread_data[i].reg = 1; 408 else 409 thread_data[i].reg = 0; 410 thread_data[i].data = &data; 411 ret = pthread_create(&test_threads[i], NULL, 412 test_percpu_spinlock_thread, 413 &thread_data[i]); 414 if (ret) { 415 errno = ret; 416 perror("pthread_create"); 417 abort(); 418 } 419 } 420 421 for (i = 0; i < num_threads; i++) { 422 ret = pthread_join(test_threads[i], NULL); 423 if (ret) { 424 errno = ret; 425 perror("pthread_join"); 426 abort(); 427 } 428 } 429 430 sum = 0; 431 for (i = 0; i < CPU_SETSIZE; i++) 432 sum += data.c[i].count; 433 434 assert(sum == (uint64_t)opt_reps * num_threads); 435} 436 437void *test_percpu_inc_thread(void *arg) 438{ 439 struct inc_thread_test_data *thread_data = arg; 440 struct inc_test_data *data = thread_data->data; 441 long long i, reps; 442 443 if (!opt_disable_rseq && thread_data->reg && 444 rseq_register_current_thread()) 445 abort(); 446 reps = thread_data->reps; 447 for (i = 0; i < reps; i++) { 448 int ret; 449 450 do { 451 int cpu; 452 453 cpu = rseq_cpu_start(); 454 ret = rseq_addv(&data->c[cpu].count, 1, cpu); 455 } while (rseq_unlikely(ret)); 456#ifndef BENCHMARK 457 if (i != 0 && !(i % (reps / 10))) 458 printf_verbose("tid %d: count %lld\n", 459 (int) rseq_gettid(), i); 460#endif 461 } 462 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 463 (int) rseq_gettid(), nr_abort, signals_delivered); 464 if (!opt_disable_rseq && thread_data->reg && 465 rseq_unregister_current_thread()) 466 abort(); 467 return NULL; 468} 469 470void test_percpu_inc(void) 471{ 472 const int num_threads = opt_threads; 473 int i, ret; 474 uint64_t sum; 475 pthread_t test_threads[num_threads]; 476 struct inc_test_data data; 477 struct inc_thread_test_data thread_data[num_threads]; 478 479 memset(&data, 0, sizeof(data)); 480 for (i = 0; i < num_threads; i++) { 481 thread_data[i].reps = opt_reps; 482 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 483 thread_data[i].reg = 1; 484 else 485 thread_data[i].reg = 0; 486 thread_data[i].data = &data; 487 ret = pthread_create(&test_threads[i], NULL, 488 test_percpu_inc_thread, 489 &thread_data[i]); 490 if (ret) { 491 errno = ret; 492 perror("pthread_create"); 493 abort(); 494 } 495 } 496 497 for (i = 0; i < num_threads; i++) { 498 ret = pthread_join(test_threads[i], NULL); 499 if (ret) { 500 errno = ret; 501 perror("pthread_join"); 502 abort(); 503 } 504 } 505 506 sum = 0; 507 for (i = 0; i < CPU_SETSIZE; i++) 508 sum += data.c[i].count; 509 510 assert(sum == (uint64_t)opt_reps * num_threads); 511} 512 513void this_cpu_list_push(struct percpu_list *list, 514 struct percpu_list_node *node, 515 int *_cpu) 516{ 517 int cpu; 518 519 for (;;) { 520 intptr_t *targetptr, newval, expect; 521 int ret; 522 523 cpu = rseq_cpu_start(); 524 /* Load list->c[cpu].head with single-copy atomicity. */ 525 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 526 newval = (intptr_t)node; 527 targetptr = (intptr_t *)&list->c[cpu].head; 528 node->next = (struct percpu_list_node *)expect; 529 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); 530 if (rseq_likely(!ret)) 531 break; 532 /* Retry if comparison fails or rseq aborts. */ 533 } 534 if (_cpu) 535 *_cpu = cpu; 536} 537 538/* 539 * Unlike a traditional lock-less linked list; the availability of a 540 * rseq primitive allows us to implement pop without concerns over 541 * ABA-type races. 542 */ 543struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 544 int *_cpu) 545{ 546 struct percpu_list_node *node = NULL; 547 int cpu; 548 549 for (;;) { 550 struct percpu_list_node *head; 551 intptr_t *targetptr, expectnot, *load; 552 long offset; 553 int ret; 554 555 cpu = rseq_cpu_start(); 556 targetptr = (intptr_t *)&list->c[cpu].head; 557 expectnot = (intptr_t)NULL; 558 offset = offsetof(struct percpu_list_node, next); 559 load = (intptr_t *)&head; 560 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, 561 offset, load, cpu); 562 if (rseq_likely(!ret)) { 563 node = head; 564 break; 565 } 566 if (ret > 0) 567 break; 568 /* Retry if rseq aborts. */ 569 } 570 if (_cpu) 571 *_cpu = cpu; 572 return node; 573} 574 575/* 576 * __percpu_list_pop is not safe against concurrent accesses. Should 577 * only be used on lists that are not concurrently modified. 578 */ 579struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 580{ 581 struct percpu_list_node *node; 582 583 node = list->c[cpu].head; 584 if (!node) 585 return NULL; 586 list->c[cpu].head = node->next; 587 return node; 588} 589 590void *test_percpu_list_thread(void *arg) 591{ 592 long long i, reps; 593 struct percpu_list *list = (struct percpu_list *)arg; 594 595 if (!opt_disable_rseq && rseq_register_current_thread()) 596 abort(); 597 598 reps = opt_reps; 599 for (i = 0; i < reps; i++) { 600 struct percpu_list_node *node; 601 602 node = this_cpu_list_pop(list, NULL); 603 if (opt_yield) 604 sched_yield(); /* encourage shuffling */ 605 if (node) 606 this_cpu_list_push(list, node, NULL); 607 } 608 609 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 610 (int) rseq_gettid(), nr_abort, signals_delivered); 611 if (!opt_disable_rseq && rseq_unregister_current_thread()) 612 abort(); 613 614 return NULL; 615} 616 617/* Simultaneous modification to a per-cpu linked list from many threads. */ 618void test_percpu_list(void) 619{ 620 const int num_threads = opt_threads; 621 int i, j, ret; 622 uint64_t sum = 0, expected_sum = 0; 623 struct percpu_list list; 624 pthread_t test_threads[num_threads]; 625 cpu_set_t allowed_cpus; 626 627 memset(&list, 0, sizeof(list)); 628 629 /* Generate list entries for every usable cpu. */ 630 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 631 for (i = 0; i < CPU_SETSIZE; i++) { 632 if (!CPU_ISSET(i, &allowed_cpus)) 633 continue; 634 for (j = 1; j <= 100; j++) { 635 struct percpu_list_node *node; 636 637 expected_sum += j; 638 639 node = malloc(sizeof(*node)); 640 assert(node); 641 node->data = j; 642 node->next = list.c[i].head; 643 list.c[i].head = node; 644 } 645 } 646 647 for (i = 0; i < num_threads; i++) { 648 ret = pthread_create(&test_threads[i], NULL, 649 test_percpu_list_thread, &list); 650 if (ret) { 651 errno = ret; 652 perror("pthread_create"); 653 abort(); 654 } 655 } 656 657 for (i = 0; i < num_threads; i++) { 658 ret = pthread_join(test_threads[i], NULL); 659 if (ret) { 660 errno = ret; 661 perror("pthread_join"); 662 abort(); 663 } 664 } 665 666 for (i = 0; i < CPU_SETSIZE; i++) { 667 struct percpu_list_node *node; 668 669 if (!CPU_ISSET(i, &allowed_cpus)) 670 continue; 671 672 while ((node = __percpu_list_pop(&list, i))) { 673 sum += node->data; 674 free(node); 675 } 676 } 677 678 /* 679 * All entries should now be accounted for (unless some external 680 * actor is interfering with our allowed affinity while this 681 * test is running). 682 */ 683 assert(sum == expected_sum); 684} 685 686bool this_cpu_buffer_push(struct percpu_buffer *buffer, 687 struct percpu_buffer_node *node, 688 int *_cpu) 689{ 690 bool result = false; 691 int cpu; 692 693 for (;;) { 694 intptr_t *targetptr_spec, newval_spec; 695 intptr_t *targetptr_final, newval_final; 696 intptr_t offset; 697 int ret; 698 699 cpu = rseq_cpu_start(); 700 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 701 if (offset == buffer->c[cpu].buflen) 702 break; 703 newval_spec = (intptr_t)node; 704 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 705 newval_final = offset + 1; 706 targetptr_final = &buffer->c[cpu].offset; 707 if (opt_mb) 708 ret = rseq_cmpeqv_trystorev_storev_release( 709 targetptr_final, offset, targetptr_spec, 710 newval_spec, newval_final, cpu); 711 else 712 ret = rseq_cmpeqv_trystorev_storev(targetptr_final, 713 offset, targetptr_spec, newval_spec, 714 newval_final, cpu); 715 if (rseq_likely(!ret)) { 716 result = true; 717 break; 718 } 719 /* Retry if comparison fails or rseq aborts. */ 720 } 721 if (_cpu) 722 *_cpu = cpu; 723 return result; 724} 725 726struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 727 int *_cpu) 728{ 729 struct percpu_buffer_node *head; 730 int cpu; 731 732 for (;;) { 733 intptr_t *targetptr, newval; 734 intptr_t offset; 735 int ret; 736 737 cpu = rseq_cpu_start(); 738 /* Load offset with single-copy atomicity. */ 739 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 740 if (offset == 0) { 741 head = NULL; 742 break; 743 } 744 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 745 newval = offset - 1; 746 targetptr = (intptr_t *)&buffer->c[cpu].offset; 747 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, 748 (intptr_t *)&buffer->c[cpu].array[offset - 1], 749 (intptr_t)head, newval, cpu); 750 if (rseq_likely(!ret)) 751 break; 752 /* Retry if comparison fails or rseq aborts. */ 753 } 754 if (_cpu) 755 *_cpu = cpu; 756 return head; 757} 758 759/* 760 * __percpu_buffer_pop is not safe against concurrent accesses. Should 761 * only be used on buffers that are not concurrently modified. 762 */ 763struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 764 int cpu) 765{ 766 struct percpu_buffer_node *head; 767 intptr_t offset; 768 769 offset = buffer->c[cpu].offset; 770 if (offset == 0) 771 return NULL; 772 head = buffer->c[cpu].array[offset - 1]; 773 buffer->c[cpu].offset = offset - 1; 774 return head; 775} 776 777void *test_percpu_buffer_thread(void *arg) 778{ 779 long long i, reps; 780 struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 781 782 if (!opt_disable_rseq && rseq_register_current_thread()) 783 abort(); 784 785 reps = opt_reps; 786 for (i = 0; i < reps; i++) { 787 struct percpu_buffer_node *node; 788 789 node = this_cpu_buffer_pop(buffer, NULL); 790 if (opt_yield) 791 sched_yield(); /* encourage shuffling */ 792 if (node) { 793 if (!this_cpu_buffer_push(buffer, node, NULL)) { 794 /* Should increase buffer size. */ 795 abort(); 796 } 797 } 798 } 799 800 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 801 (int) rseq_gettid(), nr_abort, signals_delivered); 802 if (!opt_disable_rseq && rseq_unregister_current_thread()) 803 abort(); 804 805 return NULL; 806} 807 808/* Simultaneous modification to a per-cpu buffer from many threads. */ 809void test_percpu_buffer(void) 810{ 811 const int num_threads = opt_threads; 812 int i, j, ret; 813 uint64_t sum = 0, expected_sum = 0; 814 struct percpu_buffer buffer; 815 pthread_t test_threads[num_threads]; 816 cpu_set_t allowed_cpus; 817 818 memset(&buffer, 0, sizeof(buffer)); 819 820 /* Generate list entries for every usable cpu. */ 821 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 822 for (i = 0; i < CPU_SETSIZE; i++) { 823 if (!CPU_ISSET(i, &allowed_cpus)) 824 continue; 825 /* Worse-case is every item in same CPU. */ 826 buffer.c[i].array = 827 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 828 BUFFER_ITEM_PER_CPU); 829 assert(buffer.c[i].array); 830 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 831 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 832 struct percpu_buffer_node *node; 833 834 expected_sum += j; 835 836 /* 837 * We could theoretically put the word-sized 838 * "data" directly in the buffer. However, we 839 * want to model objects that would not fit 840 * within a single word, so allocate an object 841 * for each node. 842 */ 843 node = malloc(sizeof(*node)); 844 assert(node); 845 node->data = j; 846 buffer.c[i].array[j - 1] = node; 847 buffer.c[i].offset++; 848 } 849 } 850 851 for (i = 0; i < num_threads; i++) { 852 ret = pthread_create(&test_threads[i], NULL, 853 test_percpu_buffer_thread, &buffer); 854 if (ret) { 855 errno = ret; 856 perror("pthread_create"); 857 abort(); 858 } 859 } 860 861 for (i = 0; i < num_threads; i++) { 862 ret = pthread_join(test_threads[i], NULL); 863 if (ret) { 864 errno = ret; 865 perror("pthread_join"); 866 abort(); 867 } 868 } 869 870 for (i = 0; i < CPU_SETSIZE; i++) { 871 struct percpu_buffer_node *node; 872 873 if (!CPU_ISSET(i, &allowed_cpus)) 874 continue; 875 876 while ((node = __percpu_buffer_pop(&buffer, i))) { 877 sum += node->data; 878 free(node); 879 } 880 free(buffer.c[i].array); 881 } 882 883 /* 884 * All entries should now be accounted for (unless some external 885 * actor is interfering with our allowed affinity while this 886 * test is running). 887 */ 888 assert(sum == expected_sum); 889} 890 891bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 892 struct percpu_memcpy_buffer_node item, 893 int *_cpu) 894{ 895 bool result = false; 896 int cpu; 897 898 for (;;) { 899 intptr_t *targetptr_final, newval_final, offset; 900 char *destptr, *srcptr; 901 size_t copylen; 902 int ret; 903 904 cpu = rseq_cpu_start(); 905 /* Load offset with single-copy atomicity. */ 906 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 907 if (offset == buffer->c[cpu].buflen) 908 break; 909 destptr = (char *)&buffer->c[cpu].array[offset]; 910 srcptr = (char *)&item; 911 /* copylen must be <= 4kB. */ 912 copylen = sizeof(item); 913 newval_final = offset + 1; 914 targetptr_final = &buffer->c[cpu].offset; 915 if (opt_mb) 916 ret = rseq_cmpeqv_trymemcpy_storev_release( 917 targetptr_final, offset, 918 destptr, srcptr, copylen, 919 newval_final, cpu); 920 else 921 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 922 offset, destptr, srcptr, copylen, 923 newval_final, cpu); 924 if (rseq_likely(!ret)) { 925 result = true; 926 break; 927 } 928 /* Retry if comparison fails or rseq aborts. */ 929 } 930 if (_cpu) 931 *_cpu = cpu; 932 return result; 933} 934 935bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 936 struct percpu_memcpy_buffer_node *item, 937 int *_cpu) 938{ 939 bool result = false; 940 int cpu; 941 942 for (;;) { 943 intptr_t *targetptr_final, newval_final, offset; 944 char *destptr, *srcptr; 945 size_t copylen; 946 int ret; 947 948 cpu = rseq_cpu_start(); 949 /* Load offset with single-copy atomicity. */ 950 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 951 if (offset == 0) 952 break; 953 destptr = (char *)item; 954 srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 955 /* copylen must be <= 4kB. */ 956 copylen = sizeof(*item); 957 newval_final = offset - 1; 958 targetptr_final = &buffer->c[cpu].offset; 959 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 960 offset, destptr, srcptr, copylen, 961 newval_final, cpu); 962 if (rseq_likely(!ret)) { 963 result = true; 964 break; 965 } 966 /* Retry if comparison fails or rseq aborts. */ 967 } 968 if (_cpu) 969 *_cpu = cpu; 970 return result; 971} 972 973/* 974 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 975 * only be used on buffers that are not concurrently modified. 976 */ 977bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 978 struct percpu_memcpy_buffer_node *item, 979 int cpu) 980{ 981 intptr_t offset; 982 983 offset = buffer->c[cpu].offset; 984 if (offset == 0) 985 return false; 986 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 987 buffer->c[cpu].offset = offset - 1; 988 return true; 989} 990 991void *test_percpu_memcpy_buffer_thread(void *arg) 992{ 993 long long i, reps; 994 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 995 996 if (!opt_disable_rseq && rseq_register_current_thread()) 997 abort(); 998 999 reps = opt_reps; 1000 for (i = 0; i < reps; i++) { 1001 struct percpu_memcpy_buffer_node item; 1002 bool result; 1003 1004 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 1005 if (opt_yield) 1006 sched_yield(); /* encourage shuffling */ 1007 if (result) { 1008 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 1009 /* Should increase buffer size. */ 1010 abort(); 1011 } 1012 } 1013 } 1014 1015 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 1016 (int) rseq_gettid(), nr_abort, signals_delivered); 1017 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1018 abort(); 1019 1020 return NULL; 1021} 1022 1023/* Simultaneous modification to a per-cpu buffer from many threads. */ 1024void test_percpu_memcpy_buffer(void) 1025{ 1026 const int num_threads = opt_threads; 1027 int i, j, ret; 1028 uint64_t sum = 0, expected_sum = 0; 1029 struct percpu_memcpy_buffer buffer; 1030 pthread_t test_threads[num_threads]; 1031 cpu_set_t allowed_cpus; 1032 1033 memset(&buffer, 0, sizeof(buffer)); 1034 1035 /* Generate list entries for every usable cpu. */ 1036 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 1037 for (i = 0; i < CPU_SETSIZE; i++) { 1038 if (!CPU_ISSET(i, &allowed_cpus)) 1039 continue; 1040 /* Worse-case is every item in same CPU. */ 1041 buffer.c[i].array = 1042 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 1043 MEMCPY_BUFFER_ITEM_PER_CPU); 1044 assert(buffer.c[i].array); 1045 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 1046 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 1047 expected_sum += 2 * j + 1; 1048 1049 /* 1050 * We could theoretically put the word-sized 1051 * "data" directly in the buffer. However, we 1052 * want to model objects that would not fit 1053 * within a single word, so allocate an object 1054 * for each node. 1055 */ 1056 buffer.c[i].array[j - 1].data1 = j; 1057 buffer.c[i].array[j - 1].data2 = j + 1; 1058 buffer.c[i].offset++; 1059 } 1060 } 1061 1062 for (i = 0; i < num_threads; i++) { 1063 ret = pthread_create(&test_threads[i], NULL, 1064 test_percpu_memcpy_buffer_thread, 1065 &buffer); 1066 if (ret) { 1067 errno = ret; 1068 perror("pthread_create"); 1069 abort(); 1070 } 1071 } 1072 1073 for (i = 0; i < num_threads; i++) { 1074 ret = pthread_join(test_threads[i], NULL); 1075 if (ret) { 1076 errno = ret; 1077 perror("pthread_join"); 1078 abort(); 1079 } 1080 } 1081 1082 for (i = 0; i < CPU_SETSIZE; i++) { 1083 struct percpu_memcpy_buffer_node item; 1084 1085 if (!CPU_ISSET(i, &allowed_cpus)) 1086 continue; 1087 1088 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1089 sum += item.data1; 1090 sum += item.data2; 1091 } 1092 free(buffer.c[i].array); 1093 } 1094 1095 /* 1096 * All entries should now be accounted for (unless some external 1097 * actor is interfering with our allowed affinity while this 1098 * test is running). 1099 */ 1100 assert(sum == expected_sum); 1101} 1102 1103static void test_signal_interrupt_handler(int signo) 1104{ 1105 signals_delivered++; 1106} 1107 1108static int set_signal_handler(void) 1109{ 1110 int ret = 0; 1111 struct sigaction sa; 1112 sigset_t sigset; 1113 1114 ret = sigemptyset(&sigset); 1115 if (ret < 0) { 1116 perror("sigemptyset"); 1117 return ret; 1118 } 1119 1120 sa.sa_handler = test_signal_interrupt_handler; 1121 sa.sa_mask = sigset; 1122 sa.sa_flags = 0; 1123 ret = sigaction(SIGUSR1, &sa, NULL); 1124 if (ret < 0) { 1125 perror("sigaction"); 1126 return ret; 1127 } 1128 1129 printf_verbose("Signal handler set for SIGUSR1\n"); 1130 1131 return ret; 1132} 1133 1134struct test_membarrier_thread_args { 1135 int stop; 1136 intptr_t percpu_list_ptr; 1137}; 1138 1139/* Worker threads modify data in their "active" percpu lists. */ 1140void *test_membarrier_worker_thread(void *arg) 1141{ 1142 struct test_membarrier_thread_args *args = 1143 (struct test_membarrier_thread_args *)arg; 1144 const int iters = opt_reps; 1145 int i; 1146 1147 if (rseq_register_current_thread()) { 1148 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1149 errno, strerror(errno)); 1150 abort(); 1151 } 1152 1153 /* Wait for initialization. */ 1154 while (!atomic_load(&args->percpu_list_ptr)) {} 1155 1156 for (i = 0; i < iters; ++i) { 1157 int ret; 1158 1159 do { 1160 int cpu = rseq_cpu_start(); 1161 1162 ret = rseq_offset_deref_addv(&args->percpu_list_ptr, 1163 sizeof(struct percpu_list_entry) * cpu, 1, cpu); 1164 } while (rseq_unlikely(ret)); 1165 } 1166 1167 if (rseq_unregister_current_thread()) { 1168 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", 1169 errno, strerror(errno)); 1170 abort(); 1171 } 1172 return NULL; 1173} 1174 1175void test_membarrier_init_percpu_list(struct percpu_list *list) 1176{ 1177 int i; 1178 1179 memset(list, 0, sizeof(*list)); 1180 for (i = 0; i < CPU_SETSIZE; i++) { 1181 struct percpu_list_node *node; 1182 1183 node = malloc(sizeof(*node)); 1184 assert(node); 1185 node->data = 0; 1186 node->next = NULL; 1187 list->c[i].head = node; 1188 } 1189} 1190 1191void test_membarrier_free_percpu_list(struct percpu_list *list) 1192{ 1193 int i; 1194 1195 for (i = 0; i < CPU_SETSIZE; i++) 1196 free(list->c[i].head); 1197} 1198 1199static int sys_membarrier(int cmd, int flags, int cpu_id) 1200{ 1201 return syscall(__NR_membarrier, cmd, flags, cpu_id); 1202} 1203 1204/* 1205 * The manager thread swaps per-cpu lists that worker threads see, 1206 * and validates that there are no unexpected modifications. 1207 */ 1208void *test_membarrier_manager_thread(void *arg) 1209{ 1210 struct test_membarrier_thread_args *args = 1211 (struct test_membarrier_thread_args *)arg; 1212 struct percpu_list list_a, list_b; 1213 intptr_t expect_a = 0, expect_b = 0; 1214 int cpu_a = 0, cpu_b = 0; 1215 1216 if (rseq_register_current_thread()) { 1217 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1218 errno, strerror(errno)); 1219 abort(); 1220 } 1221 1222 /* Init lists. */ 1223 test_membarrier_init_percpu_list(&list_a); 1224 test_membarrier_init_percpu_list(&list_b); 1225 1226 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); 1227 1228 while (!atomic_load(&args->stop)) { 1229 /* list_a is "active". */ 1230 cpu_a = rand() % CPU_SETSIZE; 1231 /* 1232 * As list_b is "inactive", we should never see changes 1233 * to list_b. 1234 */ 1235 if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { 1236 fprintf(stderr, "Membarrier test failed\n"); 1237 abort(); 1238 } 1239 1240 /* Make list_b "active". */ 1241 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); 1242 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 1243 MEMBARRIER_CMD_FLAG_CPU, cpu_a) && 1244 errno != ENXIO /* missing CPU */) { 1245 perror("sys_membarrier"); 1246 abort(); 1247 } 1248 /* 1249 * Cpu A should now only modify list_b, so the values 1250 * in list_a should be stable. 1251 */ 1252 expect_a = atomic_load(&list_a.c[cpu_a].head->data); 1253 1254 cpu_b = rand() % CPU_SETSIZE; 1255 /* 1256 * As list_a is "inactive", we should never see changes 1257 * to list_a. 1258 */ 1259 if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { 1260 fprintf(stderr, "Membarrier test failed\n"); 1261 abort(); 1262 } 1263 1264 /* Make list_a "active". */ 1265 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); 1266 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 1267 MEMBARRIER_CMD_FLAG_CPU, cpu_b) && 1268 errno != ENXIO /* missing CPU*/) { 1269 perror("sys_membarrier"); 1270 abort(); 1271 } 1272 /* Remember a value from list_b. */ 1273 expect_b = atomic_load(&list_b.c[cpu_b].head->data); 1274 } 1275 1276 test_membarrier_free_percpu_list(&list_a); 1277 test_membarrier_free_percpu_list(&list_b); 1278 1279 if (rseq_unregister_current_thread()) { 1280 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", 1281 errno, strerror(errno)); 1282 abort(); 1283 } 1284 return NULL; 1285} 1286 1287/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ 1288#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV 1289void test_membarrier(void) 1290{ 1291 const int num_threads = opt_threads; 1292 struct test_membarrier_thread_args thread_args; 1293 pthread_t worker_threads[num_threads]; 1294 pthread_t manager_thread; 1295 int i, ret; 1296 1297 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { 1298 perror("sys_membarrier"); 1299 abort(); 1300 } 1301 1302 thread_args.stop = 0; 1303 thread_args.percpu_list_ptr = 0; 1304 ret = pthread_create(&manager_thread, NULL, 1305 test_membarrier_manager_thread, &thread_args); 1306 if (ret) { 1307 errno = ret; 1308 perror("pthread_create"); 1309 abort(); 1310 } 1311 1312 for (i = 0; i < num_threads; i++) { 1313 ret = pthread_create(&worker_threads[i], NULL, 1314 test_membarrier_worker_thread, &thread_args); 1315 if (ret) { 1316 errno = ret; 1317 perror("pthread_create"); 1318 abort(); 1319 } 1320 } 1321 1322 1323 for (i = 0; i < num_threads; i++) { 1324 ret = pthread_join(worker_threads[i], NULL); 1325 if (ret) { 1326 errno = ret; 1327 perror("pthread_join"); 1328 abort(); 1329 } 1330 } 1331 1332 atomic_store(&thread_args.stop, 1); 1333 ret = pthread_join(manager_thread, NULL); 1334 if (ret) { 1335 errno = ret; 1336 perror("pthread_join"); 1337 abort(); 1338 } 1339} 1340#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ 1341void test_membarrier(void) 1342{ 1343 fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " 1344 "Skipping membarrier test.\n"); 1345} 1346#endif 1347 1348static void show_usage(int argc, char **argv) 1349{ 1350 printf("Usage : %s <OPTIONS>\n", 1351 argv[0]); 1352 printf("OPTIONS:\n"); 1353 printf(" [-1 loops] Number of loops for delay injection 1\n"); 1354 printf(" [-2 loops] Number of loops for delay injection 2\n"); 1355 printf(" [-3 loops] Number of loops for delay injection 3\n"); 1356 printf(" [-4 loops] Number of loops for delay injection 4\n"); 1357 printf(" [-5 loops] Number of loops for delay injection 5\n"); 1358 printf(" [-6 loops] Number of loops for delay injection 6\n"); 1359 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); 1360 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); 1361 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); 1362 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); 1363 printf(" [-y] Yield\n"); 1364 printf(" [-k] Kill thread with signal\n"); 1365 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); 1366 printf(" [-t N] Number of threads (default 200)\n"); 1367 printf(" [-r N] Number of repetitions per thread (default 5000)\n"); 1368 printf(" [-d] Disable rseq system call (no initialization)\n"); 1369 printf(" [-D M] Disable rseq for each M threads\n"); 1370 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); 1371 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); 1372 printf(" [-v] Verbose output.\n"); 1373 printf(" [-h] Show this help.\n"); 1374 printf("\n"); 1375} 1376 1377int main(int argc, char **argv) 1378{ 1379 int i; 1380 1381 for (i = 1; i < argc; i++) { 1382 if (argv[i][0] != '-') 1383 continue; 1384 switch (argv[i][1]) { 1385 case '1': 1386 case '2': 1387 case '3': 1388 case '4': 1389 case '5': 1390 case '6': 1391 case '7': 1392 case '8': 1393 case '9': 1394 if (argc < i + 2) { 1395 show_usage(argc, argv); 1396 goto error; 1397 } 1398 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); 1399 i++; 1400 break; 1401 case 'm': 1402 if (argc < i + 2) { 1403 show_usage(argc, argv); 1404 goto error; 1405 } 1406 opt_modulo = atol(argv[i + 1]); 1407 if (opt_modulo < 0) { 1408 show_usage(argc, argv); 1409 goto error; 1410 } 1411 i++; 1412 break; 1413 case 's': 1414 if (argc < i + 2) { 1415 show_usage(argc, argv); 1416 goto error; 1417 } 1418 opt_sleep = atol(argv[i + 1]); 1419 if (opt_sleep < 0) { 1420 show_usage(argc, argv); 1421 goto error; 1422 } 1423 i++; 1424 break; 1425 case 'y': 1426 opt_yield = 1; 1427 break; 1428 case 'k': 1429 opt_signal = 1; 1430 break; 1431 case 'd': 1432 opt_disable_rseq = 1; 1433 break; 1434 case 'D': 1435 if (argc < i + 2) { 1436 show_usage(argc, argv); 1437 goto error; 1438 } 1439 opt_disable_mod = atol(argv[i + 1]); 1440 if (opt_disable_mod < 0) { 1441 show_usage(argc, argv); 1442 goto error; 1443 } 1444 i++; 1445 break; 1446 case 't': 1447 if (argc < i + 2) { 1448 show_usage(argc, argv); 1449 goto error; 1450 } 1451 opt_threads = atol(argv[i + 1]); 1452 if (opt_threads < 0) { 1453 show_usage(argc, argv); 1454 goto error; 1455 } 1456 i++; 1457 break; 1458 case 'r': 1459 if (argc < i + 2) { 1460 show_usage(argc, argv); 1461 goto error; 1462 } 1463 opt_reps = atoll(argv[i + 1]); 1464 if (opt_reps < 0) { 1465 show_usage(argc, argv); 1466 goto error; 1467 } 1468 i++; 1469 break; 1470 case 'h': 1471 show_usage(argc, argv); 1472 goto end; 1473 case 'T': 1474 if (argc < i + 2) { 1475 show_usage(argc, argv); 1476 goto error; 1477 } 1478 opt_test = *argv[i + 1]; 1479 switch (opt_test) { 1480 case 's': 1481 case 'l': 1482 case 'i': 1483 case 'b': 1484 case 'm': 1485 case 'r': 1486 break; 1487 default: 1488 show_usage(argc, argv); 1489 goto error; 1490 } 1491 i++; 1492 break; 1493 case 'v': 1494 verbose = 1; 1495 break; 1496 case 'M': 1497 opt_mb = 1; 1498 break; 1499 default: 1500 show_usage(argc, argv); 1501 goto error; 1502 } 1503 } 1504 1505 loop_cnt_1 = loop_cnt[1]; 1506 loop_cnt_2 = loop_cnt[2]; 1507 loop_cnt_3 = loop_cnt[3]; 1508 loop_cnt_4 = loop_cnt[4]; 1509 loop_cnt_5 = loop_cnt[5]; 1510 loop_cnt_6 = loop_cnt[6]; 1511 1512 if (set_signal_handler()) 1513 goto error; 1514 1515 if (!opt_disable_rseq && rseq_register_current_thread()) 1516 goto error; 1517 switch (opt_test) { 1518 case 's': 1519 printf_verbose("spinlock\n"); 1520 test_percpu_spinlock(); 1521 break; 1522 case 'l': 1523 printf_verbose("linked list\n"); 1524 test_percpu_list(); 1525 break; 1526 case 'b': 1527 printf_verbose("buffer\n"); 1528 test_percpu_buffer(); 1529 break; 1530 case 'm': 1531 printf_verbose("memcpy buffer\n"); 1532 test_percpu_memcpy_buffer(); 1533 break; 1534 case 'i': 1535 printf_verbose("counter increment\n"); 1536 test_percpu_inc(); 1537 break; 1538 case 'r': 1539 printf_verbose("membarrier\n"); 1540 test_membarrier(); 1541 break; 1542 } 1543 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1544 abort(); 1545end: 1546 return 0; 1547 1548error: 1549 return -1; 1550} 1551