// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

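/* Call the gettid syscall directly rather than relying on a libc wrapper. */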
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

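/*
 * Delay-injection loop counts. Entries 1-9 are settable from the command
 * line through the -1 .. -9 options.
 */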
#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

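/*
 * Mirror loop_cnt[1..6] under fixed asm symbol names so the x86
 * delay-injection assembly can address them directly.
 */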
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

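/*
 * RSEQ_INJECT_ASM(n) expands, inside the rseq critical section assembly, to
 * a busy loop of loop_cnt[n] iterations used to widen the race window.
 * RSEQ_INJECT_CLOBBER adds the scratch register(s) it consumes to the asm
 * clobber list, and RSEQ_INJECT_INPUT (where needed) passes the loop counts
 * as memory operands.
 */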
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
	"222:\n"							\
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

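/*
 * RSEQ_INJECT_FAILED runs on each rseq abort. RSEQ_INJECT_C(n) is the
 * C-level injection hook: it busy-waits loop_cnt[n] times and, when the
 * count is -1 and -m is set, periodically sleeps, yields and/or raises
 * SIGUSR1 to perturb the critical sections.
 */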
#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

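/*
 * The injection macros above must be defined before including rseq.h so
 * that the rseq fast-path helpers pick them up.
 */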
#include "rseq.h"

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

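/*
 * Spinlock test thread: repeatedly take the lock for the current cpu,
 * increment that cpu's counter, then release the lock.
 */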
void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

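/*
 * Increment test thread: retry rseq_addv() on the current cpu's counter
 * until it completes without being aborted.
 */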
void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

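/*
 * Push a node onto the current cpu's list head with a compare-and-store
 * rseq sequence, retrying on abort or comparison failure.
 */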
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concern for
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						   offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

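/*
 * List test thread: repeatedly pop a node from the current cpu's list and
 * push it back, optionally yielding in between to encourage migration.
 */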
void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads.  */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

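/*
 * Push a node onto the current cpu's buffer: speculatively store the node
 * pointer into the array slot, then publish the new offset with the final
 * store. Returns false if the buffer is full.
 */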
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

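/*
 * Pop the topmost node from the current cpu's buffer, or return NULL if
 * the buffer is empty.
 */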
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads.  */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

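/*
 * Copy an item by value into the current cpu's memcpy buffer: the item is
 * memcpy'd into the array slot within the rseq critical section, then the
 * offset is published with the final store. Returns false if the buffer
 * is full.
 */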
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

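/*
 * Copy the topmost item out of the current cpu's memcpy buffer and shrink
 * the offset. Returns false if the buffer is empty.
 */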
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();  /* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads.  */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so each item carries
			 * two fields and is copied by value.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

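/* SIGUSR1 handler: count signal deliveries for the verbose report. */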
static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

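/* Install the SIGUSR1 handler used by the -k signal-injection option. */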
static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

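/*
 * Shared state for the membarrier test: "stop" tells the manager thread to
 * exit, and percpu_list_ptr points to the currently "active" percpu list
 * that worker threads increment.
 */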
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!atomic_load(&args->percpu_list_ptr)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = rseq_cpu_start();

			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

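/* Thin wrapper around the membarrier(2) system call. */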
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	atomic_store(&thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

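/*
 * Example invocation (illustrative; assuming the built binary is named
 * param_test): run the per-cpu list test with 16 threads, 5000 reps per
 * thread and verbose output:
 *
 *	./param_test -T l -t 16 -r 5000 -v
 */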
int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}