1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Userfaultfd unit tests.
4 *
5 *  Copyright (C) 2015-2023  Red Hat, Inc.
6 */
7
8#include "uffd-common.h"
9
10#include "../../../../mm/gup_test.h"
11
12#ifdef __NR_userfaultfd
13
/*
 * Size of the memory area each unit test works on.  The unit tests don't
 * need a large or random size, so a fixed 32MB is used for now.
 */
#define  UFFD_TEST_MEM_SIZE               (32UL * 1024 * 1024)

/* One bit per memory type that a test can be run against */
#define  MEM_ANON                         BIT_ULL(0)
#define  MEM_SHMEM                        BIT_ULL(1)
#define  MEM_SHMEM_PRIVATE                BIT_ULL(2)
#define  MEM_HUGETLB                      BIT_ULL(3)
#define  MEM_HUGETLB_PRIVATE              BIT_ULL(4)

/* All of the memory types above */
#define  MEM_ALL                          \
	(MEM_ANON |                       \
	 MEM_SHMEM | MEM_SHMEM_PRIVATE |  \
	 MEM_HUGETLB | MEM_HUGETLB_PRIVATE)
25
26struct mem_type {
27	const char *name;
28	unsigned int mem_flag;
29	uffd_test_ops_t *mem_ops;
30	bool shared;
31};
32typedef struct mem_type mem_type_t;
33
34mem_type_t mem_types[] = {
35	{
36		.name = "anon",
37		.mem_flag = MEM_ANON,
38		.mem_ops = &anon_uffd_test_ops,
39		.shared = false,
40	},
41	{
42		.name = "shmem",
43		.mem_flag = MEM_SHMEM,
44		.mem_ops = &shmem_uffd_test_ops,
45		.shared = true,
46	},
47	{
48		.name = "shmem-private",
49		.mem_flag = MEM_SHMEM_PRIVATE,
50		.mem_ops = &shmem_uffd_test_ops,
51		.shared = false,
52	},
53	{
54		.name = "hugetlb",
55		.mem_flag = MEM_HUGETLB,
56		.mem_ops = &hugetlb_uffd_test_ops,
57		.shared = true,
58	},
59	{
60		.name = "hugetlb-private",
61		.mem_flag = MEM_HUGETLB_PRIVATE,
62		.mem_ops = &hugetlb_uffd_test_ops,
63		.shared = false,
64	},
65};
66
67/* Arguments to be passed over to each uffd unit test */
68struct uffd_test_args {
69	mem_type_t *mem_type;
70};
71typedef struct uffd_test_args uffd_test_args_t;
72
/* A unit test body; it reports its own result via uffd_test_pass/fail/skip */
74typedef void (*uffd_test_fn)(uffd_test_args_t *);
75
76typedef struct {
77	const char *name;
78	uffd_test_fn uffd_fn;
79	unsigned int mem_targets;
80	uint64_t uffd_feature_required;
81} uffd_test_case_t;
82
/* Print the one-line summary of the kselftest pass/skip/fail/total counters. */
static void uffd_test_report(void)
{
	unsigned int passed = ksft_get_pass_cnt();
	unsigned int skipped = ksft_get_xskip_cnt();
	unsigned int failed = ksft_get_fail_cnt();
	unsigned int total = ksft_test_num();

	printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n",
	       passed, skipped, failed, total);
}
91
/* Finish the current "Testing ..." line as passed and count the pass. */
static void uffd_test_pass(void)
{
	puts("done");
	ksft_inc_pass_cnt();
}
97
/*
 * Announce a test: prints "Testing <printf-style description>... " with
 * no newline and flushes stdout.
 */
#define  uffd_test_start(...)  do {		\
		fputs("Testing ", stdout);	\
		printf(__VA_ARGS__);		\
		fputs("... ", stdout);		\
		fflush(stdout);			\
	} while (0)
104
/*
 * Finish the current "Testing ..." line as failed, with a printf-style
 * reason, and count the failure.
 */
#define  uffd_test_fail(...)  do {			\
		fputs("failed [reason: ", stdout);	\
		printf(__VA_ARGS__);			\
		fputs("]\n", stdout);			\
		ksft_inc_fail_cnt();			\
	} while (0)
111
/* Finish the current "Testing ..." line as skipped and count the skip. */
static void uffd_test_skip(const char *message)
{
	fprintf(stdout, "skipped [reason: %s]\n", message);
	ksft_inc_xskip_cnt();
}
117
118/*
119 * Returns 1 if specific userfaultfd supported, 0 otherwise.  Note, we'll
120 * return 1 even if some test failed as long as uffd supported, because in
121 * that case we still want to proceed with the rest uffd unit tests.
122 */
123static int test_uffd_api(bool use_dev)
124{
125	struct uffdio_api uffdio_api;
126	int uffd;
127
128	uffd_test_start("UFFDIO_API (with %s)",
129			use_dev ? "/dev/userfaultfd" : "syscall");
130
131	if (use_dev)
132		uffd = uffd_open_dev(UFFD_FLAGS);
133	else
134		uffd = uffd_open_sys(UFFD_FLAGS);
135	if (uffd < 0) {
136		uffd_test_skip("cannot open userfaultfd handle");
137		return 0;
138	}
139
140	/* Test wrong UFFD_API */
141	uffdio_api.api = 0xab;
142	uffdio_api.features = 0;
143	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
144		uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
145		goto out;
146	}
147
148	/* Test wrong feature bit */
149	uffdio_api.api = UFFD_API;
150	uffdio_api.features = BIT_ULL(63);
151	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
152		uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
153		goto out;
154	}
155
156	/* Test normal UFFDIO_API */
157	uffdio_api.api = UFFD_API;
158	uffdio_api.features = 0;
159	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
160		uffd_test_fail("UFFDIO_API should succeed but failed");
161		goto out;
162	}
163
164	/* Test double requests of UFFDIO_API with a random feature set */
165	uffdio_api.features = BIT_ULL(0);
166	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
167		uffd_test_fail("UFFDIO_API should reject initialized uffd");
168		goto out;
169	}
170
171	uffd_test_pass();
172out:
173	close(uffd);
174	/* We have a valid uffd handle */
175	return 1;
176}
177
178/*
179 * This function initializes the global variables.  TODO: remove global
180 * vars and then remove this.
181 */
182static int
183uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test,
184		       mem_type_t *mem_type, const char **errmsg)
185{
186	map_shared = mem_type->shared;
187	uffd_test_ops = mem_type->mem_ops;
188
189	if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
190		page_size = default_huge_page_size();
191	else
192		page_size = psize();
193
194	nr_pages = UFFD_TEST_MEM_SIZE / page_size;
195	/* TODO: remove this global var.. it's so ugly */
196	nr_cpus = 1;
197
198	/* Initialize test arguments */
199	args->mem_type = mem_type;
200
201	return uffd_test_ctx_init(test->uffd_feature_required, errmsg);
202}
203
204static bool uffd_feature_supported(uffd_test_case_t *test)
205{
206	uint64_t features;
207
208	if (uffd_get_features(&features))
209		return false;
210
211	return (features & test->uffd_feature_required) ==
212	    test->uffd_feature_required;
213}
214
/* Open /proc/self/pagemap read-only; a failed open is reported via err(). */
static int pagemap_open(void)
{
	int pagemap_fd;

	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd >= 0)
		return pagemap_fd;

	err("open pagemap");
	return pagemap_fd;
}
224
/*
 * Check that the PM_UFFD_WP bit of pagemap entry @value matches the
 * expected state @wp, and report via err() otherwise.
 *
 * This is a macro so that __LINE__ works in err().  Both arguments are
 * parenthesized so that expressions such as "a | b" are handled as a
 * whole; note that @value is evaluated again when the check fails.
 */
#define  pagemap_check_wp(value, wp) do {				\
		if (!!((value) & PM_UFFD_WP) != (wp))			\
			err("pagemap uffd-wp bit error: 0x%"PRIx64, (value)); \
	} while (0)
230
/* State shared between pagemap_test_fork() and its fork event thread */
typedef struct {
	/* uffd the fork event is read from */
	int parent_uffd;
	/* uffd carried by the received fork event */
	int child_uffd;
} fork_event_args;
234
235static void *fork_event_consumer(void *data)
236{
237	fork_event_args *args = data;
238	struct uffd_msg msg = { 0 };
239
240	/* Read until a full msg received */
241	while (uffd_read_msg(args->parent_uffd, &msg));
242
243	if (msg.event != UFFD_EVENT_FORK)
244		err("wrong message: %u\n", msg.event);
245
246	/* Just to be properly freed later */
247	args->child_uffd = msg.arg.fork.ufd;
248	return NULL;
249}
250
/* State of one pin_pages() / unpin_pages() pair */
typedef struct {
	/* fd of the gup_test debugfs file, open while pinned */
	int gup_fd;
	/* true between a successful pin_pages() and unpin_pages() */
	bool pinned;
} pin_args;
255
256/*
257 * Returns 0 if succeed, <0 for errors.  pin_pages() needs to be paired
258 * with unpin_pages().  Currently it needs to be RO longterm pin to satisfy
259 * all needs of the test cases (e.g., trigger unshare, trigger fork() early
260 * CoW, etc.).
261 */
262static int pin_pages(pin_args *args, void *buffer, size_t size)
263{
264	struct pin_longterm_test test = {
265		.addr = (uintptr_t)buffer,
266		.size = size,
267		/* Read-only pins */
268		.flags = 0,
269	};
270
271	if (args->pinned)
272		err("already pinned");
273
274	args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
275	if (args->gup_fd < 0)
276		return -errno;
277
278	if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) {
279		/* Even if gup_test existed, can be an old gup_test / kernel */
280		close(args->gup_fd);
281		return -errno;
282	}
283	args->pinned = true;
284	return 0;
285}
286
287static void unpin_pages(pin_args *args)
288{
289	if (!args->pinned)
290		err("unpin without pin first");
291	if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP))
292		err("PIN_LONGTERM_TEST_STOP");
293	close(args->gup_fd);
294	args->pinned = false;
295}
296
297static int pagemap_test_fork(int uffd, bool with_event, bool test_pin)
298{
299	fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 };
300	pthread_t thread;
301	pid_t child;
302	uint64_t value;
303	int fd, result;
304
305	/* Prepare a thread to resolve EVENT_FORK */
306	if (with_event) {
307		if (pthread_create(&thread, NULL, fork_event_consumer, &args))
308			err("pthread_create()");
309	}
310
311	child = fork();
312	if (!child) {
313		/* Open the pagemap fd of the child itself */
314		pin_args args = {};
315
316		fd = pagemap_open();
317
318		if (test_pin && pin_pages(&args, area_dst, page_size))
319			/*
320			 * Normally when reach here we have pinned in
321			 * previous tests, so shouldn't fail anymore
322			 */
323			err("pin page failed in child");
324
325		value = pagemap_get_entry(fd, area_dst);
326		/*
327		 * After fork(), we should handle uffd-wp bit differently:
328		 *
329		 * (1) when with EVENT_FORK, it should persist
330		 * (2) when without EVENT_FORK, it should be dropped
331		 */
332		pagemap_check_wp(value, with_event);
333		if (test_pin)
334			unpin_pages(&args);
335		/* Succeed */
336		exit(0);
337	}
338	waitpid(child, &result, 0);
339
340	if (with_event) {
341		if (pthread_join(thread, NULL))
342			err("pthread_join()");
343		if (args.child_uffd < 0)
344			err("Didn't receive child uffd");
345		close(args.child_uffd);
346	}
347
348	return result;
349}
350
351static void uffd_wp_unpopulated_test(uffd_test_args_t *args)
352{
353	uint64_t value;
354	int pagemap_fd;
355
356	if (uffd_register(uffd, area_dst, nr_pages * page_size,
357			  false, true, false))
358		err("register failed");
359
360	pagemap_fd = pagemap_open();
361
362	/* Test applying pte marker to anon unpopulated */
363	wp_range(uffd, (uint64_t)area_dst, page_size, true);
364	value = pagemap_get_entry(pagemap_fd, area_dst);
365	pagemap_check_wp(value, true);
366
367	/* Test unprotect on anon pte marker */
368	wp_range(uffd, (uint64_t)area_dst, page_size, false);
369	value = pagemap_get_entry(pagemap_fd, area_dst);
370	pagemap_check_wp(value, false);
371
372	/* Test zap on anon marker */
373	wp_range(uffd, (uint64_t)area_dst, page_size, true);
374	if (madvise(area_dst, page_size, MADV_DONTNEED))
375		err("madvise(MADV_DONTNEED) failed");
376	value = pagemap_get_entry(pagemap_fd, area_dst);
377	pagemap_check_wp(value, false);
378
379	/* Test fault in after marker removed */
380	*area_dst = 1;
381	value = pagemap_get_entry(pagemap_fd, area_dst);
382	pagemap_check_wp(value, false);
383	/* Drop it to make pte none again */
384	if (madvise(area_dst, page_size, MADV_DONTNEED))
385		err("madvise(MADV_DONTNEED) failed");
386
387	/* Test read-zero-page upon pte marker */
388	wp_range(uffd, (uint64_t)area_dst, page_size, true);
389	*(volatile char *)area_dst;
390	/* Drop it to make pte none again */
391	if (madvise(area_dst, page_size, MADV_DONTNEED))
392		err("madvise(MADV_DONTNEED) failed");
393
394	uffd_test_pass();
395}
396
397static void uffd_wp_fork_test_common(uffd_test_args_t *args,
398				     bool with_event)
399{
400	int pagemap_fd;
401	uint64_t value;
402
403	if (uffd_register(uffd, area_dst, nr_pages * page_size,
404			  false, true, false))
405		err("register failed");
406
407	pagemap_fd = pagemap_open();
408
409	/* Touch the page */
410	*area_dst = 1;
411	wp_range(uffd, (uint64_t)area_dst, page_size, true);
412	value = pagemap_get_entry(pagemap_fd, area_dst);
413	pagemap_check_wp(value, true);
414	if (pagemap_test_fork(uffd, with_event, false)) {
415		uffd_test_fail("Detected %s uffd-wp bit in child in present pte",
416			       with_event ? "missing" : "stall");
417		goto out;
418	}
419
420	/*
421	 * This is an attempt for zapping the pgtable so as to test the
422	 * markers.
423	 *
424	 * For private mappings, PAGEOUT will only work on exclusive ptes
425	 * (PM_MMAP_EXCLUSIVE) which we should satisfy.
426	 *
427	 * For shared, PAGEOUT may not work.  Use DONTNEED instead which
428	 * plays a similar role of zapping (rather than freeing the page)
429	 * to expose pte markers.
430	 */
431	if (args->mem_type->shared) {
432		if (madvise(area_dst, page_size, MADV_DONTNEED))
433			err("MADV_DONTNEED");
434	} else {
435		/*
436		 * NOTE: ignore retval because private-hugetlb doesn't yet
437		 * support swapping, so it could fail.
438		 */
439		madvise(area_dst, page_size, MADV_PAGEOUT);
440	}
441
442	/* Uffd-wp should persist even swapped out */
443	value = pagemap_get_entry(pagemap_fd, area_dst);
444	pagemap_check_wp(value, true);
445	if (pagemap_test_fork(uffd, with_event, false)) {
446		uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte",
447			       with_event ? "missing" : "stall");
448		goto out;
449	}
450
451	/* Unprotect; this tests swap pte modifications */
452	wp_range(uffd, (uint64_t)area_dst, page_size, false);
453	value = pagemap_get_entry(pagemap_fd, area_dst);
454	pagemap_check_wp(value, false);
455
456	/* Fault in the page from disk */
457	*area_dst = 2;
458	value = pagemap_get_entry(pagemap_fd, area_dst);
459	pagemap_check_wp(value, false);
460	uffd_test_pass();
461out:
462	if (uffd_unregister(uffd, area_dst, nr_pages * page_size))
463		err("unregister failed");
464	close(pagemap_fd);
465}
466
467static void uffd_wp_fork_test(uffd_test_args_t *args)
468{
469	uffd_wp_fork_test_common(args, false);
470}
471
472static void uffd_wp_fork_with_event_test(uffd_test_args_t *args)
473{
474	uffd_wp_fork_test_common(args, true);
475}
476
477static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args,
478					 bool with_event)
479{
480	int pagemap_fd;
481	pin_args pin_args = {};
482
483	if (uffd_register(uffd, area_dst, page_size, false, true, false))
484		err("register failed");
485
486	pagemap_fd = pagemap_open();
487
488	/* Touch the page */
489	*area_dst = 1;
490	wp_range(uffd, (uint64_t)area_dst, page_size, true);
491
492	/*
493	 * 1. First pin, then fork().  This tests fork() special path when
494	 * doing early CoW if the page is private.
495	 */
496	if (pin_pages(&pin_args, area_dst, page_size)) {
497		uffd_test_skip("Possibly CONFIG_GUP_TEST missing "
498			       "or unprivileged");
499		close(pagemap_fd);
500		uffd_unregister(uffd, area_dst, page_size);
501		return;
502	}
503
504	if (pagemap_test_fork(uffd, with_event, false)) {
505		uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()",
506			       with_event ? "missing" : "stall");
507		unpin_pages(&pin_args);
508		goto out;
509	}
510
511	unpin_pages(&pin_args);
512
513	/*
514	 * 2. First fork(), then pin (in the child, where test_pin==true).
515	 * This tests COR, aka, page unsharing on private memories.
516	 */
517	if (pagemap_test_fork(uffd, with_event, true)) {
518		uffd_test_fail("Detected %s uffd-wp bit when RO pin",
519			       with_event ? "missing" : "stall");
520		goto out;
521	}
522	uffd_test_pass();
523out:
524	if (uffd_unregister(uffd, area_dst, page_size))
525		err("register failed");
526	close(pagemap_fd);
527}
528
529static void uffd_wp_fork_pin_test(uffd_test_args_t *args)
530{
531	uffd_wp_fork_pin_test_common(args, false);
532}
533
534static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args)
535{
536	uffd_wp_fork_pin_test_common(args, true);
537}
538
539static void check_memory_contents(char *p)
540{
541	unsigned long i, j;
542	uint8_t expected_byte;
543
544	for (i = 0; i < nr_pages; ++i) {
545		expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
546		for (j = 0; j < page_size; j++) {
547			uint8_t v = *(uint8_t *)(p + (i * page_size) + j);
548			if (v != expected_byte)
549				err("unexpected page contents");
550		}
551	}
552}
553
554static void uffd_minor_test_common(bool test_collapse, bool test_wp)
555{
556	unsigned long p;
557	pthread_t uffd_mon;
558	char c;
559	struct uffd_args args = { 0 };
560
561	/*
562	 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
563	 * both do not make much sense.
564	 */
565	assert(!(test_collapse && test_wp));
566
567	if (uffd_register(uffd, area_dst_alias, nr_pages * page_size,
568			  /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
569			  false, test_wp, true))
570		err("register failure");
571
572	/*
573	 * After registering with UFFD, populate the non-UFFD-registered side of
574	 * the shared mapping. This should *not* trigger any UFFD minor faults.
575	 */
576	for (p = 0; p < nr_pages; ++p)
577		memset(area_dst + (p * page_size), p % ((uint8_t)-1),
578		       page_size);
579
580	args.apply_wp = test_wp;
581	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
582		err("uffd_poll_thread create");
583
584	/*
585	 * Read each of the pages back using the UFFD-registered mapping. We
586	 * expect that the first time we touch a page, it will result in a minor
587	 * fault. uffd_poll_thread will resolve the fault by bit-flipping the
588	 * page's contents, and then issuing a CONTINUE ioctl.
589	 */
590	check_memory_contents(area_dst_alias);
591
592	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
593		err("pipe write");
594	if (pthread_join(uffd_mon, NULL))
595		err("join() failed");
596
597	if (test_collapse) {
598		if (madvise(area_dst_alias, nr_pages * page_size,
599			    MADV_COLLAPSE)) {
600			/* It's fine to fail for this one... */
601			uffd_test_skip("MADV_COLLAPSE failed");
602			return;
603		}
604
605		uffd_test_ops->check_pmd_mapping(area_dst,
606						 nr_pages * page_size /
607						 read_pmd_pagesize());
608		/*
609		 * This won't cause uffd-fault - it purely just makes sure there
610		 * was no corruption.
611		 */
612		check_memory_contents(area_dst_alias);
613	}
614
615	if (args.missing_faults != 0 || args.minor_faults != nr_pages)
616		uffd_test_fail("stats check error");
617	else
618		uffd_test_pass();
619}
620
621void uffd_minor_test(uffd_test_args_t *args)
622{
623	uffd_minor_test_common(false, false);
624}
625
626void uffd_minor_wp_test(uffd_test_args_t *args)
627{
628	uffd_minor_test_common(false, true);
629}
630
631void uffd_minor_collapse_test(uffd_test_args_t *args)
632{
633	uffd_minor_test_common(true, false);
634}
635
636static sigjmp_buf jbuf, *sigbuf;
637
638static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
639{
640	if (sig == SIGBUS) {
641		if (sigbuf)
642			siglongjmp(*sigbuf, 1);
643		abort();
644	}
645}
646
647/*
648 * For non-cooperative userfaultfd test we fork() a process that will
649 * generate pagefaults, will mremap the area monitored by the
650 * userfaultfd and at last this process will release the monitored
651 * area.
652 * For the anonymous and shared memory the area is divided into two
653 * parts, the first part is accessed before mremap, and the second
654 * part is accessed after mremap. Since hugetlbfs does not support
655 * mremap, the entire monitored area is accessed in a single pass for
656 * HUGETLB_TEST.
657 * The release of the pages currently generates event for shmem and
658 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
659 * for hugetlb.
660 * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
661 * monitored area, generate pagefaults and test that signal is delivered.
662 * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
663 * test robustness use case - we release monitored area, fork a process
664 * that will generate pagefaults and verify signal is generated.
665 * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
666 * feature. Using monitor thread, verify no userfault events are generated.
667 */
668static int faulting_process(int signal_test, bool wp)
669{
670	unsigned long nr, i;
671	unsigned long long count;
672	unsigned long split_nr_pages;
673	unsigned long lastnr;
674	struct sigaction act;
675	volatile unsigned long signalled = 0;
676
677	split_nr_pages = (nr_pages + 1) / 2;
678
679	if (signal_test) {
680		sigbuf = &jbuf;
681		memset(&act, 0, sizeof(act));
682		act.sa_sigaction = sighndl;
683		act.sa_flags = SA_SIGINFO;
684		if (sigaction(SIGBUS, &act, 0))
685			err("sigaction");
686		lastnr = (unsigned long)-1;
687	}
688
689	for (nr = 0; nr < split_nr_pages; nr++) {
690		volatile int steps = 1;
691		unsigned long offset = nr * page_size;
692
693		if (signal_test) {
694			if (sigsetjmp(*sigbuf, 1) != 0) {
695				if (steps == 1 && nr == lastnr)
696					err("Signal repeated");
697
698				lastnr = nr;
699				if (signal_test == 1) {
700					if (steps == 1) {
701						/* This is a MISSING request */
702						steps++;
703						if (copy_page(uffd, offset, wp))
704							signalled++;
705					} else {
706						/* This is a WP request */
707						assert(steps == 2);
708						wp_range(uffd,
709							 (__u64)area_dst +
710							 offset,
711							 page_size, false);
712					}
713				} else {
714					signalled++;
715					continue;
716				}
717			}
718		}
719
720		count = *area_count(area_dst, nr);
721		if (count != count_verify[nr])
722			err("nr %lu memory corruption %llu %llu\n",
723			    nr, count, count_verify[nr]);
724		/*
725		 * Trigger write protection if there is by writing
726		 * the same value back.
727		 */
728		*area_count(area_dst, nr) = count;
729	}
730
731	if (signal_test)
732		return signalled != split_nr_pages;
733
734	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
735			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
736	if (area_dst == MAP_FAILED)
737		err("mremap");
738	/* Reset area_src since we just clobbered it */
739	area_src = NULL;
740
741	for (; nr < nr_pages; nr++) {
742		count = *area_count(area_dst, nr);
743		if (count != count_verify[nr]) {
744			err("nr %lu memory corruption %llu %llu\n",
745			    nr, count, count_verify[nr]);
746		}
747		/*
748		 * Trigger write protection if there is by writing
749		 * the same value back.
750		 */
751		*area_count(area_dst, nr) = count;
752	}
753
754	uffd_test_ops->release_pages(area_dst);
755
756	for (nr = 0; nr < nr_pages; nr++)
757		for (i = 0; i < page_size; i++)
758			if (*(area_dst + nr * page_size + i) != 0)
759				err("page %lu offset %lu is not zero", nr, i);
760
761	return 0;
762}
763
764static void uffd_sigbus_test_common(bool wp)
765{
766	unsigned long userfaults;
767	pthread_t uffd_mon;
768	pid_t pid;
769	int err;
770	char c;
771	struct uffd_args args = { 0 };
772
773	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
774
775	if (uffd_register(uffd, area_dst, nr_pages * page_size,
776			  true, wp, false))
777		err("register failure");
778
779	if (faulting_process(1, wp))
780		err("faulting process failed");
781
782	uffd_test_ops->release_pages(area_dst);
783
784	args.apply_wp = wp;
785	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
786		err("uffd_poll_thread create");
787
788	pid = fork();
789	if (pid < 0)
790		err("fork");
791
792	if (!pid)
793		exit(faulting_process(2, wp));
794
795	waitpid(pid, &err, 0);
796	if (err)
797		err("faulting process failed");
798	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
799		err("pipe write");
800	if (pthread_join(uffd_mon, (void **)&userfaults))
801		err("pthread_join()");
802
803	if (userfaults)
804		uffd_test_fail("Signal test failed, userfaults: %ld", userfaults);
805	else
806		uffd_test_pass();
807}
808
809static void uffd_sigbus_test(uffd_test_args_t *args)
810{
811	uffd_sigbus_test_common(false);
812}
813
814static void uffd_sigbus_wp_test(uffd_test_args_t *args)
815{
816	uffd_sigbus_test_common(true);
817}
818
819static void uffd_events_test_common(bool wp)
820{
821	pthread_t uffd_mon;
822	pid_t pid;
823	int err;
824	char c;
825	struct uffd_args args = { 0 };
826
827	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
828	if (uffd_register(uffd, area_dst, nr_pages * page_size,
829			  true, wp, false))
830		err("register failure");
831
832	args.apply_wp = wp;
833	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
834		err("uffd_poll_thread create");
835
836	pid = fork();
837	if (pid < 0)
838		err("fork");
839
840	if (!pid)
841		exit(faulting_process(0, wp));
842
843	waitpid(pid, &err, 0);
844	if (err)
845		err("faulting process failed");
846	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
847		err("pipe write");
848	if (pthread_join(uffd_mon, NULL))
849		err("pthread_join()");
850
851	if (args.missing_faults != nr_pages)
852		uffd_test_fail("Fault counts wrong");
853	else
854		uffd_test_pass();
855}
856
857static void uffd_events_test(uffd_test_args_t *args)
858{
859	uffd_events_test_common(false);
860}
861
862static void uffd_events_wp_test(uffd_test_args_t *args)
863{
864	uffd_events_test_common(true);
865}
866
867static void retry_uffdio_zeropage(int ufd,
868				  struct uffdio_zeropage *uffdio_zeropage)
869{
870	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
871				     uffdio_zeropage->range.len,
872				     0);
873	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
874		if (uffdio_zeropage->zeropage != -EEXIST)
875			err("UFFDIO_ZEROPAGE error: %"PRId64,
876			    (int64_t)uffdio_zeropage->zeropage);
877	} else {
878		err("UFFDIO_ZEROPAGE error: %"PRId64,
879		    (int64_t)uffdio_zeropage->zeropage);
880	}
881}
882
883static bool do_uffdio_zeropage(int ufd, bool has_zeropage)
884{
885	struct uffdio_zeropage uffdio_zeropage = { 0 };
886	int ret;
887	__s64 res;
888
889	uffdio_zeropage.range.start = (unsigned long) area_dst;
890	uffdio_zeropage.range.len = page_size;
891	uffdio_zeropage.mode = 0;
892	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
893	res = uffdio_zeropage.zeropage;
894	if (ret) {
895		/* real retval in ufdio_zeropage.zeropage */
896		if (has_zeropage)
897			err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
898		else if (res != -EINVAL)
899			err("UFFDIO_ZEROPAGE not -EINVAL");
900	} else if (has_zeropage) {
901		if (res != page_size)
902			err("UFFDIO_ZEROPAGE unexpected size");
903		else
904			retry_uffdio_zeropage(ufd, &uffdio_zeropage);
905		return true;
906	} else
907		err("UFFDIO_ZEROPAGE succeeded");
908
909	return false;
910}
911
912/*
913 * Registers a range with MISSING mode only for zeropage test.  Return true
914 * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
915 * because we want to detect .ioctls along the way.
916 */
917static bool
918uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
919{
920	uint64_t ioctls = 0;
921
922	if (uffd_register_with_ioctls(uffd, addr, len, true,
923				      false, false, &ioctls))
924		err("zeropage register fail");
925
926	return ioctls & (1 << _UFFDIO_ZEROPAGE);
927}
928
929/* exercise UFFDIO_ZEROPAGE */
930static void uffd_zeropage_test(uffd_test_args_t *args)
931{
932	bool has_zeropage;
933	int i;
934
935	has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size);
936	if (area_dst_alias)
937		/* Ignore the retval; we already have it */
938		uffd_register_detect_zeropage(uffd, area_dst_alias, page_size);
939
940	if (do_uffdio_zeropage(uffd, has_zeropage))
941		for (i = 0; i < page_size; i++)
942			if (area_dst[i] != 0)
943				err("data non-zero at offset %d\n", i);
944
945	if (uffd_unregister(uffd, area_dst, page_size))
946		err("unregister");
947
948	if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size))
949		err("unregister");
950
951	uffd_test_pass();
952}
953
954static void uffd_register_poison(int uffd, void *addr, uint64_t len)
955{
956	uint64_t ioctls = 0;
957	uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON);
958
959	if (uffd_register_with_ioctls(uffd, addr, len, true,
960				      false, false, &ioctls))
961		err("poison register fail");
962
963	if ((ioctls & expected) != expected)
964		err("registered area doesn't support COPY and POISON ioctls");
965}
966
967static void do_uffdio_poison(int uffd, unsigned long offset)
968{
969	struct uffdio_poison uffdio_poison = { 0 };
970	int ret;
971	__s64 res;
972
973	uffdio_poison.range.start = (unsigned long) area_dst + offset;
974	uffdio_poison.range.len = page_size;
975	uffdio_poison.mode = 0;
976	ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison);
977	res = uffdio_poison.updated;
978
979	if (ret)
980		err("UFFDIO_POISON error: %"PRId64, (int64_t)res);
981	else if (res != page_size)
982		err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res);
983}
984
985static void uffd_poison_handle_fault(
986	struct uffd_msg *msg, struct uffd_args *args)
987{
988	unsigned long offset;
989
990	if (msg->event != UFFD_EVENT_PAGEFAULT)
991		err("unexpected msg event %u", msg->event);
992
993	if (msg->arg.pagefault.flags &
994	    (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR))
995		err("unexpected fault type %llu", msg->arg.pagefault.flags);
996
997	offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
998	offset &= ~(page_size-1);
999
1000	/* Odd pages -> copy zeroed page; even pages -> poison. */
1001	if (offset & page_size)
1002		copy_page(uffd, offset, false);
1003	else
1004		do_uffdio_poison(uffd, offset);
1005}
1006
1007static void uffd_poison_test(uffd_test_args_t *targs)
1008{
1009	pthread_t uffd_mon;
1010	char c;
1011	struct uffd_args args = { 0 };
1012	struct sigaction act = { 0 };
1013	unsigned long nr_sigbus = 0;
1014	unsigned long nr;
1015
1016	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
1017
1018	uffd_register_poison(uffd, area_dst, nr_pages * page_size);
1019	memset(area_src, 0, nr_pages * page_size);
1020
1021	args.handle_fault = uffd_poison_handle_fault;
1022	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
1023		err("uffd_poll_thread create");
1024
1025	sigbuf = &jbuf;
1026	act.sa_sigaction = sighndl;
1027	act.sa_flags = SA_SIGINFO;
1028	if (sigaction(SIGBUS, &act, 0))
1029		err("sigaction");
1030
1031	for (nr = 0; nr < nr_pages; ++nr) {
1032		unsigned long offset = nr * page_size;
1033		const char *bytes = (const char *) area_dst + offset;
1034		const char *i;
1035
1036		if (sigsetjmp(*sigbuf, 1)) {
1037			/*
1038			 * Access below triggered a SIGBUS, which was caught by
1039			 * sighndl, which then jumped here. Count this SIGBUS,
1040			 * and move on to next page.
1041			 */
1042			++nr_sigbus;
1043			continue;
1044		}
1045
1046		for (i = bytes; i < bytes + page_size; ++i) {
1047			if (*i)
1048				err("nonzero byte in area_dst (%p) at %p: %u",
1049				    area_dst, i, *i);
1050		}
1051	}
1052
1053	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
1054		err("pipe write");
1055	if (pthread_join(uffd_mon, NULL))
1056		err("pthread_join()");
1057
1058	if (nr_sigbus != nr_pages / 2)
1059		err("expected to receive %lu SIGBUS, actually received %lu",
1060		    nr_pages / 2, nr_sigbus);
1061
1062	uffd_test_pass();
1063}
1064
1065/*
1066 * Test the returned uffdio_register.ioctls with different register modes.
1067 * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
1068 */
1069static void
1070do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor)
1071{
1072	uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
1073	mem_type_t *mem_type = args->mem_type;
1074	int ret;
1075
1076	ret = uffd_register_with_ioctls(uffd, area_dst, page_size,
1077					miss, wp, minor, &ioctls);
1078
1079	/*
1080	 * Handle special cases of UFFDIO_REGISTER here where it should
1081	 * just fail with -EINVAL first..
1082	 *
1083	 * Case 1: register MINOR on anon
1084	 * Case 2: register with no mode selected
1085	 */
1086	if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
1087	    (!miss && !wp && !minor)) {
1088		if (ret != -EINVAL)
1089			err("register (miss=%d, wp=%d, minor=%d) failed "
1090			    "with wrong errno=%d", miss, wp, minor, ret);
1091		return;
1092	}
1093
1094	/* UFFDIO_REGISTER should succeed, then check ioctls returned */
1095	if (miss)
1096		expected |= BIT_ULL(_UFFDIO_COPY);
1097	if (wp)
1098		expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
1099	if (minor)
1100		expected |= BIT_ULL(_UFFDIO_CONTINUE);
1101
1102	if ((ioctls & expected) != expected)
1103		err("unexpected uffdio_register.ioctls "
1104		    "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
1105		    "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
1106
1107	if (uffd_unregister(uffd, area_dst, page_size))
1108		err("unregister");
1109}
1110
1111static void uffd_register_ioctls_test(uffd_test_args_t *args)
1112{
1113	int miss, wp, minor;
1114
1115	for (miss = 0; miss <= 1; miss++)
1116		for (wp = 0; wp <= 1; wp++)
1117			for (minor = 0; minor <= 1; minor++)
1118				do_register_ioctls_test(args, miss, wp, minor);
1119
1120	uffd_test_pass();
1121}
1122
1123uffd_test_case_t uffd_tests[] = {
1124	{
1125		/* Test returned uffdio_register.ioctls. */
1126		.name = "register-ioctls",
1127		.uffd_fn = uffd_register_ioctls_test,
1128		.mem_targets = MEM_ALL,
1129		.uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
1130		UFFD_FEATURE_MISSING_SHMEM |
1131		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1132		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1133		UFFD_FEATURE_MINOR_HUGETLBFS |
1134		UFFD_FEATURE_MINOR_SHMEM,
1135	},
1136	{
1137		.name = "zeropage",
1138		.uffd_fn = uffd_zeropage_test,
1139		.mem_targets = MEM_ALL,
1140		.uffd_feature_required = 0,
1141	},
1142	{
1143		.name = "wp-fork",
1144		.uffd_fn = uffd_wp_fork_test,
1145		.mem_targets = MEM_ALL,
1146		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1147		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1148	},
1149	{
1150		.name = "wp-fork-with-event",
1151		.uffd_fn = uffd_wp_fork_with_event_test,
1152		.mem_targets = MEM_ALL,
1153		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1154		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1155		/* when set, child process should inherit uffd-wp bits */
1156		UFFD_FEATURE_EVENT_FORK,
1157	},
1158	{
1159		.name = "wp-fork-pin",
1160		.uffd_fn = uffd_wp_fork_pin_test,
1161		.mem_targets = MEM_ALL,
1162		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1163		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1164	},
1165	{
1166		.name = "wp-fork-pin-with-event",
1167		.uffd_fn = uffd_wp_fork_pin_with_event_test,
1168		.mem_targets = MEM_ALL,
1169		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1170		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
1171		/* when set, child process should inherit uffd-wp bits */
1172		UFFD_FEATURE_EVENT_FORK,
1173	},
1174	{
1175		.name = "wp-unpopulated",
1176		.uffd_fn = uffd_wp_unpopulated_test,
1177		.mem_targets = MEM_ANON,
1178		.uffd_feature_required =
1179		UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
1180	},
1181	{
1182		.name = "minor",
1183		.uffd_fn = uffd_minor_test,
1184		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
1185		.uffd_feature_required =
1186		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
1187	},
1188	{
1189		.name = "minor-wp",
1190		.uffd_fn = uffd_minor_wp_test,
1191		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
1192		.uffd_feature_required =
1193		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
1194		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1195		/*
1196		 * HACK: here we leveraged WP_UNPOPULATED to detect whether
1197		 * minor mode supports wr-protect.  There's no feature flag
1198		 * for it so this is the best we can test against.
1199		 */
1200		UFFD_FEATURE_WP_UNPOPULATED,
1201	},
1202	{
1203		.name = "minor-collapse",
1204		.uffd_fn = uffd_minor_collapse_test,
1205		/* MADV_COLLAPSE only works with shmem */
1206		.mem_targets = MEM_SHMEM,
1207		/* We can't test MADV_COLLAPSE, so try our luck */
1208		.uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
1209	},
1210	{
1211		.name = "sigbus",
1212		.uffd_fn = uffd_sigbus_test,
1213		.mem_targets = MEM_ALL,
1214		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
1215		UFFD_FEATURE_EVENT_FORK,
1216	},
1217	{
1218		.name = "sigbus-wp",
1219		.uffd_fn = uffd_sigbus_wp_test,
1220		.mem_targets = MEM_ALL,
1221		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
1222		UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP,
1223	},
1224	{
1225		.name = "events",
1226		.uffd_fn = uffd_events_test,
1227		.mem_targets = MEM_ALL,
1228		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
1229		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
1230	},
1231	{
1232		.name = "events-wp",
1233		.uffd_fn = uffd_events_wp_test,
1234		.mem_targets = MEM_ALL,
1235		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
1236		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
1237		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
1238		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
1239	},
1240	{
1241		.name = "poison",
1242		.uffd_fn = uffd_poison_test,
1243		.mem_targets = MEM_ALL,
1244		.uffd_feature_required = UFFD_FEATURE_POISON,
1245	},
1246};
1247
1248static void usage(const char *prog)
1249{
1250	printf("usage: %s [-f TESTNAME]\n", prog);
1251	puts("");
1252	puts(" -f: test name to filter (e.g., event)");
1253	puts(" -h: show the help msg");
1254	puts(" -l: list tests only");
1255	puts("");
1256	exit(KSFT_FAIL);
1257}
1258
1259int main(int argc, char *argv[])
1260{
1261	int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
1262	int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
1263	const char *test_filter = NULL;
1264	bool list_only = false;
1265	uffd_test_case_t *test;
1266	mem_type_t *mem_type;
1267	uffd_test_args_t args;
1268	const char *errmsg;
1269	int has_uffd, opt;
1270	int i, j;
1271
1272	while ((opt = getopt(argc, argv, "f:hl")) != -1) {
1273		switch (opt) {
1274		case 'f':
1275			test_filter = optarg;
1276			break;
1277		case 'l':
1278			list_only = true;
1279			break;
1280		case 'h':
1281		default:
1282			/* Unknown */
1283			usage(argv[0]);
1284			break;
1285		}
1286	}
1287
1288	if (!test_filter && !list_only) {
1289		has_uffd = test_uffd_api(false);
1290		has_uffd |= test_uffd_api(true);
1291
1292		if (!has_uffd) {
1293			printf("Userfaultfd not supported or unprivileged, skip all tests\n");
1294			exit(KSFT_SKIP);
1295		}
1296	}
1297
1298	for (i = 0; i < n_tests; i++) {
1299		test = &uffd_tests[i];
1300		if (test_filter && !strstr(test->name, test_filter))
1301			continue;
1302		if (list_only) {
1303			printf("%s\n", test->name);
1304			continue;
1305		}
1306		for (j = 0; j < n_mems; j++) {
1307			mem_type = &mem_types[j];
1308			if (!(test->mem_targets & mem_type->mem_flag))
1309				continue;
1310
1311			uffd_test_start("%s on %s", test->name, mem_type->name);
1312			if ((mem_type->mem_flag == MEM_HUGETLB ||
1313			    mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
1314			    (default_huge_page_size() == 0)) {
1315				uffd_test_skip("huge page size is 0, feature missing?");
1316				continue;
1317			}
1318			if (!uffd_feature_supported(test)) {
1319				uffd_test_skip("feature missing");
1320				continue;
1321			}
1322			if (uffd_setup_environment(&args, test, mem_type,
1323						   &errmsg)) {
1324				uffd_test_skip(errmsg);
1325				continue;
1326			}
1327			test->uffd_fn(&args);
1328		}
1329	}
1330
1331	if (!list_only)
1332		uffd_test_report();
1333
1334	return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
1335}
1336
1337#else /* __NR_userfaultfd */
1338
1339#warning "missing __NR_userfaultfd definition"
1340
1341int main(void)
1342{
1343	printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
1344	return KSFT_SKIP;
1345}
1346
1347#endif /* __NR_userfaultfd */
1348