// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
 *
 * This test starts a transaction and triggers a signal, forcing a pagefault to
 * happen when the kernel signal handling code touches the user signal stack.
 *
 * In order to avoid pre-faulting the signal stack memory and to force the
 * pagefault to happen precisely in the kernel signal handling code, the
 * pagefault handling is done in userspace using the userfaultfd facility.
 *
 * Further pagefaults are triggered by crafting the signal handler's ucontext
 * to point to additional memory regions managed by the userfaultfd, so using
 * the same mechanism used to avoid pre-faulting the signal stack memory.
 *
 * On failure (bug is present) the kernel crashes or never returns control back
 * to userspace. If the bug is not present, the test completes almost
 * immediately.
 */
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci#include <stdio.h>
218c2ecf20Sopenharmony_ci#include <stdlib.h>
228c2ecf20Sopenharmony_ci#include <string.h>
238c2ecf20Sopenharmony_ci#include <linux/userfaultfd.h>
248c2ecf20Sopenharmony_ci#include <poll.h>
258c2ecf20Sopenharmony_ci#include <unistd.h>
268c2ecf20Sopenharmony_ci#include <sys/ioctl.h>
278c2ecf20Sopenharmony_ci#include <sys/syscall.h>
288c2ecf20Sopenharmony_ci#include <fcntl.h>
298c2ecf20Sopenharmony_ci#include <sys/mman.h>
308c2ecf20Sopenharmony_ci#include <pthread.h>
318c2ecf20Sopenharmony_ci#include <signal.h>
328c2ecf20Sopenharmony_ci#include <errno.h>
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci#include "tm.h"
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci
/* Size in bytes of the userfaultfd-managed region: 10 pages of 64k each */
#define UF_MEM_SIZE 655360

/* Start of the memory region whose page faults are handled by userfaultfd */
static char *uf_mem;
/* Offset into uf_mem of the next chunk to be handed out by get_uf_mem() */
static size_t uf_mem_offset;

/*
 * Content for the faulting pages: when a page of uf_mem is faulted in, the
 * page at the same offset of this buffer is copied into it (rather than a
 * zero-filled page being used). This keeps the test reliable and avoids a
 * segfault when returning from the signal handler: the handler points its
 * ucontext at freshly reserved memory, and once that memory is paged in it
 * holds the expected content.
 */
static char backing_mem[UF_MEM_SIZE];

/* System page size in bytes, filled in by setup_uf_mem() */
static size_t pagesize;
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci/*
558c2ecf20Sopenharmony_ci * Return a chunk of at least 'size' bytes of memory that will be handled by
568c2ecf20Sopenharmony_ci * userfaultfd. If 'backing_data' is not NULL, its content will be save to
578c2ecf20Sopenharmony_ci * 'backing_mem' and then copied into the faulting pages when the page fault
588c2ecf20Sopenharmony_ci * is handled.
598c2ecf20Sopenharmony_ci */
608c2ecf20Sopenharmony_civoid *get_uf_mem(size_t size, void *backing_data)
618c2ecf20Sopenharmony_ci{
628c2ecf20Sopenharmony_ci	void *ret;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	if (uf_mem_offset + size > UF_MEM_SIZE) {
658c2ecf20Sopenharmony_ci		fprintf(stderr, "Requesting more uf_mem than expected!\n");
668c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
678c2ecf20Sopenharmony_ci	}
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	ret = &uf_mem[uf_mem_offset];
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	/* Save the data that will be copied into the faulting page */
728c2ecf20Sopenharmony_ci	if (backing_data != NULL)
738c2ecf20Sopenharmony_ci		memcpy(&backing_mem[uf_mem_offset], backing_data, size);
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	/* Reserve the requested amount of uf_mem */
768c2ecf20Sopenharmony_ci	uf_mem_offset += size;
778c2ecf20Sopenharmony_ci	/* Keep uf_mem_offset aligned to the page size (round up) */
788c2ecf20Sopenharmony_ci	uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	return ret;
818c2ecf20Sopenharmony_ci}
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_civoid *fault_handler_thread(void *arg)
848c2ecf20Sopenharmony_ci{
858c2ecf20Sopenharmony_ci	struct uffd_msg msg;	/* Data read from userfaultfd */
868c2ecf20Sopenharmony_ci	long uffd;		/* userfaultfd file descriptor */
878c2ecf20Sopenharmony_ci	struct uffdio_copy uffdio_copy;
888c2ecf20Sopenharmony_ci	struct pollfd pollfd;
898c2ecf20Sopenharmony_ci	ssize_t nread, offset;
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	uffd = (long) arg;
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	for (;;) {
948c2ecf20Sopenharmony_ci		pollfd.fd = uffd;
958c2ecf20Sopenharmony_ci		pollfd.events = POLLIN;
968c2ecf20Sopenharmony_ci		if (poll(&pollfd, 1, -1) == -1) {
978c2ecf20Sopenharmony_ci			perror("poll() failed");
988c2ecf20Sopenharmony_ci			exit(EXIT_FAILURE);
998c2ecf20Sopenharmony_ci		}
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci		nread = read(uffd, &msg, sizeof(msg));
1028c2ecf20Sopenharmony_ci		if (nread == 0) {
1038c2ecf20Sopenharmony_ci			fprintf(stderr, "read(): EOF on userfaultfd\n");
1048c2ecf20Sopenharmony_ci			exit(EXIT_FAILURE);
1058c2ecf20Sopenharmony_ci		}
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci		if (nread == -1) {
1088c2ecf20Sopenharmony_ci			perror("read() failed");
1098c2ecf20Sopenharmony_ci			exit(EXIT_FAILURE);
1108c2ecf20Sopenharmony_ci		}
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci		/* We expect only one kind of event */
1138c2ecf20Sopenharmony_ci		if (msg.event != UFFD_EVENT_PAGEFAULT) {
1148c2ecf20Sopenharmony_ci			fprintf(stderr, "Unexpected event on userfaultfd\n");
1158c2ecf20Sopenharmony_ci			exit(EXIT_FAILURE);
1168c2ecf20Sopenharmony_ci		}
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci		/*
1198c2ecf20Sopenharmony_ci		 * We need to handle page faults in units of pages(!).
1208c2ecf20Sopenharmony_ci		 * So, round faulting address down to page boundary.
1218c2ecf20Sopenharmony_ci		 */
1228c2ecf20Sopenharmony_ci		uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci		offset = (char *) uffdio_copy.dst - uf_mem;
1258c2ecf20Sopenharmony_ci		uffdio_copy.src = (unsigned long) &backing_mem[offset];
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci		uffdio_copy.len = pagesize;
1288c2ecf20Sopenharmony_ci		uffdio_copy.mode = 0;
1298c2ecf20Sopenharmony_ci		uffdio_copy.copy = 0;
1308c2ecf20Sopenharmony_ci		if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
1318c2ecf20Sopenharmony_ci			perror("ioctl-UFFDIO_COPY failed");
1328c2ecf20Sopenharmony_ci			exit(EXIT_FAILURE);
1338c2ecf20Sopenharmony_ci		}
1348c2ecf20Sopenharmony_ci	}
1358c2ecf20Sopenharmony_ci}
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_civoid setup_uf_mem(void)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	long uffd;		/* userfaultfd file descriptor */
1408c2ecf20Sopenharmony_ci	pthread_t thr;
1418c2ecf20Sopenharmony_ci	struct uffdio_api uffdio_api;
1428c2ecf20Sopenharmony_ci	struct uffdio_register uffdio_register;
1438c2ecf20Sopenharmony_ci	int ret;
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	pagesize = sysconf(_SC_PAGE_SIZE);
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	/* Create and enable userfaultfd object */
1488c2ecf20Sopenharmony_ci	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
1498c2ecf20Sopenharmony_ci	if (uffd == -1) {
1508c2ecf20Sopenharmony_ci		perror("userfaultfd() failed");
1518c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
1528c2ecf20Sopenharmony_ci	}
1538c2ecf20Sopenharmony_ci	uffdio_api.api = UFFD_API;
1548c2ecf20Sopenharmony_ci	uffdio_api.features = 0;
1558c2ecf20Sopenharmony_ci	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
1568c2ecf20Sopenharmony_ci		perror("ioctl-UFFDIO_API failed");
1578c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
1588c2ecf20Sopenharmony_ci	}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	/*
1618c2ecf20Sopenharmony_ci	 * Create a private anonymous mapping. The memory will be demand-zero
1628c2ecf20Sopenharmony_ci	 * paged, that is, not yet allocated. When we actually touch the memory
1638c2ecf20Sopenharmony_ci	 * the related page will be allocated via the userfaultfd mechanism.
1648c2ecf20Sopenharmony_ci	 */
1658c2ecf20Sopenharmony_ci	uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
1668c2ecf20Sopenharmony_ci		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1678c2ecf20Sopenharmony_ci	if (uf_mem == MAP_FAILED) {
1688c2ecf20Sopenharmony_ci		perror("mmap() failed");
1698c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
1708c2ecf20Sopenharmony_ci	}
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	/*
1738c2ecf20Sopenharmony_ci	 * Register the memory range of the mapping we've just mapped to be
1748c2ecf20Sopenharmony_ci	 * handled by the userfaultfd object. In 'mode' we request to track
1758c2ecf20Sopenharmony_ci	 * missing pages (i.e. pages that have not yet been faulted-in).
1768c2ecf20Sopenharmony_ci	 */
1778c2ecf20Sopenharmony_ci	uffdio_register.range.start = (unsigned long) uf_mem;
1788c2ecf20Sopenharmony_ci	uffdio_register.range.len = UF_MEM_SIZE;
1798c2ecf20Sopenharmony_ci	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1808c2ecf20Sopenharmony_ci	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
1818c2ecf20Sopenharmony_ci		perror("ioctl-UFFDIO_REGISTER");
1828c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
1838c2ecf20Sopenharmony_ci	}
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	/* Create a thread that will process the userfaultfd events */
1868c2ecf20Sopenharmony_ci	ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
1878c2ecf20Sopenharmony_ci	if (ret != 0) {
1888c2ecf20Sopenharmony_ci		fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
1898c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
1908c2ecf20Sopenharmony_ci	}
1918c2ecf20Sopenharmony_ci}
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci/*
1948c2ecf20Sopenharmony_ci * Assumption: the signal was delivered while userspace was in transactional or
1958c2ecf20Sopenharmony_ci * suspended state, i.e. uc->uc_link != NULL.
1968c2ecf20Sopenharmony_ci */
1978c2ecf20Sopenharmony_civoid signal_handler(int signo, siginfo_t *si, void *uc)
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	ucontext_t *ucp = uc;
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	/* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
2028c2ecf20Sopenharmony_ci	ucp->uc_link->uc_mcontext.regs->nip += 4;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	ucp->uc_mcontext.v_regs =
2058c2ecf20Sopenharmony_ci		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	ucp->uc_link->uc_mcontext.v_regs =
2088c2ecf20Sopenharmony_ci		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
2118c2ecf20Sopenharmony_ci}
2128c2ecf20Sopenharmony_ci
/*
 * Tell whether the userfaultfd syscall can be used for this test.
 *
 * The syscall is probed with -1 as its flags argument. The answer is true
 * when the call returns 0, or when it fails with any errno other than ENOSYS;
 * it is false only for a non-zero return combined with ENOSYS.
 */
bool have_userfaultfd(void)
{
	long probe;

	/* Start from a clean errno so that a stale ENOSYS is not misread */
	errno = 0;
	probe = syscall(__NR_userfaultfd, -1);

	if (probe != 0 && errno == ENOSYS)
		return false;

	return true;
}
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ciint tm_signal_pagefault(void)
2248c2ecf20Sopenharmony_ci{
2258c2ecf20Sopenharmony_ci	struct sigaction sa;
2268c2ecf20Sopenharmony_ci	stack_t ss;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	SKIP_IF(!have_htm());
2298c2ecf20Sopenharmony_ci	SKIP_IF(!have_userfaultfd());
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	setup_uf_mem();
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	/*
2348c2ecf20Sopenharmony_ci	 * Set an alternative stack that will generate a page fault when the
2358c2ecf20Sopenharmony_ci	 * signal is raised. The page fault will be treated via userfaultfd,
2368c2ecf20Sopenharmony_ci	 * i.e. via fault_handler_thread.
2378c2ecf20Sopenharmony_ci	 */
2388c2ecf20Sopenharmony_ci	ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
2398c2ecf20Sopenharmony_ci	ss.ss_size = SIGSTKSZ;
2408c2ecf20Sopenharmony_ci	ss.ss_flags = 0;
2418c2ecf20Sopenharmony_ci	if (sigaltstack(&ss, NULL) == -1) {
2428c2ecf20Sopenharmony_ci		perror("sigaltstack() failed");
2438c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
2448c2ecf20Sopenharmony_ci	}
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
2478c2ecf20Sopenharmony_ci	sa.sa_sigaction = signal_handler;
2488c2ecf20Sopenharmony_ci	if (sigaction(SIGTRAP, &sa, NULL) == -1) {
2498c2ecf20Sopenharmony_ci		perror("sigaction() failed");
2508c2ecf20Sopenharmony_ci		exit(EXIT_FAILURE);
2518c2ecf20Sopenharmony_ci	}
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci	/* Trigger a SIGTRAP in transactional state */
2548c2ecf20Sopenharmony_ci	asm __volatile__(
2558c2ecf20Sopenharmony_ci			"tbegin.;"
2568c2ecf20Sopenharmony_ci			"beq    1f;"
2578c2ecf20Sopenharmony_ci			"trap;"
2588c2ecf20Sopenharmony_ci			"1: ;"
2598c2ecf20Sopenharmony_ci			: : : "memory");
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	/* Trigger a SIGTRAP in suspended state */
2628c2ecf20Sopenharmony_ci	asm __volatile__(
2638c2ecf20Sopenharmony_ci			"tbegin.;"
2648c2ecf20Sopenharmony_ci			"beq    1f;"
2658c2ecf20Sopenharmony_ci			"tsuspend.;"
2668c2ecf20Sopenharmony_ci			"trap;"
2678c2ecf20Sopenharmony_ci			"tresume.;"
2688c2ecf20Sopenharmony_ci			"1: ;"
2698c2ecf20Sopenharmony_ci			: : : "memory");
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	return EXIT_SUCCESS;
2728c2ecf20Sopenharmony_ci}
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ciint main(int argc, char **argv)
2758c2ecf20Sopenharmony_ci{
2768c2ecf20Sopenharmony_ci	/*
2778c2ecf20Sopenharmony_ci	 * Depending on kernel config, the TM Bad Thing might not result in a
2788c2ecf20Sopenharmony_ci	 * crash, instead the kernel never returns control back to userspace, so
2798c2ecf20Sopenharmony_ci	 * set a tight timeout. If the test passes it completes almost
2808c2ecf20Sopenharmony_ci	 * immediately.
2818c2ecf20Sopenharmony_ci	 */
2828c2ecf20Sopenharmony_ci	test_harness_set_timeout(2);
2838c2ecf20Sopenharmony_ci	return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
2848c2ecf20Sopenharmony_ci}
285