18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright 2020, Gustavo Luiz Duarte, IBM Corp. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * This test starts a transaction and triggers a signal, forcing a pagefault to 68c2ecf20Sopenharmony_ci * happen when the kernel signal handling code touches the user signal stack. 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * In order to avoid pre-faulting the signal stack memory and to force the 98c2ecf20Sopenharmony_ci * pagefault to happen precisely in the kernel signal handling code, the 108c2ecf20Sopenharmony_ci * pagefault handling is done in userspace using the userfaultfd facility. 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * Further pagefaults are triggered by crafting the signal handler's ucontext 138c2ecf20Sopenharmony_ci * to point to additional memory regions managed by the userfaultfd, so using 148c2ecf20Sopenharmony_ci * the same mechanism used to avoid pre-faulting the signal stack memory. 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * On failure (bug is present) kernel crashes or never returns control back to 178c2ecf20Sopenharmony_ci * userspace. If bug is not present, tests completes almost immediately. 188c2ecf20Sopenharmony_ci */ 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ci#include <stdio.h> 218c2ecf20Sopenharmony_ci#include <stdlib.h> 228c2ecf20Sopenharmony_ci#include <string.h> 238c2ecf20Sopenharmony_ci#include <linux/userfaultfd.h> 248c2ecf20Sopenharmony_ci#include <poll.h> 258c2ecf20Sopenharmony_ci#include <unistd.h> 268c2ecf20Sopenharmony_ci#include <sys/ioctl.h> 278c2ecf20Sopenharmony_ci#include <sys/syscall.h> 288c2ecf20Sopenharmony_ci#include <fcntl.h> 298c2ecf20Sopenharmony_ci#include <sys/mman.h> 308c2ecf20Sopenharmony_ci#include <pthread.h> 318c2ecf20Sopenharmony_ci#include <signal.h> 328c2ecf20Sopenharmony_ci#include <errno.h> 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci#include "tm.h" 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci#define UF_MEM_SIZE 655360 /* 10 x 64k pages */ 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci/* Memory handled by userfaultfd */ 408c2ecf20Sopenharmony_cistatic char *uf_mem; 418c2ecf20Sopenharmony_cistatic size_t uf_mem_offset = 0; 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci/* 448c2ecf20Sopenharmony_ci * Data that will be copied into the faulting pages (instead of zero-filled 458c2ecf20Sopenharmony_ci * pages). This is used to make the test more reliable and avoid segfaulting 468c2ecf20Sopenharmony_ci * when we return from the signal handler. Since we are making the signal 478c2ecf20Sopenharmony_ci * handler's ucontext point to newly allocated memory, when that memory is 488c2ecf20Sopenharmony_ci * paged-in it will contain the expected content. 498c2ecf20Sopenharmony_ci */ 508c2ecf20Sopenharmony_cistatic char backing_mem[UF_MEM_SIZE]; 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_cistatic size_t pagesize; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci/* 558c2ecf20Sopenharmony_ci * Return a chunk of at least 'size' bytes of memory that will be handled by 568c2ecf20Sopenharmony_ci * userfaultfd. If 'backing_data' is not NULL, its content will be save to 578c2ecf20Sopenharmony_ci * 'backing_mem' and then copied into the faulting pages when the page fault 588c2ecf20Sopenharmony_ci * is handled. 598c2ecf20Sopenharmony_ci */ 608c2ecf20Sopenharmony_civoid *get_uf_mem(size_t size, void *backing_data) 618c2ecf20Sopenharmony_ci{ 628c2ecf20Sopenharmony_ci void *ret; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci if (uf_mem_offset + size > UF_MEM_SIZE) { 658c2ecf20Sopenharmony_ci fprintf(stderr, "Requesting more uf_mem than expected!\n"); 668c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 678c2ecf20Sopenharmony_ci } 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci ret = &uf_mem[uf_mem_offset]; 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci /* Save the data that will be copied into the faulting page */ 728c2ecf20Sopenharmony_ci if (backing_data != NULL) 738c2ecf20Sopenharmony_ci memcpy(&backing_mem[uf_mem_offset], backing_data, size); 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci /* Reserve the requested amount of uf_mem */ 768c2ecf20Sopenharmony_ci uf_mem_offset += size; 778c2ecf20Sopenharmony_ci /* Keep uf_mem_offset aligned to the page size (round up) */ 788c2ecf20Sopenharmony_ci uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1); 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci return ret; 818c2ecf20Sopenharmony_ci} 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_civoid *fault_handler_thread(void *arg) 848c2ecf20Sopenharmony_ci{ 858c2ecf20Sopenharmony_ci struct uffd_msg msg; /* Data read from userfaultfd */ 868c2ecf20Sopenharmony_ci long uffd; /* userfaultfd file descriptor */ 878c2ecf20Sopenharmony_ci struct uffdio_copy uffdio_copy; 888c2ecf20Sopenharmony_ci struct pollfd pollfd; 898c2ecf20Sopenharmony_ci ssize_t nread, offset; 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci uffd = (long) arg; 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci for (;;) { 948c2ecf20Sopenharmony_ci pollfd.fd = uffd; 958c2ecf20Sopenharmony_ci pollfd.events = POLLIN; 968c2ecf20Sopenharmony_ci if (poll(&pollfd, 1, -1) == -1) { 978c2ecf20Sopenharmony_ci perror("poll() failed"); 988c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 998c2ecf20Sopenharmony_ci } 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci nread = read(uffd, &msg, sizeof(msg)); 1028c2ecf20Sopenharmony_ci if (nread == 0) { 1038c2ecf20Sopenharmony_ci fprintf(stderr, "read(): EOF on userfaultfd\n"); 1048c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1058c2ecf20Sopenharmony_ci } 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci if (nread == -1) { 1088c2ecf20Sopenharmony_ci perror("read() failed"); 1098c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1108c2ecf20Sopenharmony_ci } 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci /* We expect only one kind of event */ 1138c2ecf20Sopenharmony_ci if (msg.event != UFFD_EVENT_PAGEFAULT) { 1148c2ecf20Sopenharmony_ci fprintf(stderr, "Unexpected event on userfaultfd\n"); 1158c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1168c2ecf20Sopenharmony_ci } 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci /* 1198c2ecf20Sopenharmony_ci * We need to handle page faults in units of pages(!). 1208c2ecf20Sopenharmony_ci * So, round faulting address down to page boundary. 1218c2ecf20Sopenharmony_ci */ 1228c2ecf20Sopenharmony_ci uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1); 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci offset = (char *) uffdio_copy.dst - uf_mem; 1258c2ecf20Sopenharmony_ci uffdio_copy.src = (unsigned long) &backing_mem[offset]; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci uffdio_copy.len = pagesize; 1288c2ecf20Sopenharmony_ci uffdio_copy.mode = 0; 1298c2ecf20Sopenharmony_ci uffdio_copy.copy = 0; 1308c2ecf20Sopenharmony_ci if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { 1318c2ecf20Sopenharmony_ci perror("ioctl-UFFDIO_COPY failed"); 1328c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1338c2ecf20Sopenharmony_ci } 1348c2ecf20Sopenharmony_ci } 1358c2ecf20Sopenharmony_ci} 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_civoid setup_uf_mem(void) 1388c2ecf20Sopenharmony_ci{ 1398c2ecf20Sopenharmony_ci long uffd; /* userfaultfd file descriptor */ 1408c2ecf20Sopenharmony_ci pthread_t thr; 1418c2ecf20Sopenharmony_ci struct uffdio_api uffdio_api; 1428c2ecf20Sopenharmony_ci struct uffdio_register uffdio_register; 1438c2ecf20Sopenharmony_ci int ret; 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci pagesize = sysconf(_SC_PAGE_SIZE); 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci /* Create and enable userfaultfd object */ 1488c2ecf20Sopenharmony_ci uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 1498c2ecf20Sopenharmony_ci if (uffd == -1) { 1508c2ecf20Sopenharmony_ci perror("userfaultfd() failed"); 1518c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1528c2ecf20Sopenharmony_ci } 1538c2ecf20Sopenharmony_ci uffdio_api.api = UFFD_API; 1548c2ecf20Sopenharmony_ci uffdio_api.features = 0; 1558c2ecf20Sopenharmony_ci if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { 1568c2ecf20Sopenharmony_ci perror("ioctl-UFFDIO_API failed"); 1578c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1588c2ecf20Sopenharmony_ci } 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci /* 1618c2ecf20Sopenharmony_ci * Create a private anonymous mapping. The memory will be demand-zero 1628c2ecf20Sopenharmony_ci * paged, that is, not yet allocated. When we actually touch the memory 1638c2ecf20Sopenharmony_ci * the related page will be allocated via the userfaultfd mechanism. 1648c2ecf20Sopenharmony_ci */ 1658c2ecf20Sopenharmony_ci uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE, 1668c2ecf20Sopenharmony_ci MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1678c2ecf20Sopenharmony_ci if (uf_mem == MAP_FAILED) { 1688c2ecf20Sopenharmony_ci perror("mmap() failed"); 1698c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1708c2ecf20Sopenharmony_ci } 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci /* 1738c2ecf20Sopenharmony_ci * Register the memory range of the mapping we've just mapped to be 1748c2ecf20Sopenharmony_ci * handled by the userfaultfd object. In 'mode' we request to track 1758c2ecf20Sopenharmony_ci * missing pages (i.e. pages that have not yet been faulted-in). 1768c2ecf20Sopenharmony_ci */ 1778c2ecf20Sopenharmony_ci uffdio_register.range.start = (unsigned long) uf_mem; 1788c2ecf20Sopenharmony_ci uffdio_register.range.len = UF_MEM_SIZE; 1798c2ecf20Sopenharmony_ci uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; 1808c2ecf20Sopenharmony_ci if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { 1818c2ecf20Sopenharmony_ci perror("ioctl-UFFDIO_REGISTER"); 1828c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1838c2ecf20Sopenharmony_ci } 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci /* Create a thread that will process the userfaultfd events */ 1868c2ecf20Sopenharmony_ci ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); 1878c2ecf20Sopenharmony_ci if (ret != 0) { 1888c2ecf20Sopenharmony_ci fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret); 1898c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 1908c2ecf20Sopenharmony_ci } 1918c2ecf20Sopenharmony_ci} 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci/* 1948c2ecf20Sopenharmony_ci * Assumption: the signal was delivered while userspace was in transactional or 1958c2ecf20Sopenharmony_ci * suspended state, i.e. uc->uc_link != NULL. 1968c2ecf20Sopenharmony_ci */ 1978c2ecf20Sopenharmony_civoid signal_handler(int signo, siginfo_t *si, void *uc) 1988c2ecf20Sopenharmony_ci{ 1998c2ecf20Sopenharmony_ci ucontext_t *ucp = uc; 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */ 2028c2ecf20Sopenharmony_ci ucp->uc_link->uc_mcontext.regs->nip += 4; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci ucp->uc_mcontext.v_regs = 2058c2ecf20Sopenharmony_ci get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs); 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci ucp->uc_link->uc_mcontext.v_regs = 2088c2ecf20Sopenharmony_ci get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs); 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link); 2118c2ecf20Sopenharmony_ci} 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_cibool have_userfaultfd(void) 2148c2ecf20Sopenharmony_ci{ 2158c2ecf20Sopenharmony_ci long rc; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci errno = 0; 2188c2ecf20Sopenharmony_ci rc = syscall(__NR_userfaultfd, -1); 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci return rc == 0 || errno != ENOSYS; 2218c2ecf20Sopenharmony_ci} 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ciint tm_signal_pagefault(void) 2248c2ecf20Sopenharmony_ci{ 2258c2ecf20Sopenharmony_ci struct sigaction sa; 2268c2ecf20Sopenharmony_ci stack_t ss; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci SKIP_IF(!have_htm()); 2298c2ecf20Sopenharmony_ci SKIP_IF(!have_userfaultfd()); 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci setup_uf_mem(); 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci /* 2348c2ecf20Sopenharmony_ci * Set an alternative stack that will generate a page fault when the 2358c2ecf20Sopenharmony_ci * signal is raised. The page fault will be treated via userfaultfd, 2368c2ecf20Sopenharmony_ci * i.e. via fault_handler_thread. 2378c2ecf20Sopenharmony_ci */ 2388c2ecf20Sopenharmony_ci ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL); 2398c2ecf20Sopenharmony_ci ss.ss_size = SIGSTKSZ; 2408c2ecf20Sopenharmony_ci ss.ss_flags = 0; 2418c2ecf20Sopenharmony_ci if (sigaltstack(&ss, NULL) == -1) { 2428c2ecf20Sopenharmony_ci perror("sigaltstack() failed"); 2438c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 2448c2ecf20Sopenharmony_ci } 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci sa.sa_flags = SA_SIGINFO | SA_ONSTACK; 2478c2ecf20Sopenharmony_ci sa.sa_sigaction = signal_handler; 2488c2ecf20Sopenharmony_ci if (sigaction(SIGTRAP, &sa, NULL) == -1) { 2498c2ecf20Sopenharmony_ci perror("sigaction() failed"); 2508c2ecf20Sopenharmony_ci exit(EXIT_FAILURE); 2518c2ecf20Sopenharmony_ci } 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci /* Trigger a SIGTRAP in transactional state */ 2548c2ecf20Sopenharmony_ci asm __volatile__( 2558c2ecf20Sopenharmony_ci "tbegin.;" 2568c2ecf20Sopenharmony_ci "beq 1f;" 2578c2ecf20Sopenharmony_ci "trap;" 2588c2ecf20Sopenharmony_ci "1: ;" 2598c2ecf20Sopenharmony_ci : : : "memory"); 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci /* Trigger a SIGTRAP in suspended state */ 2628c2ecf20Sopenharmony_ci asm __volatile__( 2638c2ecf20Sopenharmony_ci "tbegin.;" 2648c2ecf20Sopenharmony_ci "beq 1f;" 2658c2ecf20Sopenharmony_ci "tsuspend.;" 2668c2ecf20Sopenharmony_ci "trap;" 2678c2ecf20Sopenharmony_ci "tresume.;" 2688c2ecf20Sopenharmony_ci "1: ;" 2698c2ecf20Sopenharmony_ci : : : "memory"); 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci return EXIT_SUCCESS; 2728c2ecf20Sopenharmony_ci} 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ciint main(int argc, char **argv) 2758c2ecf20Sopenharmony_ci{ 2768c2ecf20Sopenharmony_ci /* 2778c2ecf20Sopenharmony_ci * Depending on kernel config, the TM Bad Thing might not result in a 2788c2ecf20Sopenharmony_ci * crash, instead the kernel never returns control back to userspace, so 2798c2ecf20Sopenharmony_ci * set a tight timeout. If the test passes it completes almost 2808c2ecf20Sopenharmony_ci * immediately. 2818c2ecf20Sopenharmony_ci */ 2828c2ecf20Sopenharmony_ci test_harness_set_timeout(2); 2838c2ecf20Sopenharmony_ci return test_harness(tm_signal_pagefault, "tm_signal_pagefault"); 2848c2ecf20Sopenharmony_ci} 285