162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2020, Gustavo Luiz Duarte, IBM Corp. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This test starts a transaction and triggers a signal, forcing a pagefault to 662306a36Sopenharmony_ci * happen when the kernel signal handling code touches the user signal stack. 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * In order to avoid pre-faulting the signal stack memory and to force the 962306a36Sopenharmony_ci * pagefault to happen precisely in the kernel signal handling code, the 1062306a36Sopenharmony_ci * pagefault handling is done in userspace using the userfaultfd facility. 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * Further pagefaults are triggered by crafting the signal handler's ucontext 1362306a36Sopenharmony_ci * to point to additional memory regions managed by the userfaultfd, so using 1462306a36Sopenharmony_ci * the same mechanism used to avoid pre-faulting the signal stack memory. 1562306a36Sopenharmony_ci * 1662306a36Sopenharmony_ci * On failure (bug is present) kernel crashes or never returns control back to 1762306a36Sopenharmony_ci * userspace. If bug is not present, tests completes almost immediately. 1862306a36Sopenharmony_ci */ 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <stdio.h> 2162306a36Sopenharmony_ci#include <stdlib.h> 2262306a36Sopenharmony_ci#include <string.h> 2362306a36Sopenharmony_ci#include <linux/userfaultfd.h> 2462306a36Sopenharmony_ci#include <poll.h> 2562306a36Sopenharmony_ci#include <unistd.h> 2662306a36Sopenharmony_ci#include <sys/ioctl.h> 2762306a36Sopenharmony_ci#include <sys/syscall.h> 2862306a36Sopenharmony_ci#include <fcntl.h> 2962306a36Sopenharmony_ci#include <sys/mman.h> 3062306a36Sopenharmony_ci#include <pthread.h> 3162306a36Sopenharmony_ci#include <signal.h> 3262306a36Sopenharmony_ci#include <errno.h> 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#include "tm.h" 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci#define UF_MEM_SIZE 655360 /* 10 x 64k pages */ 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci/* Memory handled by userfaultfd */ 4062306a36Sopenharmony_cistatic char *uf_mem; 4162306a36Sopenharmony_cistatic size_t uf_mem_offset = 0; 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci/* 4462306a36Sopenharmony_ci * Data that will be copied into the faulting pages (instead of zero-filled 4562306a36Sopenharmony_ci * pages). This is used to make the test more reliable and avoid segfaulting 4662306a36Sopenharmony_ci * when we return from the signal handler. Since we are making the signal 4762306a36Sopenharmony_ci * handler's ucontext point to newly allocated memory, when that memory is 4862306a36Sopenharmony_ci * paged-in it will contain the expected content. 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_cistatic char backing_mem[UF_MEM_SIZE]; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_cistatic size_t pagesize; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci/* 5562306a36Sopenharmony_ci * Return a chunk of at least 'size' bytes of memory that will be handled by 5662306a36Sopenharmony_ci * userfaultfd. If 'backing_data' is not NULL, its content will be save to 5762306a36Sopenharmony_ci * 'backing_mem' and then copied into the faulting pages when the page fault 5862306a36Sopenharmony_ci * is handled. 5962306a36Sopenharmony_ci */ 6062306a36Sopenharmony_civoid *get_uf_mem(size_t size, void *backing_data) 6162306a36Sopenharmony_ci{ 6262306a36Sopenharmony_ci void *ret; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci if (uf_mem_offset + size > UF_MEM_SIZE) { 6562306a36Sopenharmony_ci fprintf(stderr, "Requesting more uf_mem than expected!\n"); 6662306a36Sopenharmony_ci exit(EXIT_FAILURE); 6762306a36Sopenharmony_ci } 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci ret = &uf_mem[uf_mem_offset]; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci /* Save the data that will be copied into the faulting page */ 7262306a36Sopenharmony_ci if (backing_data != NULL) 7362306a36Sopenharmony_ci memcpy(&backing_mem[uf_mem_offset], backing_data, size); 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci /* Reserve the requested amount of uf_mem */ 7662306a36Sopenharmony_ci uf_mem_offset += size; 7762306a36Sopenharmony_ci /* Keep uf_mem_offset aligned to the page size (round up) */ 7862306a36Sopenharmony_ci uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1); 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci return ret; 8162306a36Sopenharmony_ci} 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_civoid *fault_handler_thread(void *arg) 8462306a36Sopenharmony_ci{ 8562306a36Sopenharmony_ci struct uffd_msg msg; /* Data read from userfaultfd */ 8662306a36Sopenharmony_ci long uffd; /* userfaultfd file descriptor */ 8762306a36Sopenharmony_ci struct uffdio_copy uffdio_copy; 8862306a36Sopenharmony_ci struct pollfd pollfd; 8962306a36Sopenharmony_ci ssize_t nread, offset; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci uffd = (long) arg; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci for (;;) { 9462306a36Sopenharmony_ci pollfd.fd = uffd; 9562306a36Sopenharmony_ci pollfd.events = POLLIN; 9662306a36Sopenharmony_ci if (poll(&pollfd, 1, -1) == -1) { 9762306a36Sopenharmony_ci perror("poll() failed"); 9862306a36Sopenharmony_ci exit(EXIT_FAILURE); 9962306a36Sopenharmony_ci } 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci nread = read(uffd, &msg, sizeof(msg)); 10262306a36Sopenharmony_ci if (nread == 0) { 10362306a36Sopenharmony_ci fprintf(stderr, "read(): EOF on userfaultfd\n"); 10462306a36Sopenharmony_ci exit(EXIT_FAILURE); 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci if (nread == -1) { 10862306a36Sopenharmony_ci perror("read() failed"); 10962306a36Sopenharmony_ci exit(EXIT_FAILURE); 11062306a36Sopenharmony_ci } 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci /* We expect only one kind of event */ 11362306a36Sopenharmony_ci if (msg.event != UFFD_EVENT_PAGEFAULT) { 11462306a36Sopenharmony_ci fprintf(stderr, "Unexpected event on userfaultfd\n"); 11562306a36Sopenharmony_ci exit(EXIT_FAILURE); 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci /* 11962306a36Sopenharmony_ci * We need to handle page faults in units of pages(!). 12062306a36Sopenharmony_ci * So, round faulting address down to page boundary. 12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_ci uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci offset = (char *) uffdio_copy.dst - uf_mem; 12562306a36Sopenharmony_ci uffdio_copy.src = (unsigned long) &backing_mem[offset]; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci uffdio_copy.len = pagesize; 12862306a36Sopenharmony_ci uffdio_copy.mode = 0; 12962306a36Sopenharmony_ci uffdio_copy.copy = 0; 13062306a36Sopenharmony_ci if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { 13162306a36Sopenharmony_ci perror("ioctl-UFFDIO_COPY failed"); 13262306a36Sopenharmony_ci exit(EXIT_FAILURE); 13362306a36Sopenharmony_ci } 13462306a36Sopenharmony_ci } 13562306a36Sopenharmony_ci} 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_civoid setup_uf_mem(void) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci long uffd; /* userfaultfd file descriptor */ 14062306a36Sopenharmony_ci pthread_t thr; 14162306a36Sopenharmony_ci struct uffdio_api uffdio_api; 14262306a36Sopenharmony_ci struct uffdio_register uffdio_register; 14362306a36Sopenharmony_ci int ret; 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci pagesize = sysconf(_SC_PAGE_SIZE); 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci /* Create and enable userfaultfd object */ 14862306a36Sopenharmony_ci uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 14962306a36Sopenharmony_ci if (uffd == -1) { 15062306a36Sopenharmony_ci perror("userfaultfd() failed"); 15162306a36Sopenharmony_ci exit(EXIT_FAILURE); 15262306a36Sopenharmony_ci } 15362306a36Sopenharmony_ci uffdio_api.api = UFFD_API; 15462306a36Sopenharmony_ci uffdio_api.features = 0; 15562306a36Sopenharmony_ci if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { 15662306a36Sopenharmony_ci perror("ioctl-UFFDIO_API failed"); 15762306a36Sopenharmony_ci exit(EXIT_FAILURE); 15862306a36Sopenharmony_ci } 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci /* 16162306a36Sopenharmony_ci * Create a private anonymous mapping. The memory will be demand-zero 16262306a36Sopenharmony_ci * paged, that is, not yet allocated. When we actually touch the memory 16362306a36Sopenharmony_ci * the related page will be allocated via the userfaultfd mechanism. 16462306a36Sopenharmony_ci */ 16562306a36Sopenharmony_ci uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE, 16662306a36Sopenharmony_ci MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 16762306a36Sopenharmony_ci if (uf_mem == MAP_FAILED) { 16862306a36Sopenharmony_ci perror("mmap() failed"); 16962306a36Sopenharmony_ci exit(EXIT_FAILURE); 17062306a36Sopenharmony_ci } 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci /* 17362306a36Sopenharmony_ci * Register the memory range of the mapping we've just mapped to be 17462306a36Sopenharmony_ci * handled by the userfaultfd object. In 'mode' we request to track 17562306a36Sopenharmony_ci * missing pages (i.e. pages that have not yet been faulted-in). 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_ci uffdio_register.range.start = (unsigned long) uf_mem; 17862306a36Sopenharmony_ci uffdio_register.range.len = UF_MEM_SIZE; 17962306a36Sopenharmony_ci uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; 18062306a36Sopenharmony_ci if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { 18162306a36Sopenharmony_ci perror("ioctl-UFFDIO_REGISTER"); 18262306a36Sopenharmony_ci exit(EXIT_FAILURE); 18362306a36Sopenharmony_ci } 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci /* Create a thread that will process the userfaultfd events */ 18662306a36Sopenharmony_ci ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); 18762306a36Sopenharmony_ci if (ret != 0) { 18862306a36Sopenharmony_ci fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret); 18962306a36Sopenharmony_ci exit(EXIT_FAILURE); 19062306a36Sopenharmony_ci } 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci/* 19462306a36Sopenharmony_ci * Assumption: the signal was delivered while userspace was in transactional or 19562306a36Sopenharmony_ci * suspended state, i.e. uc->uc_link != NULL. 19662306a36Sopenharmony_ci */ 19762306a36Sopenharmony_civoid signal_handler(int signo, siginfo_t *si, void *uc) 19862306a36Sopenharmony_ci{ 19962306a36Sopenharmony_ci ucontext_t *ucp = uc; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */ 20262306a36Sopenharmony_ci ucp->uc_link->uc_mcontext.regs->nip += 4; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci ucp->uc_mcontext.v_regs = 20562306a36Sopenharmony_ci get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs); 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci ucp->uc_link->uc_mcontext.v_regs = 20862306a36Sopenharmony_ci get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link); 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_cibool have_userfaultfd(void) 21462306a36Sopenharmony_ci{ 21562306a36Sopenharmony_ci long rc; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci errno = 0; 21862306a36Sopenharmony_ci rc = syscall(__NR_userfaultfd, -1); 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci return rc == 0 || errno != ENOSYS; 22162306a36Sopenharmony_ci} 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ciint tm_signal_pagefault(void) 22462306a36Sopenharmony_ci{ 22562306a36Sopenharmony_ci struct sigaction sa; 22662306a36Sopenharmony_ci stack_t ss; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci SKIP_IF(!have_htm()); 22962306a36Sopenharmony_ci SKIP_IF(htm_is_synthetic()); 23062306a36Sopenharmony_ci SKIP_IF(!have_userfaultfd()); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci setup_uf_mem(); 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci /* 23562306a36Sopenharmony_ci * Set an alternative stack that will generate a page fault when the 23662306a36Sopenharmony_ci * signal is raised. The page fault will be treated via userfaultfd, 23762306a36Sopenharmony_ci * i.e. via fault_handler_thread. 23862306a36Sopenharmony_ci */ 23962306a36Sopenharmony_ci ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL); 24062306a36Sopenharmony_ci ss.ss_size = SIGSTKSZ; 24162306a36Sopenharmony_ci ss.ss_flags = 0; 24262306a36Sopenharmony_ci if (sigaltstack(&ss, NULL) == -1) { 24362306a36Sopenharmony_ci perror("sigaltstack() failed"); 24462306a36Sopenharmony_ci exit(EXIT_FAILURE); 24562306a36Sopenharmony_ci } 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci sa.sa_flags = SA_SIGINFO | SA_ONSTACK; 24862306a36Sopenharmony_ci sa.sa_sigaction = signal_handler; 24962306a36Sopenharmony_ci if (sigaction(SIGTRAP, &sa, NULL) == -1) { 25062306a36Sopenharmony_ci perror("sigaction() failed"); 25162306a36Sopenharmony_ci exit(EXIT_FAILURE); 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci /* Trigger a SIGTRAP in transactional state */ 25562306a36Sopenharmony_ci asm __volatile__( 25662306a36Sopenharmony_ci "tbegin.;" 25762306a36Sopenharmony_ci "beq 1f;" 25862306a36Sopenharmony_ci "trap;" 25962306a36Sopenharmony_ci "1: ;" 26062306a36Sopenharmony_ci : : : "memory"); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci /* Trigger a SIGTRAP in suspended state */ 26362306a36Sopenharmony_ci asm __volatile__( 26462306a36Sopenharmony_ci "tbegin.;" 26562306a36Sopenharmony_ci "beq 1f;" 26662306a36Sopenharmony_ci "tsuspend.;" 26762306a36Sopenharmony_ci "trap;" 26862306a36Sopenharmony_ci "tresume.;" 26962306a36Sopenharmony_ci "1: ;" 27062306a36Sopenharmony_ci : : : "memory"); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci return EXIT_SUCCESS; 27362306a36Sopenharmony_ci} 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ciint main(int argc, char **argv) 27662306a36Sopenharmony_ci{ 27762306a36Sopenharmony_ci /* 27862306a36Sopenharmony_ci * Depending on kernel config, the TM Bad Thing might not result in a 27962306a36Sopenharmony_ci * crash, instead the kernel never returns control back to userspace, so 28062306a36Sopenharmony_ci * set a tight timeout. If the test passes it completes almost 28162306a36Sopenharmony_ci * immediately. 28262306a36Sopenharmony_ci */ 28362306a36Sopenharmony_ci test_harness_set_timeout(2); 28462306a36Sopenharmony_ci return test_harness(tm_signal_pagefault, "tm_signal_pagefault"); 28562306a36Sopenharmony_ci} 286