162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * single_step_syscall.c - single-steps various x86 syscalls
462306a36Sopenharmony_ci * Copyright (c) 2014-2015 Andrew Lutomirski
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * This is a very simple series of tests that makes system calls with
762306a36Sopenharmony_ci * the TF flag set.  This exercises some nasty kernel code in the
862306a36Sopenharmony_ci * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
962306a36Sopenharmony_ci * immediately issues #DB from CPL 0.  This requires special handling in
1062306a36Sopenharmony_ci * the kernel.
1162306a36Sopenharmony_ci */
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#define _GNU_SOURCE
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#include <sys/time.h>
1662306a36Sopenharmony_ci#include <time.h>
1762306a36Sopenharmony_ci#include <stdlib.h>
1862306a36Sopenharmony_ci#include <sys/syscall.h>
1962306a36Sopenharmony_ci#include <unistd.h>
2062306a36Sopenharmony_ci#include <stdio.h>
2162306a36Sopenharmony_ci#include <string.h>
2262306a36Sopenharmony_ci#include <inttypes.h>
2362306a36Sopenharmony_ci#include <sys/mman.h>
2462306a36Sopenharmony_ci#include <sys/signal.h>
2562306a36Sopenharmony_ci#include <sys/ucontext.h>
2662306a36Sopenharmony_ci#include <asm/ldt.h>
2762306a36Sopenharmony_ci#include <err.h>
2862306a36Sopenharmony_ci#include <setjmp.h>
2962306a36Sopenharmony_ci#include <stddef.h>
3062306a36Sopenharmony_ci#include <stdbool.h>
3162306a36Sopenharmony_ci#include <sys/ptrace.h>
3262306a36Sopenharmony_ci#include <sys/user.h>
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci#include "helpers.h"
3562306a36Sopenharmony_ci
/*
 * Install @handler as the three-argument (SA_SIGINFO style) handler for @sig.
 *
 * @flags is OR-ed into sa_flags alongside SA_SIGINFO, and the handler is
 * registered with an empty sa_mask.  If sigaction() fails the program is
 * terminated through err() with exit status 1.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_flags = flags | SA_SIGINFO;
	action.sa_sigaction = handler;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
4762306a36Sopenharmony_ci
/*
 * Put @sig back to its default disposition (SIG_DFL) with an empty sa_mask
 * and no flags.  If sigaction() fails the program is terminated through
 * err() with exit status 1.
 */
static void clearhandler(int sig)
{
	struct sigaction dfl;

	memset(&dfl, 0, sizeof(dfl));
	sigemptyset(&dfl.sa_mask);
	dfl.sa_handler = SIG_DFL;

	if (sigaction(sig, &dfl, NULL) != 0)
		err(1, "sigaction");
}
5762306a36Sopenharmony_ci
/*
 * State shared between the signal handlers and the test code.
 *
 * sig_traps  - bumped by sigtrap() on every SIGTRAP, reset by the test code.
 * sig_eflags - flags register captured by print_and_longjmp() from the
 *              signal context.
 */
static volatile sig_atomic_t sig_traps;
static volatile sig_atomic_t sig_eflags;

/* Jump target: set with sigsetjmp() in main(), used by print_and_longjmp(). */
sigjmp_buf jmpbuf;

/*
 * Per-architecture spellings:
 *   REG_IP         - index of the instruction pointer in uc_mcontext.gregs[]
 *   WIDTH          - operand-size suffix appended to pushf/pop in inline asm
 *   INT80_CLOBBERS - extra clobber list used for the "int $0x80" asm
 */
#if defined(__x86_64__)
# define REG_IP REG_RIP
# define WIDTH "q"
# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
#else
# define REG_IP REG_EIP
# define WIDTH "l"
# define INT80_CLOBBERS
#endif
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_cistatic void sigtrap(int sig, siginfo_t *info, void *ctx_void)
7262306a36Sopenharmony_ci{
7362306a36Sopenharmony_ci	ucontext_t *ctx = (ucontext_t*)ctx_void;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	if (get_eflags() & X86_EFLAGS_TF) {
7662306a36Sopenharmony_ci		set_eflags(get_eflags() & ~X86_EFLAGS_TF);
7762306a36Sopenharmony_ci		printf("[WARN]\tSIGTRAP handler had TF set\n");
7862306a36Sopenharmony_ci		_exit(1);
7962306a36Sopenharmony_ci	}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci	sig_traps++;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	if (sig_traps == 10000 || sig_traps == 10001) {
8462306a36Sopenharmony_ci		printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
8562306a36Sopenharmony_ci		       (int)sig_traps,
8662306a36Sopenharmony_ci		       (unsigned long)info->si_addr,
8762306a36Sopenharmony_ci		       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
8862306a36Sopenharmony_ci	}
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ci
/*
 * Printable names for the signals this test reports, indexed by signal
 * number.  Slots for any other signal number are NULL.
 */
static char const * const signames[] = {
	[SIGSEGV] = "SIGSEGV",
	[SIGBUS] = "SIGBUS",
	[SIGTRAP] = "SIGTRAP",
	[SIGILL] = "SIGILL",
};
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cistatic void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
9962306a36Sopenharmony_ci{
10062306a36Sopenharmony_ci	ucontext_t *ctx = ctx_void;
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig],
10362306a36Sopenharmony_ci	       (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
10462306a36Sopenharmony_ci	       (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF);
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL];
10762306a36Sopenharmony_ci	siglongjmp(jmpbuf, 1);
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_cistatic void check_result(void)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	unsigned long new_eflags = get_eflags();
11362306a36Sopenharmony_ci	set_eflags(new_eflags & ~X86_EFLAGS_TF);
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	if (!sig_traps) {
11662306a36Sopenharmony_ci		printf("[FAIL]\tNo SIGTRAP\n");
11762306a36Sopenharmony_ci		exit(1);
11862306a36Sopenharmony_ci	}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	if (!(new_eflags & X86_EFLAGS_TF)) {
12162306a36Sopenharmony_ci		printf("[FAIL]\tTF was cleared\n");
12262306a36Sopenharmony_ci		exit(1);
12362306a36Sopenharmony_ci	}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
12662306a36Sopenharmony_ci	sig_traps = 0;
12762306a36Sopenharmony_ci}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_cistatic void fast_syscall_no_tf(void)
13062306a36Sopenharmony_ci{
13162306a36Sopenharmony_ci	sig_traps = 0;
13262306a36Sopenharmony_ci	printf("[RUN]\tFast syscall with TF cleared\n");
13362306a36Sopenharmony_ci	fflush(stdout);  /* Force a syscall */
13462306a36Sopenharmony_ci	if (get_eflags() & X86_EFLAGS_TF) {
13562306a36Sopenharmony_ci		printf("[FAIL]\tTF is now set\n");
13662306a36Sopenharmony_ci		exit(1);
13762306a36Sopenharmony_ci	}
13862306a36Sopenharmony_ci	if (sig_traps) {
13962306a36Sopenharmony_ci		printf("[FAIL]\tGot SIGTRAP\n");
14062306a36Sopenharmony_ci		exit(1);
14162306a36Sopenharmony_ci	}
14262306a36Sopenharmony_ci	printf("[OK]\tNothing unexpected happened\n");
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ciint main()
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci#ifdef CAN_BUILD_32
14862306a36Sopenharmony_ci	int tmp;
14962306a36Sopenharmony_ci#endif
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	sethandler(SIGTRAP, sigtrap, 0);
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	printf("[RUN]\tSet TF and check nop\n");
15462306a36Sopenharmony_ci	set_eflags(get_eflags() | X86_EFLAGS_TF);
15562306a36Sopenharmony_ci	asm volatile ("nop");
15662306a36Sopenharmony_ci	check_result();
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci#ifdef __x86_64__
15962306a36Sopenharmony_ci	printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
16062306a36Sopenharmony_ci	set_eflags(get_eflags() | X86_EFLAGS_TF);
16162306a36Sopenharmony_ci	extern unsigned char post_nop[];
16262306a36Sopenharmony_ci	asm volatile ("pushf" WIDTH "\n\t"
16362306a36Sopenharmony_ci		      "pop" WIDTH " %%r11\n\t"
16462306a36Sopenharmony_ci		      "nop\n\t"
16562306a36Sopenharmony_ci		      "post_nop:"
16662306a36Sopenharmony_ci		      : : "c" (post_nop) : "r11");
16762306a36Sopenharmony_ci	check_result();
16862306a36Sopenharmony_ci#endif
16962306a36Sopenharmony_ci#ifdef CAN_BUILD_32
17062306a36Sopenharmony_ci	printf("[RUN]\tSet TF and check int80\n");
17162306a36Sopenharmony_ci	set_eflags(get_eflags() | X86_EFLAGS_TF);
17262306a36Sopenharmony_ci	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
17362306a36Sopenharmony_ci			: INT80_CLOBBERS);
17462306a36Sopenharmony_ci	check_result();
17562306a36Sopenharmony_ci#endif
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	/*
17862306a36Sopenharmony_ci	 * This test is particularly interesting if fast syscalls use
17962306a36Sopenharmony_ci	 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
18062306a36Sopenharmony_ci	 * Specifically, SYSENTER does not clear TF, so either SYSENTER
18162306a36Sopenharmony_ci	 * or the next instruction traps at CPL0.  (Of course, Intel
18262306a36Sopenharmony_ci	 * mostly forgot to document exactly what happens here.)  So we
18362306a36Sopenharmony_ci	 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
18462306a36Sopenharmony_ci	 * no stack.  The only sane way the kernel can possibly handle
18562306a36Sopenharmony_ci	 * it is to clear TF on return from the #DB handler, but this
18662306a36Sopenharmony_ci	 * happens way too early to set TF in the saved pt_regs, so the
18762306a36Sopenharmony_ci	 * kernel has to do something clever to avoid losing track of
18862306a36Sopenharmony_ci	 * the TF bit.
18962306a36Sopenharmony_ci	 *
19062306a36Sopenharmony_ci	 * Needless to say, we've had bugs in this area.
19162306a36Sopenharmony_ci	 */
19262306a36Sopenharmony_ci	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
19362306a36Sopenharmony_ci	printf("[RUN]\tSet TF and check a fast syscall\n");
19462306a36Sopenharmony_ci	set_eflags(get_eflags() | X86_EFLAGS_TF);
19562306a36Sopenharmony_ci	syscall(SYS_getpid);
19662306a36Sopenharmony_ci	check_result();
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	/* Now make sure that another fast syscall doesn't set TF again. */
19962306a36Sopenharmony_ci	fast_syscall_no_tf();
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	/*
20262306a36Sopenharmony_ci	 * And do a forced SYSENTER to make sure that this works even if
20362306a36Sopenharmony_ci	 * fast syscalls don't use SYSENTER.
20462306a36Sopenharmony_ci	 *
20562306a36Sopenharmony_ci	 * Invoking SYSENTER directly breaks all the rules.  Just handle
20662306a36Sopenharmony_ci	 * the SIGSEGV.
20762306a36Sopenharmony_ci	 */
20862306a36Sopenharmony_ci	if (sigsetjmp(jmpbuf, 1) == 0) {
20962306a36Sopenharmony_ci		unsigned long nr = SYS_getpid;
21062306a36Sopenharmony_ci		printf("[RUN]\tSet TF and check SYSENTER\n");
21162306a36Sopenharmony_ci		stack_t stack = {
21262306a36Sopenharmony_ci			.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
21362306a36Sopenharmony_ci			.ss_size = SIGSTKSZ,
21462306a36Sopenharmony_ci		};
21562306a36Sopenharmony_ci		if (sigaltstack(&stack, NULL) != 0)
21662306a36Sopenharmony_ci			err(1, "sigaltstack");
21762306a36Sopenharmony_ci		sethandler(SIGSEGV, print_and_longjmp,
21862306a36Sopenharmony_ci			   SA_RESETHAND | SA_ONSTACK);
21962306a36Sopenharmony_ci		sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
22062306a36Sopenharmony_ci		set_eflags(get_eflags() | X86_EFLAGS_TF);
22162306a36Sopenharmony_ci		free(stack.ss_sp);
22262306a36Sopenharmony_ci		/* Clear EBP first to make sure we segfault cleanly. */
22362306a36Sopenharmony_ci		asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
22462306a36Sopenharmony_ci#ifdef __x86_64__
22562306a36Sopenharmony_ci				, "r11"
22662306a36Sopenharmony_ci#endif
22762306a36Sopenharmony_ci			);
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci		/* We're unreachable here.  SYSENTER forgets RIP. */
23062306a36Sopenharmony_ci	}
23162306a36Sopenharmony_ci	clearhandler(SIGSEGV);
23262306a36Sopenharmony_ci	clearhandler(SIGILL);
23362306a36Sopenharmony_ci	if (!(sig_eflags & X86_EFLAGS_TF)) {
23462306a36Sopenharmony_ci		printf("[FAIL]\tTF was cleared\n");
23562306a36Sopenharmony_ci		exit(1);
23662306a36Sopenharmony_ci	}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	/* Now make sure that another fast syscall doesn't set TF again. */
23962306a36Sopenharmony_ci	fast_syscall_no_tf();
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	return 0;
24262306a36Sopenharmony_ci}
243