1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * single_step_syscall.c - single-steps various x86 syscalls
4 * Copyright (c) 2014-2015 Andrew Lutomirski
5 *
6 * This is a very simple series of tests that makes system calls with
7 * the TF flag set.  This exercises some nasty kernel code in the
8 * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
9 * immediately issues #DB from CPL 0.  This requires special handling in
10 * the kernel.
11 */
12
13#define _GNU_SOURCE
14
15#include <sys/time.h>
16#include <time.h>
17#include <stdlib.h>
18#include <sys/syscall.h>
19#include <unistd.h>
20#include <stdio.h>
21#include <string.h>
22#include <inttypes.h>
23#include <sys/mman.h>
24#include <sys/signal.h>
25#include <sys/ucontext.h>
26#include <asm/ldt.h>
27#include <err.h>
28#include <setjmp.h>
29#include <stddef.h>
30#include <stdbool.h>
31#include <sys/ptrace.h>
32#include <sys/user.h>
33
34#include "helpers.h"
35
/*
 * Install @handler as the three-argument (SA_SIGINFO) handler for @sig.
 *
 * @flags is OR-ed into sa_flags together with SA_SIGINFO.  No additional
 * signals are blocked while the handler runs.  If sigaction() fails the
 * process exits through err().
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction act = {
		.sa_sigaction = handler,
		.sa_flags = flags | SA_SIGINFO,
	};

	sigemptyset(&act.sa_mask);

	if (sigaction(sig, &act, NULL) != 0)
		err(1, "sigaction");
}
47
/*
 * Put @sig back to its default disposition (SIG_DFL).  If sigaction()
 * fails the process exits through err().
 */
static void clearhandler(int sig)
{
	struct sigaction dfl = {
		.sa_handler = SIG_DFL,
	};

	sigemptyset(&dfl.sa_mask);

	if (sigaction(sig, &dfl, NULL) != 0)
		err(1, "sigaction");
}
57
/*
 * sig_traps:  number of SIGTRAPs counted by sigtrap(); reset by
 *             check_result() and fast_syscall_no_tf().
 * sig_eflags: EFLAGS value taken from the signal context by
 *             print_and_longjmp(); inspected by main().
 */
static volatile sig_atomic_t sig_traps, sig_eflags;

/* Set by sigsetjmp() in main(); print_and_longjmp() jumps back to it. */
sigjmp_buf jmpbuf;

/*
 * Memory handed to sigaltstack() by main(); SIGSTKSZ bytes long.
 *
 * This cannot be a file-scope array: with glibc 2.34 and later, when
 * _GNU_SOURCE is defined, SIGSTKSZ expands to a sysconf() call and is
 * no longer a constant expression.  Allocate it before main() runs
 * instead, so existing users of altstack_data keep working unchanged.
 */
static unsigned char *altstack_data;

static void __attribute__((constructor)) alloc_altstack(void)
{
	altstack_data = malloc(SIGSTKSZ);
	if (!altstack_data)
		err(1, "malloc");
}
61
/*
 * Per-ABI spellings used by the code below:
 *
 *   REG_IP          index of the instruction pointer in uc_mcontext.gregs[]
 *   WIDTH           operand-size suffix pasted onto pushf/pop in inline asm
 *   INT80_CLOBBERS  extra registers listed as clobbers of the int $0x80 asm
 *                   (empty on 32-bit builds)
 */
#if defined(__x86_64__)
#define REG_IP		REG_RIP
#define WIDTH		"q"
#define INT80_CLOBBERS	"r8", "r9", "r10", "r11"
#else
#define REG_IP		REG_EIP
#define WIDTH		"l"
#define INT80_CLOBBERS
#endif
71
72static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
73{
74	ucontext_t *ctx = (ucontext_t*)ctx_void;
75
76	if (get_eflags() & X86_EFLAGS_TF) {
77		set_eflags(get_eflags() & ~X86_EFLAGS_TF);
78		printf("[WARN]\tSIGTRAP handler had TF set\n");
79		_exit(1);
80	}
81
82	sig_traps++;
83
84	if (sig_traps == 10000 || sig_traps == 10001) {
85		printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
86		       (int)sig_traps,
87		       (unsigned long)info->si_addr,
88		       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
89	}
90}
91
/*
 * Printable names indexed by signal number, used by print_and_longjmp().
 * Slots for signals that are not listed here are NULL.
 */
static char const * const signames[] = {
	[SIGSEGV] = "SIGSEGV",
	[SIGBUS] = "SIGBUS",
	[SIGTRAP] = "SIGTRAP",
	[SIGILL] = "SIGILL",
};
98
99static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
100{
101	ucontext_t *ctx = ctx_void;
102
103	printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig],
104	       (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
105	       (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF);
106
107	sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL];
108	siglongjmp(jmpbuf, 1);
109}
110
111static void check_result(void)
112{
113	unsigned long new_eflags = get_eflags();
114	set_eflags(new_eflags & ~X86_EFLAGS_TF);
115
116	if (!sig_traps) {
117		printf("[FAIL]\tNo SIGTRAP\n");
118		exit(1);
119	}
120
121	if (!(new_eflags & X86_EFLAGS_TF)) {
122		printf("[FAIL]\tTF was cleared\n");
123		exit(1);
124	}
125
126	printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
127	sig_traps = 0;
128}
129
130static void fast_syscall_no_tf(void)
131{
132	sig_traps = 0;
133	printf("[RUN]\tFast syscall with TF cleared\n");
134	fflush(stdout);  /* Force a syscall */
135	if (get_eflags() & X86_EFLAGS_TF) {
136		printf("[FAIL]\tTF is now set\n");
137		exit(1);
138	}
139	if (sig_traps) {
140		printf("[FAIL]\tGot SIGTRAP\n");
141		exit(1);
142	}
143	printf("[OK]\tNothing unexpected happened\n");
144}
145
146int main()
147{
148#ifdef CAN_BUILD_32
149	int tmp;
150#endif
151
152	sethandler(SIGTRAP, sigtrap, 0);
153
154	printf("[RUN]\tSet TF and check nop\n");
155	set_eflags(get_eflags() | X86_EFLAGS_TF);
156	asm volatile ("nop");
157	check_result();
158
159#ifdef __x86_64__
160	printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
161	set_eflags(get_eflags() | X86_EFLAGS_TF);
162	extern unsigned char post_nop[];
163	asm volatile ("pushf" WIDTH "\n\t"
164		      "pop" WIDTH " %%r11\n\t"
165		      "nop\n\t"
166		      "post_nop:"
167		      : : "c" (post_nop) : "r11");
168	check_result();
169#endif
170#ifdef CAN_BUILD_32
171	printf("[RUN]\tSet TF and check int80\n");
172	set_eflags(get_eflags() | X86_EFLAGS_TF);
173	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
174			: INT80_CLOBBERS);
175	check_result();
176#endif
177
178	/*
179	 * This test is particularly interesting if fast syscalls use
180	 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
181	 * Specifically, SYSENTER does not clear TF, so either SYSENTER
182	 * or the next instruction traps at CPL0.  (Of course, Intel
183	 * mostly forgot to document exactly what happens here.)  So we
184	 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
185	 * no stack.  The only sane way the kernel can possibly handle
186	 * it is to clear TF on return from the #DB handler, but this
187	 * happens way too early to set TF in the saved pt_regs, so the
188	 * kernel has to do something clever to avoid losing track of
189	 * the TF bit.
190	 *
191	 * Needless to say, we've had bugs in this area.
192	 */
193	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
194	printf("[RUN]\tSet TF and check a fast syscall\n");
195	set_eflags(get_eflags() | X86_EFLAGS_TF);
196	syscall(SYS_getpid);
197	check_result();
198
199	/* Now make sure that another fast syscall doesn't set TF again. */
200	fast_syscall_no_tf();
201
202	/*
203	 * And do a forced SYSENTER to make sure that this works even if
204	 * fast syscalls don't use SYSENTER.
205	 *
206	 * Invoking SYSENTER directly breaks all the rules.  Just handle
207	 * the SIGSEGV.
208	 */
209	if (sigsetjmp(jmpbuf, 1) == 0) {
210		unsigned long nr = SYS_getpid;
211		printf("[RUN]\tSet TF and check SYSENTER\n");
212		stack_t stack = {
213			.ss_sp = altstack_data,
214			.ss_size = SIGSTKSZ,
215		};
216		if (sigaltstack(&stack, NULL) != 0)
217			err(1, "sigaltstack");
218		sethandler(SIGSEGV, print_and_longjmp,
219			   SA_RESETHAND | SA_ONSTACK);
220		sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
221		set_eflags(get_eflags() | X86_EFLAGS_TF);
222		/* Clear EBP first to make sure we segfault cleanly. */
223		asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
224#ifdef __x86_64__
225				, "r11"
226#endif
227			);
228
229		/* We're unreachable here.  SYSENTER forgets RIP. */
230	}
231	clearhandler(SIGSEGV);
232	clearhandler(SIGILL);
233	if (!(sig_eflags & X86_EFLAGS_TF)) {
234		printf("[FAIL]\tTF was cleared\n");
235		exit(1);
236	}
237
238	/* Now make sure that another fast syscall doesn't set TF again. */
239	fast_syscall_no_tf();
240
241	return 0;
242}
243