1#if defined __amd64__ || defined __i386__
2/*
3 * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17/*
18 * Create a process without mappings by unmapping everything at once and
19 * holding it with ptrace(2). See what happens to
20 *
21 *	/proc/${pid}/maps
22 *	/proc/${pid}/numa_maps
23 *	/proc/${pid}/smaps
24 *	/proc/${pid}/smaps_rollup
25 */
26#undef NDEBUG
27#include <assert.h>
28#include <errno.h>
29#include <stdint.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <fcntl.h>
34#include <sys/mman.h>
35#include <sys/ptrace.h>
36#include <sys/resource.h>
37#include <sys/types.h>
38#include <sys/wait.h>
39#include <unistd.h>
40
41#ifdef __amd64__
42#define TEST_VSYSCALL
43#endif
44
45/*
46 * 0: vsyscall VMA doesn't exist	vsyscall=none
47 * 1: vsyscall VMA is --xp		vsyscall=xonly
48 * 2: vsyscall VMA is r-xp		vsyscall=emulate
49 */
/*
 * Probe result, one of the states listed above. The probing child stores a
 * new value right before every access that may fault and its SIGSEGV handler
 * exits with the current value (presumably why it is volatile); the parent
 * then keeps the child's exit status here.
 */
static volatile int g_vsyscall;
/* Expected vsyscall line of /proc/${pid}/maps; main() selects it by g_vsyscall. */
static const char *g_proc_pid_maps_vsyscall;
/*
 * Expected vsyscall entry of /proc/${pid}/smaps; main() selects it by
 * g_vsyscall.
 * NOTE(review): nothing in this file reads this pointer back -- confirm
 * whether test_proc_pid_smaps() was meant to use it.
 */
static const char *g_proc_pid_smaps_vsyscall;

/* Expected /proc/${pid}/maps contents for g_vsyscall == 0, 1 and 2. */
static const char proc_pid_maps_vsyscall_0[] = "";
static const char proc_pid_maps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
static const char proc_pid_maps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";
59
/*
 * Expected /proc/${pid}/smaps contents for g_vsyscall == 0, 1 and 2.
 *
 * The numeric suffix is the g_vsyscall state, exactly like for the
 * proc_pid_maps_vsyscall_* tables: state 1 is the execute-only mapping
 * (--xp), state 2 is the readable one (r-xp).
 */
static const char proc_pid_smaps_vsyscall_0[] = "";

static const char proc_pid_smaps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n"
"Size:                  4 kB\n"
"KernelPageSize:        4 kB\n"
"MMUPageSize:           4 kB\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
"THPeligible:           0\n"
/*
 * "ProtectionKey:" field is conditional. It is possible to check it as well,
 * but it is not covered here.
 */
;

static const char proc_pid_smaps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
"Size:                  4 kB\n"
"KernelPageSize:        4 kB\n"
"MMUPageSize:           4 kB\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
"THPeligible:           0\n"
/*
 * "ProtectionKey:" field is conditional. It is possible to check it as well,
 * but it is not covered here.
 */
;
121
122static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
123{
124	_exit(EXIT_FAILURE);
125}
126
127#ifdef TEST_VSYSCALL
128static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
129{
130	_exit(g_vsyscall);
131}
132
133/*
134 * vsyscall page can't be unmapped, probe it directly.
135 */
136static void vsyscall(void)
137{
138	pid_t pid;
139	int wstatus;
140
141	pid = fork();
142	if (pid < 0) {
143		fprintf(stderr, "fork, errno %d\n", errno);
144		exit(1);
145	}
146	if (pid == 0) {
147		setrlimit(RLIMIT_CORE, &(struct rlimit){});
148
149		/* Hide "segfault at ffffffffff600000" messages. */
150		struct sigaction act = {};
151		act.sa_flags = SA_SIGINFO;
152		act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
153		sigaction(SIGSEGV, &act, NULL);
154
155		g_vsyscall = 0;
156		/* gettimeofday(NULL, NULL); */
157		uint64_t rax = 0xffffffffff600000;
158		asm volatile (
159			"call *%[rax]"
160			: [rax] "+a" (rax)
161			: "D" (NULL), "S" (NULL)
162			: "rcx", "r11"
163		);
164
165		g_vsyscall = 1;
166		*(volatile int *)0xffffffffff600000UL;
167
168		g_vsyscall = 2;
169		exit(g_vsyscall);
170	}
171	waitpid(pid, &wstatus, 0);
172	if (WIFEXITED(wstatus)) {
173		g_vsyscall = WEXITSTATUS(wstatus);
174	} else {
175		fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
176		exit(1);
177	}
178}
179#endif
180
181static int test_proc_pid_maps(pid_t pid)
182{
183	char buf[4096];
184	snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
185	int fd = open(buf, O_RDONLY);
186	if (fd == -1) {
187		perror("open /proc/${pid}/maps");
188		return EXIT_FAILURE;
189	} else {
190		ssize_t rv = read(fd, buf, sizeof(buf));
191		close(fd);
192		if (g_vsyscall == 0) {
193			assert(rv == 0);
194		} else {
195			size_t len = strlen(g_proc_pid_maps_vsyscall);
196			assert(rv == len);
197			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
198		}
199		return EXIT_SUCCESS;
200	}
201}
202
/*
 * Check that /proc/${pid}/numa_maps of the mapping-less child is empty.
 *
 * Returns EXIT_SUCCESS if the file is missing (ENOENT) or was read,
 * EXIT_FAILURE on any other open error; non-empty content trips an assert.
 */
static int test_proc_pid_numa_maps(pid_t pid)
{
	char path[4096];
	int fd;

	snprintf(path, sizeof(path), "/proc/%u/numa_maps", pid);
	fd = open(path, O_RDONLY);
	if (fd != -1) {
		char buf[4096];
		ssize_t rv = read(fd, buf, sizeof(buf));

		close(fd);
		assert(rv == 0);
		return EXIT_SUCCESS;
	}

	/*
	 * /proc/${pid}/numa_maps is under CONFIG_NUMA,
	 * it doesn't necessarily exist.
	 */
	if (errno == ENOENT) {
		return EXIT_SUCCESS;
	}

	perror("open /proc/${pid}/numa_maps");
	return EXIT_FAILURE;
}
225
226static int test_proc_pid_smaps(pid_t pid)
227{
228	char buf[4096];
229	snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
230	int fd = open(buf, O_RDONLY);
231	if (fd == -1) {
232		if (errno == ENOENT) {
233			/*
234			 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
235			 * it doesn't necessarily exist.
236			 */
237			return EXIT_SUCCESS;
238		}
239		perror("open /proc/${pid}/smaps");
240		return EXIT_FAILURE;
241	} else {
242		ssize_t rv = read(fd, buf, sizeof(buf));
243		close(fd);
244		if (g_vsyscall == 0) {
245			assert(rv == 0);
246		} else {
247			size_t len = strlen(g_proc_pid_maps_vsyscall);
248			/* TODO "ProtectionKey:" */
249			assert(rv > len);
250			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
251		}
252		return EXIT_SUCCESS;
253	}
254}
255
/*
 * Exact contents test_proc_pid_smaps_rollup() expects to read from
 * /proc/${pid}/smaps_rollup: an all-zero "[rollup]" entry.
 */
static const char g_smaps_rollup[] =
"00000000-00000000 ---p 00000000 00:00 0                                  [rollup]\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Pss_Anon:              0 kB\n"
"Pss_File:              0 kB\n"
"Pss_Shmem:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"KSM:                   0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
;
281
282static int test_proc_pid_smaps_rollup(pid_t pid)
283{
284	char buf[4096];
285	snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
286	int fd = open(buf, O_RDONLY);
287	if (fd == -1) {
288		if (errno == ENOENT) {
289			/*
290			 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
291			 * it doesn't necessarily exist.
292			 */
293			return EXIT_SUCCESS;
294		}
295		perror("open /proc/${pid}/smaps_rollup");
296		return EXIT_FAILURE;
297	} else {
298		ssize_t rv = read(fd, buf, sizeof(buf));
299		close(fd);
300		assert(rv == sizeof(g_smaps_rollup) - 1);
301		assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0);
302		return EXIT_SUCCESS;
303	}
304}
305
306int main(void)
307{
308	int rv = EXIT_SUCCESS;
309
310#ifdef TEST_VSYSCALL
311	vsyscall();
312#endif
313
314	switch (g_vsyscall) {
315	case 0:
316		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_0;
317		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
318		break;
319	case 1:
320		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_1;
321		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
322		break;
323	case 2:
324		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_2;
325		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
326		break;
327	default:
328		abort();
329	}
330
331	pid_t pid = fork();
332	if (pid == -1) {
333		perror("fork");
334		return EXIT_FAILURE;
335	} else if (pid == 0) {
336		rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
337		if (rv != 0) {
338			if (errno == EPERM) {
339				fprintf(stderr,
340"Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
341				);
342				kill(getppid(), SIGTERM);
343				return EXIT_FAILURE;
344			}
345			perror("ptrace PTRACE_TRACEME");
346			return EXIT_FAILURE;
347		}
348
349		/*
350		 * Hide "segfault at ..." messages. Signal handler won't run.
351		 */
352		struct sigaction act = {};
353		act.sa_flags = SA_SIGINFO;
354		act.sa_sigaction = sigaction_SIGSEGV;
355		sigaction(SIGSEGV, &act, NULL);
356
357#ifdef __amd64__
358		munmap(NULL, ((size_t)1 << 47) - 4096);
359#elif defined __i386__
360		{
361			size_t len;
362
363			for (len = -4096;; len -= 4096) {
364				munmap(NULL, len);
365			}
366		}
367#else
368#error "implement 'unmap everything'"
369#endif
370		return EXIT_FAILURE;
371	} else {
372		/*
373		 * TODO find reliable way to signal parent that munmap(2) completed.
374		 * Child can't do it directly because it effectively doesn't exist
375		 * anymore. Looking at child's VM files isn't 100% reliable either:
376		 * due to a bug they may not become empty or empty-like.
377		 */
378		sleep(1);
379
380		if (rv == EXIT_SUCCESS) {
381			rv = test_proc_pid_maps(pid);
382		}
383		if (rv == EXIT_SUCCESS) {
384			rv = test_proc_pid_numa_maps(pid);
385		}
386		if (rv == EXIT_SUCCESS) {
387			rv = test_proc_pid_smaps(pid);
388		}
389		if (rv == EXIT_SUCCESS) {
390			rv = test_proc_pid_smaps_rollup(pid);
391		}
392		/*
393		 * TODO test /proc/${pid}/statm, task_statm()
394		 * ->start_code, ->end_code aren't updated by munmap().
395		 * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
396		 */
397
398		/* Cut the rope. */
399		int wstatus;
400		waitpid(pid, &wstatus, 0);
401		assert(WIFSTOPPED(wstatus));
402		assert(WSTOPSIG(wstatus) == SIGSEGV);
403	}
404
405	return rv;
406}
407#else
/*
 * Stub for architectures other than amd64 and i386: nothing is tested.
 * NOTE(review): 4 looks like the kselftest "skip" exit code -- confirm.
 */
int main(void)
{
	const int status = 4;

	return status;
}
412#endif
413