1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * KSM functional tests
4 *
5 * Copyright 2022, Red Hat, Inc.
6 *
7 * Author(s): David Hildenbrand <david@redhat.com>
8 */
9#define _GNU_SOURCE
10#include <stdlib.h>
11#include <string.h>
12#include <stdbool.h>
13#include <stdint.h>
14#include <unistd.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <sys/mman.h>
18#include <sys/prctl.h>
19#include <sys/syscall.h>
20#include <sys/ioctl.h>
21#include <sys/wait.h>
22#include <linux/userfaultfd.h>
23
24#include "../kselftest.h"
25#include "vm_util.h"
26
/* Size units used to dimension the test mappings. */
#define KiB 1024u
#define MiB (1024 * KiB)

/* System page size; assigned from getpagesize() in main(). */
static size_t pagesize;

/* Descriptors that main() opens and verifies before running any test. */
static int mem_fd;		/* /proc/self/mem */
static int pagemap_fd;		/* /proc/self/pagemap */
static int ksm_fd;		/* /sys/kernel/mm/ksm/run */
static int ksm_full_scans_fd;	/* /sys/kernel/mm/ksm/full_scans */

/* Descriptors that main() opens without checking; they may be negative. */
static int ksm_use_zero_pages_fd;		/* /sys/kernel/mm/ksm/use_zero_pages */
static int proc_self_ksm_stat_fd;		/* /proc/self/ksm_stat */
static int proc_self_ksm_merging_pages_fd;	/* /proc/self/ksm_merging_pages */
38
39static bool range_maps_duplicates(char *addr, unsigned long size)
40{
41	unsigned long offs_a, offs_b, pfn_a, pfn_b;
42
43	/*
44	 * There is no easy way to check if there are KSM pages mapped into
45	 * this range. We only check that the range does not map the same PFN
46	 * twice by comparing each pair of mapped pages.
47	 */
48	for (offs_a = 0; offs_a < size; offs_a += pagesize) {
49		pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a);
50		/* Page not present or PFN not exposed by the kernel. */
51		if (pfn_a == -1ul || !pfn_a)
52			continue;
53
54		for (offs_b = offs_a + pagesize; offs_b < size;
55		     offs_b += pagesize) {
56			pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b);
57			if (pfn_b == -1ul || !pfn_b)
58				continue;
59			if (pfn_a == pfn_b)
60				return true;
61		}
62	}
63	return false;
64}
65
66static long get_my_ksm_zero_pages(void)
67{
68	char buf[200];
69	char *substr_ksm_zero;
70	size_t value_pos;
71	ssize_t read_size;
72	unsigned long my_ksm_zero_pages;
73
74	if (!proc_self_ksm_stat_fd)
75		return 0;
76
77	read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
78	if (read_size < 0)
79		return -errno;
80
81	buf[read_size] = 0;
82
83	substr_ksm_zero = strstr(buf, "ksm_zero_pages");
84	if (!substr_ksm_zero)
85		return 0;
86
87	value_pos = strcspn(substr_ksm_zero, "0123456789");
88	my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10);
89
90	return my_ksm_zero_pages;
91}
92
93static long get_my_merging_pages(void)
94{
95	char buf[10];
96	ssize_t ret;
97
98	if (proc_self_ksm_merging_pages_fd < 0)
99		return proc_self_ksm_merging_pages_fd;
100
101	ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
102	if (ret <= 0)
103		return -errno;
104	buf[ret] = 0;
105
106	return strtol(buf, NULL, 10);
107}
108
109static long ksm_get_full_scans(void)
110{
111	char buf[10];
112	ssize_t ret;
113
114	ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
115	if (ret <= 0)
116		return -errno;
117	buf[ret] = 0;
118
119	return strtol(buf, NULL, 10);
120}
121
122static int ksm_merge(void)
123{
124	long start_scans, end_scans;
125
126	/* Wait for two full scans such that any possible merging happened. */
127	start_scans = ksm_get_full_scans();
128	if (start_scans < 0)
129		return start_scans;
130	if (write(ksm_fd, "1", 1) != 1)
131		return -errno;
132	do {
133		end_scans = ksm_get_full_scans();
134		if (end_scans < 0)
135			return end_scans;
136	} while (end_scans < start_scans + 2);
137
138	return 0;
139}
140
141static int ksm_unmerge(void)
142{
143	if (write(ksm_fd, "2", 1) != 1)
144		return -errno;
145	return 0;
146}
147
148static char *mmap_and_merge_range(char val, unsigned long size, int prot,
149				  bool use_prctl)
150{
151	char *map;
152	int ret;
153
154	/* Stabilize accounting by disabling KSM completely. */
155	if (ksm_unmerge()) {
156		ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
157		goto unmap;
158	}
159
160	if (get_my_merging_pages() > 0) {
161		ksft_test_result_fail("Still pages merged\n");
162		goto unmap;
163	}
164
165	map = mmap(NULL, size, PROT_READ|PROT_WRITE,
166		   MAP_PRIVATE|MAP_ANON, -1, 0);
167	if (map == MAP_FAILED) {
168		ksft_test_result_fail("mmap() failed\n");
169		return MAP_FAILED;
170	}
171
172	/* Don't use THP. Ignore if THP are not around on a kernel. */
173	if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
174		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
175		goto unmap;
176	}
177
178	/* Make sure each page contains the same values to merge them. */
179	memset(map, val, size);
180
181	if (mprotect(map, size, prot)) {
182		ksft_test_result_skip("mprotect() failed\n");
183		goto unmap;
184	}
185
186	if (use_prctl) {
187		ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
188		if (ret < 0 && errno == EINVAL) {
189			ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
190			goto unmap;
191		} else if (ret) {
192			ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
193			goto unmap;
194		}
195	} else if (madvise(map, size, MADV_MERGEABLE)) {
196		ksft_test_result_fail("MADV_MERGEABLE failed\n");
197		goto unmap;
198	}
199
200	/* Run KSM to trigger merging and wait. */
201	if (ksm_merge()) {
202		ksft_test_result_fail("Running KSM failed\n");
203		goto unmap;
204	}
205
206	/*
207	 * Check if anything was merged at all. Ignore the zero page that is
208	 * accounted differently (depending on kernel support).
209	 */
210	if (val && !get_my_merging_pages()) {
211		ksft_test_result_fail("No pages got merged\n");
212		goto unmap;
213	}
214
215	return map;
216unmap:
217	munmap(map, size);
218	return MAP_FAILED;
219}
220
221static void test_unmerge(void)
222{
223	const unsigned int size = 2 * MiB;
224	char *map;
225
226	ksft_print_msg("[RUN] %s\n", __func__);
227
228	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
229	if (map == MAP_FAILED)
230		return;
231
232	if (madvise(map, size, MADV_UNMERGEABLE)) {
233		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
234		goto unmap;
235	}
236
237	ksft_test_result(!range_maps_duplicates(map, size),
238			 "Pages were unmerged\n");
239unmap:
240	munmap(map, size);
241}
242
243static void test_unmerge_zero_pages(void)
244{
245	const unsigned int size = 2 * MiB;
246	char *map;
247	unsigned int offs;
248	unsigned long pages_expected;
249
250	ksft_print_msg("[RUN] %s\n", __func__);
251
252	if (proc_self_ksm_stat_fd < 0) {
253		ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n");
254		return;
255	}
256	if (ksm_use_zero_pages_fd < 0) {
257		ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
258		return;
259	}
260	if (write(ksm_use_zero_pages_fd, "1", 1) != 1) {
261		ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
262		return;
263	}
264
265	/* Let KSM deduplicate zero pages. */
266	map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false);
267	if (map == MAP_FAILED)
268		return;
269
270	/* Check if ksm_zero_pages is updated correctly after KSM merging */
271	pages_expected = size / pagesize;
272	if (pages_expected != get_my_ksm_zero_pages()) {
273		ksft_test_result_fail("'ksm_zero_pages' updated after merging\n");
274		goto unmap;
275	}
276
277	/* Try to unmerge half of the region */
278	if (madvise(map, size / 2, MADV_UNMERGEABLE)) {
279		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
280		goto unmap;
281	}
282
283	/* Check if ksm_zero_pages is updated correctly after unmerging */
284	pages_expected /= 2;
285	if (pages_expected != get_my_ksm_zero_pages()) {
286		ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n");
287		goto unmap;
288	}
289
290	/* Trigger unmerging of the other half by writing to the pages. */
291	for (offs = size / 2; offs < size; offs += pagesize)
292		*((unsigned int *)&map[offs]) = offs;
293
294	/* Now we should have no zeropages remaining. */
295	if (get_my_ksm_zero_pages()) {
296		ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n");
297		goto unmap;
298	}
299
300	/* Check if ksm zero pages are really unmerged */
301	ksft_test_result(!range_maps_duplicates(map, size),
302			"KSM zero pages were unmerged\n");
303unmap:
304	munmap(map, size);
305}
306
307static void test_unmerge_discarded(void)
308{
309	const unsigned int size = 2 * MiB;
310	char *map;
311
312	ksft_print_msg("[RUN] %s\n", __func__);
313
314	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
315	if (map == MAP_FAILED)
316		return;
317
318	/* Discard half of all mapped pages so we have pte_none() entries. */
319	if (madvise(map, size / 2, MADV_DONTNEED)) {
320		ksft_test_result_fail("MADV_DONTNEED failed\n");
321		goto unmap;
322	}
323
324	if (madvise(map, size, MADV_UNMERGEABLE)) {
325		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
326		goto unmap;
327	}
328
329	ksft_test_result(!range_maps_duplicates(map, size),
330			 "Pages were unmerged\n");
331unmap:
332	munmap(map, size);
333}
334
335#ifdef __NR_userfaultfd
336static void test_unmerge_uffd_wp(void)
337{
338	struct uffdio_writeprotect uffd_writeprotect;
339	const unsigned int size = 2 * MiB;
340	struct uffdio_api uffdio_api;
341	char *map;
342	int uffd;
343
344	ksft_print_msg("[RUN] %s\n", __func__);
345
346	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
347	if (map == MAP_FAILED)
348		return;
349
350	/* See if UFFD is around. */
351	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
352	if (uffd < 0) {
353		ksft_test_result_skip("__NR_userfaultfd failed\n");
354		goto unmap;
355	}
356
357	/* See if UFFD-WP is around. */
358	uffdio_api.api = UFFD_API;
359	uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
360	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
361		ksft_test_result_fail("UFFDIO_API failed\n");
362		goto close_uffd;
363	}
364	if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
365		ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n");
366		goto close_uffd;
367	}
368
369	/* Register UFFD-WP, no need for an actual handler. */
370	if (uffd_register(uffd, map, size, false, true, false)) {
371		ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n");
372		goto close_uffd;
373	}
374
375	/* Write-protect the range using UFFD-WP. */
376	uffd_writeprotect.range.start = (unsigned long) map;
377	uffd_writeprotect.range.len = size;
378	uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
379	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
380		ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n");
381		goto close_uffd;
382	}
383
384	if (madvise(map, size, MADV_UNMERGEABLE)) {
385		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
386		goto close_uffd;
387	}
388
389	ksft_test_result(!range_maps_duplicates(map, size),
390			 "Pages were unmerged\n");
391close_uffd:
392	close(uffd);
393unmap:
394	munmap(map, size);
395}
396#endif
397
398/* Verify that KSM can be enabled / queried with prctl. */
399static void test_prctl(void)
400{
401	int ret;
402
403	ksft_print_msg("[RUN] %s\n", __func__);
404
405	ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
406	if (ret < 0 && errno == EINVAL) {
407		ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
408		return;
409	} else if (ret) {
410		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
411		return;
412	}
413
414	ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
415	if (ret < 0) {
416		ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
417		return;
418	} else if (ret != 1) {
419		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 not effective\n");
420		return;
421	}
422
423	ret = prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
424	if (ret) {
425		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
426		return;
427	}
428
429	ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
430	if (ret < 0) {
431		ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
432		return;
433	} else if (ret != 0) {
434		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 not effective\n");
435		return;
436	}
437
438	ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n");
439}
440
441/* Verify that prctl ksm flag is inherited. */
442static void test_prctl_fork(void)
443{
444	int ret, status;
445	pid_t child_pid;
446
447	ksft_print_msg("[RUN] %s\n", __func__);
448
449	ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
450	if (ret < 0 && errno == EINVAL) {
451		ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
452		return;
453	} else if (ret) {
454		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
455		return;
456	}
457
458	child_pid = fork();
459	if (!child_pid) {
460		exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
461	} else if (child_pid < 0) {
462		ksft_test_result_fail("fork() failed\n");
463		return;
464	}
465
466	if (waitpid(child_pid, &status, 0) < 0) {
467		ksft_test_result_fail("waitpid() failed\n");
468		return;
469	} else if (WEXITSTATUS(status) != 1) {
470		ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
471		return;
472	}
473
474	if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
475		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
476		return;
477	}
478
479	ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
480}
481
482static void test_prctl_unmerge(void)
483{
484	const unsigned int size = 2 * MiB;
485	char *map;
486
487	ksft_print_msg("[RUN] %s\n", __func__);
488
489	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true);
490	if (map == MAP_FAILED)
491		return;
492
493	if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
494		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
495		goto unmap;
496	}
497
498	ksft_test_result(!range_maps_duplicates(map, size),
499			 "Pages were unmerged\n");
500unmap:
501	munmap(map, size);
502}
503
504static void test_prot_none(void)
505{
506	const unsigned int size = 2 * MiB;
507	char *map;
508	int i;
509
510	ksft_print_msg("[RUN] %s\n", __func__);
511
512	map = mmap_and_merge_range(0x11, size, PROT_NONE, false);
513	if (map == MAP_FAILED)
514		goto unmap;
515
516	/* Store a unique value in each page on one half using ptrace */
517	for (i = 0; i < size / 2; i += pagesize) {
518		lseek(mem_fd, (uintptr_t) map + i, SEEK_SET);
519		if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) {
520			ksft_test_result_fail("ptrace write failed\n");
521			goto unmap;
522		}
523	}
524
525	/* Trigger unsharing on the other half. */
526	if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) {
527		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
528		goto unmap;
529	}
530
531	ksft_test_result(!range_maps_duplicates(map, size),
532			 "Pages were unmerged\n");
533unmap:
534	munmap(map, size);
535}
536
537int main(int argc, char **argv)
538{
539	unsigned int tests = 7;
540	int err;
541
542#ifdef __NR_userfaultfd
543	tests++;
544#endif
545
546	ksft_print_header();
547	ksft_set_plan(tests);
548
549	pagesize = getpagesize();
550
551	mem_fd = open("/proc/self/mem", O_RDWR);
552	if (mem_fd < 0)
553		ksft_exit_fail_msg("opening /proc/self/mem failed\n");
554	ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
555	if (ksm_fd < 0)
556		ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
557	ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
558	if (ksm_full_scans_fd < 0)
559		ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
560	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
561	if (pagemap_fd < 0)
562		ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
563	proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
564	proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
565					      O_RDONLY);
566	ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
567
568	test_unmerge();
569	test_unmerge_zero_pages();
570	test_unmerge_discarded();
571#ifdef __NR_userfaultfd
572	test_unmerge_uffd_wp();
573#endif
574
575	test_prot_none();
576
577	test_prctl();
578	test_prctl_fork();
579	test_prctl_unmerge();
580
581	err = ksft_get_fail_cnt();
582	if (err)
583		ksft_exit_fail_msg("%d out of %d tests failed\n",
584				   err, ksft_test_num());
585	return ksft_exit_pass();
586}
587