1f08c3bdfSopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
2f08c3bdfSopenharmony_ci/*
3f08c3bdfSopenharmony_ci * Copyright (c) 2016 Red Hat, Inc.
4f08c3bdfSopenharmony_ci */
5f08c3bdfSopenharmony_ci
6f08c3bdfSopenharmony_ci/*\
7f08c3bdfSopenharmony_ci * [Description]
8f08c3bdfSopenharmony_ci *
 * A page fault occurs even though the madvise(WILLNEED) system call was
 * used to prefetch the page. The issue is reproduced by running a program
 * which sequentially accesses shared memory and calls madvise(WILLNEED)
 * on the next page upon each page fault.
13f08c3bdfSopenharmony_ci *
14f08c3bdfSopenharmony_ci * This bug is present in all RHEL7 versions. It looks like this was fixed in
15f08c3bdfSopenharmony_ci * mainline kernel > v3.15 by the following patch:
16f08c3bdfSopenharmony_ci *
17f08c3bdfSopenharmony_ci *  commit 55231e5c898c5c03c14194001e349f40f59bd300
18f08c3bdfSopenharmony_ci *  Author: Johannes Weiner <hannes@cmpxchg.org>
19f08c3bdfSopenharmony_ci *  Date:   Thu May 22 11:54:17 2014 -0700
20f08c3bdfSopenharmony_ci *
21f08c3bdfSopenharmony_ci *     mm: madvise: fix MADV_WILLNEED on shmem swapouts
22f08c3bdfSopenharmony_ci *
23f08c3bdfSopenharmony_ci * Two checks are performed, the first looks at how SwapCache
24f08c3bdfSopenharmony_ci * changes during madvise. When the pages are dirtied, about half
25f08c3bdfSopenharmony_ci * will be accounted for under Cached and the other half will be
26f08c3bdfSopenharmony_ci * moved into Swap. When madvise is run it will cause the pages
27f08c3bdfSopenharmony_ci * under Cached to also be moved to Swap while rotating the pages
28f08c3bdfSopenharmony_ci * already in Swap into SwapCached. So we expect that SwapCached has
29f08c3bdfSopenharmony_ci * roughly MEM_LIMIT bytes added to it, but for reliability the
30f08c3bdfSopenharmony_ci * PASS_THRESHOLD is much lower than that.
31f08c3bdfSopenharmony_ci *
 * Secondly we run madvise again, but only on the first
 * PASS_THRESHOLD bytes to ensure these are entirely in RAM. Then we
 * dirty these pages and check there were (almost) no page
 * faults. Two faults are allowed in case some tasklet, or some other
 * unexpected but irrelevant procedure, registers a fault to
 * our process.
38f08c3bdfSopenharmony_ci *
 * It can also reproduce the MADV_WILLNEED performance problem.
 * It was introduced in the 5.9 kernel by the following commit
 *   e6e88712e43b ("mm: optimise madvise WILLNEED")
 * and fixed in the 5.10-rc5 kernel by the following commit
 *   66383800df9c ("mm: fix madvise WILLNEED performance problem").
44f08c3bdfSopenharmony_ci */
45f08c3bdfSopenharmony_ci
46f08c3bdfSopenharmony_ci#include <errno.h>
47f08c3bdfSopenharmony_ci#include <stdio.h>
48f08c3bdfSopenharmony_ci#include <sys/mount.h>
49f08c3bdfSopenharmony_ci#include <sys/sysinfo.h>
50f08c3bdfSopenharmony_ci#include "tst_test.h"
51f08c3bdfSopenharmony_ci
/* Size in bytes of the shared anonymous mapping the test works on. */
#define CHUNK_SZ (400 * 1024 * 1024L)
/* Value written to the test cgroup's memory.max: half of the mapping. */
#define MEM_LIMIT (CHUNK_SZ / 2)
/* Value written to memory.swap.max when that knob exists. */
#define MEMSW_LIMIT (2 * CHUNK_SZ)
/* SwapCached growth that is reported as a pass, in bytes and in Kb. */
#define PASS_THRESHOLD (CHUNK_SZ / 4)
#define PASS_THRESHOLD_KB (PASS_THRESHOLD / 1024)

static const char drop_caches_fname[] = "/proc/sys/vm/drop_caches";

/* Page size as returned by getpagesize(); assigned in setup(). */
static int pg_sz;
/* Non-zero once setup() found /proc/sys/vm/stat_refresh writable. */
static int stat_refresh_sup;

/* Baseline /proc/meminfo readings taken in setup(); diagnostics subtract them. */
static long init_swap;
static long init_swap_cached;
static long init_cached;
62f08c3bdfSopenharmony_ci
63f08c3bdfSopenharmony_cistatic void check_path(const char *path)
64f08c3bdfSopenharmony_ci{
65f08c3bdfSopenharmony_ci	if (access(path, R_OK | W_OK))
66f08c3bdfSopenharmony_ci		tst_brk(TCONF, "file needed: %s", path);
67f08c3bdfSopenharmony_ci}
68f08c3bdfSopenharmony_ci
69f08c3bdfSopenharmony_cistatic void print_cgmem(const char *name)
70f08c3bdfSopenharmony_ci{
71f08c3bdfSopenharmony_ci	long ret;
72f08c3bdfSopenharmony_ci
73f08c3bdfSopenharmony_ci	if (!SAFE_CG_HAS(tst_cg, name))
74f08c3bdfSopenharmony_ci		return;
75f08c3bdfSopenharmony_ci
76f08c3bdfSopenharmony_ci	SAFE_CG_SCANF(tst_cg, name, "%ld", &ret);
77f08c3bdfSopenharmony_ci	tst_res(TINFO, "\t%s: %ld Kb", name, ret / 1024);
78f08c3bdfSopenharmony_ci}
79f08c3bdfSopenharmony_ci
80f08c3bdfSopenharmony_cistatic void meminfo_diag(const char *point)
81f08c3bdfSopenharmony_ci{
82f08c3bdfSopenharmony_ci	if (stat_refresh_sup)
83f08c3bdfSopenharmony_ci		SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1");
84f08c3bdfSopenharmony_ci
85f08c3bdfSopenharmony_ci	tst_res(TINFO, "%s", point);
86f08c3bdfSopenharmony_ci	tst_res(TINFO, "\tSwap: %ld Kb",
87f08c3bdfSopenharmony_ci		SAFE_READ_MEMINFO("SwapTotal:") - SAFE_READ_MEMINFO("SwapFree:") - init_swap);
88f08c3bdfSopenharmony_ci	tst_res(TINFO, "\tSwapCached: %ld Kb",
89f08c3bdfSopenharmony_ci		SAFE_READ_MEMINFO("SwapCached:") - init_swap_cached);
90f08c3bdfSopenharmony_ci	tst_res(TINFO, "\tCached: %ld Kb",
91f08c3bdfSopenharmony_ci		SAFE_READ_MEMINFO("Cached:") - init_cached);
92f08c3bdfSopenharmony_ci
93f08c3bdfSopenharmony_ci	print_cgmem("memory.current");
94f08c3bdfSopenharmony_ci	print_cgmem("memory.swap.current");
95f08c3bdfSopenharmony_ci	print_cgmem("memory.kmem.usage_in_bytes");
96f08c3bdfSopenharmony_ci}
97f08c3bdfSopenharmony_ci
98f08c3bdfSopenharmony_cistatic void setup(void)
99f08c3bdfSopenharmony_ci{
100f08c3bdfSopenharmony_ci	struct sysinfo sys_buf_start;
101f08c3bdfSopenharmony_ci
102f08c3bdfSopenharmony_ci	pg_sz = getpagesize();
103f08c3bdfSopenharmony_ci
104f08c3bdfSopenharmony_ci	tst_res(TINFO, "dropping caches");
105f08c3bdfSopenharmony_ci	sync();
106f08c3bdfSopenharmony_ci	SAFE_FILE_PRINTF(drop_caches_fname, "3");
107f08c3bdfSopenharmony_ci
108f08c3bdfSopenharmony_ci	sysinfo(&sys_buf_start);
109f08c3bdfSopenharmony_ci	if (sys_buf_start.freeram < 2 * CHUNK_SZ) {
110f08c3bdfSopenharmony_ci		tst_brk(TCONF, "System RAM is too small (%li bytes needed)",
111f08c3bdfSopenharmony_ci			2 * CHUNK_SZ);
112f08c3bdfSopenharmony_ci	}
113f08c3bdfSopenharmony_ci	if (sys_buf_start.freeswap < 2 * CHUNK_SZ) {
114f08c3bdfSopenharmony_ci		tst_brk(TCONF, "System swap is too small (%li bytes needed)",
115f08c3bdfSopenharmony_ci			2 * CHUNK_SZ);
116f08c3bdfSopenharmony_ci	}
117f08c3bdfSopenharmony_ci
118f08c3bdfSopenharmony_ci	check_path("/proc/self/oom_score_adj");
119f08c3bdfSopenharmony_ci	SAFE_FILE_PRINTF("/proc/self/oom_score_adj", "%d", -1000);
120f08c3bdfSopenharmony_ci
121f08c3bdfSopenharmony_ci	SAFE_CG_PRINTF(tst_cg, "memory.max", "%ld", MEM_LIMIT);
122f08c3bdfSopenharmony_ci	if (SAFE_CG_HAS(tst_cg, "memory.swap.max"))
123f08c3bdfSopenharmony_ci		SAFE_CG_PRINTF(tst_cg, "memory.swap.max", "%ld", MEMSW_LIMIT);
124f08c3bdfSopenharmony_ci
125f08c3bdfSopenharmony_ci	if (SAFE_CG_HAS(tst_cg, "memory.swappiness")) {
126f08c3bdfSopenharmony_ci		SAFE_CG_PRINT(tst_cg, "memory.swappiness", "60");
127f08c3bdfSopenharmony_ci	} else {
128f08c3bdfSopenharmony_ci		check_path("/proc/sys/vm/swappiness");
129f08c3bdfSopenharmony_ci		SAFE_FILE_PRINTF("/proc/sys/vm/swappiness", "%d", 60);
130f08c3bdfSopenharmony_ci	}
131f08c3bdfSopenharmony_ci
132f08c3bdfSopenharmony_ci	SAFE_CG_PRINTF(tst_cg, "cgroup.procs", "%d", getpid());
133f08c3bdfSopenharmony_ci
134f08c3bdfSopenharmony_ci	meminfo_diag("Initial meminfo, later values are relative to this (except memcg)");
135f08c3bdfSopenharmony_ci	init_swap = SAFE_READ_MEMINFO("SwapTotal:") - SAFE_READ_MEMINFO("SwapFree:");
136f08c3bdfSopenharmony_ci	init_swap_cached = SAFE_READ_MEMINFO("SwapCached:");
137f08c3bdfSopenharmony_ci	init_cached = SAFE_READ_MEMINFO("Cached:");
138f08c3bdfSopenharmony_ci
139f08c3bdfSopenharmony_ci	if (!access("/proc/sys/vm/stat_refresh", W_OK))
140f08c3bdfSopenharmony_ci		stat_refresh_sup = 1;
141f08c3bdfSopenharmony_ci
142f08c3bdfSopenharmony_ci	tst_res(TINFO, "mapping %ld Kb (%ld pages), limit %ld Kb, pass threshold %ld Kb",
143f08c3bdfSopenharmony_ci		CHUNK_SZ / 1024, CHUNK_SZ / pg_sz, MEM_LIMIT / 1024, PASS_THRESHOLD_KB);
144f08c3bdfSopenharmony_ci}
145f08c3bdfSopenharmony_ci
146f08c3bdfSopenharmony_cistatic void dirty_pages(char *ptr, long size)
147f08c3bdfSopenharmony_ci{
148f08c3bdfSopenharmony_ci	long i;
149f08c3bdfSopenharmony_ci	long pages = size / pg_sz;
150f08c3bdfSopenharmony_ci
151f08c3bdfSopenharmony_ci	for (i = 0; i < pages; i++)
152f08c3bdfSopenharmony_ci		ptr[i * pg_sz] = 'x';
153f08c3bdfSopenharmony_ci}
154f08c3bdfSopenharmony_ci
/*
 * Return the 12th whitespace-separated field of /proc/self/stat, which
 * proc(5) documents as majflt, the number of major page faults taken by
 * the process.
 */
static int get_page_fault_num(void)
{
	int faults;

	/* The first eleven fields are parsed and discarded. */
	SAFE_FILE_SCANF("/proc/self/stat",
			"%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d",
			&faults);

	return faults;
}
164f08c3bdfSopenharmony_ci
165f08c3bdfSopenharmony_cistatic void test_advice_willneed(void)
166f08c3bdfSopenharmony_ci{
167f08c3bdfSopenharmony_ci	int loops = 100, res;
168f08c3bdfSopenharmony_ci	char *target;
169f08c3bdfSopenharmony_ci	long swapcached_start, swapcached;
170f08c3bdfSopenharmony_ci	int page_fault_num_1, page_fault_num_2;
171f08c3bdfSopenharmony_ci
172f08c3bdfSopenharmony_ci	meminfo_diag("Before mmap");
173f08c3bdfSopenharmony_ci	tst_res(TINFO, "PageFault(before mmap): %d", get_page_fault_num());
174f08c3bdfSopenharmony_ci	target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE,
175f08c3bdfSopenharmony_ci			MAP_SHARED | MAP_ANONYMOUS,
176f08c3bdfSopenharmony_ci			-1, 0);
177f08c3bdfSopenharmony_ci	meminfo_diag("Before dirty");
178f08c3bdfSopenharmony_ci	tst_res(TINFO, "PageFault(before dirty): %d", get_page_fault_num());
179f08c3bdfSopenharmony_ci	dirty_pages(target, CHUNK_SZ);
180f08c3bdfSopenharmony_ci	tst_res(TINFO, "PageFault(after dirty): %d", get_page_fault_num());
181f08c3bdfSopenharmony_ci
182f08c3bdfSopenharmony_ci	meminfo_diag("Before madvise");
183f08c3bdfSopenharmony_ci	SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
184f08c3bdfSopenharmony_ci		&swapcached_start);
185f08c3bdfSopenharmony_ci
186f08c3bdfSopenharmony_ci	TEST(madvise(target, MEM_LIMIT, MADV_WILLNEED));
187f08c3bdfSopenharmony_ci	if (TST_RET == -1)
188f08c3bdfSopenharmony_ci		tst_brk(TBROK | TTERRNO, "madvise failed");
189f08c3bdfSopenharmony_ci
190f08c3bdfSopenharmony_ci	do {
191f08c3bdfSopenharmony_ci		loops--;
192f08c3bdfSopenharmony_ci		usleep(100000);
193f08c3bdfSopenharmony_ci		if (stat_refresh_sup)
194f08c3bdfSopenharmony_ci			SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1");
195f08c3bdfSopenharmony_ci		SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
196f08c3bdfSopenharmony_ci			&swapcached);
197f08c3bdfSopenharmony_ci	} while (swapcached < swapcached_start + PASS_THRESHOLD_KB && loops > 0);
198f08c3bdfSopenharmony_ci
199f08c3bdfSopenharmony_ci	meminfo_diag("After madvise");
200f08c3bdfSopenharmony_ci	res = swapcached > swapcached_start + PASS_THRESHOLD_KB;
201f08c3bdfSopenharmony_ci	tst_res(res ? TPASS : TINFO,
202f08c3bdfSopenharmony_ci		"%s than %ld Kb were moved to the swap cache",
203f08c3bdfSopenharmony_ci		res ? "more" : "less", PASS_THRESHOLD_KB);
204f08c3bdfSopenharmony_ci
205f08c3bdfSopenharmony_ci	loops = 100;
206f08c3bdfSopenharmony_ci	SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", &swapcached_start);
207f08c3bdfSopenharmony_ci	TEST(madvise(target, pg_sz * 3, MADV_WILLNEED));
208f08c3bdfSopenharmony_ci	if (TST_RET == -1)
209f08c3bdfSopenharmony_ci		tst_brk(TBROK | TTERRNO, "madvise failed");
210f08c3bdfSopenharmony_ci	do {
211f08c3bdfSopenharmony_ci		loops--;
212f08c3bdfSopenharmony_ci		usleep(100000);
213f08c3bdfSopenharmony_ci		if (stat_refresh_sup)
214f08c3bdfSopenharmony_ci			SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1");
215f08c3bdfSopenharmony_ci		SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
216f08c3bdfSopenharmony_ci				&swapcached);
217f08c3bdfSopenharmony_ci	} while (swapcached < swapcached_start + pg_sz*3/1024 && loops > 0);
218f08c3bdfSopenharmony_ci
219f08c3bdfSopenharmony_ci	page_fault_num_1 = get_page_fault_num();
220f08c3bdfSopenharmony_ci	tst_res(TINFO, "PageFault(madvice / no mem access): %d",
221f08c3bdfSopenharmony_ci			page_fault_num_1);
222f08c3bdfSopenharmony_ci	dirty_pages(target, pg_sz * 3);
223f08c3bdfSopenharmony_ci	page_fault_num_2 = get_page_fault_num();
224f08c3bdfSopenharmony_ci	tst_res(TINFO, "PageFault(madvice / mem access): %d",
225f08c3bdfSopenharmony_ci			page_fault_num_2);
226f08c3bdfSopenharmony_ci	meminfo_diag("After page access");
227f08c3bdfSopenharmony_ci
228f08c3bdfSopenharmony_ci	res = page_fault_num_2 - page_fault_num_1;
229f08c3bdfSopenharmony_ci	tst_res(res == 0 ? TPASS : TINFO,
230f08c3bdfSopenharmony_ci		"%d pages were faulted out of 3 max", res);
231f08c3bdfSopenharmony_ci
232f08c3bdfSopenharmony_ci	SAFE_MUNMAP(target, CHUNK_SZ);
233f08c3bdfSopenharmony_ci
234f08c3bdfSopenharmony_ci	if (tst_taint_check())
235f08c3bdfSopenharmony_ci		tst_res(TFAIL, "Kernel tainted");
236f08c3bdfSopenharmony_ci	else
237f08c3bdfSopenharmony_ci		tst_res(TPASS, "No kernel taints");
238f08c3bdfSopenharmony_ci}
239f08c3bdfSopenharmony_ci
240f08c3bdfSopenharmony_cistatic struct tst_test test = {
241f08c3bdfSopenharmony_ci	.test_all = test_advice_willneed,
242f08c3bdfSopenharmony_ci	.setup = setup,
243f08c3bdfSopenharmony_ci	.needs_tmpdir = 1,
244f08c3bdfSopenharmony_ci	.needs_root = 1,
245f08c3bdfSopenharmony_ci	.taint_check = TST_TAINT_W | TST_TAINT_D,
246f08c3bdfSopenharmony_ci	.save_restore = (const struct tst_path_val[]) {
247f08c3bdfSopenharmony_ci		{"/proc/sys/vm/swappiness", NULL,
248f08c3bdfSopenharmony_ci			TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
249f08c3bdfSopenharmony_ci		{}
250f08c3bdfSopenharmony_ci	},
251f08c3bdfSopenharmony_ci	.needs_cgroup_ctrls = (const char *const []){ "memory", NULL },
252f08c3bdfSopenharmony_ci	.tags = (const struct tst_tag[]) {
253f08c3bdfSopenharmony_ci		{"linux-git", "55231e5c898c"},
254f08c3bdfSopenharmony_ci		{"linux-git", "8de15e920dc8"},
255f08c3bdfSopenharmony_ci		{"linux-git", "66383800df9c"},
256f08c3bdfSopenharmony_ci		{}
257f08c3bdfSopenharmony_ci	}
258f08c3bdfSopenharmony_ci};
259