1f08c3bdfSopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 2f08c3bdfSopenharmony_ci/* 3f08c3bdfSopenharmony_ci * Copyright (c) 2016 Red Hat, Inc. 4f08c3bdfSopenharmony_ci */ 5f08c3bdfSopenharmony_ci 6f08c3bdfSopenharmony_ci/*\ 7f08c3bdfSopenharmony_ci * [Description] 8f08c3bdfSopenharmony_ci * 9f08c3bdfSopenharmony_ci * Page fault occurs in spite that madvise(WILLNEED) system call is called 10f08c3bdfSopenharmony_ci * to prefetch the page. This issue is reproduced by running a program 11f08c3bdfSopenharmony_ci * which sequentially accesses to a shared memory and calls madvise(WILLNEED) 12f08c3bdfSopenharmony_ci * to the next page on a page fault. 13f08c3bdfSopenharmony_ci * 14f08c3bdfSopenharmony_ci * This bug is present in all RHEL7 versions. It looks like this was fixed in 15f08c3bdfSopenharmony_ci * mainline kernel > v3.15 by the following patch: 16f08c3bdfSopenharmony_ci * 17f08c3bdfSopenharmony_ci * commit 55231e5c898c5c03c14194001e349f40f59bd300 18f08c3bdfSopenharmony_ci * Author: Johannes Weiner <hannes@cmpxchg.org> 19f08c3bdfSopenharmony_ci * Date: Thu May 22 11:54:17 2014 -0700 20f08c3bdfSopenharmony_ci * 21f08c3bdfSopenharmony_ci * mm: madvise: fix MADV_WILLNEED on shmem swapouts 22f08c3bdfSopenharmony_ci * 23f08c3bdfSopenharmony_ci * Two checks are performed, the first looks at how SwapCache 24f08c3bdfSopenharmony_ci * changes during madvise. When the pages are dirtied, about half 25f08c3bdfSopenharmony_ci * will be accounted for under Cached and the other half will be 26f08c3bdfSopenharmony_ci * moved into Swap. When madvise is run it will cause the pages 27f08c3bdfSopenharmony_ci * under Cached to also be moved to Swap while rotating the pages 28f08c3bdfSopenharmony_ci * already in Swap into SwapCached. So we expect that SwapCached has 29f08c3bdfSopenharmony_ci * roughly MEM_LIMIT bytes added to it, but for reliability the 30f08c3bdfSopenharmony_ci * PASS_THRESHOLD is much lower than that. 31f08c3bdfSopenharmony_ci * 32f08c3bdfSopenharmony_ci * Secondly we run madvise again, but only on the first 33f08c3bdfSopenharmony_ci * PASS_THRESHOLD bytes to ensure these are entirely in RAM. Then we 34f08c3bdfSopenharmony_ci * dirty these pages and check there were (almost) no page 35f08c3bdfSopenharmony_ci * faults. Two faults are allowed incase some tasklet or something 36f08c3bdfSopenharmony_ci * else unexpected, but irrelevant procedure, registers a fault to 37f08c3bdfSopenharmony_ci * our process. 38f08c3bdfSopenharmony_ci * 39f08c3bdfSopenharmony_ci * It also can reproduce the MADV_WILLNEED preformance problem. 40f08c3bdfSopenharmony_ci * It was introduced since 5.9 kernel with the following commit 41f08c3bdfSopenharmony_ci * e6e88712e43b ("mm: optimise madvise WILLNEED") 42f08c3bdfSopenharmony_ci * and fixed since 5.10-rc5 kernel with the following commit 43f08c3bdfSopenharmony_ci * 66383800df9c ("mm: fix madvise WILLNEED performance problem"). 44f08c3bdfSopenharmony_ci */ 45f08c3bdfSopenharmony_ci 46f08c3bdfSopenharmony_ci#include <errno.h> 47f08c3bdfSopenharmony_ci#include <stdio.h> 48f08c3bdfSopenharmony_ci#include <sys/mount.h> 49f08c3bdfSopenharmony_ci#include <sys/sysinfo.h> 50f08c3bdfSopenharmony_ci#include "tst_test.h" 51f08c3bdfSopenharmony_ci 52f08c3bdfSopenharmony_ci#define CHUNK_SZ (400*1024*1024L) 53f08c3bdfSopenharmony_ci#define MEM_LIMIT (CHUNK_SZ / 2) 54f08c3bdfSopenharmony_ci#define MEMSW_LIMIT (2 * CHUNK_SZ) 55f08c3bdfSopenharmony_ci#define PASS_THRESHOLD (CHUNK_SZ / 4) 56f08c3bdfSopenharmony_ci#define PASS_THRESHOLD_KB (PASS_THRESHOLD / 1024) 57f08c3bdfSopenharmony_ci 58f08c3bdfSopenharmony_cistatic const char drop_caches_fname[] = "/proc/sys/vm/drop_caches"; 59f08c3bdfSopenharmony_cistatic int pg_sz, stat_refresh_sup; 60f08c3bdfSopenharmony_ci 61f08c3bdfSopenharmony_cistatic long init_swap, init_swap_cached, init_cached; 62f08c3bdfSopenharmony_ci 63f08c3bdfSopenharmony_cistatic void check_path(const char *path) 64f08c3bdfSopenharmony_ci{ 65f08c3bdfSopenharmony_ci if (access(path, R_OK | W_OK)) 66f08c3bdfSopenharmony_ci tst_brk(TCONF, "file needed: %s", path); 67f08c3bdfSopenharmony_ci} 68f08c3bdfSopenharmony_ci 69f08c3bdfSopenharmony_cistatic void print_cgmem(const char *name) 70f08c3bdfSopenharmony_ci{ 71f08c3bdfSopenharmony_ci long ret; 72f08c3bdfSopenharmony_ci 73f08c3bdfSopenharmony_ci if (!SAFE_CG_HAS(tst_cg, name)) 74f08c3bdfSopenharmony_ci return; 75f08c3bdfSopenharmony_ci 76f08c3bdfSopenharmony_ci SAFE_CG_SCANF(tst_cg, name, "%ld", &ret); 77f08c3bdfSopenharmony_ci tst_res(TINFO, "\t%s: %ld Kb", name, ret / 1024); 78f08c3bdfSopenharmony_ci} 79f08c3bdfSopenharmony_ci 80f08c3bdfSopenharmony_cistatic void meminfo_diag(const char *point) 81f08c3bdfSopenharmony_ci{ 82f08c3bdfSopenharmony_ci if (stat_refresh_sup) 83f08c3bdfSopenharmony_ci SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1"); 84f08c3bdfSopenharmony_ci 85f08c3bdfSopenharmony_ci tst_res(TINFO, "%s", point); 86f08c3bdfSopenharmony_ci tst_res(TINFO, "\tSwap: %ld Kb", 87f08c3bdfSopenharmony_ci SAFE_READ_MEMINFO("SwapTotal:") - SAFE_READ_MEMINFO("SwapFree:") - init_swap); 88f08c3bdfSopenharmony_ci tst_res(TINFO, "\tSwapCached: %ld Kb", 89f08c3bdfSopenharmony_ci SAFE_READ_MEMINFO("SwapCached:") - init_swap_cached); 90f08c3bdfSopenharmony_ci tst_res(TINFO, "\tCached: %ld Kb", 91f08c3bdfSopenharmony_ci SAFE_READ_MEMINFO("Cached:") - init_cached); 92f08c3bdfSopenharmony_ci 93f08c3bdfSopenharmony_ci print_cgmem("memory.current"); 94f08c3bdfSopenharmony_ci print_cgmem("memory.swap.current"); 95f08c3bdfSopenharmony_ci print_cgmem("memory.kmem.usage_in_bytes"); 96f08c3bdfSopenharmony_ci} 97f08c3bdfSopenharmony_ci 98f08c3bdfSopenharmony_cistatic void setup(void) 99f08c3bdfSopenharmony_ci{ 100f08c3bdfSopenharmony_ci struct sysinfo sys_buf_start; 101f08c3bdfSopenharmony_ci 102f08c3bdfSopenharmony_ci pg_sz = getpagesize(); 103f08c3bdfSopenharmony_ci 104f08c3bdfSopenharmony_ci tst_res(TINFO, "dropping caches"); 105f08c3bdfSopenharmony_ci sync(); 106f08c3bdfSopenharmony_ci SAFE_FILE_PRINTF(drop_caches_fname, "3"); 107f08c3bdfSopenharmony_ci 108f08c3bdfSopenharmony_ci sysinfo(&sys_buf_start); 109f08c3bdfSopenharmony_ci if (sys_buf_start.freeram < 2 * CHUNK_SZ) { 110f08c3bdfSopenharmony_ci tst_brk(TCONF, "System RAM is too small (%li bytes needed)", 111f08c3bdfSopenharmony_ci 2 * CHUNK_SZ); 112f08c3bdfSopenharmony_ci } 113f08c3bdfSopenharmony_ci if (sys_buf_start.freeswap < 2 * CHUNK_SZ) { 114f08c3bdfSopenharmony_ci tst_brk(TCONF, "System swap is too small (%li bytes needed)", 115f08c3bdfSopenharmony_ci 2 * CHUNK_SZ); 116f08c3bdfSopenharmony_ci } 117f08c3bdfSopenharmony_ci 118f08c3bdfSopenharmony_ci check_path("/proc/self/oom_score_adj"); 119f08c3bdfSopenharmony_ci SAFE_FILE_PRINTF("/proc/self/oom_score_adj", "%d", -1000); 120f08c3bdfSopenharmony_ci 121f08c3bdfSopenharmony_ci SAFE_CG_PRINTF(tst_cg, "memory.max", "%ld", MEM_LIMIT); 122f08c3bdfSopenharmony_ci if (SAFE_CG_HAS(tst_cg, "memory.swap.max")) 123f08c3bdfSopenharmony_ci SAFE_CG_PRINTF(tst_cg, "memory.swap.max", "%ld", MEMSW_LIMIT); 124f08c3bdfSopenharmony_ci 125f08c3bdfSopenharmony_ci if (SAFE_CG_HAS(tst_cg, "memory.swappiness")) { 126f08c3bdfSopenharmony_ci SAFE_CG_PRINT(tst_cg, "memory.swappiness", "60"); 127f08c3bdfSopenharmony_ci } else { 128f08c3bdfSopenharmony_ci check_path("/proc/sys/vm/swappiness"); 129f08c3bdfSopenharmony_ci SAFE_FILE_PRINTF("/proc/sys/vm/swappiness", "%d", 60); 130f08c3bdfSopenharmony_ci } 131f08c3bdfSopenharmony_ci 132f08c3bdfSopenharmony_ci SAFE_CG_PRINTF(tst_cg, "cgroup.procs", "%d", getpid()); 133f08c3bdfSopenharmony_ci 134f08c3bdfSopenharmony_ci meminfo_diag("Initial meminfo, later values are relative to this (except memcg)"); 135f08c3bdfSopenharmony_ci init_swap = SAFE_READ_MEMINFO("SwapTotal:") - SAFE_READ_MEMINFO("SwapFree:"); 136f08c3bdfSopenharmony_ci init_swap_cached = SAFE_READ_MEMINFO("SwapCached:"); 137f08c3bdfSopenharmony_ci init_cached = SAFE_READ_MEMINFO("Cached:"); 138f08c3bdfSopenharmony_ci 139f08c3bdfSopenharmony_ci if (!access("/proc/sys/vm/stat_refresh", W_OK)) 140f08c3bdfSopenharmony_ci stat_refresh_sup = 1; 141f08c3bdfSopenharmony_ci 142f08c3bdfSopenharmony_ci tst_res(TINFO, "mapping %ld Kb (%ld pages), limit %ld Kb, pass threshold %ld Kb", 143f08c3bdfSopenharmony_ci CHUNK_SZ / 1024, CHUNK_SZ / pg_sz, MEM_LIMIT / 1024, PASS_THRESHOLD_KB); 144f08c3bdfSopenharmony_ci} 145f08c3bdfSopenharmony_ci 146f08c3bdfSopenharmony_cistatic void dirty_pages(char *ptr, long size) 147f08c3bdfSopenharmony_ci{ 148f08c3bdfSopenharmony_ci long i; 149f08c3bdfSopenharmony_ci long pages = size / pg_sz; 150f08c3bdfSopenharmony_ci 151f08c3bdfSopenharmony_ci for (i = 0; i < pages; i++) 152f08c3bdfSopenharmony_ci ptr[i * pg_sz] = 'x'; 153f08c3bdfSopenharmony_ci} 154f08c3bdfSopenharmony_ci 155f08c3bdfSopenharmony_cistatic int get_page_fault_num(void) 156f08c3bdfSopenharmony_ci{ 157f08c3bdfSopenharmony_ci int pg; 158f08c3bdfSopenharmony_ci 159f08c3bdfSopenharmony_ci SAFE_FILE_SCANF("/proc/self/stat", 160f08c3bdfSopenharmony_ci "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d", 161f08c3bdfSopenharmony_ci &pg); 162f08c3bdfSopenharmony_ci return pg; 163f08c3bdfSopenharmony_ci} 164f08c3bdfSopenharmony_ci 165f08c3bdfSopenharmony_cistatic void test_advice_willneed(void) 166f08c3bdfSopenharmony_ci{ 167f08c3bdfSopenharmony_ci int loops = 100, res; 168f08c3bdfSopenharmony_ci char *target; 169f08c3bdfSopenharmony_ci long swapcached_start, swapcached; 170f08c3bdfSopenharmony_ci int page_fault_num_1, page_fault_num_2; 171f08c3bdfSopenharmony_ci 172f08c3bdfSopenharmony_ci meminfo_diag("Before mmap"); 173f08c3bdfSopenharmony_ci tst_res(TINFO, "PageFault(before mmap): %d", get_page_fault_num()); 174f08c3bdfSopenharmony_ci target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE, 175f08c3bdfSopenharmony_ci MAP_SHARED | MAP_ANONYMOUS, 176f08c3bdfSopenharmony_ci -1, 0); 177f08c3bdfSopenharmony_ci meminfo_diag("Before dirty"); 178f08c3bdfSopenharmony_ci tst_res(TINFO, "PageFault(before dirty): %d", get_page_fault_num()); 179f08c3bdfSopenharmony_ci dirty_pages(target, CHUNK_SZ); 180f08c3bdfSopenharmony_ci tst_res(TINFO, "PageFault(after dirty): %d", get_page_fault_num()); 181f08c3bdfSopenharmony_ci 182f08c3bdfSopenharmony_ci meminfo_diag("Before madvise"); 183f08c3bdfSopenharmony_ci SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", 184f08c3bdfSopenharmony_ci &swapcached_start); 185f08c3bdfSopenharmony_ci 186f08c3bdfSopenharmony_ci TEST(madvise(target, MEM_LIMIT, MADV_WILLNEED)); 187f08c3bdfSopenharmony_ci if (TST_RET == -1) 188f08c3bdfSopenharmony_ci tst_brk(TBROK | TTERRNO, "madvise failed"); 189f08c3bdfSopenharmony_ci 190f08c3bdfSopenharmony_ci do { 191f08c3bdfSopenharmony_ci loops--; 192f08c3bdfSopenharmony_ci usleep(100000); 193f08c3bdfSopenharmony_ci if (stat_refresh_sup) 194f08c3bdfSopenharmony_ci SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1"); 195f08c3bdfSopenharmony_ci SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", 196f08c3bdfSopenharmony_ci &swapcached); 197f08c3bdfSopenharmony_ci } while (swapcached < swapcached_start + PASS_THRESHOLD_KB && loops > 0); 198f08c3bdfSopenharmony_ci 199f08c3bdfSopenharmony_ci meminfo_diag("After madvise"); 200f08c3bdfSopenharmony_ci res = swapcached > swapcached_start + PASS_THRESHOLD_KB; 201f08c3bdfSopenharmony_ci tst_res(res ? TPASS : TINFO, 202f08c3bdfSopenharmony_ci "%s than %ld Kb were moved to the swap cache", 203f08c3bdfSopenharmony_ci res ? "more" : "less", PASS_THRESHOLD_KB); 204f08c3bdfSopenharmony_ci 205f08c3bdfSopenharmony_ci loops = 100; 206f08c3bdfSopenharmony_ci SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", &swapcached_start); 207f08c3bdfSopenharmony_ci TEST(madvise(target, pg_sz * 3, MADV_WILLNEED)); 208f08c3bdfSopenharmony_ci if (TST_RET == -1) 209f08c3bdfSopenharmony_ci tst_brk(TBROK | TTERRNO, "madvise failed"); 210f08c3bdfSopenharmony_ci do { 211f08c3bdfSopenharmony_ci loops--; 212f08c3bdfSopenharmony_ci usleep(100000); 213f08c3bdfSopenharmony_ci if (stat_refresh_sup) 214f08c3bdfSopenharmony_ci SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1"); 215f08c3bdfSopenharmony_ci SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", 216f08c3bdfSopenharmony_ci &swapcached); 217f08c3bdfSopenharmony_ci } while (swapcached < swapcached_start + pg_sz*3/1024 && loops > 0); 218f08c3bdfSopenharmony_ci 219f08c3bdfSopenharmony_ci page_fault_num_1 = get_page_fault_num(); 220f08c3bdfSopenharmony_ci tst_res(TINFO, "PageFault(madvice / no mem access): %d", 221f08c3bdfSopenharmony_ci page_fault_num_1); 222f08c3bdfSopenharmony_ci dirty_pages(target, pg_sz * 3); 223f08c3bdfSopenharmony_ci page_fault_num_2 = get_page_fault_num(); 224f08c3bdfSopenharmony_ci tst_res(TINFO, "PageFault(madvice / mem access): %d", 225f08c3bdfSopenharmony_ci page_fault_num_2); 226f08c3bdfSopenharmony_ci meminfo_diag("After page access"); 227f08c3bdfSopenharmony_ci 228f08c3bdfSopenharmony_ci res = page_fault_num_2 - page_fault_num_1; 229f08c3bdfSopenharmony_ci tst_res(res == 0 ? TPASS : TINFO, 230f08c3bdfSopenharmony_ci "%d pages were faulted out of 3 max", res); 231f08c3bdfSopenharmony_ci 232f08c3bdfSopenharmony_ci SAFE_MUNMAP(target, CHUNK_SZ); 233f08c3bdfSopenharmony_ci 234f08c3bdfSopenharmony_ci if (tst_taint_check()) 235f08c3bdfSopenharmony_ci tst_res(TFAIL, "Kernel tainted"); 236f08c3bdfSopenharmony_ci else 237f08c3bdfSopenharmony_ci tst_res(TPASS, "No kernel taints"); 238f08c3bdfSopenharmony_ci} 239f08c3bdfSopenharmony_ci 240f08c3bdfSopenharmony_cistatic struct tst_test test = { 241f08c3bdfSopenharmony_ci .test_all = test_advice_willneed, 242f08c3bdfSopenharmony_ci .setup = setup, 243f08c3bdfSopenharmony_ci .needs_tmpdir = 1, 244f08c3bdfSopenharmony_ci .needs_root = 1, 245f08c3bdfSopenharmony_ci .taint_check = TST_TAINT_W | TST_TAINT_D, 246f08c3bdfSopenharmony_ci .save_restore = (const struct tst_path_val[]) { 247f08c3bdfSopenharmony_ci {"/proc/sys/vm/swappiness", NULL, 248f08c3bdfSopenharmony_ci TST_SR_SKIP_MISSING | TST_SR_TCONF_RO}, 249f08c3bdfSopenharmony_ci {} 250f08c3bdfSopenharmony_ci }, 251f08c3bdfSopenharmony_ci .needs_cgroup_ctrls = (const char *const []){ "memory", NULL }, 252f08c3bdfSopenharmony_ci .tags = (const struct tst_tag[]) { 253f08c3bdfSopenharmony_ci {"linux-git", "55231e5c898c"}, 254f08c3bdfSopenharmony_ci {"linux-git", "8de15e920dc8"}, 255f08c3bdfSopenharmony_ci {"linux-git", "66383800df9c"}, 256f08c3bdfSopenharmony_ci {} 257f08c3bdfSopenharmony_ci } 258f08c3bdfSopenharmony_ci}; 259