162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#define _GNU_SOURCE 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/limits.h> 562306a36Sopenharmony_ci#include <unistd.h> 662306a36Sopenharmony_ci#include <stdio.h> 762306a36Sopenharmony_ci#include <signal.h> 862306a36Sopenharmony_ci#include <sys/sysinfo.h> 962306a36Sopenharmony_ci#include <string.h> 1062306a36Sopenharmony_ci#include <sys/wait.h> 1162306a36Sopenharmony_ci#include <sys/mman.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include "../kselftest.h" 1462306a36Sopenharmony_ci#include "cgroup_util.h" 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_cistatic int read_int(const char *path, size_t *value) 1762306a36Sopenharmony_ci{ 1862306a36Sopenharmony_ci FILE *file; 1962306a36Sopenharmony_ci int ret = 0; 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci file = fopen(path, "r"); 2262306a36Sopenharmony_ci if (!file) 2362306a36Sopenharmony_ci return -1; 2462306a36Sopenharmony_ci if (fscanf(file, "%ld", value) != 1) 2562306a36Sopenharmony_ci ret = -1; 2662306a36Sopenharmony_ci fclose(file); 2762306a36Sopenharmony_ci return ret; 2862306a36Sopenharmony_ci} 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_cistatic int set_min_free_kb(size_t value) 3162306a36Sopenharmony_ci{ 3262306a36Sopenharmony_ci FILE *file; 3362306a36Sopenharmony_ci int ret; 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci file = fopen("/proc/sys/vm/min_free_kbytes", "w"); 3662306a36Sopenharmony_ci if (!file) 3762306a36Sopenharmony_ci return -1; 3862306a36Sopenharmony_ci ret = fprintf(file, "%ld\n", value); 3962306a36Sopenharmony_ci fclose(file); 4062306a36Sopenharmony_ci return ret; 4162306a36Sopenharmony_ci} 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_cistatic int read_min_free_kb(size_t *value) 4462306a36Sopenharmony_ci{ 4562306a36Sopenharmony_ci return read_int("/proc/sys/vm/min_free_kbytes", value); 4662306a36Sopenharmony_ci} 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_cistatic int get_zswap_stored_pages(size_t *value) 4962306a36Sopenharmony_ci{ 5062306a36Sopenharmony_ci return read_int("/sys/kernel/debug/zswap/stored_pages", value); 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic int get_zswap_written_back_pages(size_t *value) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci return read_int("/sys/kernel/debug/zswap/written_back_pages", value); 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistatic int allocate_bytes(const char *cgroup, void *arg) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci size_t size = (size_t)arg; 6162306a36Sopenharmony_ci char *mem = (char *)malloc(size); 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci if (!mem) 6462306a36Sopenharmony_ci return -1; 6562306a36Sopenharmony_ci for (int i = 0; i < size; i += 4095) 6662306a36Sopenharmony_ci mem[i] = 'a'; 6762306a36Sopenharmony_ci free(mem); 6862306a36Sopenharmony_ci return 0; 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci/* 7262306a36Sopenharmony_ci * When trying to store a memcg page in zswap, if the memcg hits its memory 7362306a36Sopenharmony_ci * limit in zswap, writeback should not be triggered. 7462306a36Sopenharmony_ci * 7562306a36Sopenharmony_ci * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may 7662306a36Sopenharmony_ci * not zswap"). Needs to be revised when a per memcg writeback mechanism is 7762306a36Sopenharmony_ci * implemented. 7862306a36Sopenharmony_ci */ 7962306a36Sopenharmony_cistatic int test_no_invasive_cgroup_shrink(const char *root) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci size_t written_back_before, written_back_after; 8262306a36Sopenharmony_ci int ret = KSFT_FAIL; 8362306a36Sopenharmony_ci char *test_group; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci /* Set up */ 8662306a36Sopenharmony_ci test_group = cg_name(root, "no_shrink_test"); 8762306a36Sopenharmony_ci if (!test_group) 8862306a36Sopenharmony_ci goto out; 8962306a36Sopenharmony_ci if (cg_create(test_group)) 9062306a36Sopenharmony_ci goto out; 9162306a36Sopenharmony_ci if (cg_write(test_group, "memory.max", "1M")) 9262306a36Sopenharmony_ci goto out; 9362306a36Sopenharmony_ci if (cg_write(test_group, "memory.zswap.max", "10K")) 9462306a36Sopenharmony_ci goto out; 9562306a36Sopenharmony_ci if (get_zswap_written_back_pages(&written_back_before)) 9662306a36Sopenharmony_ci goto out; 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci /* Allocate 10x memory.max to push memory into zswap */ 9962306a36Sopenharmony_ci if (cg_run(test_group, allocate_bytes, (void *)MB(10))) 10062306a36Sopenharmony_ci goto out; 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci /* Verify that no writeback happened because of the memcg allocation */ 10362306a36Sopenharmony_ci if (get_zswap_written_back_pages(&written_back_after)) 10462306a36Sopenharmony_ci goto out; 10562306a36Sopenharmony_ci if (written_back_after == written_back_before) 10662306a36Sopenharmony_ci ret = KSFT_PASS; 10762306a36Sopenharmony_ciout: 10862306a36Sopenharmony_ci cg_destroy(test_group); 10962306a36Sopenharmony_ci free(test_group); 11062306a36Sopenharmony_ci return ret; 11162306a36Sopenharmony_ci} 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_cistruct no_kmem_bypass_child_args { 11462306a36Sopenharmony_ci size_t target_alloc_bytes; 11562306a36Sopenharmony_ci size_t child_allocated; 11662306a36Sopenharmony_ci}; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic int no_kmem_bypass_child(const char *cgroup, void *arg) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci struct no_kmem_bypass_child_args *values = arg; 12162306a36Sopenharmony_ci void *allocation; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci allocation = malloc(values->target_alloc_bytes); 12462306a36Sopenharmony_ci if (!allocation) { 12562306a36Sopenharmony_ci values->child_allocated = true; 12662306a36Sopenharmony_ci return -1; 12762306a36Sopenharmony_ci } 12862306a36Sopenharmony_ci for (long i = 0; i < values->target_alloc_bytes; i += 4095) 12962306a36Sopenharmony_ci ((char *)allocation)[i] = 'a'; 13062306a36Sopenharmony_ci values->child_allocated = true; 13162306a36Sopenharmony_ci pause(); 13262306a36Sopenharmony_ci free(allocation); 13362306a36Sopenharmony_ci return 0; 13462306a36Sopenharmony_ci} 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci/* 13762306a36Sopenharmony_ci * When pages owned by a memcg are pushed to zswap by kswapd, they should be 13862306a36Sopenharmony_ci * charged to that cgroup. This wasn't the case before commit 13962306a36Sopenharmony_ci * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). 14062306a36Sopenharmony_ci * 14162306a36Sopenharmony_ci * The test first allocates memory in a memcg, then raises min_free_kbytes to 14262306a36Sopenharmony_ci * a very high value so that the allocation falls below low wm, then makes 14362306a36Sopenharmony_ci * another allocation to trigger kswapd that should push the memcg-owned pages 14462306a36Sopenharmony_ci * to zswap and verifies that the zswap pages are correctly charged. 14562306a36Sopenharmony_ci * 14662306a36Sopenharmony_ci * To be run on a VM with at most 4G of memory. 14762306a36Sopenharmony_ci */ 14862306a36Sopenharmony_cistatic int test_no_kmem_bypass(const char *root) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; 15162306a36Sopenharmony_ci struct no_kmem_bypass_child_args *values; 15262306a36Sopenharmony_ci size_t trigger_allocation_size; 15362306a36Sopenharmony_ci int wait_child_iteration = 0; 15462306a36Sopenharmony_ci long stored_pages_threshold; 15562306a36Sopenharmony_ci struct sysinfo sys_info; 15662306a36Sopenharmony_ci int ret = KSFT_FAIL; 15762306a36Sopenharmony_ci int child_status; 15862306a36Sopenharmony_ci char *test_group; 15962306a36Sopenharmony_ci pid_t child_pid; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci /* Read sys info and compute test values accordingly */ 16262306a36Sopenharmony_ci if (sysinfo(&sys_info) != 0) 16362306a36Sopenharmony_ci return KSFT_FAIL; 16462306a36Sopenharmony_ci if (sys_info.totalram > 5000000000) 16562306a36Sopenharmony_ci return KSFT_SKIP; 16662306a36Sopenharmony_ci values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | 16762306a36Sopenharmony_ci PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 16862306a36Sopenharmony_ci if (values == MAP_FAILED) 16962306a36Sopenharmony_ci return KSFT_FAIL; 17062306a36Sopenharmony_ci if (read_min_free_kb(&min_free_kb_original)) 17162306a36Sopenharmony_ci return KSFT_FAIL; 17262306a36Sopenharmony_ci min_free_kb_high = sys_info.totalram / 2000; 17362306a36Sopenharmony_ci min_free_kb_low = sys_info.totalram / 500000; 17462306a36Sopenharmony_ci values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + 17562306a36Sopenharmony_ci sys_info.totalram * 5 / 100; 17662306a36Sopenharmony_ci stored_pages_threshold = sys_info.totalram / 5 / 4096; 17762306a36Sopenharmony_ci trigger_allocation_size = sys_info.totalram / 20; 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci /* Set up test memcg */ 18062306a36Sopenharmony_ci if (cg_write(root, "cgroup.subtree_control", "+memory")) 18162306a36Sopenharmony_ci goto out; 18262306a36Sopenharmony_ci test_group = cg_name(root, "kmem_bypass_test"); 18362306a36Sopenharmony_ci if (!test_group) 18462306a36Sopenharmony_ci goto out; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci /* Spawn memcg child and wait for it to allocate */ 18762306a36Sopenharmony_ci set_min_free_kb(min_free_kb_low); 18862306a36Sopenharmony_ci if (cg_create(test_group)) 18962306a36Sopenharmony_ci goto out; 19062306a36Sopenharmony_ci values->child_allocated = false; 19162306a36Sopenharmony_ci child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); 19262306a36Sopenharmony_ci if (child_pid < 0) 19362306a36Sopenharmony_ci goto out; 19462306a36Sopenharmony_ci while (!values->child_allocated && wait_child_iteration++ < 10000) 19562306a36Sopenharmony_ci usleep(1000); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci /* Try to wakeup kswapd and let it push child memory to zswap */ 19862306a36Sopenharmony_ci set_min_free_kb(min_free_kb_high); 19962306a36Sopenharmony_ci for (int i = 0; i < 20; i++) { 20062306a36Sopenharmony_ci size_t stored_pages; 20162306a36Sopenharmony_ci char *trigger_allocation = malloc(trigger_allocation_size); 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci if (!trigger_allocation) 20462306a36Sopenharmony_ci break; 20562306a36Sopenharmony_ci for (int i = 0; i < trigger_allocation_size; i += 4095) 20662306a36Sopenharmony_ci trigger_allocation[i] = 'b'; 20762306a36Sopenharmony_ci usleep(100000); 20862306a36Sopenharmony_ci free(trigger_allocation); 20962306a36Sopenharmony_ci if (get_zswap_stored_pages(&stored_pages)) 21062306a36Sopenharmony_ci break; 21162306a36Sopenharmony_ci if (stored_pages < 0) 21262306a36Sopenharmony_ci break; 21362306a36Sopenharmony_ci /* If memory was pushed to zswap, verify it belongs to memcg */ 21462306a36Sopenharmony_ci if (stored_pages > stored_pages_threshold) { 21562306a36Sopenharmony_ci int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); 21662306a36Sopenharmony_ci int delta = stored_pages * 4096 - zswapped; 21762306a36Sopenharmony_ci int result_ok = delta < stored_pages * 4096 / 4; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci ret = result_ok ? KSFT_PASS : KSFT_FAIL; 22062306a36Sopenharmony_ci break; 22162306a36Sopenharmony_ci } 22262306a36Sopenharmony_ci } 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci kill(child_pid, SIGTERM); 22562306a36Sopenharmony_ci waitpid(child_pid, &child_status, 0); 22662306a36Sopenharmony_ciout: 22762306a36Sopenharmony_ci set_min_free_kb(min_free_kb_original); 22862306a36Sopenharmony_ci cg_destroy(test_group); 22962306a36Sopenharmony_ci free(test_group); 23062306a36Sopenharmony_ci return ret; 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci#define T(x) { x, #x } 23462306a36Sopenharmony_cistruct zswap_test { 23562306a36Sopenharmony_ci int (*fn)(const char *root); 23662306a36Sopenharmony_ci const char *name; 23762306a36Sopenharmony_ci} tests[] = { 23862306a36Sopenharmony_ci T(test_no_kmem_bypass), 23962306a36Sopenharmony_ci T(test_no_invasive_cgroup_shrink), 24062306a36Sopenharmony_ci}; 24162306a36Sopenharmony_ci#undef T 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_cistatic bool zswap_configured(void) 24462306a36Sopenharmony_ci{ 24562306a36Sopenharmony_ci return access("/sys/module/zswap", F_OK) == 0; 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ciint main(int argc, char **argv) 24962306a36Sopenharmony_ci{ 25062306a36Sopenharmony_ci char root[PATH_MAX]; 25162306a36Sopenharmony_ci int i, ret = EXIT_SUCCESS; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci if (cg_find_unified_root(root, sizeof(root))) 25462306a36Sopenharmony_ci ksft_exit_skip("cgroup v2 isn't mounted\n"); 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci if (!zswap_configured()) 25762306a36Sopenharmony_ci ksft_exit_skip("zswap isn't configured\n"); 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci /* 26062306a36Sopenharmony_ci * Check that memory controller is available: 26162306a36Sopenharmony_ci * memory is listed in cgroup.controllers 26262306a36Sopenharmony_ci */ 26362306a36Sopenharmony_ci if (cg_read_strstr(root, "cgroup.controllers", "memory")) 26462306a36Sopenharmony_ci ksft_exit_skip("memory controller isn't available\n"); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 26762306a36Sopenharmony_ci if (cg_write(root, "cgroup.subtree_control", "+memory")) 26862306a36Sopenharmony_ci ksft_exit_skip("Failed to set memory controller\n"); 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(tests); i++) { 27162306a36Sopenharmony_ci switch (tests[i].fn(root)) { 27262306a36Sopenharmony_ci case KSFT_PASS: 27362306a36Sopenharmony_ci ksft_test_result_pass("%s\n", tests[i].name); 27462306a36Sopenharmony_ci break; 27562306a36Sopenharmony_ci case KSFT_SKIP: 27662306a36Sopenharmony_ci ksft_test_result_skip("%s\n", tests[i].name); 27762306a36Sopenharmony_ci break; 27862306a36Sopenharmony_ci default: 27962306a36Sopenharmony_ci ret = EXIT_FAILURE; 28062306a36Sopenharmony_ci ksft_test_result_fail("%s\n", tests[i].name); 28162306a36Sopenharmony_ci break; 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci } 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci return ret; 28662306a36Sopenharmony_ci} 287