1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
5#include <unistd.h>
6#include <stdio.h>
7#include <signal.h>
8#include <sys/sysinfo.h>
9#include <string.h>
10#include <sys/wait.h>
11#include <sys/mman.h>
12
13#include "../kselftest.h"
14#include "cgroup_util.h"
15
/*
 * Read one decimal integer from the start of the file at @path into @value.
 *
 * Returns 0 on success, or -1 if the file cannot be opened or does not begin
 * with a number.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion that matches size_t *; %ld expects a long *. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the number of characters written (positive) on success, or a
 * negative value if the file cannot be opened or the write fails.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion that matches size_t. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * The data may still sit in the stdio buffer at this point, so a
	 * write error can surface only when the stream is flushed by fclose().
	 */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
42
/* Fetch the current /proc/sys/vm/min_free_kbytes value via read_int(). */
static int read_min_free_kb(size_t *value)
{
	const char *sysctl_path = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47
/* Fetch the zswap debugfs "stored_pages" counter via read_int(). */
static int get_zswap_stored_pages(size_t *value)
{
	const char *counter_path = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(counter_path, value);
}
52
/* Fetch the zswap debugfs "written_back_pages" counter via read_int(). */
static int get_zswap_written_back_pages(size_t *value)
{
	const char *counter_path = "/sys/kernel/debug/zswap/written_back_pages";

	return read_int(counter_path, value);
}
57
/*
 * Callback passed to cg_run(): allocate a heap buffer and write to it.
 *
 * @cgroup: unused.
 * @arg:    the buffer size in bytes, smuggled through the void pointer.
 *
 * Returns 0 on success, or -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/*
	 * Write one byte every 4095 bytes so the whole buffer gets touched
	 * (presumably to fault in every 4K page). The index is a size_t
	 * because an int would overflow for sizes above INT_MAX.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
70
71/*
72 * When trying to store a memcg page in zswap, if the memcg hits its memory
73 * limit in zswap, writeback should not be triggered.
74 *
 * This was fixed with commit 0bdf0efa180a ("zswap: do not shrink if cgroup may
 * not zswap"). Needs to be revised when a per-memcg writeback mechanism is
 * implemented.
78 */
79static int test_no_invasive_cgroup_shrink(const char *root)
80{
81	size_t written_back_before, written_back_after;
82	int ret = KSFT_FAIL;
83	char *test_group;
84
85	/* Set up */
86	test_group = cg_name(root, "no_shrink_test");
87	if (!test_group)
88		goto out;
89	if (cg_create(test_group))
90		goto out;
91	if (cg_write(test_group, "memory.max", "1M"))
92		goto out;
93	if (cg_write(test_group, "memory.zswap.max", "10K"))
94		goto out;
95	if (get_zswap_written_back_pages(&written_back_before))
96		goto out;
97
98	/* Allocate 10x memory.max to push memory into zswap */
99	if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
100		goto out;
101
102	/* Verify that no writeback happened because of the memcg allocation */
103	if (get_zswap_written_back_pages(&written_back_after))
104		goto out;
105	if (written_back_after == written_back_before)
106		ret = KSFT_PASS;
107out:
108	cg_destroy(test_group);
109	free(test_group);
110	return ret;
111}
112
/*
 * Arguments exchanged between test_no_kmem_bypass() and the child it spawns.
 * The parent places this structure in a MAP_SHARED mapping, so stores made
 * by the child are visible to the parent.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate and touch. */
	size_t target_alloc_bytes;

	/*
	 * Used as a flag: set to true by the child once its allocation
	 * attempt is over, whether it succeeded or failed.
	 */
	size_t child_allocated;
};
117
118static int no_kmem_bypass_child(const char *cgroup, void *arg)
119{
120	struct no_kmem_bypass_child_args *values = arg;
121	void *allocation;
122
123	allocation = malloc(values->target_alloc_bytes);
124	if (!allocation) {
125		values->child_allocated = true;
126		return -1;
127	}
128	for (long i = 0; i < values->target_alloc_bytes; i += 4095)
129		((char *)allocation)[i] = 'a';
130	values->child_allocated = true;
131	pause();
132	free(allocation);
133	return 0;
134}
135
136/*
137 * When pages owned by a memcg are pushed to zswap by kswapd, they should be
 * charged to that cgroup. This wasn't the case before commit
 * cd08d80ecdac ("mm: correctly charge compressed memory to its memcg").
140 *
 * The test first allocates memory in a memcg, then raises min_free_kbytes to
 * a very high value so that free memory falls below the low watermark, then
 * makes another allocation to trigger kswapd, which should push the
 * memcg-owned pages to zswap, and finally verifies that the zswap pages are
 * correctly charged.
145 *
146 * To be run on a VM with at most 4G of memory.
147 */
148static int test_no_kmem_bypass(const char *root)
149{
150	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
151	struct no_kmem_bypass_child_args *values;
152	size_t trigger_allocation_size;
153	int wait_child_iteration = 0;
154	long stored_pages_threshold;
155	struct sysinfo sys_info;
156	int ret = KSFT_FAIL;
157	int child_status;
158	char *test_group;
159	pid_t child_pid;
160
161	/* Read sys info and compute test values accordingly */
162	if (sysinfo(&sys_info) != 0)
163		return KSFT_FAIL;
164	if (sys_info.totalram > 5000000000)
165		return KSFT_SKIP;
166	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
167			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
168	if (values == MAP_FAILED)
169		return KSFT_FAIL;
170	if (read_min_free_kb(&min_free_kb_original))
171		return KSFT_FAIL;
172	min_free_kb_high = sys_info.totalram / 2000;
173	min_free_kb_low = sys_info.totalram / 500000;
174	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
175		sys_info.totalram * 5 / 100;
176	stored_pages_threshold = sys_info.totalram / 5 / 4096;
177	trigger_allocation_size = sys_info.totalram / 20;
178
179	/* Set up test memcg */
180	if (cg_write(root, "cgroup.subtree_control", "+memory"))
181		goto out;
182	test_group = cg_name(root, "kmem_bypass_test");
183	if (!test_group)
184		goto out;
185
186	/* Spawn memcg child and wait for it to allocate */
187	set_min_free_kb(min_free_kb_low);
188	if (cg_create(test_group))
189		goto out;
190	values->child_allocated = false;
191	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
192	if (child_pid < 0)
193		goto out;
194	while (!values->child_allocated && wait_child_iteration++ < 10000)
195		usleep(1000);
196
197	/* Try to wakeup kswapd and let it push child memory to zswap */
198	set_min_free_kb(min_free_kb_high);
199	for (int i = 0; i < 20; i++) {
200		size_t stored_pages;
201		char *trigger_allocation = malloc(trigger_allocation_size);
202
203		if (!trigger_allocation)
204			break;
205		for (int i = 0; i < trigger_allocation_size; i += 4095)
206			trigger_allocation[i] = 'b';
207		usleep(100000);
208		free(trigger_allocation);
209		if (get_zswap_stored_pages(&stored_pages))
210			break;
211		if (stored_pages < 0)
212			break;
213		/* If memory was pushed to zswap, verify it belongs to memcg */
214		if (stored_pages > stored_pages_threshold) {
215			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
216			int delta = stored_pages * 4096 - zswapped;
217			int result_ok = delta < stored_pages * 4096 / 4;
218
219			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
220			break;
221		}
222	}
223
224	kill(child_pid, SIGTERM);
225	waitpid(child_pid, &child_status, 0);
226out:
227	set_min_free_kb(min_free_kb_original);
228	cg_destroy(test_group);
229	free(test_group);
230	return ret;
231}
232
233#define T(x) { x, #x }
234struct zswap_test {
235	int (*fn)(const char *root);
236	const char *name;
237} tests[] = {
238	T(test_no_kmem_bypass),
239	T(test_no_invasive_cgroup_shrink),
240};
241#undef T
242
/* Report whether /sys/module/zswap exists, as probed with access(2). */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;

	return true;
}
247
248int main(int argc, char **argv)
249{
250	char root[PATH_MAX];
251	int i, ret = EXIT_SUCCESS;
252
253	if (cg_find_unified_root(root, sizeof(root)))
254		ksft_exit_skip("cgroup v2 isn't mounted\n");
255
256	if (!zswap_configured())
257		ksft_exit_skip("zswap isn't configured\n");
258
259	/*
260	 * Check that memory controller is available:
261	 * memory is listed in cgroup.controllers
262	 */
263	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
264		ksft_exit_skip("memory controller isn't available\n");
265
266	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
267		if (cg_write(root, "cgroup.subtree_control", "+memory"))
268			ksft_exit_skip("Failed to set memory controller\n");
269
270	for (i = 0; i < ARRAY_SIZE(tests); i++) {
271		switch (tests[i].fn(root)) {
272		case KSFT_PASS:
273			ksft_test_result_pass("%s\n", tests[i].name);
274			break;
275		case KSFT_SKIP:
276			ksft_test_result_skip("%s\n", tests[i].name);
277			break;
278		default:
279			ret = EXIT_FAILURE;
280			ksft_test_result_fail("%s\n", tests[i].name);
281			break;
282		}
283	}
284
285	return ret;
286}
287