1/* SPDX-License-Identifier: GPL-2.0 */
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
5#include <linux/oom.h>
6#include <fcntl.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <sys/stat.h>
11#include <sys/types.h>
12#include <unistd.h>
13#include <sys/socket.h>
14#include <sys/wait.h>
15#include <arpa/inet.h>
16#include <netinet/in.h>
17#include <netdb.h>
18#include <errno.h>
19
20#include "../kselftest.h"
21#include "cgroup_util.h"
22
23/*
24 * This test creates two nested cgroups with and without enabling
25 * the memory controller.
26 */
27static int test_memcg_subtree_control(const char *root)
28{
29	char *parent, *child, *parent2 = NULL, *child2 = NULL;
30	int ret = KSFT_FAIL;
31	char buf[PAGE_SIZE];
32
33	/* Create two nested cgroups with the memory controller enabled */
34	parent = cg_name(root, "memcg_test_0");
35	child = cg_name(root, "memcg_test_0/memcg_test_1");
36	if (!parent || !child)
37		goto cleanup_free;
38
39	if (cg_create(parent))
40		goto cleanup_free;
41
42	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
43		goto cleanup_parent;
44
45	if (cg_create(child))
46		goto cleanup_parent;
47
48	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
49		goto cleanup_child;
50
51	/* Create two nested cgroups without enabling memory controller */
52	parent2 = cg_name(root, "memcg_test_1");
53	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
54	if (!parent2 || !child2)
55		goto cleanup_free2;
56
57	if (cg_create(parent2))
58		goto cleanup_free2;
59
60	if (cg_create(child2))
61		goto cleanup_parent2;
62
63	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
64		goto cleanup_all;
65
66	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
67		goto cleanup_all;
68
69	ret = KSFT_PASS;
70
71cleanup_all:
72	cg_destroy(child2);
73cleanup_parent2:
74	cg_destroy(parent2);
75cleanup_free2:
76	free(parent2);
77	free(child2);
78cleanup_child:
79	cg_destroy(child);
80cleanup_parent:
81	cg_destroy(parent);
82cleanup_free:
83	free(parent);
84	free(child);
85
86	return ret;
87}
88
89static int alloc_anon_50M_check(const char *cgroup, void *arg)
90{
91	size_t size = MB(50);
92	char *buf, *ptr;
93	long anon, current;
94	int ret = -1;
95
96	buf = malloc(size);
97	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
98		*ptr = 0;
99
100	current = cg_read_long(cgroup, "memory.current");
101	if (current < size)
102		goto cleanup;
103
104	if (!values_close(size, current, 3))
105		goto cleanup;
106
107	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
108	if (anon < 0)
109		goto cleanup;
110
111	if (!values_close(anon, current, 3))
112		goto cleanup;
113
114	ret = 0;
115cleanup:
116	free(buf);
117	return ret;
118}
119
/*
 * Callback for cg_run(): create 50M of pagecache through a temporary file
 * and check the cgroup's accounting:
 *  - memory.current must be at least 50M;
 *  - the "file" entry of memory.stat must be within 10% of memory.current
 *    (as judged by values_close()).
 *
 * @cgroup: cgroup whose counters are inspected.
 * @arg:    unused.
 *
 * Returns 0 when all checks pass, -1 otherwise.  The temporary file
 * descriptor is closed on every path after it has been obtained.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	/*
	 * Reject negative values explicitly: compared directly against the
	 * unsigned size they would be converted to a huge number and slip
	 * through the lower-bound check.
	 */
	if (current < 0 || (size_t)current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}
151
152/*
153 * This test create a memory cgroup, allocates
154 * some anonymous memory and some pagecache
155 * and check memory.current and some memory.stat values.
156 */
157static int test_memcg_current(const char *root)
158{
159	int ret = KSFT_FAIL;
160	long current;
161	char *memcg;
162
163	memcg = cg_name(root, "memcg_test");
164	if (!memcg)
165		goto cleanup;
166
167	if (cg_create(memcg))
168		goto cleanup;
169
170	current = cg_read_long(memcg, "memory.current");
171	if (current != 0)
172		goto cleanup;
173
174	if (cg_run(memcg, alloc_anon_50M_check, NULL))
175		goto cleanup;
176
177	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
178		goto cleanup;
179
180	ret = KSFT_PASS;
181
182cleanup:
183	cg_destroy(memcg);
184	free(memcg);
185
186	return ret;
187}
188
/*
 * Callback for cg_run(): create 50M of pagecache on the file descriptor
 * that the caller smuggled in through @arg (an integer cast to a pointer).
 * @cgroup is unused.  Returns whatever alloc_pagecache() returns.
 */
static int alloc_pagecache_50M(const char *cgroup, void *arg)
{
	return alloc_pagecache((int)(long)arg, MB(50));
}
195
/*
 * Same as alloc_pagecache_50M(), but the process does not exit afterwards:
 * it sleeps in one-second steps for as long as getppid() keeps returning
 * the parent pid sampled on entry.
 *
 * @cgroup: unused.
 * @arg:    file descriptor cast to a pointer.
 *
 * Returns -1 if alloc_pagecache() fails, 0 once the parent pid changed.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	const int parent_pid = getppid();
	const int cache_fd = (long)arg;

	if (alloc_pagecache(cache_fd, MB(50)) != 0)
		return -1;

	for (;;) {
		if (getppid() != parent_pid)
			break;
		sleep(1);
	}

	return 0;
}
209
/*
 * Run alloc_anon(@cgroup, @arg) and then keep the process around: it
 * sleeps in one-second steps for as long as getppid() keeps returning the
 * parent pid sampled on entry.
 *
 * Returns -1 if alloc_anon() fails, 0 once the parent pid changed.
 */
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	const int parent_pid = getppid();

	if (alloc_anon(cgroup, arg) != 0)
		return -1;

	for (;;) {
		if (getppid() != parent_pid)
			break;
		sleep(1);
	}

	return 0;
}
222
/*
 * Wait until the processes of @cgroup have been killed asynchronously by
 * the OOM killer, i.e. until cgroup.procs compares equal to "".
 *
 * The file is polled up to 10 times, 100ms apart.  Returns 0 as soon as
 * the cgroup is found empty, -1 if it never is within that time.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int tries_left = 10;

	while (tries_left-- > 0) {
		if (!cg_read_strcmp(cgroup, "cgroup.procs", ""))
			return 0;

		usleep(100000);
	}

	return -1;
}
239
240/*
241 * First, this test creates the following hierarchy:
242 * A       memory.min = 50M,  memory.max = 200M
243 * A/B     memory.min = 50M,  memory.current = 50M
244 * A/B/C   memory.min = 75M,  memory.current = 50M
245 * A/B/D   memory.min = 25M,  memory.current = 50M
246 * A/B/E   memory.min = 500M, memory.current = 0
247 * A/B/F   memory.min = 0,    memory.current = 50M
248 *
249 * Usages are pagecache, but the test keeps a running
250 * process in every leaf cgroup.
251 * Then it creates A/G and creates a significant
252 * memory pressure in it.
253 *
254 * A/B    memory.current ~= 50M
255 * A/B/C  memory.current ~= 33M
256 * A/B/D  memory.current ~= 17M
257 * A/B/E  memory.current ~= 0
258 *
259 * After that it tries to allocate more than there is
260 * unprotected memory in A available, and checks
261 * checks that memory.min protects pagecache even
262 * in this case.
263 */
264static int test_memcg_min(const char *root)
265{
266	int ret = KSFT_FAIL;
267	char *parent[3] = {NULL};
268	char *children[4] = {NULL};
269	long c[4];
270	int i, attempts;
271	int fd;
272
273	fd = get_temp_fd();
274	if (fd < 0)
275		goto cleanup;
276
277	parent[0] = cg_name(root, "memcg_test_0");
278	if (!parent[0])
279		goto cleanup;
280
281	parent[1] = cg_name(parent[0], "memcg_test_1");
282	if (!parent[1])
283		goto cleanup;
284
285	parent[2] = cg_name(parent[0], "memcg_test_2");
286	if (!parent[2])
287		goto cleanup;
288
289	if (cg_create(parent[0]))
290		goto cleanup;
291
292	if (cg_read_long(parent[0], "memory.min")) {
293		ret = KSFT_SKIP;
294		goto cleanup;
295	}
296
297	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
298		goto cleanup;
299
300	if (cg_write(parent[0], "memory.max", "200M"))
301		goto cleanup;
302
303	if (cg_write(parent[0], "memory.swap.max", "0"))
304		goto cleanup;
305
306	if (cg_create(parent[1]))
307		goto cleanup;
308
309	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
310		goto cleanup;
311
312	if (cg_create(parent[2]))
313		goto cleanup;
314
315	for (i = 0; i < ARRAY_SIZE(children); i++) {
316		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
317		if (!children[i])
318			goto cleanup;
319
320		if (cg_create(children[i]))
321			goto cleanup;
322
323		if (i == 2)
324			continue;
325
326		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
327			      (void *)(long)fd);
328	}
329
330	if (cg_write(parent[0], "memory.min", "50M"))
331		goto cleanup;
332	if (cg_write(parent[1], "memory.min", "50M"))
333		goto cleanup;
334	if (cg_write(children[0], "memory.min", "75M"))
335		goto cleanup;
336	if (cg_write(children[1], "memory.min", "25M"))
337		goto cleanup;
338	if (cg_write(children[2], "memory.min", "500M"))
339		goto cleanup;
340	if (cg_write(children[3], "memory.min", "0"))
341		goto cleanup;
342
343	attempts = 0;
344	while (!values_close(cg_read_long(parent[1], "memory.current"),
345			     MB(150), 3)) {
346		if (attempts++ > 5)
347			break;
348		sleep(1);
349	}
350
351	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
352		goto cleanup;
353
354	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
355		goto cleanup;
356
357	for (i = 0; i < ARRAY_SIZE(children); i++)
358		c[i] = cg_read_long(children[i], "memory.current");
359
360	if (!values_close(c[0], MB(33), 10))
361		goto cleanup;
362
363	if (!values_close(c[1], MB(17), 10))
364		goto cleanup;
365
366	if (!values_close(c[2], 0, 1))
367		goto cleanup;
368
369	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
370		goto cleanup;
371
372	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
373		goto cleanup;
374
375	ret = KSFT_PASS;
376
377cleanup:
378	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
379		if (!children[i])
380			continue;
381
382		cg_destroy(children[i]);
383		free(children[i]);
384	}
385
386	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
387		if (!parent[i])
388			continue;
389
390		cg_destroy(parent[i]);
391		free(parent[i]);
392	}
393	close(fd);
394	return ret;
395}
396
397/*
398 * First, this test creates the following hierarchy:
399 * A       memory.low = 50M,  memory.max = 200M
400 * A/B     memory.low = 50M,  memory.current = 50M
401 * A/B/C   memory.low = 75M,  memory.current = 50M
402 * A/B/D   memory.low = 25M,  memory.current = 50M
403 * A/B/E   memory.low = 500M, memory.current = 0
404 * A/B/F   memory.low = 0,    memory.current = 50M
405 *
406 * Usages are pagecache.
407 * Then it creates A/G an creates a significant
408 * memory pressure in it.
409 *
410 * Then it checks actual memory usages and expects that:
411 * A/B    memory.current ~= 50M
412 * A/B/   memory.current ~= 33M
413 * A/B/D  memory.current ~= 17M
414 * A/B/E  memory.current ~= 0
415 *
416 * After that it tries to allocate more than there is
417 * unprotected memory in A available,
418 * and checks low and oom events in memory.events.
419 */
420static int test_memcg_low(const char *root)
421{
422	int ret = KSFT_FAIL;
423	char *parent[3] = {NULL};
424	char *children[4] = {NULL};
425	long low, oom;
426	long c[4];
427	int i;
428	int fd;
429
430	fd = get_temp_fd();
431	if (fd < 0)
432		goto cleanup;
433
434	parent[0] = cg_name(root, "memcg_test_0");
435	if (!parent[0])
436		goto cleanup;
437
438	parent[1] = cg_name(parent[0], "memcg_test_1");
439	if (!parent[1])
440		goto cleanup;
441
442	parent[2] = cg_name(parent[0], "memcg_test_2");
443	if (!parent[2])
444		goto cleanup;
445
446	if (cg_create(parent[0]))
447		goto cleanup;
448
449	if (cg_read_long(parent[0], "memory.low"))
450		goto cleanup;
451
452	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
453		goto cleanup;
454
455	if (cg_write(parent[0], "memory.max", "200M"))
456		goto cleanup;
457
458	if (cg_write(parent[0], "memory.swap.max", "0"))
459		goto cleanup;
460
461	if (cg_create(parent[1]))
462		goto cleanup;
463
464	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
465		goto cleanup;
466
467	if (cg_create(parent[2]))
468		goto cleanup;
469
470	for (i = 0; i < ARRAY_SIZE(children); i++) {
471		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
472		if (!children[i])
473			goto cleanup;
474
475		if (cg_create(children[i]))
476			goto cleanup;
477
478		if (i == 2)
479			continue;
480
481		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
482			goto cleanup;
483	}
484
485	if (cg_write(parent[0], "memory.low", "50M"))
486		goto cleanup;
487	if (cg_write(parent[1], "memory.low", "50M"))
488		goto cleanup;
489	if (cg_write(children[0], "memory.low", "75M"))
490		goto cleanup;
491	if (cg_write(children[1], "memory.low", "25M"))
492		goto cleanup;
493	if (cg_write(children[2], "memory.low", "500M"))
494		goto cleanup;
495	if (cg_write(children[3], "memory.low", "0"))
496		goto cleanup;
497
498	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
499		goto cleanup;
500
501	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
502		goto cleanup;
503
504	for (i = 0; i < ARRAY_SIZE(children); i++)
505		c[i] = cg_read_long(children[i], "memory.current");
506
507	if (!values_close(c[0], MB(33), 10))
508		goto cleanup;
509
510	if (!values_close(c[1], MB(17), 10))
511		goto cleanup;
512
513	if (!values_close(c[2], 0, 1))
514		goto cleanup;
515
516	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
517		fprintf(stderr,
518			"memory.low prevents from allocating anon memory\n");
519		goto cleanup;
520	}
521
522	for (i = 0; i < ARRAY_SIZE(children); i++) {
523		oom = cg_read_key_long(children[i], "memory.events", "oom ");
524		low = cg_read_key_long(children[i], "memory.events", "low ");
525
526		if (oom)
527			goto cleanup;
528		if (i < 2 && low <= 0)
529			goto cleanup;
530		if (i >= 2 && low)
531			goto cleanup;
532	}
533
534	ret = KSFT_PASS;
535
536cleanup:
537	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
538		if (!children[i])
539			continue;
540
541		cg_destroy(children[i]);
542		free(children[i]);
543	}
544
545	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
546		if (!parent[i])
547			continue;
548
549		cg_destroy(parent[i]);
550		free(parent[i]);
551	}
552	close(fd);
553	return ret;
554}
555
/*
 * Callback for cg_run(): create 50M of pagecache through a temporary file
 * and check that memory.current of @cgroup ends up above 29M but not
 * above 30M.  @arg is unused.
 *
 * Returns 0 if the usage is inside that window, -1 otherwise.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	const size_t cache_size = MB(50);
	int status = -1;
	long usage;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (!alloc_pagecache(fd, cache_size)) {
		usage = cg_read_long(cgroup, "memory.current");
		if (usage > MB(29) && usage <= MB(30))
			status = 0;
	}

	close(fd);
	return status;
}
581
582/*
583 * This test checks that memory.high limits the amount of
584 * memory which can be consumed by either anonymous memory
585 * or pagecache.
586 */
587static int test_memcg_high(const char *root)
588{
589	int ret = KSFT_FAIL;
590	char *memcg;
591	long high;
592
593	memcg = cg_name(root, "memcg_test");
594	if (!memcg)
595		goto cleanup;
596
597	if (cg_create(memcg))
598		goto cleanup;
599
600	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
601		goto cleanup;
602
603	if (cg_write(memcg, "memory.swap.max", "0"))
604		goto cleanup;
605
606	if (cg_write(memcg, "memory.high", "30M"))
607		goto cleanup;
608
609	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
610		goto cleanup;
611
612	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
613		goto cleanup;
614
615	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
616		goto cleanup;
617
618	high = cg_read_key_long(memcg, "memory.events", "high ");
619	if (high <= 0)
620		goto cleanup;
621
622	ret = KSFT_PASS;
623
624cleanup:
625	cg_destroy(memcg);
626	free(memcg);
627
628	return ret;
629}
630
631/*
632 * This test checks that memory.max limits the amount of
633 * memory which can be consumed by either anonymous memory
634 * or pagecache.
635 */
636static int test_memcg_max(const char *root)
637{
638	int ret = KSFT_FAIL;
639	char *memcg;
640	long current, max;
641
642	memcg = cg_name(root, "memcg_test");
643	if (!memcg)
644		goto cleanup;
645
646	if (cg_create(memcg))
647		goto cleanup;
648
649	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
650		goto cleanup;
651
652	if (cg_write(memcg, "memory.swap.max", "0"))
653		goto cleanup;
654
655	if (cg_write(memcg, "memory.max", "30M"))
656		goto cleanup;
657
658	/* Should be killed by OOM killer */
659	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
660		goto cleanup;
661
662	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
663		goto cleanup;
664
665	current = cg_read_long(memcg, "memory.current");
666	if (current > MB(30) || !current)
667		goto cleanup;
668
669	max = cg_read_key_long(memcg, "memory.events", "max ");
670	if (max <= 0)
671		goto cleanup;
672
673	ret = KSFT_PASS;
674
675cleanup:
676	cg_destroy(memcg);
677	free(memcg);
678
679	return ret;
680}
681
682static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
683{
684	long mem_max = (long)arg;
685	size_t size = MB(50);
686	char *buf, *ptr;
687	long mem_current, swap_current;
688	int ret = -1;
689
690	buf = malloc(size);
691	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
692		*ptr = 0;
693
694	mem_current = cg_read_long(cgroup, "memory.current");
695	if (!mem_current || !values_close(mem_current, mem_max, 3))
696		goto cleanup;
697
698	swap_current = cg_read_long(cgroup, "memory.swap.current");
699	if (!swap_current ||
700	    !values_close(mem_current + swap_current, size, 3))
701		goto cleanup;
702
703	ret = 0;
704cleanup:
705	free(buf);
706	return ret;
707}
708
709/*
710 * This test checks that memory.swap.max limits the amount of
711 * anonymous memory which can be swapped out.
712 */
713static int test_memcg_swap_max(const char *root)
714{
715	int ret = KSFT_FAIL;
716	char *memcg;
717	long max;
718
719	if (!is_swap_enabled())
720		return KSFT_SKIP;
721
722	memcg = cg_name(root, "memcg_test");
723	if (!memcg)
724		goto cleanup;
725
726	if (cg_create(memcg))
727		goto cleanup;
728
729	if (cg_read_long(memcg, "memory.swap.current")) {
730		ret = KSFT_SKIP;
731		goto cleanup;
732	}
733
734	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
735		goto cleanup;
736
737	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
738		goto cleanup;
739
740	if (cg_write(memcg, "memory.swap.max", "30M"))
741		goto cleanup;
742
743	if (cg_write(memcg, "memory.max", "30M"))
744		goto cleanup;
745
746	/* Should be killed by OOM killer */
747	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
748		goto cleanup;
749
750	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
751		goto cleanup;
752
753	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
754		goto cleanup;
755
756	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
757		goto cleanup;
758
759	max = cg_read_key_long(memcg, "memory.events", "max ");
760	if (max <= 0)
761		goto cleanup;
762
763	ret = KSFT_PASS;
764
765cleanup:
766	cg_destroy(memcg);
767	free(memcg);
768
769	return ret;
770}
771
772/*
773 * This test disables swapping and tries to allocate anonymous memory
774 * up to OOM. Then it checks for oom and oom_kill events in
775 * memory.events.
776 */
777static int test_memcg_oom_events(const char *root)
778{
779	int ret = KSFT_FAIL;
780	char *memcg;
781
782	memcg = cg_name(root, "memcg_test");
783	if (!memcg)
784		goto cleanup;
785
786	if (cg_create(memcg))
787		goto cleanup;
788
789	if (cg_write(memcg, "memory.max", "30M"))
790		goto cleanup;
791
792	if (cg_write(memcg, "memory.swap.max", "0"))
793		goto cleanup;
794
795	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
796		goto cleanup;
797
798	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
799		goto cleanup;
800
801	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
802		goto cleanup;
803
804	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
805		goto cleanup;
806
807	ret = KSFT_PASS;
808
809cleanup:
810	cg_destroy(memcg);
811	free(memcg);
812
813	return ret;
814}
815
/* Arguments handed from test_memcg_sock() to tcp_server() */
struct tcp_server_args {
	/* Port to bind to, in host byte order (tcp_server() applies htons()) */
	unsigned short port;
	/*
	 * pipe() pair: tcp_server() closes ctl[0] and writes its status to
	 * ctl[1]; test_memcg_sock() closes ctl[1] and reads from ctl[0].
	 */
	int ctl[2];
};
820
821static int tcp_server(const char *cgroup, void *arg)
822{
823	struct tcp_server_args *srv_args = arg;
824	struct sockaddr_in6 saddr = { 0 };
825	socklen_t slen = sizeof(saddr);
826	int sk, client_sk, ctl_fd, yes = 1, ret = -1;
827
828	close(srv_args->ctl[0]);
829	ctl_fd = srv_args->ctl[1];
830
831	saddr.sin6_family = AF_INET6;
832	saddr.sin6_addr = in6addr_any;
833	saddr.sin6_port = htons(srv_args->port);
834
835	sk = socket(AF_INET6, SOCK_STREAM, 0);
836	if (sk < 0)
837		return ret;
838
839	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
840		goto cleanup;
841
842	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
843		write(ctl_fd, &errno, sizeof(errno));
844		goto cleanup;
845	}
846
847	if (listen(sk, 1))
848		goto cleanup;
849
850	ret = 0;
851	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
852		ret = -1;
853		goto cleanup;
854	}
855
856	client_sk = accept(sk, NULL, NULL);
857	if (client_sk < 0)
858		goto cleanup;
859
860	ret = -1;
861	for (;;) {
862		uint8_t buf[0x100000];
863
864		if (write(client_sk, buf, sizeof(buf)) <= 0) {
865			if (errno == ECONNRESET)
866				ret = 0;
867			break;
868		}
869	}
870
871	close(client_sk);
872
873cleanup:
874	close(sk);
875	return ret;
876}
877
/*
 * Connect to "localhost" on @port and read from the socket while checking
 * the socket memory accounting of @cgroup.
 *
 * After each read, memory.current and the "sock" entry of memory.stat are
 * sampled; both must be non-negative and memory.current must not be below
 * "sock".  The check passes as soon as the two values are within 10% of
 * each other (as judged by values_close()); up to 16 reads are attempted.
 *
 * Returns KSFT_PASS on success, the non-zero getaddrinfo() error code if
 * name resolution fails, and KSFT_FAIL for every other failure.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/*
	 * The port is unsigned: with "%hd" every port above 32767 would be
	 * printed as a negative number (and truncated to fit servport).
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	/*
	 * 'ret' is 0 at this point; reset it so that no early exit below
	 * can hand getaddrinfo()'s success value back to the caller, which
	 * compares the result against KSFT_PASS.
	 */
	ret = KSFT_FAIL;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	if (connect(sk, ai->ai_addr, ai->ai_addrlen) < 0)
		goto close_sk;

	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (current < sock)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
928
929/*
930 * This test checks socket memory accounting.
931 * The test forks a TCP server listens on a random port between 1000
932 * and 61000. Once it gets a client connection, it starts writing to
933 * its socket.
934 * The TCP client interleaves reads from the socket with check whether
935 * memory.current and memory.stat.sock are similar.
936 */
937static int test_memcg_sock(const char *root)
938{
939	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
940	unsigned short port;
941	char *memcg;
942
943	memcg = cg_name(root, "memcg_test");
944	if (!memcg)
945		goto cleanup;
946
947	if (cg_create(memcg))
948		goto cleanup;
949
950	while (bind_retries--) {
951		struct tcp_server_args args;
952
953		if (pipe(args.ctl))
954			goto cleanup;
955
956		port = args.port = 1000 + rand() % 60000;
957
958		pid = cg_run_nowait(memcg, tcp_server, &args);
959		if (pid < 0)
960			goto cleanup;
961
962		close(args.ctl[1]);
963		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
964			goto cleanup;
965		close(args.ctl[0]);
966
967		if (!err)
968			break;
969		if (err != EADDRINUSE)
970			goto cleanup;
971
972		waitpid(pid, NULL, 0);
973	}
974
975	if (err == EADDRINUSE) {
976		ret = KSFT_SKIP;
977		goto cleanup;
978	}
979
980	if (tcp_client(memcg, port) != KSFT_PASS)
981		goto cleanup;
982
983	waitpid(pid, &err, 0);
984	if (WEXITSTATUS(err))
985		goto cleanup;
986
987	if (cg_read_long(memcg, "memory.current") < 0)
988		goto cleanup;
989
990	if (cg_read_key_long(memcg, "memory.stat", "sock "))
991		goto cleanup;
992
993	ret = KSFT_PASS;
994
995cleanup:
996	cg_destroy(memcg);
997	free(memcg);
998
999	return ret;
1000}
1001
1002/*
1003 * This test disables swapping and tries to allocate anonymous memory
1004 * up to OOM with memory.group.oom set. Then it checks that all
1005 * processes in the leaf (but not the parent) were killed.
1006 */
1007static int test_memcg_oom_group_leaf_events(const char *root)
1008{
1009	int ret = KSFT_FAIL;
1010	char *parent, *child;
1011
1012	parent = cg_name(root, "memcg_test_0");
1013	child = cg_name(root, "memcg_test_0/memcg_test_1");
1014
1015	if (!parent || !child)
1016		goto cleanup;
1017
1018	if (cg_create(parent))
1019		goto cleanup;
1020
1021	if (cg_create(child))
1022		goto cleanup;
1023
1024	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1025		goto cleanup;
1026
1027	if (cg_write(child, "memory.max", "50M"))
1028		goto cleanup;
1029
1030	if (cg_write(child, "memory.swap.max", "0"))
1031		goto cleanup;
1032
1033	if (cg_write(child, "memory.oom.group", "1"))
1034		goto cleanup;
1035
1036	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1037	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1038	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1039	if (!cg_run(child, alloc_anon, (void *)MB(100)))
1040		goto cleanup;
1041
1042	if (cg_test_proc_killed(child))
1043		goto cleanup;
1044
1045	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1046		goto cleanup;
1047
1048	if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
1049		goto cleanup;
1050
1051	ret = KSFT_PASS;
1052
1053cleanup:
1054	if (child)
1055		cg_destroy(child);
1056	if (parent)
1057		cg_destroy(parent);
1058	free(child);
1059	free(parent);
1060
1061	return ret;
1062}
1063
1064/*
1065 * This test disables swapping and tries to allocate anonymous memory
1066 * up to OOM with memory.group.oom set. Then it checks that all
1067 * processes in the parent and leaf were killed.
1068 */
1069static int test_memcg_oom_group_parent_events(const char *root)
1070{
1071	int ret = KSFT_FAIL;
1072	char *parent, *child;
1073
1074	parent = cg_name(root, "memcg_test_0");
1075	child = cg_name(root, "memcg_test_0/memcg_test_1");
1076
1077	if (!parent || !child)
1078		goto cleanup;
1079
1080	if (cg_create(parent))
1081		goto cleanup;
1082
1083	if (cg_create(child))
1084		goto cleanup;
1085
1086	if (cg_write(parent, "memory.max", "80M"))
1087		goto cleanup;
1088
1089	if (cg_write(parent, "memory.swap.max", "0"))
1090		goto cleanup;
1091
1092	if (cg_write(parent, "memory.oom.group", "1"))
1093		goto cleanup;
1094
1095	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1096	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1097	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1098
1099	if (!cg_run(child, alloc_anon, (void *)MB(100)))
1100		goto cleanup;
1101
1102	if (cg_test_proc_killed(child))
1103		goto cleanup;
1104	if (cg_test_proc_killed(parent))
1105		goto cleanup;
1106
1107	ret = KSFT_PASS;
1108
1109cleanup:
1110	if (child)
1111		cg_destroy(child);
1112	if (parent)
1113		cg_destroy(parent);
1114	free(child);
1115	free(parent);
1116
1117	return ret;
1118}
1119
1120/*
1121 * This test disables swapping and tries to allocate anonymous memory
1122 * up to OOM with memory.group.oom set. Then it checks that all
1123 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1124 */
1125static int test_memcg_oom_group_score_events(const char *root)
1126{
1127	int ret = KSFT_FAIL;
1128	char *memcg;
1129	int safe_pid;
1130
1131	memcg = cg_name(root, "memcg_test_0");
1132
1133	if (!memcg)
1134		goto cleanup;
1135
1136	if (cg_create(memcg))
1137		goto cleanup;
1138
1139	if (cg_write(memcg, "memory.max", "50M"))
1140		goto cleanup;
1141
1142	if (cg_write(memcg, "memory.swap.max", "0"))
1143		goto cleanup;
1144
1145	if (cg_write(memcg, "memory.oom.group", "1"))
1146		goto cleanup;
1147
1148	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1149	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1150		goto cleanup;
1151
1152	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1153	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1154		goto cleanup;
1155
1156	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
1157		goto cleanup;
1158
1159	if (kill(safe_pid, SIGKILL))
1160		goto cleanup;
1161
1162	ret = KSFT_PASS;
1163
1164cleanup:
1165	if (memcg)
1166		cg_destroy(memcg);
1167	free(memcg);
1168
1169	return ret;
1170}
1171
1172
1173#define T(x) { x, #x }
1174struct memcg_test {
1175	int (*fn)(const char *root);
1176	const char *name;
1177} tests[] = {
1178	T(test_memcg_subtree_control),
1179	T(test_memcg_current),
1180	T(test_memcg_min),
1181	T(test_memcg_low),
1182	T(test_memcg_high),
1183	T(test_memcg_max),
1184	T(test_memcg_oom_events),
1185	T(test_memcg_swap_max),
1186	T(test_memcg_sock),
1187	T(test_memcg_oom_group_leaf_events),
1188	T(test_memcg_oom_group_parent_events),
1189	T(test_memcg_oom_group_score_events),
1190};
1191#undef T
1192
1193int main(int argc, char **argv)
1194{
1195	char root[PATH_MAX];
1196	int i, ret = EXIT_SUCCESS;
1197
1198	if (cg_find_unified_root(root, sizeof(root)))
1199		ksft_exit_skip("cgroup v2 isn't mounted\n");
1200
1201	/*
1202	 * Check that memory controller is available:
1203	 * memory is listed in cgroup.controllers
1204	 */
1205	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1206		ksft_exit_skip("memory controller isn't available\n");
1207
1208	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
1209		if (cg_write(root, "cgroup.subtree_control", "+memory"))
1210			ksft_exit_skip("Failed to set memory controller\n");
1211
1212	for (i = 0; i < ARRAY_SIZE(tests); i++) {
1213		switch (tests[i].fn(root)) {
1214		case KSFT_PASS:
1215			ksft_test_result_pass("%s\n", tests[i].name);
1216			break;
1217		case KSFT_SKIP:
1218			ksft_test_result_skip("%s\n", tests[i].name);
1219			break;
1220		default:
1221			ret = EXIT_FAILURE;
1222			ksft_test_result_fail("%s\n", tests[i].name);
1223			break;
1224		}
1225	}
1226
1227	return ret;
1228}
1229