1/* SPDX-License-Identifier: GPL-2.0 */
2
3#define _GNU_SOURCE
4
5#include <errno.h>
6#include <fcntl.h>
7#include <linux/limits.h>
8#include <signal.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/stat.h>
13#include <sys/types.h>
14#include <sys/wait.h>
15#include <unistd.h>
16
17#include "cgroup_util.h"
18#include "../clone3/clone3_selftests.h"
19
/*
 * Read the start of the file at @path into @buf as a NUL-terminated string.
 *
 * A single read() of at most @max_len - 1 bytes is issued, so the result is
 * truncated when the file holds more data than the buffer can take.
 *
 * Returns the number of bytes stored (not counting the terminator), or a
 * negative value when @max_len is zero (errno is set to EINVAL) or when
 * open() or read() fails.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd;

	/*
	 * With a zero-sized buffer "max_len - 1" would wrap around to SIZE_MAX
	 * and read() could write far past the end of @buf.
	 */
	if (!max_len) {
		errno = EINVAL;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return fd;

	len = read(fd, buf, max_len - 1);
	if (len >= 0)
		buf[len] = '\0';

	close(fd);
	return len;
}
38
/*
 * Append @len bytes from @buf to the already existing file at @path.
 *
 * Returns the value of write() (which may be a short count), or a negative
 * value when the file cannot be opened or the write fails.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_APPEND);

	if (fd < 0)
		return fd;

	/* The descriptor is closed regardless of how the write went. */
	written = write(fd, buf, len);
	close(fd);

	return written;
}
57
/*
 * Build the path "<root>/<name>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* One byte for the '/' separator and one for the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
67
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated string.
 *
 * The required size is measured with snprintf() so that every int value of
 * @index fits; a fixed amount of slack would truncate large or negative
 * indices.
 *
 * Returns the new string, which the caller must free(), or NULL if
 * formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int len;

	len = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (len < 0)
		return NULL;

	ret = malloc((size_t)len + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)len + 1, "%s/%s_%d", root, name, index);

	return ret;
}
77
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* One byte for the '/' separator and one for the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
87
/*
 * Read the file "<cgroup>/<control>" into @buf (capacity @len) as a
 * NUL-terminated string.
 *
 * Returns 0 on success and -1 if the file could not be read.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char path[PATH_MAX];
	ssize_t nread;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	nread = read_text(path, buf, len);

	return nread < 0 ? -1 : 0;
}
99
/*
 * Compare the beginning of "<cgroup>/<control>" with @expected.
 *
 * The read buffer is sized to strlen(@expected) + 1, so only as many bytes
 * as @expected is long are read from the file before comparing.
 *
 * Returns the strcmp() result of @expected against the data read, or -1 if
 * @expected is NULL, the buffer cannot be allocated or the read fails.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	char *actual;
	size_t bufsz;
	int cmp = -1;

	/* There is nothing to compare against without an expectation. */
	if (!expected)
		return -1;

	bufsz = strlen(expected) + 1;
	actual = malloc(bufsz);
	if (!actual)
		return -1;

	if (!cg_read(cgroup, control, actual, bufsz))
		cmp = strcmp(expected, actual);

	free(actual);
	return cmp;
}
126
127int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
128{
129	char buf[PAGE_SIZE];
130
131	if (cg_read(cgroup, control, buf, sizeof(buf)))
132		return -1;
133
134	return strstr(buf, needle) ? 0 : -1;
135}
136
/*
 * Read "<cgroup>/<control>" and convert its leading text with atol().
 *
 * Returns the converted value, or -1 if the file cannot be read.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	if (err)
		return -1;

	return atol(text);
}
146
147long cg_read_key_long(const char *cgroup, const char *control, const char *key)
148{
149	char buf[PAGE_SIZE];
150	char *ptr;
151
152	if (cg_read(cgroup, control, buf, sizeof(buf)))
153		return -1;
154
155	ptr = strstr(buf, key);
156	if (!ptr)
157		return -1;
158
159	return atol(ptr + strlen(key));
160}
161
162long cg_read_lc(const char *cgroup, const char *control)
163{
164	char buf[PAGE_SIZE];
165	const char delim[] = "\n";
166	char *line;
167	long cnt = 0;
168
169	if (cg_read(cgroup, control, buf, sizeof(buf)))
170		return -1;
171
172	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
173		cnt++;
174
175	return cnt;
176}
177
/*
 * Write the string @buf to the file "<cgroup>/<control>".
 *
 * Returns 0 only if the whole string was written, -1 otherwise.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t want = strlen(buf);

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);

	/* A short write counts as a failure. */
	return write_text(path, buf, want) == want ? 0 : -1;
}
190
191int cg_find_unified_root(char *root, size_t len)
192{
193	char buf[10 * PAGE_SIZE];
194	char *fs, *mount, *type;
195	const char delim[] = "\n\t ";
196
197	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
198		return -1;
199
200	/*
201	 * Example:
202	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
203	 */
204	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
205		mount = strtok(NULL, delim);
206		type = strtok(NULL, delim);
207		strtok(NULL, delim);
208		strtok(NULL, delim);
209		strtok(NULL, delim);
210
211		if (strcmp(type, "cgroup2") == 0) {
212			strncpy(root, mount, len);
213			return 0;
214		}
215	}
216
217	return -1;
218}
219
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the result of mkdir().
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
224
225int cg_wait_for_proc_count(const char *cgroup, int count)
226{
227	char buf[10 * PAGE_SIZE] = {0};
228	int attempts;
229	char *ptr;
230
231	for (attempts = 10; attempts >= 0; attempts--) {
232		int nr = 0;
233
234		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
235			break;
236
237		for (ptr = buf; *ptr; ptr++)
238			if (*ptr == '\n')
239				nr++;
240
241		if (nr >= count)
242			return 0;
243
244		usleep(100000);
245	}
246
247	return -1;
248}
249
250int cg_killall(const char *cgroup)
251{
252	char buf[PAGE_SIZE];
253	char *ptr = buf;
254
255	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
256		return -1;
257
258	while (ptr < buf + sizeof(buf)) {
259		int pid = strtol(ptr, &ptr, 10);
260
261		if (pid == 0)
262			break;
263		if (*ptr)
264			ptr++;
265		else
266			break;
267		if (kill(pid, SIGKILL))
268			return -1;
269	}
270
271	return 0;
272}
273
/*
 * Remove the cgroup directory @cgroup.
 *
 * While rmdir() fails with EBUSY, the processes in the cgroup are killed
 * and the removal is retried after a short pause. A cgroup that does not
 * exist is treated as success.
 *
 * Returns 0 on success, otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	while ((ret = rmdir(cgroup)) != 0 && errno == EBUSY) {
		cg_killall(cgroup);
		usleep(100);
	}

	if (ret != 0 && errno == ENOENT)
		return 0;

	return ret;
}
291
/*
 * Move process @pid into @cgroup by writing its pid to cgroup.procs.
 *
 * Returns the result of cg_write().
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_text[64];
	int err;

	snprintf(pid_text, sizeof(pid_text), "%d", pid);
	err = cg_write(cgroup, "cgroup.procs", pid_text);

	return err;
}
299
/*
 * Write "0" to "<cgroup>/cgroup.procs".
 *
 * Returns the result of cg_write().
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
304
/*
 * Write "0" to "<cgroup>/cgroup.threads".
 *
 * Returns the result of cg_write().
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
309
/*
 * Fork a child, move it into @cgroup, run @fn(@cgroup, @arg) in it and wait
 * for it to finish.
 *
 * The child exits with EXIT_FAILURE if it cannot enter the cgroup, and with
 * the return value of @fn otherwise.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if the fork failed, and -1 if waiting failed or the child
 * did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only valid after a successful waitpid(); retry when the
	 * wait is merely interrupted by a signal.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
334
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the clone3() result when the call is available. When it is not
 * (no CLONE_ARGS_SIZE_VER2 at build time, or clone3() fails with ENOSYS or
 * E2BIG), errno is set to ENOSYS and -ENOSYS is returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(args));

	/*
	 * Only these two errors mean the feature is missing:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 * Everything else, success included, is reported as is.
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
362
363int clone_reap(pid_t pid, int options)
364{
365	int ret;
366	siginfo_t info = {
367		.si_signo = 0,
368	};
369
370again:
371	ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
372	if (ret < 0) {
373		if (errno == EINTR)
374			goto again;
375		return -1;
376	}
377
378	if (options & WEXITED) {
379		if (WIFEXITED(info.si_status))
380			return WEXITSTATUS(info.si_status);
381	}
382
383	if (options & WSTOPPED) {
384		if (WIFSTOPPED(info.si_status))
385			return WSTOPSIG(info.si_status);
386	}
387
388	if (options & WCONTINUED) {
389		if (WIFCONTINUED(info.si_status))
390			return 0;
391	}
392
393	return -1;
394}
395
396int dirfd_open_opath(const char *dir)
397{
398	return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH);
399}
400
/*
 * Close @fd if it is non-negative, keeping the current errno intact.
 *
 * Wrapped in do { } while (0) so that the macro behaves like one statement,
 * including when it is the body of an if/else. @fd is evaluated more than
 * once, so it must not have side effects.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		if ((fd) >= 0) {                                               \
			int _e_ = errno;                                       \
			close(fd);                                             \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
407
/*
 * Clone a child directly into @cgroup and have it run @fn(@cgroup, @arg),
 * without waiting for it.
 *
 * The child exits with the return value of @fn. In the parent the result of
 * clone_into_cgroup() is returned, or -1 if @cgroup cannot be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	/* The descriptor is no longer needed; errno from the clone is kept. */
	close_prot_errno(dirfd);

	if (child != 0)
		return child;

	exit(fn(cgroup, arg));
}
426
/*
 * Start a child that runs @fn(@cgroup, @arg) inside @cgroup and return
 * without waiting for it.
 *
 * Cloning directly into the cgroup is tried first. If that is reported as
 * unsupported (ENOSYS), a plain fork() is used and the child moves itself
 * into the cgroup; it exits with EXIT_FAILURE if that fails.
 *
 * Returns the child's pid in the parent, -1 if the clone failed for any
 * other reason, or the fork() result if the fork failed.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char pidstr[64];
	int pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);

	if (pid > 0)
		return pid;

	/* Anything but ENOSYS is a genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid != 0)
		return pid;

	/* Child: enter the cgroup by hand, then run the payload. */
	snprintf(pidstr, sizeof(pidstr), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", pidstr))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
453
454int get_temp_fd(void)
455{
456	return open(".", O_TMPFILE | O_RDWR | O_EXCL);
457}
458
459int alloc_pagecache(int fd, size_t size)
460{
461	char buf[PAGE_SIZE];
462	struct stat st;
463	int i;
464
465	if (fstat(fd, &st))
466		goto cleanup;
467
468	size += st.st_size;
469
470	if (ftruncate(fd, size))
471		goto cleanup;
472
473	for (i = 0; i < size; i += sizeof(buf))
474		read(fd, buf, sizeof(buf));
475
476	return 0;
477
478cleanup:
479	return -1;
480}
481
482int alloc_anon(const char *cgroup, void *arg)
483{
484	size_t size = (unsigned long)arg;
485	char *buf, *ptr;
486
487	buf = malloc(size);
488	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
489		*ptr = 0;
490
491	free(buf);
492	return 0;
493}
494
495int is_swap_enabled(void)
496{
497	char buf[PAGE_SIZE];
498	const char delim[] = "\n";
499	int cnt = 0;
500	char *line;
501
502	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
503		return -1;
504
505	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
506		cnt++;
507
508	return cnt > 1;
509}
510
/*
 * Write @score to /proc/<pid>/oom_score_adj.
 *
 * Returns 0 on success, or the negative open() or dprintf() result on
 * failure.
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, written;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	/* The descriptor is closed whether or not the write succeeded. */
	written = dprintf(fd, "%d", score);
	close(fd);

	return written < 0 ? written : 0;
}
531
/*
 * Read /proc/<pid>/<item> into @buf (capacity @size).
 *
 * With @pid == 0 the file is read from /proc/thread-self when @thread is
 * true and from /proc/self otherwise.
 *
 * Returns the result of read_text().
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	return read_text(path, buf, size);
}
544
545int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
546{
547	char buf[PAGE_SIZE];
548
549	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
550		return -1;
551
552	return strstr(buf, needle) ? 0 : -1;
553}
554
555int clone_into_cgroup_run_wait(const char *cgroup)
556{
557	int cgroup_fd;
558	pid_t pid;
559
560	cgroup_fd =  dirfd_open_opath(cgroup);
561	if (cgroup_fd < 0)
562		return -1;
563
564	pid = clone_into_cgroup(cgroup_fd);
565	close_prot_errno(cgroup_fd);
566	if (pid < 0)
567		return -1;
568
569	if (pid == 0)
570		exit(EXIT_SUCCESS);
571
572	/*
573	 * We don't care whether this fails. We only care whether the initial
574	 * clone succeeded.
575	 */
576	(void)clone_reap(pid, WEXITED);
577	return 0;
578}
579