1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3#include <sched.h>
4#include <sys/mount.h>
5#include <sys/stat.h>
6#include <sys/types.h>
7#include <linux/limits.h>
8#include <stdio.h>
9#include <stdlib.h>
10#include <linux/sched.h>
11#include <fcntl.h>
12#include <unistd.h>
13#include <ftw.h>
14
15#include "cgroup_helpers.h"
16#include "bpf_util.h"
17
/*
 * To avoid relying on the system setup, when setup_cgroup_environment() is
 * called we create a new mount namespace. The cgroupv2 root is mounted at
 * CGROUP_MOUNT_PATH. Unfortunately, most people don't have cgroupv2 enabled
 * at this point in time. It's easier to create our own mount namespace and
 * manage it ourselves. We assume /mnt exists.
 *
 * Related cgroupv1 helpers are named *classid*(), since we only use the
 * net_cls controller for tagging net_cls.classid. We assume the default
 * mount under /sys/fs/cgroup/net_cls, which should be the case for the
 * vast majority of users.
 */
30
/* Third argument handed to nftw() when the work directories are walked. */
#define WALK_FD_LIMIT			16

/*
 * Mount points and the work directory name used by the helpers in this
 * file. The cgroup v2 hierarchy is mounted on CGROUP_MOUNT_PATH, the
 * cgroup v1 net_cls hierarchy on NETCLS_MOUNT_PATH.
 */
#define CGROUP_MOUNT_PATH		"/mnt"
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Path formatting helpers.
 *
 * @buf must be a char array (not a pointer), because its capacity is taken
 * with sizeof(buf). Every macro evaluates to the snprintf() return value.
 *
 * The cgroup v2 work directory is "<mount><workdir><pid><path>", so each
 * process gets its own work directory; the net_cls work directory carries
 * no pid.
 */
#define format_cgroup_path_pid(buf, path, pid)				\
	snprintf((buf), sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH,	\
		 CGROUP_WORK_DIR, (pid), (path))

/* Work directory of the calling process. */
#define format_cgroup_path(buf, path)					\
	format_cgroup_path_pid(buf, path, getpid())

/* Work directory of the parent of the calling process. */
#define format_parent_cgroup_path(buf, path)				\
	format_cgroup_path_pid(buf, path, getppid())

#define format_classid_path(buf)					\
	snprintf((buf), sizeof(buf), "%s%s", NETCLS_MOUNT_PATH,		\
		 CGROUP_WORK_DIR)
51
52static int __enable_controllers(const char *cgroup_path, const char *controllers)
53{
54	char path[PATH_MAX + 1];
55	char enable[PATH_MAX + 1];
56	char *c, *c2;
57	int fd, cfd;
58	ssize_t len;
59
60	/* If not controllers are passed, enable all available controllers */
61	if (!controllers) {
62		snprintf(path, sizeof(path), "%s/cgroup.controllers",
63			 cgroup_path);
64		fd = open(path, O_RDONLY);
65		if (fd < 0) {
66			log_err("Opening cgroup.controllers: %s", path);
67			return 1;
68		}
69		len = read(fd, enable, sizeof(enable) - 1);
70		if (len < 0) {
71			close(fd);
72			log_err("Reading cgroup.controllers: %s", path);
73			return 1;
74		} else if (len == 0) { /* No controllers to enable */
75			close(fd);
76			return 0;
77		}
78		enable[len] = 0;
79		close(fd);
80	} else {
81		bpf_strlcpy(enable, controllers, sizeof(enable));
82	}
83
84	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
85	cfd = open(path, O_RDWR);
86	if (cfd < 0) {
87		log_err("Opening cgroup.subtree_control: %s", path);
88		return 1;
89	}
90
91	for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
92		if (dprintf(cfd, "+%s\n", c) <= 0) {
93			log_err("Enabling controller %s: %s", c, path);
94			close(cfd);
95			return 1;
96		}
97	}
98	close(cfd);
99	return 0;
100}
101
102/**
103 * enable_controllers() - Enable cgroup v2 controllers
104 * @relative_path: The cgroup path, relative to the workdir
105 * @controllers: List of controllers to enable in cgroup.controllers format
106 *
107 *
108 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all
109 * available controllers.
110 *
111 * If successful, 0 is returned.
112 */
113int enable_controllers(const char *relative_path, const char *controllers)
114{
115	char cgroup_path[PATH_MAX + 1];
116
117	format_cgroup_path(cgroup_path, relative_path);
118	return __enable_controllers(cgroup_path, controllers);
119}
120
121static int __write_cgroup_file(const char *cgroup_path, const char *file,
122			       const char *buf)
123{
124	char file_path[PATH_MAX + 1];
125	int fd;
126
127	snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file);
128	fd = open(file_path, O_RDWR);
129	if (fd < 0) {
130		log_err("Opening %s", file_path);
131		return 1;
132	}
133
134	if (dprintf(fd, "%s", buf) <= 0) {
135		log_err("Writing to %s", file_path);
136		close(fd);
137		return 1;
138	}
139	close(fd);
140	return 0;
141}
142
143/**
144 * write_cgroup_file() - Write to a cgroup file
145 * @relative_path: The cgroup path, relative to the workdir
146 * @file: The name of the file in cgroupfs to write to
147 * @buf: Buffer to write to the file
148 *
149 * Write to a file in the given cgroup's directory.
150 *
151 * If successful, 0 is returned.
152 */
153int write_cgroup_file(const char *relative_path, const char *file,
154		      const char *buf)
155{
156	char cgroup_path[PATH_MAX - 24];
157
158	format_cgroup_path(cgroup_path, relative_path);
159	return __write_cgroup_file(cgroup_path, file, buf);
160}
161
162/**
163 * write_cgroup_file_parent() - Write to a cgroup file in the parent process
164 *                              workdir
165 * @relative_path: The cgroup path, relative to the parent process workdir
166 * @file: The name of the file in cgroupfs to write to
167 * @buf: Buffer to write to the file
168 *
169 * Write to a file in the given cgroup's directory under the parent process
170 * workdir.
171 *
172 * If successful, 0 is returned.
173 */
174int write_cgroup_file_parent(const char *relative_path, const char *file,
175			     const char *buf)
176{
177	char cgroup_path[PATH_MAX - 24];
178
179	format_parent_cgroup_path(cgroup_path, relative_path);
180	return __write_cgroup_file(cgroup_path, file, buf);
181}
182
183/**
184 * setup_cgroup_environment() - Setup the cgroup environment
185 *
186 * After calling this function, cleanup_cgroup_environment should be called
187 * once testing is complete.
188 *
189 * This function will print an error to stderr and return 1 if it is unable
190 * to setup the cgroup environment. If setup is successful, 0 is returned.
191 */
192int setup_cgroup_environment(void)
193{
194	char cgroup_workdir[PATH_MAX - 24];
195
196	format_cgroup_path(cgroup_workdir, "");
197
198	if (unshare(CLONE_NEWNS)) {
199		log_err("unshare");
200		return 1;
201	}
202
203	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
204		log_err("mount fakeroot");
205		return 1;
206	}
207
208	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
209		log_err("mount cgroup2");
210		return 1;
211	}
212
213	/* Cleanup existing failed runs, now that the environment is setup */
214	cleanup_cgroup_environment();
215
216	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
217		log_err("mkdir cgroup work dir");
218		return 1;
219	}
220
221	/* Enable all available controllers to increase test coverage */
222	if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
223	    __enable_controllers(cgroup_workdir, NULL))
224		return 1;
225
226	return 0;
227}
228
/*
 * nftwfunc() - nftw() callback that removes a visited directory
 * @filename: Path of the visited entry
 * @statptr: Unused
 * @fileflags: Type flag of the visited entry, as passed by nftw()
 * @pfwt: Unused
 *
 * Entries whose flag does not match the directory test are skipped. A
 * failing rmdir() is logged but does not stop the walk: 0 is always returned.
 *
 * NOTE(review): the type flag is tested as a bitmask, although nftw() passes
 * one enumerated value (FTW_DP for directories when FTW_DEPTH is used, as it
 * is by the callers in this file). This presumably relies on the numeric
 * values of the C library in use - confirm before relying on it elsewhere.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	if (!(fileflags & FTW_D))
		return 0;

	if (rmdir(filename) != 0)
		log_err("Removing cgroup: %s", filename);

	return 0;
}
236
237static int join_cgroup_from_top(const char *cgroup_path)
238{
239	char cgroup_procs_path[PATH_MAX + 1];
240	pid_t pid = getpid();
241	int fd, rc = 0;
242
243	snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),
244		 "%s/cgroup.procs", cgroup_path);
245
246	fd = open(cgroup_procs_path, O_WRONLY);
247	if (fd < 0) {
248		log_err("Opening Cgroup Procs: %s", cgroup_procs_path);
249		return 1;
250	}
251
252	if (dprintf(fd, "%d\n", pid) < 0) {
253		log_err("Joining Cgroup");
254		rc = 1;
255	}
256
257	close(fd);
258	return rc;
259}
260
261/**
262 * join_cgroup() - Join a cgroup
263 * @relative_path: The cgroup path, relative to the workdir, to join
264 *
265 * This function expects a cgroup to already be created, relative to the cgroup
266 * work dir, and it joins it. For example, passing "/my-cgroup" as the path
267 * would actually put the calling process into the cgroup
268 * "/cgroup-test-work-dir/my-cgroup"
269 *
270 * On success, it returns 0, otherwise on failure it returns 1.
271 */
272int join_cgroup(const char *relative_path)
273{
274	char cgroup_path[PATH_MAX + 1];
275
276	format_cgroup_path(cgroup_path, relative_path);
277	return join_cgroup_from_top(cgroup_path);
278}
279
280/**
281 * join_root_cgroup() - Join the root cgroup
282 *
283 * This function joins the root cgroup.
284 *
285 * On success, it returns 0, otherwise on failure it returns 1.
286 */
287int join_root_cgroup(void)
288{
289	return join_cgroup_from_top(CGROUP_MOUNT_PATH);
290}
291
292/**
293 * join_parent_cgroup() - Join a cgroup in the parent process workdir
294 * @relative_path: The cgroup path, relative to parent process workdir, to join
295 *
296 * See join_cgroup().
297 *
298 * On success, it returns 0, otherwise on failure it returns 1.
299 */
300int join_parent_cgroup(const char *relative_path)
301{
302	char cgroup_path[PATH_MAX + 1];
303
304	format_parent_cgroup_path(cgroup_path, relative_path);
305	return join_cgroup_from_top(cgroup_path);
306}
307
308/**
309 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
310 *
311 * This is an idempotent function to delete all temporary cgroups that
312 * have been created during the test, including the cgroup testing work
313 * directory.
314 *
315 * At call time, it moves the calling process to the root cgroup, and then
316 * runs the deletion process. It is idempotent, and should not fail, unless
317 * a process is lingering.
318 *
319 * On failure, it will print an error to stderr, and try to continue.
320 */
321void cleanup_cgroup_environment(void)
322{
323	char cgroup_workdir[PATH_MAX + 1];
324
325	format_cgroup_path(cgroup_workdir, "");
326	join_cgroup_from_top(CGROUP_MOUNT_PATH);
327	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
328}
329
330/**
331 * get_root_cgroup() - Get the FD of the root cgroup
332 *
333 * On success, it returns the file descriptor. On failure, it returns -1.
334 * If there is a failure, it prints the error to stderr.
335 */
336int get_root_cgroup(void)
337{
338	int fd;
339
340	fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
341	if (fd < 0) {
342		log_err("Opening root cgroup");
343		return -1;
344	}
345	return fd;
346}
347
348/*
349 * remove_cgroup() - Remove a cgroup
350 * @relative_path: The cgroup path, relative to the workdir, to remove
351 *
352 * This function expects a cgroup to already be created, relative to the cgroup
353 * work dir. It also expects the cgroup doesn't have any children or live
354 * processes and it removes the cgroup.
355 *
356 * On failure, it will print an error to stderr.
357 */
358void remove_cgroup(const char *relative_path)
359{
360	char cgroup_path[PATH_MAX + 1];
361
362	format_cgroup_path(cgroup_path, relative_path);
363	if (rmdir(cgroup_path))
364		log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
365}
366
367/**
368 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
369 * @relative_path: The cgroup path, relative to the workdir, to join
370 *
371 * This function creates a cgroup under the top level workdir and returns the
372 * file descriptor. It is idempotent.
373 *
374 * On success, it returns the file descriptor. On failure it returns -1.
375 * If there is a failure, it prints the error to stderr.
376 */
377int create_and_get_cgroup(const char *relative_path)
378{
379	char cgroup_path[PATH_MAX + 1];
380	int fd;
381
382	format_cgroup_path(cgroup_path, relative_path);
383	if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
384		log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path);
385		return -1;
386	}
387
388	fd = open(cgroup_path, O_RDONLY);
389	if (fd < 0) {
390		log_err("Opening Cgroup");
391		return -1;
392	}
393
394	return fd;
395}
396
397/**
398 * get_cgroup_id() - Get cgroup id for a particular cgroup path
399 * @relative_path: The cgroup path, relative to the workdir, to join
400 *
401 * On success, it returns the cgroup id. On failure it returns 0,
402 * which is an invalid cgroup id.
403 * If there is a failure, it prints the error to stderr.
404 */
405unsigned long long get_cgroup_id(const char *relative_path)
406{
407	int dirfd, err, flags, mount_id, fhsize;
408	union {
409		unsigned long long cgid;
410		unsigned char raw_bytes[8];
411	} id;
412	char cgroup_workdir[PATH_MAX + 1];
413	struct file_handle *fhp, *fhp2;
414	unsigned long long ret = 0;
415
416	format_cgroup_path(cgroup_workdir, relative_path);
417
418	dirfd = AT_FDCWD;
419	flags = 0;
420	fhsize = sizeof(*fhp);
421	fhp = calloc(1, fhsize);
422	if (!fhp) {
423		log_err("calloc");
424		return 0;
425	}
426	err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
427	if (err >= 0 || fhp->handle_bytes != 8) {
428		log_err("name_to_handle_at");
429		goto free_mem;
430	}
431
432	fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
433	fhp2 = realloc(fhp, fhsize);
434	if (!fhp2) {
435		log_err("realloc");
436		goto free_mem;
437	}
438	err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
439	fhp = fhp2;
440	if (err < 0) {
441		log_err("name_to_handle_at");
442		goto free_mem;
443	}
444
445	memcpy(id.raw_bytes, fhp->f_handle, 8);
446	ret = id.cgid;
447
448free_mem:
449	free(fhp);
450	return ret;
451}
452
/**
 * cgroup_setup_and_join() - Setup the cgroup environment, create a cgroup
 *                           and join it
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * On success, it returns the file descriptor of the created cgroup; the
 * caller is responsible for closing it. On failure it returns a negative
 * value and prints the error to stderr. If the failure happens after the
 * environment was set up, the environment is cleaned up again and no file
 * descriptor is left open.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The fd is not handed to the caller on this path */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
475
476/**
477 * setup_classid_environment() - Setup the cgroupv1 net_cls environment
478 *
479 * After calling this function, cleanup_classid_environment should be called
480 * once testing is complete.
481 *
482 * This function will print an error to stderr and return 1 if it is unable
483 * to setup the cgroup environment. If setup is successful, 0 is returned.
484 */
485int setup_classid_environment(void)
486{
487	char cgroup_workdir[PATH_MAX + 1];
488
489	format_classid_path(cgroup_workdir);
490
491	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
492	    errno != EBUSY) {
493		log_err("mount cgroup base");
494		return 1;
495	}
496
497	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
498		log_err("mkdir cgroup net_cls");
499		return 1;
500	}
501
502	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
503		if (errno != EBUSY) {
504			log_err("mount cgroup net_cls");
505			return 1;
506		}
507
508		if (rmdir(NETCLS_MOUNT_PATH)) {
509			log_err("rmdir cgroup net_cls");
510			return 1;
511		}
512		if (umount(CGROUP_MOUNT_DFLT)) {
513			log_err("umount cgroup base");
514			return 1;
515		}
516	}
517
518	cleanup_classid_environment();
519
520	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
521		log_err("mkdir cgroup work dir");
522		return 1;
523	}
524
525	return 0;
526}
527
528/**
529 * set_classid() - Set a cgroupv1 net_cls classid
530 * @id: the numeric classid
531 *
532 * Writes the passed classid into the cgroup work dir's net_cls.classid
533 * file in order to later on trigger socket tagging.
534 *
535 * On success, it returns 0, otherwise on failure it returns 1. If there
536 * is a failure, it prints the error to stderr.
537 */
538int set_classid(unsigned int id)
539{
540	char cgroup_workdir[PATH_MAX - 42];
541	char cgroup_classid_path[PATH_MAX + 1];
542	int fd, rc = 0;
543
544	format_classid_path(cgroup_workdir);
545	snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
546		 "%s/net_cls.classid", cgroup_workdir);
547
548	fd = open(cgroup_classid_path, O_WRONLY);
549	if (fd < 0) {
550		log_err("Opening cgroup classid: %s", cgroup_classid_path);
551		return 1;
552	}
553
554	if (dprintf(fd, "%u\n", id) < 0) {
555		log_err("Setting cgroup classid");
556		rc = 1;
557	}
558
559	close(fd);
560	return rc;
561}
562
563/**
564 * join_classid() - Join a cgroupv1 net_cls classid
565 *
566 * This function expects the cgroup work dir to be already created, as we
567 * join it here. This causes the process sockets to be tagged with the given
568 * net_cls classid.
569 *
570 * On success, it returns 0, otherwise on failure it returns 1.
571 */
572int join_classid(void)
573{
574	char cgroup_workdir[PATH_MAX + 1];
575
576	format_classid_path(cgroup_workdir);
577	return join_cgroup_from_top(cgroup_workdir);
578}
579
580/**
581 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
582 *
583 * At call time, it moves the calling process to the root cgroup, and then
584 * runs the deletion process.
585 *
586 * On failure, it will print an error to stderr, and try to continue.
587 */
588void cleanup_classid_environment(void)
589{
590	char cgroup_workdir[PATH_MAX + 1];
591
592	format_classid_path(cgroup_workdir);
593	join_cgroup_from_top(NETCLS_MOUNT_PATH);
594	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
595}
596