1/* SPDX-License-Identifier: GPL-2.0 */ 2 3#define _GNU_SOURCE 4 5#include <errno.h> 6#include <fcntl.h> 7#include <linux/limits.h> 8#include <signal.h> 9#include <stdio.h> 10#include <stdlib.h> 11#include <string.h> 12#include <sys/stat.h> 13#include <sys/types.h> 14#include <sys/wait.h> 15#include <unistd.h> 16 17#include "cgroup_util.h" 18#include "../clone3/clone3_selftests.h" 19 20static ssize_t read_text(const char *path, char *buf, size_t max_len) 21{ 22 ssize_t len; 23 int fd; 24 25 fd = open(path, O_RDONLY); 26 if (fd < 0) 27 return fd; 28 29 len = read(fd, buf, max_len - 1); 30 if (len < 0) 31 goto out; 32 33 buf[len] = 0; 34out: 35 close(fd); 36 return len; 37} 38 39static ssize_t write_text(const char *path, char *buf, ssize_t len) 40{ 41 int fd; 42 43 fd = open(path, O_WRONLY | O_APPEND); 44 if (fd < 0) 45 return fd; 46 47 len = write(fd, buf, len); 48 if (len < 0) { 49 close(fd); 50 return len; 51 } 52 53 close(fd); 54 55 return len; 56} 57 58char *cg_name(const char *root, const char *name) 59{ 60 size_t len = strlen(root) + strlen(name) + 2; 61 char *ret = malloc(len); 62 63 snprintf(ret, len, "%s/%s", root, name); 64 65 return ret; 66} 67 68char *cg_name_indexed(const char *root, const char *name, int index) 69{ 70 size_t len = strlen(root) + strlen(name) + 10; 71 char *ret = malloc(len); 72 73 snprintf(ret, len, "%s/%s_%d", root, name, index); 74 75 return ret; 76} 77 78char *cg_control(const char *cgroup, const char *control) 79{ 80 size_t len = strlen(cgroup) + strlen(control) + 2; 81 char *ret = malloc(len); 82 83 snprintf(ret, len, "%s/%s", cgroup, control); 84 85 return ret; 86} 87 88int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 89{ 90 char path[PATH_MAX]; 91 92 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 93 94 if (read_text(path, buf, len) >= 0) 95 return 0; 96 97 return -1; 98} 99 100int cg_read_strcmp(const char *cgroup, const char *control, 101 const char *expected) 102{ 103 size_t size; 104 char *buf; 105 int ret; 106 107 /* Handle the case of comparing against empty string */ 108 if (!expected) 109 return -1; 110 else 111 size = strlen(expected) + 1; 112 113 buf = malloc(size); 114 if (!buf) 115 return -1; 116 117 if (cg_read(cgroup, control, buf, size)) { 118 free(buf); 119 return -1; 120 } 121 122 ret = strcmp(expected, buf); 123 free(buf); 124 return ret; 125} 126 127int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 128{ 129 char buf[PAGE_SIZE]; 130 131 if (cg_read(cgroup, control, buf, sizeof(buf))) 132 return -1; 133 134 return strstr(buf, needle) ? 0 : -1; 135} 136 137long cg_read_long(const char *cgroup, const char *control) 138{ 139 char buf[128]; 140 141 if (cg_read(cgroup, control, buf, sizeof(buf))) 142 return -1; 143 144 return atol(buf); 145} 146 147long cg_read_key_long(const char *cgroup, const char *control, const char *key) 148{ 149 char buf[PAGE_SIZE]; 150 char *ptr; 151 152 if (cg_read(cgroup, control, buf, sizeof(buf))) 153 return -1; 154 155 ptr = strstr(buf, key); 156 if (!ptr) 157 return -1; 158 159 return atol(ptr + strlen(key)); 160} 161 162long cg_read_lc(const char *cgroup, const char *control) 163{ 164 char buf[PAGE_SIZE]; 165 const char delim[] = "\n"; 166 char *line; 167 long cnt = 0; 168 169 if (cg_read(cgroup, control, buf, sizeof(buf))) 170 return -1; 171 172 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 173 cnt++; 174 175 return cnt; 176} 177 178int cg_write(const char *cgroup, const char *control, char *buf) 179{ 180 char path[PATH_MAX]; 181 ssize_t len = strlen(buf); 182 183 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 184 185 if (write_text(path, buf, len) == len) 186 return 0; 187 188 return -1; 189} 190 191int cg_find_unified_root(char *root, size_t len) 192{ 193 char buf[10 * PAGE_SIZE]; 194 char *fs, *mount, *type; 195 const char delim[] = "\n\t "; 196 197 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 198 return -1; 199 200 /* 201 * Example: 202 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 203 */ 204 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 205 mount = strtok(NULL, delim); 206 type = strtok(NULL, delim); 207 strtok(NULL, delim); 208 strtok(NULL, delim); 209 strtok(NULL, delim); 210 211 if (strcmp(type, "cgroup2") == 0) { 212 strncpy(root, mount, len); 213 return 0; 214 } 215 } 216 217 return -1; 218} 219 220int cg_create(const char *cgroup) 221{ 222 return mkdir(cgroup, 0755); 223} 224 225int cg_wait_for_proc_count(const char *cgroup, int count) 226{ 227 char buf[10 * PAGE_SIZE] = {0}; 228 int attempts; 229 char *ptr; 230 231 for (attempts = 10; attempts >= 0; attempts--) { 232 int nr = 0; 233 234 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 235 break; 236 237 for (ptr = buf; *ptr; ptr++) 238 if (*ptr == '\n') 239 nr++; 240 241 if (nr >= count) 242 return 0; 243 244 usleep(100000); 245 } 246 247 return -1; 248} 249 250int cg_killall(const char *cgroup) 251{ 252 char buf[PAGE_SIZE]; 253 char *ptr = buf; 254 255 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 256 return -1; 257 258 while (ptr < buf + sizeof(buf)) { 259 int pid = strtol(ptr, &ptr, 10); 260 261 if (pid == 0) 262 break; 263 if (*ptr) 264 ptr++; 265 else 266 break; 267 if (kill(pid, SIGKILL)) 268 return -1; 269 } 270 271 return 0; 272} 273 274int cg_destroy(const char *cgroup) 275{ 276 int ret; 277 278retry: 279 ret = rmdir(cgroup); 280 if (ret && errno == EBUSY) { 281 cg_killall(cgroup); 282 usleep(100); 283 goto retry; 284 } 285 286 if (ret && errno == ENOENT) 287 ret = 0; 288 289 return ret; 290} 291 292int cg_enter(const char *cgroup, int pid) 293{ 294 char pidbuf[64]; 295 296 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 297 return cg_write(cgroup, "cgroup.procs", pidbuf); 298} 299 300int cg_enter_current(const char *cgroup) 301{ 302 return cg_write(cgroup, "cgroup.procs", "0"); 303} 304 305int cg_enter_current_thread(const char *cgroup) 306{ 307 return cg_write(cgroup, "cgroup.threads", "0"); 308} 309 310int cg_run(const char *cgroup, 311 int (*fn)(const char *cgroup, void *arg), 312 void *arg) 313{ 314 int pid, retcode; 315 316 pid = fork(); 317 if (pid < 0) { 318 return pid; 319 } else if (pid == 0) { 320 char buf[64]; 321 322 snprintf(buf, sizeof(buf), "%d", getpid()); 323 if (cg_write(cgroup, "cgroup.procs", buf)) 324 exit(EXIT_FAILURE); 325 exit(fn(cgroup, arg)); 326 } else { 327 waitpid(pid, &retcode, 0); 328 if (WIFEXITED(retcode)) 329 return WEXITSTATUS(retcode); 330 else 331 return -1; 332 } 333} 334 335pid_t clone_into_cgroup(int cgroup_fd) 336{ 337#ifdef CLONE_ARGS_SIZE_VER2 338 pid_t pid; 339 340 struct __clone_args args = { 341 .flags = CLONE_INTO_CGROUP, 342 .exit_signal = SIGCHLD, 343 .cgroup = cgroup_fd, 344 }; 345 346 pid = sys_clone3(&args, sizeof(struct __clone_args)); 347 /* 348 * Verify that this is a genuine test failure: 349 * ENOSYS -> clone3() not available 350 * E2BIG -> CLONE_INTO_CGROUP not available 351 */ 352 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 353 goto pretend_enosys; 354 355 return pid; 356 357pretend_enosys: 358#endif 359 errno = ENOSYS; 360 return -ENOSYS; 361} 362 363int clone_reap(pid_t pid, int options) 364{ 365 int ret; 366 siginfo_t info = { 367 .si_signo = 0, 368 }; 369 370again: 371 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 372 if (ret < 0) { 373 if (errno == EINTR) 374 goto again; 375 return -1; 376 } 377 378 if (options & WEXITED) { 379 if (WIFEXITED(info.si_status)) 380 return WEXITSTATUS(info.si_status); 381 } 382 383 if (options & WSTOPPED) { 384 if (WIFSTOPPED(info.si_status)) 385 return WSTOPSIG(info.si_status); 386 } 387 388 if (options & WCONTINUED) { 389 if (WIFCONTINUED(info.si_status)) 390 return 0; 391 } 392 393 return -1; 394} 395 396int dirfd_open_opath(const char *dir) 397{ 398 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 399} 400 401#define close_prot_errno(fd) \ 402 if (fd >= 0) { \ 403 int _e_ = errno; \ 404 close(fd); \ 405 errno = _e_; \ 406 } 407 408static int clone_into_cgroup_run_nowait(const char *cgroup, 409 int (*fn)(const char *cgroup, void *arg), 410 void *arg) 411{ 412 int cgroup_fd; 413 pid_t pid; 414 415 cgroup_fd = dirfd_open_opath(cgroup); 416 if (cgroup_fd < 0) 417 return -1; 418 419 pid = clone_into_cgroup(cgroup_fd); 420 close_prot_errno(cgroup_fd); 421 if (pid == 0) 422 exit(fn(cgroup, arg)); 423 424 return pid; 425} 426 427int cg_run_nowait(const char *cgroup, 428 int (*fn)(const char *cgroup, void *arg), 429 void *arg) 430{ 431 int pid; 432 433 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 434 if (pid > 0) 435 return pid; 436 437 /* Genuine test failure. */ 438 if (pid < 0 && errno != ENOSYS) 439 return -1; 440 441 pid = fork(); 442 if (pid == 0) { 443 char buf[64]; 444 445 snprintf(buf, sizeof(buf), "%d", getpid()); 446 if (cg_write(cgroup, "cgroup.procs", buf)) 447 exit(EXIT_FAILURE); 448 exit(fn(cgroup, arg)); 449 } 450 451 return pid; 452} 453 454int get_temp_fd(void) 455{ 456 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 457} 458 459int alloc_pagecache(int fd, size_t size) 460{ 461 char buf[PAGE_SIZE]; 462 struct stat st; 463 int i; 464 465 if (fstat(fd, &st)) 466 goto cleanup; 467 468 size += st.st_size; 469 470 if (ftruncate(fd, size)) 471 goto cleanup; 472 473 for (i = 0; i < size; i += sizeof(buf)) 474 read(fd, buf, sizeof(buf)); 475 476 return 0; 477 478cleanup: 479 return -1; 480} 481 482int alloc_anon(const char *cgroup, void *arg) 483{ 484 size_t size = (unsigned long)arg; 485 char *buf, *ptr; 486 487 buf = malloc(size); 488 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 489 *ptr = 0; 490 491 free(buf); 492 return 0; 493} 494 495int is_swap_enabled(void) 496{ 497 char buf[PAGE_SIZE]; 498 const char delim[] = "\n"; 499 int cnt = 0; 500 char *line; 501 502 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 503 return -1; 504 505 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 506 cnt++; 507 508 return cnt > 1; 509} 510 511int set_oom_adj_score(int pid, int score) 512{ 513 char path[PATH_MAX]; 514 int fd, len; 515 516 sprintf(path, "/proc/%d/oom_score_adj", pid); 517 518 fd = open(path, O_WRONLY | O_APPEND); 519 if (fd < 0) 520 return fd; 521 522 len = dprintf(fd, "%d", score); 523 if (len < 0) { 524 close(fd); 525 return len; 526 } 527 528 close(fd); 529 return 0; 530} 531 532ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 533{ 534 char path[PATH_MAX]; 535 536 if (!pid) 537 snprintf(path, sizeof(path), "/proc/%s/%s", 538 thread ? "thread-self" : "self", item); 539 else 540 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 541 542 return read_text(path, buf, size); 543} 544 545int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 546{ 547 char buf[PAGE_SIZE]; 548 549 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 550 return -1; 551 552 return strstr(buf, needle) ? 0 : -1; 553} 554 555int clone_into_cgroup_run_wait(const char *cgroup) 556{ 557 int cgroup_fd; 558 pid_t pid; 559 560 cgroup_fd = dirfd_open_opath(cgroup); 561 if (cgroup_fd < 0) 562 return -1; 563 564 pid = clone_into_cgroup(cgroup_fd); 565 close_prot_errno(cgroup_fd); 566 if (pid < 0) 567 return -1; 568 569 if (pid == 0) 570 exit(EXIT_SUCCESS); 571 572 /* 573 * We don't care whether this fails. We only care whether the initial 574 * clone succeeded. 575 */ 576 (void)clone_reap(pid, WEXITED); 577 return 0; 578} 579