18c2ecf20Sopenharmony_ci//SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci#include <linux/cgroup.h> 38c2ecf20Sopenharmony_ci#include <linux/sched.h> 48c2ecf20Sopenharmony_ci#include <linux/sched/task.h> 58c2ecf20Sopenharmony_ci#include <linux/sched/signal.h> 68c2ecf20Sopenharmony_ci 78c2ecf20Sopenharmony_ci#include "cgroup-internal.h" 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include <trace/events/cgroup.h> 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci/* 128c2ecf20Sopenharmony_ci * Propagate the cgroup frozen state upwards by the cgroup tree. 138c2ecf20Sopenharmony_ci */ 148c2ecf20Sopenharmony_cistatic void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) 158c2ecf20Sopenharmony_ci{ 168c2ecf20Sopenharmony_ci int desc = 1; 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci /* 198c2ecf20Sopenharmony_ci * If the new state is frozen, some freezing ancestor cgroups may change 208c2ecf20Sopenharmony_ci * their state too, depending on if all their descendants are frozen. 218c2ecf20Sopenharmony_ci * 228c2ecf20Sopenharmony_ci * Otherwise, all ancestor cgroups are forced into the non-frozen state. 238c2ecf20Sopenharmony_ci */ 248c2ecf20Sopenharmony_ci while ((cgrp = cgroup_parent(cgrp))) { 258c2ecf20Sopenharmony_ci if (frozen) { 268c2ecf20Sopenharmony_ci cgrp->freezer.nr_frozen_descendants += desc; 278c2ecf20Sopenharmony_ci if (!test_bit(CGRP_FROZEN, &cgrp->flags) && 288c2ecf20Sopenharmony_ci test_bit(CGRP_FREEZE, &cgrp->flags) && 298c2ecf20Sopenharmony_ci cgrp->freezer.nr_frozen_descendants == 308c2ecf20Sopenharmony_ci cgrp->nr_descendants) { 318c2ecf20Sopenharmony_ci set_bit(CGRP_FROZEN, &cgrp->flags); 328c2ecf20Sopenharmony_ci cgroup_file_notify(&cgrp->events_file); 338c2ecf20Sopenharmony_ci TRACE_CGROUP_PATH(notify_frozen, cgrp, 1); 348c2ecf20Sopenharmony_ci desc++; 358c2ecf20Sopenharmony_ci } 368c2ecf20Sopenharmony_ci } else { 378c2ecf20Sopenharmony_ci cgrp->freezer.nr_frozen_descendants -= desc; 388c2ecf20Sopenharmony_ci if (test_bit(CGRP_FROZEN, &cgrp->flags)) { 398c2ecf20Sopenharmony_ci clear_bit(CGRP_FROZEN, &cgrp->flags); 408c2ecf20Sopenharmony_ci cgroup_file_notify(&cgrp->events_file); 418c2ecf20Sopenharmony_ci TRACE_CGROUP_PATH(notify_frozen, cgrp, 0); 428c2ecf20Sopenharmony_ci desc++; 438c2ecf20Sopenharmony_ci } 448c2ecf20Sopenharmony_ci } 458c2ecf20Sopenharmony_ci } 468c2ecf20Sopenharmony_ci} 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci/* 498c2ecf20Sopenharmony_ci * Revisit the cgroup frozen state. 508c2ecf20Sopenharmony_ci * Checks if the cgroup is really frozen and perform all state transitions. 518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_civoid cgroup_update_frozen(struct cgroup *cgrp) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci bool frozen; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci lockdep_assert_held(&css_set_lock); 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci /* 598c2ecf20Sopenharmony_ci * If the cgroup has to be frozen (CGRP_FREEZE bit set), 608c2ecf20Sopenharmony_ci * and all tasks are frozen and/or stopped, let's consider 618c2ecf20Sopenharmony_ci * the cgroup frozen. Otherwise it's not frozen. 628c2ecf20Sopenharmony_ci */ 638c2ecf20Sopenharmony_ci frozen = test_bit(CGRP_FREEZE, &cgrp->flags) && 648c2ecf20Sopenharmony_ci cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp); 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci if (frozen) { 678c2ecf20Sopenharmony_ci /* Already there? */ 688c2ecf20Sopenharmony_ci if (test_bit(CGRP_FROZEN, &cgrp->flags)) 698c2ecf20Sopenharmony_ci return; 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci set_bit(CGRP_FROZEN, &cgrp->flags); 728c2ecf20Sopenharmony_ci } else { 738c2ecf20Sopenharmony_ci /* Already there? */ 748c2ecf20Sopenharmony_ci if (!test_bit(CGRP_FROZEN, &cgrp->flags)) 758c2ecf20Sopenharmony_ci return; 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci clear_bit(CGRP_FROZEN, &cgrp->flags); 788c2ecf20Sopenharmony_ci } 798c2ecf20Sopenharmony_ci cgroup_file_notify(&cgrp->events_file); 808c2ecf20Sopenharmony_ci TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen); 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci /* Update the state of ancestor cgroups. */ 838c2ecf20Sopenharmony_ci cgroup_propagate_frozen(cgrp, frozen); 848c2ecf20Sopenharmony_ci} 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci/* 878c2ecf20Sopenharmony_ci * Increment cgroup's nr_frozen_tasks. 888c2ecf20Sopenharmony_ci */ 898c2ecf20Sopenharmony_cistatic void cgroup_inc_frozen_cnt(struct cgroup *cgrp) 908c2ecf20Sopenharmony_ci{ 918c2ecf20Sopenharmony_ci cgrp->freezer.nr_frozen_tasks++; 928c2ecf20Sopenharmony_ci} 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci/* 958c2ecf20Sopenharmony_ci * Decrement cgroup's nr_frozen_tasks. 968c2ecf20Sopenharmony_ci */ 978c2ecf20Sopenharmony_cistatic void cgroup_dec_frozen_cnt(struct cgroup *cgrp) 988c2ecf20Sopenharmony_ci{ 998c2ecf20Sopenharmony_ci cgrp->freezer.nr_frozen_tasks--; 1008c2ecf20Sopenharmony_ci WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0); 1018c2ecf20Sopenharmony_ci} 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci/* 1048c2ecf20Sopenharmony_ci * Enter frozen/stopped state, if not yet there. Update cgroup's counters, 1058c2ecf20Sopenharmony_ci * and revisit the state of the cgroup, if necessary. 1068c2ecf20Sopenharmony_ci */ 1078c2ecf20Sopenharmony_civoid cgroup_enter_frozen(void) 1088c2ecf20Sopenharmony_ci{ 1098c2ecf20Sopenharmony_ci struct cgroup *cgrp; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci if (current->frozen) 1128c2ecf20Sopenharmony_ci return; 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci spin_lock_irq(&css_set_lock); 1158c2ecf20Sopenharmony_ci current->frozen = true; 1168c2ecf20Sopenharmony_ci cgrp = task_dfl_cgroup(current); 1178c2ecf20Sopenharmony_ci cgroup_inc_frozen_cnt(cgrp); 1188c2ecf20Sopenharmony_ci cgroup_update_frozen(cgrp); 1198c2ecf20Sopenharmony_ci spin_unlock_irq(&css_set_lock); 1208c2ecf20Sopenharmony_ci} 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci/* 1238c2ecf20Sopenharmony_ci * Conditionally leave frozen/stopped state. Update cgroup's counters, 1248c2ecf20Sopenharmony_ci * and revisit the state of the cgroup, if necessary. 1258c2ecf20Sopenharmony_ci * 1268c2ecf20Sopenharmony_ci * If always_leave is not set, and the cgroup is freezing, 1278c2ecf20Sopenharmony_ci * we're racing with the cgroup freezing. In this case, we don't 1288c2ecf20Sopenharmony_ci * drop the frozen counter to avoid a transient switch to 1298c2ecf20Sopenharmony_ci * the unfrozen state. 1308c2ecf20Sopenharmony_ci */ 1318c2ecf20Sopenharmony_civoid cgroup_leave_frozen(bool always_leave) 1328c2ecf20Sopenharmony_ci{ 1338c2ecf20Sopenharmony_ci struct cgroup *cgrp; 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci spin_lock_irq(&css_set_lock); 1368c2ecf20Sopenharmony_ci cgrp = task_dfl_cgroup(current); 1378c2ecf20Sopenharmony_ci if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) { 1388c2ecf20Sopenharmony_ci cgroup_dec_frozen_cnt(cgrp); 1398c2ecf20Sopenharmony_ci cgroup_update_frozen(cgrp); 1408c2ecf20Sopenharmony_ci WARN_ON_ONCE(!current->frozen); 1418c2ecf20Sopenharmony_ci current->frozen = false; 1428c2ecf20Sopenharmony_ci } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) { 1438c2ecf20Sopenharmony_ci spin_lock(¤t->sighand->siglock); 1448c2ecf20Sopenharmony_ci current->jobctl |= JOBCTL_TRAP_FREEZE; 1458c2ecf20Sopenharmony_ci set_thread_flag(TIF_SIGPENDING); 1468c2ecf20Sopenharmony_ci spin_unlock(¤t->sighand->siglock); 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci spin_unlock_irq(&css_set_lock); 1498c2ecf20Sopenharmony_ci} 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci/* 1528c2ecf20Sopenharmony_ci * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE 1538c2ecf20Sopenharmony_ci * jobctl bit. 1548c2ecf20Sopenharmony_ci */ 1558c2ecf20Sopenharmony_cistatic void cgroup_freeze_task(struct task_struct *task, bool freeze) 1568c2ecf20Sopenharmony_ci{ 1578c2ecf20Sopenharmony_ci unsigned long flags; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci /* If the task is about to die, don't bother with freezing it. */ 1608c2ecf20Sopenharmony_ci if (!lock_task_sighand(task, &flags)) 1618c2ecf20Sopenharmony_ci return; 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci if (freeze) { 1648c2ecf20Sopenharmony_ci task->jobctl |= JOBCTL_TRAP_FREEZE; 1658c2ecf20Sopenharmony_ci signal_wake_up(task, false); 1668c2ecf20Sopenharmony_ci } else { 1678c2ecf20Sopenharmony_ci task->jobctl &= ~JOBCTL_TRAP_FREEZE; 1688c2ecf20Sopenharmony_ci wake_up_process(task); 1698c2ecf20Sopenharmony_ci } 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci unlock_task_sighand(task, &flags); 1728c2ecf20Sopenharmony_ci} 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci/* 1758c2ecf20Sopenharmony_ci * Freeze or unfreeze all tasks in the given cgroup. 1768c2ecf20Sopenharmony_ci */ 1778c2ecf20Sopenharmony_cistatic void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) 1788c2ecf20Sopenharmony_ci{ 1798c2ecf20Sopenharmony_ci struct css_task_iter it; 1808c2ecf20Sopenharmony_ci struct task_struct *task; 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci lockdep_assert_held(&cgroup_mutex); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci spin_lock_irq(&css_set_lock); 1858c2ecf20Sopenharmony_ci if (freeze) 1868c2ecf20Sopenharmony_ci set_bit(CGRP_FREEZE, &cgrp->flags); 1878c2ecf20Sopenharmony_ci else 1888c2ecf20Sopenharmony_ci clear_bit(CGRP_FREEZE, &cgrp->flags); 1898c2ecf20Sopenharmony_ci spin_unlock_irq(&css_set_lock); 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci if (freeze) 1928c2ecf20Sopenharmony_ci TRACE_CGROUP_PATH(freeze, cgrp); 1938c2ecf20Sopenharmony_ci else 1948c2ecf20Sopenharmony_ci TRACE_CGROUP_PATH(unfreeze, cgrp); 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci css_task_iter_start(&cgrp->self, 0, &it); 1978c2ecf20Sopenharmony_ci while ((task = css_task_iter_next(&it))) { 1988c2ecf20Sopenharmony_ci /* 1998c2ecf20Sopenharmony_ci * Ignore kernel threads here. Freezing cgroups containing 2008c2ecf20Sopenharmony_ci * kthreads isn't supported. 2018c2ecf20Sopenharmony_ci */ 2028c2ecf20Sopenharmony_ci if (task->flags & PF_KTHREAD) 2038c2ecf20Sopenharmony_ci continue; 2048c2ecf20Sopenharmony_ci cgroup_freeze_task(task, freeze); 2058c2ecf20Sopenharmony_ci } 2068c2ecf20Sopenharmony_ci css_task_iter_end(&it); 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci /* 2098c2ecf20Sopenharmony_ci * Cgroup state should be revisited here to cover empty leaf cgroups 2108c2ecf20Sopenharmony_ci * and cgroups which descendants are already in the desired state. 2118c2ecf20Sopenharmony_ci */ 2128c2ecf20Sopenharmony_ci spin_lock_irq(&css_set_lock); 2138c2ecf20Sopenharmony_ci if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants) 2148c2ecf20Sopenharmony_ci cgroup_update_frozen(cgrp); 2158c2ecf20Sopenharmony_ci spin_unlock_irq(&css_set_lock); 2168c2ecf20Sopenharmony_ci} 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci/* 2198c2ecf20Sopenharmony_ci * Adjust the task state (freeze or unfreeze) and revisit the state of 2208c2ecf20Sopenharmony_ci * source and destination cgroups. 2218c2ecf20Sopenharmony_ci */ 2228c2ecf20Sopenharmony_civoid cgroup_freezer_migrate_task(struct task_struct *task, 2238c2ecf20Sopenharmony_ci struct cgroup *src, struct cgroup *dst) 2248c2ecf20Sopenharmony_ci{ 2258c2ecf20Sopenharmony_ci lockdep_assert_held(&css_set_lock); 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci /* 2288c2ecf20Sopenharmony_ci * Kernel threads are not supposed to be frozen at all. 2298c2ecf20Sopenharmony_ci */ 2308c2ecf20Sopenharmony_ci if (task->flags & PF_KTHREAD) 2318c2ecf20Sopenharmony_ci return; 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci /* 2348c2ecf20Sopenharmony_ci * It's not necessary to do changes if both of the src and dst cgroups 2358c2ecf20Sopenharmony_ci * are not freezing and task is not frozen. 2368c2ecf20Sopenharmony_ci */ 2378c2ecf20Sopenharmony_ci if (!test_bit(CGRP_FREEZE, &src->flags) && 2388c2ecf20Sopenharmony_ci !test_bit(CGRP_FREEZE, &dst->flags) && 2398c2ecf20Sopenharmony_ci !task->frozen) 2408c2ecf20Sopenharmony_ci return; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci /* 2438c2ecf20Sopenharmony_ci * Adjust counters of freezing and frozen tasks. 2448c2ecf20Sopenharmony_ci * Note, that if the task is frozen, but the destination cgroup is not 2458c2ecf20Sopenharmony_ci * frozen, we bump both counters to keep them balanced. 2468c2ecf20Sopenharmony_ci */ 2478c2ecf20Sopenharmony_ci if (task->frozen) { 2488c2ecf20Sopenharmony_ci cgroup_inc_frozen_cnt(dst); 2498c2ecf20Sopenharmony_ci cgroup_dec_frozen_cnt(src); 2508c2ecf20Sopenharmony_ci } 2518c2ecf20Sopenharmony_ci cgroup_update_frozen(dst); 2528c2ecf20Sopenharmony_ci cgroup_update_frozen(src); 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci /* 2558c2ecf20Sopenharmony_ci * Force the task to the desired state. 2568c2ecf20Sopenharmony_ci */ 2578c2ecf20Sopenharmony_ci cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); 2588c2ecf20Sopenharmony_ci} 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_civoid cgroup_freeze(struct cgroup *cgrp, bool freeze) 2618c2ecf20Sopenharmony_ci{ 2628c2ecf20Sopenharmony_ci struct cgroup_subsys_state *css; 2638c2ecf20Sopenharmony_ci struct cgroup *dsct; 2648c2ecf20Sopenharmony_ci bool applied = false; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci lockdep_assert_held(&cgroup_mutex); 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci /* 2698c2ecf20Sopenharmony_ci * Nothing changed? Just exit. 2708c2ecf20Sopenharmony_ci */ 2718c2ecf20Sopenharmony_ci if (cgrp->freezer.freeze == freeze) 2728c2ecf20Sopenharmony_ci return; 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci cgrp->freezer.freeze = freeze; 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci /* 2778c2ecf20Sopenharmony_ci * Propagate changes downwards the cgroup tree. 2788c2ecf20Sopenharmony_ci */ 2798c2ecf20Sopenharmony_ci css_for_each_descendant_pre(css, &cgrp->self) { 2808c2ecf20Sopenharmony_ci dsct = css->cgroup; 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci if (cgroup_is_dead(dsct)) 2838c2ecf20Sopenharmony_ci continue; 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci if (freeze) { 2868c2ecf20Sopenharmony_ci dsct->freezer.e_freeze++; 2878c2ecf20Sopenharmony_ci /* 2888c2ecf20Sopenharmony_ci * Already frozen because of ancestor's settings? 2898c2ecf20Sopenharmony_ci */ 2908c2ecf20Sopenharmony_ci if (dsct->freezer.e_freeze > 1) 2918c2ecf20Sopenharmony_ci continue; 2928c2ecf20Sopenharmony_ci } else { 2938c2ecf20Sopenharmony_ci dsct->freezer.e_freeze--; 2948c2ecf20Sopenharmony_ci /* 2958c2ecf20Sopenharmony_ci * Still frozen because of ancestor's settings? 2968c2ecf20Sopenharmony_ci */ 2978c2ecf20Sopenharmony_ci if (dsct->freezer.e_freeze > 0) 2988c2ecf20Sopenharmony_ci continue; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci WARN_ON_ONCE(dsct->freezer.e_freeze < 0); 3018c2ecf20Sopenharmony_ci } 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci /* 3048c2ecf20Sopenharmony_ci * Do change actual state: freeze or unfreeze. 3058c2ecf20Sopenharmony_ci */ 3068c2ecf20Sopenharmony_ci cgroup_do_freeze(dsct, freeze); 3078c2ecf20Sopenharmony_ci applied = true; 3088c2ecf20Sopenharmony_ci } 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci /* 3118c2ecf20Sopenharmony_ci * Even if the actual state hasn't changed, let's notify a user. 3128c2ecf20Sopenharmony_ci * The state can be enforced by an ancestor cgroup: the cgroup 3138c2ecf20Sopenharmony_ci * can already be in the desired state or it can be locked in the 3148c2ecf20Sopenharmony_ci * opposite state, so that the transition will never happen. 3158c2ecf20Sopenharmony_ci * In both cases it's better to notify a user, that there is 3168c2ecf20Sopenharmony_ci * nothing to wait for. 3178c2ecf20Sopenharmony_ci */ 3188c2ecf20Sopenharmony_ci if (!applied) { 3198c2ecf20Sopenharmony_ci TRACE_CGROUP_PATH(notify_frozen, cgrp, 3208c2ecf20Sopenharmony_ci test_bit(CGRP_FROZEN, &cgrp->flags)); 3218c2ecf20Sopenharmony_ci cgroup_file_notify(&cgrp->events_file); 3228c2ecf20Sopenharmony_ci } 3238c2ecf20Sopenharmony_ci} 324