18c2ecf20Sopenharmony_ci//SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci#include <linux/cgroup.h>
38c2ecf20Sopenharmony_ci#include <linux/sched.h>
48c2ecf20Sopenharmony_ci#include <linux/sched/task.h>
58c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#include "cgroup-internal.h"
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <trace/events/cgroup.h>
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci/*
128c2ecf20Sopenharmony_ci * Propagate the cgroup frozen state upwards by the cgroup tree.
138c2ecf20Sopenharmony_ci */
148c2ecf20Sopenharmony_cistatic void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
158c2ecf20Sopenharmony_ci{
168c2ecf20Sopenharmony_ci	int desc = 1;
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ci	/*
198c2ecf20Sopenharmony_ci	 * If the new state is frozen, some freezing ancestor cgroups may change
208c2ecf20Sopenharmony_ci	 * their state too, depending on if all their descendants are frozen.
218c2ecf20Sopenharmony_ci	 *
228c2ecf20Sopenharmony_ci	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
238c2ecf20Sopenharmony_ci	 */
248c2ecf20Sopenharmony_ci	while ((cgrp = cgroup_parent(cgrp))) {
258c2ecf20Sopenharmony_ci		if (frozen) {
268c2ecf20Sopenharmony_ci			cgrp->freezer.nr_frozen_descendants += desc;
278c2ecf20Sopenharmony_ci			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
288c2ecf20Sopenharmony_ci			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
298c2ecf20Sopenharmony_ci			    cgrp->freezer.nr_frozen_descendants ==
308c2ecf20Sopenharmony_ci			    cgrp->nr_descendants) {
318c2ecf20Sopenharmony_ci				set_bit(CGRP_FROZEN, &cgrp->flags);
328c2ecf20Sopenharmony_ci				cgroup_file_notify(&cgrp->events_file);
338c2ecf20Sopenharmony_ci				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
348c2ecf20Sopenharmony_ci				desc++;
358c2ecf20Sopenharmony_ci			}
368c2ecf20Sopenharmony_ci		} else {
378c2ecf20Sopenharmony_ci			cgrp->freezer.nr_frozen_descendants -= desc;
388c2ecf20Sopenharmony_ci			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
398c2ecf20Sopenharmony_ci				clear_bit(CGRP_FROZEN, &cgrp->flags);
408c2ecf20Sopenharmony_ci				cgroup_file_notify(&cgrp->events_file);
418c2ecf20Sopenharmony_ci				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
428c2ecf20Sopenharmony_ci				desc++;
438c2ecf20Sopenharmony_ci			}
448c2ecf20Sopenharmony_ci		}
458c2ecf20Sopenharmony_ci	}
468c2ecf20Sopenharmony_ci}
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci/*
498c2ecf20Sopenharmony_ci * Revisit the cgroup frozen state.
508c2ecf20Sopenharmony_ci * Checks if the cgroup is really frozen and perform all state transitions.
518c2ecf20Sopenharmony_ci */
528c2ecf20Sopenharmony_civoid cgroup_update_frozen(struct cgroup *cgrp)
538c2ecf20Sopenharmony_ci{
548c2ecf20Sopenharmony_ci	bool frozen;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	lockdep_assert_held(&css_set_lock);
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	/*
598c2ecf20Sopenharmony_ci	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
608c2ecf20Sopenharmony_ci	 * and all tasks are frozen and/or stopped, let's consider
618c2ecf20Sopenharmony_ci	 * the cgroup frozen. Otherwise it's not frozen.
628c2ecf20Sopenharmony_ci	 */
638c2ecf20Sopenharmony_ci	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
648c2ecf20Sopenharmony_ci		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	if (frozen) {
678c2ecf20Sopenharmony_ci		/* Already there? */
688c2ecf20Sopenharmony_ci		if (test_bit(CGRP_FROZEN, &cgrp->flags))
698c2ecf20Sopenharmony_ci			return;
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci		set_bit(CGRP_FROZEN, &cgrp->flags);
728c2ecf20Sopenharmony_ci	} else {
738c2ecf20Sopenharmony_ci		/* Already there? */
748c2ecf20Sopenharmony_ci		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
758c2ecf20Sopenharmony_ci			return;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci		clear_bit(CGRP_FROZEN, &cgrp->flags);
788c2ecf20Sopenharmony_ci	}
798c2ecf20Sopenharmony_ci	cgroup_file_notify(&cgrp->events_file);
808c2ecf20Sopenharmony_ci	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	/* Update the state of ancestor cgroups. */
838c2ecf20Sopenharmony_ci	cgroup_propagate_frozen(cgrp, frozen);
848c2ecf20Sopenharmony_ci}
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci/*
878c2ecf20Sopenharmony_ci * Increment cgroup's nr_frozen_tasks.
888c2ecf20Sopenharmony_ci */
898c2ecf20Sopenharmony_cistatic void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
908c2ecf20Sopenharmony_ci{
918c2ecf20Sopenharmony_ci	cgrp->freezer.nr_frozen_tasks++;
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci/*
958c2ecf20Sopenharmony_ci * Decrement cgroup's nr_frozen_tasks.
968c2ecf20Sopenharmony_ci */
978c2ecf20Sopenharmony_cistatic void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
988c2ecf20Sopenharmony_ci{
998c2ecf20Sopenharmony_ci	cgrp->freezer.nr_frozen_tasks--;
1008c2ecf20Sopenharmony_ci	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
1018c2ecf20Sopenharmony_ci}
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci/*
1048c2ecf20Sopenharmony_ci * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
1058c2ecf20Sopenharmony_ci * and revisit the state of the cgroup, if necessary.
1068c2ecf20Sopenharmony_ci */
1078c2ecf20Sopenharmony_civoid cgroup_enter_frozen(void)
1088c2ecf20Sopenharmony_ci{
1098c2ecf20Sopenharmony_ci	struct cgroup *cgrp;
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	if (current->frozen)
1128c2ecf20Sopenharmony_ci		return;
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci	spin_lock_irq(&css_set_lock);
1158c2ecf20Sopenharmony_ci	current->frozen = true;
1168c2ecf20Sopenharmony_ci	cgrp = task_dfl_cgroup(current);
1178c2ecf20Sopenharmony_ci	cgroup_inc_frozen_cnt(cgrp);
1188c2ecf20Sopenharmony_ci	cgroup_update_frozen(cgrp);
1198c2ecf20Sopenharmony_ci	spin_unlock_irq(&css_set_lock);
1208c2ecf20Sopenharmony_ci}
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci/*
1238c2ecf20Sopenharmony_ci * Conditionally leave frozen/stopped state. Update cgroup's counters,
1248c2ecf20Sopenharmony_ci * and revisit the state of the cgroup, if necessary.
1258c2ecf20Sopenharmony_ci *
1268c2ecf20Sopenharmony_ci * If always_leave is not set, and the cgroup is freezing,
1278c2ecf20Sopenharmony_ci * we're racing with the cgroup freezing. In this case, we don't
1288c2ecf20Sopenharmony_ci * drop the frozen counter to avoid a transient switch to
1298c2ecf20Sopenharmony_ci * the unfrozen state.
1308c2ecf20Sopenharmony_ci */
1318c2ecf20Sopenharmony_civoid cgroup_leave_frozen(bool always_leave)
1328c2ecf20Sopenharmony_ci{
1338c2ecf20Sopenharmony_ci	struct cgroup *cgrp;
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	spin_lock_irq(&css_set_lock);
1368c2ecf20Sopenharmony_ci	cgrp = task_dfl_cgroup(current);
1378c2ecf20Sopenharmony_ci	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
1388c2ecf20Sopenharmony_ci		cgroup_dec_frozen_cnt(cgrp);
1398c2ecf20Sopenharmony_ci		cgroup_update_frozen(cgrp);
1408c2ecf20Sopenharmony_ci		WARN_ON_ONCE(!current->frozen);
1418c2ecf20Sopenharmony_ci		current->frozen = false;
1428c2ecf20Sopenharmony_ci	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
1438c2ecf20Sopenharmony_ci		spin_lock(&current->sighand->siglock);
1448c2ecf20Sopenharmony_ci		current->jobctl |= JOBCTL_TRAP_FREEZE;
1458c2ecf20Sopenharmony_ci		set_thread_flag(TIF_SIGPENDING);
1468c2ecf20Sopenharmony_ci		spin_unlock(&current->sighand->siglock);
1478c2ecf20Sopenharmony_ci	}
1488c2ecf20Sopenharmony_ci	spin_unlock_irq(&css_set_lock);
1498c2ecf20Sopenharmony_ci}
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci/*
1528c2ecf20Sopenharmony_ci * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
1538c2ecf20Sopenharmony_ci * jobctl bit.
1548c2ecf20Sopenharmony_ci */
1558c2ecf20Sopenharmony_cistatic void cgroup_freeze_task(struct task_struct *task, bool freeze)
1568c2ecf20Sopenharmony_ci{
1578c2ecf20Sopenharmony_ci	unsigned long flags;
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci	/* If the task is about to die, don't bother with freezing it. */
1608c2ecf20Sopenharmony_ci	if (!lock_task_sighand(task, &flags))
1618c2ecf20Sopenharmony_ci		return;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	if (freeze) {
1648c2ecf20Sopenharmony_ci		task->jobctl |= JOBCTL_TRAP_FREEZE;
1658c2ecf20Sopenharmony_ci		signal_wake_up(task, false);
1668c2ecf20Sopenharmony_ci	} else {
1678c2ecf20Sopenharmony_ci		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
1688c2ecf20Sopenharmony_ci		wake_up_process(task);
1698c2ecf20Sopenharmony_ci	}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	unlock_task_sighand(task, &flags);
1728c2ecf20Sopenharmony_ci}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci/*
1758c2ecf20Sopenharmony_ci * Freeze or unfreeze all tasks in the given cgroup.
1768c2ecf20Sopenharmony_ci */
1778c2ecf20Sopenharmony_cistatic void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
1788c2ecf20Sopenharmony_ci{
1798c2ecf20Sopenharmony_ci	struct css_task_iter it;
1808c2ecf20Sopenharmony_ci	struct task_struct *task;
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	lockdep_assert_held(&cgroup_mutex);
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	spin_lock_irq(&css_set_lock);
1858c2ecf20Sopenharmony_ci	if (freeze)
1868c2ecf20Sopenharmony_ci		set_bit(CGRP_FREEZE, &cgrp->flags);
1878c2ecf20Sopenharmony_ci	else
1888c2ecf20Sopenharmony_ci		clear_bit(CGRP_FREEZE, &cgrp->flags);
1898c2ecf20Sopenharmony_ci	spin_unlock_irq(&css_set_lock);
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	if (freeze)
1928c2ecf20Sopenharmony_ci		TRACE_CGROUP_PATH(freeze, cgrp);
1938c2ecf20Sopenharmony_ci	else
1948c2ecf20Sopenharmony_ci		TRACE_CGROUP_PATH(unfreeze, cgrp);
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	css_task_iter_start(&cgrp->self, 0, &it);
1978c2ecf20Sopenharmony_ci	while ((task = css_task_iter_next(&it))) {
1988c2ecf20Sopenharmony_ci		/*
1998c2ecf20Sopenharmony_ci		 * Ignore kernel threads here. Freezing cgroups containing
2008c2ecf20Sopenharmony_ci		 * kthreads isn't supported.
2018c2ecf20Sopenharmony_ci		 */
2028c2ecf20Sopenharmony_ci		if (task->flags & PF_KTHREAD)
2038c2ecf20Sopenharmony_ci			continue;
2048c2ecf20Sopenharmony_ci		cgroup_freeze_task(task, freeze);
2058c2ecf20Sopenharmony_ci	}
2068c2ecf20Sopenharmony_ci	css_task_iter_end(&it);
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	/*
2098c2ecf20Sopenharmony_ci	 * Cgroup state should be revisited here to cover empty leaf cgroups
2108c2ecf20Sopenharmony_ci	 * and cgroups which descendants are already in the desired state.
2118c2ecf20Sopenharmony_ci	 */
2128c2ecf20Sopenharmony_ci	spin_lock_irq(&css_set_lock);
2138c2ecf20Sopenharmony_ci	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
2148c2ecf20Sopenharmony_ci		cgroup_update_frozen(cgrp);
2158c2ecf20Sopenharmony_ci	spin_unlock_irq(&css_set_lock);
2168c2ecf20Sopenharmony_ci}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci/*
2198c2ecf20Sopenharmony_ci * Adjust the task state (freeze or unfreeze) and revisit the state of
2208c2ecf20Sopenharmony_ci * source and destination cgroups.
2218c2ecf20Sopenharmony_ci */
2228c2ecf20Sopenharmony_civoid cgroup_freezer_migrate_task(struct task_struct *task,
2238c2ecf20Sopenharmony_ci				 struct cgroup *src, struct cgroup *dst)
2248c2ecf20Sopenharmony_ci{
2258c2ecf20Sopenharmony_ci	lockdep_assert_held(&css_set_lock);
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci	/*
2288c2ecf20Sopenharmony_ci	 * Kernel threads are not supposed to be frozen at all.
2298c2ecf20Sopenharmony_ci	 */
2308c2ecf20Sopenharmony_ci	if (task->flags & PF_KTHREAD)
2318c2ecf20Sopenharmony_ci		return;
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	/*
2348c2ecf20Sopenharmony_ci	 * It's not necessary to do changes if both of the src and dst cgroups
2358c2ecf20Sopenharmony_ci	 * are not freezing and task is not frozen.
2368c2ecf20Sopenharmony_ci	 */
2378c2ecf20Sopenharmony_ci	if (!test_bit(CGRP_FREEZE, &src->flags) &&
2388c2ecf20Sopenharmony_ci	    !test_bit(CGRP_FREEZE, &dst->flags) &&
2398c2ecf20Sopenharmony_ci	    !task->frozen)
2408c2ecf20Sopenharmony_ci		return;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	/*
2438c2ecf20Sopenharmony_ci	 * Adjust counters of freezing and frozen tasks.
2448c2ecf20Sopenharmony_ci	 * Note, that if the task is frozen, but the destination cgroup is not
2458c2ecf20Sopenharmony_ci	 * frozen, we bump both counters to keep them balanced.
2468c2ecf20Sopenharmony_ci	 */
2478c2ecf20Sopenharmony_ci	if (task->frozen) {
2488c2ecf20Sopenharmony_ci		cgroup_inc_frozen_cnt(dst);
2498c2ecf20Sopenharmony_ci		cgroup_dec_frozen_cnt(src);
2508c2ecf20Sopenharmony_ci	}
2518c2ecf20Sopenharmony_ci	cgroup_update_frozen(dst);
2528c2ecf20Sopenharmony_ci	cgroup_update_frozen(src);
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci	/*
2558c2ecf20Sopenharmony_ci	 * Force the task to the desired state.
2568c2ecf20Sopenharmony_ci	 */
2578c2ecf20Sopenharmony_ci	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_civoid cgroup_freeze(struct cgroup *cgrp, bool freeze)
2618c2ecf20Sopenharmony_ci{
2628c2ecf20Sopenharmony_ci	struct cgroup_subsys_state *css;
2638c2ecf20Sopenharmony_ci	struct cgroup *dsct;
2648c2ecf20Sopenharmony_ci	bool applied = false;
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	lockdep_assert_held(&cgroup_mutex);
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci	/*
2698c2ecf20Sopenharmony_ci	 * Nothing changed? Just exit.
2708c2ecf20Sopenharmony_ci	 */
2718c2ecf20Sopenharmony_ci	if (cgrp->freezer.freeze == freeze)
2728c2ecf20Sopenharmony_ci		return;
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	cgrp->freezer.freeze = freeze;
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci	/*
2778c2ecf20Sopenharmony_ci	 * Propagate changes downwards the cgroup tree.
2788c2ecf20Sopenharmony_ci	 */
2798c2ecf20Sopenharmony_ci	css_for_each_descendant_pre(css, &cgrp->self) {
2808c2ecf20Sopenharmony_ci		dsct = css->cgroup;
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci		if (cgroup_is_dead(dsct))
2838c2ecf20Sopenharmony_ci			continue;
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci		if (freeze) {
2868c2ecf20Sopenharmony_ci			dsct->freezer.e_freeze++;
2878c2ecf20Sopenharmony_ci			/*
2888c2ecf20Sopenharmony_ci			 * Already frozen because of ancestor's settings?
2898c2ecf20Sopenharmony_ci			 */
2908c2ecf20Sopenharmony_ci			if (dsct->freezer.e_freeze > 1)
2918c2ecf20Sopenharmony_ci				continue;
2928c2ecf20Sopenharmony_ci		} else {
2938c2ecf20Sopenharmony_ci			dsct->freezer.e_freeze--;
2948c2ecf20Sopenharmony_ci			/*
2958c2ecf20Sopenharmony_ci			 * Still frozen because of ancestor's settings?
2968c2ecf20Sopenharmony_ci			 */
2978c2ecf20Sopenharmony_ci			if (dsct->freezer.e_freeze > 0)
2988c2ecf20Sopenharmony_ci				continue;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
3018c2ecf20Sopenharmony_ci		}
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci		/*
3048c2ecf20Sopenharmony_ci		 * Do change actual state: freeze or unfreeze.
3058c2ecf20Sopenharmony_ci		 */
3068c2ecf20Sopenharmony_ci		cgroup_do_freeze(dsct, freeze);
3078c2ecf20Sopenharmony_ci		applied = true;
3088c2ecf20Sopenharmony_ci	}
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	/*
3118c2ecf20Sopenharmony_ci	 * Even if the actual state hasn't changed, let's notify a user.
3128c2ecf20Sopenharmony_ci	 * The state can be enforced by an ancestor cgroup: the cgroup
3138c2ecf20Sopenharmony_ci	 * can already be in the desired state or it can be locked in the
3148c2ecf20Sopenharmony_ci	 * opposite state, so that the transition will never happen.
3158c2ecf20Sopenharmony_ci	 * In both cases it's better to notify a user, that there is
3168c2ecf20Sopenharmony_ci	 * nothing to wait for.
3178c2ecf20Sopenharmony_ci	 */
3188c2ecf20Sopenharmony_ci	if (!applied) {
3198c2ecf20Sopenharmony_ci		TRACE_CGROUP_PATH(notify_frozen, cgrp,
3208c2ecf20Sopenharmony_ci				  test_bit(CGRP_FROZEN, &cgrp->flags));
3218c2ecf20Sopenharmony_ci		cgroup_file_notify(&cgrp->events_file);
3228c2ecf20Sopenharmony_ci	}
3238c2ecf20Sopenharmony_ci}
324