// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/core/netprio_cgroup.c	Priority Control Group
 *
 * Authors:	Neil Horman <nhorman@tuxdriver.com>
 */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/module.h>
1162306a36Sopenharmony_ci#include <linux/slab.h>
1262306a36Sopenharmony_ci#include <linux/types.h>
1362306a36Sopenharmony_ci#include <linux/string.h>
1462306a36Sopenharmony_ci#include <linux/errno.h>
1562306a36Sopenharmony_ci#include <linux/skbuff.h>
1662306a36Sopenharmony_ci#include <linux/cgroup.h>
1762306a36Sopenharmony_ci#include <linux/rcupdate.h>
1862306a36Sopenharmony_ci#include <linux/atomic.h>
1962306a36Sopenharmony_ci#include <linux/sched/task.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci#include <net/rtnetlink.h>
2262306a36Sopenharmony_ci#include <net/pkt_cls.h>
2362306a36Sopenharmony_ci#include <net/sock.h>
2462306a36Sopenharmony_ci#include <net/netprio_cgroup.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include <linux/fdtable.h>
2762306a36Sopenharmony_ci
/*
 * Every net_device carries a priomap array that netprio indexes by
 * css->id.  Capping css IDs at 16 bits doesn't lose anything.
 */
#define NETPRIO_ID_MAX		USHRT_MAX

/* Byte size from which the priomap allocation size search starts. */
#define PRIOMAP_MIN_SZ		128
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci/*
3762306a36Sopenharmony_ci * Extend @dev->priomap so that it's large enough to accommodate
3862306a36Sopenharmony_ci * @target_idx.  @dev->priomap.priomap_len > @target_idx after successful
3962306a36Sopenharmony_ci * return.  Must be called under rtnl lock.
4062306a36Sopenharmony_ci */
4162306a36Sopenharmony_cistatic int extend_netdev_table(struct net_device *dev, u32 target_idx)
4262306a36Sopenharmony_ci{
4362306a36Sopenharmony_ci	struct netprio_map *old, *new;
4462306a36Sopenharmony_ci	size_t new_sz, new_len;
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci	/* is the existing priomap large enough? */
4762306a36Sopenharmony_ci	old = rtnl_dereference(dev->priomap);
4862306a36Sopenharmony_ci	if (old && old->priomap_len > target_idx)
4962306a36Sopenharmony_ci		return 0;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	/*
5262306a36Sopenharmony_ci	 * Determine the new size.  Let's keep it power-of-two.  We start
5362306a36Sopenharmony_ci	 * from PRIOMAP_MIN_SZ and double it until it's large enough to
5462306a36Sopenharmony_ci	 * accommodate @target_idx.
5562306a36Sopenharmony_ci	 */
5662306a36Sopenharmony_ci	new_sz = PRIOMAP_MIN_SZ;
5762306a36Sopenharmony_ci	while (true) {
5862306a36Sopenharmony_ci		new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
5962306a36Sopenharmony_ci			sizeof(new->priomap[0]);
6062306a36Sopenharmony_ci		if (new_len > target_idx)
6162306a36Sopenharmony_ci			break;
6262306a36Sopenharmony_ci		new_sz *= 2;
6362306a36Sopenharmony_ci		/* overflowed? */
6462306a36Sopenharmony_ci		if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
6562306a36Sopenharmony_ci			return -ENOSPC;
6662306a36Sopenharmony_ci	}
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	/* allocate & copy */
6962306a36Sopenharmony_ci	new = kzalloc(new_sz, GFP_KERNEL);
7062306a36Sopenharmony_ci	if (!new)
7162306a36Sopenharmony_ci		return -ENOMEM;
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	if (old)
7462306a36Sopenharmony_ci		memcpy(new->priomap, old->priomap,
7562306a36Sopenharmony_ci		       old->priomap_len * sizeof(old->priomap[0]));
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	new->priomap_len = new_len;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	/* install the new priomap */
8062306a36Sopenharmony_ci	rcu_assign_pointer(dev->priomap, new);
8162306a36Sopenharmony_ci	if (old)
8262306a36Sopenharmony_ci		kfree_rcu(old, rcu);
8362306a36Sopenharmony_ci	return 0;
8462306a36Sopenharmony_ci}
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci/**
8762306a36Sopenharmony_ci * netprio_prio - return the effective netprio of a cgroup-net_device pair
8862306a36Sopenharmony_ci * @css: css part of the target pair
8962306a36Sopenharmony_ci * @dev: net_device part of the target pair
9062306a36Sopenharmony_ci *
9162306a36Sopenharmony_ci * Should be called under RCU read or rtnl lock.
9262306a36Sopenharmony_ci */
9362306a36Sopenharmony_cistatic u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
9462306a36Sopenharmony_ci{
9562306a36Sopenharmony_ci	struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
9662306a36Sopenharmony_ci	int id = css->id;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	if (map && id < map->priomap_len)
9962306a36Sopenharmony_ci		return map->priomap[id];
10062306a36Sopenharmony_ci	return 0;
10162306a36Sopenharmony_ci}
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci/**
10462306a36Sopenharmony_ci * netprio_set_prio - set netprio on a cgroup-net_device pair
10562306a36Sopenharmony_ci * @css: css part of the target pair
10662306a36Sopenharmony_ci * @dev: net_device part of the target pair
10762306a36Sopenharmony_ci * @prio: prio to set
10862306a36Sopenharmony_ci *
10962306a36Sopenharmony_ci * Set netprio to @prio on @css-@dev pair.  Should be called under rtnl
11062306a36Sopenharmony_ci * lock and may fail under memory pressure for non-zero @prio.
11162306a36Sopenharmony_ci */
11262306a36Sopenharmony_cistatic int netprio_set_prio(struct cgroup_subsys_state *css,
11362306a36Sopenharmony_ci			    struct net_device *dev, u32 prio)
11462306a36Sopenharmony_ci{
11562306a36Sopenharmony_ci	struct netprio_map *map;
11662306a36Sopenharmony_ci	int id = css->id;
11762306a36Sopenharmony_ci	int ret;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	/* avoid extending priomap for zero writes */
12062306a36Sopenharmony_ci	map = rtnl_dereference(dev->priomap);
12162306a36Sopenharmony_ci	if (!prio && (!map || map->priomap_len <= id))
12262306a36Sopenharmony_ci		return 0;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	ret = extend_netdev_table(dev, id);
12562306a36Sopenharmony_ci	if (ret)
12662306a36Sopenharmony_ci		return ret;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	map = rtnl_dereference(dev->priomap);
12962306a36Sopenharmony_ci	map->priomap[id] = prio;
13062306a36Sopenharmony_ci	return 0;
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic struct cgroup_subsys_state *
13462306a36Sopenharmony_cicgrp_css_alloc(struct cgroup_subsys_state *parent_css)
13562306a36Sopenharmony_ci{
13662306a36Sopenharmony_ci	struct cgroup_subsys_state *css;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	css = kzalloc(sizeof(*css), GFP_KERNEL);
13962306a36Sopenharmony_ci	if (!css)
14062306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	return css;
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_cistatic int cgrp_css_online(struct cgroup_subsys_state *css)
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci	struct cgroup_subsys_state *parent_css = css->parent;
14862306a36Sopenharmony_ci	struct net_device *dev;
14962306a36Sopenharmony_ci	int ret = 0;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	if (css->id > NETPRIO_ID_MAX)
15262306a36Sopenharmony_ci		return -ENOSPC;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	if (!parent_css)
15562306a36Sopenharmony_ci		return 0;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	rtnl_lock();
15862306a36Sopenharmony_ci	/*
15962306a36Sopenharmony_ci	 * Inherit prios from the parent.  As all prios are set during
16062306a36Sopenharmony_ci	 * onlining, there is no need to clear them on offline.
16162306a36Sopenharmony_ci	 */
16262306a36Sopenharmony_ci	for_each_netdev(&init_net, dev) {
16362306a36Sopenharmony_ci		u32 prio = netprio_prio(parent_css, dev);
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci		ret = netprio_set_prio(css, dev, prio);
16662306a36Sopenharmony_ci		if (ret)
16762306a36Sopenharmony_ci			break;
16862306a36Sopenharmony_ci	}
16962306a36Sopenharmony_ci	rtnl_unlock();
17062306a36Sopenharmony_ci	return ret;
17162306a36Sopenharmony_ci}
17262306a36Sopenharmony_ci
/* css_free callback: release the css allocated by cgrp_css_alloc(). */
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_cistatic u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
17962306a36Sopenharmony_ci{
18062306a36Sopenharmony_ci	return css->id;
18162306a36Sopenharmony_ci}
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_cistatic int read_priomap(struct seq_file *sf, void *v)
18462306a36Sopenharmony_ci{
18562306a36Sopenharmony_ci	struct net_device *dev;
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	rcu_read_lock();
18862306a36Sopenharmony_ci	for_each_netdev_rcu(&init_net, dev)
18962306a36Sopenharmony_ci		seq_printf(sf, "%s %u\n", dev->name,
19062306a36Sopenharmony_ci			   netprio_prio(seq_css(sf), dev));
19162306a36Sopenharmony_ci	rcu_read_unlock();
19262306a36Sopenharmony_ci	return 0;
19362306a36Sopenharmony_ci}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_cistatic ssize_t write_priomap(struct kernfs_open_file *of,
19662306a36Sopenharmony_ci			     char *buf, size_t nbytes, loff_t off)
19762306a36Sopenharmony_ci{
19862306a36Sopenharmony_ci	char devname[IFNAMSIZ + 1];
19962306a36Sopenharmony_ci	struct net_device *dev;
20062306a36Sopenharmony_ci	u32 prio;
20162306a36Sopenharmony_ci	int ret;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
20462306a36Sopenharmony_ci		return -EINVAL;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	dev = dev_get_by_name(&init_net, devname);
20762306a36Sopenharmony_ci	if (!dev)
20862306a36Sopenharmony_ci		return -ENODEV;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	rtnl_lock();
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	ret = netprio_set_prio(of_css(of), dev, prio);
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	rtnl_unlock();
21562306a36Sopenharmony_ci	dev_put(dev);
21662306a36Sopenharmony_ci	return ret ?: nbytes;
21762306a36Sopenharmony_ci}
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_cistatic int update_netprio(const void *v, struct file *file, unsigned n)
22062306a36Sopenharmony_ci{
22162306a36Sopenharmony_ci	struct socket *sock = sock_from_file(file);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	if (sock)
22462306a36Sopenharmony_ci		sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
22562306a36Sopenharmony_ci					(unsigned long)v);
22662306a36Sopenharmony_ci	return 0;
22762306a36Sopenharmony_ci}
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_cistatic void net_prio_attach(struct cgroup_taskset *tset)
23062306a36Sopenharmony_ci{
23162306a36Sopenharmony_ci	struct task_struct *p;
23262306a36Sopenharmony_ci	struct cgroup_subsys_state *css;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	cgroup_taskset_for_each(p, css, tset) {
23562306a36Sopenharmony_ci		void *v = (void *)(unsigned long)css->id;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci		task_lock(p);
23862306a36Sopenharmony_ci		iterate_fd(p->files, 0, update_netprio, v);
23962306a36Sopenharmony_ci		task_unlock(p);
24062306a36Sopenharmony_ci	}
24162306a36Sopenharmony_ci}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_cistatic struct cftype ss_files[] = {
24462306a36Sopenharmony_ci	{
24562306a36Sopenharmony_ci		.name = "prioidx",
24662306a36Sopenharmony_ci		.read_u64 = read_prioidx,
24762306a36Sopenharmony_ci	},
24862306a36Sopenharmony_ci	{
24962306a36Sopenharmony_ci		.name = "ifpriomap",
25062306a36Sopenharmony_ci		.seq_show = read_priomap,
25162306a36Sopenharmony_ci		.write = write_priomap,
25262306a36Sopenharmony_ci	},
25362306a36Sopenharmony_ci	{ }	/* terminate */
25462306a36Sopenharmony_ci};
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_cistruct cgroup_subsys net_prio_cgrp_subsys = {
25762306a36Sopenharmony_ci	.css_alloc	= cgrp_css_alloc,
25862306a36Sopenharmony_ci	.css_online	= cgrp_css_online,
25962306a36Sopenharmony_ci	.css_free	= cgrp_css_free,
26062306a36Sopenharmony_ci	.attach		= net_prio_attach,
26162306a36Sopenharmony_ci	.legacy_cftypes	= ss_files,
26262306a36Sopenharmony_ci};
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_cistatic int netprio_device_event(struct notifier_block *unused,
26562306a36Sopenharmony_ci				unsigned long event, void *ptr)
26662306a36Sopenharmony_ci{
26762306a36Sopenharmony_ci	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
26862306a36Sopenharmony_ci	struct netprio_map *old;
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci	/*
27162306a36Sopenharmony_ci	 * Note this is called with rtnl_lock held so we have update side
27262306a36Sopenharmony_ci	 * protection on our rcu assignments
27362306a36Sopenharmony_ci	 */
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	switch (event) {
27662306a36Sopenharmony_ci	case NETDEV_UNREGISTER:
27762306a36Sopenharmony_ci		old = rtnl_dereference(dev->priomap);
27862306a36Sopenharmony_ci		RCU_INIT_POINTER(dev->priomap, NULL);
27962306a36Sopenharmony_ci		if (old)
28062306a36Sopenharmony_ci			kfree_rcu(old, rcu);
28162306a36Sopenharmony_ci		break;
28262306a36Sopenharmony_ci	}
28362306a36Sopenharmony_ci	return NOTIFY_DONE;
28462306a36Sopenharmony_ci}
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_cistatic struct notifier_block netprio_device_notifier = {
28762306a36Sopenharmony_ci	.notifier_call = netprio_device_event
28862306a36Sopenharmony_ci};
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_cistatic int __init init_cgroup_netprio(void)
29162306a36Sopenharmony_ci{
29262306a36Sopenharmony_ci	register_netdevice_notifier(&netprio_device_notifier);
29362306a36Sopenharmony_ci	return 0;
29462306a36Sopenharmony_ci}
29562306a36Sopenharmony_cisubsys_initcall(init_cgroup_netprio);
296