1// SPDX-License-Identifier: GPL-2.0
2/*
3 * device_cgroup.c - device cgroup subsystem
4 *
5 * Copyright 2007 IBM Corp
6 */
7
8#include <linux/device_cgroup.h>
9#include <linux/cgroup.h>
10#include <linux/ctype.h>
11#include <linux/list.h>
12#include <linux/uaccess.h>
13#include <linux/seq_file.h>
14#include <linux/slab.h>
15#include <linux/rcupdate.h>
16#include <linux/mutex.h>
17
18#ifdef CONFIG_CGROUP_DEVICE
19
/*
 * Taken by the online/offline callbacks and by writes to the allow/deny
 * files; the exception list helpers assert that it is held.
 */
static DEFINE_MUTEX(devcgroup_mutex);
21
/*
 * Default policy of a device cgroup. The exception list is interpreted
 * relative to it: under ALLOW a matching exception denies access, under
 * DENY a matching exception grants it.
 */
enum devcg_behavior {
	/* cgroup is not online: set at allocation time and again on offline */
	DEVCG_DEFAULT_NONE,
	/* access is permitted unless an exception matches, even partially */
	DEVCG_DEFAULT_ALLOW,
	/* access is refused unless an exception covers the request completely */
	DEVCG_DEFAULT_DENY,
};
27
28/*
29 * exception list locking rules:
30 * hold devcgroup_mutex for update/read.
31 * hold rcu_read_lock() for read.
32 */
33
/* One entry of a device cgroup's exception list. */
struct dev_exception_item {
	/* device numbers; ~0 is treated as a wildcard by the match helpers */
	u32 major, minor;
	/* DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR */
	short type;
	/* mask of DEVCG_ACC_READ, DEVCG_ACC_WRITE and DEVCG_ACC_MKNOD */
	short access;
	/* link in dev_cgroup->exceptions */
	struct list_head list;
	/* handed to kfree_rcu() when the entry is removed from the list */
	struct rcu_head rcu;
};
41
/* Per-cgroup state of the device controller. */
struct dev_cgroup {
	/* embedded css; css_to_devcgroup() maps it back to this structure */
	struct cgroup_subsys_state css;
	/* list of struct dev_exception_item, see the locking rules above */
	struct list_head exceptions;
	/* default policy the exceptions are applied against */
	enum devcg_behavior behavior;
};
47
48static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
49{
50	return s ? container_of(s, struct dev_cgroup, css) : NULL;
51}
52
53static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
54{
55	return css_to_devcgroup(task_css(task, devices_cgrp_id));
56}
57
58/*
59 * called under devcgroup_mutex
60 */
61static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
62{
63	struct dev_exception_item *ex, *tmp, *new;
64
65	lockdep_assert_held(&devcgroup_mutex);
66
67	list_for_each_entry(ex, orig, list) {
68		new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
69		if (!new)
70			goto free_and_exit;
71		list_add_tail(&new->list, dest);
72	}
73
74	return 0;
75
76free_and_exit:
77	list_for_each_entry_safe(ex, tmp, dest, list) {
78		list_del(&ex->list);
79		kfree(ex);
80	}
81	return -ENOMEM;
82}
83
84static void dev_exceptions_move(struct list_head *dest, struct list_head *orig)
85{
86	struct dev_exception_item *ex, *tmp;
87
88	lockdep_assert_held(&devcgroup_mutex);
89
90	list_for_each_entry_safe(ex, tmp, orig, list) {
91		list_move_tail(&ex->list, dest);
92	}
93}
94
95/*
96 * called under devcgroup_mutex
97 */
98static int dev_exception_add(struct dev_cgroup *dev_cgroup,
99			     struct dev_exception_item *ex)
100{
101	struct dev_exception_item *excopy, *walk;
102
103	lockdep_assert_held(&devcgroup_mutex);
104
105	excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
106	if (!excopy)
107		return -ENOMEM;
108
109	list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
110		if (walk->type != ex->type)
111			continue;
112		if (walk->major != ex->major)
113			continue;
114		if (walk->minor != ex->minor)
115			continue;
116
117		walk->access |= ex->access;
118		kfree(excopy);
119		excopy = NULL;
120	}
121
122	if (excopy != NULL)
123		list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
124	return 0;
125}
126
127/*
128 * called under devcgroup_mutex
129 */
130static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
131			     struct dev_exception_item *ex)
132{
133	struct dev_exception_item *walk, *tmp;
134
135	lockdep_assert_held(&devcgroup_mutex);
136
137	list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
138		if (walk->type != ex->type)
139			continue;
140		if (walk->major != ex->major)
141			continue;
142		if (walk->minor != ex->minor)
143			continue;
144
145		walk->access &= ~ex->access;
146		if (!walk->access) {
147			list_del_rcu(&walk->list);
148			kfree_rcu(walk, rcu);
149		}
150	}
151}
152
153static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
154{
155	struct dev_exception_item *ex, *tmp;
156
157	list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
158		list_del_rcu(&ex->list);
159		kfree_rcu(ex, rcu);
160	}
161}
162
/**
 * dev_exception_clean - frees all entries of the exception list
 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 *
 * Locked variant of __dev_exception_clean(): it asserts that devcgroup_mutex
 * is held and then empties the list.
 *
 * called under devcgroup_mutex
 */
static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
	lockdep_assert_held(&devcgroup_mutex);

	__dev_exception_clean(dev_cgroup);
}
175
176static inline bool is_devcg_online(const struct dev_cgroup *devcg)
177{
178	return (devcg->behavior != DEVCG_DEFAULT_NONE);
179}
180
181/**
182 * devcgroup_online - initializes devcgroup's behavior and exceptions based on
183 * 		      parent's
184 * @css: css getting online
185 * returns 0 in case of success, error code otherwise
186 */
187static int devcgroup_online(struct cgroup_subsys_state *css)
188{
189	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
190	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent);
191	int ret = 0;
192
193	mutex_lock(&devcgroup_mutex);
194
195	if (parent_dev_cgroup == NULL)
196		dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
197	else {
198		ret = dev_exceptions_copy(&dev_cgroup->exceptions,
199					  &parent_dev_cgroup->exceptions);
200		if (!ret)
201			dev_cgroup->behavior = parent_dev_cgroup->behavior;
202	}
203	mutex_unlock(&devcgroup_mutex);
204
205	return ret;
206}
207
208static void devcgroup_offline(struct cgroup_subsys_state *css)
209{
210	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
211
212	mutex_lock(&devcgroup_mutex);
213	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
214	mutex_unlock(&devcgroup_mutex);
215}
216
217/*
218 * called from kernel/cgroup.c with cgroup_lock() held.
219 */
220static struct cgroup_subsys_state *
221devcgroup_css_alloc(struct cgroup_subsys_state *parent_css)
222{
223	struct dev_cgroup *dev_cgroup;
224
225	dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
226	if (!dev_cgroup)
227		return ERR_PTR(-ENOMEM);
228	INIT_LIST_HEAD(&dev_cgroup->exceptions);
229	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
230
231	return &dev_cgroup->css;
232}
233
/*
 * Release a dev_cgroup: drop all of its exceptions, then free the structure.
 * The unlocked __dev_exception_clean() is used, devcgroup_mutex is not taken.
 */
static void devcgroup_css_free(struct cgroup_subsys_state *css)
{
	struct dev_cgroup *devcg = css_to_devcgroup(css);

	__dev_exception_clean(devcg);
	kfree(devcg);
}
241
/*
 * Identifiers stored in cftype->private; devcgroup_update_access() uses
 * them to tell a write to "allow" from a write to "deny".
 */
#define DEVCG_ALLOW 1
#define DEVCG_DENY 2
#define DEVCG_LIST 3

/*
 * Sizes of the scratch buffers used by devcgroup_seq_show():
 * MAJMINLEN holds "*" or a u32 printed with "%u", including the NUL;
 * ACCLEN holds up to the three letters "rwm" plus the NUL.
 */
#define MAJMINLEN 13
#define ACCLEN 4
248
249static void set_access(char *acc, short access)
250{
251	int idx = 0;
252	memset(acc, 0, ACCLEN);
253	if (access & DEVCG_ACC_READ)
254		acc[idx++] = 'r';
255	if (access & DEVCG_ACC_WRITE)
256		acc[idx++] = 'w';
257	if (access & DEVCG_ACC_MKNOD)
258		acc[idx++] = 'm';
259}
260
261static char type_to_char(short type)
262{
263	if (type == DEVCG_DEV_ALL)
264		return 'a';
265	if (type == DEVCG_DEV_CHAR)
266		return 'c';
267	if (type == DEVCG_DEV_BLOCK)
268		return 'b';
269	return 'X';
270}
271
/*
 * Format a major or minor number into @str: the wildcard value ~0 becomes
 * "*", every other value is printed in decimal. @str must be large enough
 * for a u32 in decimal plus the terminating NUL.
 */
static void set_majmin(char *str, unsigned m)
{
	if (m != ~0) {
		sprintf(str, "%u", m);
		return;
	}

	strcpy(str, "*");
}
279
280static int devcgroup_seq_show(struct seq_file *m, void *v)
281{
282	struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m));
283	struct dev_exception_item *ex;
284	char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
285
286	rcu_read_lock();
287	/*
288	 * To preserve the compatibility:
289	 * - Only show the "all devices" when the default policy is to allow
290	 * - List the exceptions in case the default policy is to deny
291	 * This way, the file remains as a "whitelist of devices"
292	 */
293	if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
294		set_access(acc, DEVCG_ACC_MASK);
295		set_majmin(maj, ~0);
296		set_majmin(min, ~0);
297		seq_printf(m, "%c %s:%s %s\n", type_to_char(DEVCG_DEV_ALL),
298			   maj, min, acc);
299	} else {
300		list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
301			set_access(acc, ex->access);
302			set_majmin(maj, ex->major);
303			set_majmin(min, ex->minor);
304			seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
305				   maj, min, acc);
306		}
307	}
308	rcu_read_unlock();
309
310	return 0;
311}
312
313/**
314 * match_exception	- iterates the exception list trying to find a complete match
315 * @exceptions: list of exceptions
316 * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
317 * @major: device file major number, ~0 to match all
318 * @minor: device file minor number, ~0 to match all
319 * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
320 *
321 * It is considered a complete match if an exception is found that will
322 * contain the entire range of provided parameters.
323 *
324 * Return: true in case it matches an exception completely
325 */
326static bool match_exception(struct list_head *exceptions, short type,
327			    u32 major, u32 minor, short access)
328{
329	struct dev_exception_item *ex;
330
331	list_for_each_entry_rcu(ex, exceptions, list) {
332		if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK))
333			continue;
334		if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR))
335			continue;
336		if (ex->major != ~0 && ex->major != major)
337			continue;
338		if (ex->minor != ~0 && ex->minor != minor)
339			continue;
340		/* provided access cannot have more than the exception rule */
341		if (access & (~ex->access))
342			continue;
343		return true;
344	}
345	return false;
346}
347
348/**
349 * match_exception_partial - iterates the exception list trying to find a partial match
350 * @exceptions: list of exceptions
351 * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
352 * @major: device file major number, ~0 to match all
353 * @minor: device file minor number, ~0 to match all
354 * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
355 *
356 * It is considered a partial match if an exception's range is found to
357 * contain *any* of the devices specified by provided parameters. This is
358 * used to make sure no extra access is being granted that is forbidden by
359 * any of the exception list.
360 *
361 * Return: true in case the provided range mat matches an exception completely
362 */
363static bool match_exception_partial(struct list_head *exceptions, short type,
364				    u32 major, u32 minor, short access)
365{
366	struct dev_exception_item *ex;
367
368	list_for_each_entry_rcu(ex, exceptions, list,
369				lockdep_is_held(&devcgroup_mutex)) {
370		if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK))
371			continue;
372		if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR))
373			continue;
374		/*
375		 * We must be sure that both the exception and the provided
376		 * range aren't masking all devices
377		 */
378		if (ex->major != ~0 && major != ~0 && ex->major != major)
379			continue;
380		if (ex->minor != ~0 && minor != ~0 && ex->minor != minor)
381			continue;
382		/*
383		 * In order to make sure the provided range isn't matching
384		 * an exception, all its access bits shouldn't match the
385		 * exception's access bits
386		 */
387		if (!(access & ex->access))
388			continue;
389		return true;
390	}
391	return false;
392}
393
394/**
395 * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions
396 * @dev_cgroup: dev cgroup to be tested against
397 * @refex: new exception
398 * @behavior: behavior of the exception's dev_cgroup
399 *
400 * This is used to make sure a child cgroup won't have more privileges
401 * than its parent
402 */
403static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
404		          struct dev_exception_item *refex,
405		          enum devcg_behavior behavior)
406{
407	bool match = false;
408
409	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
410			 !lockdep_is_held(&devcgroup_mutex),
411			 "device_cgroup:verify_new_ex called without proper synchronization");
412
413	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
414		if (behavior == DEVCG_DEFAULT_ALLOW) {
415			/*
416			 * new exception in the child doesn't matter, only
417			 * adding extra restrictions
418			 */
419			return true;
420		} else {
421			/*
422			 * new exception in the child will add more devices
423			 * that can be acessed, so it can't match any of
424			 * parent's exceptions, even slightly
425			 */
426			match = match_exception_partial(&dev_cgroup->exceptions,
427							refex->type,
428							refex->major,
429							refex->minor,
430							refex->access);
431
432			if (match)
433				return false;
434			return true;
435		}
436	} else {
437		/*
438		 * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
439		 * the new exception will add access to more devices and must
440		 * be contained completely in an parent's exception to be
441		 * allowed
442		 */
443		match = match_exception(&dev_cgroup->exceptions, refex->type,
444					refex->major, refex->minor,
445					refex->access);
446
447		if (match)
448			/* parent has an exception that matches the proposed */
449			return true;
450		else
451			return false;
452	}
453	return false;
454}
455
456/*
457 * parent_has_perm:
458 * when adding a new allow rule to a device exception list, the rule
459 * must be allowed in the parent device
460 */
461static int parent_has_perm(struct dev_cgroup *childcg,
462				  struct dev_exception_item *ex)
463{
464	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
465
466	if (!parent)
467		return 1;
468	return verify_new_ex(parent, ex, childcg->behavior);
469}
470
471/**
472 * parent_allows_removal - verify if it's ok to remove an exception
473 * @childcg: child cgroup from where the exception will be removed
474 * @ex: exception being removed
475 *
476 * When removing an exception in cgroups with default ALLOW policy, it must
477 * be checked if removing it will give the child cgroup more access than the
478 * parent.
479 *
480 * Return: true if it's ok to remove exception, false otherwise
481 */
482static bool parent_allows_removal(struct dev_cgroup *childcg,
483				  struct dev_exception_item *ex)
484{
485	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
486
487	if (!parent)
488		return true;
489
490	/* It's always allowed to remove access to devices */
491	if (childcg->behavior == DEVCG_DEFAULT_DENY)
492		return true;
493
494	/*
495	 * Make sure you're not removing part or a whole exception existing in
496	 * the parent cgroup
497	 */
498	return !match_exception_partial(&parent->exceptions, ex->type,
499					ex->major, ex->minor, ex->access);
500}
501
502/**
503 * may_allow_all - checks if it's possible to change the behavior to
504 *		   allow based on parent's rules.
505 * @parent: device cgroup's parent
506 * returns: != 0 in case it's allowed, 0 otherwise
507 */
508static inline int may_allow_all(struct dev_cgroup *parent)
509{
510	if (!parent)
511		return 1;
512	return parent->behavior == DEVCG_DEFAULT_ALLOW;
513}
514
515/**
516 * revalidate_active_exceptions - walks through the active exception list and
517 * 				  revalidates the exceptions based on parent's
518 * 				  behavior and exceptions. The exceptions that
519 * 				  are no longer valid will be removed.
520 * 				  Called with devcgroup_mutex held.
521 * @devcg: cgroup which exceptions will be checked
522 *
523 * This is one of the three key functions for hierarchy implementation.
524 * This function is responsible for re-evaluating all the cgroup's active
525 * exceptions due to a parent's exception change.
526 * Refer to Documentation/admin-guide/cgroup-v1/devices.rst for more details.
527 */
528static void revalidate_active_exceptions(struct dev_cgroup *devcg)
529{
530	struct dev_exception_item *ex;
531	struct list_head *this, *tmp;
532
533	list_for_each_safe(this, tmp, &devcg->exceptions) {
534		ex = container_of(this, struct dev_exception_item, list);
535		if (!parent_has_perm(devcg, ex))
536			dev_exception_rm(devcg, ex);
537	}
538}
539
/**
 * propagate_exception - propagates a new exception to the children
 * @devcg_root: device cgroup that added a new exception
 * @ex: new exception to be propagated
 *
 * Visits every online descendant of @devcg_root in pre-order. Depending on
 * the two behaviors, @ex is either added to or removed from the descendant,
 * whose remaining exceptions are then revalidated against its parent.
 *
 * Called with devcgroup_mutex held (the helpers used here assert it).
 *
 * returns: 0 in case of success, != 0 in case of error
 */
static int propagate_exception(struct dev_cgroup *devcg_root,
			       struct dev_exception_item *ex)
{
	struct cgroup_subsys_state *pos;
	int rc = 0;

	/* the descendant walk itself runs under the RCU read lock */
	rcu_read_lock();

	css_for_each_descendant_pre(pos, &devcg_root->css) {
		struct dev_cgroup *devcg = css_to_devcgroup(pos);

		/*
		 * Because devcgroup_mutex is held, no devcg will become
		 * online or offline during the tree walk (see on/offline
		 * methods), and online ones are safe to access outside RCU
		 * read lock without bumping refcnt.
		 */
		if (pos == &devcg_root->css || !is_devcg_online(devcg))
			continue;

		/*
		 * Leave the RCU read section while updating this descendant:
		 * dev_exception_add() allocates with GFP_KERNEL.
		 */
		rcu_read_unlock();

		/*
		 * in case both root's behavior and devcg is allow, a new
		 * restriction means adding to the exception list
		 */
		if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
		    devcg->behavior == DEVCG_DEFAULT_ALLOW) {
			rc = dev_exception_add(devcg, ex);
			if (rc)
				/* RCU read lock is already dropped here */
				return rc;
		} else {
			/*
			 * in the other possible cases:
			 * root's behavior: allow, devcg's: deny
			 * root's behavior: deny, devcg's: deny
			 * the exception will be removed
			 */
			dev_exception_rm(devcg, ex);
		}
		revalidate_active_exceptions(devcg);

		/* re-enter the read section before advancing the iterator */
		rcu_read_lock();
	}

	rcu_read_unlock();
	return rc;
}
595
596/*
597 * Modify the exception list using allow/deny rules.
598 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
599 * so we can give a container CAP_MKNOD to let it create devices but not
600 * modify the exception list.
601 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
602 * us to also grant CAP_SYS_ADMIN to containers without giving away the
603 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
604 *
605 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
606 * new access is only allowed if you're in the top-level cgroup, or your
607 * parent cgroup has the access you're asking for.
608 */
609static int devcgroup_update_access(struct dev_cgroup *devcgroup,
610				   int filetype, char *buffer)
611{
612	const char *b;
613	char temp[12];		/* 11 + 1 characters needed for a u32 */
614	int count, rc = 0;
615	struct dev_exception_item ex;
616	struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent);
617	struct dev_cgroup tmp_devcgrp;
618
619	if (!capable(CAP_SYS_ADMIN))
620		return -EPERM;
621
622	memset(&ex, 0, sizeof(ex));
623	memset(&tmp_devcgrp, 0, sizeof(tmp_devcgrp));
624	b = buffer;
625
626	switch (*b) {
627	case 'a':
628		switch (filetype) {
629		case DEVCG_ALLOW:
630			if (css_has_online_children(&devcgroup->css))
631				return -EINVAL;
632
633			if (!may_allow_all(parent))
634				return -EPERM;
635			if (!parent) {
636				devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
637				dev_exception_clean(devcgroup);
638				break;
639			}
640
641			INIT_LIST_HEAD(&tmp_devcgrp.exceptions);
642			rc = dev_exceptions_copy(&tmp_devcgrp.exceptions,
643						 &devcgroup->exceptions);
644			if (rc)
645				return rc;
646			dev_exception_clean(devcgroup);
647			rc = dev_exceptions_copy(&devcgroup->exceptions,
648						 &parent->exceptions);
649			if (rc) {
650				dev_exceptions_move(&devcgroup->exceptions,
651						    &tmp_devcgrp.exceptions);
652				return rc;
653			}
654			devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
655			dev_exception_clean(&tmp_devcgrp);
656			break;
657		case DEVCG_DENY:
658			if (css_has_online_children(&devcgroup->css))
659				return -EINVAL;
660
661			dev_exception_clean(devcgroup);
662			devcgroup->behavior = DEVCG_DEFAULT_DENY;
663			break;
664		default:
665			return -EINVAL;
666		}
667		return 0;
668	case 'b':
669		ex.type = DEVCG_DEV_BLOCK;
670		break;
671	case 'c':
672		ex.type = DEVCG_DEV_CHAR;
673		break;
674	default:
675		return -EINVAL;
676	}
677	b++;
678	if (!isspace(*b))
679		return -EINVAL;
680	b++;
681	if (*b == '*') {
682		ex.major = ~0;
683		b++;
684	} else if (isdigit(*b)) {
685		memset(temp, 0, sizeof(temp));
686		for (count = 0; count < sizeof(temp) - 1; count++) {
687			temp[count] = *b;
688			b++;
689			if (!isdigit(*b))
690				break;
691		}
692		rc = kstrtou32(temp, 10, &ex.major);
693		if (rc)
694			return -EINVAL;
695	} else {
696		return -EINVAL;
697	}
698	if (*b != ':')
699		return -EINVAL;
700	b++;
701
702	/* read minor */
703	if (*b == '*') {
704		ex.minor = ~0;
705		b++;
706	} else if (isdigit(*b)) {
707		memset(temp, 0, sizeof(temp));
708		for (count = 0; count < sizeof(temp) - 1; count++) {
709			temp[count] = *b;
710			b++;
711			if (!isdigit(*b))
712				break;
713		}
714		rc = kstrtou32(temp, 10, &ex.minor);
715		if (rc)
716			return -EINVAL;
717	} else {
718		return -EINVAL;
719	}
720	if (!isspace(*b))
721		return -EINVAL;
722	for (b++, count = 0; count < 3; count++, b++) {
723		switch (*b) {
724		case 'r':
725			ex.access |= DEVCG_ACC_READ;
726			break;
727		case 'w':
728			ex.access |= DEVCG_ACC_WRITE;
729			break;
730		case 'm':
731			ex.access |= DEVCG_ACC_MKNOD;
732			break;
733		case '\n':
734		case '\0':
735			count = 3;
736			break;
737		default:
738			return -EINVAL;
739		}
740	}
741
742	switch (filetype) {
743	case DEVCG_ALLOW:
744		/*
745		 * If the default policy is to allow by default, try to remove
746		 * an matching exception instead. And be silent about it: we
747		 * don't want to break compatibility
748		 */
749		if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
750			/* Check if the parent allows removing it first */
751			if (!parent_allows_removal(devcgroup, &ex))
752				return -EPERM;
753			dev_exception_rm(devcgroup, &ex);
754			break;
755		}
756
757		if (!parent_has_perm(devcgroup, &ex))
758			return -EPERM;
759		rc = dev_exception_add(devcgroup, &ex);
760		break;
761	case DEVCG_DENY:
762		/*
763		 * If the default policy is to deny by default, try to remove
764		 * an matching exception instead. And be silent about it: we
765		 * don't want to break compatibility
766		 */
767		if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
768			dev_exception_rm(devcgroup, &ex);
769		else
770			rc = dev_exception_add(devcgroup, &ex);
771
772		if (rc)
773			break;
774		/* we only propagate new restrictions */
775		rc = propagate_exception(devcgroup, &ex);
776		break;
777	default:
778		rc = -EINVAL;
779	}
780	return rc;
781}
782
783static ssize_t devcgroup_access_write(struct kernfs_open_file *of,
784				      char *buf, size_t nbytes, loff_t off)
785{
786	int retval;
787
788	mutex_lock(&devcgroup_mutex);
789	retval = devcgroup_update_access(css_to_devcgroup(of_css(of)),
790					 of_cft(of)->private, strstrip(buf));
791	mutex_unlock(&devcgroup_mutex);
792	return retval ?: nbytes;
793}
794
/*
 * Control files of the device controller. "allow" and "deny" share one
 * write handler and are told apart by .private; "list" only has a show
 * callback.
 */
static struct cftype dev_cgroup_files[] = {
	{
		/* write a rule granting access */
		.name = "allow",
		.write = devcgroup_access_write,
		.private = DEVCG_ALLOW,
	},
	{
		/* write a rule taking access away */
		.name = "deny",
		.write = devcgroup_access_write,
		.private = DEVCG_DENY,
	},
	{
		/* shows the current whitelist */
		.name = "list",
		.seq_show = devcgroup_seq_show,
		.private = DEVCG_LIST,
	},
	{ }	/* terminate */
};
813
/*
 * Callbacks of the devices controller; the control files are registered as
 * legacy cftypes.
 */
struct cgroup_subsys devices_cgrp_subsys = {
	.css_alloc = devcgroup_css_alloc,
	.css_free = devcgroup_css_free,
	.css_online = devcgroup_online,
	.css_offline = devcgroup_offline,
	.legacy_cftypes = dev_cgroup_files,
};
821
822/**
823 * devcgroup_legacy_check_permission - checks if an inode operation is permitted
824 * @dev_cgroup: the dev cgroup to be tested against
825 * @type: device type
826 * @major: device major number
827 * @minor: device minor number
828 * @access: combination of DEVCG_ACC_WRITE, DEVCG_ACC_READ and DEVCG_ACC_MKNOD
829 *
830 * returns 0 on success, -EPERM case the operation is not permitted
831 */
832static int devcgroup_legacy_check_permission(short type, u32 major, u32 minor,
833					short access)
834{
835	struct dev_cgroup *dev_cgroup;
836	bool rc;
837
838	rcu_read_lock();
839	dev_cgroup = task_devcgroup(current);
840	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
841		/* Can't match any of the exceptions, even partially */
842		rc = !match_exception_partial(&dev_cgroup->exceptions,
843					      type, major, minor, access);
844	else
845		/* Need to match completely one exception to be allowed */
846		rc = match_exception(&dev_cgroup->exceptions, type, major,
847				     minor, access);
848	rcu_read_unlock();
849
850	if (!rc)
851		return -EPERM;
852
853	return 0;
854}
855
856#endif /* CONFIG_CGROUP_DEVICE */
857
858#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
859
860int devcgroup_check_permission(short type, u32 major, u32 minor, short access)
861{
862	int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
863
864	if (rc)
865		return -EPERM;
866
867	#ifdef CONFIG_CGROUP_DEVICE
868	return devcgroup_legacy_check_permission(type, major, minor, access);
869
870	#else /* CONFIG_CGROUP_DEVICE */
871	return 0;
872
873	#endif /* CONFIG_CGROUP_DEVICE */
874}
875EXPORT_SYMBOL(devcgroup_check_permission);
876#endif /* defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) */
877