xref: /kernel/linux/linux-6.6/security/landlock/fs.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Landlock LSM - Filesystem management and hooks
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
662306a36Sopenharmony_ci * Copyright © 2018-2020 ANSSI
762306a36Sopenharmony_ci * Copyright © 2021-2022 Microsoft Corporation
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/atomic.h>
1162306a36Sopenharmony_ci#include <linux/bitops.h>
1262306a36Sopenharmony_ci#include <linux/bits.h>
1362306a36Sopenharmony_ci#include <linux/compiler_types.h>
1462306a36Sopenharmony_ci#include <linux/dcache.h>
1562306a36Sopenharmony_ci#include <linux/err.h>
1662306a36Sopenharmony_ci#include <linux/fs.h>
1762306a36Sopenharmony_ci#include <linux/init.h>
1862306a36Sopenharmony_ci#include <linux/kernel.h>
1962306a36Sopenharmony_ci#include <linux/limits.h>
2062306a36Sopenharmony_ci#include <linux/list.h>
2162306a36Sopenharmony_ci#include <linux/lsm_hooks.h>
2262306a36Sopenharmony_ci#include <linux/mount.h>
2362306a36Sopenharmony_ci#include <linux/namei.h>
2462306a36Sopenharmony_ci#include <linux/path.h>
2562306a36Sopenharmony_ci#include <linux/rcupdate.h>
2662306a36Sopenharmony_ci#include <linux/spinlock.h>
2762306a36Sopenharmony_ci#include <linux/stat.h>
2862306a36Sopenharmony_ci#include <linux/types.h>
2962306a36Sopenharmony_ci#include <linux/wait_bit.h>
3062306a36Sopenharmony_ci#include <linux/workqueue.h>
3162306a36Sopenharmony_ci#include <uapi/linux/landlock.h>
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#include "common.h"
3462306a36Sopenharmony_ci#include "cred.h"
3562306a36Sopenharmony_ci#include "fs.h"
3662306a36Sopenharmony_ci#include "limits.h"
3762306a36Sopenharmony_ci#include "object.h"
3862306a36Sopenharmony_ci#include "ruleset.h"
3962306a36Sopenharmony_ci#include "setup.h"
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci/* Underlying object management */
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistatic void release_inode(struct landlock_object *const object)
4462306a36Sopenharmony_ci	__releases(object->lock)
4562306a36Sopenharmony_ci{
4662306a36Sopenharmony_ci	struct inode *const inode = object->underobj;
4762306a36Sopenharmony_ci	struct super_block *sb;
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	if (!inode) {
5062306a36Sopenharmony_ci		spin_unlock(&object->lock);
5162306a36Sopenharmony_ci		return;
5262306a36Sopenharmony_ci	}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci	/*
5562306a36Sopenharmony_ci	 * Protects against concurrent use by hook_sb_delete() of the reference
5662306a36Sopenharmony_ci	 * to the underlying inode.
5762306a36Sopenharmony_ci	 */
5862306a36Sopenharmony_ci	object->underobj = NULL;
5962306a36Sopenharmony_ci	/*
6062306a36Sopenharmony_ci	 * Makes sure that if the filesystem is concurrently unmounted,
6162306a36Sopenharmony_ci	 * hook_sb_delete() will wait for us to finish iput().
6262306a36Sopenharmony_ci	 */
6362306a36Sopenharmony_ci	sb = inode->i_sb;
6462306a36Sopenharmony_ci	atomic_long_inc(&landlock_superblock(sb)->inode_refs);
6562306a36Sopenharmony_ci	spin_unlock(&object->lock);
6662306a36Sopenharmony_ci	/*
6762306a36Sopenharmony_ci	 * Because object->underobj was not NULL, hook_sb_delete() and
6862306a36Sopenharmony_ci	 * get_inode_object() guarantee that it is safe to reset
6962306a36Sopenharmony_ci	 * landlock_inode(inode)->object while it is not NULL.  It is therefore
7062306a36Sopenharmony_ci	 * not necessary to lock inode->i_lock.
7162306a36Sopenharmony_ci	 */
7262306a36Sopenharmony_ci	rcu_assign_pointer(landlock_inode(inode)->object, NULL);
7362306a36Sopenharmony_ci	/*
7462306a36Sopenharmony_ci	 * Now, new rules can safely be tied to @inode with get_inode_object().
7562306a36Sopenharmony_ci	 */
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	iput(inode);
7862306a36Sopenharmony_ci	if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
7962306a36Sopenharmony_ci		wake_up_var(&landlock_superblock(sb)->inode_refs);
8062306a36Sopenharmony_ci}
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_cistatic const struct landlock_object_underops landlock_fs_underops = {
8362306a36Sopenharmony_ci	.release = release_inode
8462306a36Sopenharmony_ci};
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci/* Ruleset management */
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_cistatic struct landlock_object *get_inode_object(struct inode *const inode)
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	struct landlock_object *object, *new_object;
9162306a36Sopenharmony_ci	struct landlock_inode_security *inode_sec = landlock_inode(inode);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	rcu_read_lock();
9462306a36Sopenharmony_ciretry:
9562306a36Sopenharmony_ci	object = rcu_dereference(inode_sec->object);
9662306a36Sopenharmony_ci	if (object) {
9762306a36Sopenharmony_ci		if (likely(refcount_inc_not_zero(&object->usage))) {
9862306a36Sopenharmony_ci			rcu_read_unlock();
9962306a36Sopenharmony_ci			return object;
10062306a36Sopenharmony_ci		}
10162306a36Sopenharmony_ci		/*
10262306a36Sopenharmony_ci		 * We are racing with release_inode(), the object is going
10362306a36Sopenharmony_ci		 * away.  Wait for release_inode(), then retry.
10462306a36Sopenharmony_ci		 */
10562306a36Sopenharmony_ci		spin_lock(&object->lock);
10662306a36Sopenharmony_ci		spin_unlock(&object->lock);
10762306a36Sopenharmony_ci		goto retry;
10862306a36Sopenharmony_ci	}
10962306a36Sopenharmony_ci	rcu_read_unlock();
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	/*
11262306a36Sopenharmony_ci	 * If there is no object tied to @inode, then create a new one (without
11362306a36Sopenharmony_ci	 * holding any locks).
11462306a36Sopenharmony_ci	 */
11562306a36Sopenharmony_ci	new_object = landlock_create_object(&landlock_fs_underops, inode);
11662306a36Sopenharmony_ci	if (IS_ERR(new_object))
11762306a36Sopenharmony_ci		return new_object;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	/*
12062306a36Sopenharmony_ci	 * Protects against concurrent calls to get_inode_object() or
12162306a36Sopenharmony_ci	 * hook_sb_delete().
12262306a36Sopenharmony_ci	 */
12362306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
12462306a36Sopenharmony_ci	if (unlikely(rcu_access_pointer(inode_sec->object))) {
12562306a36Sopenharmony_ci		/* Someone else just created the object, bail out and retry. */
12662306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
12762306a36Sopenharmony_ci		kfree(new_object);
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci		rcu_read_lock();
13062306a36Sopenharmony_ci		goto retry;
13162306a36Sopenharmony_ci	}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	/*
13462306a36Sopenharmony_ci	 * @inode will be released by hook_sb_delete() on its superblock
13562306a36Sopenharmony_ci	 * shutdown, or by release_inode() when no more ruleset references the
13662306a36Sopenharmony_ci	 * related object.
13762306a36Sopenharmony_ci	 */
13862306a36Sopenharmony_ci	ihold(inode);
13962306a36Sopenharmony_ci	rcu_assign_pointer(inode_sec->object, new_object);
14062306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
14162306a36Sopenharmony_ci	return new_object;
14262306a36Sopenharmony_ci}
14362306a36Sopenharmony_ci
/* Every access right that is meaningful for a non-directory file. */
/* clang-format off */
#define ACCESS_FILE ( \
	LANDLOCK_ACCESS_FS_READ_FILE | \
	LANDLOCK_ACCESS_FS_WRITE_FILE | \
	LANDLOCK_ACCESS_FS_EXECUTE | \
	LANDLOCK_ACCESS_FS_TRUNCATE)
/* clang-format on */
15262306a36Sopenharmony_ci
/*
 * Access rights that are denied by default, whether or not a ruleset/layer
 * handles them.  OR this with every ruleset->fs_access_masks[] entry whenever
 * the absolute handled access masks are needed.
 */
/* clang-format off */
#define ACCESS_INITIALLY_DENIED (LANDLOCK_ACCESS_FS_REFER)
/* clang-format on */
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci/*
16462306a36Sopenharmony_ci * @path: Should have been checked by get_path_from_fd().
16562306a36Sopenharmony_ci */
16662306a36Sopenharmony_ciint landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
16762306a36Sopenharmony_ci			    const struct path *const path,
16862306a36Sopenharmony_ci			    access_mask_t access_rights)
16962306a36Sopenharmony_ci{
17062306a36Sopenharmony_ci	int err;
17162306a36Sopenharmony_ci	struct landlock_object *object;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	/* Files only get access rights that make sense. */
17462306a36Sopenharmony_ci	if (!d_is_dir(path->dentry) &&
17562306a36Sopenharmony_ci	    (access_rights | ACCESS_FILE) != ACCESS_FILE)
17662306a36Sopenharmony_ci		return -EINVAL;
17762306a36Sopenharmony_ci	if (WARN_ON_ONCE(ruleset->num_layers != 1))
17862306a36Sopenharmony_ci		return -EINVAL;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	/* Transforms relative access rights to absolute ones. */
18162306a36Sopenharmony_ci	access_rights |=
18262306a36Sopenharmony_ci		LANDLOCK_MASK_ACCESS_FS &
18362306a36Sopenharmony_ci		~(ruleset->fs_access_masks[0] | ACCESS_INITIALLY_DENIED);
18462306a36Sopenharmony_ci	object = get_inode_object(d_backing_inode(path->dentry));
18562306a36Sopenharmony_ci	if (IS_ERR(object))
18662306a36Sopenharmony_ci		return PTR_ERR(object);
18762306a36Sopenharmony_ci	mutex_lock(&ruleset->lock);
18862306a36Sopenharmony_ci	err = landlock_insert_rule(ruleset, object, access_rights);
18962306a36Sopenharmony_ci	mutex_unlock(&ruleset->lock);
19062306a36Sopenharmony_ci	/*
19162306a36Sopenharmony_ci	 * No need to check for an error because landlock_insert_rule()
19262306a36Sopenharmony_ci	 * increments the refcount for the new object if needed.
19362306a36Sopenharmony_ci	 */
19462306a36Sopenharmony_ci	landlock_put_object(object);
19562306a36Sopenharmony_ci	return err;
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci/* Access-control management */
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci/*
20162306a36Sopenharmony_ci * The lifetime of the returned rule is tied to @domain.
20262306a36Sopenharmony_ci *
20362306a36Sopenharmony_ci * Returns NULL if no rule is found or if @dentry is negative.
20462306a36Sopenharmony_ci */
20562306a36Sopenharmony_cistatic inline const struct landlock_rule *
20662306a36Sopenharmony_cifind_rule(const struct landlock_ruleset *const domain,
20762306a36Sopenharmony_ci	  const struct dentry *const dentry)
20862306a36Sopenharmony_ci{
20962306a36Sopenharmony_ci	const struct landlock_rule *rule;
21062306a36Sopenharmony_ci	const struct inode *inode;
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	/* Ignores nonexistent leafs. */
21362306a36Sopenharmony_ci	if (d_is_negative(dentry))
21462306a36Sopenharmony_ci		return NULL;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	inode = d_backing_inode(dentry);
21762306a36Sopenharmony_ci	rcu_read_lock();
21862306a36Sopenharmony_ci	rule = landlock_find_rule(
21962306a36Sopenharmony_ci		domain, rcu_dereference(landlock_inode(inode)->object));
22062306a36Sopenharmony_ci	rcu_read_unlock();
22162306a36Sopenharmony_ci	return rule;
22262306a36Sopenharmony_ci}
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci/*
22562306a36Sopenharmony_ci * @layer_masks is read and may be updated according to the access request and
22662306a36Sopenharmony_ci * the matching rule.
22762306a36Sopenharmony_ci *
22862306a36Sopenharmony_ci * Returns true if the request is allowed (i.e. relevant layer masks for the
22962306a36Sopenharmony_ci * request are empty).
23062306a36Sopenharmony_ci */
23162306a36Sopenharmony_cistatic inline bool
23262306a36Sopenharmony_ciunmask_layers(const struct landlock_rule *const rule,
23362306a36Sopenharmony_ci	      const access_mask_t access_request,
23462306a36Sopenharmony_ci	      layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
23562306a36Sopenharmony_ci{
23662306a36Sopenharmony_ci	size_t layer_level;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	if (!access_request || !layer_masks)
23962306a36Sopenharmony_ci		return true;
24062306a36Sopenharmony_ci	if (!rule)
24162306a36Sopenharmony_ci		return false;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	/*
24462306a36Sopenharmony_ci	 * An access is granted if, for each policy layer, at least one rule
24562306a36Sopenharmony_ci	 * encountered on the pathwalk grants the requested access,
24662306a36Sopenharmony_ci	 * regardless of its position in the layer stack.  We must then check
24762306a36Sopenharmony_ci	 * the remaining layers for each inode, from the first added layer to
24862306a36Sopenharmony_ci	 * the last one.  When there is multiple requested accesses, for each
24962306a36Sopenharmony_ci	 * policy layer, the full set of requested accesses may not be granted
25062306a36Sopenharmony_ci	 * by only one rule, but by the union (binary OR) of multiple rules.
25162306a36Sopenharmony_ci	 * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
25262306a36Sopenharmony_ci	 */
25362306a36Sopenharmony_ci	for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
25462306a36Sopenharmony_ci		const struct landlock_layer *const layer =
25562306a36Sopenharmony_ci			&rule->layers[layer_level];
25662306a36Sopenharmony_ci		const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
25762306a36Sopenharmony_ci		const unsigned long access_req = access_request;
25862306a36Sopenharmony_ci		unsigned long access_bit;
25962306a36Sopenharmony_ci		bool is_empty;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci		/*
26262306a36Sopenharmony_ci		 * Records in @layer_masks which layer grants access to each
26362306a36Sopenharmony_ci		 * requested access.
26462306a36Sopenharmony_ci		 */
26562306a36Sopenharmony_ci		is_empty = true;
26662306a36Sopenharmony_ci		for_each_set_bit(access_bit, &access_req,
26762306a36Sopenharmony_ci				 ARRAY_SIZE(*layer_masks)) {
26862306a36Sopenharmony_ci			if (layer->access & BIT_ULL(access_bit))
26962306a36Sopenharmony_ci				(*layer_masks)[access_bit] &= ~layer_bit;
27062306a36Sopenharmony_ci			is_empty = is_empty && !(*layer_masks)[access_bit];
27162306a36Sopenharmony_ci		}
27262306a36Sopenharmony_ci		if (is_empty)
27362306a36Sopenharmony_ci			return true;
27462306a36Sopenharmony_ci	}
27562306a36Sopenharmony_ci	return false;
27662306a36Sopenharmony_ci}
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci/*
27962306a36Sopenharmony_ci * Allows access to pseudo filesystems that will never be mountable (e.g.
28062306a36Sopenharmony_ci * sockfs, pipefs), but can still be reachable through
28162306a36Sopenharmony_ci * /proc/<pid>/fd/<file-descriptor>
28262306a36Sopenharmony_ci */
28362306a36Sopenharmony_cistatic inline bool is_nouser_or_private(const struct dentry *dentry)
28462306a36Sopenharmony_ci{
28562306a36Sopenharmony_ci	return (dentry->d_sb->s_flags & SB_NOUSER) ||
28662306a36Sopenharmony_ci	       (d_is_positive(dentry) &&
28762306a36Sopenharmony_ci		unlikely(IS_PRIVATE(d_backing_inode(dentry))));
28862306a36Sopenharmony_ci}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_cistatic inline access_mask_t
29162306a36Sopenharmony_ciget_handled_accesses(const struct landlock_ruleset *const domain)
29262306a36Sopenharmony_ci{
29362306a36Sopenharmony_ci	access_mask_t access_dom = ACCESS_INITIALLY_DENIED;
29462306a36Sopenharmony_ci	size_t layer_level;
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	for (layer_level = 0; layer_level < domain->num_layers; layer_level++)
29762306a36Sopenharmony_ci		access_dom |= domain->fs_access_masks[layer_level];
29862306a36Sopenharmony_ci	return access_dom & LANDLOCK_MASK_ACCESS_FS;
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci/**
30262306a36Sopenharmony_ci * init_layer_masks - Initialize layer masks from an access request
30362306a36Sopenharmony_ci *
30462306a36Sopenharmony_ci * Populates @layer_masks such that for each access right in @access_request,
30562306a36Sopenharmony_ci * the bits for all the layers are set where this access right is handled.
30662306a36Sopenharmony_ci *
30762306a36Sopenharmony_ci * @domain: The domain that defines the current restrictions.
30862306a36Sopenharmony_ci * @access_request: The requested access rights to check.
30962306a36Sopenharmony_ci * @layer_masks: The layer masks to populate.
31062306a36Sopenharmony_ci *
31162306a36Sopenharmony_ci * Returns: An access mask where each access right bit is set which is handled
31262306a36Sopenharmony_ci * in any of the active layers in @domain.
31362306a36Sopenharmony_ci */
31462306a36Sopenharmony_cistatic inline access_mask_t
31562306a36Sopenharmony_ciinit_layer_masks(const struct landlock_ruleset *const domain,
31662306a36Sopenharmony_ci		 const access_mask_t access_request,
31762306a36Sopenharmony_ci		 layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
31862306a36Sopenharmony_ci{
31962306a36Sopenharmony_ci	access_mask_t handled_accesses = 0;
32062306a36Sopenharmony_ci	size_t layer_level;
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	memset(layer_masks, 0, sizeof(*layer_masks));
32362306a36Sopenharmony_ci	/* An empty access request can happen because of O_WRONLY | O_RDWR. */
32462306a36Sopenharmony_ci	if (!access_request)
32562306a36Sopenharmony_ci		return 0;
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	/* Saves all handled accesses per layer. */
32862306a36Sopenharmony_ci	for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
32962306a36Sopenharmony_ci		const unsigned long access_req = access_request;
33062306a36Sopenharmony_ci		unsigned long access_bit;
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci		for_each_set_bit(access_bit, &access_req,
33362306a36Sopenharmony_ci				 ARRAY_SIZE(*layer_masks)) {
33462306a36Sopenharmony_ci			/*
33562306a36Sopenharmony_ci			 * Artificially handles all initially denied by default
33662306a36Sopenharmony_ci			 * access rights.
33762306a36Sopenharmony_ci			 */
33862306a36Sopenharmony_ci			if (BIT_ULL(access_bit) &
33962306a36Sopenharmony_ci			    (domain->fs_access_masks[layer_level] |
34062306a36Sopenharmony_ci			     ACCESS_INITIALLY_DENIED)) {
34162306a36Sopenharmony_ci				(*layer_masks)[access_bit] |=
34262306a36Sopenharmony_ci					BIT_ULL(layer_level);
34362306a36Sopenharmony_ci				handled_accesses |= BIT_ULL(access_bit);
34462306a36Sopenharmony_ci			}
34562306a36Sopenharmony_ci		}
34662306a36Sopenharmony_ci	}
34762306a36Sopenharmony_ci	return handled_accesses;
34862306a36Sopenharmony_ci}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci/*
35162306a36Sopenharmony_ci * Check that a destination file hierarchy has more restrictions than a source
35262306a36Sopenharmony_ci * file hierarchy.  This is only used for link and rename actions.
35362306a36Sopenharmony_ci *
35462306a36Sopenharmony_ci * @layer_masks_child2: Optional child masks.
35562306a36Sopenharmony_ci */
35662306a36Sopenharmony_cistatic inline bool no_more_access(
35762306a36Sopenharmony_ci	const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
35862306a36Sopenharmony_ci	const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS],
35962306a36Sopenharmony_ci	const bool child1_is_directory,
36062306a36Sopenharmony_ci	const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
36162306a36Sopenharmony_ci	const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS],
36262306a36Sopenharmony_ci	const bool child2_is_directory)
36362306a36Sopenharmony_ci{
36462306a36Sopenharmony_ci	unsigned long access_bit;
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
36762306a36Sopenharmony_ci	     access_bit++) {
36862306a36Sopenharmony_ci		/* Ignores accesses that only make sense for directories. */
36962306a36Sopenharmony_ci		const bool is_file_access =
37062306a36Sopenharmony_ci			!!(BIT_ULL(access_bit) & ACCESS_FILE);
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci		if (child1_is_directory || is_file_access) {
37362306a36Sopenharmony_ci			/*
37462306a36Sopenharmony_ci			 * Checks if the destination restrictions are a
37562306a36Sopenharmony_ci			 * superset of the source ones (i.e. inherited access
37662306a36Sopenharmony_ci			 * rights without child exceptions):
37762306a36Sopenharmony_ci			 * restrictions(parent2) >= restrictions(child1)
37862306a36Sopenharmony_ci			 */
37962306a36Sopenharmony_ci			if ((((*layer_masks_parent1)[access_bit] &
38062306a36Sopenharmony_ci			      (*layer_masks_child1)[access_bit]) |
38162306a36Sopenharmony_ci			     (*layer_masks_parent2)[access_bit]) !=
38262306a36Sopenharmony_ci			    (*layer_masks_parent2)[access_bit])
38362306a36Sopenharmony_ci				return false;
38462306a36Sopenharmony_ci		}
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci		if (!layer_masks_child2)
38762306a36Sopenharmony_ci			continue;
38862306a36Sopenharmony_ci		if (child2_is_directory || is_file_access) {
38962306a36Sopenharmony_ci			/*
39062306a36Sopenharmony_ci			 * Checks inverted restrictions for RENAME_EXCHANGE:
39162306a36Sopenharmony_ci			 * restrictions(parent1) >= restrictions(child2)
39262306a36Sopenharmony_ci			 */
39362306a36Sopenharmony_ci			if ((((*layer_masks_parent2)[access_bit] &
39462306a36Sopenharmony_ci			      (*layer_masks_child2)[access_bit]) |
39562306a36Sopenharmony_ci			     (*layer_masks_parent1)[access_bit]) !=
39662306a36Sopenharmony_ci			    (*layer_masks_parent1)[access_bit])
39762306a36Sopenharmony_ci				return false;
39862306a36Sopenharmony_ci		}
39962306a36Sopenharmony_ci	}
40062306a36Sopenharmony_ci	return true;
40162306a36Sopenharmony_ci}
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci/*
40462306a36Sopenharmony_ci * Removes @layer_masks accesses that are not requested.
40562306a36Sopenharmony_ci *
40662306a36Sopenharmony_ci * Returns true if the request is allowed, false otherwise.
40762306a36Sopenharmony_ci */
40862306a36Sopenharmony_cistatic inline bool
40962306a36Sopenharmony_ciscope_to_request(const access_mask_t access_request,
41062306a36Sopenharmony_ci		 layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	const unsigned long access_req = access_request;
41362306a36Sopenharmony_ci	unsigned long access_bit;
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	if (WARN_ON_ONCE(!layer_masks))
41662306a36Sopenharmony_ci		return true;
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks))
41962306a36Sopenharmony_ci		(*layer_masks)[access_bit] = 0;
42062306a36Sopenharmony_ci	return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
42162306a36Sopenharmony_ci}
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci/*
42462306a36Sopenharmony_ci * Returns true if there is at least one access right different than
42562306a36Sopenharmony_ci * LANDLOCK_ACCESS_FS_REFER.
42662306a36Sopenharmony_ci */
42762306a36Sopenharmony_cistatic inline bool
42862306a36Sopenharmony_ciis_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
42962306a36Sopenharmony_ci	  const access_mask_t access_request)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	unsigned long access_bit;
43262306a36Sopenharmony_ci	/* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
43362306a36Sopenharmony_ci	const unsigned long access_check = access_request &
43462306a36Sopenharmony_ci					   ~LANDLOCK_ACCESS_FS_REFER;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	if (!layer_masks)
43762306a36Sopenharmony_ci		return false;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) {
44062306a36Sopenharmony_ci		if ((*layer_masks)[access_bit])
44162306a36Sopenharmony_ci			return true;
44262306a36Sopenharmony_ci	}
44362306a36Sopenharmony_ci	return false;
44462306a36Sopenharmony_ci}
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci/**
44762306a36Sopenharmony_ci * is_access_to_paths_allowed - Check accesses for requests with a common path
44862306a36Sopenharmony_ci *
44962306a36Sopenharmony_ci * @domain: Domain to check against.
45062306a36Sopenharmony_ci * @path: File hierarchy to walk through.
45162306a36Sopenharmony_ci * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is
45262306a36Sopenharmony_ci *     equal to @layer_masks_parent2 (if any).  This is tied to the unique
45362306a36Sopenharmony_ci *     requested path for most actions, or the source in case of a refer action
45462306a36Sopenharmony_ci *     (i.e. rename or link), or the source and destination in case of
45562306a36Sopenharmony_ci *     RENAME_EXCHANGE.
45662306a36Sopenharmony_ci * @layer_masks_parent1: Pointer to a matrix of layer masks per access
45762306a36Sopenharmony_ci *     masks, identifying the layers that forbid a specific access.  Bits from
45862306a36Sopenharmony_ci *     this matrix can be unset according to the @path walk.  An empty matrix
45962306a36Sopenharmony_ci *     means that @domain allows all possible Landlock accesses (i.e. not only
46062306a36Sopenharmony_ci *     those identified by @access_request_parent1).  This matrix can
46162306a36Sopenharmony_ci *     initially refer to domain layer masks and, when the accesses for the
46262306a36Sopenharmony_ci *     destination and source are the same, to requested layer masks.
46362306a36Sopenharmony_ci * @dentry_child1: Dentry to the initial child of the parent1 path.  This
46462306a36Sopenharmony_ci *     pointer must be NULL for non-refer actions (i.e. not link nor rename).
46562306a36Sopenharmony_ci * @access_request_parent2: Similar to @access_request_parent1 but for a
46662306a36Sopenharmony_ci *     request involving a source and a destination.  This refers to the
46762306a36Sopenharmony_ci *     destination, except in case of RENAME_EXCHANGE where it also refers to
46862306a36Sopenharmony_ci *     the source.  Must be set to 0 when using a simple path request.
46962306a36Sopenharmony_ci * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer
47062306a36Sopenharmony_ci *     action.  This must be NULL otherwise.
47162306a36Sopenharmony_ci * @dentry_child2: Dentry to the initial child of the parent2 path.  This
47262306a36Sopenharmony_ci *     pointer is only set for RENAME_EXCHANGE actions and must be NULL
47362306a36Sopenharmony_ci *     otherwise.
47462306a36Sopenharmony_ci *
47562306a36Sopenharmony_ci * This helper first checks that the destination has a superset of restrictions
47662306a36Sopenharmony_ci * compared to the source (if any) for a common path.  Because of
47762306a36Sopenharmony_ci * RENAME_EXCHANGE actions, source and destinations may be swapped.  It then
47862306a36Sopenharmony_ci * checks that the collected accesses and the remaining ones are enough to
47962306a36Sopenharmony_ci * allow the request.
48062306a36Sopenharmony_ci *
48162306a36Sopenharmony_ci * Returns:
48262306a36Sopenharmony_ci * - true if the access request is granted;
48362306a36Sopenharmony_ci * - false otherwise.
48462306a36Sopenharmony_ci */
48562306a36Sopenharmony_cistatic bool is_access_to_paths_allowed(
48662306a36Sopenharmony_ci	const struct landlock_ruleset *const domain,
48762306a36Sopenharmony_ci	const struct path *const path,
48862306a36Sopenharmony_ci	const access_mask_t access_request_parent1,
48962306a36Sopenharmony_ci	layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
49062306a36Sopenharmony_ci	const struct dentry *const dentry_child1,
49162306a36Sopenharmony_ci	const access_mask_t access_request_parent2,
49262306a36Sopenharmony_ci	layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
49362306a36Sopenharmony_ci	const struct dentry *const dentry_child2)
49462306a36Sopenharmony_ci{
49562306a36Sopenharmony_ci	bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
49662306a36Sopenharmony_ci	     child1_is_directory = true, child2_is_directory = true;
49762306a36Sopenharmony_ci	struct path walker_path;
49862306a36Sopenharmony_ci	access_mask_t access_masked_parent1, access_masked_parent2;
49962306a36Sopenharmony_ci	layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
50062306a36Sopenharmony_ci		_layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
50162306a36Sopenharmony_ci	layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
50262306a36Sopenharmony_ci	(*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	if (!access_request_parent1 && !access_request_parent2)
50562306a36Sopenharmony_ci		return true;
50662306a36Sopenharmony_ci	if (WARN_ON_ONCE(!domain || !path))
50762306a36Sopenharmony_ci		return true;
50862306a36Sopenharmony_ci	if (is_nouser_or_private(path->dentry))
50962306a36Sopenharmony_ci		return true;
51062306a36Sopenharmony_ci	if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1))
51162306a36Sopenharmony_ci		return false;
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	if (unlikely(layer_masks_parent2)) {
51462306a36Sopenharmony_ci		if (WARN_ON_ONCE(!dentry_child1))
51562306a36Sopenharmony_ci			return false;
51662306a36Sopenharmony_ci		/*
51762306a36Sopenharmony_ci		 * For a double request, first check for potential privilege
51862306a36Sopenharmony_ci		 * escalation by looking at domain handled accesses (which are
51962306a36Sopenharmony_ci		 * a superset of the meaningful requested accesses).
52062306a36Sopenharmony_ci		 */
52162306a36Sopenharmony_ci		access_masked_parent1 = access_masked_parent2 =
52262306a36Sopenharmony_ci			get_handled_accesses(domain);
52362306a36Sopenharmony_ci		is_dom_check = true;
52462306a36Sopenharmony_ci	} else {
52562306a36Sopenharmony_ci		if (WARN_ON_ONCE(dentry_child1 || dentry_child2))
52662306a36Sopenharmony_ci			return false;
52762306a36Sopenharmony_ci		/* For a simple request, only check for requested accesses. */
52862306a36Sopenharmony_ci		access_masked_parent1 = access_request_parent1;
52962306a36Sopenharmony_ci		access_masked_parent2 = access_request_parent2;
53062306a36Sopenharmony_ci		is_dom_check = false;
53162306a36Sopenharmony_ci	}
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	if (unlikely(dentry_child1)) {
53462306a36Sopenharmony_ci		unmask_layers(find_rule(domain, dentry_child1),
53562306a36Sopenharmony_ci			      init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
53662306a36Sopenharmony_ci					       &_layer_masks_child1),
53762306a36Sopenharmony_ci			      &_layer_masks_child1);
53862306a36Sopenharmony_ci		layer_masks_child1 = &_layer_masks_child1;
53962306a36Sopenharmony_ci		child1_is_directory = d_is_dir(dentry_child1);
54062306a36Sopenharmony_ci	}
54162306a36Sopenharmony_ci	if (unlikely(dentry_child2)) {
54262306a36Sopenharmony_ci		unmask_layers(find_rule(domain, dentry_child2),
54362306a36Sopenharmony_ci			      init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
54462306a36Sopenharmony_ci					       &_layer_masks_child2),
54562306a36Sopenharmony_ci			      &_layer_masks_child2);
54662306a36Sopenharmony_ci		layer_masks_child2 = &_layer_masks_child2;
54762306a36Sopenharmony_ci		child2_is_directory = d_is_dir(dentry_child2);
54862306a36Sopenharmony_ci	}
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	walker_path = *path;
55162306a36Sopenharmony_ci	path_get(&walker_path);
55262306a36Sopenharmony_ci	/*
55362306a36Sopenharmony_ci	 * We need to walk through all the hierarchy to not miss any relevant
55462306a36Sopenharmony_ci	 * restriction.
55562306a36Sopenharmony_ci	 */
55662306a36Sopenharmony_ci	while (true) {
55762306a36Sopenharmony_ci		struct dentry *parent_dentry;
55862306a36Sopenharmony_ci		const struct landlock_rule *rule;
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci		/*
56162306a36Sopenharmony_ci		 * If at least all accesses allowed on the destination are
56262306a36Sopenharmony_ci		 * already allowed on the source, respectively if there is at
56362306a36Sopenharmony_ci		 * least as much as restrictions on the destination than on the
56462306a36Sopenharmony_ci		 * source, then we can safely refer files from the source to
56562306a36Sopenharmony_ci		 * the destination without risking a privilege escalation.
56662306a36Sopenharmony_ci		 * This also applies in the case of RENAME_EXCHANGE, which
56762306a36Sopenharmony_ci		 * implies checks on both direction.  This is crucial for
56862306a36Sopenharmony_ci		 * standalone multilayered security policies.  Furthermore,
56962306a36Sopenharmony_ci		 * this helps avoid policy writers to shoot themselves in the
57062306a36Sopenharmony_ci		 * foot.
57162306a36Sopenharmony_ci		 */
57262306a36Sopenharmony_ci		if (unlikely(is_dom_check &&
57362306a36Sopenharmony_ci			     no_more_access(
57462306a36Sopenharmony_ci				     layer_masks_parent1, layer_masks_child1,
57562306a36Sopenharmony_ci				     child1_is_directory, layer_masks_parent2,
57662306a36Sopenharmony_ci				     layer_masks_child2,
57762306a36Sopenharmony_ci				     child2_is_directory))) {
57862306a36Sopenharmony_ci			allowed_parent1 = scope_to_request(
57962306a36Sopenharmony_ci				access_request_parent1, layer_masks_parent1);
58062306a36Sopenharmony_ci			allowed_parent2 = scope_to_request(
58162306a36Sopenharmony_ci				access_request_parent2, layer_masks_parent2);
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci			/* Stops when all accesses are granted. */
58462306a36Sopenharmony_ci			if (allowed_parent1 && allowed_parent2)
58562306a36Sopenharmony_ci				break;
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci			/*
58862306a36Sopenharmony_ci			 * Now, downgrades the remaining checks from domain
58962306a36Sopenharmony_ci			 * handled accesses to requested accesses.
59062306a36Sopenharmony_ci			 */
59162306a36Sopenharmony_ci			is_dom_check = false;
59262306a36Sopenharmony_ci			access_masked_parent1 = access_request_parent1;
59362306a36Sopenharmony_ci			access_masked_parent2 = access_request_parent2;
59462306a36Sopenharmony_ci		}
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci		rule = find_rule(domain, walker_path.dentry);
59762306a36Sopenharmony_ci		allowed_parent1 = unmask_layers(rule, access_masked_parent1,
59862306a36Sopenharmony_ci						layer_masks_parent1);
59962306a36Sopenharmony_ci		allowed_parent2 = unmask_layers(rule, access_masked_parent2,
60062306a36Sopenharmony_ci						layer_masks_parent2);
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci		/* Stops when a rule from each layer grants access. */
60362306a36Sopenharmony_ci		if (allowed_parent1 && allowed_parent2)
60462306a36Sopenharmony_ci			break;
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_cijump_up:
60762306a36Sopenharmony_ci		if (walker_path.dentry == walker_path.mnt->mnt_root) {
60862306a36Sopenharmony_ci			if (follow_up(&walker_path)) {
60962306a36Sopenharmony_ci				/* Ignores hidden mount points. */
61062306a36Sopenharmony_ci				goto jump_up;
61162306a36Sopenharmony_ci			} else {
61262306a36Sopenharmony_ci				/*
61362306a36Sopenharmony_ci				 * Stops at the real root.  Denies access
61462306a36Sopenharmony_ci				 * because not all layers have granted access.
61562306a36Sopenharmony_ci				 */
61662306a36Sopenharmony_ci				break;
61762306a36Sopenharmony_ci			}
61862306a36Sopenharmony_ci		}
61962306a36Sopenharmony_ci		if (unlikely(IS_ROOT(walker_path.dentry))) {
62062306a36Sopenharmony_ci			/*
62162306a36Sopenharmony_ci			 * Stops at disconnected root directories.  Only allows
62262306a36Sopenharmony_ci			 * access to internal filesystems (e.g. nsfs, which is
62362306a36Sopenharmony_ci			 * reachable through /proc/<pid>/ns/<namespace>).
62462306a36Sopenharmony_ci			 */
62562306a36Sopenharmony_ci			allowed_parent1 = allowed_parent2 =
62662306a36Sopenharmony_ci				!!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
62762306a36Sopenharmony_ci			break;
62862306a36Sopenharmony_ci		}
62962306a36Sopenharmony_ci		parent_dentry = dget_parent(walker_path.dentry);
63062306a36Sopenharmony_ci		dput(walker_path.dentry);
63162306a36Sopenharmony_ci		walker_path.dentry = parent_dentry;
63262306a36Sopenharmony_ci	}
63362306a36Sopenharmony_ci	path_put(&walker_path);
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	return allowed_parent1 && allowed_parent2;
63662306a36Sopenharmony_ci}
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_cistatic inline int check_access_path(const struct landlock_ruleset *const domain,
63962306a36Sopenharmony_ci				    const struct path *const path,
64062306a36Sopenharmony_ci				    access_mask_t access_request)
64162306a36Sopenharmony_ci{
64262306a36Sopenharmony_ci	layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	access_request = init_layer_masks(domain, access_request, &layer_masks);
64562306a36Sopenharmony_ci	if (is_access_to_paths_allowed(domain, path, access_request,
64662306a36Sopenharmony_ci				       &layer_masks, NULL, 0, NULL, NULL))
64762306a36Sopenharmony_ci		return 0;
64862306a36Sopenharmony_ci	return -EACCES;
64962306a36Sopenharmony_ci}
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_cistatic inline int current_check_access_path(const struct path *const path,
65262306a36Sopenharmony_ci					    const access_mask_t access_request)
65362306a36Sopenharmony_ci{
65462306a36Sopenharmony_ci	const struct landlock_ruleset *const dom =
65562306a36Sopenharmony_ci		landlock_get_current_domain();
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	if (!dom)
65862306a36Sopenharmony_ci		return 0;
65962306a36Sopenharmony_ci	return check_access_path(dom, path, access_request);
66062306a36Sopenharmony_ci}
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_cistatic inline access_mask_t get_mode_access(const umode_t mode)
66362306a36Sopenharmony_ci{
66462306a36Sopenharmony_ci	switch (mode & S_IFMT) {
66562306a36Sopenharmony_ci	case S_IFLNK:
66662306a36Sopenharmony_ci		return LANDLOCK_ACCESS_FS_MAKE_SYM;
66762306a36Sopenharmony_ci	case 0:
66862306a36Sopenharmony_ci		/* A zero mode translates to S_IFREG. */
66962306a36Sopenharmony_ci	case S_IFREG:
67062306a36Sopenharmony_ci		return LANDLOCK_ACCESS_FS_MAKE_REG;
67162306a36Sopenharmony_ci	case S_IFDIR:
67262306a36Sopenharmony_ci		return LANDLOCK_ACCESS_FS_MAKE_DIR;
67362306a36Sopenharmony_ci	case S_IFCHR:
67462306a36Sopenharmony_ci		return LANDLOCK_ACCESS_FS_MAKE_CHAR;
67562306a36Sopenharmony_ci	case S_IFBLK:
67662306a36Sopenharmony_ci		return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
67762306a36Sopenharmony_ci	case S_IFIFO:
67862306a36Sopenharmony_ci		return LANDLOCK_ACCESS_FS_MAKE_FIFO;
67962306a36Sopenharmony_ci	case S_IFSOCK:
68062306a36Sopenharmony_ci		return LANDLOCK_ACCESS_FS_MAKE_SOCK;
68162306a36Sopenharmony_ci	default:
68262306a36Sopenharmony_ci		WARN_ON_ONCE(1);
68362306a36Sopenharmony_ci		return 0;
68462306a36Sopenharmony_ci	}
68562306a36Sopenharmony_ci}
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_cistatic inline access_mask_t maybe_remove(const struct dentry *const dentry)
68862306a36Sopenharmony_ci{
68962306a36Sopenharmony_ci	if (d_is_negative(dentry))
69062306a36Sopenharmony_ci		return 0;
69162306a36Sopenharmony_ci	return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
69262306a36Sopenharmony_ci				  LANDLOCK_ACCESS_FS_REMOVE_FILE;
69362306a36Sopenharmony_ci}
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci/**
69662306a36Sopenharmony_ci * collect_domain_accesses - Walk through a file path and collect accesses
69762306a36Sopenharmony_ci *
69862306a36Sopenharmony_ci * @domain: Domain to check against.
69962306a36Sopenharmony_ci * @mnt_root: Last directory to check.
70062306a36Sopenharmony_ci * @dir: Directory to start the walk from.
70162306a36Sopenharmony_ci * @layer_masks_dom: Where to store the collected accesses.
70262306a36Sopenharmony_ci *
70362306a36Sopenharmony_ci * This helper is useful to begin a path walk from the @dir directory to a
70462306a36Sopenharmony_ci * @mnt_root directory used as a mount point.  This mount point is the common
70562306a36Sopenharmony_ci * ancestor between the source and the destination of a renamed and linked
70662306a36Sopenharmony_ci * file.  While walking from @dir to @mnt_root, we record all the domain's
70762306a36Sopenharmony_ci * allowed accesses in @layer_masks_dom.
70862306a36Sopenharmony_ci *
70962306a36Sopenharmony_ci * This is similar to is_access_to_paths_allowed() but much simpler because it
71062306a36Sopenharmony_ci * only handles walking on the same mount point and only checks one set of
71162306a36Sopenharmony_ci * accesses.
71262306a36Sopenharmony_ci *
71362306a36Sopenharmony_ci * Returns:
71462306a36Sopenharmony_ci * - true if all the domain access rights are allowed for @dir;
71562306a36Sopenharmony_ci * - false if the walk reached @mnt_root.
71662306a36Sopenharmony_ci */
71762306a36Sopenharmony_cistatic bool collect_domain_accesses(
71862306a36Sopenharmony_ci	const struct landlock_ruleset *const domain,
71962306a36Sopenharmony_ci	const struct dentry *const mnt_root, struct dentry *dir,
72062306a36Sopenharmony_ci	layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
72162306a36Sopenharmony_ci{
72262306a36Sopenharmony_ci	unsigned long access_dom;
72362306a36Sopenharmony_ci	bool ret = false;
72462306a36Sopenharmony_ci
72562306a36Sopenharmony_ci	if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
72662306a36Sopenharmony_ci		return true;
72762306a36Sopenharmony_ci	if (is_nouser_or_private(dir))
72862306a36Sopenharmony_ci		return true;
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci	access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
73162306a36Sopenharmony_ci				      layer_masks_dom);
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	dget(dir);
73462306a36Sopenharmony_ci	while (true) {
73562306a36Sopenharmony_ci		struct dentry *parent_dentry;
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci		/* Gets all layers allowing all domain accesses. */
73862306a36Sopenharmony_ci		if (unmask_layers(find_rule(domain, dir), access_dom,
73962306a36Sopenharmony_ci				  layer_masks_dom)) {
74062306a36Sopenharmony_ci			/*
74162306a36Sopenharmony_ci			 * Stops when all handled accesses are allowed by at
74262306a36Sopenharmony_ci			 * least one rule in each layer.
74362306a36Sopenharmony_ci			 */
74462306a36Sopenharmony_ci			ret = true;
74562306a36Sopenharmony_ci			break;
74662306a36Sopenharmony_ci		}
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci		/* We should not reach a root other than @mnt_root. */
74962306a36Sopenharmony_ci		if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir)))
75062306a36Sopenharmony_ci			break;
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci		parent_dentry = dget_parent(dir);
75362306a36Sopenharmony_ci		dput(dir);
75462306a36Sopenharmony_ci		dir = parent_dentry;
75562306a36Sopenharmony_ci	}
75662306a36Sopenharmony_ci	dput(dir);
75762306a36Sopenharmony_ci	return ret;
75862306a36Sopenharmony_ci}
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci/**
76162306a36Sopenharmony_ci * current_check_refer_path - Check if a rename or link action is allowed
76262306a36Sopenharmony_ci *
76362306a36Sopenharmony_ci * @old_dentry: File or directory requested to be moved or linked.
76462306a36Sopenharmony_ci * @new_dir: Destination parent directory.
76562306a36Sopenharmony_ci * @new_dentry: Destination file or directory.
76662306a36Sopenharmony_ci * @removable: Sets to true if it is a rename operation.
76762306a36Sopenharmony_ci * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE.
76862306a36Sopenharmony_ci *
76962306a36Sopenharmony_ci * Because of its unprivileged constraints, Landlock relies on file hierarchies
77062306a36Sopenharmony_ci * (and not only inodes) to tie access rights to files.  Being able to link or
77162306a36Sopenharmony_ci * rename a file hierarchy brings some challenges.  Indeed, moving or linking a
77262306a36Sopenharmony_ci * file (i.e. creating a new reference to an inode) can have an impact on the
77362306a36Sopenharmony_ci * actions allowed for a set of files if it would change its parent directory
77462306a36Sopenharmony_ci * (i.e. reparenting).
77562306a36Sopenharmony_ci *
77662306a36Sopenharmony_ci * To avoid trivial access right bypasses, Landlock first checks if the file or
77762306a36Sopenharmony_ci * directory requested to be moved would gain new access rights inherited from
77862306a36Sopenharmony_ci * its new hierarchy.  Before returning any error, Landlock then checks that
77962306a36Sopenharmony_ci * the parent source hierarchy and the destination hierarchy would allow the
78062306a36Sopenharmony_ci * link or rename action.  If it is not the case, an error with EACCES is
78162306a36Sopenharmony_ci * returned to inform user space that there is no way to remove or create the
78262306a36Sopenharmony_ci * requested source file type.  If it should be allowed but the new inherited
78362306a36Sopenharmony_ci * access rights would be greater than the source access rights, then the
78462306a36Sopenharmony_ci * kernel returns an error with EXDEV.  Prioritizing EACCES over EXDEV enables
78562306a36Sopenharmony_ci * user space to abort the whole operation if there is no way to do it, or to
78662306a36Sopenharmony_ci * manually copy the source to the destination if this remains allowed, e.g.
78762306a36Sopenharmony_ci * because file creation is allowed on the destination directory but not direct
78862306a36Sopenharmony_ci * linking.
78962306a36Sopenharmony_ci *
79062306a36Sopenharmony_ci * To achieve this goal, the kernel needs to compare two file hierarchies: the
79162306a36Sopenharmony_ci * one identifying the source file or directory (including itself), and the
79262306a36Sopenharmony_ci * destination one.  This can be seen as a multilayer partial ordering problem.
79362306a36Sopenharmony_ci * The kernel walks through these paths and collects in a matrix the access
79462306a36Sopenharmony_ci * rights that are denied per layer.  These matrices are then compared to see
79562306a36Sopenharmony_ci * if the destination one has more (or the same) restrictions as the source
79662306a36Sopenharmony_ci * one.  If this is the case, the requested action will not return EXDEV, which
79762306a36Sopenharmony_ci * doesn't mean the action is allowed.  The parent hierarchy of the source
79862306a36Sopenharmony_ci * (i.e. parent directory), and the destination hierarchy must also be checked
79962306a36Sopenharmony_ci * to verify that they explicitly allow such action (i.e.  referencing,
80062306a36Sopenharmony_ci * creation and potentially removal rights).  The kernel implementation is then
80162306a36Sopenharmony_ci * required to rely on potentially four matrices of access rights: one for the
80262306a36Sopenharmony_ci * source file or directory (i.e. the child), a potentially other one for the
80362306a36Sopenharmony_ci * other source/destination (in case of RENAME_EXCHANGE), one for the source
80462306a36Sopenharmony_ci * parent hierarchy and a last one for the destination hierarchy.  These
80562306a36Sopenharmony_ci * ephemeral matrices take some space on the stack, which limits the number of
80662306a36Sopenharmony_ci * layers to a deemed reasonable number: 16.
80762306a36Sopenharmony_ci *
80862306a36Sopenharmony_ci * Returns:
80962306a36Sopenharmony_ci * - 0 if access is allowed;
81062306a36Sopenharmony_ci * - -EXDEV if @old_dentry would inherit new access rights from @new_dir;
81162306a36Sopenharmony_ci * - -EACCES if file removal or creation is denied.
81262306a36Sopenharmony_ci */
81362306a36Sopenharmony_cistatic int current_check_refer_path(struct dentry *const old_dentry,
81462306a36Sopenharmony_ci				    const struct path *const new_dir,
81562306a36Sopenharmony_ci				    struct dentry *const new_dentry,
81662306a36Sopenharmony_ci				    const bool removable, const bool exchange)
81762306a36Sopenharmony_ci{
81862306a36Sopenharmony_ci	const struct landlock_ruleset *const dom =
81962306a36Sopenharmony_ci		landlock_get_current_domain();
82062306a36Sopenharmony_ci	bool allow_parent1, allow_parent2;
82162306a36Sopenharmony_ci	access_mask_t access_request_parent1, access_request_parent2;
82262306a36Sopenharmony_ci	struct path mnt_dir;
82362306a36Sopenharmony_ci	layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
82462306a36Sopenharmony_ci		     layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {};
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	if (!dom)
82762306a36Sopenharmony_ci		return 0;
82862306a36Sopenharmony_ci	if (WARN_ON_ONCE(dom->num_layers < 1))
82962306a36Sopenharmony_ci		return -EACCES;
83062306a36Sopenharmony_ci	if (unlikely(d_is_negative(old_dentry)))
83162306a36Sopenharmony_ci		return -ENOENT;
83262306a36Sopenharmony_ci	if (exchange) {
83362306a36Sopenharmony_ci		if (unlikely(d_is_negative(new_dentry)))
83462306a36Sopenharmony_ci			return -ENOENT;
83562306a36Sopenharmony_ci		access_request_parent1 =
83662306a36Sopenharmony_ci			get_mode_access(d_backing_inode(new_dentry)->i_mode);
83762306a36Sopenharmony_ci	} else {
83862306a36Sopenharmony_ci		access_request_parent1 = 0;
83962306a36Sopenharmony_ci	}
84062306a36Sopenharmony_ci	access_request_parent2 =
84162306a36Sopenharmony_ci		get_mode_access(d_backing_inode(old_dentry)->i_mode);
84262306a36Sopenharmony_ci	if (removable) {
84362306a36Sopenharmony_ci		access_request_parent1 |= maybe_remove(old_dentry);
84462306a36Sopenharmony_ci		access_request_parent2 |= maybe_remove(new_dentry);
84562306a36Sopenharmony_ci	}
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci	/* The mount points are the same for old and new paths, cf. EXDEV. */
84862306a36Sopenharmony_ci	if (old_dentry->d_parent == new_dir->dentry) {
84962306a36Sopenharmony_ci		/*
85062306a36Sopenharmony_ci		 * The LANDLOCK_ACCESS_FS_REFER access right is not required
85162306a36Sopenharmony_ci		 * for same-directory referer (i.e. no reparenting).
85262306a36Sopenharmony_ci		 */
85362306a36Sopenharmony_ci		access_request_parent1 = init_layer_masks(
85462306a36Sopenharmony_ci			dom, access_request_parent1 | access_request_parent2,
85562306a36Sopenharmony_ci			&layer_masks_parent1);
85662306a36Sopenharmony_ci		if (is_access_to_paths_allowed(
85762306a36Sopenharmony_ci			    dom, new_dir, access_request_parent1,
85862306a36Sopenharmony_ci			    &layer_masks_parent1, NULL, 0, NULL, NULL))
85962306a36Sopenharmony_ci			return 0;
86062306a36Sopenharmony_ci		return -EACCES;
86162306a36Sopenharmony_ci	}
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER;
86462306a36Sopenharmony_ci	access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER;
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci	/* Saves the common mount point. */
86762306a36Sopenharmony_ci	mnt_dir.mnt = new_dir->mnt;
86862306a36Sopenharmony_ci	mnt_dir.dentry = new_dir->mnt->mnt_root;
86962306a36Sopenharmony_ci
87062306a36Sopenharmony_ci	/* new_dir->dentry is equal to new_dentry->d_parent */
87162306a36Sopenharmony_ci	allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
87262306a36Sopenharmony_ci						old_dentry->d_parent,
87362306a36Sopenharmony_ci						&layer_masks_parent1);
87462306a36Sopenharmony_ci	allow_parent2 = collect_domain_accesses(
87562306a36Sopenharmony_ci		dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci	if (allow_parent1 && allow_parent2)
87862306a36Sopenharmony_ci		return 0;
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	/*
88162306a36Sopenharmony_ci	 * To be able to compare source and destination domain access rights,
88262306a36Sopenharmony_ci	 * take into account the @old_dentry access rights aggregated with its
88362306a36Sopenharmony_ci	 * parent access rights.  This will be useful to compare with the
88462306a36Sopenharmony_ci	 * destination parent access rights.
88562306a36Sopenharmony_ci	 */
88662306a36Sopenharmony_ci	if (is_access_to_paths_allowed(
88762306a36Sopenharmony_ci		    dom, &mnt_dir, access_request_parent1, &layer_masks_parent1,
88862306a36Sopenharmony_ci		    old_dentry, access_request_parent2, &layer_masks_parent2,
88962306a36Sopenharmony_ci		    exchange ? new_dentry : NULL))
89062306a36Sopenharmony_ci		return 0;
89162306a36Sopenharmony_ci
89262306a36Sopenharmony_ci	/*
89362306a36Sopenharmony_ci	 * This prioritizes EACCES over EXDEV for all actions, including
89462306a36Sopenharmony_ci	 * renames with RENAME_EXCHANGE.
89562306a36Sopenharmony_ci	 */
89662306a36Sopenharmony_ci	if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) ||
89762306a36Sopenharmony_ci		   is_eacces(&layer_masks_parent2, access_request_parent2)))
89862306a36Sopenharmony_ci		return -EACCES;
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci	/*
90162306a36Sopenharmony_ci	 * Gracefully forbids reparenting if the destination directory
90262306a36Sopenharmony_ci	 * hierarchy is not a superset of restrictions of the source directory
90362306a36Sopenharmony_ci	 * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the
90462306a36Sopenharmony_ci	 * source or the destination.
90562306a36Sopenharmony_ci	 */
90662306a36Sopenharmony_ci	return -EXDEV;
90762306a36Sopenharmony_ci}
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci/* Inode hooks */
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_cistatic void hook_inode_free_security(struct inode *const inode)
91262306a36Sopenharmony_ci{
91362306a36Sopenharmony_ci	/*
91462306a36Sopenharmony_ci	 * All inodes must already have been untied from their object by
91562306a36Sopenharmony_ci	 * release_inode() or hook_sb_delete().
91662306a36Sopenharmony_ci	 */
91762306a36Sopenharmony_ci	WARN_ON_ONCE(landlock_inode(inode)->object);
91862306a36Sopenharmony_ci}
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_ci/* Super-block hooks */
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci/*
92362306a36Sopenharmony_ci * Release the inodes used in a security policy.
92462306a36Sopenharmony_ci *
92562306a36Sopenharmony_ci * Cf. fsnotify_unmount_inodes() and invalidate_inodes()
92662306a36Sopenharmony_ci */
92762306a36Sopenharmony_cistatic void hook_sb_delete(struct super_block *const sb)
92862306a36Sopenharmony_ci{
92962306a36Sopenharmony_ci	struct inode *inode, *prev_inode = NULL;
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci	if (!landlock_initialized)
93262306a36Sopenharmony_ci		return;
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	spin_lock(&sb->s_inode_list_lock);
93562306a36Sopenharmony_ci	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
93662306a36Sopenharmony_ci		struct landlock_object *object;
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci		/* Only handles referenced inodes. */
93962306a36Sopenharmony_ci		if (!atomic_read(&inode->i_count))
94062306a36Sopenharmony_ci			continue;
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci		/*
94362306a36Sopenharmony_ci		 * Protects against concurrent modification of inode (e.g.
94462306a36Sopenharmony_ci		 * from get_inode_object()).
94562306a36Sopenharmony_ci		 */
94662306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
94762306a36Sopenharmony_ci		/*
94862306a36Sopenharmony_ci		 * Checks I_FREEING and I_WILL_FREE  to protect against a race
94962306a36Sopenharmony_ci		 * condition when release_inode() just called iput(), which
95062306a36Sopenharmony_ci		 * could lead to a NULL dereference of inode->security or a
95162306a36Sopenharmony_ci		 * second call to iput() for the same Landlock object.  Also
95262306a36Sopenharmony_ci		 * checks I_NEW because such inode cannot be tied to an object.
95362306a36Sopenharmony_ci		 */
95462306a36Sopenharmony_ci		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
95562306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
95662306a36Sopenharmony_ci			continue;
95762306a36Sopenharmony_ci		}
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci		rcu_read_lock();
96062306a36Sopenharmony_ci		object = rcu_dereference(landlock_inode(inode)->object);
96162306a36Sopenharmony_ci		if (!object) {
96262306a36Sopenharmony_ci			rcu_read_unlock();
96362306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
96462306a36Sopenharmony_ci			continue;
96562306a36Sopenharmony_ci		}
96662306a36Sopenharmony_ci		/* Keeps a reference to this inode until the next loop walk. */
96762306a36Sopenharmony_ci		__iget(inode);
96862306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci		/*
97162306a36Sopenharmony_ci		 * If there is no concurrent release_inode() ongoing, then we
97262306a36Sopenharmony_ci		 * are in charge of calling iput() on this inode, otherwise we
97362306a36Sopenharmony_ci		 * will just wait for it to finish.
97462306a36Sopenharmony_ci		 */
97562306a36Sopenharmony_ci		spin_lock(&object->lock);
97662306a36Sopenharmony_ci		if (object->underobj == inode) {
97762306a36Sopenharmony_ci			object->underobj = NULL;
97862306a36Sopenharmony_ci			spin_unlock(&object->lock);
97962306a36Sopenharmony_ci			rcu_read_unlock();
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci			/*
98262306a36Sopenharmony_ci			 * Because object->underobj was not NULL,
98362306a36Sopenharmony_ci			 * release_inode() and get_inode_object() guarantee
98462306a36Sopenharmony_ci			 * that it is safe to reset
98562306a36Sopenharmony_ci			 * landlock_inode(inode)->object while it is not NULL.
98662306a36Sopenharmony_ci			 * It is therefore not necessary to lock inode->i_lock.
98762306a36Sopenharmony_ci			 */
98862306a36Sopenharmony_ci			rcu_assign_pointer(landlock_inode(inode)->object, NULL);
98962306a36Sopenharmony_ci			/*
99062306a36Sopenharmony_ci			 * At this point, we own the ihold() reference that was
99162306a36Sopenharmony_ci			 * originally set up by get_inode_object() and the
99262306a36Sopenharmony_ci			 * __iget() reference that we just set in this loop
99362306a36Sopenharmony_ci			 * walk.  Therefore the following call to iput() will
99462306a36Sopenharmony_ci			 * not sleep nor drop the inode because there is now at
99562306a36Sopenharmony_ci			 * least two references to it.
99662306a36Sopenharmony_ci			 */
99762306a36Sopenharmony_ci			iput(inode);
99862306a36Sopenharmony_ci		} else {
99962306a36Sopenharmony_ci			spin_unlock(&object->lock);
100062306a36Sopenharmony_ci			rcu_read_unlock();
100162306a36Sopenharmony_ci		}
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci		if (prev_inode) {
100462306a36Sopenharmony_ci			/*
100562306a36Sopenharmony_ci			 * At this point, we still own the __iget() reference
100662306a36Sopenharmony_ci			 * that we just set in this loop walk.  Therefore we
100762306a36Sopenharmony_ci			 * can drop the list lock and know that the inode won't
100862306a36Sopenharmony_ci			 * disappear from under us until the next loop walk.
100962306a36Sopenharmony_ci			 */
101062306a36Sopenharmony_ci			spin_unlock(&sb->s_inode_list_lock);
101162306a36Sopenharmony_ci			/*
101262306a36Sopenharmony_ci			 * We can now actually put the inode reference from the
101362306a36Sopenharmony_ci			 * previous loop walk, which is not needed anymore.
101462306a36Sopenharmony_ci			 */
101562306a36Sopenharmony_ci			iput(prev_inode);
101662306a36Sopenharmony_ci			cond_resched();
101762306a36Sopenharmony_ci			spin_lock(&sb->s_inode_list_lock);
101862306a36Sopenharmony_ci		}
101962306a36Sopenharmony_ci		prev_inode = inode;
102062306a36Sopenharmony_ci	}
102162306a36Sopenharmony_ci	spin_unlock(&sb->s_inode_list_lock);
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	/* Puts the inode reference from the last loop walk, if any. */
102462306a36Sopenharmony_ci	if (prev_inode)
102562306a36Sopenharmony_ci		iput(prev_inode);
102662306a36Sopenharmony_ci	/* Waits for pending iput() in release_inode(). */
102762306a36Sopenharmony_ci	wait_var_event(&landlock_superblock(sb)->inode_refs,
102862306a36Sopenharmony_ci		       !atomic_long_read(&landlock_superblock(sb)->inode_refs));
102962306a36Sopenharmony_ci}
103062306a36Sopenharmony_ci
/*
 * A Landlock security policy is expressed in terms of the filesystem topology
 * (i.e. the mount namespace).  Changing this topology may therefore grant
 * access to files that were not previously allowed.
 *
 * To keep things simple, any filesystem topology modification requested by a
 * landlocked process is denied.  A non-landlocked process may still change
 * the namespace of a landlocked process, but this kind of threat must be
 * handled by a system-wide access-control security policy.
 *
 * This restriction could be lifted in the future if Landlock can safely
 * handle mount namespace updates requested by a landlocked process.  Indeed,
 * the current domain (which is currently read-only) could be updated by
 * taking into account the accesses of the source and the destination of a new
 * mount point.  However, all the child domains would then have to dynamically
 * inherit these new constraints.  Anyway, for backward compatibility reasons,
 * a dedicated user space option would be required (e.g. as a ruleset flag).
 */
static int hook_sb_mount(const char *const dev_name,
			 const struct path *const path, const char *const type,
			 const unsigned long flags, void *const data)
{
	/* Only tasks without a Landlock domain are allowed to proceed. */
	return landlock_get_current_domain() ? -EPERM : 0;
}
105762306a36Sopenharmony_ci
/*
 * Denies the operation with -EPERM for any task that has a Landlock domain;
 * tasks without a domain are not restricted by this hook.
 */
static int hook_move_mount(const struct path *const from_path,
			   const struct path *const to_path)
{
	return landlock_get_current_domain() ? -EPERM : 0;
}
106562306a36Sopenharmony_ci
/*
 * Once a mount point is removed, the file hierarchy it was hiding may become
 * visible again, and with it files that may have been forbidden so far.
 * Landlocked tasks are therefore denied (-EPERM).
 */
static int hook_sb_umount(struct vfsmount *const mnt, const int flags)
{
	return landlock_get_current_domain() ? -EPERM : 0;
}
107662306a36Sopenharmony_ci
/*
 * Denies the operation with -EPERM for any task that has a Landlock domain;
 * tasks without a domain are not restricted by this hook.
 */
static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
{
	return landlock_get_current_domain() ? -EPERM : 0;
}
108362306a36Sopenharmony_ci
/*
 * Like mount(2), pivot_root(2) changes the current mount namespace, which is
 * why it must be forbidden for a landlocked process.
 *
 * On the other hand, chroot(2) may be allowed: it only changes the relative
 * root directory of the current process, and it can even be used to restrict
 * the view of the filesystem.
 */
static int hook_sb_pivotroot(const struct path *const old_path,
			     const struct path *const new_path)
{
	return landlock_get_current_domain() ? -EPERM : 0;
}
109962306a36Sopenharmony_ci
/* Path hooks */
110162306a36Sopenharmony_ci
110262306a36Sopenharmony_cistatic int hook_path_link(struct dentry *const old_dentry,
110362306a36Sopenharmony_ci			  const struct path *const new_dir,
110462306a36Sopenharmony_ci			  struct dentry *const new_dentry)
110562306a36Sopenharmony_ci{
110662306a36Sopenharmony_ci	return current_check_refer_path(old_dentry, new_dir, new_dentry, false,
110762306a36Sopenharmony_ci					false);
110862306a36Sopenharmony_ci}
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_cistatic int hook_path_rename(const struct path *const old_dir,
111162306a36Sopenharmony_ci			    struct dentry *const old_dentry,
111262306a36Sopenharmony_ci			    const struct path *const new_dir,
111362306a36Sopenharmony_ci			    struct dentry *const new_dentry,
111462306a36Sopenharmony_ci			    const unsigned int flags)
111562306a36Sopenharmony_ci{
111662306a36Sopenharmony_ci	/* old_dir refers to old_dentry->d_parent and new_dir->mnt */
111762306a36Sopenharmony_ci	return current_check_refer_path(old_dentry, new_dir, new_dentry, true,
111862306a36Sopenharmony_ci					!!(flags & RENAME_EXCHANGE));
111962306a36Sopenharmony_ci}
112062306a36Sopenharmony_ci
112162306a36Sopenharmony_cistatic int hook_path_mkdir(const struct path *const dir,
112262306a36Sopenharmony_ci			   struct dentry *const dentry, const umode_t mode)
112362306a36Sopenharmony_ci{
112462306a36Sopenharmony_ci	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
112562306a36Sopenharmony_ci}
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_cistatic int hook_path_mknod(const struct path *const dir,
112862306a36Sopenharmony_ci			   struct dentry *const dentry, const umode_t mode,
112962306a36Sopenharmony_ci			   const unsigned int dev)
113062306a36Sopenharmony_ci{
113162306a36Sopenharmony_ci	const struct landlock_ruleset *const dom =
113262306a36Sopenharmony_ci		landlock_get_current_domain();
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci	if (!dom)
113562306a36Sopenharmony_ci		return 0;
113662306a36Sopenharmony_ci	return check_access_path(dom, dir, get_mode_access(mode));
113762306a36Sopenharmony_ci}
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_cistatic int hook_path_symlink(const struct path *const dir,
114062306a36Sopenharmony_ci			     struct dentry *const dentry,
114162306a36Sopenharmony_ci			     const char *const old_name)
114262306a36Sopenharmony_ci{
114362306a36Sopenharmony_ci	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
114462306a36Sopenharmony_ci}
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_cistatic int hook_path_unlink(const struct path *const dir,
114762306a36Sopenharmony_ci			    struct dentry *const dentry)
114862306a36Sopenharmony_ci{
114962306a36Sopenharmony_ci	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
115062306a36Sopenharmony_ci}
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_cistatic int hook_path_rmdir(const struct path *const dir,
115362306a36Sopenharmony_ci			   struct dentry *const dentry)
115462306a36Sopenharmony_ci{
115562306a36Sopenharmony_ci	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
115662306a36Sopenharmony_ci}
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_cistatic int hook_path_truncate(const struct path *const path)
115962306a36Sopenharmony_ci{
116062306a36Sopenharmony_ci	return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE);
116162306a36Sopenharmony_ci}
116262306a36Sopenharmony_ci
/* File hooks */
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_ci/**
116662306a36Sopenharmony_ci * get_required_file_open_access - Get access needed to open a file
116762306a36Sopenharmony_ci *
116862306a36Sopenharmony_ci * @file: File being opened.
116962306a36Sopenharmony_ci *
117062306a36Sopenharmony_ci * Returns the access rights that are required for opening the given file,
117162306a36Sopenharmony_ci * depending on the file type and open mode.
117262306a36Sopenharmony_ci */
117362306a36Sopenharmony_cistatic inline access_mask_t
117462306a36Sopenharmony_ciget_required_file_open_access(const struct file *const file)
117562306a36Sopenharmony_ci{
117662306a36Sopenharmony_ci	access_mask_t access = 0;
117762306a36Sopenharmony_ci
117862306a36Sopenharmony_ci	if (file->f_mode & FMODE_READ) {
117962306a36Sopenharmony_ci		/* A directory can only be opened in read mode. */
118062306a36Sopenharmony_ci		if (S_ISDIR(file_inode(file)->i_mode))
118162306a36Sopenharmony_ci			return LANDLOCK_ACCESS_FS_READ_DIR;
118262306a36Sopenharmony_ci		access = LANDLOCK_ACCESS_FS_READ_FILE;
118362306a36Sopenharmony_ci	}
118462306a36Sopenharmony_ci	if (file->f_mode & FMODE_WRITE)
118562306a36Sopenharmony_ci		access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
118662306a36Sopenharmony_ci	/* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
118762306a36Sopenharmony_ci	if (file->f_flags & __FMODE_EXEC)
118862306a36Sopenharmony_ci		access |= LANDLOCK_ACCESS_FS_EXECUTE;
118962306a36Sopenharmony_ci	return access;
119062306a36Sopenharmony_ci}
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_cistatic int hook_file_alloc_security(struct file *const file)
119362306a36Sopenharmony_ci{
119462306a36Sopenharmony_ci	/*
119562306a36Sopenharmony_ci	 * Grants all access rights, even if most of them are not checked later
119662306a36Sopenharmony_ci	 * on. It is more consistent.
119762306a36Sopenharmony_ci	 *
119862306a36Sopenharmony_ci	 * Notably, file descriptors for regular files can also be acquired
119962306a36Sopenharmony_ci	 * without going through the file_open hook, for example when using
120062306a36Sopenharmony_ci	 * memfd_create(2).
120162306a36Sopenharmony_ci	 */
120262306a36Sopenharmony_ci	landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS;
120362306a36Sopenharmony_ci	return 0;
120462306a36Sopenharmony_ci}
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_cistatic int hook_file_open(struct file *const file)
120762306a36Sopenharmony_ci{
120862306a36Sopenharmony_ci	layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
120962306a36Sopenharmony_ci	access_mask_t open_access_request, full_access_request, allowed_access;
121062306a36Sopenharmony_ci	const access_mask_t optional_access = LANDLOCK_ACCESS_FS_TRUNCATE;
121162306a36Sopenharmony_ci	const struct landlock_ruleset *const dom =
121262306a36Sopenharmony_ci		landlock_get_current_domain();
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci	if (!dom)
121562306a36Sopenharmony_ci		return 0;
121662306a36Sopenharmony_ci
121762306a36Sopenharmony_ci	/*
121862306a36Sopenharmony_ci	 * Because a file may be opened with O_PATH, get_required_file_open_access()
121962306a36Sopenharmony_ci	 * may return 0.  This case will be handled with a future Landlock
122062306a36Sopenharmony_ci	 * evolution.
122162306a36Sopenharmony_ci	 */
122262306a36Sopenharmony_ci	open_access_request = get_required_file_open_access(file);
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci	/*
122562306a36Sopenharmony_ci	 * We look up more access than what we immediately need for open(), so
122662306a36Sopenharmony_ci	 * that we can later authorize operations on opened files.
122762306a36Sopenharmony_ci	 */
122862306a36Sopenharmony_ci	full_access_request = open_access_request | optional_access;
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci	if (is_access_to_paths_allowed(
123162306a36Sopenharmony_ci		    dom, &file->f_path,
123262306a36Sopenharmony_ci		    init_layer_masks(dom, full_access_request, &layer_masks),
123362306a36Sopenharmony_ci		    &layer_masks, NULL, 0, NULL, NULL)) {
123462306a36Sopenharmony_ci		allowed_access = full_access_request;
123562306a36Sopenharmony_ci	} else {
123662306a36Sopenharmony_ci		unsigned long access_bit;
123762306a36Sopenharmony_ci		const unsigned long access_req = full_access_request;
123862306a36Sopenharmony_ci
123962306a36Sopenharmony_ci		/*
124062306a36Sopenharmony_ci		 * Calculate the actual allowed access rights from layer_masks.
124162306a36Sopenharmony_ci		 * Add each access right to allowed_access which has not been
124262306a36Sopenharmony_ci		 * vetoed by any layer.
124362306a36Sopenharmony_ci		 */
124462306a36Sopenharmony_ci		allowed_access = 0;
124562306a36Sopenharmony_ci		for_each_set_bit(access_bit, &access_req,
124662306a36Sopenharmony_ci				 ARRAY_SIZE(layer_masks)) {
124762306a36Sopenharmony_ci			if (!layer_masks[access_bit])
124862306a36Sopenharmony_ci				allowed_access |= BIT_ULL(access_bit);
124962306a36Sopenharmony_ci		}
125062306a36Sopenharmony_ci	}
125162306a36Sopenharmony_ci
125262306a36Sopenharmony_ci	/*
125362306a36Sopenharmony_ci	 * For operations on already opened files (i.e. ftruncate()), it is the
125462306a36Sopenharmony_ci	 * access rights at the time of open() which decide whether the
125562306a36Sopenharmony_ci	 * operation is permitted. Therefore, we record the relevant subset of
125662306a36Sopenharmony_ci	 * file access rights in the opened struct file.
125762306a36Sopenharmony_ci	 */
125862306a36Sopenharmony_ci	landlock_file(file)->allowed_access = allowed_access;
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci	if ((open_access_request & allowed_access) == open_access_request)
126162306a36Sopenharmony_ci		return 0;
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_ci	return -EACCES;
126462306a36Sopenharmony_ci}
126562306a36Sopenharmony_ci
126662306a36Sopenharmony_cistatic int hook_file_truncate(struct file *const file)
126762306a36Sopenharmony_ci{
126862306a36Sopenharmony_ci	/*
126962306a36Sopenharmony_ci	 * Allows truncation if the truncate right was available at the time of
127062306a36Sopenharmony_ci	 * opening the file, to get a consistent access check as for read, write
127162306a36Sopenharmony_ci	 * and execute operations.
127262306a36Sopenharmony_ci	 *
127362306a36Sopenharmony_ci	 * Note: For checks done based on the file's Landlock allowed access, we
127462306a36Sopenharmony_ci	 * enforce them independently of whether the current thread is in a
127562306a36Sopenharmony_ci	 * Landlock domain, so that open files passed between independent
127662306a36Sopenharmony_ci	 * processes retain their behaviour.
127762306a36Sopenharmony_ci	 */
127862306a36Sopenharmony_ci	if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE)
127962306a36Sopenharmony_ci		return 0;
128062306a36Sopenharmony_ci	return -EACCES;
128162306a36Sopenharmony_ci}
128262306a36Sopenharmony_ci
128362306a36Sopenharmony_cistatic struct security_hook_list landlock_hooks[] __ro_after_init = {
128462306a36Sopenharmony_ci	LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci	LSM_HOOK_INIT(sb_delete, hook_sb_delete),
128762306a36Sopenharmony_ci	LSM_HOOK_INIT(sb_mount, hook_sb_mount),
128862306a36Sopenharmony_ci	LSM_HOOK_INIT(move_mount, hook_move_mount),
128962306a36Sopenharmony_ci	LSM_HOOK_INIT(sb_umount, hook_sb_umount),
129062306a36Sopenharmony_ci	LSM_HOOK_INIT(sb_remount, hook_sb_remount),
129162306a36Sopenharmony_ci	LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
129262306a36Sopenharmony_ci
129362306a36Sopenharmony_ci	LSM_HOOK_INIT(path_link, hook_path_link),
129462306a36Sopenharmony_ci	LSM_HOOK_INIT(path_rename, hook_path_rename),
129562306a36Sopenharmony_ci	LSM_HOOK_INIT(path_mkdir, hook_path_mkdir),
129662306a36Sopenharmony_ci	LSM_HOOK_INIT(path_mknod, hook_path_mknod),
129762306a36Sopenharmony_ci	LSM_HOOK_INIT(path_symlink, hook_path_symlink),
129862306a36Sopenharmony_ci	LSM_HOOK_INIT(path_unlink, hook_path_unlink),
129962306a36Sopenharmony_ci	LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
130062306a36Sopenharmony_ci	LSM_HOOK_INIT(path_truncate, hook_path_truncate),
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security),
130362306a36Sopenharmony_ci	LSM_HOOK_INIT(file_open, hook_file_open),
130462306a36Sopenharmony_ci	LSM_HOOK_INIT(file_truncate, hook_file_truncate),
130562306a36Sopenharmony_ci};
130662306a36Sopenharmony_ci
130762306a36Sopenharmony_ci__init void landlock_add_fs_hooks(void)
130862306a36Sopenharmony_ci{
130962306a36Sopenharmony_ci	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
131062306a36Sopenharmony_ci			   LANDLOCK_NAME);
131162306a36Sopenharmony_ci}
1312