162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright IBM Corporation, 2021
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Author: Mike Rapoport <rppt@linux.ibm.com>
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/mm.h>
962306a36Sopenharmony_ci#include <linux/fs.h>
1062306a36Sopenharmony_ci#include <linux/swap.h>
1162306a36Sopenharmony_ci#include <linux/mount.h>
1262306a36Sopenharmony_ci#include <linux/memfd.h>
1362306a36Sopenharmony_ci#include <linux/bitops.h>
1462306a36Sopenharmony_ci#include <linux/printk.h>
1562306a36Sopenharmony_ci#include <linux/pagemap.h>
1662306a36Sopenharmony_ci#include <linux/syscalls.h>
1762306a36Sopenharmony_ci#include <linux/pseudo_fs.h>
1862306a36Sopenharmony_ci#include <linux/secretmem.h>
1962306a36Sopenharmony_ci#include <linux/set_memory.h>
2062306a36Sopenharmony_ci#include <linux/sched/signal.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include <uapi/linux/magic.h>
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci#include <asm/tlbflush.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include "internal.h"
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci#undef pr_fmt
2962306a36Sopenharmony_ci#define pr_fmt(fmt) "secretmem: " fmt
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci/*
3262306a36Sopenharmony_ci * Define mode and flag masks to allow validation of the system call
3362306a36Sopenharmony_ci * parameters.
3462306a36Sopenharmony_ci */
3562306a36Sopenharmony_ci#define SECRETMEM_MODE_MASK	(0x0)
3662306a36Sopenharmony_ci#define SECRETMEM_FLAGS_MASK	SECRETMEM_MODE_MASK
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_cistatic bool secretmem_enable __ro_after_init = 1;
3962306a36Sopenharmony_cimodule_param_named(enable, secretmem_enable, bool, 0400);
4062306a36Sopenharmony_ciMODULE_PARM_DESC(secretmem_enable,
4162306a36Sopenharmony_ci		 "Enable secretmem and memfd_secret(2) system call");
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistatic atomic_t secretmem_users;
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_cibool secretmem_active(void)
4662306a36Sopenharmony_ci{
4762306a36Sopenharmony_ci	return !!atomic_read(&secretmem_users);
4862306a36Sopenharmony_ci}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_cistatic vm_fault_t secretmem_fault(struct vm_fault *vmf)
5162306a36Sopenharmony_ci{
5262306a36Sopenharmony_ci	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
5362306a36Sopenharmony_ci	struct inode *inode = file_inode(vmf->vma->vm_file);
5462306a36Sopenharmony_ci	pgoff_t offset = vmf->pgoff;
5562306a36Sopenharmony_ci	gfp_t gfp = vmf->gfp_mask;
5662306a36Sopenharmony_ci	unsigned long addr;
5762306a36Sopenharmony_ci	struct page *page;
5862306a36Sopenharmony_ci	struct folio *folio;
5962306a36Sopenharmony_ci	vm_fault_t ret;
6062306a36Sopenharmony_ci	int err;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
6362306a36Sopenharmony_ci		return vmf_error(-EINVAL);
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	filemap_invalidate_lock_shared(mapping);
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ciretry:
6862306a36Sopenharmony_ci	page = find_lock_page(mapping, offset);
6962306a36Sopenharmony_ci	if (!page) {
7062306a36Sopenharmony_ci		folio = folio_alloc(gfp | __GFP_ZERO, 0);
7162306a36Sopenharmony_ci		if (!folio) {
7262306a36Sopenharmony_ci			ret = VM_FAULT_OOM;
7362306a36Sopenharmony_ci			goto out;
7462306a36Sopenharmony_ci		}
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci		page = &folio->page;
7762306a36Sopenharmony_ci		err = set_direct_map_invalid_noflush(page);
7862306a36Sopenharmony_ci		if (err) {
7962306a36Sopenharmony_ci			folio_put(folio);
8062306a36Sopenharmony_ci			ret = vmf_error(err);
8162306a36Sopenharmony_ci			goto out;
8262306a36Sopenharmony_ci		}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci		__folio_mark_uptodate(folio);
8562306a36Sopenharmony_ci		err = filemap_add_folio(mapping, folio, offset, gfp);
8662306a36Sopenharmony_ci		if (unlikely(err)) {
8762306a36Sopenharmony_ci			folio_put(folio);
8862306a36Sopenharmony_ci			/*
8962306a36Sopenharmony_ci			 * If a split of large page was required, it
9062306a36Sopenharmony_ci			 * already happened when we marked the page invalid
9162306a36Sopenharmony_ci			 * which guarantees that this call won't fail
9262306a36Sopenharmony_ci			 */
9362306a36Sopenharmony_ci			set_direct_map_default_noflush(page);
9462306a36Sopenharmony_ci			if (err == -EEXIST)
9562306a36Sopenharmony_ci				goto retry;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci			ret = vmf_error(err);
9862306a36Sopenharmony_ci			goto out;
9962306a36Sopenharmony_ci		}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci		addr = (unsigned long)page_address(page);
10262306a36Sopenharmony_ci		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	vmf->page = page;
10662306a36Sopenharmony_ci	ret = VM_FAULT_LOCKED;
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ciout:
10962306a36Sopenharmony_ci	filemap_invalidate_unlock_shared(mapping);
11062306a36Sopenharmony_ci	return ret;
11162306a36Sopenharmony_ci}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_cistatic const struct vm_operations_struct secretmem_vm_ops = {
11462306a36Sopenharmony_ci	.fault = secretmem_fault,
11562306a36Sopenharmony_ci};
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_cistatic int secretmem_release(struct inode *inode, struct file *file)
11862306a36Sopenharmony_ci{
11962306a36Sopenharmony_ci	atomic_dec(&secretmem_users);
12062306a36Sopenharmony_ci	return 0;
12162306a36Sopenharmony_ci}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_cistatic int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci	unsigned long len = vma->vm_end - vma->vm_start;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
12862306a36Sopenharmony_ci		return -EINVAL;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	if (!mlock_future_ok(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
13162306a36Sopenharmony_ci		return -EAGAIN;
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	vm_flags_set(vma, VM_LOCKED | VM_DONTDUMP);
13462306a36Sopenharmony_ci	vma->vm_ops = &secretmem_vm_ops;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	return 0;
13762306a36Sopenharmony_ci}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_cibool vma_is_secretmem(struct vm_area_struct *vma)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	return vma->vm_ops == &secretmem_vm_ops;
14262306a36Sopenharmony_ci}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_cistatic const struct file_operations secretmem_fops = {
14562306a36Sopenharmony_ci	.release	= secretmem_release,
14662306a36Sopenharmony_ci	.mmap		= secretmem_mmap,
14762306a36Sopenharmony_ci};
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_cistatic int secretmem_migrate_folio(struct address_space *mapping,
15062306a36Sopenharmony_ci		struct folio *dst, struct folio *src, enum migrate_mode mode)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	return -EBUSY;
15362306a36Sopenharmony_ci}
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_cistatic void secretmem_free_folio(struct folio *folio)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	set_direct_map_default_noflush(&folio->page);
15862306a36Sopenharmony_ci	folio_zero_segment(folio, 0, folio_size(folio));
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ciconst struct address_space_operations secretmem_aops = {
16262306a36Sopenharmony_ci	.dirty_folio	= noop_dirty_folio,
16362306a36Sopenharmony_ci	.free_folio	= secretmem_free_folio,
16462306a36Sopenharmony_ci	.migrate_folio	= secretmem_migrate_folio,
16562306a36Sopenharmony_ci};
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_cistatic int secretmem_setattr(struct mnt_idmap *idmap,
16862306a36Sopenharmony_ci			     struct dentry *dentry, struct iattr *iattr)
16962306a36Sopenharmony_ci{
17062306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
17162306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
17262306a36Sopenharmony_ci	unsigned int ia_valid = iattr->ia_valid;
17362306a36Sopenharmony_ci	int ret;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	filemap_invalidate_lock(mapping);
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	if ((ia_valid & ATTR_SIZE) && inode->i_size)
17862306a36Sopenharmony_ci		ret = -EINVAL;
17962306a36Sopenharmony_ci	else
18062306a36Sopenharmony_ci		ret = simple_setattr(idmap, dentry, iattr);
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	filemap_invalidate_unlock(mapping);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	return ret;
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_cistatic const struct inode_operations secretmem_iops = {
18862306a36Sopenharmony_ci	.setattr = secretmem_setattr,
18962306a36Sopenharmony_ci};
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_cistatic struct vfsmount *secretmem_mnt;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_cistatic struct file *secretmem_file_create(unsigned long flags)
19462306a36Sopenharmony_ci{
19562306a36Sopenharmony_ci	struct file *file;
19662306a36Sopenharmony_ci	struct inode *inode;
19762306a36Sopenharmony_ci	const char *anon_name = "[secretmem]";
19862306a36Sopenharmony_ci	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
19962306a36Sopenharmony_ci	int err;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
20262306a36Sopenharmony_ci	if (IS_ERR(inode))
20362306a36Sopenharmony_ci		return ERR_CAST(inode);
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	err = security_inode_init_security_anon(inode, &qname, NULL);
20662306a36Sopenharmony_ci	if (err) {
20762306a36Sopenharmony_ci		file = ERR_PTR(err);
20862306a36Sopenharmony_ci		goto err_free_inode;
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
21262306a36Sopenharmony_ci				 O_RDWR, &secretmem_fops);
21362306a36Sopenharmony_ci	if (IS_ERR(file))
21462306a36Sopenharmony_ci		goto err_free_inode;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
21762306a36Sopenharmony_ci	mapping_set_unevictable(inode->i_mapping);
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	inode->i_op = &secretmem_iops;
22062306a36Sopenharmony_ci	inode->i_mapping->a_ops = &secretmem_aops;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	/* pretend we are a normal file with zero size */
22362306a36Sopenharmony_ci	inode->i_mode |= S_IFREG;
22462306a36Sopenharmony_ci	inode->i_size = 0;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	return file;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_cierr_free_inode:
22962306a36Sopenharmony_ci	iput(inode);
23062306a36Sopenharmony_ci	return file;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ciSYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct file *file;
23662306a36Sopenharmony_ci	int fd, err;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	/* make sure local flags do not confict with global fcntl.h */
23962306a36Sopenharmony_ci	BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	if (!secretmem_enable)
24262306a36Sopenharmony_ci		return -ENOSYS;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
24562306a36Sopenharmony_ci		return -EINVAL;
24662306a36Sopenharmony_ci	if (atomic_read(&secretmem_users) < 0)
24762306a36Sopenharmony_ci		return -ENFILE;
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	fd = get_unused_fd_flags(flags & O_CLOEXEC);
25062306a36Sopenharmony_ci	if (fd < 0)
25162306a36Sopenharmony_ci		return fd;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	file = secretmem_file_create(flags);
25462306a36Sopenharmony_ci	if (IS_ERR(file)) {
25562306a36Sopenharmony_ci		err = PTR_ERR(file);
25662306a36Sopenharmony_ci		goto err_put_fd;
25762306a36Sopenharmony_ci	}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	file->f_flags |= O_LARGEFILE;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	atomic_inc(&secretmem_users);
26262306a36Sopenharmony_ci	fd_install(fd, file);
26362306a36Sopenharmony_ci	return fd;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_cierr_put_fd:
26662306a36Sopenharmony_ci	put_unused_fd(fd);
26762306a36Sopenharmony_ci	return err;
26862306a36Sopenharmony_ci}
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_cistatic int secretmem_init_fs_context(struct fs_context *fc)
27162306a36Sopenharmony_ci{
27262306a36Sopenharmony_ci	return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
27362306a36Sopenharmony_ci}
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_cistatic struct file_system_type secretmem_fs = {
27662306a36Sopenharmony_ci	.name		= "secretmem",
27762306a36Sopenharmony_ci	.init_fs_context = secretmem_init_fs_context,
27862306a36Sopenharmony_ci	.kill_sb	= kill_anon_super,
27962306a36Sopenharmony_ci};
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_cistatic int __init secretmem_init(void)
28262306a36Sopenharmony_ci{
28362306a36Sopenharmony_ci	if (!secretmem_enable)
28462306a36Sopenharmony_ci		return 0;
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	secretmem_mnt = kern_mount(&secretmem_fs);
28762306a36Sopenharmony_ci	if (IS_ERR(secretmem_mnt))
28862306a36Sopenharmony_ci		return PTR_ERR(secretmem_mnt);
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	/* prevent secretmem mappings from ever getting PROT_EXEC */
29162306a36Sopenharmony_ci	secretmem_mnt->mnt_flags |= MNT_NOEXEC;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	return 0;
29462306a36Sopenharmony_ci}
29562306a36Sopenharmony_cifs_initcall(secretmem_init);
296