162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright IBM Corporation, 2021 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Author: Mike Rapoport <rppt@linux.ibm.com> 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/mm.h> 962306a36Sopenharmony_ci#include <linux/fs.h> 1062306a36Sopenharmony_ci#include <linux/swap.h> 1162306a36Sopenharmony_ci#include <linux/mount.h> 1262306a36Sopenharmony_ci#include <linux/memfd.h> 1362306a36Sopenharmony_ci#include <linux/bitops.h> 1462306a36Sopenharmony_ci#include <linux/printk.h> 1562306a36Sopenharmony_ci#include <linux/pagemap.h> 1662306a36Sopenharmony_ci#include <linux/syscalls.h> 1762306a36Sopenharmony_ci#include <linux/pseudo_fs.h> 1862306a36Sopenharmony_ci#include <linux/secretmem.h> 1962306a36Sopenharmony_ci#include <linux/set_memory.h> 2062306a36Sopenharmony_ci#include <linux/sched/signal.h> 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci#include <uapi/linux/magic.h> 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#include <asm/tlbflush.h> 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#include "internal.h" 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci#undef pr_fmt 2962306a36Sopenharmony_ci#define pr_fmt(fmt) "secretmem: " fmt 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci/* 3262306a36Sopenharmony_ci * Define mode and flag masks to allow validation of the system call 3362306a36Sopenharmony_ci * parameters. 3462306a36Sopenharmony_ci */ 3562306a36Sopenharmony_ci#define SECRETMEM_MODE_MASK (0x0) 3662306a36Sopenharmony_ci#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_cistatic bool secretmem_enable __ro_after_init = 1; 3962306a36Sopenharmony_cimodule_param_named(enable, secretmem_enable, bool, 0400); 4062306a36Sopenharmony_ciMODULE_PARM_DESC(secretmem_enable, 4162306a36Sopenharmony_ci "Enable secretmem and memfd_secret(2) system call"); 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_cistatic atomic_t secretmem_users; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_cibool secretmem_active(void) 4662306a36Sopenharmony_ci{ 4762306a36Sopenharmony_ci return !!atomic_read(&secretmem_users); 4862306a36Sopenharmony_ci} 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cistatic vm_fault_t secretmem_fault(struct vm_fault *vmf) 5162306a36Sopenharmony_ci{ 5262306a36Sopenharmony_ci struct address_space *mapping = vmf->vma->vm_file->f_mapping; 5362306a36Sopenharmony_ci struct inode *inode = file_inode(vmf->vma->vm_file); 5462306a36Sopenharmony_ci pgoff_t offset = vmf->pgoff; 5562306a36Sopenharmony_ci gfp_t gfp = vmf->gfp_mask; 5662306a36Sopenharmony_ci unsigned long addr; 5762306a36Sopenharmony_ci struct page *page; 5862306a36Sopenharmony_ci struct folio *folio; 5962306a36Sopenharmony_ci vm_fault_t ret; 6062306a36Sopenharmony_ci int err; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) 6362306a36Sopenharmony_ci return vmf_error(-EINVAL); 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci filemap_invalidate_lock_shared(mapping); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ciretry: 6862306a36Sopenharmony_ci page = find_lock_page(mapping, offset); 6962306a36Sopenharmony_ci if (!page) { 7062306a36Sopenharmony_ci folio = folio_alloc(gfp | __GFP_ZERO, 0); 7162306a36Sopenharmony_ci if (!folio) { 7262306a36Sopenharmony_ci ret = VM_FAULT_OOM; 7362306a36Sopenharmony_ci goto out; 7462306a36Sopenharmony_ci } 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci page = &folio->page; 7762306a36Sopenharmony_ci err = set_direct_map_invalid_noflush(page); 7862306a36Sopenharmony_ci if (err) { 7962306a36Sopenharmony_ci folio_put(folio); 8062306a36Sopenharmony_ci ret = vmf_error(err); 8162306a36Sopenharmony_ci goto out; 8262306a36Sopenharmony_ci } 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci __folio_mark_uptodate(folio); 8562306a36Sopenharmony_ci err = filemap_add_folio(mapping, folio, offset, gfp); 8662306a36Sopenharmony_ci if (unlikely(err)) { 8762306a36Sopenharmony_ci folio_put(folio); 8862306a36Sopenharmony_ci /* 8962306a36Sopenharmony_ci * If a split of large page was required, it 9062306a36Sopenharmony_ci * already happened when we marked the page invalid 9162306a36Sopenharmony_ci * which guarantees that this call won't fail 9262306a36Sopenharmony_ci */ 9362306a36Sopenharmony_ci set_direct_map_default_noflush(page); 9462306a36Sopenharmony_ci if (err == -EEXIST) 9562306a36Sopenharmony_ci goto retry; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci ret = vmf_error(err); 9862306a36Sopenharmony_ci goto out; 9962306a36Sopenharmony_ci } 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci addr = (unsigned long)page_address(page); 10262306a36Sopenharmony_ci flush_tlb_kernel_range(addr, addr + PAGE_SIZE); 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci vmf->page = page; 10662306a36Sopenharmony_ci ret = VM_FAULT_LOCKED; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ciout: 10962306a36Sopenharmony_ci filemap_invalidate_unlock_shared(mapping); 11062306a36Sopenharmony_ci return ret; 11162306a36Sopenharmony_ci} 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_cistatic const struct vm_operations_struct secretmem_vm_ops = { 11462306a36Sopenharmony_ci .fault = secretmem_fault, 11562306a36Sopenharmony_ci}; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_cistatic int secretmem_release(struct inode *inode, struct file *file) 11862306a36Sopenharmony_ci{ 11962306a36Sopenharmony_ci atomic_dec(&secretmem_users); 12062306a36Sopenharmony_ci return 0; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_cistatic int secretmem_mmap(struct file *file, struct vm_area_struct *vma) 12462306a36Sopenharmony_ci{ 12562306a36Sopenharmony_ci unsigned long len = vma->vm_end - vma->vm_start; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) 12862306a36Sopenharmony_ci return -EINVAL; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci if (!mlock_future_ok(vma->vm_mm, vma->vm_flags | VM_LOCKED, len)) 13162306a36Sopenharmony_ci return -EAGAIN; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci vm_flags_set(vma, VM_LOCKED | VM_DONTDUMP); 13462306a36Sopenharmony_ci vma->vm_ops = &secretmem_vm_ops; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci return 0; 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_cibool vma_is_secretmem(struct vm_area_struct *vma) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci return vma->vm_ops == &secretmem_vm_ops; 14262306a36Sopenharmony_ci} 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_cistatic const struct file_operations secretmem_fops = { 14562306a36Sopenharmony_ci .release = secretmem_release, 14662306a36Sopenharmony_ci .mmap = secretmem_mmap, 14762306a36Sopenharmony_ci}; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_cistatic int secretmem_migrate_folio(struct address_space *mapping, 15062306a36Sopenharmony_ci struct folio *dst, struct folio *src, enum migrate_mode mode) 15162306a36Sopenharmony_ci{ 15262306a36Sopenharmony_ci return -EBUSY; 15362306a36Sopenharmony_ci} 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_cistatic void secretmem_free_folio(struct folio *folio) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci set_direct_map_default_noflush(&folio->page); 15862306a36Sopenharmony_ci folio_zero_segment(folio, 0, folio_size(folio)); 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ciconst struct address_space_operations secretmem_aops = { 16262306a36Sopenharmony_ci .dirty_folio = noop_dirty_folio, 16362306a36Sopenharmony_ci .free_folio = secretmem_free_folio, 16462306a36Sopenharmony_ci .migrate_folio = secretmem_migrate_folio, 16562306a36Sopenharmony_ci}; 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_cistatic int secretmem_setattr(struct mnt_idmap *idmap, 16862306a36Sopenharmony_ci struct dentry *dentry, struct iattr *iattr) 16962306a36Sopenharmony_ci{ 17062306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 17162306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 17262306a36Sopenharmony_ci unsigned int ia_valid = iattr->ia_valid; 17362306a36Sopenharmony_ci int ret; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci filemap_invalidate_lock(mapping); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci if ((ia_valid & ATTR_SIZE) && inode->i_size) 17862306a36Sopenharmony_ci ret = -EINVAL; 17962306a36Sopenharmony_ci else 18062306a36Sopenharmony_ci ret = simple_setattr(idmap, dentry, iattr); 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci filemap_invalidate_unlock(mapping); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci return ret; 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_cistatic const struct inode_operations secretmem_iops = { 18862306a36Sopenharmony_ci .setattr = secretmem_setattr, 18962306a36Sopenharmony_ci}; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_cistatic struct vfsmount *secretmem_mnt; 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_cistatic struct file *secretmem_file_create(unsigned long flags) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci struct file *file; 19662306a36Sopenharmony_ci struct inode *inode; 19762306a36Sopenharmony_ci const char *anon_name = "[secretmem]"; 19862306a36Sopenharmony_ci const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name)); 19962306a36Sopenharmony_ci int err; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci inode = alloc_anon_inode(secretmem_mnt->mnt_sb); 20262306a36Sopenharmony_ci if (IS_ERR(inode)) 20362306a36Sopenharmony_ci return ERR_CAST(inode); 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci err = security_inode_init_security_anon(inode, &qname, NULL); 20662306a36Sopenharmony_ci if (err) { 20762306a36Sopenharmony_ci file = ERR_PTR(err); 20862306a36Sopenharmony_ci goto err_free_inode; 20962306a36Sopenharmony_ci } 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", 21262306a36Sopenharmony_ci O_RDWR, &secretmem_fops); 21362306a36Sopenharmony_ci if (IS_ERR(file)) 21462306a36Sopenharmony_ci goto err_free_inode; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); 21762306a36Sopenharmony_ci mapping_set_unevictable(inode->i_mapping); 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci inode->i_op = &secretmem_iops; 22062306a36Sopenharmony_ci inode->i_mapping->a_ops = &secretmem_aops; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci /* pretend we are a normal file with zero size */ 22362306a36Sopenharmony_ci inode->i_mode |= S_IFREG; 22462306a36Sopenharmony_ci inode->i_size = 0; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci return file; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_cierr_free_inode: 22962306a36Sopenharmony_ci iput(inode); 23062306a36Sopenharmony_ci return file; 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ciSYSCALL_DEFINE1(memfd_secret, unsigned int, flags) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci struct file *file; 23662306a36Sopenharmony_ci int fd, err; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci /* make sure local flags do not confict with global fcntl.h */ 23962306a36Sopenharmony_ci BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci if (!secretmem_enable) 24262306a36Sopenharmony_ci return -ENOSYS; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) 24562306a36Sopenharmony_ci return -EINVAL; 24662306a36Sopenharmony_ci if (atomic_read(&secretmem_users) < 0) 24762306a36Sopenharmony_ci return -ENFILE; 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci fd = get_unused_fd_flags(flags & O_CLOEXEC); 25062306a36Sopenharmony_ci if (fd < 0) 25162306a36Sopenharmony_ci return fd; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci file = secretmem_file_create(flags); 25462306a36Sopenharmony_ci if (IS_ERR(file)) { 25562306a36Sopenharmony_ci err = PTR_ERR(file); 25662306a36Sopenharmony_ci goto err_put_fd; 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci file->f_flags |= O_LARGEFILE; 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci atomic_inc(&secretmem_users); 26262306a36Sopenharmony_ci fd_install(fd, file); 26362306a36Sopenharmony_ci return fd; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_cierr_put_fd: 26662306a36Sopenharmony_ci put_unused_fd(fd); 26762306a36Sopenharmony_ci return err; 26862306a36Sopenharmony_ci} 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_cistatic int secretmem_init_fs_context(struct fs_context *fc) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM; 27362306a36Sopenharmony_ci} 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_cistatic struct file_system_type secretmem_fs = { 27662306a36Sopenharmony_ci .name = "secretmem", 27762306a36Sopenharmony_ci .init_fs_context = secretmem_init_fs_context, 27862306a36Sopenharmony_ci .kill_sb = kill_anon_super, 27962306a36Sopenharmony_ci}; 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_cistatic int __init secretmem_init(void) 28262306a36Sopenharmony_ci{ 28362306a36Sopenharmony_ci if (!secretmem_enable) 28462306a36Sopenharmony_ci return 0; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci secretmem_mnt = kern_mount(&secretmem_fs); 28762306a36Sopenharmony_ci if (IS_ERR(secretmem_mnt)) 28862306a36Sopenharmony_ci return PTR_ERR(secretmem_mnt); 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci /* prevent secretmem mappings from ever getting PROT_EXEC */ 29162306a36Sopenharmony_ci secretmem_mnt->mnt_flags |= MNT_NOEXEC; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci return 0; 29462306a36Sopenharmony_ci} 29562306a36Sopenharmony_cifs_initcall(secretmem_init); 296