162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Resizable virtual memory filesystem for Linux. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) 2000 Linus Torvalds. 562306a36Sopenharmony_ci * 2000 Transmeta Corp. 662306a36Sopenharmony_ci * 2000-2001 Christoph Rohland 762306a36Sopenharmony_ci * 2000-2001 SAP AG 862306a36Sopenharmony_ci * 2002 Red Hat Inc. 962306a36Sopenharmony_ci * Copyright (C) 2002-2011 Hugh Dickins. 1062306a36Sopenharmony_ci * Copyright (C) 2011 Google Inc. 1162306a36Sopenharmony_ci * Copyright (C) 2002-2005 VERITAS Software Corporation. 1262306a36Sopenharmony_ci * Copyright (C) 2004 Andi Kleen, SuSE Labs 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * Extended attribute support for tmpfs: 1562306a36Sopenharmony_ci * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net> 1662306a36Sopenharmony_ci * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * tiny-shmem: 1962306a36Sopenharmony_ci * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com> 2062306a36Sopenharmony_ci * 2162306a36Sopenharmony_ci * This file is released under the GPL. 2262306a36Sopenharmony_ci */ 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#include <linux/fs.h> 2562306a36Sopenharmony_ci#include <linux/init.h> 2662306a36Sopenharmony_ci#include <linux/vfs.h> 2762306a36Sopenharmony_ci#include <linux/mount.h> 2862306a36Sopenharmony_ci#include <linux/ramfs.h> 2962306a36Sopenharmony_ci#include <linux/pagemap.h> 3062306a36Sopenharmony_ci#include <linux/file.h> 3162306a36Sopenharmony_ci#include <linux/fileattr.h> 3262306a36Sopenharmony_ci#include <linux/mm.h> 3362306a36Sopenharmony_ci#include <linux/random.h> 3462306a36Sopenharmony_ci#include <linux/sched/signal.h> 3562306a36Sopenharmony_ci#include <linux/export.h> 3662306a36Sopenharmony_ci#include <linux/shmem_fs.h> 3762306a36Sopenharmony_ci#include <linux/swap.h> 3862306a36Sopenharmony_ci#include <linux/uio.h> 3962306a36Sopenharmony_ci#include <linux/hugetlb.h> 4062306a36Sopenharmony_ci#include <linux/fs_parser.h> 4162306a36Sopenharmony_ci#include <linux/swapfile.h> 4262306a36Sopenharmony_ci#include <linux/iversion.h> 4362306a36Sopenharmony_ci#include "swap.h" 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_cistatic struct vfsmount *shm_mnt; 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci#ifdef CONFIG_SHMEM 4862306a36Sopenharmony_ci/* 4962306a36Sopenharmony_ci * This virtual memory filesystem is heavily based on the ramfs. It 5062306a36Sopenharmony_ci * extends ramfs by the ability to use swap and honor resource limits 5162306a36Sopenharmony_ci * which makes it a completely usable filesystem. 5262306a36Sopenharmony_ci */ 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci#include <linux/xattr.h> 5562306a36Sopenharmony_ci#include <linux/exportfs.h> 5662306a36Sopenharmony_ci#include <linux/posix_acl.h> 5762306a36Sopenharmony_ci#include <linux/posix_acl_xattr.h> 5862306a36Sopenharmony_ci#include <linux/mman.h> 5962306a36Sopenharmony_ci#include <linux/string.h> 6062306a36Sopenharmony_ci#include <linux/slab.h> 6162306a36Sopenharmony_ci#include <linux/backing-dev.h> 6262306a36Sopenharmony_ci#include <linux/writeback.h> 6362306a36Sopenharmony_ci#include <linux/pagevec.h> 6462306a36Sopenharmony_ci#include <linux/percpu_counter.h> 6562306a36Sopenharmony_ci#include <linux/falloc.h> 6662306a36Sopenharmony_ci#include <linux/splice.h> 6762306a36Sopenharmony_ci#include <linux/security.h> 6862306a36Sopenharmony_ci#include <linux/swapops.h> 6962306a36Sopenharmony_ci#include <linux/mempolicy.h> 7062306a36Sopenharmony_ci#include <linux/namei.h> 7162306a36Sopenharmony_ci#include <linux/ctype.h> 7262306a36Sopenharmony_ci#include <linux/migrate.h> 7362306a36Sopenharmony_ci#include <linux/highmem.h> 7462306a36Sopenharmony_ci#include <linux/seq_file.h> 7562306a36Sopenharmony_ci#include <linux/magic.h> 7662306a36Sopenharmony_ci#include <linux/syscalls.h> 7762306a36Sopenharmony_ci#include <linux/fcntl.h> 7862306a36Sopenharmony_ci#include <uapi/linux/memfd.h> 7962306a36Sopenharmony_ci#include <linux/rmap.h> 8062306a36Sopenharmony_ci#include <linux/uuid.h> 8162306a36Sopenharmony_ci#include <linux/quotaops.h> 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci#include <linux/uaccess.h> 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci#include "internal.h" 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci#define BLOCKS_PER_PAGE (PAGE_SIZE/512) 8862306a36Sopenharmony_ci#define VM_ACCT(size) (PAGE_ALIGN(size) >> PAGE_SHIFT) 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci/* Pretend that each entry is of this size in directory's i_size */ 9162306a36Sopenharmony_ci#define BOGO_DIRENT_SIZE 20 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci/* Pretend that one inode + its dentry occupy this much memory */ 9462306a36Sopenharmony_ci#define BOGO_INODE_SIZE 1024 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci/* Symlink up to this size is kmalloc'ed instead of using a swappable page */ 9762306a36Sopenharmony_ci#define SHORT_SYMLINK_LEN 128 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci/* 10062306a36Sopenharmony_ci * shmem_fallocate communicates with shmem_fault or shmem_writepage via 10162306a36Sopenharmony_ci * inode->i_private (with i_rwsem making sure that it has only one user at 10262306a36Sopenharmony_ci * a time): we would prefer not to enlarge the shmem inode just for that. 10362306a36Sopenharmony_ci */ 10462306a36Sopenharmony_cistruct shmem_falloc { 10562306a36Sopenharmony_ci wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ 10662306a36Sopenharmony_ci pgoff_t start; /* start of range currently being fallocated */ 10762306a36Sopenharmony_ci pgoff_t next; /* the next page offset to be fallocated */ 10862306a36Sopenharmony_ci pgoff_t nr_falloced; /* how many new pages have been fallocated */ 10962306a36Sopenharmony_ci pgoff_t nr_unswapped; /* how often writepage refused to swap out */ 11062306a36Sopenharmony_ci}; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_cistruct shmem_options { 11362306a36Sopenharmony_ci unsigned long long blocks; 11462306a36Sopenharmony_ci unsigned long long inodes; 11562306a36Sopenharmony_ci struct mempolicy *mpol; 11662306a36Sopenharmony_ci kuid_t uid; 11762306a36Sopenharmony_ci kgid_t gid; 11862306a36Sopenharmony_ci umode_t mode; 11962306a36Sopenharmony_ci bool full_inums; 12062306a36Sopenharmony_ci int huge; 12162306a36Sopenharmony_ci int seen; 12262306a36Sopenharmony_ci bool noswap; 12362306a36Sopenharmony_ci unsigned short quota_types; 12462306a36Sopenharmony_ci struct shmem_quota_limits qlimits; 12562306a36Sopenharmony_ci#define SHMEM_SEEN_BLOCKS 1 12662306a36Sopenharmony_ci#define SHMEM_SEEN_INODES 2 12762306a36Sopenharmony_ci#define SHMEM_SEEN_HUGE 4 12862306a36Sopenharmony_ci#define SHMEM_SEEN_INUMS 8 12962306a36Sopenharmony_ci#define SHMEM_SEEN_NOSWAP 16 13062306a36Sopenharmony_ci#define SHMEM_SEEN_QUOTA 32 13162306a36Sopenharmony_ci}; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 13462306a36Sopenharmony_cistatic unsigned long shmem_default_max_blocks(void) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci return totalram_pages() / 2; 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_cistatic unsigned long shmem_default_max_inodes(void) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci unsigned long nr_pages = totalram_pages(); 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci return min3(nr_pages - totalhigh_pages(), nr_pages / 2, 14462306a36Sopenharmony_ci ULONG_MAX / BOGO_INODE_SIZE); 14562306a36Sopenharmony_ci} 14662306a36Sopenharmony_ci#endif 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_cistatic int shmem_swapin_folio(struct inode *inode, pgoff_t index, 14962306a36Sopenharmony_ci struct folio **foliop, enum sgp_type sgp, 15062306a36Sopenharmony_ci gfp_t gfp, struct vm_area_struct *vma, 15162306a36Sopenharmony_ci vm_fault_t *fault_type); 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_cistatic inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) 15462306a36Sopenharmony_ci{ 15562306a36Sopenharmony_ci return sb->s_fs_info; 15662306a36Sopenharmony_ci} 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci/* 15962306a36Sopenharmony_ci * shmem_file_setup pre-accounts the whole fixed size of a VM object, 16062306a36Sopenharmony_ci * for shared memory and for shared anonymous (/dev/zero) mappings 16162306a36Sopenharmony_ci * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), 16262306a36Sopenharmony_ci * consistent with the pre-accounting of private mappings ... 16362306a36Sopenharmony_ci */ 16462306a36Sopenharmony_cistatic inline int shmem_acct_size(unsigned long flags, loff_t size) 16562306a36Sopenharmony_ci{ 16662306a36Sopenharmony_ci return (flags & VM_NORESERVE) ? 16762306a36Sopenharmony_ci 0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size)); 16862306a36Sopenharmony_ci} 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_cistatic inline void shmem_unacct_size(unsigned long flags, loff_t size) 17162306a36Sopenharmony_ci{ 17262306a36Sopenharmony_ci if (!(flags & VM_NORESERVE)) 17362306a36Sopenharmony_ci vm_unacct_memory(VM_ACCT(size)); 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_cistatic inline int shmem_reacct_size(unsigned long flags, 17762306a36Sopenharmony_ci loff_t oldsize, loff_t newsize) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci if (!(flags & VM_NORESERVE)) { 18062306a36Sopenharmony_ci if (VM_ACCT(newsize) > VM_ACCT(oldsize)) 18162306a36Sopenharmony_ci return security_vm_enough_memory_mm(current->mm, 18262306a36Sopenharmony_ci VM_ACCT(newsize) - VM_ACCT(oldsize)); 18362306a36Sopenharmony_ci else if (VM_ACCT(newsize) < VM_ACCT(oldsize)) 18462306a36Sopenharmony_ci vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize)); 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci return 0; 18762306a36Sopenharmony_ci} 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci/* 19062306a36Sopenharmony_ci * ... whereas tmpfs objects are accounted incrementally as 19162306a36Sopenharmony_ci * pages are allocated, in order to allow large sparse files. 19262306a36Sopenharmony_ci * shmem_get_folio reports shmem_acct_block failure as -ENOSPC not -ENOMEM, 19362306a36Sopenharmony_ci * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM. 19462306a36Sopenharmony_ci */ 19562306a36Sopenharmony_cistatic inline int shmem_acct_block(unsigned long flags, long pages) 19662306a36Sopenharmony_ci{ 19762306a36Sopenharmony_ci if (!(flags & VM_NORESERVE)) 19862306a36Sopenharmony_ci return 0; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci return security_vm_enough_memory_mm(current->mm, 20162306a36Sopenharmony_ci pages * VM_ACCT(PAGE_SIZE)); 20262306a36Sopenharmony_ci} 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_cistatic inline void shmem_unacct_blocks(unsigned long flags, long pages) 20562306a36Sopenharmony_ci{ 20662306a36Sopenharmony_ci if (flags & VM_NORESERVE) 20762306a36Sopenharmony_ci vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE)); 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic int shmem_inode_acct_block(struct inode *inode, long pages) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 21362306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 21462306a36Sopenharmony_ci int err = -ENOSPC; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci if (shmem_acct_block(info->flags, pages)) 21762306a36Sopenharmony_ci return err; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci might_sleep(); /* when quotas */ 22062306a36Sopenharmony_ci if (sbinfo->max_blocks) { 22162306a36Sopenharmony_ci if (percpu_counter_compare(&sbinfo->used_blocks, 22262306a36Sopenharmony_ci sbinfo->max_blocks - pages) > 0) 22362306a36Sopenharmony_ci goto unacct; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci err = dquot_alloc_block_nodirty(inode, pages); 22662306a36Sopenharmony_ci if (err) 22762306a36Sopenharmony_ci goto unacct; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci percpu_counter_add(&sbinfo->used_blocks, pages); 23062306a36Sopenharmony_ci } else { 23162306a36Sopenharmony_ci err = dquot_alloc_block_nodirty(inode, pages); 23262306a36Sopenharmony_ci if (err) 23362306a36Sopenharmony_ci goto unacct; 23462306a36Sopenharmony_ci } 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci return 0; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ciunacct: 23962306a36Sopenharmony_ci shmem_unacct_blocks(info->flags, pages); 24062306a36Sopenharmony_ci return err; 24162306a36Sopenharmony_ci} 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_cistatic void shmem_inode_unacct_blocks(struct inode *inode, long pages) 24462306a36Sopenharmony_ci{ 24562306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 24662306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci might_sleep(); /* when quotas */ 24962306a36Sopenharmony_ci dquot_free_block_nodirty(inode, pages); 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci if (sbinfo->max_blocks) 25262306a36Sopenharmony_ci percpu_counter_sub(&sbinfo->used_blocks, pages); 25362306a36Sopenharmony_ci shmem_unacct_blocks(info->flags, pages); 25462306a36Sopenharmony_ci} 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_cistatic const struct super_operations shmem_ops; 25762306a36Sopenharmony_ciconst struct address_space_operations shmem_aops; 25862306a36Sopenharmony_cistatic const struct file_operations shmem_file_operations; 25962306a36Sopenharmony_cistatic const struct inode_operations shmem_inode_operations; 26062306a36Sopenharmony_cistatic const struct inode_operations shmem_dir_inode_operations; 26162306a36Sopenharmony_cistatic const struct inode_operations shmem_special_inode_operations; 26262306a36Sopenharmony_cistatic const struct vm_operations_struct shmem_vm_ops; 26362306a36Sopenharmony_cistatic const struct vm_operations_struct shmem_anon_vm_ops; 26462306a36Sopenharmony_cistatic struct file_system_type shmem_fs_type; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_cibool vma_is_anon_shmem(struct vm_area_struct *vma) 26762306a36Sopenharmony_ci{ 26862306a36Sopenharmony_ci return vma->vm_ops == &shmem_anon_vm_ops; 26962306a36Sopenharmony_ci} 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_cibool vma_is_shmem(struct vm_area_struct *vma) 27262306a36Sopenharmony_ci{ 27362306a36Sopenharmony_ci return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops; 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistatic LIST_HEAD(shmem_swaplist); 27762306a36Sopenharmony_cistatic DEFINE_MUTEX(shmem_swaplist_mutex); 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_cistatic int shmem_enable_quotas(struct super_block *sb, 28262306a36Sopenharmony_ci unsigned short quota_types) 28362306a36Sopenharmony_ci{ 28462306a36Sopenharmony_ci int type, err = 0; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY; 28762306a36Sopenharmony_ci for (type = 0; type < SHMEM_MAXQUOTAS; type++) { 28862306a36Sopenharmony_ci if (!(quota_types & (1 << type))) 28962306a36Sopenharmony_ci continue; 29062306a36Sopenharmony_ci err = dquot_load_quota_sb(sb, type, QFMT_SHMEM, 29162306a36Sopenharmony_ci DQUOT_USAGE_ENABLED | 29262306a36Sopenharmony_ci DQUOT_LIMITS_ENABLED); 29362306a36Sopenharmony_ci if (err) 29462306a36Sopenharmony_ci goto out_err; 29562306a36Sopenharmony_ci } 29662306a36Sopenharmony_ci return 0; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ciout_err: 29962306a36Sopenharmony_ci pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n", 30062306a36Sopenharmony_ci type, err); 30162306a36Sopenharmony_ci for (type--; type >= 0; type--) 30262306a36Sopenharmony_ci dquot_quota_off(sb, type); 30362306a36Sopenharmony_ci return err; 30462306a36Sopenharmony_ci} 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_cistatic void shmem_disable_quotas(struct super_block *sb) 30762306a36Sopenharmony_ci{ 30862306a36Sopenharmony_ci int type; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci for (type = 0; type < SHMEM_MAXQUOTAS; type++) 31162306a36Sopenharmony_ci dquot_quota_off(sb, type); 31262306a36Sopenharmony_ci} 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_cistatic struct dquot __rcu **shmem_get_dquots(struct inode *inode) 31562306a36Sopenharmony_ci{ 31662306a36Sopenharmony_ci return SHMEM_I(inode)->i_dquot; 31762306a36Sopenharmony_ci} 31862306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */ 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci/* 32162306a36Sopenharmony_ci * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and 32262306a36Sopenharmony_ci * produces a novel ino for the newly allocated inode. 32362306a36Sopenharmony_ci * 32462306a36Sopenharmony_ci * It may also be called when making a hard link to permit the space needed by 32562306a36Sopenharmony_ci * each dentry. However, in that case, no new inode number is needed since that 32662306a36Sopenharmony_ci * internally draws from another pool of inode numbers (currently global 32762306a36Sopenharmony_ci * get_next_ino()). This case is indicated by passing NULL as inop. 32862306a36Sopenharmony_ci */ 32962306a36Sopenharmony_ci#define SHMEM_INO_BATCH 1024 33062306a36Sopenharmony_cistatic int shmem_reserve_inode(struct super_block *sb, ino_t *inop) 33162306a36Sopenharmony_ci{ 33262306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 33362306a36Sopenharmony_ci ino_t ino; 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci if (!(sb->s_flags & SB_KERNMOUNT)) { 33662306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 33762306a36Sopenharmony_ci if (sbinfo->max_inodes) { 33862306a36Sopenharmony_ci if (sbinfo->free_ispace < BOGO_INODE_SIZE) { 33962306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 34062306a36Sopenharmony_ci return -ENOSPC; 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci sbinfo->free_ispace -= BOGO_INODE_SIZE; 34362306a36Sopenharmony_ci } 34462306a36Sopenharmony_ci if (inop) { 34562306a36Sopenharmony_ci ino = sbinfo->next_ino++; 34662306a36Sopenharmony_ci if (unlikely(is_zero_ino(ino))) 34762306a36Sopenharmony_ci ino = sbinfo->next_ino++; 34862306a36Sopenharmony_ci if (unlikely(!sbinfo->full_inums && 34962306a36Sopenharmony_ci ino > UINT_MAX)) { 35062306a36Sopenharmony_ci /* 35162306a36Sopenharmony_ci * Emulate get_next_ino uint wraparound for 35262306a36Sopenharmony_ci * compatibility 35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_64BIT)) 35562306a36Sopenharmony_ci pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n", 35662306a36Sopenharmony_ci __func__, MINOR(sb->s_dev)); 35762306a36Sopenharmony_ci sbinfo->next_ino = 1; 35862306a36Sopenharmony_ci ino = sbinfo->next_ino++; 35962306a36Sopenharmony_ci } 36062306a36Sopenharmony_ci *inop = ino; 36162306a36Sopenharmony_ci } 36262306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 36362306a36Sopenharmony_ci } else if (inop) { 36462306a36Sopenharmony_ci /* 36562306a36Sopenharmony_ci * __shmem_file_setup, one of our callers, is lock-free: it 36662306a36Sopenharmony_ci * doesn't hold stat_lock in shmem_reserve_inode since 36762306a36Sopenharmony_ci * max_inodes is always 0, and is called from potentially 36862306a36Sopenharmony_ci * unknown contexts. As such, use a per-cpu batched allocator 36962306a36Sopenharmony_ci * which doesn't require the per-sb stat_lock unless we are at 37062306a36Sopenharmony_ci * the batch boundary. 37162306a36Sopenharmony_ci * 37262306a36Sopenharmony_ci * We don't need to worry about inode{32,64} since SB_KERNMOUNT 37362306a36Sopenharmony_ci * shmem mounts are not exposed to userspace, so we don't need 37462306a36Sopenharmony_ci * to worry about things like glibc compatibility. 37562306a36Sopenharmony_ci */ 37662306a36Sopenharmony_ci ino_t *next_ino; 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); 37962306a36Sopenharmony_ci ino = *next_ino; 38062306a36Sopenharmony_ci if (unlikely(ino % SHMEM_INO_BATCH == 0)) { 38162306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 38262306a36Sopenharmony_ci ino = sbinfo->next_ino; 38362306a36Sopenharmony_ci sbinfo->next_ino += SHMEM_INO_BATCH; 38462306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 38562306a36Sopenharmony_ci if (unlikely(is_zero_ino(ino))) 38662306a36Sopenharmony_ci ino++; 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci *inop = ino; 38962306a36Sopenharmony_ci *next_ino = ++ino; 39062306a36Sopenharmony_ci put_cpu(); 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci return 0; 39462306a36Sopenharmony_ci} 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_cistatic void shmem_free_inode(struct super_block *sb, size_t freed_ispace) 39762306a36Sopenharmony_ci{ 39862306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 39962306a36Sopenharmony_ci if (sbinfo->max_inodes) { 40062306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 40162306a36Sopenharmony_ci sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace; 40262306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 40362306a36Sopenharmony_ci } 40462306a36Sopenharmony_ci} 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci/** 40762306a36Sopenharmony_ci * shmem_recalc_inode - recalculate the block usage of an inode 40862306a36Sopenharmony_ci * @inode: inode to recalc 40962306a36Sopenharmony_ci * @alloced: the change in number of pages allocated to inode 41062306a36Sopenharmony_ci * @swapped: the change in number of pages swapped from inode 41162306a36Sopenharmony_ci * 41262306a36Sopenharmony_ci * We have to calculate the free blocks since the mm can drop 41362306a36Sopenharmony_ci * undirtied hole pages behind our back. 41462306a36Sopenharmony_ci * 41562306a36Sopenharmony_ci * But normally info->alloced == inode->i_mapping->nrpages + info->swapped 41662306a36Sopenharmony_ci * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) 41762306a36Sopenharmony_ci */ 41862306a36Sopenharmony_cistatic void shmem_recalc_inode(struct inode *inode, long alloced, long swapped) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 42162306a36Sopenharmony_ci long freed; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci spin_lock(&info->lock); 42462306a36Sopenharmony_ci info->alloced += alloced; 42562306a36Sopenharmony_ci info->swapped += swapped; 42662306a36Sopenharmony_ci freed = info->alloced - info->swapped - 42762306a36Sopenharmony_ci READ_ONCE(inode->i_mapping->nrpages); 42862306a36Sopenharmony_ci /* 42962306a36Sopenharmony_ci * Special case: whereas normally shmem_recalc_inode() is called 43062306a36Sopenharmony_ci * after i_mapping->nrpages has already been adjusted (up or down), 43162306a36Sopenharmony_ci * shmem_writepage() has to raise swapped before nrpages is lowered - 43262306a36Sopenharmony_ci * to stop a racing shmem_recalc_inode() from thinking that a page has 43362306a36Sopenharmony_ci * been freed. Compensate here, to avoid the need for a followup call. 43462306a36Sopenharmony_ci */ 43562306a36Sopenharmony_ci if (swapped > 0) 43662306a36Sopenharmony_ci freed += swapped; 43762306a36Sopenharmony_ci if (freed > 0) 43862306a36Sopenharmony_ci info->alloced -= freed; 43962306a36Sopenharmony_ci spin_unlock(&info->lock); 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci /* The quota case may block */ 44262306a36Sopenharmony_ci if (freed > 0) 44362306a36Sopenharmony_ci shmem_inode_unacct_blocks(inode, freed); 44462306a36Sopenharmony_ci} 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_cibool shmem_charge(struct inode *inode, long pages) 44762306a36Sopenharmony_ci{ 44862306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci if (shmem_inode_acct_block(inode, pages)) 45162306a36Sopenharmony_ci return false; 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ 45462306a36Sopenharmony_ci xa_lock_irq(&mapping->i_pages); 45562306a36Sopenharmony_ci mapping->nrpages += pages; 45662306a36Sopenharmony_ci xa_unlock_irq(&mapping->i_pages); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci shmem_recalc_inode(inode, pages, 0); 45962306a36Sopenharmony_ci return true; 46062306a36Sopenharmony_ci} 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_civoid shmem_uncharge(struct inode *inode, long pages) 46362306a36Sopenharmony_ci{ 46462306a36Sopenharmony_ci /* pages argument is currently unused: keep it to help debugging */ 46562306a36Sopenharmony_ci /* nrpages adjustment done by __filemap_remove_folio() or caller */ 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci shmem_recalc_inode(inode, 0, 0); 46862306a36Sopenharmony_ci} 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci/* 47162306a36Sopenharmony_ci * Replace item expected in xarray by a new item, while holding xa_lock. 47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_cistatic int shmem_replace_entry(struct address_space *mapping, 47462306a36Sopenharmony_ci pgoff_t index, void *expected, void *replacement) 47562306a36Sopenharmony_ci{ 47662306a36Sopenharmony_ci XA_STATE(xas, &mapping->i_pages, index); 47762306a36Sopenharmony_ci void *item; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci VM_BUG_ON(!expected); 48062306a36Sopenharmony_ci VM_BUG_ON(!replacement); 48162306a36Sopenharmony_ci item = xas_load(&xas); 48262306a36Sopenharmony_ci if (item != expected) 48362306a36Sopenharmony_ci return -ENOENT; 48462306a36Sopenharmony_ci xas_store(&xas, replacement); 48562306a36Sopenharmony_ci return 0; 48662306a36Sopenharmony_ci} 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci/* 48962306a36Sopenharmony_ci * Sometimes, before we decide whether to proceed or to fail, we must check 49062306a36Sopenharmony_ci * that an entry was not already brought back from swap by a racing thread. 49162306a36Sopenharmony_ci * 49262306a36Sopenharmony_ci * Checking page is not enough: by the time a SwapCache page is locked, it 49362306a36Sopenharmony_ci * might be reused, and again be SwapCache, using the same swap as before. 49462306a36Sopenharmony_ci */ 49562306a36Sopenharmony_cistatic bool shmem_confirm_swap(struct address_space *mapping, 49662306a36Sopenharmony_ci pgoff_t index, swp_entry_t swap) 49762306a36Sopenharmony_ci{ 49862306a36Sopenharmony_ci return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); 49962306a36Sopenharmony_ci} 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci/* 50262306a36Sopenharmony_ci * Definitions for "huge tmpfs": tmpfs mounted with the huge= option 50362306a36Sopenharmony_ci * 50462306a36Sopenharmony_ci * SHMEM_HUGE_NEVER: 50562306a36Sopenharmony_ci * disables huge pages for the mount; 50662306a36Sopenharmony_ci * SHMEM_HUGE_ALWAYS: 50762306a36Sopenharmony_ci * enables huge pages for the mount; 50862306a36Sopenharmony_ci * SHMEM_HUGE_WITHIN_SIZE: 50962306a36Sopenharmony_ci * only allocate huge pages if the page will be fully within i_size, 51062306a36Sopenharmony_ci * also respect fadvise()/madvise() hints; 51162306a36Sopenharmony_ci * SHMEM_HUGE_ADVISE: 51262306a36Sopenharmony_ci * only allocate huge pages if requested with fadvise()/madvise(); 51362306a36Sopenharmony_ci */ 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci#define SHMEM_HUGE_NEVER 0 51662306a36Sopenharmony_ci#define SHMEM_HUGE_ALWAYS 1 51762306a36Sopenharmony_ci#define SHMEM_HUGE_WITHIN_SIZE 2 51862306a36Sopenharmony_ci#define SHMEM_HUGE_ADVISE 3 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci/* 52162306a36Sopenharmony_ci * Special values. 52262306a36Sopenharmony_ci * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled: 52362306a36Sopenharmony_ci * 52462306a36Sopenharmony_ci * SHMEM_HUGE_DENY: 52562306a36Sopenharmony_ci * disables huge on shm_mnt and all mounts, for emergency use; 52662306a36Sopenharmony_ci * SHMEM_HUGE_FORCE: 52762306a36Sopenharmony_ci * enables huge on shm_mnt and all mounts, w/o needing option, for testing; 52862306a36Sopenharmony_ci * 52962306a36Sopenharmony_ci */ 53062306a36Sopenharmony_ci#define SHMEM_HUGE_DENY (-1) 53162306a36Sopenharmony_ci#define SHMEM_HUGE_FORCE (-2) 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 53462306a36Sopenharmony_ci/* ifdef here to avoid bloating shmem.o when not necessary */ 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_cistatic int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_cibool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, 53962306a36Sopenharmony_ci struct mm_struct *mm, unsigned long vm_flags) 54062306a36Sopenharmony_ci{ 54162306a36Sopenharmony_ci loff_t i_size; 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci if (!S_ISREG(inode->i_mode)) 54462306a36Sopenharmony_ci return false; 54562306a36Sopenharmony_ci if (mm && ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &mm->flags))) 54662306a36Sopenharmony_ci return false; 54762306a36Sopenharmony_ci if (shmem_huge == SHMEM_HUGE_DENY) 54862306a36Sopenharmony_ci return false; 54962306a36Sopenharmony_ci if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE) 55062306a36Sopenharmony_ci return true; 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci switch (SHMEM_SB(inode->i_sb)->huge) { 55362306a36Sopenharmony_ci case SHMEM_HUGE_ALWAYS: 55462306a36Sopenharmony_ci return true; 55562306a36Sopenharmony_ci case SHMEM_HUGE_WITHIN_SIZE: 55662306a36Sopenharmony_ci index = round_up(index + 1, HPAGE_PMD_NR); 55762306a36Sopenharmony_ci i_size = round_up(i_size_read(inode), PAGE_SIZE); 55862306a36Sopenharmony_ci if (i_size >> PAGE_SHIFT >= index) 55962306a36Sopenharmony_ci return true; 56062306a36Sopenharmony_ci fallthrough; 56162306a36Sopenharmony_ci case SHMEM_HUGE_ADVISE: 56262306a36Sopenharmony_ci if (mm && (vm_flags & VM_HUGEPAGE)) 56362306a36Sopenharmony_ci return true; 56462306a36Sopenharmony_ci fallthrough; 56562306a36Sopenharmony_ci default: 56662306a36Sopenharmony_ci return false; 56762306a36Sopenharmony_ci } 56862306a36Sopenharmony_ci} 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci#if defined(CONFIG_SYSFS) 57162306a36Sopenharmony_cistatic int shmem_parse_huge(const char *str) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci if (!strcmp(str, "never")) 57462306a36Sopenharmony_ci return SHMEM_HUGE_NEVER; 57562306a36Sopenharmony_ci if (!strcmp(str, "always")) 57662306a36Sopenharmony_ci return SHMEM_HUGE_ALWAYS; 57762306a36Sopenharmony_ci if (!strcmp(str, "within_size")) 57862306a36Sopenharmony_ci return SHMEM_HUGE_WITHIN_SIZE; 57962306a36Sopenharmony_ci if (!strcmp(str, "advise")) 58062306a36Sopenharmony_ci return SHMEM_HUGE_ADVISE; 58162306a36Sopenharmony_ci if (!strcmp(str, "deny")) 58262306a36Sopenharmony_ci return SHMEM_HUGE_DENY; 58362306a36Sopenharmony_ci if (!strcmp(str, "force")) 58462306a36Sopenharmony_ci return SHMEM_HUGE_FORCE; 58562306a36Sopenharmony_ci return -EINVAL; 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ci#endif 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) 59062306a36Sopenharmony_cistatic const char *shmem_format_huge(int huge) 59162306a36Sopenharmony_ci{ 59262306a36Sopenharmony_ci switch (huge) { 59362306a36Sopenharmony_ci case SHMEM_HUGE_NEVER: 59462306a36Sopenharmony_ci return "never"; 59562306a36Sopenharmony_ci case SHMEM_HUGE_ALWAYS: 59662306a36Sopenharmony_ci return "always"; 59762306a36Sopenharmony_ci case SHMEM_HUGE_WITHIN_SIZE: 59862306a36Sopenharmony_ci return "within_size"; 59962306a36Sopenharmony_ci case SHMEM_HUGE_ADVISE: 60062306a36Sopenharmony_ci return "advise"; 60162306a36Sopenharmony_ci case SHMEM_HUGE_DENY: 60262306a36Sopenharmony_ci return "deny"; 60362306a36Sopenharmony_ci case SHMEM_HUGE_FORCE: 60462306a36Sopenharmony_ci return "force"; 60562306a36Sopenharmony_ci default: 60662306a36Sopenharmony_ci VM_BUG_ON(1); 60762306a36Sopenharmony_ci return "bad_val"; 60862306a36Sopenharmony_ci } 60962306a36Sopenharmony_ci} 61062306a36Sopenharmony_ci#endif 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_cistatic unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, 61362306a36Sopenharmony_ci struct shrink_control *sc, unsigned long nr_to_split) 61462306a36Sopenharmony_ci{ 61562306a36Sopenharmony_ci LIST_HEAD(list), *pos, *next; 61662306a36Sopenharmony_ci LIST_HEAD(to_remove); 61762306a36Sopenharmony_ci struct inode *inode; 61862306a36Sopenharmony_ci struct shmem_inode_info *info; 61962306a36Sopenharmony_ci struct folio *folio; 62062306a36Sopenharmony_ci unsigned long batch = sc ? sc->nr_to_scan : 128; 62162306a36Sopenharmony_ci int split = 0; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci if (list_empty(&sbinfo->shrinklist)) 62462306a36Sopenharmony_ci return SHRINK_STOP; 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 62762306a36Sopenharmony_ci list_for_each_safe(pos, next, &sbinfo->shrinklist) { 62862306a36Sopenharmony_ci info = list_entry(pos, struct shmem_inode_info, shrinklist); 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci /* pin the inode */ 63162306a36Sopenharmony_ci inode = igrab(&info->vfs_inode); 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci /* inode is about to be evicted */ 63462306a36Sopenharmony_ci if (!inode) { 63562306a36Sopenharmony_ci list_del_init(&info->shrinklist); 63662306a36Sopenharmony_ci goto next; 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci /* Check if there's anything to gain */ 64062306a36Sopenharmony_ci if (round_up(inode->i_size, PAGE_SIZE) == 64162306a36Sopenharmony_ci round_up(inode->i_size, HPAGE_PMD_SIZE)) { 64262306a36Sopenharmony_ci list_move(&info->shrinklist, &to_remove); 64362306a36Sopenharmony_ci goto next; 64462306a36Sopenharmony_ci } 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci list_move(&info->shrinklist, &list); 64762306a36Sopenharmony_cinext: 64862306a36Sopenharmony_ci sbinfo->shrinklist_len--; 64962306a36Sopenharmony_ci if (!--batch) 65062306a36Sopenharmony_ci break; 65162306a36Sopenharmony_ci } 65262306a36Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci list_for_each_safe(pos, next, &to_remove) { 65562306a36Sopenharmony_ci info = list_entry(pos, struct shmem_inode_info, shrinklist); 65662306a36Sopenharmony_ci inode = &info->vfs_inode; 65762306a36Sopenharmony_ci list_del_init(&info->shrinklist); 65862306a36Sopenharmony_ci iput(inode); 65962306a36Sopenharmony_ci } 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci list_for_each_safe(pos, next, &list) { 66262306a36Sopenharmony_ci int ret; 66362306a36Sopenharmony_ci pgoff_t index; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci info = list_entry(pos, struct shmem_inode_info, shrinklist); 66662306a36Sopenharmony_ci inode = &info->vfs_inode; 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci if (nr_to_split && split >= nr_to_split) 66962306a36Sopenharmony_ci goto move_back; 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT; 67262306a36Sopenharmony_ci folio = filemap_get_folio(inode->i_mapping, index); 67362306a36Sopenharmony_ci if (IS_ERR(folio)) 67462306a36Sopenharmony_ci goto drop; 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci /* No huge page at the end of the file: nothing to split */ 67762306a36Sopenharmony_ci if (!folio_test_large(folio)) { 67862306a36Sopenharmony_ci folio_put(folio); 67962306a36Sopenharmony_ci goto drop; 68062306a36Sopenharmony_ci } 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci /* 68362306a36Sopenharmony_ci * Move the inode on the list back to shrinklist if we failed 68462306a36Sopenharmony_ci * to lock the page at this time. 68562306a36Sopenharmony_ci * 68662306a36Sopenharmony_ci * Waiting for the lock may lead to deadlock in the 68762306a36Sopenharmony_ci * reclaim path. 68862306a36Sopenharmony_ci */ 68962306a36Sopenharmony_ci if (!folio_trylock(folio)) { 69062306a36Sopenharmony_ci folio_put(folio); 69162306a36Sopenharmony_ci goto move_back; 69262306a36Sopenharmony_ci } 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci ret = split_folio(folio); 69562306a36Sopenharmony_ci folio_unlock(folio); 69662306a36Sopenharmony_ci folio_put(folio); 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci /* If split failed move the inode on the list back to shrinklist */ 69962306a36Sopenharmony_ci if (ret) 70062306a36Sopenharmony_ci goto move_back; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci split++; 70362306a36Sopenharmony_cidrop: 70462306a36Sopenharmony_ci list_del_init(&info->shrinklist); 70562306a36Sopenharmony_ci goto put; 70662306a36Sopenharmony_cimove_back: 70762306a36Sopenharmony_ci /* 70862306a36Sopenharmony_ci * Make sure the inode is either on the global list or deleted 70962306a36Sopenharmony_ci * from any local list before iput() since it could be deleted 71062306a36Sopenharmony_ci * in another thread once we put the inode (then the local list 71162306a36Sopenharmony_ci * is corrupted). 71262306a36Sopenharmony_ci */ 71362306a36Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 71462306a36Sopenharmony_ci list_move(&info->shrinklist, &sbinfo->shrinklist); 71562306a36Sopenharmony_ci sbinfo->shrinklist_len++; 71662306a36Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 71762306a36Sopenharmony_ciput: 71862306a36Sopenharmony_ci iput(inode); 71962306a36Sopenharmony_ci } 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci return split; 72262306a36Sopenharmony_ci} 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_cistatic long shmem_unused_huge_scan(struct super_block *sb, 72562306a36Sopenharmony_ci struct shrink_control *sc) 72662306a36Sopenharmony_ci{ 72762306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci if (!READ_ONCE(sbinfo->shrinklist_len)) 73062306a36Sopenharmony_ci return SHRINK_STOP; 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci return shmem_unused_huge_shrink(sbinfo, sc, 0); 73362306a36Sopenharmony_ci} 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_cistatic long shmem_unused_huge_count(struct super_block *sb, 73662306a36Sopenharmony_ci struct shrink_control *sc) 73762306a36Sopenharmony_ci{ 73862306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 73962306a36Sopenharmony_ci return READ_ONCE(sbinfo->shrinklist_len); 74062306a36Sopenharmony_ci} 74162306a36Sopenharmony_ci#else /* !CONFIG_TRANSPARENT_HUGEPAGE */ 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci#define shmem_huge SHMEM_HUGE_DENY 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_cibool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, 74662306a36Sopenharmony_ci struct mm_struct *mm, unsigned long vm_flags) 74762306a36Sopenharmony_ci{ 74862306a36Sopenharmony_ci return false; 74962306a36Sopenharmony_ci} 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_cistatic unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, 75262306a36Sopenharmony_ci struct shrink_control *sc, unsigned long nr_to_split) 75362306a36Sopenharmony_ci{ 75462306a36Sopenharmony_ci return 0; 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci/* 75962306a36Sopenharmony_ci * Like filemap_add_folio, but error if expected item has gone. 76062306a36Sopenharmony_ci */ 76162306a36Sopenharmony_cistatic int shmem_add_to_page_cache(struct folio *folio, 76262306a36Sopenharmony_ci struct address_space *mapping, 76362306a36Sopenharmony_ci pgoff_t index, void *expected, gfp_t gfp, 76462306a36Sopenharmony_ci struct mm_struct *charge_mm) 76562306a36Sopenharmony_ci{ 76662306a36Sopenharmony_ci XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio)); 76762306a36Sopenharmony_ci long nr = folio_nr_pages(folio); 76862306a36Sopenharmony_ci int error; 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci VM_BUG_ON_FOLIO(index != round_down(index, nr), folio); 77162306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 77262306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio); 77362306a36Sopenharmony_ci VM_BUG_ON(expected && folio_test_large(folio)); 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci folio_ref_add(folio, nr); 77662306a36Sopenharmony_ci folio->mapping = mapping; 77762306a36Sopenharmony_ci folio->index = index; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci if (!folio_test_swapcache(folio)) { 78062306a36Sopenharmony_ci error = mem_cgroup_charge(folio, charge_mm, gfp); 78162306a36Sopenharmony_ci if (error) { 78262306a36Sopenharmony_ci if (folio_test_pmd_mappable(folio)) { 78362306a36Sopenharmony_ci count_vm_event(THP_FILE_FALLBACK); 78462306a36Sopenharmony_ci count_vm_event(THP_FILE_FALLBACK_CHARGE); 78562306a36Sopenharmony_ci } 78662306a36Sopenharmony_ci goto error; 78762306a36Sopenharmony_ci } 78862306a36Sopenharmony_ci } 78962306a36Sopenharmony_ci folio_throttle_swaprate(folio, gfp); 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci do { 79262306a36Sopenharmony_ci xas_lock_irq(&xas); 79362306a36Sopenharmony_ci if (expected != xas_find_conflict(&xas)) { 79462306a36Sopenharmony_ci xas_set_err(&xas, -EEXIST); 79562306a36Sopenharmony_ci goto unlock; 79662306a36Sopenharmony_ci } 79762306a36Sopenharmony_ci if (expected && xas_find_conflict(&xas)) { 79862306a36Sopenharmony_ci xas_set_err(&xas, -EEXIST); 79962306a36Sopenharmony_ci goto unlock; 80062306a36Sopenharmony_ci } 80162306a36Sopenharmony_ci xas_store(&xas, folio); 80262306a36Sopenharmony_ci if (xas_error(&xas)) 80362306a36Sopenharmony_ci goto unlock; 80462306a36Sopenharmony_ci if (folio_test_pmd_mappable(folio)) { 80562306a36Sopenharmony_ci count_vm_event(THP_FILE_ALLOC); 80662306a36Sopenharmony_ci __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr); 80762306a36Sopenharmony_ci } 80862306a36Sopenharmony_ci mapping->nrpages += nr; 80962306a36Sopenharmony_ci __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr); 81062306a36Sopenharmony_ci __lruvec_stat_mod_folio(folio, NR_SHMEM, nr); 81162306a36Sopenharmony_ciunlock: 81262306a36Sopenharmony_ci xas_unlock_irq(&xas); 81362306a36Sopenharmony_ci } while (xas_nomem(&xas, gfp)); 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci if (xas_error(&xas)) { 81662306a36Sopenharmony_ci error = xas_error(&xas); 81762306a36Sopenharmony_ci goto error; 81862306a36Sopenharmony_ci } 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci return 0; 82162306a36Sopenharmony_cierror: 82262306a36Sopenharmony_ci folio->mapping = NULL; 82362306a36Sopenharmony_ci folio_ref_sub(folio, nr); 82462306a36Sopenharmony_ci return error; 82562306a36Sopenharmony_ci} 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci/* 82862306a36Sopenharmony_ci * Like delete_from_page_cache, but substitutes swap for @folio. 82962306a36Sopenharmony_ci */ 83062306a36Sopenharmony_cistatic void shmem_delete_from_page_cache(struct folio *folio, void *radswap) 83162306a36Sopenharmony_ci{ 83262306a36Sopenharmony_ci struct address_space *mapping = folio->mapping; 83362306a36Sopenharmony_ci long nr = folio_nr_pages(folio); 83462306a36Sopenharmony_ci int error; 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci xa_lock_irq(&mapping->i_pages); 83762306a36Sopenharmony_ci error = shmem_replace_entry(mapping, folio->index, folio, radswap); 83862306a36Sopenharmony_ci folio->mapping = NULL; 83962306a36Sopenharmony_ci mapping->nrpages -= nr; 84062306a36Sopenharmony_ci __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); 84162306a36Sopenharmony_ci __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); 84262306a36Sopenharmony_ci xa_unlock_irq(&mapping->i_pages); 84362306a36Sopenharmony_ci folio_put(folio); 84462306a36Sopenharmony_ci BUG_ON(error); 84562306a36Sopenharmony_ci} 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_ci/* 84862306a36Sopenharmony_ci * Remove swap entry from page cache, free the swap and its page cache. 84962306a36Sopenharmony_ci */ 85062306a36Sopenharmony_cistatic int shmem_free_swap(struct address_space *mapping, 85162306a36Sopenharmony_ci pgoff_t index, void *radswap) 85262306a36Sopenharmony_ci{ 85362306a36Sopenharmony_ci void *old; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0); 85662306a36Sopenharmony_ci if (old != radswap) 85762306a36Sopenharmony_ci return -ENOENT; 85862306a36Sopenharmony_ci free_swap_and_cache(radix_to_swp_entry(radswap)); 85962306a36Sopenharmony_ci return 0; 86062306a36Sopenharmony_ci} 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci/* 86362306a36Sopenharmony_ci * Determine (in bytes) how many of the shmem object's pages mapped by the 86462306a36Sopenharmony_ci * given offsets are swapped out. 86562306a36Sopenharmony_ci * 86662306a36Sopenharmony_ci * This is safe to call without i_rwsem or the i_pages lock thanks to RCU, 86762306a36Sopenharmony_ci * as long as the inode doesn't go away and racy results are not a problem. 86862306a36Sopenharmony_ci */ 86962306a36Sopenharmony_ciunsigned long shmem_partial_swap_usage(struct address_space *mapping, 87062306a36Sopenharmony_ci pgoff_t start, pgoff_t end) 87162306a36Sopenharmony_ci{ 87262306a36Sopenharmony_ci XA_STATE(xas, &mapping->i_pages, start); 87362306a36Sopenharmony_ci struct page *page; 87462306a36Sopenharmony_ci unsigned long swapped = 0; 87562306a36Sopenharmony_ci unsigned long max = end - 1; 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci rcu_read_lock(); 87862306a36Sopenharmony_ci xas_for_each(&xas, page, max) { 87962306a36Sopenharmony_ci if (xas_retry(&xas, page)) 88062306a36Sopenharmony_ci continue; 88162306a36Sopenharmony_ci if (xa_is_value(page)) 88262306a36Sopenharmony_ci swapped++; 88362306a36Sopenharmony_ci if (xas.xa_index == max) 88462306a36Sopenharmony_ci break; 88562306a36Sopenharmony_ci if (need_resched()) { 88662306a36Sopenharmony_ci xas_pause(&xas); 88762306a36Sopenharmony_ci cond_resched_rcu(); 88862306a36Sopenharmony_ci } 88962306a36Sopenharmony_ci } 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci rcu_read_unlock(); 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci return swapped << PAGE_SHIFT; 89462306a36Sopenharmony_ci} 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci/* 89762306a36Sopenharmony_ci * Determine (in bytes) how many of the shmem object's pages mapped by the 89862306a36Sopenharmony_ci * given vma is swapped out. 89962306a36Sopenharmony_ci * 90062306a36Sopenharmony_ci * This is safe to call without i_rwsem or the i_pages lock thanks to RCU, 90162306a36Sopenharmony_ci * as long as the inode doesn't go away and racy results are not a problem. 90262306a36Sopenharmony_ci */ 90362306a36Sopenharmony_ciunsigned long shmem_swap_usage(struct vm_area_struct *vma) 90462306a36Sopenharmony_ci{ 90562306a36Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 90662306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 90762306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 90862306a36Sopenharmony_ci unsigned long swapped; 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci /* Be careful as we don't hold info->lock */ 91162306a36Sopenharmony_ci swapped = READ_ONCE(info->swapped); 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci /* 91462306a36Sopenharmony_ci * The easier cases are when the shmem object has nothing in swap, or 91562306a36Sopenharmony_ci * the vma maps it whole. Then we can simply use the stats that we 91662306a36Sopenharmony_ci * already track. 91762306a36Sopenharmony_ci */ 91862306a36Sopenharmony_ci if (!swapped) 91962306a36Sopenharmony_ci return 0; 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size) 92262306a36Sopenharmony_ci return swapped << PAGE_SHIFT; 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci /* Here comes the more involved part */ 92562306a36Sopenharmony_ci return shmem_partial_swap_usage(mapping, vma->vm_pgoff, 92662306a36Sopenharmony_ci vma->vm_pgoff + vma_pages(vma)); 92762306a36Sopenharmony_ci} 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci/* 93062306a36Sopenharmony_ci * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. 93162306a36Sopenharmony_ci */ 93262306a36Sopenharmony_civoid shmem_unlock_mapping(struct address_space *mapping) 93362306a36Sopenharmony_ci{ 93462306a36Sopenharmony_ci struct folio_batch fbatch; 93562306a36Sopenharmony_ci pgoff_t index = 0; 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci folio_batch_init(&fbatch); 93862306a36Sopenharmony_ci /* 93962306a36Sopenharmony_ci * Minor point, but we might as well stop if someone else SHM_LOCKs it. 94062306a36Sopenharmony_ci */ 94162306a36Sopenharmony_ci while (!mapping_unevictable(mapping) && 94262306a36Sopenharmony_ci filemap_get_folios(mapping, &index, ~0UL, &fbatch)) { 94362306a36Sopenharmony_ci check_move_unevictable_folios(&fbatch); 94462306a36Sopenharmony_ci folio_batch_release(&fbatch); 94562306a36Sopenharmony_ci cond_resched(); 94662306a36Sopenharmony_ci } 94762306a36Sopenharmony_ci} 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_cistatic struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) 95062306a36Sopenharmony_ci{ 95162306a36Sopenharmony_ci struct folio *folio; 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci /* 95462306a36Sopenharmony_ci * At first avoid shmem_get_folio(,,,SGP_READ): that fails 95562306a36Sopenharmony_ci * beyond i_size, and reports fallocated folios as holes. 95662306a36Sopenharmony_ci */ 95762306a36Sopenharmony_ci folio = filemap_get_entry(inode->i_mapping, index); 95862306a36Sopenharmony_ci if (!folio) 95962306a36Sopenharmony_ci return folio; 96062306a36Sopenharmony_ci if (!xa_is_value(folio)) { 96162306a36Sopenharmony_ci folio_lock(folio); 96262306a36Sopenharmony_ci if (folio->mapping == inode->i_mapping) 96362306a36Sopenharmony_ci return folio; 96462306a36Sopenharmony_ci /* The folio has been swapped out */ 96562306a36Sopenharmony_ci folio_unlock(folio); 96662306a36Sopenharmony_ci folio_put(folio); 96762306a36Sopenharmony_ci } 96862306a36Sopenharmony_ci /* 96962306a36Sopenharmony_ci * But read a folio back from swap if any of it is within i_size 97062306a36Sopenharmony_ci * (although in some cases this is just a waste of time). 97162306a36Sopenharmony_ci */ 97262306a36Sopenharmony_ci folio = NULL; 97362306a36Sopenharmony_ci shmem_get_folio(inode, index, &folio, SGP_READ); 97462306a36Sopenharmony_ci return folio; 97562306a36Sopenharmony_ci} 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci/* 97862306a36Sopenharmony_ci * Remove range of pages and swap entries from page cache, and free them. 97962306a36Sopenharmony_ci * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. 98062306a36Sopenharmony_ci */ 98162306a36Sopenharmony_cistatic void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, 98262306a36Sopenharmony_ci bool unfalloc) 98362306a36Sopenharmony_ci{ 98462306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 98562306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 98662306a36Sopenharmony_ci pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; 98762306a36Sopenharmony_ci pgoff_t end = (lend + 1) >> PAGE_SHIFT; 98862306a36Sopenharmony_ci struct folio_batch fbatch; 98962306a36Sopenharmony_ci pgoff_t indices[PAGEVEC_SIZE]; 99062306a36Sopenharmony_ci struct folio *folio; 99162306a36Sopenharmony_ci bool same_folio; 99262306a36Sopenharmony_ci long nr_swaps_freed = 0; 99362306a36Sopenharmony_ci pgoff_t index; 99462306a36Sopenharmony_ci int i; 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci if (lend == -1) 99762306a36Sopenharmony_ci end = -1; /* unsigned, so actually very big */ 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci if (info->fallocend > start && info->fallocend <= end && !unfalloc) 100062306a36Sopenharmony_ci info->fallocend = start; 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci folio_batch_init(&fbatch); 100362306a36Sopenharmony_ci index = start; 100462306a36Sopenharmony_ci while (index < end && find_lock_entries(mapping, &index, end - 1, 100562306a36Sopenharmony_ci &fbatch, indices)) { 100662306a36Sopenharmony_ci for (i = 0; i < folio_batch_count(&fbatch); i++) { 100762306a36Sopenharmony_ci folio = fbatch.folios[i]; 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci if (xa_is_value(folio)) { 101062306a36Sopenharmony_ci if (unfalloc) 101162306a36Sopenharmony_ci continue; 101262306a36Sopenharmony_ci nr_swaps_freed += !shmem_free_swap(mapping, 101362306a36Sopenharmony_ci indices[i], folio); 101462306a36Sopenharmony_ci continue; 101562306a36Sopenharmony_ci } 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci if (!unfalloc || !folio_test_uptodate(folio)) 101862306a36Sopenharmony_ci truncate_inode_folio(mapping, folio); 101962306a36Sopenharmony_ci folio_unlock(folio); 102062306a36Sopenharmony_ci } 102162306a36Sopenharmony_ci folio_batch_remove_exceptionals(&fbatch); 102262306a36Sopenharmony_ci folio_batch_release(&fbatch); 102362306a36Sopenharmony_ci cond_resched(); 102462306a36Sopenharmony_ci } 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ci /* 102762306a36Sopenharmony_ci * When undoing a failed fallocate, we want none of the partial folio 102862306a36Sopenharmony_ci * zeroing and splitting below, but shall want to truncate the whole 102962306a36Sopenharmony_ci * folio when !uptodate indicates that it was added by this fallocate, 103062306a36Sopenharmony_ci * even when [lstart, lend] covers only a part of the folio. 103162306a36Sopenharmony_ci */ 103262306a36Sopenharmony_ci if (unfalloc) 103362306a36Sopenharmony_ci goto whole_folios; 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); 103662306a36Sopenharmony_ci folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); 103762306a36Sopenharmony_ci if (folio) { 103862306a36Sopenharmony_ci same_folio = lend < folio_pos(folio) + folio_size(folio); 103962306a36Sopenharmony_ci folio_mark_dirty(folio); 104062306a36Sopenharmony_ci if (!truncate_inode_partial_folio(folio, lstart, lend)) { 104162306a36Sopenharmony_ci start = folio_next_index(folio); 104262306a36Sopenharmony_ci if (same_folio) 104362306a36Sopenharmony_ci end = folio->index; 104462306a36Sopenharmony_ci } 104562306a36Sopenharmony_ci folio_unlock(folio); 104662306a36Sopenharmony_ci folio_put(folio); 104762306a36Sopenharmony_ci folio = NULL; 104862306a36Sopenharmony_ci } 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci if (!same_folio) 105162306a36Sopenharmony_ci folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT); 105262306a36Sopenharmony_ci if (folio) { 105362306a36Sopenharmony_ci folio_mark_dirty(folio); 105462306a36Sopenharmony_ci if (!truncate_inode_partial_folio(folio, lstart, lend)) 105562306a36Sopenharmony_ci end = folio->index; 105662306a36Sopenharmony_ci folio_unlock(folio); 105762306a36Sopenharmony_ci folio_put(folio); 105862306a36Sopenharmony_ci } 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ciwhole_folios: 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci index = start; 106362306a36Sopenharmony_ci while (index < end) { 106462306a36Sopenharmony_ci cond_resched(); 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ci if (!find_get_entries(mapping, &index, end - 1, &fbatch, 106762306a36Sopenharmony_ci indices)) { 106862306a36Sopenharmony_ci /* If all gone or hole-punch or unfalloc, we're done */ 106962306a36Sopenharmony_ci if (index == start || end != -1) 107062306a36Sopenharmony_ci break; 107162306a36Sopenharmony_ci /* But if truncating, restart to make sure all gone */ 107262306a36Sopenharmony_ci index = start; 107362306a36Sopenharmony_ci continue; 107462306a36Sopenharmony_ci } 107562306a36Sopenharmony_ci for (i = 0; i < folio_batch_count(&fbatch); i++) { 107662306a36Sopenharmony_ci folio = fbatch.folios[i]; 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_ci if (xa_is_value(folio)) { 107962306a36Sopenharmony_ci if (unfalloc) 108062306a36Sopenharmony_ci continue; 108162306a36Sopenharmony_ci if (shmem_free_swap(mapping, indices[i], folio)) { 108262306a36Sopenharmony_ci /* Swap was replaced by page: retry */ 108362306a36Sopenharmony_ci index = indices[i]; 108462306a36Sopenharmony_ci break; 108562306a36Sopenharmony_ci } 108662306a36Sopenharmony_ci nr_swaps_freed++; 108762306a36Sopenharmony_ci continue; 108862306a36Sopenharmony_ci } 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci folio_lock(folio); 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci if (!unfalloc || !folio_test_uptodate(folio)) { 109362306a36Sopenharmony_ci if (folio_mapping(folio) != mapping) { 109462306a36Sopenharmony_ci /* Page was replaced by swap: retry */ 109562306a36Sopenharmony_ci folio_unlock(folio); 109662306a36Sopenharmony_ci index = indices[i]; 109762306a36Sopenharmony_ci break; 109862306a36Sopenharmony_ci } 109962306a36Sopenharmony_ci VM_BUG_ON_FOLIO(folio_test_writeback(folio), 110062306a36Sopenharmony_ci folio); 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_ci if (!folio_test_large(folio)) { 110362306a36Sopenharmony_ci truncate_inode_folio(mapping, folio); 110462306a36Sopenharmony_ci } else if (truncate_inode_partial_folio(folio, lstart, lend)) { 110562306a36Sopenharmony_ci /* 110662306a36Sopenharmony_ci * If we split a page, reset the loop so 110762306a36Sopenharmony_ci * that we pick up the new sub pages. 110862306a36Sopenharmony_ci * Otherwise the THP was entirely 110962306a36Sopenharmony_ci * dropped or the target range was 111062306a36Sopenharmony_ci * zeroed, so just continue the loop as 111162306a36Sopenharmony_ci * is. 111262306a36Sopenharmony_ci */ 111362306a36Sopenharmony_ci if (!folio_test_large(folio)) { 111462306a36Sopenharmony_ci folio_unlock(folio); 111562306a36Sopenharmony_ci index = start; 111662306a36Sopenharmony_ci break; 111762306a36Sopenharmony_ci } 111862306a36Sopenharmony_ci } 111962306a36Sopenharmony_ci } 112062306a36Sopenharmony_ci folio_unlock(folio); 112162306a36Sopenharmony_ci } 112262306a36Sopenharmony_ci folio_batch_remove_exceptionals(&fbatch); 112362306a36Sopenharmony_ci folio_batch_release(&fbatch); 112462306a36Sopenharmony_ci } 112562306a36Sopenharmony_ci 112662306a36Sopenharmony_ci shmem_recalc_inode(inode, 0, -nr_swaps_freed); 112762306a36Sopenharmony_ci} 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_civoid shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 113062306a36Sopenharmony_ci{ 113162306a36Sopenharmony_ci shmem_undo_range(inode, lstart, lend, false); 113262306a36Sopenharmony_ci inode->i_mtime = inode_set_ctime_current(inode); 113362306a36Sopenharmony_ci inode_inc_iversion(inode); 113462306a36Sopenharmony_ci} 113562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_truncate_range); 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_cistatic int shmem_getattr(struct mnt_idmap *idmap, 113862306a36Sopenharmony_ci const struct path *path, struct kstat *stat, 113962306a36Sopenharmony_ci u32 request_mask, unsigned int query_flags) 114062306a36Sopenharmony_ci{ 114162306a36Sopenharmony_ci struct inode *inode = path->dentry->d_inode; 114262306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci if (info->alloced - info->swapped != inode->i_mapping->nrpages) 114562306a36Sopenharmony_ci shmem_recalc_inode(inode, 0, 0); 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci if (info->fsflags & FS_APPEND_FL) 114862306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_APPEND; 114962306a36Sopenharmony_ci if (info->fsflags & FS_IMMUTABLE_FL) 115062306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_IMMUTABLE; 115162306a36Sopenharmony_ci if (info->fsflags & FS_NODUMP_FL) 115262306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_NODUMP; 115362306a36Sopenharmony_ci stat->attributes_mask |= (STATX_ATTR_APPEND | 115462306a36Sopenharmony_ci STATX_ATTR_IMMUTABLE | 115562306a36Sopenharmony_ci STATX_ATTR_NODUMP); 115662306a36Sopenharmony_ci generic_fillattr(idmap, request_mask, inode, stat); 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci if (shmem_is_huge(inode, 0, false, NULL, 0)) 115962306a36Sopenharmony_ci stat->blksize = HPAGE_PMD_SIZE; 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci if (request_mask & STATX_BTIME) { 116262306a36Sopenharmony_ci stat->result_mask |= STATX_BTIME; 116362306a36Sopenharmony_ci stat->btime.tv_sec = info->i_crtime.tv_sec; 116462306a36Sopenharmony_ci stat->btime.tv_nsec = info->i_crtime.tv_nsec; 116562306a36Sopenharmony_ci } 116662306a36Sopenharmony_ci 116762306a36Sopenharmony_ci return 0; 116862306a36Sopenharmony_ci} 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_cistatic int shmem_setattr(struct mnt_idmap *idmap, 117162306a36Sopenharmony_ci struct dentry *dentry, struct iattr *attr) 117262306a36Sopenharmony_ci{ 117362306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 117462306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 117562306a36Sopenharmony_ci int error; 117662306a36Sopenharmony_ci bool update_mtime = false; 117762306a36Sopenharmony_ci bool update_ctime = true; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci error = setattr_prepare(idmap, dentry, attr); 118062306a36Sopenharmony_ci if (error) 118162306a36Sopenharmony_ci return error; 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci if ((info->seals & F_SEAL_EXEC) && (attr->ia_valid & ATTR_MODE)) { 118462306a36Sopenharmony_ci if ((inode->i_mode ^ attr->ia_mode) & 0111) { 118562306a36Sopenharmony_ci return -EPERM; 118662306a36Sopenharmony_ci } 118762306a36Sopenharmony_ci } 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 119062306a36Sopenharmony_ci loff_t oldsize = inode->i_size; 119162306a36Sopenharmony_ci loff_t newsize = attr->ia_size; 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci /* protected by i_rwsem */ 119462306a36Sopenharmony_ci if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || 119562306a36Sopenharmony_ci (newsize > oldsize && (info->seals & F_SEAL_GROW))) 119662306a36Sopenharmony_ci return -EPERM; 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci if (newsize != oldsize) { 119962306a36Sopenharmony_ci error = shmem_reacct_size(SHMEM_I(inode)->flags, 120062306a36Sopenharmony_ci oldsize, newsize); 120162306a36Sopenharmony_ci if (error) 120262306a36Sopenharmony_ci return error; 120362306a36Sopenharmony_ci i_size_write(inode, newsize); 120462306a36Sopenharmony_ci update_mtime = true; 120562306a36Sopenharmony_ci } else { 120662306a36Sopenharmony_ci update_ctime = false; 120762306a36Sopenharmony_ci } 120862306a36Sopenharmony_ci if (newsize <= oldsize) { 120962306a36Sopenharmony_ci loff_t holebegin = round_up(newsize, PAGE_SIZE); 121062306a36Sopenharmony_ci if (oldsize > holebegin) 121162306a36Sopenharmony_ci unmap_mapping_range(inode->i_mapping, 121262306a36Sopenharmony_ci holebegin, 0, 1); 121362306a36Sopenharmony_ci if (info->alloced) 121462306a36Sopenharmony_ci shmem_truncate_range(inode, 121562306a36Sopenharmony_ci newsize, (loff_t)-1); 121662306a36Sopenharmony_ci /* unmap again to remove racily COWed private pages */ 121762306a36Sopenharmony_ci if (oldsize > holebegin) 121862306a36Sopenharmony_ci unmap_mapping_range(inode->i_mapping, 121962306a36Sopenharmony_ci holebegin, 0, 1); 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci } 122262306a36Sopenharmony_ci 122362306a36Sopenharmony_ci if (is_quota_modification(idmap, inode, attr)) { 122462306a36Sopenharmony_ci error = dquot_initialize(inode); 122562306a36Sopenharmony_ci if (error) 122662306a36Sopenharmony_ci return error; 122762306a36Sopenharmony_ci } 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci /* Transfer quota accounting */ 123062306a36Sopenharmony_ci if (i_uid_needs_update(idmap, attr, inode) || 123162306a36Sopenharmony_ci i_gid_needs_update(idmap, attr, inode)) { 123262306a36Sopenharmony_ci error = dquot_transfer(idmap, inode, attr); 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci if (error) 123562306a36Sopenharmony_ci return error; 123662306a36Sopenharmony_ci } 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci setattr_copy(idmap, inode, attr); 123962306a36Sopenharmony_ci if (attr->ia_valid & ATTR_MODE) 124062306a36Sopenharmony_ci error = posix_acl_chmod(idmap, dentry, inode->i_mode); 124162306a36Sopenharmony_ci if (!error && update_ctime) { 124262306a36Sopenharmony_ci inode_set_ctime_current(inode); 124362306a36Sopenharmony_ci if (update_mtime) 124462306a36Sopenharmony_ci inode->i_mtime = inode_get_ctime(inode); 124562306a36Sopenharmony_ci inode_inc_iversion(inode); 124662306a36Sopenharmony_ci } 124762306a36Sopenharmony_ci return error; 124862306a36Sopenharmony_ci} 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_cistatic void shmem_evict_inode(struct inode *inode) 125162306a36Sopenharmony_ci{ 125262306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 125362306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 125462306a36Sopenharmony_ci size_t freed = 0; 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci if (shmem_mapping(inode->i_mapping)) { 125762306a36Sopenharmony_ci shmem_unacct_size(info->flags, inode->i_size); 125862306a36Sopenharmony_ci inode->i_size = 0; 125962306a36Sopenharmony_ci mapping_set_exiting(inode->i_mapping); 126062306a36Sopenharmony_ci shmem_truncate_range(inode, 0, (loff_t)-1); 126162306a36Sopenharmony_ci if (!list_empty(&info->shrinklist)) { 126262306a36Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 126362306a36Sopenharmony_ci if (!list_empty(&info->shrinklist)) { 126462306a36Sopenharmony_ci list_del_init(&info->shrinklist); 126562306a36Sopenharmony_ci sbinfo->shrinklist_len--; 126662306a36Sopenharmony_ci } 126762306a36Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 126862306a36Sopenharmony_ci } 126962306a36Sopenharmony_ci while (!list_empty(&info->swaplist)) { 127062306a36Sopenharmony_ci /* Wait while shmem_unuse() is scanning this inode... */ 127162306a36Sopenharmony_ci wait_var_event(&info->stop_eviction, 127262306a36Sopenharmony_ci !atomic_read(&info->stop_eviction)); 127362306a36Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 127462306a36Sopenharmony_ci /* ...but beware of the race if we peeked too early */ 127562306a36Sopenharmony_ci if (!atomic_read(&info->stop_eviction)) 127662306a36Sopenharmony_ci list_del_init(&info->swaplist); 127762306a36Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 127862306a36Sopenharmony_ci } 127962306a36Sopenharmony_ci } 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL); 128262306a36Sopenharmony_ci shmem_free_inode(inode->i_sb, freed); 128362306a36Sopenharmony_ci WARN_ON(inode->i_blocks); 128462306a36Sopenharmony_ci clear_inode(inode); 128562306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 128662306a36Sopenharmony_ci dquot_free_inode(inode); 128762306a36Sopenharmony_ci dquot_drop(inode); 128862306a36Sopenharmony_ci#endif 128962306a36Sopenharmony_ci} 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_cistatic int shmem_find_swap_entries(struct address_space *mapping, 129262306a36Sopenharmony_ci pgoff_t start, struct folio_batch *fbatch, 129362306a36Sopenharmony_ci pgoff_t *indices, unsigned int type) 129462306a36Sopenharmony_ci{ 129562306a36Sopenharmony_ci XA_STATE(xas, &mapping->i_pages, start); 129662306a36Sopenharmony_ci struct folio *folio; 129762306a36Sopenharmony_ci swp_entry_t entry; 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci rcu_read_lock(); 130062306a36Sopenharmony_ci xas_for_each(&xas, folio, ULONG_MAX) { 130162306a36Sopenharmony_ci if (xas_retry(&xas, folio)) 130262306a36Sopenharmony_ci continue; 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci if (!xa_is_value(folio)) 130562306a36Sopenharmony_ci continue; 130662306a36Sopenharmony_ci 130762306a36Sopenharmony_ci entry = radix_to_swp_entry(folio); 130862306a36Sopenharmony_ci /* 130962306a36Sopenharmony_ci * swapin error entries can be found in the mapping. But they're 131062306a36Sopenharmony_ci * deliberately ignored here as we've done everything we can do. 131162306a36Sopenharmony_ci */ 131262306a36Sopenharmony_ci if (swp_type(entry) != type) 131362306a36Sopenharmony_ci continue; 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci indices[folio_batch_count(fbatch)] = xas.xa_index; 131662306a36Sopenharmony_ci if (!folio_batch_add(fbatch, folio)) 131762306a36Sopenharmony_ci break; 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci if (need_resched()) { 132062306a36Sopenharmony_ci xas_pause(&xas); 132162306a36Sopenharmony_ci cond_resched_rcu(); 132262306a36Sopenharmony_ci } 132362306a36Sopenharmony_ci } 132462306a36Sopenharmony_ci rcu_read_unlock(); 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_ci return xas.xa_index; 132762306a36Sopenharmony_ci} 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci/* 133062306a36Sopenharmony_ci * Move the swapped pages for an inode to page cache. Returns the count 133162306a36Sopenharmony_ci * of pages swapped in, or the error in case of failure. 133262306a36Sopenharmony_ci */ 133362306a36Sopenharmony_cistatic int shmem_unuse_swap_entries(struct inode *inode, 133462306a36Sopenharmony_ci struct folio_batch *fbatch, pgoff_t *indices) 133562306a36Sopenharmony_ci{ 133662306a36Sopenharmony_ci int i = 0; 133762306a36Sopenharmony_ci int ret = 0; 133862306a36Sopenharmony_ci int error = 0; 133962306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_ci for (i = 0; i < folio_batch_count(fbatch); i++) { 134262306a36Sopenharmony_ci struct folio *folio = fbatch->folios[i]; 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci if (!xa_is_value(folio)) 134562306a36Sopenharmony_ci continue; 134662306a36Sopenharmony_ci error = shmem_swapin_folio(inode, indices[i], 134762306a36Sopenharmony_ci &folio, SGP_CACHE, 134862306a36Sopenharmony_ci mapping_gfp_mask(mapping), 134962306a36Sopenharmony_ci NULL, NULL); 135062306a36Sopenharmony_ci if (error == 0) { 135162306a36Sopenharmony_ci folio_unlock(folio); 135262306a36Sopenharmony_ci folio_put(folio); 135362306a36Sopenharmony_ci ret++; 135462306a36Sopenharmony_ci } 135562306a36Sopenharmony_ci if (error == -ENOMEM) 135662306a36Sopenharmony_ci break; 135762306a36Sopenharmony_ci error = 0; 135862306a36Sopenharmony_ci } 135962306a36Sopenharmony_ci return error ? error : ret; 136062306a36Sopenharmony_ci} 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci/* 136362306a36Sopenharmony_ci * If swap found in inode, free it and move page from swapcache to filecache. 136462306a36Sopenharmony_ci */ 136562306a36Sopenharmony_cistatic int shmem_unuse_inode(struct inode *inode, unsigned int type) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 136862306a36Sopenharmony_ci pgoff_t start = 0; 136962306a36Sopenharmony_ci struct folio_batch fbatch; 137062306a36Sopenharmony_ci pgoff_t indices[PAGEVEC_SIZE]; 137162306a36Sopenharmony_ci int ret = 0; 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci do { 137462306a36Sopenharmony_ci folio_batch_init(&fbatch); 137562306a36Sopenharmony_ci shmem_find_swap_entries(mapping, start, &fbatch, indices, type); 137662306a36Sopenharmony_ci if (folio_batch_count(&fbatch) == 0) { 137762306a36Sopenharmony_ci ret = 0; 137862306a36Sopenharmony_ci break; 137962306a36Sopenharmony_ci } 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci ret = shmem_unuse_swap_entries(inode, &fbatch, indices); 138262306a36Sopenharmony_ci if (ret < 0) 138362306a36Sopenharmony_ci break; 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci start = indices[folio_batch_count(&fbatch) - 1]; 138662306a36Sopenharmony_ci } while (true); 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci return ret; 138962306a36Sopenharmony_ci} 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci/* 139262306a36Sopenharmony_ci * Read all the shared memory data that resides in the swap 139362306a36Sopenharmony_ci * device 'type' back into memory, so the swap device can be 139462306a36Sopenharmony_ci * unused. 139562306a36Sopenharmony_ci */ 139662306a36Sopenharmony_ciint shmem_unuse(unsigned int type) 139762306a36Sopenharmony_ci{ 139862306a36Sopenharmony_ci struct shmem_inode_info *info, *next; 139962306a36Sopenharmony_ci int error = 0; 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci if (list_empty(&shmem_swaplist)) 140262306a36Sopenharmony_ci return 0; 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 140562306a36Sopenharmony_ci list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { 140662306a36Sopenharmony_ci if (!info->swapped) { 140762306a36Sopenharmony_ci list_del_init(&info->swaplist); 140862306a36Sopenharmony_ci continue; 140962306a36Sopenharmony_ci } 141062306a36Sopenharmony_ci /* 141162306a36Sopenharmony_ci * Drop the swaplist mutex while searching the inode for swap; 141262306a36Sopenharmony_ci * but before doing so, make sure shmem_evict_inode() will not 141362306a36Sopenharmony_ci * remove placeholder inode from swaplist, nor let it be freed 141462306a36Sopenharmony_ci * (igrab() would protect from unlink, but not from unmount). 141562306a36Sopenharmony_ci */ 141662306a36Sopenharmony_ci atomic_inc(&info->stop_eviction); 141762306a36Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci error = shmem_unuse_inode(&info->vfs_inode, type); 142062306a36Sopenharmony_ci cond_resched(); 142162306a36Sopenharmony_ci 142262306a36Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 142362306a36Sopenharmony_ci next = list_next_entry(info, swaplist); 142462306a36Sopenharmony_ci if (!info->swapped) 142562306a36Sopenharmony_ci list_del_init(&info->swaplist); 142662306a36Sopenharmony_ci if (atomic_dec_and_test(&info->stop_eviction)) 142762306a36Sopenharmony_ci wake_up_var(&info->stop_eviction); 142862306a36Sopenharmony_ci if (error) 142962306a36Sopenharmony_ci break; 143062306a36Sopenharmony_ci } 143162306a36Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_ci return error; 143462306a36Sopenharmony_ci} 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci/* 143762306a36Sopenharmony_ci * Move the page from the page cache to the swap cache. 143862306a36Sopenharmony_ci */ 143962306a36Sopenharmony_cistatic int shmem_writepage(struct page *page, struct writeback_control *wbc) 144062306a36Sopenharmony_ci{ 144162306a36Sopenharmony_ci struct folio *folio = page_folio(page); 144262306a36Sopenharmony_ci struct address_space *mapping = folio->mapping; 144362306a36Sopenharmony_ci struct inode *inode = mapping->host; 144462306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 144562306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 144662306a36Sopenharmony_ci swp_entry_t swap; 144762306a36Sopenharmony_ci pgoff_t index; 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci /* 145062306a36Sopenharmony_ci * Our capabilities prevent regular writeback or sync from ever calling 145162306a36Sopenharmony_ci * shmem_writepage; but a stacking filesystem might use ->writepage of 145262306a36Sopenharmony_ci * its underlying filesystem, in which case tmpfs should write out to 145362306a36Sopenharmony_ci * swap only in response to memory pressure, and not for the writeback 145462306a36Sopenharmony_ci * threads or sync. 145562306a36Sopenharmony_ci */ 145662306a36Sopenharmony_ci if (WARN_ON_ONCE(!wbc->for_reclaim)) 145762306a36Sopenharmony_ci goto redirty; 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap)) 146062306a36Sopenharmony_ci goto redirty; 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci if (!total_swap_pages) 146362306a36Sopenharmony_ci goto redirty; 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci /* 146662306a36Sopenharmony_ci * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or 146762306a36Sopenharmony_ci * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages, 146862306a36Sopenharmony_ci * and its shmem_writeback() needs them to be split when swapping. 146962306a36Sopenharmony_ci */ 147062306a36Sopenharmony_ci if (folio_test_large(folio)) { 147162306a36Sopenharmony_ci /* Ensure the subpages are still dirty */ 147262306a36Sopenharmony_ci folio_test_set_dirty(folio); 147362306a36Sopenharmony_ci if (split_huge_page(page) < 0) 147462306a36Sopenharmony_ci goto redirty; 147562306a36Sopenharmony_ci folio = page_folio(page); 147662306a36Sopenharmony_ci folio_clear_dirty(folio); 147762306a36Sopenharmony_ci } 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci index = folio->index; 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_ci /* 148262306a36Sopenharmony_ci * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC 148362306a36Sopenharmony_ci * value into swapfile.c, the only way we can correctly account for a 148462306a36Sopenharmony_ci * fallocated folio arriving here is now to initialize it and write it. 148562306a36Sopenharmony_ci * 148662306a36Sopenharmony_ci * That's okay for a folio already fallocated earlier, but if we have 148762306a36Sopenharmony_ci * not yet completed the fallocation, then (a) we want to keep track 148862306a36Sopenharmony_ci * of this folio in case we have to undo it, and (b) it may not be a 148962306a36Sopenharmony_ci * good idea to continue anyway, once we're pushing into swap. So 149062306a36Sopenharmony_ci * reactivate the folio, and let shmem_fallocate() quit when too many. 149162306a36Sopenharmony_ci */ 149262306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) { 149362306a36Sopenharmony_ci if (inode->i_private) { 149462306a36Sopenharmony_ci struct shmem_falloc *shmem_falloc; 149562306a36Sopenharmony_ci spin_lock(&inode->i_lock); 149662306a36Sopenharmony_ci shmem_falloc = inode->i_private; 149762306a36Sopenharmony_ci if (shmem_falloc && 149862306a36Sopenharmony_ci !shmem_falloc->waitq && 149962306a36Sopenharmony_ci index >= shmem_falloc->start && 150062306a36Sopenharmony_ci index < shmem_falloc->next) 150162306a36Sopenharmony_ci shmem_falloc->nr_unswapped++; 150262306a36Sopenharmony_ci else 150362306a36Sopenharmony_ci shmem_falloc = NULL; 150462306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 150562306a36Sopenharmony_ci if (shmem_falloc) 150662306a36Sopenharmony_ci goto redirty; 150762306a36Sopenharmony_ci } 150862306a36Sopenharmony_ci folio_zero_range(folio, 0, folio_size(folio)); 150962306a36Sopenharmony_ci flush_dcache_folio(folio); 151062306a36Sopenharmony_ci folio_mark_uptodate(folio); 151162306a36Sopenharmony_ci } 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ci swap = folio_alloc_swap(folio); 151462306a36Sopenharmony_ci if (!swap.val) 151562306a36Sopenharmony_ci goto redirty; 151662306a36Sopenharmony_ci 151762306a36Sopenharmony_ci /* 151862306a36Sopenharmony_ci * Add inode to shmem_unuse()'s list of swapped-out inodes, 151962306a36Sopenharmony_ci * if it's not already there. Do it now before the folio is 152062306a36Sopenharmony_ci * moved to swap cache, when its pagelock no longer protects 152162306a36Sopenharmony_ci * the inode from eviction. But don't unlock the mutex until 152262306a36Sopenharmony_ci * we've incremented swapped, because shmem_unuse_inode() will 152362306a36Sopenharmony_ci * prune a !swapped inode from the swaplist under this mutex. 152462306a36Sopenharmony_ci */ 152562306a36Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 152662306a36Sopenharmony_ci if (list_empty(&info->swaplist)) 152762306a36Sopenharmony_ci list_add(&info->swaplist, &shmem_swaplist); 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci if (add_to_swap_cache(folio, swap, 153062306a36Sopenharmony_ci __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, 153162306a36Sopenharmony_ci NULL) == 0) { 153262306a36Sopenharmony_ci shmem_recalc_inode(inode, 0, 1); 153362306a36Sopenharmony_ci swap_shmem_alloc(swap); 153462306a36Sopenharmony_ci shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap)); 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 153762306a36Sopenharmony_ci BUG_ON(folio_mapped(folio)); 153862306a36Sopenharmony_ci swap_writepage(&folio->page, wbc); 153962306a36Sopenharmony_ci return 0; 154062306a36Sopenharmony_ci } 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 154362306a36Sopenharmony_ci put_swap_folio(folio, swap); 154462306a36Sopenharmony_ciredirty: 154562306a36Sopenharmony_ci folio_mark_dirty(folio); 154662306a36Sopenharmony_ci if (wbc->for_reclaim) 154762306a36Sopenharmony_ci return AOP_WRITEPAGE_ACTIVATE; /* Return with folio locked */ 154862306a36Sopenharmony_ci folio_unlock(folio); 154962306a36Sopenharmony_ci return 0; 155062306a36Sopenharmony_ci} 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS) 155362306a36Sopenharmony_cistatic void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) 155462306a36Sopenharmony_ci{ 155562306a36Sopenharmony_ci char buffer[64]; 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci if (!mpol || mpol->mode == MPOL_DEFAULT) 155862306a36Sopenharmony_ci return; /* show nothing */ 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_ci mpol_to_str(buffer, sizeof(buffer), mpol); 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_ci seq_printf(seq, ",mpol=%s", buffer); 156362306a36Sopenharmony_ci} 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_cistatic struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) 156662306a36Sopenharmony_ci{ 156762306a36Sopenharmony_ci struct mempolicy *mpol = NULL; 156862306a36Sopenharmony_ci if (sbinfo->mpol) { 156962306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ 157062306a36Sopenharmony_ci mpol = sbinfo->mpol; 157162306a36Sopenharmony_ci mpol_get(mpol); 157262306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 157362306a36Sopenharmony_ci } 157462306a36Sopenharmony_ci return mpol; 157562306a36Sopenharmony_ci} 157662306a36Sopenharmony_ci#else /* !CONFIG_NUMA || !CONFIG_TMPFS */ 157762306a36Sopenharmony_cistatic inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) 157862306a36Sopenharmony_ci{ 157962306a36Sopenharmony_ci} 158062306a36Sopenharmony_cistatic inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) 158162306a36Sopenharmony_ci{ 158262306a36Sopenharmony_ci return NULL; 158362306a36Sopenharmony_ci} 158462306a36Sopenharmony_ci#endif /* CONFIG_NUMA && CONFIG_TMPFS */ 158562306a36Sopenharmony_ci#ifndef CONFIG_NUMA 158662306a36Sopenharmony_ci#define vm_policy vm_private_data 158762306a36Sopenharmony_ci#endif 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_cistatic void shmem_pseudo_vma_init(struct vm_area_struct *vma, 159062306a36Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 159162306a36Sopenharmony_ci{ 159262306a36Sopenharmony_ci /* Create a pseudo vma that just contains the policy */ 159362306a36Sopenharmony_ci vma_init(vma, NULL); 159462306a36Sopenharmony_ci /* Bias interleave by inode number to distribute better across nodes */ 159562306a36Sopenharmony_ci vma->vm_pgoff = index + info->vfs_inode.i_ino; 159662306a36Sopenharmony_ci vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); 159762306a36Sopenharmony_ci} 159862306a36Sopenharmony_ci 159962306a36Sopenharmony_cistatic void shmem_pseudo_vma_destroy(struct vm_area_struct *vma) 160062306a36Sopenharmony_ci{ 160162306a36Sopenharmony_ci /* Drop reference taken by mpol_shared_policy_lookup() */ 160262306a36Sopenharmony_ci mpol_cond_put(vma->vm_policy); 160362306a36Sopenharmony_ci} 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_cistatic struct folio *shmem_swapin(swp_entry_t swap, gfp_t gfp, 160662306a36Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 160762306a36Sopenharmony_ci{ 160862306a36Sopenharmony_ci struct vm_area_struct pvma; 160962306a36Sopenharmony_ci struct page *page; 161062306a36Sopenharmony_ci struct vm_fault vmf = { 161162306a36Sopenharmony_ci .vma = &pvma, 161262306a36Sopenharmony_ci }; 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_ci shmem_pseudo_vma_init(&pvma, info, index); 161562306a36Sopenharmony_ci page = swap_cluster_readahead(swap, gfp, &vmf); 161662306a36Sopenharmony_ci shmem_pseudo_vma_destroy(&pvma); 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_ci if (!page) 161962306a36Sopenharmony_ci return NULL; 162062306a36Sopenharmony_ci return page_folio(page); 162162306a36Sopenharmony_ci} 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci/* 162462306a36Sopenharmony_ci * Make sure huge_gfp is always more limited than limit_gfp. 162562306a36Sopenharmony_ci * Some of the flags set permissions, while others set limitations. 162662306a36Sopenharmony_ci */ 162762306a36Sopenharmony_cistatic gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp) 162862306a36Sopenharmony_ci{ 162962306a36Sopenharmony_ci gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM; 163062306a36Sopenharmony_ci gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY; 163162306a36Sopenharmony_ci gfp_t zoneflags = limit_gfp & GFP_ZONEMASK; 163262306a36Sopenharmony_ci gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK); 163362306a36Sopenharmony_ci 163462306a36Sopenharmony_ci /* Allow allocations only from the originally specified zones. */ 163562306a36Sopenharmony_ci result |= zoneflags; 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ci /* 163862306a36Sopenharmony_ci * Minimize the result gfp by taking the union with the deny flags, 163962306a36Sopenharmony_ci * and the intersection of the allow flags. 164062306a36Sopenharmony_ci */ 164162306a36Sopenharmony_ci result |= (limit_gfp & denyflags); 164262306a36Sopenharmony_ci result |= (huge_gfp & limit_gfp) & allowflags; 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_ci return result; 164562306a36Sopenharmony_ci} 164662306a36Sopenharmony_ci 164762306a36Sopenharmony_cistatic struct folio *shmem_alloc_hugefolio(gfp_t gfp, 164862306a36Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 164962306a36Sopenharmony_ci{ 165062306a36Sopenharmony_ci struct vm_area_struct pvma; 165162306a36Sopenharmony_ci struct address_space *mapping = info->vfs_inode.i_mapping; 165262306a36Sopenharmony_ci pgoff_t hindex; 165362306a36Sopenharmony_ci struct folio *folio; 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_ci hindex = round_down(index, HPAGE_PMD_NR); 165662306a36Sopenharmony_ci if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, 165762306a36Sopenharmony_ci XA_PRESENT)) 165862306a36Sopenharmony_ci return NULL; 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci shmem_pseudo_vma_init(&pvma, info, hindex); 166162306a36Sopenharmony_ci folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, &pvma, 0, true); 166262306a36Sopenharmony_ci shmem_pseudo_vma_destroy(&pvma); 166362306a36Sopenharmony_ci if (!folio) 166462306a36Sopenharmony_ci count_vm_event(THP_FILE_FALLBACK); 166562306a36Sopenharmony_ci return folio; 166662306a36Sopenharmony_ci} 166762306a36Sopenharmony_ci 166862306a36Sopenharmony_cistatic struct folio *shmem_alloc_folio(gfp_t gfp, 166962306a36Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 167062306a36Sopenharmony_ci{ 167162306a36Sopenharmony_ci struct vm_area_struct pvma; 167262306a36Sopenharmony_ci struct folio *folio; 167362306a36Sopenharmony_ci 167462306a36Sopenharmony_ci shmem_pseudo_vma_init(&pvma, info, index); 167562306a36Sopenharmony_ci folio = vma_alloc_folio(gfp, 0, &pvma, 0, false); 167662306a36Sopenharmony_ci shmem_pseudo_vma_destroy(&pvma); 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci return folio; 167962306a36Sopenharmony_ci} 168062306a36Sopenharmony_ci 168162306a36Sopenharmony_cistatic struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode, 168262306a36Sopenharmony_ci pgoff_t index, bool huge) 168362306a36Sopenharmony_ci{ 168462306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 168562306a36Sopenharmony_ci struct folio *folio; 168662306a36Sopenharmony_ci int nr; 168762306a36Sopenharmony_ci int err; 168862306a36Sopenharmony_ci 168962306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 169062306a36Sopenharmony_ci huge = false; 169162306a36Sopenharmony_ci nr = huge ? HPAGE_PMD_NR : 1; 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_ci err = shmem_inode_acct_block(inode, nr); 169462306a36Sopenharmony_ci if (err) 169562306a36Sopenharmony_ci goto failed; 169662306a36Sopenharmony_ci 169762306a36Sopenharmony_ci if (huge) 169862306a36Sopenharmony_ci folio = shmem_alloc_hugefolio(gfp, info, index); 169962306a36Sopenharmony_ci else 170062306a36Sopenharmony_ci folio = shmem_alloc_folio(gfp, info, index); 170162306a36Sopenharmony_ci if (folio) { 170262306a36Sopenharmony_ci __folio_set_locked(folio); 170362306a36Sopenharmony_ci __folio_set_swapbacked(folio); 170462306a36Sopenharmony_ci return folio; 170562306a36Sopenharmony_ci } 170662306a36Sopenharmony_ci 170762306a36Sopenharmony_ci err = -ENOMEM; 170862306a36Sopenharmony_ci shmem_inode_unacct_blocks(inode, nr); 170962306a36Sopenharmony_cifailed: 171062306a36Sopenharmony_ci return ERR_PTR(err); 171162306a36Sopenharmony_ci} 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci/* 171462306a36Sopenharmony_ci * When a page is moved from swapcache to shmem filecache (either by the 171562306a36Sopenharmony_ci * usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of 171662306a36Sopenharmony_ci * shmem_unuse_inode()), it may have been read in earlier from swap, in 171762306a36Sopenharmony_ci * ignorance of the mapping it belongs to. If that mapping has special 171862306a36Sopenharmony_ci * constraints (like the gma500 GEM driver, which requires RAM below 4GB), 171962306a36Sopenharmony_ci * we may need to copy to a suitable page before moving to filecache. 172062306a36Sopenharmony_ci * 172162306a36Sopenharmony_ci * In a future release, this may well be extended to respect cpuset and 172262306a36Sopenharmony_ci * NUMA mempolicy, and applied also to anonymous pages in do_swap_page(); 172362306a36Sopenharmony_ci * but for now it is a simple matter of zone. 172462306a36Sopenharmony_ci */ 172562306a36Sopenharmony_cistatic bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp) 172662306a36Sopenharmony_ci{ 172762306a36Sopenharmony_ci return folio_zonenum(folio) > gfp_zone(gfp); 172862306a36Sopenharmony_ci} 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_cistatic int shmem_replace_folio(struct folio **foliop, gfp_t gfp, 173162306a36Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 173262306a36Sopenharmony_ci{ 173362306a36Sopenharmony_ci struct folio *old, *new; 173462306a36Sopenharmony_ci struct address_space *swap_mapping; 173562306a36Sopenharmony_ci swp_entry_t entry; 173662306a36Sopenharmony_ci pgoff_t swap_index; 173762306a36Sopenharmony_ci int error; 173862306a36Sopenharmony_ci 173962306a36Sopenharmony_ci old = *foliop; 174062306a36Sopenharmony_ci entry = old->swap; 174162306a36Sopenharmony_ci swap_index = swp_offset(entry); 174262306a36Sopenharmony_ci swap_mapping = swap_address_space(entry); 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ci /* 174562306a36Sopenharmony_ci * We have arrived here because our zones are constrained, so don't 174662306a36Sopenharmony_ci * limit chance of success by further cpuset and node constraints. 174762306a36Sopenharmony_ci */ 174862306a36Sopenharmony_ci gfp &= ~GFP_CONSTRAINT_MASK; 174962306a36Sopenharmony_ci VM_BUG_ON_FOLIO(folio_test_large(old), old); 175062306a36Sopenharmony_ci new = shmem_alloc_folio(gfp, info, index); 175162306a36Sopenharmony_ci if (!new) 175262306a36Sopenharmony_ci return -ENOMEM; 175362306a36Sopenharmony_ci 175462306a36Sopenharmony_ci folio_get(new); 175562306a36Sopenharmony_ci folio_copy(new, old); 175662306a36Sopenharmony_ci flush_dcache_folio(new); 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_ci __folio_set_locked(new); 175962306a36Sopenharmony_ci __folio_set_swapbacked(new); 176062306a36Sopenharmony_ci folio_mark_uptodate(new); 176162306a36Sopenharmony_ci new->swap = entry; 176262306a36Sopenharmony_ci folio_set_swapcache(new); 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_ci /* 176562306a36Sopenharmony_ci * Our caller will very soon move newpage out of swapcache, but it's 176662306a36Sopenharmony_ci * a nice clean interface for us to replace oldpage by newpage there. 176762306a36Sopenharmony_ci */ 176862306a36Sopenharmony_ci xa_lock_irq(&swap_mapping->i_pages); 176962306a36Sopenharmony_ci error = shmem_replace_entry(swap_mapping, swap_index, old, new); 177062306a36Sopenharmony_ci if (!error) { 177162306a36Sopenharmony_ci mem_cgroup_migrate(old, new); 177262306a36Sopenharmony_ci __lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1); 177362306a36Sopenharmony_ci __lruvec_stat_mod_folio(new, NR_SHMEM, 1); 177462306a36Sopenharmony_ci __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1); 177562306a36Sopenharmony_ci __lruvec_stat_mod_folio(old, NR_SHMEM, -1); 177662306a36Sopenharmony_ci } 177762306a36Sopenharmony_ci xa_unlock_irq(&swap_mapping->i_pages); 177862306a36Sopenharmony_ci 177962306a36Sopenharmony_ci if (unlikely(error)) { 178062306a36Sopenharmony_ci /* 178162306a36Sopenharmony_ci * Is this possible? I think not, now that our callers check 178262306a36Sopenharmony_ci * both PageSwapCache and page_private after getting page lock; 178362306a36Sopenharmony_ci * but be defensive. Reverse old to newpage for clear and free. 178462306a36Sopenharmony_ci */ 178562306a36Sopenharmony_ci old = new; 178662306a36Sopenharmony_ci } else { 178762306a36Sopenharmony_ci folio_add_lru(new); 178862306a36Sopenharmony_ci *foliop = new; 178962306a36Sopenharmony_ci } 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci folio_clear_swapcache(old); 179262306a36Sopenharmony_ci old->private = NULL; 179362306a36Sopenharmony_ci 179462306a36Sopenharmony_ci folio_unlock(old); 179562306a36Sopenharmony_ci folio_put_refs(old, 2); 179662306a36Sopenharmony_ci return error; 179762306a36Sopenharmony_ci} 179862306a36Sopenharmony_ci 179962306a36Sopenharmony_cistatic void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, 180062306a36Sopenharmony_ci struct folio *folio, swp_entry_t swap) 180162306a36Sopenharmony_ci{ 180262306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 180362306a36Sopenharmony_ci swp_entry_t swapin_error; 180462306a36Sopenharmony_ci void *old; 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci swapin_error = make_poisoned_swp_entry(); 180762306a36Sopenharmony_ci old = xa_cmpxchg_irq(&mapping->i_pages, index, 180862306a36Sopenharmony_ci swp_to_radix_entry(swap), 180962306a36Sopenharmony_ci swp_to_radix_entry(swapin_error), 0); 181062306a36Sopenharmony_ci if (old != swp_to_radix_entry(swap)) 181162306a36Sopenharmony_ci return; 181262306a36Sopenharmony_ci 181362306a36Sopenharmony_ci folio_wait_writeback(folio); 181462306a36Sopenharmony_ci delete_from_swap_cache(folio); 181562306a36Sopenharmony_ci /* 181662306a36Sopenharmony_ci * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks 181762306a36Sopenharmony_ci * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks) 181862306a36Sopenharmony_ci * in shmem_evict_inode(). 181962306a36Sopenharmony_ci */ 182062306a36Sopenharmony_ci shmem_recalc_inode(inode, -1, -1); 182162306a36Sopenharmony_ci swap_free(swap); 182262306a36Sopenharmony_ci} 182362306a36Sopenharmony_ci 182462306a36Sopenharmony_ci/* 182562306a36Sopenharmony_ci * Swap in the folio pointed to by *foliop. 182662306a36Sopenharmony_ci * Caller has to make sure that *foliop contains a valid swapped folio. 182762306a36Sopenharmony_ci * Returns 0 and the folio in foliop if success. On failure, returns the 182862306a36Sopenharmony_ci * error code and NULL in *foliop. 182962306a36Sopenharmony_ci */ 183062306a36Sopenharmony_cistatic int shmem_swapin_folio(struct inode *inode, pgoff_t index, 183162306a36Sopenharmony_ci struct folio **foliop, enum sgp_type sgp, 183262306a36Sopenharmony_ci gfp_t gfp, struct vm_area_struct *vma, 183362306a36Sopenharmony_ci vm_fault_t *fault_type) 183462306a36Sopenharmony_ci{ 183562306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 183662306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 183762306a36Sopenharmony_ci struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL; 183862306a36Sopenharmony_ci struct swap_info_struct *si; 183962306a36Sopenharmony_ci struct folio *folio = NULL; 184062306a36Sopenharmony_ci swp_entry_t swap; 184162306a36Sopenharmony_ci int error; 184262306a36Sopenharmony_ci 184362306a36Sopenharmony_ci VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); 184462306a36Sopenharmony_ci swap = radix_to_swp_entry(*foliop); 184562306a36Sopenharmony_ci *foliop = NULL; 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci if (is_poisoned_swp_entry(swap)) 184862306a36Sopenharmony_ci return -EIO; 184962306a36Sopenharmony_ci 185062306a36Sopenharmony_ci si = get_swap_device(swap); 185162306a36Sopenharmony_ci if (!si) { 185262306a36Sopenharmony_ci if (!shmem_confirm_swap(mapping, index, swap)) 185362306a36Sopenharmony_ci return -EEXIST; 185462306a36Sopenharmony_ci else 185562306a36Sopenharmony_ci return -EINVAL; 185662306a36Sopenharmony_ci } 185762306a36Sopenharmony_ci 185862306a36Sopenharmony_ci /* Look it up and read it in.. */ 185962306a36Sopenharmony_ci folio = swap_cache_get_folio(swap, NULL, 0); 186062306a36Sopenharmony_ci if (!folio) { 186162306a36Sopenharmony_ci /* Or update major stats only when swapin succeeds?? */ 186262306a36Sopenharmony_ci if (fault_type) { 186362306a36Sopenharmony_ci *fault_type |= VM_FAULT_MAJOR; 186462306a36Sopenharmony_ci count_vm_event(PGMAJFAULT); 186562306a36Sopenharmony_ci count_memcg_event_mm(charge_mm, PGMAJFAULT); 186662306a36Sopenharmony_ci } 186762306a36Sopenharmony_ci /* Here we actually start the io */ 186862306a36Sopenharmony_ci folio = shmem_swapin(swap, gfp, info, index); 186962306a36Sopenharmony_ci if (!folio) { 187062306a36Sopenharmony_ci error = -ENOMEM; 187162306a36Sopenharmony_ci goto failed; 187262306a36Sopenharmony_ci } 187362306a36Sopenharmony_ci } 187462306a36Sopenharmony_ci 187562306a36Sopenharmony_ci /* We have to do this with folio locked to prevent races */ 187662306a36Sopenharmony_ci folio_lock(folio); 187762306a36Sopenharmony_ci if (!folio_test_swapcache(folio) || 187862306a36Sopenharmony_ci folio->swap.val != swap.val || 187962306a36Sopenharmony_ci !shmem_confirm_swap(mapping, index, swap)) { 188062306a36Sopenharmony_ci error = -EEXIST; 188162306a36Sopenharmony_ci goto unlock; 188262306a36Sopenharmony_ci } 188362306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) { 188462306a36Sopenharmony_ci error = -EIO; 188562306a36Sopenharmony_ci goto failed; 188662306a36Sopenharmony_ci } 188762306a36Sopenharmony_ci folio_wait_writeback(folio); 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_ci /* 189062306a36Sopenharmony_ci * Some architectures may have to restore extra metadata to the 189162306a36Sopenharmony_ci * folio after reading from swap. 189262306a36Sopenharmony_ci */ 189362306a36Sopenharmony_ci arch_swap_restore(swap, folio); 189462306a36Sopenharmony_ci 189562306a36Sopenharmony_ci if (shmem_should_replace_folio(folio, gfp)) { 189662306a36Sopenharmony_ci error = shmem_replace_folio(&folio, gfp, info, index); 189762306a36Sopenharmony_ci if (error) 189862306a36Sopenharmony_ci goto failed; 189962306a36Sopenharmony_ci } 190062306a36Sopenharmony_ci 190162306a36Sopenharmony_ci error = shmem_add_to_page_cache(folio, mapping, index, 190262306a36Sopenharmony_ci swp_to_radix_entry(swap), gfp, 190362306a36Sopenharmony_ci charge_mm); 190462306a36Sopenharmony_ci if (error) 190562306a36Sopenharmony_ci goto failed; 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci shmem_recalc_inode(inode, 0, -1); 190862306a36Sopenharmony_ci 190962306a36Sopenharmony_ci if (sgp == SGP_WRITE) 191062306a36Sopenharmony_ci folio_mark_accessed(folio); 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci delete_from_swap_cache(folio); 191362306a36Sopenharmony_ci folio_mark_dirty(folio); 191462306a36Sopenharmony_ci swap_free(swap); 191562306a36Sopenharmony_ci put_swap_device(si); 191662306a36Sopenharmony_ci 191762306a36Sopenharmony_ci *foliop = folio; 191862306a36Sopenharmony_ci return 0; 191962306a36Sopenharmony_cifailed: 192062306a36Sopenharmony_ci if (!shmem_confirm_swap(mapping, index, swap)) 192162306a36Sopenharmony_ci error = -EEXIST; 192262306a36Sopenharmony_ci if (error == -EIO) 192362306a36Sopenharmony_ci shmem_set_folio_swapin_error(inode, index, folio, swap); 192462306a36Sopenharmony_ciunlock: 192562306a36Sopenharmony_ci if (folio) { 192662306a36Sopenharmony_ci folio_unlock(folio); 192762306a36Sopenharmony_ci folio_put(folio); 192862306a36Sopenharmony_ci } 192962306a36Sopenharmony_ci put_swap_device(si); 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci return error; 193262306a36Sopenharmony_ci} 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci/* 193562306a36Sopenharmony_ci * shmem_get_folio_gfp - find page in cache, or get from swap, or allocate 193662306a36Sopenharmony_ci * 193762306a36Sopenharmony_ci * If we allocate a new one we do not mark it dirty. That's up to the 193862306a36Sopenharmony_ci * vm. If we swap it in we mark it dirty since we also free the swap 193962306a36Sopenharmony_ci * entry since a page cannot live in both the swap and page cache. 194062306a36Sopenharmony_ci * 194162306a36Sopenharmony_ci * vma, vmf, and fault_type are only supplied by shmem_fault: 194262306a36Sopenharmony_ci * otherwise they are NULL. 194362306a36Sopenharmony_ci */ 194462306a36Sopenharmony_cistatic int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, 194562306a36Sopenharmony_ci struct folio **foliop, enum sgp_type sgp, gfp_t gfp, 194662306a36Sopenharmony_ci struct vm_area_struct *vma, struct vm_fault *vmf, 194762306a36Sopenharmony_ci vm_fault_t *fault_type) 194862306a36Sopenharmony_ci{ 194962306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 195062306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 195162306a36Sopenharmony_ci struct shmem_sb_info *sbinfo; 195262306a36Sopenharmony_ci struct mm_struct *charge_mm; 195362306a36Sopenharmony_ci struct folio *folio; 195462306a36Sopenharmony_ci pgoff_t hindex; 195562306a36Sopenharmony_ci gfp_t huge_gfp; 195662306a36Sopenharmony_ci int error; 195762306a36Sopenharmony_ci int once = 0; 195862306a36Sopenharmony_ci int alloced = 0; 195962306a36Sopenharmony_ci 196062306a36Sopenharmony_ci if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT)) 196162306a36Sopenharmony_ci return -EFBIG; 196262306a36Sopenharmony_cirepeat: 196362306a36Sopenharmony_ci if (sgp <= SGP_CACHE && 196462306a36Sopenharmony_ci ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 196562306a36Sopenharmony_ci return -EINVAL; 196662306a36Sopenharmony_ci } 196762306a36Sopenharmony_ci 196862306a36Sopenharmony_ci sbinfo = SHMEM_SB(inode->i_sb); 196962306a36Sopenharmony_ci charge_mm = vma ? vma->vm_mm : NULL; 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_ci folio = filemap_get_entry(mapping, index); 197262306a36Sopenharmony_ci if (folio && vma && userfaultfd_minor(vma)) { 197362306a36Sopenharmony_ci if (!xa_is_value(folio)) 197462306a36Sopenharmony_ci folio_put(folio); 197562306a36Sopenharmony_ci *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); 197662306a36Sopenharmony_ci return 0; 197762306a36Sopenharmony_ci } 197862306a36Sopenharmony_ci 197962306a36Sopenharmony_ci if (xa_is_value(folio)) { 198062306a36Sopenharmony_ci error = shmem_swapin_folio(inode, index, &folio, 198162306a36Sopenharmony_ci sgp, gfp, vma, fault_type); 198262306a36Sopenharmony_ci if (error == -EEXIST) 198362306a36Sopenharmony_ci goto repeat; 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci *foliop = folio; 198662306a36Sopenharmony_ci return error; 198762306a36Sopenharmony_ci } 198862306a36Sopenharmony_ci 198962306a36Sopenharmony_ci if (folio) { 199062306a36Sopenharmony_ci folio_lock(folio); 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci /* Has the folio been truncated or swapped out? */ 199362306a36Sopenharmony_ci if (unlikely(folio->mapping != mapping)) { 199462306a36Sopenharmony_ci folio_unlock(folio); 199562306a36Sopenharmony_ci folio_put(folio); 199662306a36Sopenharmony_ci goto repeat; 199762306a36Sopenharmony_ci } 199862306a36Sopenharmony_ci if (sgp == SGP_WRITE) 199962306a36Sopenharmony_ci folio_mark_accessed(folio); 200062306a36Sopenharmony_ci if (folio_test_uptodate(folio)) 200162306a36Sopenharmony_ci goto out; 200262306a36Sopenharmony_ci /* fallocated folio */ 200362306a36Sopenharmony_ci if (sgp != SGP_READ) 200462306a36Sopenharmony_ci goto clear; 200562306a36Sopenharmony_ci folio_unlock(folio); 200662306a36Sopenharmony_ci folio_put(folio); 200762306a36Sopenharmony_ci } 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci /* 201062306a36Sopenharmony_ci * SGP_READ: succeed on hole, with NULL folio, letting caller zero. 201162306a36Sopenharmony_ci * SGP_NOALLOC: fail on hole, with NULL folio, letting caller fail. 201262306a36Sopenharmony_ci */ 201362306a36Sopenharmony_ci *foliop = NULL; 201462306a36Sopenharmony_ci if (sgp == SGP_READ) 201562306a36Sopenharmony_ci return 0; 201662306a36Sopenharmony_ci if (sgp == SGP_NOALLOC) 201762306a36Sopenharmony_ci return -ENOENT; 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_ci /* 202062306a36Sopenharmony_ci * Fast cache lookup and swap lookup did not find it: allocate. 202162306a36Sopenharmony_ci */ 202262306a36Sopenharmony_ci 202362306a36Sopenharmony_ci if (vma && userfaultfd_missing(vma)) { 202462306a36Sopenharmony_ci *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); 202562306a36Sopenharmony_ci return 0; 202662306a36Sopenharmony_ci } 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci if (!shmem_is_huge(inode, index, false, 202962306a36Sopenharmony_ci vma ? vma->vm_mm : NULL, vma ? vma->vm_flags : 0)) 203062306a36Sopenharmony_ci goto alloc_nohuge; 203162306a36Sopenharmony_ci 203262306a36Sopenharmony_ci huge_gfp = vma_thp_gfp_mask(vma); 203362306a36Sopenharmony_ci huge_gfp = limit_gfp_mask(huge_gfp, gfp); 203462306a36Sopenharmony_ci folio = shmem_alloc_and_acct_folio(huge_gfp, inode, index, true); 203562306a36Sopenharmony_ci if (IS_ERR(folio)) { 203662306a36Sopenharmony_cialloc_nohuge: 203762306a36Sopenharmony_ci folio = shmem_alloc_and_acct_folio(gfp, inode, index, false); 203862306a36Sopenharmony_ci } 203962306a36Sopenharmony_ci if (IS_ERR(folio)) { 204062306a36Sopenharmony_ci int retry = 5; 204162306a36Sopenharmony_ci 204262306a36Sopenharmony_ci error = PTR_ERR(folio); 204362306a36Sopenharmony_ci folio = NULL; 204462306a36Sopenharmony_ci if (error != -ENOSPC) 204562306a36Sopenharmony_ci goto unlock; 204662306a36Sopenharmony_ci /* 204762306a36Sopenharmony_ci * Try to reclaim some space by splitting a large folio 204862306a36Sopenharmony_ci * beyond i_size on the filesystem. 204962306a36Sopenharmony_ci */ 205062306a36Sopenharmony_ci while (retry--) { 205162306a36Sopenharmony_ci int ret; 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_ci ret = shmem_unused_huge_shrink(sbinfo, NULL, 1); 205462306a36Sopenharmony_ci if (ret == SHRINK_STOP) 205562306a36Sopenharmony_ci break; 205662306a36Sopenharmony_ci if (ret) 205762306a36Sopenharmony_ci goto alloc_nohuge; 205862306a36Sopenharmony_ci } 205962306a36Sopenharmony_ci goto unlock; 206062306a36Sopenharmony_ci } 206162306a36Sopenharmony_ci 206262306a36Sopenharmony_ci hindex = round_down(index, folio_nr_pages(folio)); 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_ci if (sgp == SGP_WRITE) 206562306a36Sopenharmony_ci __folio_set_referenced(folio); 206662306a36Sopenharmony_ci 206762306a36Sopenharmony_ci error = shmem_add_to_page_cache(folio, mapping, hindex, 206862306a36Sopenharmony_ci NULL, gfp & GFP_RECLAIM_MASK, 206962306a36Sopenharmony_ci charge_mm); 207062306a36Sopenharmony_ci if (error) 207162306a36Sopenharmony_ci goto unacct; 207262306a36Sopenharmony_ci 207362306a36Sopenharmony_ci folio_add_lru(folio); 207462306a36Sopenharmony_ci shmem_recalc_inode(inode, folio_nr_pages(folio), 0); 207562306a36Sopenharmony_ci alloced = true; 207662306a36Sopenharmony_ci 207762306a36Sopenharmony_ci if (folio_test_pmd_mappable(folio) && 207862306a36Sopenharmony_ci DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < 207962306a36Sopenharmony_ci folio_next_index(folio) - 1) { 208062306a36Sopenharmony_ci /* 208162306a36Sopenharmony_ci * Part of the large folio is beyond i_size: subject 208262306a36Sopenharmony_ci * to shrink under memory pressure. 208362306a36Sopenharmony_ci */ 208462306a36Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 208562306a36Sopenharmony_ci /* 208662306a36Sopenharmony_ci * _careful to defend against unlocked access to 208762306a36Sopenharmony_ci * ->shrink_list in shmem_unused_huge_shrink() 208862306a36Sopenharmony_ci */ 208962306a36Sopenharmony_ci if (list_empty_careful(&info->shrinklist)) { 209062306a36Sopenharmony_ci list_add_tail(&info->shrinklist, 209162306a36Sopenharmony_ci &sbinfo->shrinklist); 209262306a36Sopenharmony_ci sbinfo->shrinklist_len++; 209362306a36Sopenharmony_ci } 209462306a36Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 209562306a36Sopenharmony_ci } 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci /* 209862306a36Sopenharmony_ci * Let SGP_FALLOC use the SGP_WRITE optimization on a new folio. 209962306a36Sopenharmony_ci */ 210062306a36Sopenharmony_ci if (sgp == SGP_FALLOC) 210162306a36Sopenharmony_ci sgp = SGP_WRITE; 210262306a36Sopenharmony_ciclear: 210362306a36Sopenharmony_ci /* 210462306a36Sopenharmony_ci * Let SGP_WRITE caller clear ends if write does not fill folio; 210562306a36Sopenharmony_ci * but SGP_FALLOC on a folio fallocated earlier must initialize 210662306a36Sopenharmony_ci * it now, lest undo on failure cancel our earlier guarantee. 210762306a36Sopenharmony_ci */ 210862306a36Sopenharmony_ci if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) { 210962306a36Sopenharmony_ci long i, n = folio_nr_pages(folio); 211062306a36Sopenharmony_ci 211162306a36Sopenharmony_ci for (i = 0; i < n; i++) 211262306a36Sopenharmony_ci clear_highpage(folio_page(folio, i)); 211362306a36Sopenharmony_ci flush_dcache_folio(folio); 211462306a36Sopenharmony_ci folio_mark_uptodate(folio); 211562306a36Sopenharmony_ci } 211662306a36Sopenharmony_ci 211762306a36Sopenharmony_ci /* Perhaps the file has been truncated since we checked */ 211862306a36Sopenharmony_ci if (sgp <= SGP_CACHE && 211962306a36Sopenharmony_ci ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 212062306a36Sopenharmony_ci if (alloced) { 212162306a36Sopenharmony_ci folio_clear_dirty(folio); 212262306a36Sopenharmony_ci filemap_remove_folio(folio); 212362306a36Sopenharmony_ci shmem_recalc_inode(inode, 0, 0); 212462306a36Sopenharmony_ci } 212562306a36Sopenharmony_ci error = -EINVAL; 212662306a36Sopenharmony_ci goto unlock; 212762306a36Sopenharmony_ci } 212862306a36Sopenharmony_ciout: 212962306a36Sopenharmony_ci *foliop = folio; 213062306a36Sopenharmony_ci return 0; 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_ci /* 213362306a36Sopenharmony_ci * Error recovery. 213462306a36Sopenharmony_ci */ 213562306a36Sopenharmony_ciunacct: 213662306a36Sopenharmony_ci shmem_inode_unacct_blocks(inode, folio_nr_pages(folio)); 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_ci if (folio_test_large(folio)) { 213962306a36Sopenharmony_ci folio_unlock(folio); 214062306a36Sopenharmony_ci folio_put(folio); 214162306a36Sopenharmony_ci goto alloc_nohuge; 214262306a36Sopenharmony_ci } 214362306a36Sopenharmony_ciunlock: 214462306a36Sopenharmony_ci if (folio) { 214562306a36Sopenharmony_ci folio_unlock(folio); 214662306a36Sopenharmony_ci folio_put(folio); 214762306a36Sopenharmony_ci } 214862306a36Sopenharmony_ci if (error == -ENOSPC && !once++) { 214962306a36Sopenharmony_ci shmem_recalc_inode(inode, 0, 0); 215062306a36Sopenharmony_ci goto repeat; 215162306a36Sopenharmony_ci } 215262306a36Sopenharmony_ci if (error == -EEXIST) 215362306a36Sopenharmony_ci goto repeat; 215462306a36Sopenharmony_ci return error; 215562306a36Sopenharmony_ci} 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ciint shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, 215862306a36Sopenharmony_ci enum sgp_type sgp) 215962306a36Sopenharmony_ci{ 216062306a36Sopenharmony_ci return shmem_get_folio_gfp(inode, index, foliop, sgp, 216162306a36Sopenharmony_ci mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL); 216262306a36Sopenharmony_ci} 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_ci/* 216562306a36Sopenharmony_ci * This is like autoremove_wake_function, but it removes the wait queue 216662306a36Sopenharmony_ci * entry unconditionally - even if something else had already woken the 216762306a36Sopenharmony_ci * target. 216862306a36Sopenharmony_ci */ 216962306a36Sopenharmony_cistatic int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) 217062306a36Sopenharmony_ci{ 217162306a36Sopenharmony_ci int ret = default_wake_function(wait, mode, sync, key); 217262306a36Sopenharmony_ci list_del_init(&wait->entry); 217362306a36Sopenharmony_ci return ret; 217462306a36Sopenharmony_ci} 217562306a36Sopenharmony_ci 217662306a36Sopenharmony_cistatic vm_fault_t shmem_fault(struct vm_fault *vmf) 217762306a36Sopenharmony_ci{ 217862306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 217962306a36Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 218062306a36Sopenharmony_ci gfp_t gfp = mapping_gfp_mask(inode->i_mapping); 218162306a36Sopenharmony_ci struct folio *folio = NULL; 218262306a36Sopenharmony_ci int err; 218362306a36Sopenharmony_ci vm_fault_t ret = VM_FAULT_LOCKED; 218462306a36Sopenharmony_ci 218562306a36Sopenharmony_ci /* 218662306a36Sopenharmony_ci * Trinity finds that probing a hole which tmpfs is punching can 218762306a36Sopenharmony_ci * prevent the hole-punch from ever completing: which in turn 218862306a36Sopenharmony_ci * locks writers out with its hold on i_rwsem. So refrain from 218962306a36Sopenharmony_ci * faulting pages into the hole while it's being punched. Although 219062306a36Sopenharmony_ci * shmem_undo_range() does remove the additions, it may be unable to 219162306a36Sopenharmony_ci * keep up, as each new page needs its own unmap_mapping_range() call, 219262306a36Sopenharmony_ci * and the i_mmap tree grows ever slower to scan if new vmas are added. 219362306a36Sopenharmony_ci * 219462306a36Sopenharmony_ci * It does not matter if we sometimes reach this check just before the 219562306a36Sopenharmony_ci * hole-punch begins, so that one fault then races with the punch: 219662306a36Sopenharmony_ci * we just need to make racing faults a rare case. 219762306a36Sopenharmony_ci * 219862306a36Sopenharmony_ci * The implementation below would be much simpler if we just used a 219962306a36Sopenharmony_ci * standard mutex or completion: but we cannot take i_rwsem in fault, 220062306a36Sopenharmony_ci * and bloating every shmem inode for this unlikely case would be sad. 220162306a36Sopenharmony_ci */ 220262306a36Sopenharmony_ci if (unlikely(inode->i_private)) { 220362306a36Sopenharmony_ci struct shmem_falloc *shmem_falloc; 220462306a36Sopenharmony_ci 220562306a36Sopenharmony_ci spin_lock(&inode->i_lock); 220662306a36Sopenharmony_ci shmem_falloc = inode->i_private; 220762306a36Sopenharmony_ci if (shmem_falloc && 220862306a36Sopenharmony_ci shmem_falloc->waitq && 220962306a36Sopenharmony_ci vmf->pgoff >= shmem_falloc->start && 221062306a36Sopenharmony_ci vmf->pgoff < shmem_falloc->next) { 221162306a36Sopenharmony_ci struct file *fpin; 221262306a36Sopenharmony_ci wait_queue_head_t *shmem_falloc_waitq; 221362306a36Sopenharmony_ci DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); 221462306a36Sopenharmony_ci 221562306a36Sopenharmony_ci ret = VM_FAULT_NOPAGE; 221662306a36Sopenharmony_ci fpin = maybe_unlock_mmap_for_io(vmf, NULL); 221762306a36Sopenharmony_ci if (fpin) 221862306a36Sopenharmony_ci ret = VM_FAULT_RETRY; 221962306a36Sopenharmony_ci 222062306a36Sopenharmony_ci shmem_falloc_waitq = shmem_falloc->waitq; 222162306a36Sopenharmony_ci prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, 222262306a36Sopenharmony_ci TASK_UNINTERRUPTIBLE); 222362306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 222462306a36Sopenharmony_ci schedule(); 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci /* 222762306a36Sopenharmony_ci * shmem_falloc_waitq points into the shmem_fallocate() 222862306a36Sopenharmony_ci * stack of the hole-punching task: shmem_falloc_waitq 222962306a36Sopenharmony_ci * is usually invalid by the time we reach here, but 223062306a36Sopenharmony_ci * finish_wait() does not dereference it in that case; 223162306a36Sopenharmony_ci * though i_lock needed lest racing with wake_up_all(). 223262306a36Sopenharmony_ci */ 223362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 223462306a36Sopenharmony_ci finish_wait(shmem_falloc_waitq, &shmem_fault_wait); 223562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 223662306a36Sopenharmony_ci 223762306a36Sopenharmony_ci if (fpin) 223862306a36Sopenharmony_ci fput(fpin); 223962306a36Sopenharmony_ci return ret; 224062306a36Sopenharmony_ci } 224162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 224262306a36Sopenharmony_ci } 224362306a36Sopenharmony_ci 224462306a36Sopenharmony_ci err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE, 224562306a36Sopenharmony_ci gfp, vma, vmf, &ret); 224662306a36Sopenharmony_ci if (err) 224762306a36Sopenharmony_ci return vmf_error(err); 224862306a36Sopenharmony_ci if (folio) 224962306a36Sopenharmony_ci vmf->page = folio_file_page(folio, vmf->pgoff); 225062306a36Sopenharmony_ci return ret; 225162306a36Sopenharmony_ci} 225262306a36Sopenharmony_ci 225362306a36Sopenharmony_ciunsigned long shmem_get_unmapped_area(struct file *file, 225462306a36Sopenharmony_ci unsigned long uaddr, unsigned long len, 225562306a36Sopenharmony_ci unsigned long pgoff, unsigned long flags) 225662306a36Sopenharmony_ci{ 225762306a36Sopenharmony_ci unsigned long (*get_area)(struct file *, 225862306a36Sopenharmony_ci unsigned long, unsigned long, unsigned long, unsigned long); 225962306a36Sopenharmony_ci unsigned long addr; 226062306a36Sopenharmony_ci unsigned long offset; 226162306a36Sopenharmony_ci unsigned long inflated_len; 226262306a36Sopenharmony_ci unsigned long inflated_addr; 226362306a36Sopenharmony_ci unsigned long inflated_offset; 226462306a36Sopenharmony_ci 226562306a36Sopenharmony_ci if (len > TASK_SIZE) 226662306a36Sopenharmony_ci return -ENOMEM; 226762306a36Sopenharmony_ci 226862306a36Sopenharmony_ci get_area = current->mm->get_unmapped_area; 226962306a36Sopenharmony_ci addr = get_area(file, uaddr, len, pgoff, flags); 227062306a36Sopenharmony_ci 227162306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 227262306a36Sopenharmony_ci return addr; 227362306a36Sopenharmony_ci if (IS_ERR_VALUE(addr)) 227462306a36Sopenharmony_ci return addr; 227562306a36Sopenharmony_ci if (addr & ~PAGE_MASK) 227662306a36Sopenharmony_ci return addr; 227762306a36Sopenharmony_ci if (addr > TASK_SIZE - len) 227862306a36Sopenharmony_ci return addr; 227962306a36Sopenharmony_ci 228062306a36Sopenharmony_ci if (shmem_huge == SHMEM_HUGE_DENY) 228162306a36Sopenharmony_ci return addr; 228262306a36Sopenharmony_ci if (len < HPAGE_PMD_SIZE) 228362306a36Sopenharmony_ci return addr; 228462306a36Sopenharmony_ci if (flags & MAP_FIXED) 228562306a36Sopenharmony_ci return addr; 228662306a36Sopenharmony_ci /* 228762306a36Sopenharmony_ci * Our priority is to support MAP_SHARED mapped hugely; 228862306a36Sopenharmony_ci * and support MAP_PRIVATE mapped hugely too, until it is COWed. 228962306a36Sopenharmony_ci * But if caller specified an address hint and we allocated area there 229062306a36Sopenharmony_ci * successfully, respect that as before. 229162306a36Sopenharmony_ci */ 229262306a36Sopenharmony_ci if (uaddr == addr) 229362306a36Sopenharmony_ci return addr; 229462306a36Sopenharmony_ci 229562306a36Sopenharmony_ci if (shmem_huge != SHMEM_HUGE_FORCE) { 229662306a36Sopenharmony_ci struct super_block *sb; 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_ci if (file) { 229962306a36Sopenharmony_ci VM_BUG_ON(file->f_op != &shmem_file_operations); 230062306a36Sopenharmony_ci sb = file_inode(file)->i_sb; 230162306a36Sopenharmony_ci } else { 230262306a36Sopenharmony_ci /* 230362306a36Sopenharmony_ci * Called directly from mm/mmap.c, or drivers/char/mem.c 230462306a36Sopenharmony_ci * for "/dev/zero", to create a shared anonymous object. 230562306a36Sopenharmony_ci */ 230662306a36Sopenharmony_ci if (IS_ERR(shm_mnt)) 230762306a36Sopenharmony_ci return addr; 230862306a36Sopenharmony_ci sb = shm_mnt->mnt_sb; 230962306a36Sopenharmony_ci } 231062306a36Sopenharmony_ci if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER) 231162306a36Sopenharmony_ci return addr; 231262306a36Sopenharmony_ci } 231362306a36Sopenharmony_ci 231462306a36Sopenharmony_ci offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1); 231562306a36Sopenharmony_ci if (offset && offset + len < 2 * HPAGE_PMD_SIZE) 231662306a36Sopenharmony_ci return addr; 231762306a36Sopenharmony_ci if ((addr & (HPAGE_PMD_SIZE-1)) == offset) 231862306a36Sopenharmony_ci return addr; 231962306a36Sopenharmony_ci 232062306a36Sopenharmony_ci inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE; 232162306a36Sopenharmony_ci if (inflated_len > TASK_SIZE) 232262306a36Sopenharmony_ci return addr; 232362306a36Sopenharmony_ci if (inflated_len < len) 232462306a36Sopenharmony_ci return addr; 232562306a36Sopenharmony_ci 232662306a36Sopenharmony_ci inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); 232762306a36Sopenharmony_ci if (IS_ERR_VALUE(inflated_addr)) 232862306a36Sopenharmony_ci return addr; 232962306a36Sopenharmony_ci if (inflated_addr & ~PAGE_MASK) 233062306a36Sopenharmony_ci return addr; 233162306a36Sopenharmony_ci 233262306a36Sopenharmony_ci inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1); 233362306a36Sopenharmony_ci inflated_addr += offset - inflated_offset; 233462306a36Sopenharmony_ci if (inflated_offset > offset) 233562306a36Sopenharmony_ci inflated_addr += HPAGE_PMD_SIZE; 233662306a36Sopenharmony_ci 233762306a36Sopenharmony_ci if (inflated_addr > TASK_SIZE - len) 233862306a36Sopenharmony_ci return addr; 233962306a36Sopenharmony_ci return inflated_addr; 234062306a36Sopenharmony_ci} 234162306a36Sopenharmony_ci 234262306a36Sopenharmony_ci#ifdef CONFIG_NUMA 234362306a36Sopenharmony_cistatic int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) 234462306a36Sopenharmony_ci{ 234562306a36Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 234662306a36Sopenharmony_ci return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); 234762306a36Sopenharmony_ci} 234862306a36Sopenharmony_ci 234962306a36Sopenharmony_cistatic struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 235062306a36Sopenharmony_ci unsigned long addr) 235162306a36Sopenharmony_ci{ 235262306a36Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 235362306a36Sopenharmony_ci pgoff_t index; 235462306a36Sopenharmony_ci 235562306a36Sopenharmony_ci index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 235662306a36Sopenharmony_ci return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); 235762306a36Sopenharmony_ci} 235862306a36Sopenharmony_ci#endif 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_ciint shmem_lock(struct file *file, int lock, struct ucounts *ucounts) 236162306a36Sopenharmony_ci{ 236262306a36Sopenharmony_ci struct inode *inode = file_inode(file); 236362306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 236462306a36Sopenharmony_ci int retval = -ENOMEM; 236562306a36Sopenharmony_ci 236662306a36Sopenharmony_ci /* 236762306a36Sopenharmony_ci * What serializes the accesses to info->flags? 236862306a36Sopenharmony_ci * ipc_lock_object() when called from shmctl_do_lock(), 236962306a36Sopenharmony_ci * no serialization needed when called from shm_destroy(). 237062306a36Sopenharmony_ci */ 237162306a36Sopenharmony_ci if (lock && !(info->flags & VM_LOCKED)) { 237262306a36Sopenharmony_ci if (!user_shm_lock(inode->i_size, ucounts)) 237362306a36Sopenharmony_ci goto out_nomem; 237462306a36Sopenharmony_ci info->flags |= VM_LOCKED; 237562306a36Sopenharmony_ci mapping_set_unevictable(file->f_mapping); 237662306a36Sopenharmony_ci } 237762306a36Sopenharmony_ci if (!lock && (info->flags & VM_LOCKED) && ucounts) { 237862306a36Sopenharmony_ci user_shm_unlock(inode->i_size, ucounts); 237962306a36Sopenharmony_ci info->flags &= ~VM_LOCKED; 238062306a36Sopenharmony_ci mapping_clear_unevictable(file->f_mapping); 238162306a36Sopenharmony_ci } 238262306a36Sopenharmony_ci retval = 0; 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ciout_nomem: 238562306a36Sopenharmony_ci return retval; 238662306a36Sopenharmony_ci} 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_cistatic int shmem_mmap(struct file *file, struct vm_area_struct *vma) 238962306a36Sopenharmony_ci{ 239062306a36Sopenharmony_ci struct inode *inode = file_inode(file); 239162306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 239262306a36Sopenharmony_ci int ret; 239362306a36Sopenharmony_ci 239462306a36Sopenharmony_ci ret = seal_check_future_write(info->seals, vma); 239562306a36Sopenharmony_ci if (ret) 239662306a36Sopenharmony_ci return ret; 239762306a36Sopenharmony_ci 239862306a36Sopenharmony_ci /* arm64 - allow memory tagging on RAM-based files */ 239962306a36Sopenharmony_ci vm_flags_set(vma, VM_MTE_ALLOWED); 240062306a36Sopenharmony_ci 240162306a36Sopenharmony_ci file_accessed(file); 240262306a36Sopenharmony_ci /* This is anonymous shared memory if it is unlinked at the time of mmap */ 240362306a36Sopenharmony_ci if (inode->i_nlink) 240462306a36Sopenharmony_ci vma->vm_ops = &shmem_vm_ops; 240562306a36Sopenharmony_ci else 240662306a36Sopenharmony_ci vma->vm_ops = &shmem_anon_vm_ops; 240762306a36Sopenharmony_ci return 0; 240862306a36Sopenharmony_ci} 240962306a36Sopenharmony_ci 241062306a36Sopenharmony_cistatic int shmem_file_open(struct inode *inode, struct file *file) 241162306a36Sopenharmony_ci{ 241262306a36Sopenharmony_ci file->f_mode |= FMODE_CAN_ODIRECT; 241362306a36Sopenharmony_ci return generic_file_open(inode, file); 241462306a36Sopenharmony_ci} 241562306a36Sopenharmony_ci 241662306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 241762306a36Sopenharmony_cistatic int shmem_initxattrs(struct inode *, const struct xattr *, void *); 241862306a36Sopenharmony_ci 241962306a36Sopenharmony_ci/* 242062306a36Sopenharmony_ci * chattr's fsflags are unrelated to extended attributes, 242162306a36Sopenharmony_ci * but tmpfs has chosen to enable them under the same config option. 242262306a36Sopenharmony_ci */ 242362306a36Sopenharmony_cistatic void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) 242462306a36Sopenharmony_ci{ 242562306a36Sopenharmony_ci unsigned int i_flags = 0; 242662306a36Sopenharmony_ci 242762306a36Sopenharmony_ci if (fsflags & FS_NOATIME_FL) 242862306a36Sopenharmony_ci i_flags |= S_NOATIME; 242962306a36Sopenharmony_ci if (fsflags & FS_APPEND_FL) 243062306a36Sopenharmony_ci i_flags |= S_APPEND; 243162306a36Sopenharmony_ci if (fsflags & FS_IMMUTABLE_FL) 243262306a36Sopenharmony_ci i_flags |= S_IMMUTABLE; 243362306a36Sopenharmony_ci /* 243462306a36Sopenharmony_ci * But FS_NODUMP_FL does not require any action in i_flags. 243562306a36Sopenharmony_ci */ 243662306a36Sopenharmony_ci inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE); 243762306a36Sopenharmony_ci} 243862306a36Sopenharmony_ci#else 243962306a36Sopenharmony_cistatic void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) 244062306a36Sopenharmony_ci{ 244162306a36Sopenharmony_ci} 244262306a36Sopenharmony_ci#define shmem_initxattrs NULL 244362306a36Sopenharmony_ci#endif 244462306a36Sopenharmony_ci 244562306a36Sopenharmony_cistatic struct offset_ctx *shmem_get_offset_ctx(struct inode *inode) 244662306a36Sopenharmony_ci{ 244762306a36Sopenharmony_ci return &SHMEM_I(inode)->dir_offsets; 244862306a36Sopenharmony_ci} 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_cistatic struct inode *__shmem_get_inode(struct mnt_idmap *idmap, 245162306a36Sopenharmony_ci struct super_block *sb, 245262306a36Sopenharmony_ci struct inode *dir, umode_t mode, 245362306a36Sopenharmony_ci dev_t dev, unsigned long flags) 245462306a36Sopenharmony_ci{ 245562306a36Sopenharmony_ci struct inode *inode; 245662306a36Sopenharmony_ci struct shmem_inode_info *info; 245762306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 245862306a36Sopenharmony_ci ino_t ino; 245962306a36Sopenharmony_ci int err; 246062306a36Sopenharmony_ci 246162306a36Sopenharmony_ci err = shmem_reserve_inode(sb, &ino); 246262306a36Sopenharmony_ci if (err) 246362306a36Sopenharmony_ci return ERR_PTR(err); 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci 246662306a36Sopenharmony_ci inode = new_inode(sb); 246762306a36Sopenharmony_ci if (!inode) { 246862306a36Sopenharmony_ci shmem_free_inode(sb, 0); 246962306a36Sopenharmony_ci return ERR_PTR(-ENOSPC); 247062306a36Sopenharmony_ci } 247162306a36Sopenharmony_ci 247262306a36Sopenharmony_ci inode->i_ino = ino; 247362306a36Sopenharmony_ci inode_init_owner(idmap, inode, dir, mode); 247462306a36Sopenharmony_ci inode->i_blocks = 0; 247562306a36Sopenharmony_ci inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); 247662306a36Sopenharmony_ci inode->i_generation = get_random_u32(); 247762306a36Sopenharmony_ci info = SHMEM_I(inode); 247862306a36Sopenharmony_ci memset(info, 0, (char *)inode - (char *)info); 247962306a36Sopenharmony_ci spin_lock_init(&info->lock); 248062306a36Sopenharmony_ci atomic_set(&info->stop_eviction, 0); 248162306a36Sopenharmony_ci info->seals = F_SEAL_SEAL; 248262306a36Sopenharmony_ci info->flags = flags & VM_NORESERVE; 248362306a36Sopenharmony_ci info->i_crtime = inode->i_mtime; 248462306a36Sopenharmony_ci info->fsflags = (dir == NULL) ? 0 : 248562306a36Sopenharmony_ci SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED; 248662306a36Sopenharmony_ci if (info->fsflags) 248762306a36Sopenharmony_ci shmem_set_inode_flags(inode, info->fsflags); 248862306a36Sopenharmony_ci INIT_LIST_HEAD(&info->shrinklist); 248962306a36Sopenharmony_ci INIT_LIST_HEAD(&info->swaplist); 249062306a36Sopenharmony_ci INIT_LIST_HEAD(&info->swaplist); 249162306a36Sopenharmony_ci if (sbinfo->noswap) 249262306a36Sopenharmony_ci mapping_set_unevictable(inode->i_mapping); 249362306a36Sopenharmony_ci simple_xattrs_init(&info->xattrs); 249462306a36Sopenharmony_ci cache_no_acl(inode); 249562306a36Sopenharmony_ci mapping_set_large_folios(inode->i_mapping); 249662306a36Sopenharmony_ci 249762306a36Sopenharmony_ci switch (mode & S_IFMT) { 249862306a36Sopenharmony_ci default: 249962306a36Sopenharmony_ci inode->i_op = &shmem_special_inode_operations; 250062306a36Sopenharmony_ci init_special_inode(inode, mode, dev); 250162306a36Sopenharmony_ci break; 250262306a36Sopenharmony_ci case S_IFREG: 250362306a36Sopenharmony_ci inode->i_mapping->a_ops = &shmem_aops; 250462306a36Sopenharmony_ci inode->i_op = &shmem_inode_operations; 250562306a36Sopenharmony_ci inode->i_fop = &shmem_file_operations; 250662306a36Sopenharmony_ci mpol_shared_policy_init(&info->policy, 250762306a36Sopenharmony_ci shmem_get_sbmpol(sbinfo)); 250862306a36Sopenharmony_ci break; 250962306a36Sopenharmony_ci case S_IFDIR: 251062306a36Sopenharmony_ci inc_nlink(inode); 251162306a36Sopenharmony_ci /* Some things misbehave if size == 0 on a directory */ 251262306a36Sopenharmony_ci inode->i_size = 2 * BOGO_DIRENT_SIZE; 251362306a36Sopenharmony_ci inode->i_op = &shmem_dir_inode_operations; 251462306a36Sopenharmony_ci inode->i_fop = &simple_offset_dir_operations; 251562306a36Sopenharmony_ci simple_offset_init(shmem_get_offset_ctx(inode)); 251662306a36Sopenharmony_ci break; 251762306a36Sopenharmony_ci case S_IFLNK: 251862306a36Sopenharmony_ci /* 251962306a36Sopenharmony_ci * Must not load anything in the rbtree, 252062306a36Sopenharmony_ci * mpol_free_shared_policy will not be called. 252162306a36Sopenharmony_ci */ 252262306a36Sopenharmony_ci mpol_shared_policy_init(&info->policy, NULL); 252362306a36Sopenharmony_ci break; 252462306a36Sopenharmony_ci } 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_ci lockdep_annotate_inode_mutex_key(inode); 252762306a36Sopenharmony_ci return inode; 252862306a36Sopenharmony_ci} 252962306a36Sopenharmony_ci 253062306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 253162306a36Sopenharmony_cistatic struct inode *shmem_get_inode(struct mnt_idmap *idmap, 253262306a36Sopenharmony_ci struct super_block *sb, struct inode *dir, 253362306a36Sopenharmony_ci umode_t mode, dev_t dev, unsigned long flags) 253462306a36Sopenharmony_ci{ 253562306a36Sopenharmony_ci int err; 253662306a36Sopenharmony_ci struct inode *inode; 253762306a36Sopenharmony_ci 253862306a36Sopenharmony_ci inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags); 253962306a36Sopenharmony_ci if (IS_ERR(inode)) 254062306a36Sopenharmony_ci return inode; 254162306a36Sopenharmony_ci 254262306a36Sopenharmony_ci err = dquot_initialize(inode); 254362306a36Sopenharmony_ci if (err) 254462306a36Sopenharmony_ci goto errout; 254562306a36Sopenharmony_ci 254662306a36Sopenharmony_ci err = dquot_alloc_inode(inode); 254762306a36Sopenharmony_ci if (err) { 254862306a36Sopenharmony_ci dquot_drop(inode); 254962306a36Sopenharmony_ci goto errout; 255062306a36Sopenharmony_ci } 255162306a36Sopenharmony_ci return inode; 255262306a36Sopenharmony_ci 255362306a36Sopenharmony_cierrout: 255462306a36Sopenharmony_ci inode->i_flags |= S_NOQUOTA; 255562306a36Sopenharmony_ci iput(inode); 255662306a36Sopenharmony_ci return ERR_PTR(err); 255762306a36Sopenharmony_ci} 255862306a36Sopenharmony_ci#else 255962306a36Sopenharmony_cistatic inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, 256062306a36Sopenharmony_ci struct super_block *sb, struct inode *dir, 256162306a36Sopenharmony_ci umode_t mode, dev_t dev, unsigned long flags) 256262306a36Sopenharmony_ci{ 256362306a36Sopenharmony_ci return __shmem_get_inode(idmap, sb, dir, mode, dev, flags); 256462306a36Sopenharmony_ci} 256562306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */ 256662306a36Sopenharmony_ci 256762306a36Sopenharmony_ci#ifdef CONFIG_USERFAULTFD 256862306a36Sopenharmony_ciint shmem_mfill_atomic_pte(pmd_t *dst_pmd, 256962306a36Sopenharmony_ci struct vm_area_struct *dst_vma, 257062306a36Sopenharmony_ci unsigned long dst_addr, 257162306a36Sopenharmony_ci unsigned long src_addr, 257262306a36Sopenharmony_ci uffd_flags_t flags, 257362306a36Sopenharmony_ci struct folio **foliop) 257462306a36Sopenharmony_ci{ 257562306a36Sopenharmony_ci struct inode *inode = file_inode(dst_vma->vm_file); 257662306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 257762306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 257862306a36Sopenharmony_ci gfp_t gfp = mapping_gfp_mask(mapping); 257962306a36Sopenharmony_ci pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); 258062306a36Sopenharmony_ci void *page_kaddr; 258162306a36Sopenharmony_ci struct folio *folio; 258262306a36Sopenharmony_ci int ret; 258362306a36Sopenharmony_ci pgoff_t max_off; 258462306a36Sopenharmony_ci 258562306a36Sopenharmony_ci if (shmem_inode_acct_block(inode, 1)) { 258662306a36Sopenharmony_ci /* 258762306a36Sopenharmony_ci * We may have got a page, returned -ENOENT triggering a retry, 258862306a36Sopenharmony_ci * and now we find ourselves with -ENOMEM. Release the page, to 258962306a36Sopenharmony_ci * avoid a BUG_ON in our caller. 259062306a36Sopenharmony_ci */ 259162306a36Sopenharmony_ci if (unlikely(*foliop)) { 259262306a36Sopenharmony_ci folio_put(*foliop); 259362306a36Sopenharmony_ci *foliop = NULL; 259462306a36Sopenharmony_ci } 259562306a36Sopenharmony_ci return -ENOMEM; 259662306a36Sopenharmony_ci } 259762306a36Sopenharmony_ci 259862306a36Sopenharmony_ci if (!*foliop) { 259962306a36Sopenharmony_ci ret = -ENOMEM; 260062306a36Sopenharmony_ci folio = shmem_alloc_folio(gfp, info, pgoff); 260162306a36Sopenharmony_ci if (!folio) 260262306a36Sopenharmony_ci goto out_unacct_blocks; 260362306a36Sopenharmony_ci 260462306a36Sopenharmony_ci if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) { 260562306a36Sopenharmony_ci page_kaddr = kmap_local_folio(folio, 0); 260662306a36Sopenharmony_ci /* 260762306a36Sopenharmony_ci * The read mmap_lock is held here. Despite the 260862306a36Sopenharmony_ci * mmap_lock being read recursive a deadlock is still 260962306a36Sopenharmony_ci * possible if a writer has taken a lock. For example: 261062306a36Sopenharmony_ci * 261162306a36Sopenharmony_ci * process A thread 1 takes read lock on own mmap_lock 261262306a36Sopenharmony_ci * process A thread 2 calls mmap, blocks taking write lock 261362306a36Sopenharmony_ci * process B thread 1 takes page fault, read lock on own mmap lock 261462306a36Sopenharmony_ci * process B thread 2 calls mmap, blocks taking write lock 261562306a36Sopenharmony_ci * process A thread 1 blocks taking read lock on process B 261662306a36Sopenharmony_ci * process B thread 1 blocks taking read lock on process A 261762306a36Sopenharmony_ci * 261862306a36Sopenharmony_ci * Disable page faults to prevent potential deadlock 261962306a36Sopenharmony_ci * and retry the copy outside the mmap_lock. 262062306a36Sopenharmony_ci */ 262162306a36Sopenharmony_ci pagefault_disable(); 262262306a36Sopenharmony_ci ret = copy_from_user(page_kaddr, 262362306a36Sopenharmony_ci (const void __user *)src_addr, 262462306a36Sopenharmony_ci PAGE_SIZE); 262562306a36Sopenharmony_ci pagefault_enable(); 262662306a36Sopenharmony_ci kunmap_local(page_kaddr); 262762306a36Sopenharmony_ci 262862306a36Sopenharmony_ci /* fallback to copy_from_user outside mmap_lock */ 262962306a36Sopenharmony_ci if (unlikely(ret)) { 263062306a36Sopenharmony_ci *foliop = folio; 263162306a36Sopenharmony_ci ret = -ENOENT; 263262306a36Sopenharmony_ci /* don't free the page */ 263362306a36Sopenharmony_ci goto out_unacct_blocks; 263462306a36Sopenharmony_ci } 263562306a36Sopenharmony_ci 263662306a36Sopenharmony_ci flush_dcache_folio(folio); 263762306a36Sopenharmony_ci } else { /* ZEROPAGE */ 263862306a36Sopenharmony_ci clear_user_highpage(&folio->page, dst_addr); 263962306a36Sopenharmony_ci } 264062306a36Sopenharmony_ci } else { 264162306a36Sopenharmony_ci folio = *foliop; 264262306a36Sopenharmony_ci VM_BUG_ON_FOLIO(folio_test_large(folio), folio); 264362306a36Sopenharmony_ci *foliop = NULL; 264462306a36Sopenharmony_ci } 264562306a36Sopenharmony_ci 264662306a36Sopenharmony_ci VM_BUG_ON(folio_test_locked(folio)); 264762306a36Sopenharmony_ci VM_BUG_ON(folio_test_swapbacked(folio)); 264862306a36Sopenharmony_ci __folio_set_locked(folio); 264962306a36Sopenharmony_ci __folio_set_swapbacked(folio); 265062306a36Sopenharmony_ci __folio_mark_uptodate(folio); 265162306a36Sopenharmony_ci 265262306a36Sopenharmony_ci ret = -EFAULT; 265362306a36Sopenharmony_ci max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 265462306a36Sopenharmony_ci if (unlikely(pgoff >= max_off)) 265562306a36Sopenharmony_ci goto out_release; 265662306a36Sopenharmony_ci 265762306a36Sopenharmony_ci ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, 265862306a36Sopenharmony_ci gfp & GFP_RECLAIM_MASK, dst_vma->vm_mm); 265962306a36Sopenharmony_ci if (ret) 266062306a36Sopenharmony_ci goto out_release; 266162306a36Sopenharmony_ci 266262306a36Sopenharmony_ci ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, 266362306a36Sopenharmony_ci &folio->page, true, flags); 266462306a36Sopenharmony_ci if (ret) 266562306a36Sopenharmony_ci goto out_delete_from_cache; 266662306a36Sopenharmony_ci 266762306a36Sopenharmony_ci shmem_recalc_inode(inode, 1, 0); 266862306a36Sopenharmony_ci folio_unlock(folio); 266962306a36Sopenharmony_ci return 0; 267062306a36Sopenharmony_ciout_delete_from_cache: 267162306a36Sopenharmony_ci filemap_remove_folio(folio); 267262306a36Sopenharmony_ciout_release: 267362306a36Sopenharmony_ci folio_unlock(folio); 267462306a36Sopenharmony_ci folio_put(folio); 267562306a36Sopenharmony_ciout_unacct_blocks: 267662306a36Sopenharmony_ci shmem_inode_unacct_blocks(inode, 1); 267762306a36Sopenharmony_ci return ret; 267862306a36Sopenharmony_ci} 267962306a36Sopenharmony_ci#endif /* CONFIG_USERFAULTFD */ 268062306a36Sopenharmony_ci 268162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 268262306a36Sopenharmony_cistatic const struct inode_operations shmem_symlink_inode_operations; 268362306a36Sopenharmony_cistatic const struct inode_operations shmem_short_symlink_operations; 268462306a36Sopenharmony_ci 268562306a36Sopenharmony_cistatic int 268662306a36Sopenharmony_cishmem_write_begin(struct file *file, struct address_space *mapping, 268762306a36Sopenharmony_ci loff_t pos, unsigned len, 268862306a36Sopenharmony_ci struct page **pagep, void **fsdata) 268962306a36Sopenharmony_ci{ 269062306a36Sopenharmony_ci struct inode *inode = mapping->host; 269162306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 269262306a36Sopenharmony_ci pgoff_t index = pos >> PAGE_SHIFT; 269362306a36Sopenharmony_ci struct folio *folio; 269462306a36Sopenharmony_ci int ret = 0; 269562306a36Sopenharmony_ci 269662306a36Sopenharmony_ci /* i_rwsem is held by caller */ 269762306a36Sopenharmony_ci if (unlikely(info->seals & (F_SEAL_GROW | 269862306a36Sopenharmony_ci F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { 269962306a36Sopenharmony_ci if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) 270062306a36Sopenharmony_ci return -EPERM; 270162306a36Sopenharmony_ci if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) 270262306a36Sopenharmony_ci return -EPERM; 270362306a36Sopenharmony_ci } 270462306a36Sopenharmony_ci 270562306a36Sopenharmony_ci ret = shmem_get_folio(inode, index, &folio, SGP_WRITE); 270662306a36Sopenharmony_ci 270762306a36Sopenharmony_ci if (ret) 270862306a36Sopenharmony_ci return ret; 270962306a36Sopenharmony_ci 271062306a36Sopenharmony_ci *pagep = folio_file_page(folio, index); 271162306a36Sopenharmony_ci if (PageHWPoison(*pagep)) { 271262306a36Sopenharmony_ci folio_unlock(folio); 271362306a36Sopenharmony_ci folio_put(folio); 271462306a36Sopenharmony_ci *pagep = NULL; 271562306a36Sopenharmony_ci return -EIO; 271662306a36Sopenharmony_ci } 271762306a36Sopenharmony_ci 271862306a36Sopenharmony_ci return 0; 271962306a36Sopenharmony_ci} 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_cistatic int 272262306a36Sopenharmony_cishmem_write_end(struct file *file, struct address_space *mapping, 272362306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 272462306a36Sopenharmony_ci struct page *page, void *fsdata) 272562306a36Sopenharmony_ci{ 272662306a36Sopenharmony_ci struct folio *folio = page_folio(page); 272762306a36Sopenharmony_ci struct inode *inode = mapping->host; 272862306a36Sopenharmony_ci 272962306a36Sopenharmony_ci if (pos + copied > inode->i_size) 273062306a36Sopenharmony_ci i_size_write(inode, pos + copied); 273162306a36Sopenharmony_ci 273262306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) { 273362306a36Sopenharmony_ci if (copied < folio_size(folio)) { 273462306a36Sopenharmony_ci size_t from = offset_in_folio(folio, pos); 273562306a36Sopenharmony_ci folio_zero_segments(folio, 0, from, 273662306a36Sopenharmony_ci from + copied, folio_size(folio)); 273762306a36Sopenharmony_ci } 273862306a36Sopenharmony_ci folio_mark_uptodate(folio); 273962306a36Sopenharmony_ci } 274062306a36Sopenharmony_ci folio_mark_dirty(folio); 274162306a36Sopenharmony_ci folio_unlock(folio); 274262306a36Sopenharmony_ci folio_put(folio); 274362306a36Sopenharmony_ci 274462306a36Sopenharmony_ci return copied; 274562306a36Sopenharmony_ci} 274662306a36Sopenharmony_ci 274762306a36Sopenharmony_cistatic ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 274862306a36Sopenharmony_ci{ 274962306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 275062306a36Sopenharmony_ci struct inode *inode = file_inode(file); 275162306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 275262306a36Sopenharmony_ci pgoff_t index; 275362306a36Sopenharmony_ci unsigned long offset; 275462306a36Sopenharmony_ci int error = 0; 275562306a36Sopenharmony_ci ssize_t retval = 0; 275662306a36Sopenharmony_ci loff_t *ppos = &iocb->ki_pos; 275762306a36Sopenharmony_ci 275862306a36Sopenharmony_ci index = *ppos >> PAGE_SHIFT; 275962306a36Sopenharmony_ci offset = *ppos & ~PAGE_MASK; 276062306a36Sopenharmony_ci 276162306a36Sopenharmony_ci for (;;) { 276262306a36Sopenharmony_ci struct folio *folio = NULL; 276362306a36Sopenharmony_ci struct page *page = NULL; 276462306a36Sopenharmony_ci pgoff_t end_index; 276562306a36Sopenharmony_ci unsigned long nr, ret; 276662306a36Sopenharmony_ci loff_t i_size = i_size_read(inode); 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 276962306a36Sopenharmony_ci if (index > end_index) 277062306a36Sopenharmony_ci break; 277162306a36Sopenharmony_ci if (index == end_index) { 277262306a36Sopenharmony_ci nr = i_size & ~PAGE_MASK; 277362306a36Sopenharmony_ci if (nr <= offset) 277462306a36Sopenharmony_ci break; 277562306a36Sopenharmony_ci } 277662306a36Sopenharmony_ci 277762306a36Sopenharmony_ci error = shmem_get_folio(inode, index, &folio, SGP_READ); 277862306a36Sopenharmony_ci if (error) { 277962306a36Sopenharmony_ci if (error == -EINVAL) 278062306a36Sopenharmony_ci error = 0; 278162306a36Sopenharmony_ci break; 278262306a36Sopenharmony_ci } 278362306a36Sopenharmony_ci if (folio) { 278462306a36Sopenharmony_ci folio_unlock(folio); 278562306a36Sopenharmony_ci 278662306a36Sopenharmony_ci page = folio_file_page(folio, index); 278762306a36Sopenharmony_ci if (PageHWPoison(page)) { 278862306a36Sopenharmony_ci folio_put(folio); 278962306a36Sopenharmony_ci error = -EIO; 279062306a36Sopenharmony_ci break; 279162306a36Sopenharmony_ci } 279262306a36Sopenharmony_ci } 279362306a36Sopenharmony_ci 279462306a36Sopenharmony_ci /* 279562306a36Sopenharmony_ci * We must evaluate after, since reads (unlike writes) 279662306a36Sopenharmony_ci * are called without i_rwsem protection against truncate 279762306a36Sopenharmony_ci */ 279862306a36Sopenharmony_ci nr = PAGE_SIZE; 279962306a36Sopenharmony_ci i_size = i_size_read(inode); 280062306a36Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 280162306a36Sopenharmony_ci if (index == end_index) { 280262306a36Sopenharmony_ci nr = i_size & ~PAGE_MASK; 280362306a36Sopenharmony_ci if (nr <= offset) { 280462306a36Sopenharmony_ci if (folio) 280562306a36Sopenharmony_ci folio_put(folio); 280662306a36Sopenharmony_ci break; 280762306a36Sopenharmony_ci } 280862306a36Sopenharmony_ci } 280962306a36Sopenharmony_ci nr -= offset; 281062306a36Sopenharmony_ci 281162306a36Sopenharmony_ci if (folio) { 281262306a36Sopenharmony_ci /* 281362306a36Sopenharmony_ci * If users can be writing to this page using arbitrary 281462306a36Sopenharmony_ci * virtual addresses, take care about potential aliasing 281562306a36Sopenharmony_ci * before reading the page on the kernel side. 281662306a36Sopenharmony_ci */ 281762306a36Sopenharmony_ci if (mapping_writably_mapped(mapping)) 281862306a36Sopenharmony_ci flush_dcache_page(page); 281962306a36Sopenharmony_ci /* 282062306a36Sopenharmony_ci * Mark the page accessed if we read the beginning. 282162306a36Sopenharmony_ci */ 282262306a36Sopenharmony_ci if (!offset) 282362306a36Sopenharmony_ci folio_mark_accessed(folio); 282462306a36Sopenharmony_ci /* 282562306a36Sopenharmony_ci * Ok, we have the page, and it's up-to-date, so 282662306a36Sopenharmony_ci * now we can copy it to user space... 282762306a36Sopenharmony_ci */ 282862306a36Sopenharmony_ci ret = copy_page_to_iter(page, offset, nr, to); 282962306a36Sopenharmony_ci folio_put(folio); 283062306a36Sopenharmony_ci 283162306a36Sopenharmony_ci } else if (user_backed_iter(to)) { 283262306a36Sopenharmony_ci /* 283362306a36Sopenharmony_ci * Copy to user tends to be so well optimized, but 283462306a36Sopenharmony_ci * clear_user() not so much, that it is noticeably 283562306a36Sopenharmony_ci * faster to copy the zero page instead of clearing. 283662306a36Sopenharmony_ci */ 283762306a36Sopenharmony_ci ret = copy_page_to_iter(ZERO_PAGE(0), offset, nr, to); 283862306a36Sopenharmony_ci } else { 283962306a36Sopenharmony_ci /* 284062306a36Sopenharmony_ci * But submitting the same page twice in a row to 284162306a36Sopenharmony_ci * splice() - or others? - can result in confusion: 284262306a36Sopenharmony_ci * so don't attempt that optimization on pipes etc. 284362306a36Sopenharmony_ci */ 284462306a36Sopenharmony_ci ret = iov_iter_zero(nr, to); 284562306a36Sopenharmony_ci } 284662306a36Sopenharmony_ci 284762306a36Sopenharmony_ci retval += ret; 284862306a36Sopenharmony_ci offset += ret; 284962306a36Sopenharmony_ci index += offset >> PAGE_SHIFT; 285062306a36Sopenharmony_ci offset &= ~PAGE_MASK; 285162306a36Sopenharmony_ci 285262306a36Sopenharmony_ci if (!iov_iter_count(to)) 285362306a36Sopenharmony_ci break; 285462306a36Sopenharmony_ci if (ret < nr) { 285562306a36Sopenharmony_ci error = -EFAULT; 285662306a36Sopenharmony_ci break; 285762306a36Sopenharmony_ci } 285862306a36Sopenharmony_ci cond_resched(); 285962306a36Sopenharmony_ci } 286062306a36Sopenharmony_ci 286162306a36Sopenharmony_ci *ppos = ((loff_t) index << PAGE_SHIFT) + offset; 286262306a36Sopenharmony_ci file_accessed(file); 286362306a36Sopenharmony_ci return retval ? retval : error; 286462306a36Sopenharmony_ci} 286562306a36Sopenharmony_ci 286662306a36Sopenharmony_cistatic ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 286762306a36Sopenharmony_ci{ 286862306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 286962306a36Sopenharmony_ci struct inode *inode = file->f_mapping->host; 287062306a36Sopenharmony_ci ssize_t ret; 287162306a36Sopenharmony_ci 287262306a36Sopenharmony_ci inode_lock(inode); 287362306a36Sopenharmony_ci ret = generic_write_checks(iocb, from); 287462306a36Sopenharmony_ci if (ret <= 0) 287562306a36Sopenharmony_ci goto unlock; 287662306a36Sopenharmony_ci ret = file_remove_privs(file); 287762306a36Sopenharmony_ci if (ret) 287862306a36Sopenharmony_ci goto unlock; 287962306a36Sopenharmony_ci ret = file_update_time(file); 288062306a36Sopenharmony_ci if (ret) 288162306a36Sopenharmony_ci goto unlock; 288262306a36Sopenharmony_ci ret = generic_perform_write(iocb, from); 288362306a36Sopenharmony_ciunlock: 288462306a36Sopenharmony_ci inode_unlock(inode); 288562306a36Sopenharmony_ci return ret; 288662306a36Sopenharmony_ci} 288762306a36Sopenharmony_ci 288862306a36Sopenharmony_cistatic bool zero_pipe_buf_get(struct pipe_inode_info *pipe, 288962306a36Sopenharmony_ci struct pipe_buffer *buf) 289062306a36Sopenharmony_ci{ 289162306a36Sopenharmony_ci return true; 289262306a36Sopenharmony_ci} 289362306a36Sopenharmony_ci 289462306a36Sopenharmony_cistatic void zero_pipe_buf_release(struct pipe_inode_info *pipe, 289562306a36Sopenharmony_ci struct pipe_buffer *buf) 289662306a36Sopenharmony_ci{ 289762306a36Sopenharmony_ci} 289862306a36Sopenharmony_ci 289962306a36Sopenharmony_cistatic bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe, 290062306a36Sopenharmony_ci struct pipe_buffer *buf) 290162306a36Sopenharmony_ci{ 290262306a36Sopenharmony_ci return false; 290362306a36Sopenharmony_ci} 290462306a36Sopenharmony_ci 290562306a36Sopenharmony_cistatic const struct pipe_buf_operations zero_pipe_buf_ops = { 290662306a36Sopenharmony_ci .release = zero_pipe_buf_release, 290762306a36Sopenharmony_ci .try_steal = zero_pipe_buf_try_steal, 290862306a36Sopenharmony_ci .get = zero_pipe_buf_get, 290962306a36Sopenharmony_ci}; 291062306a36Sopenharmony_ci 291162306a36Sopenharmony_cistatic size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe, 291262306a36Sopenharmony_ci loff_t fpos, size_t size) 291362306a36Sopenharmony_ci{ 291462306a36Sopenharmony_ci size_t offset = fpos & ~PAGE_MASK; 291562306a36Sopenharmony_ci 291662306a36Sopenharmony_ci size = min_t(size_t, size, PAGE_SIZE - offset); 291762306a36Sopenharmony_ci 291862306a36Sopenharmony_ci if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { 291962306a36Sopenharmony_ci struct pipe_buffer *buf = pipe_head_buf(pipe); 292062306a36Sopenharmony_ci 292162306a36Sopenharmony_ci *buf = (struct pipe_buffer) { 292262306a36Sopenharmony_ci .ops = &zero_pipe_buf_ops, 292362306a36Sopenharmony_ci .page = ZERO_PAGE(0), 292462306a36Sopenharmony_ci .offset = offset, 292562306a36Sopenharmony_ci .len = size, 292662306a36Sopenharmony_ci }; 292762306a36Sopenharmony_ci pipe->head++; 292862306a36Sopenharmony_ci } 292962306a36Sopenharmony_ci 293062306a36Sopenharmony_ci return size; 293162306a36Sopenharmony_ci} 293262306a36Sopenharmony_ci 293362306a36Sopenharmony_cistatic ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, 293462306a36Sopenharmony_ci struct pipe_inode_info *pipe, 293562306a36Sopenharmony_ci size_t len, unsigned int flags) 293662306a36Sopenharmony_ci{ 293762306a36Sopenharmony_ci struct inode *inode = file_inode(in); 293862306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 293962306a36Sopenharmony_ci struct folio *folio = NULL; 294062306a36Sopenharmony_ci size_t total_spliced = 0, used, npages, n, part; 294162306a36Sopenharmony_ci loff_t isize; 294262306a36Sopenharmony_ci int error = 0; 294362306a36Sopenharmony_ci 294462306a36Sopenharmony_ci /* Work out how much data we can actually add into the pipe */ 294562306a36Sopenharmony_ci used = pipe_occupancy(pipe->head, pipe->tail); 294662306a36Sopenharmony_ci npages = max_t(ssize_t, pipe->max_usage - used, 0); 294762306a36Sopenharmony_ci len = min_t(size_t, len, npages * PAGE_SIZE); 294862306a36Sopenharmony_ci 294962306a36Sopenharmony_ci do { 295062306a36Sopenharmony_ci if (*ppos >= i_size_read(inode)) 295162306a36Sopenharmony_ci break; 295262306a36Sopenharmony_ci 295362306a36Sopenharmony_ci error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, 295462306a36Sopenharmony_ci SGP_READ); 295562306a36Sopenharmony_ci if (error) { 295662306a36Sopenharmony_ci if (error == -EINVAL) 295762306a36Sopenharmony_ci error = 0; 295862306a36Sopenharmony_ci break; 295962306a36Sopenharmony_ci } 296062306a36Sopenharmony_ci if (folio) { 296162306a36Sopenharmony_ci folio_unlock(folio); 296262306a36Sopenharmony_ci 296362306a36Sopenharmony_ci if (folio_test_hwpoison(folio) || 296462306a36Sopenharmony_ci (folio_test_large(folio) && 296562306a36Sopenharmony_ci folio_test_has_hwpoisoned(folio))) { 296662306a36Sopenharmony_ci error = -EIO; 296762306a36Sopenharmony_ci break; 296862306a36Sopenharmony_ci } 296962306a36Sopenharmony_ci } 297062306a36Sopenharmony_ci 297162306a36Sopenharmony_ci /* 297262306a36Sopenharmony_ci * i_size must be checked after we know the pages are Uptodate. 297362306a36Sopenharmony_ci * 297462306a36Sopenharmony_ci * Checking i_size after the check allows us to calculate 297562306a36Sopenharmony_ci * the correct value for "nr", which means the zero-filled 297662306a36Sopenharmony_ci * part of the page is not copied back to userspace (unless 297762306a36Sopenharmony_ci * another truncate extends the file - this is desired though). 297862306a36Sopenharmony_ci */ 297962306a36Sopenharmony_ci isize = i_size_read(inode); 298062306a36Sopenharmony_ci if (unlikely(*ppos >= isize)) 298162306a36Sopenharmony_ci break; 298262306a36Sopenharmony_ci part = min_t(loff_t, isize - *ppos, len); 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_ci if (folio) { 298562306a36Sopenharmony_ci /* 298662306a36Sopenharmony_ci * If users can be writing to this page using arbitrary 298762306a36Sopenharmony_ci * virtual addresses, take care about potential aliasing 298862306a36Sopenharmony_ci * before reading the page on the kernel side. 298962306a36Sopenharmony_ci */ 299062306a36Sopenharmony_ci if (mapping_writably_mapped(mapping)) 299162306a36Sopenharmony_ci flush_dcache_folio(folio); 299262306a36Sopenharmony_ci folio_mark_accessed(folio); 299362306a36Sopenharmony_ci /* 299462306a36Sopenharmony_ci * Ok, we have the page, and it's up-to-date, so we can 299562306a36Sopenharmony_ci * now splice it into the pipe. 299662306a36Sopenharmony_ci */ 299762306a36Sopenharmony_ci n = splice_folio_into_pipe(pipe, folio, *ppos, part); 299862306a36Sopenharmony_ci folio_put(folio); 299962306a36Sopenharmony_ci folio = NULL; 300062306a36Sopenharmony_ci } else { 300162306a36Sopenharmony_ci n = splice_zeropage_into_pipe(pipe, *ppos, part); 300262306a36Sopenharmony_ci } 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_ci if (!n) 300562306a36Sopenharmony_ci break; 300662306a36Sopenharmony_ci len -= n; 300762306a36Sopenharmony_ci total_spliced += n; 300862306a36Sopenharmony_ci *ppos += n; 300962306a36Sopenharmony_ci in->f_ra.prev_pos = *ppos; 301062306a36Sopenharmony_ci if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 301162306a36Sopenharmony_ci break; 301262306a36Sopenharmony_ci 301362306a36Sopenharmony_ci cond_resched(); 301462306a36Sopenharmony_ci } while (len); 301562306a36Sopenharmony_ci 301662306a36Sopenharmony_ci if (folio) 301762306a36Sopenharmony_ci folio_put(folio); 301862306a36Sopenharmony_ci 301962306a36Sopenharmony_ci file_accessed(in); 302062306a36Sopenharmony_ci return total_spliced ? total_spliced : error; 302162306a36Sopenharmony_ci} 302262306a36Sopenharmony_ci 302362306a36Sopenharmony_cistatic loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) 302462306a36Sopenharmony_ci{ 302562306a36Sopenharmony_ci struct address_space *mapping = file->f_mapping; 302662306a36Sopenharmony_ci struct inode *inode = mapping->host; 302762306a36Sopenharmony_ci 302862306a36Sopenharmony_ci if (whence != SEEK_DATA && whence != SEEK_HOLE) 302962306a36Sopenharmony_ci return generic_file_llseek_size(file, offset, whence, 303062306a36Sopenharmony_ci MAX_LFS_FILESIZE, i_size_read(inode)); 303162306a36Sopenharmony_ci if (offset < 0) 303262306a36Sopenharmony_ci return -ENXIO; 303362306a36Sopenharmony_ci 303462306a36Sopenharmony_ci inode_lock(inode); 303562306a36Sopenharmony_ci /* We're holding i_rwsem so we can access i_size directly */ 303662306a36Sopenharmony_ci offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence); 303762306a36Sopenharmony_ci if (offset >= 0) 303862306a36Sopenharmony_ci offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE); 303962306a36Sopenharmony_ci inode_unlock(inode); 304062306a36Sopenharmony_ci return offset; 304162306a36Sopenharmony_ci} 304262306a36Sopenharmony_ci 304362306a36Sopenharmony_cistatic long shmem_fallocate(struct file *file, int mode, loff_t offset, 304462306a36Sopenharmony_ci loff_t len) 304562306a36Sopenharmony_ci{ 304662306a36Sopenharmony_ci struct inode *inode = file_inode(file); 304762306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 304862306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 304962306a36Sopenharmony_ci struct shmem_falloc shmem_falloc; 305062306a36Sopenharmony_ci pgoff_t start, index, end, undo_fallocend; 305162306a36Sopenharmony_ci int error; 305262306a36Sopenharmony_ci 305362306a36Sopenharmony_ci if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 305462306a36Sopenharmony_ci return -EOPNOTSUPP; 305562306a36Sopenharmony_ci 305662306a36Sopenharmony_ci inode_lock(inode); 305762306a36Sopenharmony_ci 305862306a36Sopenharmony_ci if (mode & FALLOC_FL_PUNCH_HOLE) { 305962306a36Sopenharmony_ci struct address_space *mapping = file->f_mapping; 306062306a36Sopenharmony_ci loff_t unmap_start = round_up(offset, PAGE_SIZE); 306162306a36Sopenharmony_ci loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; 306262306a36Sopenharmony_ci DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); 306362306a36Sopenharmony_ci 306462306a36Sopenharmony_ci /* protected by i_rwsem */ 306562306a36Sopenharmony_ci if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { 306662306a36Sopenharmony_ci error = -EPERM; 306762306a36Sopenharmony_ci goto out; 306862306a36Sopenharmony_ci } 306962306a36Sopenharmony_ci 307062306a36Sopenharmony_ci shmem_falloc.waitq = &shmem_falloc_waitq; 307162306a36Sopenharmony_ci shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT; 307262306a36Sopenharmony_ci shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; 307362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 307462306a36Sopenharmony_ci inode->i_private = &shmem_falloc; 307562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 307662306a36Sopenharmony_ci 307762306a36Sopenharmony_ci if ((u64)unmap_end > (u64)unmap_start) 307862306a36Sopenharmony_ci unmap_mapping_range(mapping, unmap_start, 307962306a36Sopenharmony_ci 1 + unmap_end - unmap_start, 0); 308062306a36Sopenharmony_ci shmem_truncate_range(inode, offset, offset + len - 1); 308162306a36Sopenharmony_ci /* No need to unmap again: hole-punching leaves COWed pages */ 308262306a36Sopenharmony_ci 308362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 308462306a36Sopenharmony_ci inode->i_private = NULL; 308562306a36Sopenharmony_ci wake_up_all(&shmem_falloc_waitq); 308662306a36Sopenharmony_ci WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head)); 308762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 308862306a36Sopenharmony_ci error = 0; 308962306a36Sopenharmony_ci goto out; 309062306a36Sopenharmony_ci } 309162306a36Sopenharmony_ci 309262306a36Sopenharmony_ci /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ 309362306a36Sopenharmony_ci error = inode_newsize_ok(inode, offset + len); 309462306a36Sopenharmony_ci if (error) 309562306a36Sopenharmony_ci goto out; 309662306a36Sopenharmony_ci 309762306a36Sopenharmony_ci if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { 309862306a36Sopenharmony_ci error = -EPERM; 309962306a36Sopenharmony_ci goto out; 310062306a36Sopenharmony_ci } 310162306a36Sopenharmony_ci 310262306a36Sopenharmony_ci start = offset >> PAGE_SHIFT; 310362306a36Sopenharmony_ci end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 310462306a36Sopenharmony_ci /* Try to avoid a swapstorm if len is impossible to satisfy */ 310562306a36Sopenharmony_ci if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { 310662306a36Sopenharmony_ci error = -ENOSPC; 310762306a36Sopenharmony_ci goto out; 310862306a36Sopenharmony_ci } 310962306a36Sopenharmony_ci 311062306a36Sopenharmony_ci shmem_falloc.waitq = NULL; 311162306a36Sopenharmony_ci shmem_falloc.start = start; 311262306a36Sopenharmony_ci shmem_falloc.next = start; 311362306a36Sopenharmony_ci shmem_falloc.nr_falloced = 0; 311462306a36Sopenharmony_ci shmem_falloc.nr_unswapped = 0; 311562306a36Sopenharmony_ci spin_lock(&inode->i_lock); 311662306a36Sopenharmony_ci inode->i_private = &shmem_falloc; 311762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 311862306a36Sopenharmony_ci 311962306a36Sopenharmony_ci /* 312062306a36Sopenharmony_ci * info->fallocend is only relevant when huge pages might be 312162306a36Sopenharmony_ci * involved: to prevent split_huge_page() freeing fallocated 312262306a36Sopenharmony_ci * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size. 312362306a36Sopenharmony_ci */ 312462306a36Sopenharmony_ci undo_fallocend = info->fallocend; 312562306a36Sopenharmony_ci if (info->fallocend < end) 312662306a36Sopenharmony_ci info->fallocend = end; 312762306a36Sopenharmony_ci 312862306a36Sopenharmony_ci for (index = start; index < end; ) { 312962306a36Sopenharmony_ci struct folio *folio; 313062306a36Sopenharmony_ci 313162306a36Sopenharmony_ci /* 313262306a36Sopenharmony_ci * Good, the fallocate(2) manpage permits EINTR: we may have 313362306a36Sopenharmony_ci * been interrupted because we are using up too much memory. 313462306a36Sopenharmony_ci */ 313562306a36Sopenharmony_ci if (signal_pending(current)) 313662306a36Sopenharmony_ci error = -EINTR; 313762306a36Sopenharmony_ci else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) 313862306a36Sopenharmony_ci error = -ENOMEM; 313962306a36Sopenharmony_ci else 314062306a36Sopenharmony_ci error = shmem_get_folio(inode, index, &folio, 314162306a36Sopenharmony_ci SGP_FALLOC); 314262306a36Sopenharmony_ci if (error) { 314362306a36Sopenharmony_ci info->fallocend = undo_fallocend; 314462306a36Sopenharmony_ci /* Remove the !uptodate folios we added */ 314562306a36Sopenharmony_ci if (index > start) { 314662306a36Sopenharmony_ci shmem_undo_range(inode, 314762306a36Sopenharmony_ci (loff_t)start << PAGE_SHIFT, 314862306a36Sopenharmony_ci ((loff_t)index << PAGE_SHIFT) - 1, true); 314962306a36Sopenharmony_ci } 315062306a36Sopenharmony_ci goto undone; 315162306a36Sopenharmony_ci } 315262306a36Sopenharmony_ci 315362306a36Sopenharmony_ci /* 315462306a36Sopenharmony_ci * Here is a more important optimization than it appears: 315562306a36Sopenharmony_ci * a second SGP_FALLOC on the same large folio will clear it, 315662306a36Sopenharmony_ci * making it uptodate and un-undoable if we fail later. 315762306a36Sopenharmony_ci */ 315862306a36Sopenharmony_ci index = folio_next_index(folio); 315962306a36Sopenharmony_ci /* Beware 32-bit wraparound */ 316062306a36Sopenharmony_ci if (!index) 316162306a36Sopenharmony_ci index--; 316262306a36Sopenharmony_ci 316362306a36Sopenharmony_ci /* 316462306a36Sopenharmony_ci * Inform shmem_writepage() how far we have reached. 316562306a36Sopenharmony_ci * No need for lock or barrier: we have the page lock. 316662306a36Sopenharmony_ci */ 316762306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) 316862306a36Sopenharmony_ci shmem_falloc.nr_falloced += index - shmem_falloc.next; 316962306a36Sopenharmony_ci shmem_falloc.next = index; 317062306a36Sopenharmony_ci 317162306a36Sopenharmony_ci /* 317262306a36Sopenharmony_ci * If !uptodate, leave it that way so that freeable folios 317362306a36Sopenharmony_ci * can be recognized if we need to rollback on error later. 317462306a36Sopenharmony_ci * But mark it dirty so that memory pressure will swap rather 317562306a36Sopenharmony_ci * than free the folios we are allocating (and SGP_CACHE folios 317662306a36Sopenharmony_ci * might still be clean: we now need to mark those dirty too). 317762306a36Sopenharmony_ci */ 317862306a36Sopenharmony_ci folio_mark_dirty(folio); 317962306a36Sopenharmony_ci folio_unlock(folio); 318062306a36Sopenharmony_ci folio_put(folio); 318162306a36Sopenharmony_ci cond_resched(); 318262306a36Sopenharmony_ci } 318362306a36Sopenharmony_ci 318462306a36Sopenharmony_ci if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) 318562306a36Sopenharmony_ci i_size_write(inode, offset + len); 318662306a36Sopenharmony_ciundone: 318762306a36Sopenharmony_ci spin_lock(&inode->i_lock); 318862306a36Sopenharmony_ci inode->i_private = NULL; 318962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 319062306a36Sopenharmony_ciout: 319162306a36Sopenharmony_ci if (!error) 319262306a36Sopenharmony_ci file_modified(file); 319362306a36Sopenharmony_ci inode_unlock(inode); 319462306a36Sopenharmony_ci return error; 319562306a36Sopenharmony_ci} 319662306a36Sopenharmony_ci 319762306a36Sopenharmony_cistatic int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) 319862306a36Sopenharmony_ci{ 319962306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); 320062306a36Sopenharmony_ci 320162306a36Sopenharmony_ci buf->f_type = TMPFS_MAGIC; 320262306a36Sopenharmony_ci buf->f_bsize = PAGE_SIZE; 320362306a36Sopenharmony_ci buf->f_namelen = NAME_MAX; 320462306a36Sopenharmony_ci if (sbinfo->max_blocks) { 320562306a36Sopenharmony_ci buf->f_blocks = sbinfo->max_blocks; 320662306a36Sopenharmony_ci buf->f_bavail = 320762306a36Sopenharmony_ci buf->f_bfree = sbinfo->max_blocks - 320862306a36Sopenharmony_ci percpu_counter_sum(&sbinfo->used_blocks); 320962306a36Sopenharmony_ci } 321062306a36Sopenharmony_ci if (sbinfo->max_inodes) { 321162306a36Sopenharmony_ci buf->f_files = sbinfo->max_inodes; 321262306a36Sopenharmony_ci buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE; 321362306a36Sopenharmony_ci } 321462306a36Sopenharmony_ci /* else leave those fields 0 like simple_statfs */ 321562306a36Sopenharmony_ci 321662306a36Sopenharmony_ci buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); 321762306a36Sopenharmony_ci 321862306a36Sopenharmony_ci return 0; 321962306a36Sopenharmony_ci} 322062306a36Sopenharmony_ci 322162306a36Sopenharmony_ci/* 322262306a36Sopenharmony_ci * File creation. Allocate an inode, and we're done.. 322362306a36Sopenharmony_ci */ 322462306a36Sopenharmony_cistatic int 322562306a36Sopenharmony_cishmem_mknod(struct mnt_idmap *idmap, struct inode *dir, 322662306a36Sopenharmony_ci struct dentry *dentry, umode_t mode, dev_t dev) 322762306a36Sopenharmony_ci{ 322862306a36Sopenharmony_ci struct inode *inode; 322962306a36Sopenharmony_ci int error; 323062306a36Sopenharmony_ci 323162306a36Sopenharmony_ci inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE); 323262306a36Sopenharmony_ci if (IS_ERR(inode)) 323362306a36Sopenharmony_ci return PTR_ERR(inode); 323462306a36Sopenharmony_ci 323562306a36Sopenharmony_ci error = simple_acl_create(dir, inode); 323662306a36Sopenharmony_ci if (error) 323762306a36Sopenharmony_ci goto out_iput; 323862306a36Sopenharmony_ci error = security_inode_init_security(inode, dir, 323962306a36Sopenharmony_ci &dentry->d_name, 324062306a36Sopenharmony_ci shmem_initxattrs, NULL); 324162306a36Sopenharmony_ci if (error && error != -EOPNOTSUPP) 324262306a36Sopenharmony_ci goto out_iput; 324362306a36Sopenharmony_ci 324462306a36Sopenharmony_ci error = simple_offset_add(shmem_get_offset_ctx(dir), dentry); 324562306a36Sopenharmony_ci if (error) 324662306a36Sopenharmony_ci goto out_iput; 324762306a36Sopenharmony_ci 324862306a36Sopenharmony_ci dir->i_size += BOGO_DIRENT_SIZE; 324962306a36Sopenharmony_ci dir->i_mtime = inode_set_ctime_current(dir); 325062306a36Sopenharmony_ci inode_inc_iversion(dir); 325162306a36Sopenharmony_ci d_instantiate(dentry, inode); 325262306a36Sopenharmony_ci dget(dentry); /* Extra count - pin the dentry in core */ 325362306a36Sopenharmony_ci return error; 325462306a36Sopenharmony_ci 325562306a36Sopenharmony_ciout_iput: 325662306a36Sopenharmony_ci iput(inode); 325762306a36Sopenharmony_ci return error; 325862306a36Sopenharmony_ci} 325962306a36Sopenharmony_ci 326062306a36Sopenharmony_cistatic int 326162306a36Sopenharmony_cishmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir, 326262306a36Sopenharmony_ci struct file *file, umode_t mode) 326362306a36Sopenharmony_ci{ 326462306a36Sopenharmony_ci struct inode *inode; 326562306a36Sopenharmony_ci int error; 326662306a36Sopenharmony_ci 326762306a36Sopenharmony_ci inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE); 326862306a36Sopenharmony_ci 326962306a36Sopenharmony_ci if (IS_ERR(inode)) { 327062306a36Sopenharmony_ci error = PTR_ERR(inode); 327162306a36Sopenharmony_ci goto err_out; 327262306a36Sopenharmony_ci } 327362306a36Sopenharmony_ci 327462306a36Sopenharmony_ci error = security_inode_init_security(inode, dir, 327562306a36Sopenharmony_ci NULL, 327662306a36Sopenharmony_ci shmem_initxattrs, NULL); 327762306a36Sopenharmony_ci if (error && error != -EOPNOTSUPP) 327862306a36Sopenharmony_ci goto out_iput; 327962306a36Sopenharmony_ci error = simple_acl_create(dir, inode); 328062306a36Sopenharmony_ci if (error) 328162306a36Sopenharmony_ci goto out_iput; 328262306a36Sopenharmony_ci d_tmpfile(file, inode); 328362306a36Sopenharmony_ci 328462306a36Sopenharmony_cierr_out: 328562306a36Sopenharmony_ci return finish_open_simple(file, error); 328662306a36Sopenharmony_ciout_iput: 328762306a36Sopenharmony_ci iput(inode); 328862306a36Sopenharmony_ci return error; 328962306a36Sopenharmony_ci} 329062306a36Sopenharmony_ci 329162306a36Sopenharmony_cistatic int shmem_mkdir(struct mnt_idmap *idmap, struct inode *dir, 329262306a36Sopenharmony_ci struct dentry *dentry, umode_t mode) 329362306a36Sopenharmony_ci{ 329462306a36Sopenharmony_ci int error; 329562306a36Sopenharmony_ci 329662306a36Sopenharmony_ci error = shmem_mknod(idmap, dir, dentry, mode | S_IFDIR, 0); 329762306a36Sopenharmony_ci if (error) 329862306a36Sopenharmony_ci return error; 329962306a36Sopenharmony_ci inc_nlink(dir); 330062306a36Sopenharmony_ci return 0; 330162306a36Sopenharmony_ci} 330262306a36Sopenharmony_ci 330362306a36Sopenharmony_cistatic int shmem_create(struct mnt_idmap *idmap, struct inode *dir, 330462306a36Sopenharmony_ci struct dentry *dentry, umode_t mode, bool excl) 330562306a36Sopenharmony_ci{ 330662306a36Sopenharmony_ci return shmem_mknod(idmap, dir, dentry, mode | S_IFREG, 0); 330762306a36Sopenharmony_ci} 330862306a36Sopenharmony_ci 330962306a36Sopenharmony_ci/* 331062306a36Sopenharmony_ci * Link a file.. 331162306a36Sopenharmony_ci */ 331262306a36Sopenharmony_cistatic int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 331362306a36Sopenharmony_ci{ 331462306a36Sopenharmony_ci struct inode *inode = d_inode(old_dentry); 331562306a36Sopenharmony_ci int ret = 0; 331662306a36Sopenharmony_ci 331762306a36Sopenharmony_ci /* 331862306a36Sopenharmony_ci * No ordinary (disk based) filesystem counts links as inodes; 331962306a36Sopenharmony_ci * but each new link needs a new dentry, pinning lowmem, and 332062306a36Sopenharmony_ci * tmpfs dentries cannot be pruned until they are unlinked. 332162306a36Sopenharmony_ci * But if an O_TMPFILE file is linked into the tmpfs, the 332262306a36Sopenharmony_ci * first link must skip that, to get the accounting right. 332362306a36Sopenharmony_ci */ 332462306a36Sopenharmony_ci if (inode->i_nlink) { 332562306a36Sopenharmony_ci ret = shmem_reserve_inode(inode->i_sb, NULL); 332662306a36Sopenharmony_ci if (ret) 332762306a36Sopenharmony_ci goto out; 332862306a36Sopenharmony_ci } 332962306a36Sopenharmony_ci 333062306a36Sopenharmony_ci ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry); 333162306a36Sopenharmony_ci if (ret) { 333262306a36Sopenharmony_ci if (inode->i_nlink) 333362306a36Sopenharmony_ci shmem_free_inode(inode->i_sb, 0); 333462306a36Sopenharmony_ci goto out; 333562306a36Sopenharmony_ci } 333662306a36Sopenharmony_ci 333762306a36Sopenharmony_ci dir->i_size += BOGO_DIRENT_SIZE; 333862306a36Sopenharmony_ci dir->i_mtime = inode_set_ctime_to_ts(dir, 333962306a36Sopenharmony_ci inode_set_ctime_current(inode)); 334062306a36Sopenharmony_ci inode_inc_iversion(dir); 334162306a36Sopenharmony_ci inc_nlink(inode); 334262306a36Sopenharmony_ci ihold(inode); /* New dentry reference */ 334362306a36Sopenharmony_ci dget(dentry); /* Extra pinning count for the created dentry */ 334462306a36Sopenharmony_ci d_instantiate(dentry, inode); 334562306a36Sopenharmony_ciout: 334662306a36Sopenharmony_ci return ret; 334762306a36Sopenharmony_ci} 334862306a36Sopenharmony_ci 334962306a36Sopenharmony_cistatic int shmem_unlink(struct inode *dir, struct dentry *dentry) 335062306a36Sopenharmony_ci{ 335162306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 335262306a36Sopenharmony_ci 335362306a36Sopenharmony_ci if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) 335462306a36Sopenharmony_ci shmem_free_inode(inode->i_sb, 0); 335562306a36Sopenharmony_ci 335662306a36Sopenharmony_ci simple_offset_remove(shmem_get_offset_ctx(dir), dentry); 335762306a36Sopenharmony_ci 335862306a36Sopenharmony_ci dir->i_size -= BOGO_DIRENT_SIZE; 335962306a36Sopenharmony_ci dir->i_mtime = inode_set_ctime_to_ts(dir, 336062306a36Sopenharmony_ci inode_set_ctime_current(inode)); 336162306a36Sopenharmony_ci inode_inc_iversion(dir); 336262306a36Sopenharmony_ci drop_nlink(inode); 336362306a36Sopenharmony_ci dput(dentry); /* Undo the count from "create" - this does all the work */ 336462306a36Sopenharmony_ci return 0; 336562306a36Sopenharmony_ci} 336662306a36Sopenharmony_ci 336762306a36Sopenharmony_cistatic int shmem_rmdir(struct inode *dir, struct dentry *dentry) 336862306a36Sopenharmony_ci{ 336962306a36Sopenharmony_ci if (!simple_empty(dentry)) 337062306a36Sopenharmony_ci return -ENOTEMPTY; 337162306a36Sopenharmony_ci 337262306a36Sopenharmony_ci drop_nlink(d_inode(dentry)); 337362306a36Sopenharmony_ci drop_nlink(dir); 337462306a36Sopenharmony_ci return shmem_unlink(dir, dentry); 337562306a36Sopenharmony_ci} 337662306a36Sopenharmony_ci 337762306a36Sopenharmony_cistatic int shmem_whiteout(struct mnt_idmap *idmap, 337862306a36Sopenharmony_ci struct inode *old_dir, struct dentry *old_dentry) 337962306a36Sopenharmony_ci{ 338062306a36Sopenharmony_ci struct dentry *whiteout; 338162306a36Sopenharmony_ci int error; 338262306a36Sopenharmony_ci 338362306a36Sopenharmony_ci whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); 338462306a36Sopenharmony_ci if (!whiteout) 338562306a36Sopenharmony_ci return -ENOMEM; 338662306a36Sopenharmony_ci 338762306a36Sopenharmony_ci error = shmem_mknod(idmap, old_dir, whiteout, 338862306a36Sopenharmony_ci S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 338962306a36Sopenharmony_ci dput(whiteout); 339062306a36Sopenharmony_ci if (error) 339162306a36Sopenharmony_ci return error; 339262306a36Sopenharmony_ci 339362306a36Sopenharmony_ci /* 339462306a36Sopenharmony_ci * Cheat and hash the whiteout while the old dentry is still in 339562306a36Sopenharmony_ci * place, instead of playing games with FS_RENAME_DOES_D_MOVE. 339662306a36Sopenharmony_ci * 339762306a36Sopenharmony_ci * d_lookup() will consistently find one of them at this point, 339862306a36Sopenharmony_ci * not sure which one, but that isn't even important. 339962306a36Sopenharmony_ci */ 340062306a36Sopenharmony_ci d_rehash(whiteout); 340162306a36Sopenharmony_ci return 0; 340262306a36Sopenharmony_ci} 340362306a36Sopenharmony_ci 340462306a36Sopenharmony_ci/* 340562306a36Sopenharmony_ci * The VFS layer already does all the dentry stuff for rename, 340662306a36Sopenharmony_ci * we just have to decrement the usage count for the target if 340762306a36Sopenharmony_ci * it exists so that the VFS layer correctly free's it when it 340862306a36Sopenharmony_ci * gets overwritten. 340962306a36Sopenharmony_ci */ 341062306a36Sopenharmony_cistatic int shmem_rename2(struct mnt_idmap *idmap, 341162306a36Sopenharmony_ci struct inode *old_dir, struct dentry *old_dentry, 341262306a36Sopenharmony_ci struct inode *new_dir, struct dentry *new_dentry, 341362306a36Sopenharmony_ci unsigned int flags) 341462306a36Sopenharmony_ci{ 341562306a36Sopenharmony_ci struct inode *inode = d_inode(old_dentry); 341662306a36Sopenharmony_ci int they_are_dirs = S_ISDIR(inode->i_mode); 341762306a36Sopenharmony_ci int error; 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_ci if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) 342062306a36Sopenharmony_ci return -EINVAL; 342162306a36Sopenharmony_ci 342262306a36Sopenharmony_ci if (flags & RENAME_EXCHANGE) 342362306a36Sopenharmony_ci return simple_offset_rename_exchange(old_dir, old_dentry, 342462306a36Sopenharmony_ci new_dir, new_dentry); 342562306a36Sopenharmony_ci 342662306a36Sopenharmony_ci if (!simple_empty(new_dentry)) 342762306a36Sopenharmony_ci return -ENOTEMPTY; 342862306a36Sopenharmony_ci 342962306a36Sopenharmony_ci if (flags & RENAME_WHITEOUT) { 343062306a36Sopenharmony_ci error = shmem_whiteout(idmap, old_dir, old_dentry); 343162306a36Sopenharmony_ci if (error) 343262306a36Sopenharmony_ci return error; 343362306a36Sopenharmony_ci } 343462306a36Sopenharmony_ci 343562306a36Sopenharmony_ci simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry); 343662306a36Sopenharmony_ci error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry); 343762306a36Sopenharmony_ci if (error) 343862306a36Sopenharmony_ci return error; 343962306a36Sopenharmony_ci 344062306a36Sopenharmony_ci if (d_really_is_positive(new_dentry)) { 344162306a36Sopenharmony_ci (void) shmem_unlink(new_dir, new_dentry); 344262306a36Sopenharmony_ci if (they_are_dirs) { 344362306a36Sopenharmony_ci drop_nlink(d_inode(new_dentry)); 344462306a36Sopenharmony_ci drop_nlink(old_dir); 344562306a36Sopenharmony_ci } 344662306a36Sopenharmony_ci } else if (they_are_dirs) { 344762306a36Sopenharmony_ci drop_nlink(old_dir); 344862306a36Sopenharmony_ci inc_nlink(new_dir); 344962306a36Sopenharmony_ci } 345062306a36Sopenharmony_ci 345162306a36Sopenharmony_ci old_dir->i_size -= BOGO_DIRENT_SIZE; 345262306a36Sopenharmony_ci new_dir->i_size += BOGO_DIRENT_SIZE; 345362306a36Sopenharmony_ci simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); 345462306a36Sopenharmony_ci inode_inc_iversion(old_dir); 345562306a36Sopenharmony_ci inode_inc_iversion(new_dir); 345662306a36Sopenharmony_ci return 0; 345762306a36Sopenharmony_ci} 345862306a36Sopenharmony_ci 345962306a36Sopenharmony_cistatic int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, 346062306a36Sopenharmony_ci struct dentry *dentry, const char *symname) 346162306a36Sopenharmony_ci{ 346262306a36Sopenharmony_ci int error; 346362306a36Sopenharmony_ci int len; 346462306a36Sopenharmony_ci struct inode *inode; 346562306a36Sopenharmony_ci struct folio *folio; 346662306a36Sopenharmony_ci 346762306a36Sopenharmony_ci len = strlen(symname) + 1; 346862306a36Sopenharmony_ci if (len > PAGE_SIZE) 346962306a36Sopenharmony_ci return -ENAMETOOLONG; 347062306a36Sopenharmony_ci 347162306a36Sopenharmony_ci inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0, 347262306a36Sopenharmony_ci VM_NORESERVE); 347362306a36Sopenharmony_ci 347462306a36Sopenharmony_ci if (IS_ERR(inode)) 347562306a36Sopenharmony_ci return PTR_ERR(inode); 347662306a36Sopenharmony_ci 347762306a36Sopenharmony_ci error = security_inode_init_security(inode, dir, &dentry->d_name, 347862306a36Sopenharmony_ci shmem_initxattrs, NULL); 347962306a36Sopenharmony_ci if (error && error != -EOPNOTSUPP) 348062306a36Sopenharmony_ci goto out_iput; 348162306a36Sopenharmony_ci 348262306a36Sopenharmony_ci error = simple_offset_add(shmem_get_offset_ctx(dir), dentry); 348362306a36Sopenharmony_ci if (error) 348462306a36Sopenharmony_ci goto out_iput; 348562306a36Sopenharmony_ci 348662306a36Sopenharmony_ci inode->i_size = len-1; 348762306a36Sopenharmony_ci if (len <= SHORT_SYMLINK_LEN) { 348862306a36Sopenharmony_ci inode->i_link = kmemdup(symname, len, GFP_KERNEL); 348962306a36Sopenharmony_ci if (!inode->i_link) { 349062306a36Sopenharmony_ci error = -ENOMEM; 349162306a36Sopenharmony_ci goto out_remove_offset; 349262306a36Sopenharmony_ci } 349362306a36Sopenharmony_ci inode->i_op = &shmem_short_symlink_operations; 349462306a36Sopenharmony_ci } else { 349562306a36Sopenharmony_ci inode_nohighmem(inode); 349662306a36Sopenharmony_ci error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); 349762306a36Sopenharmony_ci if (error) 349862306a36Sopenharmony_ci goto out_remove_offset; 349962306a36Sopenharmony_ci inode->i_mapping->a_ops = &shmem_aops; 350062306a36Sopenharmony_ci inode->i_op = &shmem_symlink_inode_operations; 350162306a36Sopenharmony_ci memcpy(folio_address(folio), symname, len); 350262306a36Sopenharmony_ci folio_mark_uptodate(folio); 350362306a36Sopenharmony_ci folio_mark_dirty(folio); 350462306a36Sopenharmony_ci folio_unlock(folio); 350562306a36Sopenharmony_ci folio_put(folio); 350662306a36Sopenharmony_ci } 350762306a36Sopenharmony_ci dir->i_size += BOGO_DIRENT_SIZE; 350862306a36Sopenharmony_ci dir->i_mtime = inode_set_ctime_current(dir); 350962306a36Sopenharmony_ci inode_inc_iversion(dir); 351062306a36Sopenharmony_ci d_instantiate(dentry, inode); 351162306a36Sopenharmony_ci dget(dentry); 351262306a36Sopenharmony_ci return 0; 351362306a36Sopenharmony_ci 351462306a36Sopenharmony_ciout_remove_offset: 351562306a36Sopenharmony_ci simple_offset_remove(shmem_get_offset_ctx(dir), dentry); 351662306a36Sopenharmony_ciout_iput: 351762306a36Sopenharmony_ci iput(inode); 351862306a36Sopenharmony_ci return error; 351962306a36Sopenharmony_ci} 352062306a36Sopenharmony_ci 352162306a36Sopenharmony_cistatic void shmem_put_link(void *arg) 352262306a36Sopenharmony_ci{ 352362306a36Sopenharmony_ci folio_mark_accessed(arg); 352462306a36Sopenharmony_ci folio_put(arg); 352562306a36Sopenharmony_ci} 352662306a36Sopenharmony_ci 352762306a36Sopenharmony_cistatic const char *shmem_get_link(struct dentry *dentry, 352862306a36Sopenharmony_ci struct inode *inode, 352962306a36Sopenharmony_ci struct delayed_call *done) 353062306a36Sopenharmony_ci{ 353162306a36Sopenharmony_ci struct folio *folio = NULL; 353262306a36Sopenharmony_ci int error; 353362306a36Sopenharmony_ci 353462306a36Sopenharmony_ci if (!dentry) { 353562306a36Sopenharmony_ci folio = filemap_get_folio(inode->i_mapping, 0); 353662306a36Sopenharmony_ci if (IS_ERR(folio)) 353762306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 353862306a36Sopenharmony_ci if (PageHWPoison(folio_page(folio, 0)) || 353962306a36Sopenharmony_ci !folio_test_uptodate(folio)) { 354062306a36Sopenharmony_ci folio_put(folio); 354162306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 354262306a36Sopenharmony_ci } 354362306a36Sopenharmony_ci } else { 354462306a36Sopenharmony_ci error = shmem_get_folio(inode, 0, &folio, SGP_READ); 354562306a36Sopenharmony_ci if (error) 354662306a36Sopenharmony_ci return ERR_PTR(error); 354762306a36Sopenharmony_ci if (!folio) 354862306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 354962306a36Sopenharmony_ci if (PageHWPoison(folio_page(folio, 0))) { 355062306a36Sopenharmony_ci folio_unlock(folio); 355162306a36Sopenharmony_ci folio_put(folio); 355262306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 355362306a36Sopenharmony_ci } 355462306a36Sopenharmony_ci folio_unlock(folio); 355562306a36Sopenharmony_ci } 355662306a36Sopenharmony_ci set_delayed_call(done, shmem_put_link, folio); 355762306a36Sopenharmony_ci return folio_address(folio); 355862306a36Sopenharmony_ci} 355962306a36Sopenharmony_ci 356062306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 356162306a36Sopenharmony_ci 356262306a36Sopenharmony_cistatic int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa) 356362306a36Sopenharmony_ci{ 356462306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); 356562306a36Sopenharmony_ci 356662306a36Sopenharmony_ci fileattr_fill_flags(fa, info->fsflags & SHMEM_FL_USER_VISIBLE); 356762306a36Sopenharmony_ci 356862306a36Sopenharmony_ci return 0; 356962306a36Sopenharmony_ci} 357062306a36Sopenharmony_ci 357162306a36Sopenharmony_cistatic int shmem_fileattr_set(struct mnt_idmap *idmap, 357262306a36Sopenharmony_ci struct dentry *dentry, struct fileattr *fa) 357362306a36Sopenharmony_ci{ 357462306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 357562306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 357662306a36Sopenharmony_ci 357762306a36Sopenharmony_ci if (fileattr_has_fsx(fa)) 357862306a36Sopenharmony_ci return -EOPNOTSUPP; 357962306a36Sopenharmony_ci if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE) 358062306a36Sopenharmony_ci return -EOPNOTSUPP; 358162306a36Sopenharmony_ci 358262306a36Sopenharmony_ci info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) | 358362306a36Sopenharmony_ci (fa->flags & SHMEM_FL_USER_MODIFIABLE); 358462306a36Sopenharmony_ci 358562306a36Sopenharmony_ci shmem_set_inode_flags(inode, info->fsflags); 358662306a36Sopenharmony_ci inode_set_ctime_current(inode); 358762306a36Sopenharmony_ci inode_inc_iversion(inode); 358862306a36Sopenharmony_ci return 0; 358962306a36Sopenharmony_ci} 359062306a36Sopenharmony_ci 359162306a36Sopenharmony_ci/* 359262306a36Sopenharmony_ci * Superblocks without xattr inode operations may get some security.* xattr 359362306a36Sopenharmony_ci * support from the LSM "for free". As soon as we have any other xattrs 359462306a36Sopenharmony_ci * like ACLs, we also need to implement the security.* handlers at 359562306a36Sopenharmony_ci * filesystem level, though. 359662306a36Sopenharmony_ci */ 359762306a36Sopenharmony_ci 359862306a36Sopenharmony_ci/* 359962306a36Sopenharmony_ci * Callback for security_inode_init_security() for acquiring xattrs. 360062306a36Sopenharmony_ci */ 360162306a36Sopenharmony_cistatic int shmem_initxattrs(struct inode *inode, 360262306a36Sopenharmony_ci const struct xattr *xattr_array, 360362306a36Sopenharmony_ci void *fs_info) 360462306a36Sopenharmony_ci{ 360562306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 360662306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 360762306a36Sopenharmony_ci const struct xattr *xattr; 360862306a36Sopenharmony_ci struct simple_xattr *new_xattr; 360962306a36Sopenharmony_ci size_t ispace = 0; 361062306a36Sopenharmony_ci size_t len; 361162306a36Sopenharmony_ci 361262306a36Sopenharmony_ci if (sbinfo->max_inodes) { 361362306a36Sopenharmony_ci for (xattr = xattr_array; xattr->name != NULL; xattr++) { 361462306a36Sopenharmony_ci ispace += simple_xattr_space(xattr->name, 361562306a36Sopenharmony_ci xattr->value_len + XATTR_SECURITY_PREFIX_LEN); 361662306a36Sopenharmony_ci } 361762306a36Sopenharmony_ci if (ispace) { 361862306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 361962306a36Sopenharmony_ci if (sbinfo->free_ispace < ispace) 362062306a36Sopenharmony_ci ispace = 0; 362162306a36Sopenharmony_ci else 362262306a36Sopenharmony_ci sbinfo->free_ispace -= ispace; 362362306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 362462306a36Sopenharmony_ci if (!ispace) 362562306a36Sopenharmony_ci return -ENOSPC; 362662306a36Sopenharmony_ci } 362762306a36Sopenharmony_ci } 362862306a36Sopenharmony_ci 362962306a36Sopenharmony_ci for (xattr = xattr_array; xattr->name != NULL; xattr++) { 363062306a36Sopenharmony_ci new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); 363162306a36Sopenharmony_ci if (!new_xattr) 363262306a36Sopenharmony_ci break; 363362306a36Sopenharmony_ci 363462306a36Sopenharmony_ci len = strlen(xattr->name) + 1; 363562306a36Sopenharmony_ci new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, 363662306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 363762306a36Sopenharmony_ci if (!new_xattr->name) { 363862306a36Sopenharmony_ci kvfree(new_xattr); 363962306a36Sopenharmony_ci break; 364062306a36Sopenharmony_ci } 364162306a36Sopenharmony_ci 364262306a36Sopenharmony_ci memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, 364362306a36Sopenharmony_ci XATTR_SECURITY_PREFIX_LEN); 364462306a36Sopenharmony_ci memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, 364562306a36Sopenharmony_ci xattr->name, len); 364662306a36Sopenharmony_ci 364762306a36Sopenharmony_ci simple_xattr_add(&info->xattrs, new_xattr); 364862306a36Sopenharmony_ci } 364962306a36Sopenharmony_ci 365062306a36Sopenharmony_ci if (xattr->name != NULL) { 365162306a36Sopenharmony_ci if (ispace) { 365262306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 365362306a36Sopenharmony_ci sbinfo->free_ispace += ispace; 365462306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 365562306a36Sopenharmony_ci } 365662306a36Sopenharmony_ci simple_xattrs_free(&info->xattrs, NULL); 365762306a36Sopenharmony_ci return -ENOMEM; 365862306a36Sopenharmony_ci } 365962306a36Sopenharmony_ci 366062306a36Sopenharmony_ci return 0; 366162306a36Sopenharmony_ci} 366262306a36Sopenharmony_ci 366362306a36Sopenharmony_cistatic int shmem_xattr_handler_get(const struct xattr_handler *handler, 366462306a36Sopenharmony_ci struct dentry *unused, struct inode *inode, 366562306a36Sopenharmony_ci const char *name, void *buffer, size_t size) 366662306a36Sopenharmony_ci{ 366762306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 366862306a36Sopenharmony_ci 366962306a36Sopenharmony_ci name = xattr_full_name(handler, name); 367062306a36Sopenharmony_ci return simple_xattr_get(&info->xattrs, name, buffer, size); 367162306a36Sopenharmony_ci} 367262306a36Sopenharmony_ci 367362306a36Sopenharmony_cistatic int shmem_xattr_handler_set(const struct xattr_handler *handler, 367462306a36Sopenharmony_ci struct mnt_idmap *idmap, 367562306a36Sopenharmony_ci struct dentry *unused, struct inode *inode, 367662306a36Sopenharmony_ci const char *name, const void *value, 367762306a36Sopenharmony_ci size_t size, int flags) 367862306a36Sopenharmony_ci{ 367962306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 368062306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 368162306a36Sopenharmony_ci struct simple_xattr *old_xattr; 368262306a36Sopenharmony_ci size_t ispace = 0; 368362306a36Sopenharmony_ci 368462306a36Sopenharmony_ci name = xattr_full_name(handler, name); 368562306a36Sopenharmony_ci if (value && sbinfo->max_inodes) { 368662306a36Sopenharmony_ci ispace = simple_xattr_space(name, size); 368762306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 368862306a36Sopenharmony_ci if (sbinfo->free_ispace < ispace) 368962306a36Sopenharmony_ci ispace = 0; 369062306a36Sopenharmony_ci else 369162306a36Sopenharmony_ci sbinfo->free_ispace -= ispace; 369262306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 369362306a36Sopenharmony_ci if (!ispace) 369462306a36Sopenharmony_ci return -ENOSPC; 369562306a36Sopenharmony_ci } 369662306a36Sopenharmony_ci 369762306a36Sopenharmony_ci old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags); 369862306a36Sopenharmony_ci if (!IS_ERR(old_xattr)) { 369962306a36Sopenharmony_ci ispace = 0; 370062306a36Sopenharmony_ci if (old_xattr && sbinfo->max_inodes) 370162306a36Sopenharmony_ci ispace = simple_xattr_space(old_xattr->name, 370262306a36Sopenharmony_ci old_xattr->size); 370362306a36Sopenharmony_ci simple_xattr_free(old_xattr); 370462306a36Sopenharmony_ci old_xattr = NULL; 370562306a36Sopenharmony_ci inode_set_ctime_current(inode); 370662306a36Sopenharmony_ci inode_inc_iversion(inode); 370762306a36Sopenharmony_ci } 370862306a36Sopenharmony_ci if (ispace) { 370962306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 371062306a36Sopenharmony_ci sbinfo->free_ispace += ispace; 371162306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 371262306a36Sopenharmony_ci } 371362306a36Sopenharmony_ci return PTR_ERR(old_xattr); 371462306a36Sopenharmony_ci} 371562306a36Sopenharmony_ci 371662306a36Sopenharmony_cistatic const struct xattr_handler shmem_security_xattr_handler = { 371762306a36Sopenharmony_ci .prefix = XATTR_SECURITY_PREFIX, 371862306a36Sopenharmony_ci .get = shmem_xattr_handler_get, 371962306a36Sopenharmony_ci .set = shmem_xattr_handler_set, 372062306a36Sopenharmony_ci}; 372162306a36Sopenharmony_ci 372262306a36Sopenharmony_cistatic const struct xattr_handler shmem_trusted_xattr_handler = { 372362306a36Sopenharmony_ci .prefix = XATTR_TRUSTED_PREFIX, 372462306a36Sopenharmony_ci .get = shmem_xattr_handler_get, 372562306a36Sopenharmony_ci .set = shmem_xattr_handler_set, 372662306a36Sopenharmony_ci}; 372762306a36Sopenharmony_ci 372862306a36Sopenharmony_cistatic const struct xattr_handler shmem_user_xattr_handler = { 372962306a36Sopenharmony_ci .prefix = XATTR_USER_PREFIX, 373062306a36Sopenharmony_ci .get = shmem_xattr_handler_get, 373162306a36Sopenharmony_ci .set = shmem_xattr_handler_set, 373262306a36Sopenharmony_ci}; 373362306a36Sopenharmony_ci 373462306a36Sopenharmony_cistatic const struct xattr_handler *shmem_xattr_handlers[] = { 373562306a36Sopenharmony_ci &shmem_security_xattr_handler, 373662306a36Sopenharmony_ci &shmem_trusted_xattr_handler, 373762306a36Sopenharmony_ci &shmem_user_xattr_handler, 373862306a36Sopenharmony_ci NULL 373962306a36Sopenharmony_ci}; 374062306a36Sopenharmony_ci 374162306a36Sopenharmony_cistatic ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) 374262306a36Sopenharmony_ci{ 374362306a36Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); 374462306a36Sopenharmony_ci return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); 374562306a36Sopenharmony_ci} 374662306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_XATTR */ 374762306a36Sopenharmony_ci 374862306a36Sopenharmony_cistatic const struct inode_operations shmem_short_symlink_operations = { 374962306a36Sopenharmony_ci .getattr = shmem_getattr, 375062306a36Sopenharmony_ci .setattr = shmem_setattr, 375162306a36Sopenharmony_ci .get_link = simple_get_link, 375262306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 375362306a36Sopenharmony_ci .listxattr = shmem_listxattr, 375462306a36Sopenharmony_ci#endif 375562306a36Sopenharmony_ci}; 375662306a36Sopenharmony_ci 375762306a36Sopenharmony_cistatic const struct inode_operations shmem_symlink_inode_operations = { 375862306a36Sopenharmony_ci .getattr = shmem_getattr, 375962306a36Sopenharmony_ci .setattr = shmem_setattr, 376062306a36Sopenharmony_ci .get_link = shmem_get_link, 376162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 376262306a36Sopenharmony_ci .listxattr = shmem_listxattr, 376362306a36Sopenharmony_ci#endif 376462306a36Sopenharmony_ci}; 376562306a36Sopenharmony_ci 376662306a36Sopenharmony_cistatic struct dentry *shmem_get_parent(struct dentry *child) 376762306a36Sopenharmony_ci{ 376862306a36Sopenharmony_ci return ERR_PTR(-ESTALE); 376962306a36Sopenharmony_ci} 377062306a36Sopenharmony_ci 377162306a36Sopenharmony_cistatic int shmem_match(struct inode *ino, void *vfh) 377262306a36Sopenharmony_ci{ 377362306a36Sopenharmony_ci __u32 *fh = vfh; 377462306a36Sopenharmony_ci __u64 inum = fh[2]; 377562306a36Sopenharmony_ci inum = (inum << 32) | fh[1]; 377662306a36Sopenharmony_ci return ino->i_ino == inum && fh[0] == ino->i_generation; 377762306a36Sopenharmony_ci} 377862306a36Sopenharmony_ci 377962306a36Sopenharmony_ci/* Find any alias of inode, but prefer a hashed alias */ 378062306a36Sopenharmony_cistatic struct dentry *shmem_find_alias(struct inode *inode) 378162306a36Sopenharmony_ci{ 378262306a36Sopenharmony_ci struct dentry *alias = d_find_alias(inode); 378362306a36Sopenharmony_ci 378462306a36Sopenharmony_ci return alias ?: d_find_any_alias(inode); 378562306a36Sopenharmony_ci} 378662306a36Sopenharmony_ci 378762306a36Sopenharmony_ci 378862306a36Sopenharmony_cistatic struct dentry *shmem_fh_to_dentry(struct super_block *sb, 378962306a36Sopenharmony_ci struct fid *fid, int fh_len, int fh_type) 379062306a36Sopenharmony_ci{ 379162306a36Sopenharmony_ci struct inode *inode; 379262306a36Sopenharmony_ci struct dentry *dentry = NULL; 379362306a36Sopenharmony_ci u64 inum; 379462306a36Sopenharmony_ci 379562306a36Sopenharmony_ci if (fh_len < 3) 379662306a36Sopenharmony_ci return NULL; 379762306a36Sopenharmony_ci 379862306a36Sopenharmony_ci inum = fid->raw[2]; 379962306a36Sopenharmony_ci inum = (inum << 32) | fid->raw[1]; 380062306a36Sopenharmony_ci 380162306a36Sopenharmony_ci inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), 380262306a36Sopenharmony_ci shmem_match, fid->raw); 380362306a36Sopenharmony_ci if (inode) { 380462306a36Sopenharmony_ci dentry = shmem_find_alias(inode); 380562306a36Sopenharmony_ci iput(inode); 380662306a36Sopenharmony_ci } 380762306a36Sopenharmony_ci 380862306a36Sopenharmony_ci return dentry; 380962306a36Sopenharmony_ci} 381062306a36Sopenharmony_ci 381162306a36Sopenharmony_cistatic int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, 381262306a36Sopenharmony_ci struct inode *parent) 381362306a36Sopenharmony_ci{ 381462306a36Sopenharmony_ci if (*len < 3) { 381562306a36Sopenharmony_ci *len = 3; 381662306a36Sopenharmony_ci return FILEID_INVALID; 381762306a36Sopenharmony_ci } 381862306a36Sopenharmony_ci 381962306a36Sopenharmony_ci if (inode_unhashed(inode)) { 382062306a36Sopenharmony_ci /* Unfortunately insert_inode_hash is not idempotent, 382162306a36Sopenharmony_ci * so as we hash inodes here rather than at creation 382262306a36Sopenharmony_ci * time, we need a lock to ensure we only try 382362306a36Sopenharmony_ci * to do it once 382462306a36Sopenharmony_ci */ 382562306a36Sopenharmony_ci static DEFINE_SPINLOCK(lock); 382662306a36Sopenharmony_ci spin_lock(&lock); 382762306a36Sopenharmony_ci if (inode_unhashed(inode)) 382862306a36Sopenharmony_ci __insert_inode_hash(inode, 382962306a36Sopenharmony_ci inode->i_ino + inode->i_generation); 383062306a36Sopenharmony_ci spin_unlock(&lock); 383162306a36Sopenharmony_ci } 383262306a36Sopenharmony_ci 383362306a36Sopenharmony_ci fh[0] = inode->i_generation; 383462306a36Sopenharmony_ci fh[1] = inode->i_ino; 383562306a36Sopenharmony_ci fh[2] = ((__u64)inode->i_ino) >> 32; 383662306a36Sopenharmony_ci 383762306a36Sopenharmony_ci *len = 3; 383862306a36Sopenharmony_ci return 1; 383962306a36Sopenharmony_ci} 384062306a36Sopenharmony_ci 384162306a36Sopenharmony_cistatic const struct export_operations shmem_export_ops = { 384262306a36Sopenharmony_ci .get_parent = shmem_get_parent, 384362306a36Sopenharmony_ci .encode_fh = shmem_encode_fh, 384462306a36Sopenharmony_ci .fh_to_dentry = shmem_fh_to_dentry, 384562306a36Sopenharmony_ci}; 384662306a36Sopenharmony_ci 384762306a36Sopenharmony_cienum shmem_param { 384862306a36Sopenharmony_ci Opt_gid, 384962306a36Sopenharmony_ci Opt_huge, 385062306a36Sopenharmony_ci Opt_mode, 385162306a36Sopenharmony_ci Opt_mpol, 385262306a36Sopenharmony_ci Opt_nr_blocks, 385362306a36Sopenharmony_ci Opt_nr_inodes, 385462306a36Sopenharmony_ci Opt_size, 385562306a36Sopenharmony_ci Opt_uid, 385662306a36Sopenharmony_ci Opt_inode32, 385762306a36Sopenharmony_ci Opt_inode64, 385862306a36Sopenharmony_ci Opt_noswap, 385962306a36Sopenharmony_ci Opt_quota, 386062306a36Sopenharmony_ci Opt_usrquota, 386162306a36Sopenharmony_ci Opt_grpquota, 386262306a36Sopenharmony_ci Opt_usrquota_block_hardlimit, 386362306a36Sopenharmony_ci Opt_usrquota_inode_hardlimit, 386462306a36Sopenharmony_ci Opt_grpquota_block_hardlimit, 386562306a36Sopenharmony_ci Opt_grpquota_inode_hardlimit, 386662306a36Sopenharmony_ci}; 386762306a36Sopenharmony_ci 386862306a36Sopenharmony_cistatic const struct constant_table shmem_param_enums_huge[] = { 386962306a36Sopenharmony_ci {"never", SHMEM_HUGE_NEVER }, 387062306a36Sopenharmony_ci {"always", SHMEM_HUGE_ALWAYS }, 387162306a36Sopenharmony_ci {"within_size", SHMEM_HUGE_WITHIN_SIZE }, 387262306a36Sopenharmony_ci {"advise", SHMEM_HUGE_ADVISE }, 387362306a36Sopenharmony_ci {} 387462306a36Sopenharmony_ci}; 387562306a36Sopenharmony_ci 387662306a36Sopenharmony_ciconst struct fs_parameter_spec shmem_fs_parameters[] = { 387762306a36Sopenharmony_ci fsparam_u32 ("gid", Opt_gid), 387862306a36Sopenharmony_ci fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), 387962306a36Sopenharmony_ci fsparam_u32oct("mode", Opt_mode), 388062306a36Sopenharmony_ci fsparam_string("mpol", Opt_mpol), 388162306a36Sopenharmony_ci fsparam_string("nr_blocks", Opt_nr_blocks), 388262306a36Sopenharmony_ci fsparam_string("nr_inodes", Opt_nr_inodes), 388362306a36Sopenharmony_ci fsparam_string("size", Opt_size), 388462306a36Sopenharmony_ci fsparam_u32 ("uid", Opt_uid), 388562306a36Sopenharmony_ci fsparam_flag ("inode32", Opt_inode32), 388662306a36Sopenharmony_ci fsparam_flag ("inode64", Opt_inode64), 388762306a36Sopenharmony_ci fsparam_flag ("noswap", Opt_noswap), 388862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 388962306a36Sopenharmony_ci fsparam_flag ("quota", Opt_quota), 389062306a36Sopenharmony_ci fsparam_flag ("usrquota", Opt_usrquota), 389162306a36Sopenharmony_ci fsparam_flag ("grpquota", Opt_grpquota), 389262306a36Sopenharmony_ci fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit), 389362306a36Sopenharmony_ci fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit), 389462306a36Sopenharmony_ci fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit), 389562306a36Sopenharmony_ci fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit), 389662306a36Sopenharmony_ci#endif 389762306a36Sopenharmony_ci {} 389862306a36Sopenharmony_ci}; 389962306a36Sopenharmony_ci 390062306a36Sopenharmony_cistatic int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) 390162306a36Sopenharmony_ci{ 390262306a36Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 390362306a36Sopenharmony_ci struct fs_parse_result result; 390462306a36Sopenharmony_ci unsigned long long size; 390562306a36Sopenharmony_ci char *rest; 390662306a36Sopenharmony_ci int opt; 390762306a36Sopenharmony_ci kuid_t kuid; 390862306a36Sopenharmony_ci kgid_t kgid; 390962306a36Sopenharmony_ci 391062306a36Sopenharmony_ci opt = fs_parse(fc, shmem_fs_parameters, param, &result); 391162306a36Sopenharmony_ci if (opt < 0) 391262306a36Sopenharmony_ci return opt; 391362306a36Sopenharmony_ci 391462306a36Sopenharmony_ci switch (opt) { 391562306a36Sopenharmony_ci case Opt_size: 391662306a36Sopenharmony_ci size = memparse(param->string, &rest); 391762306a36Sopenharmony_ci if (*rest == '%') { 391862306a36Sopenharmony_ci size <<= PAGE_SHIFT; 391962306a36Sopenharmony_ci size *= totalram_pages(); 392062306a36Sopenharmony_ci do_div(size, 100); 392162306a36Sopenharmony_ci rest++; 392262306a36Sopenharmony_ci } 392362306a36Sopenharmony_ci if (*rest) 392462306a36Sopenharmony_ci goto bad_value; 392562306a36Sopenharmony_ci ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE); 392662306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_BLOCKS; 392762306a36Sopenharmony_ci break; 392862306a36Sopenharmony_ci case Opt_nr_blocks: 392962306a36Sopenharmony_ci ctx->blocks = memparse(param->string, &rest); 393062306a36Sopenharmony_ci if (*rest || ctx->blocks > LONG_MAX) 393162306a36Sopenharmony_ci goto bad_value; 393262306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_BLOCKS; 393362306a36Sopenharmony_ci break; 393462306a36Sopenharmony_ci case Opt_nr_inodes: 393562306a36Sopenharmony_ci ctx->inodes = memparse(param->string, &rest); 393662306a36Sopenharmony_ci if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE) 393762306a36Sopenharmony_ci goto bad_value; 393862306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_INODES; 393962306a36Sopenharmony_ci break; 394062306a36Sopenharmony_ci case Opt_mode: 394162306a36Sopenharmony_ci ctx->mode = result.uint_32 & 07777; 394262306a36Sopenharmony_ci break; 394362306a36Sopenharmony_ci case Opt_uid: 394462306a36Sopenharmony_ci kuid = make_kuid(current_user_ns(), result.uint_32); 394562306a36Sopenharmony_ci if (!uid_valid(kuid)) 394662306a36Sopenharmony_ci goto bad_value; 394762306a36Sopenharmony_ci 394862306a36Sopenharmony_ci /* 394962306a36Sopenharmony_ci * The requested uid must be representable in the 395062306a36Sopenharmony_ci * filesystem's idmapping. 395162306a36Sopenharmony_ci */ 395262306a36Sopenharmony_ci if (!kuid_has_mapping(fc->user_ns, kuid)) 395362306a36Sopenharmony_ci goto bad_value; 395462306a36Sopenharmony_ci 395562306a36Sopenharmony_ci ctx->uid = kuid; 395662306a36Sopenharmony_ci break; 395762306a36Sopenharmony_ci case Opt_gid: 395862306a36Sopenharmony_ci kgid = make_kgid(current_user_ns(), result.uint_32); 395962306a36Sopenharmony_ci if (!gid_valid(kgid)) 396062306a36Sopenharmony_ci goto bad_value; 396162306a36Sopenharmony_ci 396262306a36Sopenharmony_ci /* 396362306a36Sopenharmony_ci * The requested gid must be representable in the 396462306a36Sopenharmony_ci * filesystem's idmapping. 396562306a36Sopenharmony_ci */ 396662306a36Sopenharmony_ci if (!kgid_has_mapping(fc->user_ns, kgid)) 396762306a36Sopenharmony_ci goto bad_value; 396862306a36Sopenharmony_ci 396962306a36Sopenharmony_ci ctx->gid = kgid; 397062306a36Sopenharmony_ci break; 397162306a36Sopenharmony_ci case Opt_huge: 397262306a36Sopenharmony_ci ctx->huge = result.uint_32; 397362306a36Sopenharmony_ci if (ctx->huge != SHMEM_HUGE_NEVER && 397462306a36Sopenharmony_ci !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && 397562306a36Sopenharmony_ci has_transparent_hugepage())) 397662306a36Sopenharmony_ci goto unsupported_parameter; 397762306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_HUGE; 397862306a36Sopenharmony_ci break; 397962306a36Sopenharmony_ci case Opt_mpol: 398062306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_NUMA)) { 398162306a36Sopenharmony_ci mpol_put(ctx->mpol); 398262306a36Sopenharmony_ci ctx->mpol = NULL; 398362306a36Sopenharmony_ci if (mpol_parse_str(param->string, &ctx->mpol)) 398462306a36Sopenharmony_ci goto bad_value; 398562306a36Sopenharmony_ci break; 398662306a36Sopenharmony_ci } 398762306a36Sopenharmony_ci goto unsupported_parameter; 398862306a36Sopenharmony_ci case Opt_inode32: 398962306a36Sopenharmony_ci ctx->full_inums = false; 399062306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_INUMS; 399162306a36Sopenharmony_ci break; 399262306a36Sopenharmony_ci case Opt_inode64: 399362306a36Sopenharmony_ci if (sizeof(ino_t) < 8) { 399462306a36Sopenharmony_ci return invalfc(fc, 399562306a36Sopenharmony_ci "Cannot use inode64 with <64bit inums in kernel\n"); 399662306a36Sopenharmony_ci } 399762306a36Sopenharmony_ci ctx->full_inums = true; 399862306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_INUMS; 399962306a36Sopenharmony_ci break; 400062306a36Sopenharmony_ci case Opt_noswap: 400162306a36Sopenharmony_ci if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) { 400262306a36Sopenharmony_ci return invalfc(fc, 400362306a36Sopenharmony_ci "Turning off swap in unprivileged tmpfs mounts unsupported"); 400462306a36Sopenharmony_ci } 400562306a36Sopenharmony_ci ctx->noswap = true; 400662306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_NOSWAP; 400762306a36Sopenharmony_ci break; 400862306a36Sopenharmony_ci case Opt_quota: 400962306a36Sopenharmony_ci if (fc->user_ns != &init_user_ns) 401062306a36Sopenharmony_ci return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); 401162306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_QUOTA; 401262306a36Sopenharmony_ci ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP); 401362306a36Sopenharmony_ci break; 401462306a36Sopenharmony_ci case Opt_usrquota: 401562306a36Sopenharmony_ci if (fc->user_ns != &init_user_ns) 401662306a36Sopenharmony_ci return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); 401762306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_QUOTA; 401862306a36Sopenharmony_ci ctx->quota_types |= QTYPE_MASK_USR; 401962306a36Sopenharmony_ci break; 402062306a36Sopenharmony_ci case Opt_grpquota: 402162306a36Sopenharmony_ci if (fc->user_ns != &init_user_ns) 402262306a36Sopenharmony_ci return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); 402362306a36Sopenharmony_ci ctx->seen |= SHMEM_SEEN_QUOTA; 402462306a36Sopenharmony_ci ctx->quota_types |= QTYPE_MASK_GRP; 402562306a36Sopenharmony_ci break; 402662306a36Sopenharmony_ci case Opt_usrquota_block_hardlimit: 402762306a36Sopenharmony_ci size = memparse(param->string, &rest); 402862306a36Sopenharmony_ci if (*rest || !size) 402962306a36Sopenharmony_ci goto bad_value; 403062306a36Sopenharmony_ci if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) 403162306a36Sopenharmony_ci return invalfc(fc, 403262306a36Sopenharmony_ci "User quota block hardlimit too large."); 403362306a36Sopenharmony_ci ctx->qlimits.usrquota_bhardlimit = size; 403462306a36Sopenharmony_ci break; 403562306a36Sopenharmony_ci case Opt_grpquota_block_hardlimit: 403662306a36Sopenharmony_ci size = memparse(param->string, &rest); 403762306a36Sopenharmony_ci if (*rest || !size) 403862306a36Sopenharmony_ci goto bad_value; 403962306a36Sopenharmony_ci if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) 404062306a36Sopenharmony_ci return invalfc(fc, 404162306a36Sopenharmony_ci "Group quota block hardlimit too large."); 404262306a36Sopenharmony_ci ctx->qlimits.grpquota_bhardlimit = size; 404362306a36Sopenharmony_ci break; 404462306a36Sopenharmony_ci case Opt_usrquota_inode_hardlimit: 404562306a36Sopenharmony_ci size = memparse(param->string, &rest); 404662306a36Sopenharmony_ci if (*rest || !size) 404762306a36Sopenharmony_ci goto bad_value; 404862306a36Sopenharmony_ci if (size > SHMEM_QUOTA_MAX_INO_LIMIT) 404962306a36Sopenharmony_ci return invalfc(fc, 405062306a36Sopenharmony_ci "User quota inode hardlimit too large."); 405162306a36Sopenharmony_ci ctx->qlimits.usrquota_ihardlimit = size; 405262306a36Sopenharmony_ci break; 405362306a36Sopenharmony_ci case Opt_grpquota_inode_hardlimit: 405462306a36Sopenharmony_ci size = memparse(param->string, &rest); 405562306a36Sopenharmony_ci if (*rest || !size) 405662306a36Sopenharmony_ci goto bad_value; 405762306a36Sopenharmony_ci if (size > SHMEM_QUOTA_MAX_INO_LIMIT) 405862306a36Sopenharmony_ci return invalfc(fc, 405962306a36Sopenharmony_ci "Group quota inode hardlimit too large."); 406062306a36Sopenharmony_ci ctx->qlimits.grpquota_ihardlimit = size; 406162306a36Sopenharmony_ci break; 406262306a36Sopenharmony_ci } 406362306a36Sopenharmony_ci return 0; 406462306a36Sopenharmony_ci 406562306a36Sopenharmony_ciunsupported_parameter: 406662306a36Sopenharmony_ci return invalfc(fc, "Unsupported parameter '%s'", param->key); 406762306a36Sopenharmony_cibad_value: 406862306a36Sopenharmony_ci return invalfc(fc, "Bad value for '%s'", param->key); 406962306a36Sopenharmony_ci} 407062306a36Sopenharmony_ci 407162306a36Sopenharmony_cistatic int shmem_parse_options(struct fs_context *fc, void *data) 407262306a36Sopenharmony_ci{ 407362306a36Sopenharmony_ci char *options = data; 407462306a36Sopenharmony_ci 407562306a36Sopenharmony_ci if (options) { 407662306a36Sopenharmony_ci int err = security_sb_eat_lsm_opts(options, &fc->security); 407762306a36Sopenharmony_ci if (err) 407862306a36Sopenharmony_ci return err; 407962306a36Sopenharmony_ci } 408062306a36Sopenharmony_ci 408162306a36Sopenharmony_ci while (options != NULL) { 408262306a36Sopenharmony_ci char *this_char = options; 408362306a36Sopenharmony_ci for (;;) { 408462306a36Sopenharmony_ci /* 408562306a36Sopenharmony_ci * NUL-terminate this option: unfortunately, 408662306a36Sopenharmony_ci * mount options form a comma-separated list, 408762306a36Sopenharmony_ci * but mpol's nodelist may also contain commas. 408862306a36Sopenharmony_ci */ 408962306a36Sopenharmony_ci options = strchr(options, ','); 409062306a36Sopenharmony_ci if (options == NULL) 409162306a36Sopenharmony_ci break; 409262306a36Sopenharmony_ci options++; 409362306a36Sopenharmony_ci if (!isdigit(*options)) { 409462306a36Sopenharmony_ci options[-1] = '\0'; 409562306a36Sopenharmony_ci break; 409662306a36Sopenharmony_ci } 409762306a36Sopenharmony_ci } 409862306a36Sopenharmony_ci if (*this_char) { 409962306a36Sopenharmony_ci char *value = strchr(this_char, '='); 410062306a36Sopenharmony_ci size_t len = 0; 410162306a36Sopenharmony_ci int err; 410262306a36Sopenharmony_ci 410362306a36Sopenharmony_ci if (value) { 410462306a36Sopenharmony_ci *value++ = '\0'; 410562306a36Sopenharmony_ci len = strlen(value); 410662306a36Sopenharmony_ci } 410762306a36Sopenharmony_ci err = vfs_parse_fs_string(fc, this_char, value, len); 410862306a36Sopenharmony_ci if (err < 0) 410962306a36Sopenharmony_ci return err; 411062306a36Sopenharmony_ci } 411162306a36Sopenharmony_ci } 411262306a36Sopenharmony_ci return 0; 411362306a36Sopenharmony_ci} 411462306a36Sopenharmony_ci 411562306a36Sopenharmony_ci/* 411662306a36Sopenharmony_ci * Reconfigure a shmem filesystem. 411762306a36Sopenharmony_ci */ 411862306a36Sopenharmony_cistatic int shmem_reconfigure(struct fs_context *fc) 411962306a36Sopenharmony_ci{ 412062306a36Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 412162306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); 412262306a36Sopenharmony_ci unsigned long used_isp; 412362306a36Sopenharmony_ci struct mempolicy *mpol = NULL; 412462306a36Sopenharmony_ci const char *err; 412562306a36Sopenharmony_ci 412662306a36Sopenharmony_ci raw_spin_lock(&sbinfo->stat_lock); 412762306a36Sopenharmony_ci used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace; 412862306a36Sopenharmony_ci 412962306a36Sopenharmony_ci if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { 413062306a36Sopenharmony_ci if (!sbinfo->max_blocks) { 413162306a36Sopenharmony_ci err = "Cannot retroactively limit size"; 413262306a36Sopenharmony_ci goto out; 413362306a36Sopenharmony_ci } 413462306a36Sopenharmony_ci if (percpu_counter_compare(&sbinfo->used_blocks, 413562306a36Sopenharmony_ci ctx->blocks) > 0) { 413662306a36Sopenharmony_ci err = "Too small a size for current use"; 413762306a36Sopenharmony_ci goto out; 413862306a36Sopenharmony_ci } 413962306a36Sopenharmony_ci } 414062306a36Sopenharmony_ci if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) { 414162306a36Sopenharmony_ci if (!sbinfo->max_inodes) { 414262306a36Sopenharmony_ci err = "Cannot retroactively limit inodes"; 414362306a36Sopenharmony_ci goto out; 414462306a36Sopenharmony_ci } 414562306a36Sopenharmony_ci if (ctx->inodes * BOGO_INODE_SIZE < used_isp) { 414662306a36Sopenharmony_ci err = "Too few inodes for current use"; 414762306a36Sopenharmony_ci goto out; 414862306a36Sopenharmony_ci } 414962306a36Sopenharmony_ci } 415062306a36Sopenharmony_ci 415162306a36Sopenharmony_ci if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums && 415262306a36Sopenharmony_ci sbinfo->next_ino > UINT_MAX) { 415362306a36Sopenharmony_ci err = "Current inum too high to switch to 32-bit inums"; 415462306a36Sopenharmony_ci goto out; 415562306a36Sopenharmony_ci } 415662306a36Sopenharmony_ci if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { 415762306a36Sopenharmony_ci err = "Cannot disable swap on remount"; 415862306a36Sopenharmony_ci goto out; 415962306a36Sopenharmony_ci } 416062306a36Sopenharmony_ci if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { 416162306a36Sopenharmony_ci err = "Cannot enable swap on remount if it was disabled on first mount"; 416262306a36Sopenharmony_ci goto out; 416362306a36Sopenharmony_ci } 416462306a36Sopenharmony_ci 416562306a36Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_QUOTA && 416662306a36Sopenharmony_ci !sb_any_quota_loaded(fc->root->d_sb)) { 416762306a36Sopenharmony_ci err = "Cannot enable quota on remount"; 416862306a36Sopenharmony_ci goto out; 416962306a36Sopenharmony_ci } 417062306a36Sopenharmony_ci 417162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 417262306a36Sopenharmony_ci#define CHANGED_LIMIT(name) \ 417362306a36Sopenharmony_ci (ctx->qlimits.name## hardlimit && \ 417462306a36Sopenharmony_ci (ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit)) 417562306a36Sopenharmony_ci 417662306a36Sopenharmony_ci if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) || 417762306a36Sopenharmony_ci CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) { 417862306a36Sopenharmony_ci err = "Cannot change global quota limit on remount"; 417962306a36Sopenharmony_ci goto out; 418062306a36Sopenharmony_ci } 418162306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */ 418262306a36Sopenharmony_ci 418362306a36Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_HUGE) 418462306a36Sopenharmony_ci sbinfo->huge = ctx->huge; 418562306a36Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_INUMS) 418662306a36Sopenharmony_ci sbinfo->full_inums = ctx->full_inums; 418762306a36Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_BLOCKS) 418862306a36Sopenharmony_ci sbinfo->max_blocks = ctx->blocks; 418962306a36Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_INODES) { 419062306a36Sopenharmony_ci sbinfo->max_inodes = ctx->inodes; 419162306a36Sopenharmony_ci sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp; 419262306a36Sopenharmony_ci } 419362306a36Sopenharmony_ci 419462306a36Sopenharmony_ci /* 419562306a36Sopenharmony_ci * Preserve previous mempolicy unless mpol remount option was specified. 419662306a36Sopenharmony_ci */ 419762306a36Sopenharmony_ci if (ctx->mpol) { 419862306a36Sopenharmony_ci mpol = sbinfo->mpol; 419962306a36Sopenharmony_ci sbinfo->mpol = ctx->mpol; /* transfers initial ref */ 420062306a36Sopenharmony_ci ctx->mpol = NULL; 420162306a36Sopenharmony_ci } 420262306a36Sopenharmony_ci 420362306a36Sopenharmony_ci if (ctx->noswap) 420462306a36Sopenharmony_ci sbinfo->noswap = true; 420562306a36Sopenharmony_ci 420662306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 420762306a36Sopenharmony_ci mpol_put(mpol); 420862306a36Sopenharmony_ci return 0; 420962306a36Sopenharmony_ciout: 421062306a36Sopenharmony_ci raw_spin_unlock(&sbinfo->stat_lock); 421162306a36Sopenharmony_ci return invalfc(fc, "%s", err); 421262306a36Sopenharmony_ci} 421362306a36Sopenharmony_ci 421462306a36Sopenharmony_cistatic int shmem_show_options(struct seq_file *seq, struct dentry *root) 421562306a36Sopenharmony_ci{ 421662306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb); 421762306a36Sopenharmony_ci struct mempolicy *mpol; 421862306a36Sopenharmony_ci 421962306a36Sopenharmony_ci if (sbinfo->max_blocks != shmem_default_max_blocks()) 422062306a36Sopenharmony_ci seq_printf(seq, ",size=%luk", K(sbinfo->max_blocks)); 422162306a36Sopenharmony_ci if (sbinfo->max_inodes != shmem_default_max_inodes()) 422262306a36Sopenharmony_ci seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); 422362306a36Sopenharmony_ci if (sbinfo->mode != (0777 | S_ISVTX)) 422462306a36Sopenharmony_ci seq_printf(seq, ",mode=%03ho", sbinfo->mode); 422562306a36Sopenharmony_ci if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) 422662306a36Sopenharmony_ci seq_printf(seq, ",uid=%u", 422762306a36Sopenharmony_ci from_kuid_munged(&init_user_ns, sbinfo->uid)); 422862306a36Sopenharmony_ci if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) 422962306a36Sopenharmony_ci seq_printf(seq, ",gid=%u", 423062306a36Sopenharmony_ci from_kgid_munged(&init_user_ns, sbinfo->gid)); 423162306a36Sopenharmony_ci 423262306a36Sopenharmony_ci /* 423362306a36Sopenharmony_ci * Showing inode{64,32} might be useful even if it's the system default, 423462306a36Sopenharmony_ci * since then people don't have to resort to checking both here and 423562306a36Sopenharmony_ci * /proc/config.gz to confirm 64-bit inums were successfully applied 423662306a36Sopenharmony_ci * (which may not even exist if IKCONFIG_PROC isn't enabled). 423762306a36Sopenharmony_ci * 423862306a36Sopenharmony_ci * We hide it when inode64 isn't the default and we are using 32-bit 423962306a36Sopenharmony_ci * inodes, since that probably just means the feature isn't even under 424062306a36Sopenharmony_ci * consideration. 424162306a36Sopenharmony_ci * 424262306a36Sopenharmony_ci * As such: 424362306a36Sopenharmony_ci * 424462306a36Sopenharmony_ci * +-----------------+-----------------+ 424562306a36Sopenharmony_ci * | TMPFS_INODE64=y | TMPFS_INODE64=n | 424662306a36Sopenharmony_ci * +------------------+-----------------+-----------------+ 424762306a36Sopenharmony_ci * | full_inums=true | show | show | 424862306a36Sopenharmony_ci * | full_inums=false | show | hide | 424962306a36Sopenharmony_ci * +------------------+-----------------+-----------------+ 425062306a36Sopenharmony_ci * 425162306a36Sopenharmony_ci */ 425262306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums) 425362306a36Sopenharmony_ci seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32)); 425462306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 425562306a36Sopenharmony_ci /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ 425662306a36Sopenharmony_ci if (sbinfo->huge) 425762306a36Sopenharmony_ci seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); 425862306a36Sopenharmony_ci#endif 425962306a36Sopenharmony_ci mpol = shmem_get_sbmpol(sbinfo); 426062306a36Sopenharmony_ci shmem_show_mpol(seq, mpol); 426162306a36Sopenharmony_ci mpol_put(mpol); 426262306a36Sopenharmony_ci if (sbinfo->noswap) 426362306a36Sopenharmony_ci seq_printf(seq, ",noswap"); 426462306a36Sopenharmony_ci return 0; 426562306a36Sopenharmony_ci} 426662306a36Sopenharmony_ci 426762306a36Sopenharmony_ci#endif /* CONFIG_TMPFS */ 426862306a36Sopenharmony_ci 426962306a36Sopenharmony_cistatic void shmem_put_super(struct super_block *sb) 427062306a36Sopenharmony_ci{ 427162306a36Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 427262306a36Sopenharmony_ci 427362306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 427462306a36Sopenharmony_ci shmem_disable_quotas(sb); 427562306a36Sopenharmony_ci#endif 427662306a36Sopenharmony_ci free_percpu(sbinfo->ino_batch); 427762306a36Sopenharmony_ci percpu_counter_destroy(&sbinfo->used_blocks); 427862306a36Sopenharmony_ci mpol_put(sbinfo->mpol); 427962306a36Sopenharmony_ci kfree(sbinfo); 428062306a36Sopenharmony_ci sb->s_fs_info = NULL; 428162306a36Sopenharmony_ci} 428262306a36Sopenharmony_ci 428362306a36Sopenharmony_cistatic int shmem_fill_super(struct super_block *sb, struct fs_context *fc) 428462306a36Sopenharmony_ci{ 428562306a36Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 428662306a36Sopenharmony_ci struct inode *inode; 428762306a36Sopenharmony_ci struct shmem_sb_info *sbinfo; 428862306a36Sopenharmony_ci int error = -ENOMEM; 428962306a36Sopenharmony_ci 429062306a36Sopenharmony_ci /* Round up to L1_CACHE_BYTES to resist false sharing */ 429162306a36Sopenharmony_ci sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info), 429262306a36Sopenharmony_ci L1_CACHE_BYTES), GFP_KERNEL); 429362306a36Sopenharmony_ci if (!sbinfo) 429462306a36Sopenharmony_ci return error; 429562306a36Sopenharmony_ci 429662306a36Sopenharmony_ci sb->s_fs_info = sbinfo; 429762306a36Sopenharmony_ci 429862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 429962306a36Sopenharmony_ci /* 430062306a36Sopenharmony_ci * Per default we only allow half of the physical ram per 430162306a36Sopenharmony_ci * tmpfs instance, limiting inodes to one per page of lowmem; 430262306a36Sopenharmony_ci * but the internal instance is left unlimited. 430362306a36Sopenharmony_ci */ 430462306a36Sopenharmony_ci if (!(sb->s_flags & SB_KERNMOUNT)) { 430562306a36Sopenharmony_ci if (!(ctx->seen & SHMEM_SEEN_BLOCKS)) 430662306a36Sopenharmony_ci ctx->blocks = shmem_default_max_blocks(); 430762306a36Sopenharmony_ci if (!(ctx->seen & SHMEM_SEEN_INODES)) 430862306a36Sopenharmony_ci ctx->inodes = shmem_default_max_inodes(); 430962306a36Sopenharmony_ci if (!(ctx->seen & SHMEM_SEEN_INUMS)) 431062306a36Sopenharmony_ci ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); 431162306a36Sopenharmony_ci sbinfo->noswap = ctx->noswap; 431262306a36Sopenharmony_ci } else { 431362306a36Sopenharmony_ci sb->s_flags |= SB_NOUSER; 431462306a36Sopenharmony_ci } 431562306a36Sopenharmony_ci sb->s_export_op = &shmem_export_ops; 431662306a36Sopenharmony_ci sb->s_flags |= SB_NOSEC | SB_I_VERSION; 431762306a36Sopenharmony_ci#else 431862306a36Sopenharmony_ci sb->s_flags |= SB_NOUSER; 431962306a36Sopenharmony_ci#endif 432062306a36Sopenharmony_ci sbinfo->max_blocks = ctx->blocks; 432162306a36Sopenharmony_ci sbinfo->max_inodes = ctx->inodes; 432262306a36Sopenharmony_ci sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE; 432362306a36Sopenharmony_ci if (sb->s_flags & SB_KERNMOUNT) { 432462306a36Sopenharmony_ci sbinfo->ino_batch = alloc_percpu(ino_t); 432562306a36Sopenharmony_ci if (!sbinfo->ino_batch) 432662306a36Sopenharmony_ci goto failed; 432762306a36Sopenharmony_ci } 432862306a36Sopenharmony_ci sbinfo->uid = ctx->uid; 432962306a36Sopenharmony_ci sbinfo->gid = ctx->gid; 433062306a36Sopenharmony_ci sbinfo->full_inums = ctx->full_inums; 433162306a36Sopenharmony_ci sbinfo->mode = ctx->mode; 433262306a36Sopenharmony_ci sbinfo->huge = ctx->huge; 433362306a36Sopenharmony_ci sbinfo->mpol = ctx->mpol; 433462306a36Sopenharmony_ci ctx->mpol = NULL; 433562306a36Sopenharmony_ci 433662306a36Sopenharmony_ci raw_spin_lock_init(&sbinfo->stat_lock); 433762306a36Sopenharmony_ci if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) 433862306a36Sopenharmony_ci goto failed; 433962306a36Sopenharmony_ci spin_lock_init(&sbinfo->shrinklist_lock); 434062306a36Sopenharmony_ci INIT_LIST_HEAD(&sbinfo->shrinklist); 434162306a36Sopenharmony_ci 434262306a36Sopenharmony_ci sb->s_maxbytes = MAX_LFS_FILESIZE; 434362306a36Sopenharmony_ci sb->s_blocksize = PAGE_SIZE; 434462306a36Sopenharmony_ci sb->s_blocksize_bits = PAGE_SHIFT; 434562306a36Sopenharmony_ci sb->s_magic = TMPFS_MAGIC; 434662306a36Sopenharmony_ci sb->s_op = &shmem_ops; 434762306a36Sopenharmony_ci sb->s_time_gran = 1; 434862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 434962306a36Sopenharmony_ci sb->s_xattr = shmem_xattr_handlers; 435062306a36Sopenharmony_ci#endif 435162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL 435262306a36Sopenharmony_ci sb->s_flags |= SB_POSIXACL; 435362306a36Sopenharmony_ci#endif 435462306a36Sopenharmony_ci uuid_gen(&sb->s_uuid); 435562306a36Sopenharmony_ci 435662306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 435762306a36Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_QUOTA) { 435862306a36Sopenharmony_ci sb->dq_op = &shmem_quota_operations; 435962306a36Sopenharmony_ci sb->s_qcop = &dquot_quotactl_sysfile_ops; 436062306a36Sopenharmony_ci sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 436162306a36Sopenharmony_ci 436262306a36Sopenharmony_ci /* Copy the default limits from ctx into sbinfo */ 436362306a36Sopenharmony_ci memcpy(&sbinfo->qlimits, &ctx->qlimits, 436462306a36Sopenharmony_ci sizeof(struct shmem_quota_limits)); 436562306a36Sopenharmony_ci 436662306a36Sopenharmony_ci if (shmem_enable_quotas(sb, ctx->quota_types)) 436762306a36Sopenharmony_ci goto failed; 436862306a36Sopenharmony_ci } 436962306a36Sopenharmony_ci#endif /* CONFIG_TMPFS_QUOTA */ 437062306a36Sopenharmony_ci 437162306a36Sopenharmony_ci inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0, 437262306a36Sopenharmony_ci VM_NORESERVE); 437362306a36Sopenharmony_ci if (IS_ERR(inode)) { 437462306a36Sopenharmony_ci error = PTR_ERR(inode); 437562306a36Sopenharmony_ci goto failed; 437662306a36Sopenharmony_ci } 437762306a36Sopenharmony_ci inode->i_uid = sbinfo->uid; 437862306a36Sopenharmony_ci inode->i_gid = sbinfo->gid; 437962306a36Sopenharmony_ci sb->s_root = d_make_root(inode); 438062306a36Sopenharmony_ci if (!sb->s_root) 438162306a36Sopenharmony_ci goto failed; 438262306a36Sopenharmony_ci return 0; 438362306a36Sopenharmony_ci 438462306a36Sopenharmony_cifailed: 438562306a36Sopenharmony_ci shmem_put_super(sb); 438662306a36Sopenharmony_ci return error; 438762306a36Sopenharmony_ci} 438862306a36Sopenharmony_ci 438962306a36Sopenharmony_cistatic int shmem_get_tree(struct fs_context *fc) 439062306a36Sopenharmony_ci{ 439162306a36Sopenharmony_ci return get_tree_nodev(fc, shmem_fill_super); 439262306a36Sopenharmony_ci} 439362306a36Sopenharmony_ci 439462306a36Sopenharmony_cistatic void shmem_free_fc(struct fs_context *fc) 439562306a36Sopenharmony_ci{ 439662306a36Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 439762306a36Sopenharmony_ci 439862306a36Sopenharmony_ci if (ctx) { 439962306a36Sopenharmony_ci mpol_put(ctx->mpol); 440062306a36Sopenharmony_ci kfree(ctx); 440162306a36Sopenharmony_ci } 440262306a36Sopenharmony_ci} 440362306a36Sopenharmony_ci 440462306a36Sopenharmony_cistatic const struct fs_context_operations shmem_fs_context_ops = { 440562306a36Sopenharmony_ci .free = shmem_free_fc, 440662306a36Sopenharmony_ci .get_tree = shmem_get_tree, 440762306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 440862306a36Sopenharmony_ci .parse_monolithic = shmem_parse_options, 440962306a36Sopenharmony_ci .parse_param = shmem_parse_one, 441062306a36Sopenharmony_ci .reconfigure = shmem_reconfigure, 441162306a36Sopenharmony_ci#endif 441262306a36Sopenharmony_ci}; 441362306a36Sopenharmony_ci 441462306a36Sopenharmony_cistatic struct kmem_cache *shmem_inode_cachep; 441562306a36Sopenharmony_ci 441662306a36Sopenharmony_cistatic struct inode *shmem_alloc_inode(struct super_block *sb) 441762306a36Sopenharmony_ci{ 441862306a36Sopenharmony_ci struct shmem_inode_info *info; 441962306a36Sopenharmony_ci info = alloc_inode_sb(sb, shmem_inode_cachep, GFP_KERNEL); 442062306a36Sopenharmony_ci if (!info) 442162306a36Sopenharmony_ci return NULL; 442262306a36Sopenharmony_ci return &info->vfs_inode; 442362306a36Sopenharmony_ci} 442462306a36Sopenharmony_ci 442562306a36Sopenharmony_cistatic void shmem_free_in_core_inode(struct inode *inode) 442662306a36Sopenharmony_ci{ 442762306a36Sopenharmony_ci if (S_ISLNK(inode->i_mode)) 442862306a36Sopenharmony_ci kfree(inode->i_link); 442962306a36Sopenharmony_ci kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 443062306a36Sopenharmony_ci} 443162306a36Sopenharmony_ci 443262306a36Sopenharmony_cistatic void shmem_destroy_inode(struct inode *inode) 443362306a36Sopenharmony_ci{ 443462306a36Sopenharmony_ci if (S_ISREG(inode->i_mode)) 443562306a36Sopenharmony_ci mpol_free_shared_policy(&SHMEM_I(inode)->policy); 443662306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) 443762306a36Sopenharmony_ci simple_offset_destroy(shmem_get_offset_ctx(inode)); 443862306a36Sopenharmony_ci} 443962306a36Sopenharmony_ci 444062306a36Sopenharmony_cistatic void shmem_init_inode(void *foo) 444162306a36Sopenharmony_ci{ 444262306a36Sopenharmony_ci struct shmem_inode_info *info = foo; 444362306a36Sopenharmony_ci inode_init_once(&info->vfs_inode); 444462306a36Sopenharmony_ci} 444562306a36Sopenharmony_ci 444662306a36Sopenharmony_cistatic void shmem_init_inodecache(void) 444762306a36Sopenharmony_ci{ 444862306a36Sopenharmony_ci shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", 444962306a36Sopenharmony_ci sizeof(struct shmem_inode_info), 445062306a36Sopenharmony_ci 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); 445162306a36Sopenharmony_ci} 445262306a36Sopenharmony_ci 445362306a36Sopenharmony_cistatic void shmem_destroy_inodecache(void) 445462306a36Sopenharmony_ci{ 445562306a36Sopenharmony_ci kmem_cache_destroy(shmem_inode_cachep); 445662306a36Sopenharmony_ci} 445762306a36Sopenharmony_ci 445862306a36Sopenharmony_ci/* Keep the page in page cache instead of truncating it */ 445962306a36Sopenharmony_cistatic int shmem_error_remove_page(struct address_space *mapping, 446062306a36Sopenharmony_ci struct page *page) 446162306a36Sopenharmony_ci{ 446262306a36Sopenharmony_ci return 0; 446362306a36Sopenharmony_ci} 446462306a36Sopenharmony_ci 446562306a36Sopenharmony_ciconst struct address_space_operations shmem_aops = { 446662306a36Sopenharmony_ci .writepage = shmem_writepage, 446762306a36Sopenharmony_ci .dirty_folio = noop_dirty_folio, 446862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 446962306a36Sopenharmony_ci .write_begin = shmem_write_begin, 447062306a36Sopenharmony_ci .write_end = shmem_write_end, 447162306a36Sopenharmony_ci#endif 447262306a36Sopenharmony_ci#ifdef CONFIG_MIGRATION 447362306a36Sopenharmony_ci .migrate_folio = migrate_folio, 447462306a36Sopenharmony_ci#endif 447562306a36Sopenharmony_ci .error_remove_page = shmem_error_remove_page, 447662306a36Sopenharmony_ci}; 447762306a36Sopenharmony_ciEXPORT_SYMBOL(shmem_aops); 447862306a36Sopenharmony_ci 447962306a36Sopenharmony_cistatic const struct file_operations shmem_file_operations = { 448062306a36Sopenharmony_ci .mmap = shmem_mmap, 448162306a36Sopenharmony_ci .open = shmem_file_open, 448262306a36Sopenharmony_ci .get_unmapped_area = shmem_get_unmapped_area, 448362306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 448462306a36Sopenharmony_ci .llseek = shmem_file_llseek, 448562306a36Sopenharmony_ci .read_iter = shmem_file_read_iter, 448662306a36Sopenharmony_ci .write_iter = shmem_file_write_iter, 448762306a36Sopenharmony_ci .fsync = noop_fsync, 448862306a36Sopenharmony_ci .splice_read = shmem_file_splice_read, 448962306a36Sopenharmony_ci .splice_write = iter_file_splice_write, 449062306a36Sopenharmony_ci .fallocate = shmem_fallocate, 449162306a36Sopenharmony_ci#endif 449262306a36Sopenharmony_ci}; 449362306a36Sopenharmony_ci 449462306a36Sopenharmony_cistatic const struct inode_operations shmem_inode_operations = { 449562306a36Sopenharmony_ci .getattr = shmem_getattr, 449662306a36Sopenharmony_ci .setattr = shmem_setattr, 449762306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 449862306a36Sopenharmony_ci .listxattr = shmem_listxattr, 449962306a36Sopenharmony_ci .set_acl = simple_set_acl, 450062306a36Sopenharmony_ci .fileattr_get = shmem_fileattr_get, 450162306a36Sopenharmony_ci .fileattr_set = shmem_fileattr_set, 450262306a36Sopenharmony_ci#endif 450362306a36Sopenharmony_ci}; 450462306a36Sopenharmony_ci 450562306a36Sopenharmony_cistatic const struct inode_operations shmem_dir_inode_operations = { 450662306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 450762306a36Sopenharmony_ci .getattr = shmem_getattr, 450862306a36Sopenharmony_ci .create = shmem_create, 450962306a36Sopenharmony_ci .lookup = simple_lookup, 451062306a36Sopenharmony_ci .link = shmem_link, 451162306a36Sopenharmony_ci .unlink = shmem_unlink, 451262306a36Sopenharmony_ci .symlink = shmem_symlink, 451362306a36Sopenharmony_ci .mkdir = shmem_mkdir, 451462306a36Sopenharmony_ci .rmdir = shmem_rmdir, 451562306a36Sopenharmony_ci .mknod = shmem_mknod, 451662306a36Sopenharmony_ci .rename = shmem_rename2, 451762306a36Sopenharmony_ci .tmpfile = shmem_tmpfile, 451862306a36Sopenharmony_ci .get_offset_ctx = shmem_get_offset_ctx, 451962306a36Sopenharmony_ci#endif 452062306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 452162306a36Sopenharmony_ci .listxattr = shmem_listxattr, 452262306a36Sopenharmony_ci .fileattr_get = shmem_fileattr_get, 452362306a36Sopenharmony_ci .fileattr_set = shmem_fileattr_set, 452462306a36Sopenharmony_ci#endif 452562306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL 452662306a36Sopenharmony_ci .setattr = shmem_setattr, 452762306a36Sopenharmony_ci .set_acl = simple_set_acl, 452862306a36Sopenharmony_ci#endif 452962306a36Sopenharmony_ci}; 453062306a36Sopenharmony_ci 453162306a36Sopenharmony_cistatic const struct inode_operations shmem_special_inode_operations = { 453262306a36Sopenharmony_ci .getattr = shmem_getattr, 453362306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 453462306a36Sopenharmony_ci .listxattr = shmem_listxattr, 453562306a36Sopenharmony_ci#endif 453662306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL 453762306a36Sopenharmony_ci .setattr = shmem_setattr, 453862306a36Sopenharmony_ci .set_acl = simple_set_acl, 453962306a36Sopenharmony_ci#endif 454062306a36Sopenharmony_ci}; 454162306a36Sopenharmony_ci 454262306a36Sopenharmony_cistatic const struct super_operations shmem_ops = { 454362306a36Sopenharmony_ci .alloc_inode = shmem_alloc_inode, 454462306a36Sopenharmony_ci .free_inode = shmem_free_in_core_inode, 454562306a36Sopenharmony_ci .destroy_inode = shmem_destroy_inode, 454662306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 454762306a36Sopenharmony_ci .statfs = shmem_statfs, 454862306a36Sopenharmony_ci .show_options = shmem_show_options, 454962306a36Sopenharmony_ci#endif 455062306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 455162306a36Sopenharmony_ci .get_dquots = shmem_get_dquots, 455262306a36Sopenharmony_ci#endif 455362306a36Sopenharmony_ci .evict_inode = shmem_evict_inode, 455462306a36Sopenharmony_ci .drop_inode = generic_delete_inode, 455562306a36Sopenharmony_ci .put_super = shmem_put_super, 455662306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 455762306a36Sopenharmony_ci .nr_cached_objects = shmem_unused_huge_count, 455862306a36Sopenharmony_ci .free_cached_objects = shmem_unused_huge_scan, 455962306a36Sopenharmony_ci#endif 456062306a36Sopenharmony_ci}; 456162306a36Sopenharmony_ci 456262306a36Sopenharmony_cistatic const struct vm_operations_struct shmem_vm_ops = { 456362306a36Sopenharmony_ci .fault = shmem_fault, 456462306a36Sopenharmony_ci .map_pages = filemap_map_pages, 456562306a36Sopenharmony_ci#ifdef CONFIG_NUMA 456662306a36Sopenharmony_ci .set_policy = shmem_set_policy, 456762306a36Sopenharmony_ci .get_policy = shmem_get_policy, 456862306a36Sopenharmony_ci#endif 456962306a36Sopenharmony_ci}; 457062306a36Sopenharmony_ci 457162306a36Sopenharmony_cistatic const struct vm_operations_struct shmem_anon_vm_ops = { 457262306a36Sopenharmony_ci .fault = shmem_fault, 457362306a36Sopenharmony_ci .map_pages = filemap_map_pages, 457462306a36Sopenharmony_ci#ifdef CONFIG_NUMA 457562306a36Sopenharmony_ci .set_policy = shmem_set_policy, 457662306a36Sopenharmony_ci .get_policy = shmem_get_policy, 457762306a36Sopenharmony_ci#endif 457862306a36Sopenharmony_ci}; 457962306a36Sopenharmony_ci 458062306a36Sopenharmony_ciint shmem_init_fs_context(struct fs_context *fc) 458162306a36Sopenharmony_ci{ 458262306a36Sopenharmony_ci struct shmem_options *ctx; 458362306a36Sopenharmony_ci 458462306a36Sopenharmony_ci ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL); 458562306a36Sopenharmony_ci if (!ctx) 458662306a36Sopenharmony_ci return -ENOMEM; 458762306a36Sopenharmony_ci 458862306a36Sopenharmony_ci ctx->mode = 0777 | S_ISVTX; 458962306a36Sopenharmony_ci ctx->uid = current_fsuid(); 459062306a36Sopenharmony_ci ctx->gid = current_fsgid(); 459162306a36Sopenharmony_ci 459262306a36Sopenharmony_ci fc->fs_private = ctx; 459362306a36Sopenharmony_ci fc->ops = &shmem_fs_context_ops; 459462306a36Sopenharmony_ci return 0; 459562306a36Sopenharmony_ci} 459662306a36Sopenharmony_ci 459762306a36Sopenharmony_cistatic struct file_system_type shmem_fs_type = { 459862306a36Sopenharmony_ci .owner = THIS_MODULE, 459962306a36Sopenharmony_ci .name = "tmpfs", 460062306a36Sopenharmony_ci .init_fs_context = shmem_init_fs_context, 460162306a36Sopenharmony_ci#ifdef CONFIG_TMPFS 460262306a36Sopenharmony_ci .parameters = shmem_fs_parameters, 460362306a36Sopenharmony_ci#endif 460462306a36Sopenharmony_ci .kill_sb = kill_litter_super, 460562306a36Sopenharmony_ci#ifdef CONFIG_SHMEM 460662306a36Sopenharmony_ci .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, 460762306a36Sopenharmony_ci#else 460862306a36Sopenharmony_ci .fs_flags = FS_USERNS_MOUNT, 460962306a36Sopenharmony_ci#endif 461062306a36Sopenharmony_ci}; 461162306a36Sopenharmony_ci 461262306a36Sopenharmony_civoid __init shmem_init(void) 461362306a36Sopenharmony_ci{ 461462306a36Sopenharmony_ci int error; 461562306a36Sopenharmony_ci 461662306a36Sopenharmony_ci shmem_init_inodecache(); 461762306a36Sopenharmony_ci 461862306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 461962306a36Sopenharmony_ci error = register_quota_format(&shmem_quota_format); 462062306a36Sopenharmony_ci if (error < 0) { 462162306a36Sopenharmony_ci pr_err("Could not register quota format\n"); 462262306a36Sopenharmony_ci goto out3; 462362306a36Sopenharmony_ci } 462462306a36Sopenharmony_ci#endif 462562306a36Sopenharmony_ci 462662306a36Sopenharmony_ci error = register_filesystem(&shmem_fs_type); 462762306a36Sopenharmony_ci if (error) { 462862306a36Sopenharmony_ci pr_err("Could not register tmpfs\n"); 462962306a36Sopenharmony_ci goto out2; 463062306a36Sopenharmony_ci } 463162306a36Sopenharmony_ci 463262306a36Sopenharmony_ci shm_mnt = kern_mount(&shmem_fs_type); 463362306a36Sopenharmony_ci if (IS_ERR(shm_mnt)) { 463462306a36Sopenharmony_ci error = PTR_ERR(shm_mnt); 463562306a36Sopenharmony_ci pr_err("Could not kern_mount tmpfs\n"); 463662306a36Sopenharmony_ci goto out1; 463762306a36Sopenharmony_ci } 463862306a36Sopenharmony_ci 463962306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 464062306a36Sopenharmony_ci if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) 464162306a36Sopenharmony_ci SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; 464262306a36Sopenharmony_ci else 464362306a36Sopenharmony_ci shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */ 464462306a36Sopenharmony_ci#endif 464562306a36Sopenharmony_ci return; 464662306a36Sopenharmony_ci 464762306a36Sopenharmony_ciout1: 464862306a36Sopenharmony_ci unregister_filesystem(&shmem_fs_type); 464962306a36Sopenharmony_ciout2: 465062306a36Sopenharmony_ci#ifdef CONFIG_TMPFS_QUOTA 465162306a36Sopenharmony_ci unregister_quota_format(&shmem_quota_format); 465262306a36Sopenharmony_ciout3: 465362306a36Sopenharmony_ci#endif 465462306a36Sopenharmony_ci shmem_destroy_inodecache(); 465562306a36Sopenharmony_ci shm_mnt = ERR_PTR(error); 465662306a36Sopenharmony_ci} 465762306a36Sopenharmony_ci 465862306a36Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) 465962306a36Sopenharmony_cistatic ssize_t shmem_enabled_show(struct kobject *kobj, 466062306a36Sopenharmony_ci struct kobj_attribute *attr, char *buf) 466162306a36Sopenharmony_ci{ 466262306a36Sopenharmony_ci static const int values[] = { 466362306a36Sopenharmony_ci SHMEM_HUGE_ALWAYS, 466462306a36Sopenharmony_ci SHMEM_HUGE_WITHIN_SIZE, 466562306a36Sopenharmony_ci SHMEM_HUGE_ADVISE, 466662306a36Sopenharmony_ci SHMEM_HUGE_NEVER, 466762306a36Sopenharmony_ci SHMEM_HUGE_DENY, 466862306a36Sopenharmony_ci SHMEM_HUGE_FORCE, 466962306a36Sopenharmony_ci }; 467062306a36Sopenharmony_ci int len = 0; 467162306a36Sopenharmony_ci int i; 467262306a36Sopenharmony_ci 467362306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(values); i++) { 467462306a36Sopenharmony_ci len += sysfs_emit_at(buf, len, 467562306a36Sopenharmony_ci shmem_huge == values[i] ? "%s[%s]" : "%s%s", 467662306a36Sopenharmony_ci i ? " " : "", 467762306a36Sopenharmony_ci shmem_format_huge(values[i])); 467862306a36Sopenharmony_ci } 467962306a36Sopenharmony_ci 468062306a36Sopenharmony_ci len += sysfs_emit_at(buf, len, "\n"); 468162306a36Sopenharmony_ci 468262306a36Sopenharmony_ci return len; 468362306a36Sopenharmony_ci} 468462306a36Sopenharmony_ci 468562306a36Sopenharmony_cistatic ssize_t shmem_enabled_store(struct kobject *kobj, 468662306a36Sopenharmony_ci struct kobj_attribute *attr, const char *buf, size_t count) 468762306a36Sopenharmony_ci{ 468862306a36Sopenharmony_ci char tmp[16]; 468962306a36Sopenharmony_ci int huge; 469062306a36Sopenharmony_ci 469162306a36Sopenharmony_ci if (count + 1 > sizeof(tmp)) 469262306a36Sopenharmony_ci return -EINVAL; 469362306a36Sopenharmony_ci memcpy(tmp, buf, count); 469462306a36Sopenharmony_ci tmp[count] = '\0'; 469562306a36Sopenharmony_ci if (count && tmp[count - 1] == '\n') 469662306a36Sopenharmony_ci tmp[count - 1] = '\0'; 469762306a36Sopenharmony_ci 469862306a36Sopenharmony_ci huge = shmem_parse_huge(tmp); 469962306a36Sopenharmony_ci if (huge == -EINVAL) 470062306a36Sopenharmony_ci return -EINVAL; 470162306a36Sopenharmony_ci if (!has_transparent_hugepage() && 470262306a36Sopenharmony_ci huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY) 470362306a36Sopenharmony_ci return -EINVAL; 470462306a36Sopenharmony_ci 470562306a36Sopenharmony_ci shmem_huge = huge; 470662306a36Sopenharmony_ci if (shmem_huge > SHMEM_HUGE_DENY) 470762306a36Sopenharmony_ci SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; 470862306a36Sopenharmony_ci return count; 470962306a36Sopenharmony_ci} 471062306a36Sopenharmony_ci 471162306a36Sopenharmony_cistruct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled); 471262306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */ 471362306a36Sopenharmony_ci 471462306a36Sopenharmony_ci#else /* !CONFIG_SHMEM */ 471562306a36Sopenharmony_ci 471662306a36Sopenharmony_ci/* 471762306a36Sopenharmony_ci * tiny-shmem: simple shmemfs and tmpfs using ramfs code 471862306a36Sopenharmony_ci * 471962306a36Sopenharmony_ci * This is intended for small system where the benefits of the full 472062306a36Sopenharmony_ci * shmem code (swap-backed and resource-limited) are outweighed by 472162306a36Sopenharmony_ci * their complexity. On systems without swap this code should be 472262306a36Sopenharmony_ci * effectively equivalent, but much lighter weight. 472362306a36Sopenharmony_ci */ 472462306a36Sopenharmony_ci 472562306a36Sopenharmony_cistatic struct file_system_type shmem_fs_type = { 472662306a36Sopenharmony_ci .name = "tmpfs", 472762306a36Sopenharmony_ci .init_fs_context = ramfs_init_fs_context, 472862306a36Sopenharmony_ci .parameters = ramfs_fs_parameters, 472962306a36Sopenharmony_ci .kill_sb = ramfs_kill_sb, 473062306a36Sopenharmony_ci .fs_flags = FS_USERNS_MOUNT, 473162306a36Sopenharmony_ci}; 473262306a36Sopenharmony_ci 473362306a36Sopenharmony_civoid __init shmem_init(void) 473462306a36Sopenharmony_ci{ 473562306a36Sopenharmony_ci BUG_ON(register_filesystem(&shmem_fs_type) != 0); 473662306a36Sopenharmony_ci 473762306a36Sopenharmony_ci shm_mnt = kern_mount(&shmem_fs_type); 473862306a36Sopenharmony_ci BUG_ON(IS_ERR(shm_mnt)); 473962306a36Sopenharmony_ci} 474062306a36Sopenharmony_ci 474162306a36Sopenharmony_ciint shmem_unuse(unsigned int type) 474262306a36Sopenharmony_ci{ 474362306a36Sopenharmony_ci return 0; 474462306a36Sopenharmony_ci} 474562306a36Sopenharmony_ci 474662306a36Sopenharmony_ciint shmem_lock(struct file *file, int lock, struct ucounts *ucounts) 474762306a36Sopenharmony_ci{ 474862306a36Sopenharmony_ci return 0; 474962306a36Sopenharmony_ci} 475062306a36Sopenharmony_ci 475162306a36Sopenharmony_civoid shmem_unlock_mapping(struct address_space *mapping) 475262306a36Sopenharmony_ci{ 475362306a36Sopenharmony_ci} 475462306a36Sopenharmony_ci 475562306a36Sopenharmony_ci#ifdef CONFIG_MMU 475662306a36Sopenharmony_ciunsigned long shmem_get_unmapped_area(struct file *file, 475762306a36Sopenharmony_ci unsigned long addr, unsigned long len, 475862306a36Sopenharmony_ci unsigned long pgoff, unsigned long flags) 475962306a36Sopenharmony_ci{ 476062306a36Sopenharmony_ci return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); 476162306a36Sopenharmony_ci} 476262306a36Sopenharmony_ci#endif 476362306a36Sopenharmony_ci 476462306a36Sopenharmony_civoid shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 476562306a36Sopenharmony_ci{ 476662306a36Sopenharmony_ci truncate_inode_pages_range(inode->i_mapping, lstart, lend); 476762306a36Sopenharmony_ci} 476862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_truncate_range); 476962306a36Sopenharmony_ci 477062306a36Sopenharmony_ci#define shmem_vm_ops generic_file_vm_ops 477162306a36Sopenharmony_ci#define shmem_anon_vm_ops generic_file_vm_ops 477262306a36Sopenharmony_ci#define shmem_file_operations ramfs_file_operations 477362306a36Sopenharmony_ci#define shmem_acct_size(flags, size) 0 477462306a36Sopenharmony_ci#define shmem_unacct_size(flags, size) do {} while (0) 477562306a36Sopenharmony_ci 477662306a36Sopenharmony_cistatic inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir, 477762306a36Sopenharmony_ci umode_t mode, dev_t dev, unsigned long flags) 477862306a36Sopenharmony_ci{ 477962306a36Sopenharmony_ci struct inode *inode = ramfs_get_inode(sb, dir, mode, dev); 478062306a36Sopenharmony_ci return inode ? inode : ERR_PTR(-ENOSPC); 478162306a36Sopenharmony_ci} 478262306a36Sopenharmony_ci 478362306a36Sopenharmony_ci#endif /* CONFIG_SHMEM */ 478462306a36Sopenharmony_ci 478562306a36Sopenharmony_ci/* common code */ 478662306a36Sopenharmony_ci 478762306a36Sopenharmony_cistatic struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, 478862306a36Sopenharmony_ci unsigned long flags, unsigned int i_flags) 478962306a36Sopenharmony_ci{ 479062306a36Sopenharmony_ci struct inode *inode; 479162306a36Sopenharmony_ci struct file *res; 479262306a36Sopenharmony_ci 479362306a36Sopenharmony_ci if (IS_ERR(mnt)) 479462306a36Sopenharmony_ci return ERR_CAST(mnt); 479562306a36Sopenharmony_ci 479662306a36Sopenharmony_ci if (size < 0 || size > MAX_LFS_FILESIZE) 479762306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 479862306a36Sopenharmony_ci 479962306a36Sopenharmony_ci if (shmem_acct_size(flags, size)) 480062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 480162306a36Sopenharmony_ci 480262306a36Sopenharmony_ci if (is_idmapped_mnt(mnt)) 480362306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 480462306a36Sopenharmony_ci 480562306a36Sopenharmony_ci inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL, 480662306a36Sopenharmony_ci S_IFREG | S_IRWXUGO, 0, flags); 480762306a36Sopenharmony_ci 480862306a36Sopenharmony_ci if (IS_ERR(inode)) { 480962306a36Sopenharmony_ci shmem_unacct_size(flags, size); 481062306a36Sopenharmony_ci return ERR_CAST(inode); 481162306a36Sopenharmony_ci } 481262306a36Sopenharmony_ci inode->i_flags |= i_flags; 481362306a36Sopenharmony_ci inode->i_size = size; 481462306a36Sopenharmony_ci clear_nlink(inode); /* It is unlinked */ 481562306a36Sopenharmony_ci res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); 481662306a36Sopenharmony_ci if (!IS_ERR(res)) 481762306a36Sopenharmony_ci res = alloc_file_pseudo(inode, mnt, name, O_RDWR, 481862306a36Sopenharmony_ci &shmem_file_operations); 481962306a36Sopenharmony_ci if (IS_ERR(res)) 482062306a36Sopenharmony_ci iput(inode); 482162306a36Sopenharmony_ci return res; 482262306a36Sopenharmony_ci} 482362306a36Sopenharmony_ci 482462306a36Sopenharmony_ci/** 482562306a36Sopenharmony_ci * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be 482662306a36Sopenharmony_ci * kernel internal. There will be NO LSM permission checks against the 482762306a36Sopenharmony_ci * underlying inode. So users of this interface must do LSM checks at a 482862306a36Sopenharmony_ci * higher layer. The users are the big_key and shm implementations. LSM 482962306a36Sopenharmony_ci * checks are provided at the key or shm level rather than the inode. 483062306a36Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps 483162306a36Sopenharmony_ci * @size: size to be set for the file 483262306a36Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 483362306a36Sopenharmony_ci */ 483462306a36Sopenharmony_cistruct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) 483562306a36Sopenharmony_ci{ 483662306a36Sopenharmony_ci return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); 483762306a36Sopenharmony_ci} 483862306a36Sopenharmony_ci 483962306a36Sopenharmony_ci/** 484062306a36Sopenharmony_ci * shmem_file_setup - get an unlinked file living in tmpfs 484162306a36Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps 484262306a36Sopenharmony_ci * @size: size to be set for the file 484362306a36Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 484462306a36Sopenharmony_ci */ 484562306a36Sopenharmony_cistruct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) 484662306a36Sopenharmony_ci{ 484762306a36Sopenharmony_ci return __shmem_file_setup(shm_mnt, name, size, flags, 0); 484862306a36Sopenharmony_ci} 484962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_file_setup); 485062306a36Sopenharmony_ci 485162306a36Sopenharmony_ci/** 485262306a36Sopenharmony_ci * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs 485362306a36Sopenharmony_ci * @mnt: the tmpfs mount where the file will be created 485462306a36Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps 485562306a36Sopenharmony_ci * @size: size to be set for the file 485662306a36Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 485762306a36Sopenharmony_ci */ 485862306a36Sopenharmony_cistruct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, 485962306a36Sopenharmony_ci loff_t size, unsigned long flags) 486062306a36Sopenharmony_ci{ 486162306a36Sopenharmony_ci return __shmem_file_setup(mnt, name, size, flags, 0); 486262306a36Sopenharmony_ci} 486362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); 486462306a36Sopenharmony_ci 486562306a36Sopenharmony_ci/** 486662306a36Sopenharmony_ci * shmem_zero_setup - setup a shared anonymous mapping 486762306a36Sopenharmony_ci * @vma: the vma to be mmapped is prepared by do_mmap 486862306a36Sopenharmony_ci */ 486962306a36Sopenharmony_ciint shmem_zero_setup(struct vm_area_struct *vma) 487062306a36Sopenharmony_ci{ 487162306a36Sopenharmony_ci struct file *file; 487262306a36Sopenharmony_ci loff_t size = vma->vm_end - vma->vm_start; 487362306a36Sopenharmony_ci 487462306a36Sopenharmony_ci /* 487562306a36Sopenharmony_ci * Cloning a new file under mmap_lock leads to a lock ordering conflict 487662306a36Sopenharmony_ci * between XFS directory reading and selinux: since this file is only 487762306a36Sopenharmony_ci * accessible to the user through its mapping, use S_PRIVATE flag to 487862306a36Sopenharmony_ci * bypass file security, in the same way as shmem_kernel_file_setup(). 487962306a36Sopenharmony_ci */ 488062306a36Sopenharmony_ci file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); 488162306a36Sopenharmony_ci if (IS_ERR(file)) 488262306a36Sopenharmony_ci return PTR_ERR(file); 488362306a36Sopenharmony_ci 488462306a36Sopenharmony_ci if (vma->vm_file) 488562306a36Sopenharmony_ci fput(vma->vm_file); 488662306a36Sopenharmony_ci vma->vm_file = file; 488762306a36Sopenharmony_ci vma->vm_ops = &shmem_anon_vm_ops; 488862306a36Sopenharmony_ci 488962306a36Sopenharmony_ci return 0; 489062306a36Sopenharmony_ci} 489162306a36Sopenharmony_ci 489262306a36Sopenharmony_ci/** 489362306a36Sopenharmony_ci * shmem_read_folio_gfp - read into page cache, using specified page allocation flags. 489462306a36Sopenharmony_ci * @mapping: the folio's address_space 489562306a36Sopenharmony_ci * @index: the folio index 489662306a36Sopenharmony_ci * @gfp: the page allocator flags to use if allocating 489762306a36Sopenharmony_ci * 489862306a36Sopenharmony_ci * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", 489962306a36Sopenharmony_ci * with any new page allocations done using the specified allocation flags. 490062306a36Sopenharmony_ci * But read_cache_page_gfp() uses the ->read_folio() method: which does not 490162306a36Sopenharmony_ci * suit tmpfs, since it may have pages in swapcache, and needs to find those 490262306a36Sopenharmony_ci * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. 490362306a36Sopenharmony_ci * 490462306a36Sopenharmony_ci * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in 490562306a36Sopenharmony_ci * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily. 490662306a36Sopenharmony_ci */ 490762306a36Sopenharmony_cistruct folio *shmem_read_folio_gfp(struct address_space *mapping, 490862306a36Sopenharmony_ci pgoff_t index, gfp_t gfp) 490962306a36Sopenharmony_ci{ 491062306a36Sopenharmony_ci#ifdef CONFIG_SHMEM 491162306a36Sopenharmony_ci struct inode *inode = mapping->host; 491262306a36Sopenharmony_ci struct folio *folio; 491362306a36Sopenharmony_ci int error; 491462306a36Sopenharmony_ci 491562306a36Sopenharmony_ci BUG_ON(!shmem_mapping(mapping)); 491662306a36Sopenharmony_ci error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE, 491762306a36Sopenharmony_ci gfp, NULL, NULL, NULL); 491862306a36Sopenharmony_ci if (error) 491962306a36Sopenharmony_ci return ERR_PTR(error); 492062306a36Sopenharmony_ci 492162306a36Sopenharmony_ci folio_unlock(folio); 492262306a36Sopenharmony_ci return folio; 492362306a36Sopenharmony_ci#else 492462306a36Sopenharmony_ci /* 492562306a36Sopenharmony_ci * The tiny !SHMEM case uses ramfs without swap 492662306a36Sopenharmony_ci */ 492762306a36Sopenharmony_ci return mapping_read_folio_gfp(mapping, index, gfp); 492862306a36Sopenharmony_ci#endif 492962306a36Sopenharmony_ci} 493062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_read_folio_gfp); 493162306a36Sopenharmony_ci 493262306a36Sopenharmony_cistruct page *shmem_read_mapping_page_gfp(struct address_space *mapping, 493362306a36Sopenharmony_ci pgoff_t index, gfp_t gfp) 493462306a36Sopenharmony_ci{ 493562306a36Sopenharmony_ci struct folio *folio = shmem_read_folio_gfp(mapping, index, gfp); 493662306a36Sopenharmony_ci struct page *page; 493762306a36Sopenharmony_ci 493862306a36Sopenharmony_ci if (IS_ERR(folio)) 493962306a36Sopenharmony_ci return &folio->page; 494062306a36Sopenharmony_ci 494162306a36Sopenharmony_ci page = folio_file_page(folio, index); 494262306a36Sopenharmony_ci if (PageHWPoison(page)) { 494362306a36Sopenharmony_ci folio_put(folio); 494462306a36Sopenharmony_ci return ERR_PTR(-EIO); 494562306a36Sopenharmony_ci } 494662306a36Sopenharmony_ci 494762306a36Sopenharmony_ci return page; 494862306a36Sopenharmony_ci} 494962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); 4950