/*
 * Resizable virtual memory filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *		 2000 Transmeta Corp.
 *		 2000-2001 Christoph Rohland
 *		 2000-2001 SAP AG
 *		 2002 Red Hat Inc.
 * Copyright (C) 2002-2011 Hugh Dickins.
 * Copyright (C) 2011 Google Inc.
 * Copyright (C) 2002-2005 VERITAS Software Corporation.
 * Copyright (C) 2004 Andi Kleen, SuSE Labs
 *
 * Extended attribute support for tmpfs:
 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
 *
 * tiny-shmem:
 * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
 *
 * This file is released under the GPL.
228c2ecf20Sopenharmony_ci */ 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci#include <linux/fs.h> 258c2ecf20Sopenharmony_ci#include <linux/init.h> 268c2ecf20Sopenharmony_ci#include <linux/vfs.h> 278c2ecf20Sopenharmony_ci#include <linux/mount.h> 288c2ecf20Sopenharmony_ci#include <linux/ramfs.h> 298c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 308c2ecf20Sopenharmony_ci#include <linux/file.h> 318c2ecf20Sopenharmony_ci#include <linux/mm.h> 328c2ecf20Sopenharmony_ci#include <linux/random.h> 338c2ecf20Sopenharmony_ci#include <linux/sched/signal.h> 348c2ecf20Sopenharmony_ci#include <linux/export.h> 358c2ecf20Sopenharmony_ci#include <linux/swap.h> 368c2ecf20Sopenharmony_ci#include <linux/uio.h> 378c2ecf20Sopenharmony_ci#include <linux/khugepaged.h> 388c2ecf20Sopenharmony_ci#include <linux/hugetlb.h> 398c2ecf20Sopenharmony_ci#include <linux/frontswap.h> 408c2ecf20Sopenharmony_ci#include <linux/fs_parser.h> 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */ 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_cistatic struct vfsmount *shm_mnt; 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci#ifdef CONFIG_SHMEM 478c2ecf20Sopenharmony_ci/* 488c2ecf20Sopenharmony_ci * This virtual memory filesystem is heavily based on the ramfs. It 498c2ecf20Sopenharmony_ci * extends ramfs by the ability to use swap and honor resource limits 508c2ecf20Sopenharmony_ci * which makes it a completely usable filesystem. 
518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci#include <linux/xattr.h> 548c2ecf20Sopenharmony_ci#include <linux/exportfs.h> 558c2ecf20Sopenharmony_ci#include <linux/posix_acl.h> 568c2ecf20Sopenharmony_ci#include <linux/posix_acl_xattr.h> 578c2ecf20Sopenharmony_ci#include <linux/mman.h> 588c2ecf20Sopenharmony_ci#include <linux/string.h> 598c2ecf20Sopenharmony_ci#include <linux/slab.h> 608c2ecf20Sopenharmony_ci#include <linux/backing-dev.h> 618c2ecf20Sopenharmony_ci#include <linux/shmem_fs.h> 628c2ecf20Sopenharmony_ci#include <linux/writeback.h> 638c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 648c2ecf20Sopenharmony_ci#include <linux/pagevec.h> 658c2ecf20Sopenharmony_ci#include <linux/percpu_counter.h> 668c2ecf20Sopenharmony_ci#include <linux/falloc.h> 678c2ecf20Sopenharmony_ci#include <linux/splice.h> 688c2ecf20Sopenharmony_ci#include <linux/security.h> 698c2ecf20Sopenharmony_ci#include <linux/swapops.h> 708c2ecf20Sopenharmony_ci#include <linux/mempolicy.h> 718c2ecf20Sopenharmony_ci#include <linux/namei.h> 728c2ecf20Sopenharmony_ci#include <linux/ctype.h> 738c2ecf20Sopenharmony_ci#include <linux/migrate.h> 748c2ecf20Sopenharmony_ci#include <linux/highmem.h> 758c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 768c2ecf20Sopenharmony_ci#include <linux/magic.h> 778c2ecf20Sopenharmony_ci#include <linux/syscalls.h> 788c2ecf20Sopenharmony_ci#include <linux/fcntl.h> 798c2ecf20Sopenharmony_ci#include <uapi/linux/memfd.h> 808c2ecf20Sopenharmony_ci#include <linux/userfaultfd_k.h> 818c2ecf20Sopenharmony_ci#include <linux/rmap.h> 828c2ecf20Sopenharmony_ci#include <linux/uuid.h> 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci#include "internal.h" 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci#define BLOCKS_PER_PAGE (PAGE_SIZE/512) 898c2ecf20Sopenharmony_ci#define VM_ACCT(size) (PAGE_ALIGN(size) >> PAGE_SHIFT) 908c2ecf20Sopenharmony_ci 
918c2ecf20Sopenharmony_ci/* Pretend that each entry is of this size in directory's i_size */ 928c2ecf20Sopenharmony_ci#define BOGO_DIRENT_SIZE 20 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci/* Symlink up to this size is kmalloc'ed instead of using a swappable page */ 958c2ecf20Sopenharmony_ci#define SHORT_SYMLINK_LEN 128 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci/* 988c2ecf20Sopenharmony_ci * shmem_fallocate communicates with shmem_fault or shmem_writepage via 998c2ecf20Sopenharmony_ci * inode->i_private (with i_mutex making sure that it has only one user at 1008c2ecf20Sopenharmony_ci * a time): we would prefer not to enlarge the shmem inode just for that. 1018c2ecf20Sopenharmony_ci */ 1028c2ecf20Sopenharmony_cistruct shmem_falloc { 1038c2ecf20Sopenharmony_ci wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ 1048c2ecf20Sopenharmony_ci pgoff_t start; /* start of range currently being fallocated */ 1058c2ecf20Sopenharmony_ci pgoff_t next; /* the next page offset to be fallocated */ 1068c2ecf20Sopenharmony_ci pgoff_t nr_falloced; /* how many new pages have been fallocated */ 1078c2ecf20Sopenharmony_ci pgoff_t nr_unswapped; /* how often writepage refused to swap out */ 1088c2ecf20Sopenharmony_ci}; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_cistruct shmem_options { 1118c2ecf20Sopenharmony_ci unsigned long long blocks; 1128c2ecf20Sopenharmony_ci unsigned long long inodes; 1138c2ecf20Sopenharmony_ci struct mempolicy *mpol; 1148c2ecf20Sopenharmony_ci kuid_t uid; 1158c2ecf20Sopenharmony_ci kgid_t gid; 1168c2ecf20Sopenharmony_ci umode_t mode; 1178c2ecf20Sopenharmony_ci bool full_inums; 1188c2ecf20Sopenharmony_ci int huge; 1198c2ecf20Sopenharmony_ci int seen; 1208c2ecf20Sopenharmony_ci#define SHMEM_SEEN_BLOCKS 1 1218c2ecf20Sopenharmony_ci#define SHMEM_SEEN_INODES 2 1228c2ecf20Sopenharmony_ci#define SHMEM_SEEN_HUGE 4 1238c2ecf20Sopenharmony_ci#define SHMEM_SEEN_INUMS 8 1248c2ecf20Sopenharmony_ci}; 1258c2ecf20Sopenharmony_ci 
1268c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 1278c2ecf20Sopenharmony_cistatic unsigned long shmem_default_max_blocks(void) 1288c2ecf20Sopenharmony_ci{ 1298c2ecf20Sopenharmony_ci return totalram_pages() / 2; 1308c2ecf20Sopenharmony_ci} 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_cistatic unsigned long shmem_default_max_inodes(void) 1338c2ecf20Sopenharmony_ci{ 1348c2ecf20Sopenharmony_ci unsigned long nr_pages = totalram_pages(); 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci return min(nr_pages - totalhigh_pages(), nr_pages / 2); 1378c2ecf20Sopenharmony_ci} 1388c2ecf20Sopenharmony_ci#endif 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_cistatic bool shmem_should_replace_page(struct page *page, gfp_t gfp); 1418c2ecf20Sopenharmony_cistatic int shmem_replace_page(struct page **pagep, gfp_t gfp, 1428c2ecf20Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index); 1438c2ecf20Sopenharmony_cistatic int shmem_swapin_page(struct inode *inode, pgoff_t index, 1448c2ecf20Sopenharmony_ci struct page **pagep, enum sgp_type sgp, 1458c2ecf20Sopenharmony_ci gfp_t gfp, struct vm_area_struct *vma, 1468c2ecf20Sopenharmony_ci vm_fault_t *fault_type); 1478c2ecf20Sopenharmony_cistatic int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 1488c2ecf20Sopenharmony_ci struct page **pagep, enum sgp_type sgp, 1498c2ecf20Sopenharmony_ci gfp_t gfp, struct vm_area_struct *vma, 1508c2ecf20Sopenharmony_ci struct vm_fault *vmf, vm_fault_t *fault_type); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ciint shmem_getpage(struct inode *inode, pgoff_t index, 1538c2ecf20Sopenharmony_ci struct page **pagep, enum sgp_type sgp) 1548c2ecf20Sopenharmony_ci{ 1558c2ecf20Sopenharmony_ci return shmem_getpage_gfp(inode, index, pagep, sgp, 1568c2ecf20Sopenharmony_ci mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL); 1578c2ecf20Sopenharmony_ci} 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_cistatic inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) 
1608c2ecf20Sopenharmony_ci{ 1618c2ecf20Sopenharmony_ci return sb->s_fs_info; 1628c2ecf20Sopenharmony_ci} 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci/* 1658c2ecf20Sopenharmony_ci * shmem_file_setup pre-accounts the whole fixed size of a VM object, 1668c2ecf20Sopenharmony_ci * for shared memory and for shared anonymous (/dev/zero) mappings 1678c2ecf20Sopenharmony_ci * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), 1688c2ecf20Sopenharmony_ci * consistent with the pre-accounting of private mappings ... 1698c2ecf20Sopenharmony_ci */ 1708c2ecf20Sopenharmony_cistatic inline int shmem_acct_size(unsigned long flags, loff_t size) 1718c2ecf20Sopenharmony_ci{ 1728c2ecf20Sopenharmony_ci return (flags & VM_NORESERVE) ? 1738c2ecf20Sopenharmony_ci 0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size)); 1748c2ecf20Sopenharmony_ci} 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_cistatic inline void shmem_unacct_size(unsigned long flags, loff_t size) 1778c2ecf20Sopenharmony_ci{ 1788c2ecf20Sopenharmony_ci if (!(flags & VM_NORESERVE)) 1798c2ecf20Sopenharmony_ci vm_unacct_memory(VM_ACCT(size)); 1808c2ecf20Sopenharmony_ci} 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_cistatic inline int shmem_reacct_size(unsigned long flags, 1838c2ecf20Sopenharmony_ci loff_t oldsize, loff_t newsize) 1848c2ecf20Sopenharmony_ci{ 1858c2ecf20Sopenharmony_ci if (!(flags & VM_NORESERVE)) { 1868c2ecf20Sopenharmony_ci if (VM_ACCT(newsize) > VM_ACCT(oldsize)) 1878c2ecf20Sopenharmony_ci return security_vm_enough_memory_mm(current->mm, 1888c2ecf20Sopenharmony_ci VM_ACCT(newsize) - VM_ACCT(oldsize)); 1898c2ecf20Sopenharmony_ci else if (VM_ACCT(newsize) < VM_ACCT(oldsize)) 1908c2ecf20Sopenharmony_ci vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize)); 1918c2ecf20Sopenharmony_ci } 1928c2ecf20Sopenharmony_ci return 0; 1938c2ecf20Sopenharmony_ci} 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci/* 1968c2ecf20Sopenharmony_ci * ... 
whereas tmpfs objects are accounted incrementally as 1978c2ecf20Sopenharmony_ci * pages are allocated, in order to allow large sparse files. 1988c2ecf20Sopenharmony_ci * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM, 1998c2ecf20Sopenharmony_ci * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM. 2008c2ecf20Sopenharmony_ci */ 2018c2ecf20Sopenharmony_cistatic inline int shmem_acct_block(unsigned long flags, long pages) 2028c2ecf20Sopenharmony_ci{ 2038c2ecf20Sopenharmony_ci if (!(flags & VM_NORESERVE)) 2048c2ecf20Sopenharmony_ci return 0; 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci return security_vm_enough_memory_mm(current->mm, 2078c2ecf20Sopenharmony_ci pages * VM_ACCT(PAGE_SIZE)); 2088c2ecf20Sopenharmony_ci} 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_cistatic inline void shmem_unacct_blocks(unsigned long flags, long pages) 2118c2ecf20Sopenharmony_ci{ 2128c2ecf20Sopenharmony_ci if (flags & VM_NORESERVE) 2138c2ecf20Sopenharmony_ci vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE)); 2148c2ecf20Sopenharmony_ci} 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_cistatic inline bool shmem_inode_acct_block(struct inode *inode, long pages) 2178c2ecf20Sopenharmony_ci{ 2188c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 2198c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci if (shmem_acct_block(info->flags, pages)) 2228c2ecf20Sopenharmony_ci return false; 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci if (sbinfo->max_blocks) { 2258c2ecf20Sopenharmony_ci if (percpu_counter_compare(&sbinfo->used_blocks, 2268c2ecf20Sopenharmony_ci sbinfo->max_blocks - pages) > 0) 2278c2ecf20Sopenharmony_ci goto unacct; 2288c2ecf20Sopenharmony_ci percpu_counter_add(&sbinfo->used_blocks, pages); 2298c2ecf20Sopenharmony_ci } 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci return true; 2328c2ecf20Sopenharmony_ci 
2338c2ecf20Sopenharmony_ciunacct: 2348c2ecf20Sopenharmony_ci shmem_unacct_blocks(info->flags, pages); 2358c2ecf20Sopenharmony_ci return false; 2368c2ecf20Sopenharmony_ci} 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_cistatic inline void shmem_inode_unacct_blocks(struct inode *inode, long pages) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 2418c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci if (sbinfo->max_blocks) 2448c2ecf20Sopenharmony_ci percpu_counter_sub(&sbinfo->used_blocks, pages); 2458c2ecf20Sopenharmony_ci shmem_unacct_blocks(info->flags, pages); 2468c2ecf20Sopenharmony_ci} 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_cistatic const struct super_operations shmem_ops; 2498c2ecf20Sopenharmony_cistatic const struct address_space_operations shmem_aops; 2508c2ecf20Sopenharmony_cistatic const struct file_operations shmem_file_operations; 2518c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_inode_operations; 2528c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_dir_inode_operations; 2538c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_special_inode_operations; 2548c2ecf20Sopenharmony_cistatic const struct vm_operations_struct shmem_vm_ops; 2558c2ecf20Sopenharmony_cistatic struct file_system_type shmem_fs_type; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_cibool vma_is_shmem(struct vm_area_struct *vma) 2588c2ecf20Sopenharmony_ci{ 2598c2ecf20Sopenharmony_ci return vma->vm_ops == &shmem_vm_ops; 2608c2ecf20Sopenharmony_ci} 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_cistatic LIST_HEAD(shmem_swaplist); 2638c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(shmem_swaplist_mutex); 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci/* 2668c2ecf20Sopenharmony_ci * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and 2678c2ecf20Sopenharmony_ci * 
produces a novel ino for the newly allocated inode. 2688c2ecf20Sopenharmony_ci * 2698c2ecf20Sopenharmony_ci * It may also be called when making a hard link to permit the space needed by 2708c2ecf20Sopenharmony_ci * each dentry. However, in that case, no new inode number is needed since that 2718c2ecf20Sopenharmony_ci * internally draws from another pool of inode numbers (currently global 2728c2ecf20Sopenharmony_ci * get_next_ino()). This case is indicated by passing NULL as inop. 2738c2ecf20Sopenharmony_ci */ 2748c2ecf20Sopenharmony_ci#define SHMEM_INO_BATCH 1024 2758c2ecf20Sopenharmony_cistatic int shmem_reserve_inode(struct super_block *sb, ino_t *inop) 2768c2ecf20Sopenharmony_ci{ 2778c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 2788c2ecf20Sopenharmony_ci ino_t ino; 2798c2ecf20Sopenharmony_ci 2808c2ecf20Sopenharmony_ci if (!(sb->s_flags & SB_KERNMOUNT)) { 2818c2ecf20Sopenharmony_ci spin_lock(&sbinfo->stat_lock); 2828c2ecf20Sopenharmony_ci if (sbinfo->max_inodes) { 2838c2ecf20Sopenharmony_ci if (!sbinfo->free_inodes) { 2848c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->stat_lock); 2858c2ecf20Sopenharmony_ci return -ENOSPC; 2868c2ecf20Sopenharmony_ci } 2878c2ecf20Sopenharmony_ci sbinfo->free_inodes--; 2888c2ecf20Sopenharmony_ci } 2898c2ecf20Sopenharmony_ci if (inop) { 2908c2ecf20Sopenharmony_ci ino = sbinfo->next_ino++; 2918c2ecf20Sopenharmony_ci if (unlikely(is_zero_ino(ino))) 2928c2ecf20Sopenharmony_ci ino = sbinfo->next_ino++; 2938c2ecf20Sopenharmony_ci if (unlikely(!sbinfo->full_inums && 2948c2ecf20Sopenharmony_ci ino > UINT_MAX)) { 2958c2ecf20Sopenharmony_ci /* 2968c2ecf20Sopenharmony_ci * Emulate get_next_ino uint wraparound for 2978c2ecf20Sopenharmony_ci * compatibility 2988c2ecf20Sopenharmony_ci */ 2998c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_64BIT)) 3008c2ecf20Sopenharmony_ci pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n", 3018c2ecf20Sopenharmony_ci __func__, MINOR(sb->s_dev)); 
3028c2ecf20Sopenharmony_ci sbinfo->next_ino = 1; 3038c2ecf20Sopenharmony_ci ino = sbinfo->next_ino++; 3048c2ecf20Sopenharmony_ci } 3058c2ecf20Sopenharmony_ci *inop = ino; 3068c2ecf20Sopenharmony_ci } 3078c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->stat_lock); 3088c2ecf20Sopenharmony_ci } else if (inop) { 3098c2ecf20Sopenharmony_ci /* 3108c2ecf20Sopenharmony_ci * __shmem_file_setup, one of our callers, is lock-free: it 3118c2ecf20Sopenharmony_ci * doesn't hold stat_lock in shmem_reserve_inode since 3128c2ecf20Sopenharmony_ci * max_inodes is always 0, and is called from potentially 3138c2ecf20Sopenharmony_ci * unknown contexts. As such, use a per-cpu batched allocator 3148c2ecf20Sopenharmony_ci * which doesn't require the per-sb stat_lock unless we are at 3158c2ecf20Sopenharmony_ci * the batch boundary. 3168c2ecf20Sopenharmony_ci * 3178c2ecf20Sopenharmony_ci * We don't need to worry about inode{32,64} since SB_KERNMOUNT 3188c2ecf20Sopenharmony_ci * shmem mounts are not exposed to userspace, so we don't need 3198c2ecf20Sopenharmony_ci * to worry about things like glibc compatibility. 
3208c2ecf20Sopenharmony_ci */ 3218c2ecf20Sopenharmony_ci ino_t *next_ino; 3228c2ecf20Sopenharmony_ci next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); 3238c2ecf20Sopenharmony_ci ino = *next_ino; 3248c2ecf20Sopenharmony_ci if (unlikely(ino % SHMEM_INO_BATCH == 0)) { 3258c2ecf20Sopenharmony_ci spin_lock(&sbinfo->stat_lock); 3268c2ecf20Sopenharmony_ci ino = sbinfo->next_ino; 3278c2ecf20Sopenharmony_ci sbinfo->next_ino += SHMEM_INO_BATCH; 3288c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->stat_lock); 3298c2ecf20Sopenharmony_ci if (unlikely(is_zero_ino(ino))) 3308c2ecf20Sopenharmony_ci ino++; 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci *inop = ino; 3338c2ecf20Sopenharmony_ci *next_ino = ++ino; 3348c2ecf20Sopenharmony_ci put_cpu(); 3358c2ecf20Sopenharmony_ci } 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci return 0; 3388c2ecf20Sopenharmony_ci} 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_cistatic void shmem_free_inode(struct super_block *sb) 3418c2ecf20Sopenharmony_ci{ 3428c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 3438c2ecf20Sopenharmony_ci if (sbinfo->max_inodes) { 3448c2ecf20Sopenharmony_ci spin_lock(&sbinfo->stat_lock); 3458c2ecf20Sopenharmony_ci sbinfo->free_inodes++; 3468c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->stat_lock); 3478c2ecf20Sopenharmony_ci } 3488c2ecf20Sopenharmony_ci} 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci/** 3518c2ecf20Sopenharmony_ci * shmem_recalc_inode - recalculate the block usage of an inode 3528c2ecf20Sopenharmony_ci * @inode: inode to recalc 3538c2ecf20Sopenharmony_ci * 3548c2ecf20Sopenharmony_ci * We have to calculate the free blocks since the mm can drop 3558c2ecf20Sopenharmony_ci * undirtied hole pages behind our back. 
3568c2ecf20Sopenharmony_ci * 3578c2ecf20Sopenharmony_ci * But normally info->alloced == inode->i_mapping->nrpages + info->swapped 3588c2ecf20Sopenharmony_ci * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) 3598c2ecf20Sopenharmony_ci * 3608c2ecf20Sopenharmony_ci * It has to be called with the spinlock held. 3618c2ecf20Sopenharmony_ci */ 3628c2ecf20Sopenharmony_cistatic void shmem_recalc_inode(struct inode *inode) 3638c2ecf20Sopenharmony_ci{ 3648c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 3658c2ecf20Sopenharmony_ci long freed; 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci freed = info->alloced - info->swapped - inode->i_mapping->nrpages; 3688c2ecf20Sopenharmony_ci if (freed > 0) { 3698c2ecf20Sopenharmony_ci info->alloced -= freed; 3708c2ecf20Sopenharmony_ci inode->i_blocks -= freed * BLOCKS_PER_PAGE; 3718c2ecf20Sopenharmony_ci shmem_inode_unacct_blocks(inode, freed); 3728c2ecf20Sopenharmony_ci } 3738c2ecf20Sopenharmony_ci} 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_cibool shmem_charge(struct inode *inode, long pages) 3768c2ecf20Sopenharmony_ci{ 3778c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 3788c2ecf20Sopenharmony_ci unsigned long flags; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci if (!shmem_inode_acct_block(inode, pages)) 3818c2ecf20Sopenharmony_ci return false; 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ 3848c2ecf20Sopenharmony_ci inode->i_mapping->nrpages += pages; 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci spin_lock_irqsave(&info->lock, flags); 3878c2ecf20Sopenharmony_ci info->alloced += pages; 3888c2ecf20Sopenharmony_ci inode->i_blocks += pages * BLOCKS_PER_PAGE; 3898c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 3908c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&info->lock, flags); 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci return true; 
3938c2ecf20Sopenharmony_ci} 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_civoid shmem_uncharge(struct inode *inode, long pages) 3968c2ecf20Sopenharmony_ci{ 3978c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 3988c2ecf20Sopenharmony_ci unsigned long flags; 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci /* nrpages adjustment done by __delete_from_page_cache() or caller */ 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci spin_lock_irqsave(&info->lock, flags); 4038c2ecf20Sopenharmony_ci info->alloced -= pages; 4048c2ecf20Sopenharmony_ci inode->i_blocks -= pages * BLOCKS_PER_PAGE; 4058c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 4068c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&info->lock, flags); 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_ci shmem_inode_unacct_blocks(inode, pages); 4098c2ecf20Sopenharmony_ci} 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci/* 4128c2ecf20Sopenharmony_ci * Replace item expected in xarray by a new item, while holding xa_lock. 
4138c2ecf20Sopenharmony_ci */ 4148c2ecf20Sopenharmony_cistatic int shmem_replace_entry(struct address_space *mapping, 4158c2ecf20Sopenharmony_ci pgoff_t index, void *expected, void *replacement) 4168c2ecf20Sopenharmony_ci{ 4178c2ecf20Sopenharmony_ci XA_STATE(xas, &mapping->i_pages, index); 4188c2ecf20Sopenharmony_ci void *item; 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci VM_BUG_ON(!expected); 4218c2ecf20Sopenharmony_ci VM_BUG_ON(!replacement); 4228c2ecf20Sopenharmony_ci item = xas_load(&xas); 4238c2ecf20Sopenharmony_ci if (item != expected) 4248c2ecf20Sopenharmony_ci return -ENOENT; 4258c2ecf20Sopenharmony_ci xas_store(&xas, replacement); 4268c2ecf20Sopenharmony_ci return 0; 4278c2ecf20Sopenharmony_ci} 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci/* 4308c2ecf20Sopenharmony_ci * Sometimes, before we decide whether to proceed or to fail, we must check 4318c2ecf20Sopenharmony_ci * that an entry was not already brought back from swap by a racing thread. 4328c2ecf20Sopenharmony_ci * 4338c2ecf20Sopenharmony_ci * Checking page is not enough: by the time a SwapCache page is locked, it 4348c2ecf20Sopenharmony_ci * might be reused, and again be SwapCache, using the same swap as before. 
4358c2ecf20Sopenharmony_ci */ 4368c2ecf20Sopenharmony_cistatic bool shmem_confirm_swap(struct address_space *mapping, 4378c2ecf20Sopenharmony_ci pgoff_t index, swp_entry_t swap) 4388c2ecf20Sopenharmony_ci{ 4398c2ecf20Sopenharmony_ci return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); 4408c2ecf20Sopenharmony_ci} 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci/* 4438c2ecf20Sopenharmony_ci * Definitions for "huge tmpfs": tmpfs mounted with the huge= option 4448c2ecf20Sopenharmony_ci * 4458c2ecf20Sopenharmony_ci * SHMEM_HUGE_NEVER: 4468c2ecf20Sopenharmony_ci * disables huge pages for the mount; 4478c2ecf20Sopenharmony_ci * SHMEM_HUGE_ALWAYS: 4488c2ecf20Sopenharmony_ci * enables huge pages for the mount; 4498c2ecf20Sopenharmony_ci * SHMEM_HUGE_WITHIN_SIZE: 4508c2ecf20Sopenharmony_ci * only allocate huge pages if the page will be fully within i_size, 4518c2ecf20Sopenharmony_ci * also respect fadvise()/madvise() hints; 4528c2ecf20Sopenharmony_ci * SHMEM_HUGE_ADVISE: 4538c2ecf20Sopenharmony_ci * only allocate huge pages if requested with fadvise()/madvise(); 4548c2ecf20Sopenharmony_ci */ 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci#define SHMEM_HUGE_NEVER 0 4578c2ecf20Sopenharmony_ci#define SHMEM_HUGE_ALWAYS 1 4588c2ecf20Sopenharmony_ci#define SHMEM_HUGE_WITHIN_SIZE 2 4598c2ecf20Sopenharmony_ci#define SHMEM_HUGE_ADVISE 3 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci/* 4628c2ecf20Sopenharmony_ci * Special values. 
4638c2ecf20Sopenharmony_ci * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled: 4648c2ecf20Sopenharmony_ci * 4658c2ecf20Sopenharmony_ci * SHMEM_HUGE_DENY: 4668c2ecf20Sopenharmony_ci * disables huge on shm_mnt and all mounts, for emergency use; 4678c2ecf20Sopenharmony_ci * SHMEM_HUGE_FORCE: 4688c2ecf20Sopenharmony_ci * enables huge on shm_mnt and all mounts, w/o needing option, for testing; 4698c2ecf20Sopenharmony_ci * 4708c2ecf20Sopenharmony_ci */ 4718c2ecf20Sopenharmony_ci#define SHMEM_HUGE_DENY (-1) 4728c2ecf20Sopenharmony_ci#define SHMEM_HUGE_FORCE (-2) 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 4758c2ecf20Sopenharmony_ci/* ifdef here to avoid bloating shmem.o when not necessary */ 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_cistatic int shmem_huge __read_mostly; 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci#if defined(CONFIG_SYSFS) 4808c2ecf20Sopenharmony_cistatic int shmem_parse_huge(const char *str) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci if (!strcmp(str, "never")) 4838c2ecf20Sopenharmony_ci return SHMEM_HUGE_NEVER; 4848c2ecf20Sopenharmony_ci if (!strcmp(str, "always")) 4858c2ecf20Sopenharmony_ci return SHMEM_HUGE_ALWAYS; 4868c2ecf20Sopenharmony_ci if (!strcmp(str, "within_size")) 4878c2ecf20Sopenharmony_ci return SHMEM_HUGE_WITHIN_SIZE; 4888c2ecf20Sopenharmony_ci if (!strcmp(str, "advise")) 4898c2ecf20Sopenharmony_ci return SHMEM_HUGE_ADVISE; 4908c2ecf20Sopenharmony_ci if (!strcmp(str, "deny")) 4918c2ecf20Sopenharmony_ci return SHMEM_HUGE_DENY; 4928c2ecf20Sopenharmony_ci if (!strcmp(str, "force")) 4938c2ecf20Sopenharmony_ci return SHMEM_HUGE_FORCE; 4948c2ecf20Sopenharmony_ci return -EINVAL; 4958c2ecf20Sopenharmony_ci} 4968c2ecf20Sopenharmony_ci#endif 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) 4998c2ecf20Sopenharmony_cistatic const char *shmem_format_huge(int huge) 5008c2ecf20Sopenharmony_ci{ 
5018c2ecf20Sopenharmony_ci switch (huge) { 5028c2ecf20Sopenharmony_ci case SHMEM_HUGE_NEVER: 5038c2ecf20Sopenharmony_ci return "never"; 5048c2ecf20Sopenharmony_ci case SHMEM_HUGE_ALWAYS: 5058c2ecf20Sopenharmony_ci return "always"; 5068c2ecf20Sopenharmony_ci case SHMEM_HUGE_WITHIN_SIZE: 5078c2ecf20Sopenharmony_ci return "within_size"; 5088c2ecf20Sopenharmony_ci case SHMEM_HUGE_ADVISE: 5098c2ecf20Sopenharmony_ci return "advise"; 5108c2ecf20Sopenharmony_ci case SHMEM_HUGE_DENY: 5118c2ecf20Sopenharmony_ci return "deny"; 5128c2ecf20Sopenharmony_ci case SHMEM_HUGE_FORCE: 5138c2ecf20Sopenharmony_ci return "force"; 5148c2ecf20Sopenharmony_ci default: 5158c2ecf20Sopenharmony_ci VM_BUG_ON(1); 5168c2ecf20Sopenharmony_ci return "bad_val"; 5178c2ecf20Sopenharmony_ci } 5188c2ecf20Sopenharmony_ci} 5198c2ecf20Sopenharmony_ci#endif 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_cistatic unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, 5228c2ecf20Sopenharmony_ci struct shrink_control *sc, unsigned long nr_to_split) 5238c2ecf20Sopenharmony_ci{ 5248c2ecf20Sopenharmony_ci LIST_HEAD(list), *pos, *next; 5258c2ecf20Sopenharmony_ci LIST_HEAD(to_remove); 5268c2ecf20Sopenharmony_ci struct inode *inode; 5278c2ecf20Sopenharmony_ci struct shmem_inode_info *info; 5288c2ecf20Sopenharmony_ci struct page *page; 5298c2ecf20Sopenharmony_ci unsigned long batch = sc ? 
sc->nr_to_scan : 128; 5308c2ecf20Sopenharmony_ci int split = 0; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci if (list_empty(&sbinfo->shrinklist)) 5338c2ecf20Sopenharmony_ci return SHRINK_STOP; 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 5368c2ecf20Sopenharmony_ci list_for_each_safe(pos, next, &sbinfo->shrinklist) { 5378c2ecf20Sopenharmony_ci info = list_entry(pos, struct shmem_inode_info, shrinklist); 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci /* pin the inode */ 5408c2ecf20Sopenharmony_ci inode = igrab(&info->vfs_inode); 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci /* inode is about to be evicted */ 5438c2ecf20Sopenharmony_ci if (!inode) { 5448c2ecf20Sopenharmony_ci list_del_init(&info->shrinklist); 5458c2ecf20Sopenharmony_ci goto next; 5468c2ecf20Sopenharmony_ci } 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci /* Check if there's anything to gain */ 5498c2ecf20Sopenharmony_ci if (round_up(inode->i_size, PAGE_SIZE) == 5508c2ecf20Sopenharmony_ci round_up(inode->i_size, HPAGE_PMD_SIZE)) { 5518c2ecf20Sopenharmony_ci list_move(&info->shrinklist, &to_remove); 5528c2ecf20Sopenharmony_ci goto next; 5538c2ecf20Sopenharmony_ci } 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci list_move(&info->shrinklist, &list); 5568c2ecf20Sopenharmony_cinext: 5578c2ecf20Sopenharmony_ci sbinfo->shrinklist_len--; 5588c2ecf20Sopenharmony_ci if (!--batch) 5598c2ecf20Sopenharmony_ci break; 5608c2ecf20Sopenharmony_ci } 5618c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci list_for_each_safe(pos, next, &to_remove) { 5648c2ecf20Sopenharmony_ci info = list_entry(pos, struct shmem_inode_info, shrinklist); 5658c2ecf20Sopenharmony_ci inode = &info->vfs_inode; 5668c2ecf20Sopenharmony_ci list_del_init(&info->shrinklist); 5678c2ecf20Sopenharmony_ci iput(inode); 5688c2ecf20Sopenharmony_ci } 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci 
list_for_each_safe(pos, next, &list) { 5718c2ecf20Sopenharmony_ci int ret; 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci info = list_entry(pos, struct shmem_inode_info, shrinklist); 5748c2ecf20Sopenharmony_ci inode = &info->vfs_inode; 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci if (nr_to_split && split >= nr_to_split) 5778c2ecf20Sopenharmony_ci goto move_back; 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci page = find_get_page(inode->i_mapping, 5808c2ecf20Sopenharmony_ci (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); 5818c2ecf20Sopenharmony_ci if (!page) 5828c2ecf20Sopenharmony_ci goto drop; 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci /* No huge page at the end of the file: nothing to split */ 5858c2ecf20Sopenharmony_ci if (!PageTransHuge(page)) { 5868c2ecf20Sopenharmony_ci put_page(page); 5878c2ecf20Sopenharmony_ci goto drop; 5888c2ecf20Sopenharmony_ci } 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci /* 5918c2ecf20Sopenharmony_ci * Move the inode on the list back to shrinklist if we failed 5928c2ecf20Sopenharmony_ci * to lock the page at this time. 5938c2ecf20Sopenharmony_ci * 5948c2ecf20Sopenharmony_ci * Waiting for the lock may lead to deadlock in the 5958c2ecf20Sopenharmony_ci * reclaim path. 
5968c2ecf20Sopenharmony_ci */ 5978c2ecf20Sopenharmony_ci if (!trylock_page(page)) { 5988c2ecf20Sopenharmony_ci put_page(page); 5998c2ecf20Sopenharmony_ci goto move_back; 6008c2ecf20Sopenharmony_ci } 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci ret = split_huge_page(page); 6038c2ecf20Sopenharmony_ci unlock_page(page); 6048c2ecf20Sopenharmony_ci put_page(page); 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ci /* If split failed move the inode on the list back to shrinklist */ 6078c2ecf20Sopenharmony_ci if (ret) 6088c2ecf20Sopenharmony_ci goto move_back; 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_ci split++; 6118c2ecf20Sopenharmony_cidrop: 6128c2ecf20Sopenharmony_ci list_del_init(&info->shrinklist); 6138c2ecf20Sopenharmony_ci goto put; 6148c2ecf20Sopenharmony_cimove_back: 6158c2ecf20Sopenharmony_ci /* 6168c2ecf20Sopenharmony_ci * Make sure the inode is either on the global list or deleted 6178c2ecf20Sopenharmony_ci * from any local list before iput() since it could be deleted 6188c2ecf20Sopenharmony_ci * in another thread once we put the inode (then the local list 6198c2ecf20Sopenharmony_ci * is corrupted). 
6208c2ecf20Sopenharmony_ci */ 6218c2ecf20Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 6228c2ecf20Sopenharmony_ci list_move(&info->shrinklist, &sbinfo->shrinklist); 6238c2ecf20Sopenharmony_ci sbinfo->shrinklist_len++; 6248c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 6258c2ecf20Sopenharmony_ciput: 6268c2ecf20Sopenharmony_ci iput(inode); 6278c2ecf20Sopenharmony_ci } 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci return split; 6308c2ecf20Sopenharmony_ci} 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_cistatic long shmem_unused_huge_scan(struct super_block *sb, 6338c2ecf20Sopenharmony_ci struct shrink_control *sc) 6348c2ecf20Sopenharmony_ci{ 6358c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 6368c2ecf20Sopenharmony_ci 6378c2ecf20Sopenharmony_ci if (!READ_ONCE(sbinfo->shrinklist_len)) 6388c2ecf20Sopenharmony_ci return SHRINK_STOP; 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci return shmem_unused_huge_shrink(sbinfo, sc, 0); 6418c2ecf20Sopenharmony_ci} 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_cistatic long shmem_unused_huge_count(struct super_block *sb, 6448c2ecf20Sopenharmony_ci struct shrink_control *sc) 6458c2ecf20Sopenharmony_ci{ 6468c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 6478c2ecf20Sopenharmony_ci return READ_ONCE(sbinfo->shrinklist_len); 6488c2ecf20Sopenharmony_ci} 6498c2ecf20Sopenharmony_ci#else /* !CONFIG_TRANSPARENT_HUGEPAGE */ 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci#define shmem_huge SHMEM_HUGE_DENY 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_cistatic unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, 6548c2ecf20Sopenharmony_ci struct shrink_control *sc, unsigned long nr_to_split) 6558c2ecf20Sopenharmony_ci{ 6568c2ecf20Sopenharmony_ci return 0; 6578c2ecf20Sopenharmony_ci} 6588c2ecf20Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_cistatic inline bool 
is_huge_enabled(struct shmem_sb_info *sbinfo) 6618c2ecf20Sopenharmony_ci{ 6628c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && 6638c2ecf20Sopenharmony_ci (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) && 6648c2ecf20Sopenharmony_ci shmem_huge != SHMEM_HUGE_DENY) 6658c2ecf20Sopenharmony_ci return true; 6668c2ecf20Sopenharmony_ci return false; 6678c2ecf20Sopenharmony_ci} 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci/* 6708c2ecf20Sopenharmony_ci * Like add_to_page_cache_locked, but error if expected item has gone. 6718c2ecf20Sopenharmony_ci */ 6728c2ecf20Sopenharmony_cistatic int shmem_add_to_page_cache(struct page *page, 6738c2ecf20Sopenharmony_ci struct address_space *mapping, 6748c2ecf20Sopenharmony_ci pgoff_t index, void *expected, gfp_t gfp, 6758c2ecf20Sopenharmony_ci struct mm_struct *charge_mm) 6768c2ecf20Sopenharmony_ci{ 6778c2ecf20Sopenharmony_ci XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); 6788c2ecf20Sopenharmony_ci unsigned long i = 0; 6798c2ecf20Sopenharmony_ci unsigned long nr = compound_nr(page); 6808c2ecf20Sopenharmony_ci int error; 6818c2ecf20Sopenharmony_ci 6828c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageTail(page), page); 6838c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(index != round_down(index, nr), page); 6848c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageLocked(page), page); 6858c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(!PageSwapBacked(page), page); 6868c2ecf20Sopenharmony_ci VM_BUG_ON(expected && PageTransHuge(page)); 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci page_ref_add(page, nr); 6898c2ecf20Sopenharmony_ci page->mapping = mapping; 6908c2ecf20Sopenharmony_ci page->index = index; 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci if (!PageSwapCache(page)) { 6938c2ecf20Sopenharmony_ci error = mem_cgroup_charge(page, charge_mm, gfp); 6948c2ecf20Sopenharmony_ci if (error) { 6958c2ecf20Sopenharmony_ci if (PageTransHuge(page)) { 6968c2ecf20Sopenharmony_ci count_vm_event(THP_FILE_FALLBACK); 
6978c2ecf20Sopenharmony_ci count_vm_event(THP_FILE_FALLBACK_CHARGE); 6988c2ecf20Sopenharmony_ci } 6998c2ecf20Sopenharmony_ci goto error; 7008c2ecf20Sopenharmony_ci } 7018c2ecf20Sopenharmony_ci } 7028c2ecf20Sopenharmony_ci cgroup_throttle_swaprate(page, gfp); 7038c2ecf20Sopenharmony_ci 7048c2ecf20Sopenharmony_ci do { 7058c2ecf20Sopenharmony_ci void *entry; 7068c2ecf20Sopenharmony_ci xas_lock_irq(&xas); 7078c2ecf20Sopenharmony_ci entry = xas_find_conflict(&xas); 7088c2ecf20Sopenharmony_ci if (entry != expected) 7098c2ecf20Sopenharmony_ci xas_set_err(&xas, -EEXIST); 7108c2ecf20Sopenharmony_ci xas_create_range(&xas); 7118c2ecf20Sopenharmony_ci if (xas_error(&xas)) 7128c2ecf20Sopenharmony_ci goto unlock; 7138c2ecf20Sopenharmony_cinext: 7148c2ecf20Sopenharmony_ci xas_store(&xas, page); 7158c2ecf20Sopenharmony_ci if (++i < nr) { 7168c2ecf20Sopenharmony_ci xas_next(&xas); 7178c2ecf20Sopenharmony_ci goto next; 7188c2ecf20Sopenharmony_ci } 7198c2ecf20Sopenharmony_ci if (PageTransHuge(page)) { 7208c2ecf20Sopenharmony_ci count_vm_event(THP_FILE_ALLOC); 7218c2ecf20Sopenharmony_ci __inc_node_page_state(page, NR_SHMEM_THPS); 7228c2ecf20Sopenharmony_ci } 7238c2ecf20Sopenharmony_ci mapping->nrpages += nr; 7248c2ecf20Sopenharmony_ci __mod_lruvec_page_state(page, NR_FILE_PAGES, nr); 7258c2ecf20Sopenharmony_ci __mod_lruvec_page_state(page, NR_SHMEM, nr); 7268c2ecf20Sopenharmony_ciunlock: 7278c2ecf20Sopenharmony_ci xas_unlock_irq(&xas); 7288c2ecf20Sopenharmony_ci } while (xas_nomem(&xas, gfp)); 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci if (xas_error(&xas)) { 7318c2ecf20Sopenharmony_ci error = xas_error(&xas); 7328c2ecf20Sopenharmony_ci goto error; 7338c2ecf20Sopenharmony_ci } 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci return 0; 7368c2ecf20Sopenharmony_cierror: 7378c2ecf20Sopenharmony_ci page->mapping = NULL; 7388c2ecf20Sopenharmony_ci page_ref_sub(page, nr); 7398c2ecf20Sopenharmony_ci return error; 7408c2ecf20Sopenharmony_ci} 7418c2ecf20Sopenharmony_ci 
7428c2ecf20Sopenharmony_ci/* 7438c2ecf20Sopenharmony_ci * Like delete_from_page_cache, but substitutes swap for page. 7448c2ecf20Sopenharmony_ci */ 7458c2ecf20Sopenharmony_cistatic void shmem_delete_from_page_cache(struct page *page, void *radswap) 7468c2ecf20Sopenharmony_ci{ 7478c2ecf20Sopenharmony_ci struct address_space *mapping = page->mapping; 7488c2ecf20Sopenharmony_ci int error; 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageCompound(page), page); 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_ci xa_lock_irq(&mapping->i_pages); 7538c2ecf20Sopenharmony_ci error = shmem_replace_entry(mapping, page->index, page, radswap); 7548c2ecf20Sopenharmony_ci page->mapping = NULL; 7558c2ecf20Sopenharmony_ci mapping->nrpages--; 7568c2ecf20Sopenharmony_ci __dec_lruvec_page_state(page, NR_FILE_PAGES); 7578c2ecf20Sopenharmony_ci __dec_lruvec_page_state(page, NR_SHMEM); 7588c2ecf20Sopenharmony_ci xa_unlock_irq(&mapping->i_pages); 7598c2ecf20Sopenharmony_ci put_page(page); 7608c2ecf20Sopenharmony_ci BUG_ON(error); 7618c2ecf20Sopenharmony_ci} 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ci/* 7648c2ecf20Sopenharmony_ci * Remove swap entry from page cache, free the swap and its page cache. 
7658c2ecf20Sopenharmony_ci */ 7668c2ecf20Sopenharmony_cistatic int shmem_free_swap(struct address_space *mapping, 7678c2ecf20Sopenharmony_ci pgoff_t index, void *radswap) 7688c2ecf20Sopenharmony_ci{ 7698c2ecf20Sopenharmony_ci void *old; 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0); 7728c2ecf20Sopenharmony_ci if (old != radswap) 7738c2ecf20Sopenharmony_ci return -ENOENT; 7748c2ecf20Sopenharmony_ci free_swap_and_cache(radix_to_swp_entry(radswap)); 7758c2ecf20Sopenharmony_ci return 0; 7768c2ecf20Sopenharmony_ci} 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci/* 7798c2ecf20Sopenharmony_ci * Determine (in bytes) how many of the shmem object's pages mapped by the 7808c2ecf20Sopenharmony_ci * given offsets are swapped out. 7818c2ecf20Sopenharmony_ci * 7828c2ecf20Sopenharmony_ci * This is safe to call without i_mutex or the i_pages lock thanks to RCU, 7838c2ecf20Sopenharmony_ci * as long as the inode doesn't go away and racy results are not a problem. 
7848c2ecf20Sopenharmony_ci */ 7858c2ecf20Sopenharmony_ciunsigned long shmem_partial_swap_usage(struct address_space *mapping, 7868c2ecf20Sopenharmony_ci pgoff_t start, pgoff_t end) 7878c2ecf20Sopenharmony_ci{ 7888c2ecf20Sopenharmony_ci XA_STATE(xas, &mapping->i_pages, start); 7898c2ecf20Sopenharmony_ci struct page *page; 7908c2ecf20Sopenharmony_ci unsigned long swapped = 0; 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci rcu_read_lock(); 7938c2ecf20Sopenharmony_ci xas_for_each(&xas, page, end - 1) { 7948c2ecf20Sopenharmony_ci if (xas_retry(&xas, page)) 7958c2ecf20Sopenharmony_ci continue; 7968c2ecf20Sopenharmony_ci if (xa_is_value(page)) 7978c2ecf20Sopenharmony_ci swapped++; 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_ci if (need_resched()) { 8008c2ecf20Sopenharmony_ci xas_pause(&xas); 8018c2ecf20Sopenharmony_ci cond_resched_rcu(); 8028c2ecf20Sopenharmony_ci } 8038c2ecf20Sopenharmony_ci } 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_ci rcu_read_unlock(); 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci return swapped << PAGE_SHIFT; 8088c2ecf20Sopenharmony_ci} 8098c2ecf20Sopenharmony_ci 8108c2ecf20Sopenharmony_ci/* 8118c2ecf20Sopenharmony_ci * Determine (in bytes) how many of the shmem object's pages mapped by the 8128c2ecf20Sopenharmony_ci * given vma is swapped out. 8138c2ecf20Sopenharmony_ci * 8148c2ecf20Sopenharmony_ci * This is safe to call without i_mutex or the i_pages lock thanks to RCU, 8158c2ecf20Sopenharmony_ci * as long as the inode doesn't go away and racy results are not a problem. 
8168c2ecf20Sopenharmony_ci */ 8178c2ecf20Sopenharmony_ciunsigned long shmem_swap_usage(struct vm_area_struct *vma) 8188c2ecf20Sopenharmony_ci{ 8198c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 8208c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 8218c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 8228c2ecf20Sopenharmony_ci unsigned long swapped; 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci /* Be careful as we don't hold info->lock */ 8258c2ecf20Sopenharmony_ci swapped = READ_ONCE(info->swapped); 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ci /* 8288c2ecf20Sopenharmony_ci * The easier cases are when the shmem object has nothing in swap, or 8298c2ecf20Sopenharmony_ci * the vma maps it whole. Then we can simply use the stats that we 8308c2ecf20Sopenharmony_ci * already track. 8318c2ecf20Sopenharmony_ci */ 8328c2ecf20Sopenharmony_ci if (!swapped) 8338c2ecf20Sopenharmony_ci return 0; 8348c2ecf20Sopenharmony_ci 8358c2ecf20Sopenharmony_ci if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size) 8368c2ecf20Sopenharmony_ci return swapped << PAGE_SHIFT; 8378c2ecf20Sopenharmony_ci 8388c2ecf20Sopenharmony_ci /* Here comes the more involved part */ 8398c2ecf20Sopenharmony_ci return shmem_partial_swap_usage(mapping, 8408c2ecf20Sopenharmony_ci linear_page_index(vma, vma->vm_start), 8418c2ecf20Sopenharmony_ci linear_page_index(vma, vma->vm_end)); 8428c2ecf20Sopenharmony_ci} 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci/* 8458c2ecf20Sopenharmony_ci * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. 
8468c2ecf20Sopenharmony_ci */ 8478c2ecf20Sopenharmony_civoid shmem_unlock_mapping(struct address_space *mapping) 8488c2ecf20Sopenharmony_ci{ 8498c2ecf20Sopenharmony_ci struct pagevec pvec; 8508c2ecf20Sopenharmony_ci pgoff_t indices[PAGEVEC_SIZE]; 8518c2ecf20Sopenharmony_ci pgoff_t index = 0; 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci pagevec_init(&pvec); 8548c2ecf20Sopenharmony_ci /* 8558c2ecf20Sopenharmony_ci * Minor point, but we might as well stop if someone else SHM_LOCKs it. 8568c2ecf20Sopenharmony_ci */ 8578c2ecf20Sopenharmony_ci while (!mapping_unevictable(mapping)) { 8588c2ecf20Sopenharmony_ci /* 8598c2ecf20Sopenharmony_ci * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it 8608c2ecf20Sopenharmony_ci * has finished, if it hits a row of PAGEVEC_SIZE swap entries. 8618c2ecf20Sopenharmony_ci */ 8628c2ecf20Sopenharmony_ci pvec.nr = find_get_entries(mapping, index, 8638c2ecf20Sopenharmony_ci PAGEVEC_SIZE, pvec.pages, indices); 8648c2ecf20Sopenharmony_ci if (!pvec.nr) 8658c2ecf20Sopenharmony_ci break; 8668c2ecf20Sopenharmony_ci index = indices[pvec.nr - 1] + 1; 8678c2ecf20Sopenharmony_ci pagevec_remove_exceptionals(&pvec); 8688c2ecf20Sopenharmony_ci check_move_unevictable_pages(&pvec); 8698c2ecf20Sopenharmony_ci pagevec_release(&pvec); 8708c2ecf20Sopenharmony_ci cond_resched(); 8718c2ecf20Sopenharmony_ci } 8728c2ecf20Sopenharmony_ci} 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci/* 8758c2ecf20Sopenharmony_ci * Check whether a hole-punch or truncation needs to split a huge page, 8768c2ecf20Sopenharmony_ci * returning true if no split was required, or the split has been successful. 8778c2ecf20Sopenharmony_ci * 8788c2ecf20Sopenharmony_ci * Eviction (or truncation to 0 size) should never need to split a huge page; 8798c2ecf20Sopenharmony_ci * but in rare cases might do so, if shmem_undo_range() failed to trylock on 8808c2ecf20Sopenharmony_ci * head, and then succeeded to trylock on tail. 
8818c2ecf20Sopenharmony_ci * 8828c2ecf20Sopenharmony_ci * A split can only succeed when there are no additional references on the 8838c2ecf20Sopenharmony_ci * huge page: so the split below relies upon find_get_entries() having stopped 8848c2ecf20Sopenharmony_ci * when it found a subpage of the huge page, without getting further references. 8858c2ecf20Sopenharmony_ci */ 8868c2ecf20Sopenharmony_cistatic bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end) 8878c2ecf20Sopenharmony_ci{ 8888c2ecf20Sopenharmony_ci if (!PageTransCompound(page)) 8898c2ecf20Sopenharmony_ci return true; 8908c2ecf20Sopenharmony_ci 8918c2ecf20Sopenharmony_ci /* Just proceed to delete a huge page wholly within the range punched */ 8928c2ecf20Sopenharmony_ci if (PageHead(page) && 8938c2ecf20Sopenharmony_ci page->index >= start && page->index + HPAGE_PMD_NR <= end) 8948c2ecf20Sopenharmony_ci return true; 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci /* Try to split huge page, so we can truly punch the hole or truncate */ 8978c2ecf20Sopenharmony_ci return split_huge_page(page) >= 0; 8988c2ecf20Sopenharmony_ci} 8998c2ecf20Sopenharmony_ci 9008c2ecf20Sopenharmony_ci/* 9018c2ecf20Sopenharmony_ci * Remove range of pages and swap entries from page cache, and free them. 9028c2ecf20Sopenharmony_ci * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. 
9038c2ecf20Sopenharmony_ci */ 9048c2ecf20Sopenharmony_cistatic void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, 9058c2ecf20Sopenharmony_ci bool unfalloc) 9068c2ecf20Sopenharmony_ci{ 9078c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 9088c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 9098c2ecf20Sopenharmony_ci pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; 9108c2ecf20Sopenharmony_ci pgoff_t end = (lend + 1) >> PAGE_SHIFT; 9118c2ecf20Sopenharmony_ci unsigned int partial_start = lstart & (PAGE_SIZE - 1); 9128c2ecf20Sopenharmony_ci unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1); 9138c2ecf20Sopenharmony_ci struct pagevec pvec; 9148c2ecf20Sopenharmony_ci pgoff_t indices[PAGEVEC_SIZE]; 9158c2ecf20Sopenharmony_ci long nr_swaps_freed = 0; 9168c2ecf20Sopenharmony_ci pgoff_t index; 9178c2ecf20Sopenharmony_ci int i; 9188c2ecf20Sopenharmony_ci 9198c2ecf20Sopenharmony_ci if (lend == -1) 9208c2ecf20Sopenharmony_ci end = -1; /* unsigned, so actually very big */ 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci pagevec_init(&pvec); 9238c2ecf20Sopenharmony_ci index = start; 9248c2ecf20Sopenharmony_ci while (index < end) { 9258c2ecf20Sopenharmony_ci pvec.nr = find_get_entries(mapping, index, 9268c2ecf20Sopenharmony_ci min(end - index, (pgoff_t)PAGEVEC_SIZE), 9278c2ecf20Sopenharmony_ci pvec.pages, indices); 9288c2ecf20Sopenharmony_ci if (!pvec.nr) 9298c2ecf20Sopenharmony_ci break; 9308c2ecf20Sopenharmony_ci for (i = 0; i < pagevec_count(&pvec); i++) { 9318c2ecf20Sopenharmony_ci struct page *page = pvec.pages[i]; 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci index = indices[i]; 9348c2ecf20Sopenharmony_ci if (index >= end) 9358c2ecf20Sopenharmony_ci break; 9368c2ecf20Sopenharmony_ci 9378c2ecf20Sopenharmony_ci if (xa_is_value(page)) { 9388c2ecf20Sopenharmony_ci if (unfalloc) 9398c2ecf20Sopenharmony_ci continue; 9408c2ecf20Sopenharmony_ci nr_swaps_freed += !shmem_free_swap(mapping, 
9418c2ecf20Sopenharmony_ci index, page); 9428c2ecf20Sopenharmony_ci continue; 9438c2ecf20Sopenharmony_ci } 9448c2ecf20Sopenharmony_ci 9458c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page); 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci if (!trylock_page(page)) 9488c2ecf20Sopenharmony_ci continue; 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci if ((!unfalloc || !PageUptodate(page)) && 9518c2ecf20Sopenharmony_ci page_mapping(page) == mapping) { 9528c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageWriteback(page), page); 9538c2ecf20Sopenharmony_ci if (shmem_punch_compound(page, start, end)) 9548c2ecf20Sopenharmony_ci truncate_inode_page(mapping, page); 9558c2ecf20Sopenharmony_ci } 9568c2ecf20Sopenharmony_ci unlock_page(page); 9578c2ecf20Sopenharmony_ci } 9588c2ecf20Sopenharmony_ci pagevec_remove_exceptionals(&pvec); 9598c2ecf20Sopenharmony_ci pagevec_release(&pvec); 9608c2ecf20Sopenharmony_ci cond_resched(); 9618c2ecf20Sopenharmony_ci index++; 9628c2ecf20Sopenharmony_ci } 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci if (partial_start) { 9658c2ecf20Sopenharmony_ci struct page *page = NULL; 9668c2ecf20Sopenharmony_ci shmem_getpage(inode, start - 1, &page, SGP_READ); 9678c2ecf20Sopenharmony_ci if (page) { 9688c2ecf20Sopenharmony_ci unsigned int top = PAGE_SIZE; 9698c2ecf20Sopenharmony_ci if (start > end) { 9708c2ecf20Sopenharmony_ci top = partial_end; 9718c2ecf20Sopenharmony_ci partial_end = 0; 9728c2ecf20Sopenharmony_ci } 9738c2ecf20Sopenharmony_ci zero_user_segment(page, partial_start, top); 9748c2ecf20Sopenharmony_ci set_page_dirty(page); 9758c2ecf20Sopenharmony_ci unlock_page(page); 9768c2ecf20Sopenharmony_ci put_page(page); 9778c2ecf20Sopenharmony_ci } 9788c2ecf20Sopenharmony_ci } 9798c2ecf20Sopenharmony_ci if (partial_end) { 9808c2ecf20Sopenharmony_ci struct page *page = NULL; 9818c2ecf20Sopenharmony_ci shmem_getpage(inode, end, &page, SGP_READ); 9828c2ecf20Sopenharmony_ci if (page) { 9838c2ecf20Sopenharmony_ci 
zero_user_segment(page, 0, partial_end); 9848c2ecf20Sopenharmony_ci set_page_dirty(page); 9858c2ecf20Sopenharmony_ci unlock_page(page); 9868c2ecf20Sopenharmony_ci put_page(page); 9878c2ecf20Sopenharmony_ci } 9888c2ecf20Sopenharmony_ci } 9898c2ecf20Sopenharmony_ci if (start >= end) 9908c2ecf20Sopenharmony_ci return; 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_ci index = start; 9938c2ecf20Sopenharmony_ci while (index < end) { 9948c2ecf20Sopenharmony_ci cond_resched(); 9958c2ecf20Sopenharmony_ci 9968c2ecf20Sopenharmony_ci pvec.nr = find_get_entries(mapping, index, 9978c2ecf20Sopenharmony_ci min(end - index, (pgoff_t)PAGEVEC_SIZE), 9988c2ecf20Sopenharmony_ci pvec.pages, indices); 9998c2ecf20Sopenharmony_ci if (!pvec.nr) { 10008c2ecf20Sopenharmony_ci /* If all gone or hole-punch or unfalloc, we're done */ 10018c2ecf20Sopenharmony_ci if (index == start || end != -1) 10028c2ecf20Sopenharmony_ci break; 10038c2ecf20Sopenharmony_ci /* But if truncating, restart to make sure all gone */ 10048c2ecf20Sopenharmony_ci index = start; 10058c2ecf20Sopenharmony_ci continue; 10068c2ecf20Sopenharmony_ci } 10078c2ecf20Sopenharmony_ci for (i = 0; i < pagevec_count(&pvec); i++) { 10088c2ecf20Sopenharmony_ci struct page *page = pvec.pages[i]; 10098c2ecf20Sopenharmony_ci 10108c2ecf20Sopenharmony_ci index = indices[i]; 10118c2ecf20Sopenharmony_ci if (index >= end) 10128c2ecf20Sopenharmony_ci break; 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_ci if (xa_is_value(page)) { 10158c2ecf20Sopenharmony_ci if (unfalloc) 10168c2ecf20Sopenharmony_ci continue; 10178c2ecf20Sopenharmony_ci if (shmem_free_swap(mapping, index, page)) { 10188c2ecf20Sopenharmony_ci /* Swap was replaced by page: retry */ 10198c2ecf20Sopenharmony_ci index--; 10208c2ecf20Sopenharmony_ci break; 10218c2ecf20Sopenharmony_ci } 10228c2ecf20Sopenharmony_ci nr_swaps_freed++; 10238c2ecf20Sopenharmony_ci continue; 10248c2ecf20Sopenharmony_ci } 10258c2ecf20Sopenharmony_ci 10268c2ecf20Sopenharmony_ci lock_page(page); 
10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_ci if (!unfalloc || !PageUptodate(page)) { 10298c2ecf20Sopenharmony_ci if (page_mapping(page) != mapping) { 10308c2ecf20Sopenharmony_ci /* Page was replaced by swap: retry */ 10318c2ecf20Sopenharmony_ci unlock_page(page); 10328c2ecf20Sopenharmony_ci index--; 10338c2ecf20Sopenharmony_ci break; 10348c2ecf20Sopenharmony_ci } 10358c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageWriteback(page), page); 10368c2ecf20Sopenharmony_ci if (shmem_punch_compound(page, start, end)) 10378c2ecf20Sopenharmony_ci truncate_inode_page(mapping, page); 10388c2ecf20Sopenharmony_ci else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 10398c2ecf20Sopenharmony_ci /* Wipe the page and don't get stuck */ 10408c2ecf20Sopenharmony_ci clear_highpage(page); 10418c2ecf20Sopenharmony_ci flush_dcache_page(page); 10428c2ecf20Sopenharmony_ci set_page_dirty(page); 10438c2ecf20Sopenharmony_ci if (index < 10448c2ecf20Sopenharmony_ci round_up(start, HPAGE_PMD_NR)) 10458c2ecf20Sopenharmony_ci start = index + 1; 10468c2ecf20Sopenharmony_ci } 10478c2ecf20Sopenharmony_ci } 10488c2ecf20Sopenharmony_ci unlock_page(page); 10498c2ecf20Sopenharmony_ci } 10508c2ecf20Sopenharmony_ci pagevec_remove_exceptionals(&pvec); 10518c2ecf20Sopenharmony_ci pagevec_release(&pvec); 10528c2ecf20Sopenharmony_ci index++; 10538c2ecf20Sopenharmony_ci } 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 10568c2ecf20Sopenharmony_ci info->swapped -= nr_swaps_freed; 10578c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 10588c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 10598c2ecf20Sopenharmony_ci} 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_civoid shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 10628c2ecf20Sopenharmony_ci{ 10638c2ecf20Sopenharmony_ci shmem_undo_range(inode, lstart, lend, false); 10648c2ecf20Sopenharmony_ci inode->i_ctime = inode->i_mtime = current_time(inode); 10658c2ecf20Sopenharmony_ci} 
10668c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_truncate_range); 10678c2ecf20Sopenharmony_ci 10688c2ecf20Sopenharmony_cistatic int shmem_getattr(const struct path *path, struct kstat *stat, 10698c2ecf20Sopenharmony_ci u32 request_mask, unsigned int query_flags) 10708c2ecf20Sopenharmony_ci{ 10718c2ecf20Sopenharmony_ci struct inode *inode = path->dentry->d_inode; 10728c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 10738c2ecf20Sopenharmony_ci struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb); 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci if (info->alloced - info->swapped != inode->i_mapping->nrpages) { 10768c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 10778c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 10788c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 10798c2ecf20Sopenharmony_ci } 10808c2ecf20Sopenharmony_ci generic_fillattr(inode, stat); 10818c2ecf20Sopenharmony_ci 10828c2ecf20Sopenharmony_ci if (is_huge_enabled(sb_info)) 10838c2ecf20Sopenharmony_ci stat->blksize = HPAGE_PMD_SIZE; 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci return 0; 10868c2ecf20Sopenharmony_ci} 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_cistatic int shmem_setattr(struct dentry *dentry, struct iattr *attr) 10898c2ecf20Sopenharmony_ci{ 10908c2ecf20Sopenharmony_ci struct inode *inode = d_inode(dentry); 10918c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 10928c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 10938c2ecf20Sopenharmony_ci int error; 10948c2ecf20Sopenharmony_ci 10958c2ecf20Sopenharmony_ci error = setattr_prepare(dentry, attr); 10968c2ecf20Sopenharmony_ci if (error) 10978c2ecf20Sopenharmony_ci return error; 10988c2ecf20Sopenharmony_ci 10998c2ecf20Sopenharmony_ci if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 11008c2ecf20Sopenharmony_ci loff_t oldsize = inode->i_size; 11018c2ecf20Sopenharmony_ci loff_t newsize = attr->ia_size; 
11028c2ecf20Sopenharmony_ci 11038c2ecf20Sopenharmony_ci /* protected by i_mutex */ 11048c2ecf20Sopenharmony_ci if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || 11058c2ecf20Sopenharmony_ci (newsize > oldsize && (info->seals & F_SEAL_GROW))) 11068c2ecf20Sopenharmony_ci return -EPERM; 11078c2ecf20Sopenharmony_ci 11088c2ecf20Sopenharmony_ci if (newsize != oldsize) { 11098c2ecf20Sopenharmony_ci error = shmem_reacct_size(SHMEM_I(inode)->flags, 11108c2ecf20Sopenharmony_ci oldsize, newsize); 11118c2ecf20Sopenharmony_ci if (error) 11128c2ecf20Sopenharmony_ci return error; 11138c2ecf20Sopenharmony_ci i_size_write(inode, newsize); 11148c2ecf20Sopenharmony_ci inode->i_ctime = inode->i_mtime = current_time(inode); 11158c2ecf20Sopenharmony_ci } 11168c2ecf20Sopenharmony_ci if (newsize <= oldsize) { 11178c2ecf20Sopenharmony_ci loff_t holebegin = round_up(newsize, PAGE_SIZE); 11188c2ecf20Sopenharmony_ci if (oldsize > holebegin) 11198c2ecf20Sopenharmony_ci unmap_mapping_range(inode->i_mapping, 11208c2ecf20Sopenharmony_ci holebegin, 0, 1); 11218c2ecf20Sopenharmony_ci if (info->alloced) 11228c2ecf20Sopenharmony_ci shmem_truncate_range(inode, 11238c2ecf20Sopenharmony_ci newsize, (loff_t)-1); 11248c2ecf20Sopenharmony_ci /* unmap again to remove racily COWed private pages */ 11258c2ecf20Sopenharmony_ci if (oldsize > holebegin) 11268c2ecf20Sopenharmony_ci unmap_mapping_range(inode->i_mapping, 11278c2ecf20Sopenharmony_ci holebegin, 0, 1); 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_ci /* 11308c2ecf20Sopenharmony_ci * Part of the huge page can be beyond i_size: subject 11318c2ecf20Sopenharmony_ci * to shrink under memory pressure. 
11328c2ecf20Sopenharmony_ci */ 11338c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 11348c2ecf20Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 11358c2ecf20Sopenharmony_ci /* 11368c2ecf20Sopenharmony_ci * _careful to defend against unlocked access to 11378c2ecf20Sopenharmony_ci * ->shrink_list in shmem_unused_huge_shrink() 11388c2ecf20Sopenharmony_ci */ 11398c2ecf20Sopenharmony_ci if (list_empty_careful(&info->shrinklist)) { 11408c2ecf20Sopenharmony_ci list_add_tail(&info->shrinklist, 11418c2ecf20Sopenharmony_ci &sbinfo->shrinklist); 11428c2ecf20Sopenharmony_ci sbinfo->shrinklist_len++; 11438c2ecf20Sopenharmony_ci } 11448c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 11458c2ecf20Sopenharmony_ci } 11468c2ecf20Sopenharmony_ci } 11478c2ecf20Sopenharmony_ci } 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ci setattr_copy(inode, attr); 11508c2ecf20Sopenharmony_ci if (attr->ia_valid & ATTR_MODE) 11518c2ecf20Sopenharmony_ci error = posix_acl_chmod(inode, inode->i_mode); 11528c2ecf20Sopenharmony_ci return error; 11538c2ecf20Sopenharmony_ci} 11548c2ecf20Sopenharmony_ci 11558c2ecf20Sopenharmony_cistatic void shmem_evict_inode(struct inode *inode) 11568c2ecf20Sopenharmony_ci{ 11578c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 11588c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci if (inode->i_mapping->a_ops == &shmem_aops) { 11618c2ecf20Sopenharmony_ci shmem_unacct_size(info->flags, inode->i_size); 11628c2ecf20Sopenharmony_ci inode->i_size = 0; 11638c2ecf20Sopenharmony_ci shmem_truncate_range(inode, 0, (loff_t)-1); 11648c2ecf20Sopenharmony_ci if (!list_empty(&info->shrinklist)) { 11658c2ecf20Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 11668c2ecf20Sopenharmony_ci if (!list_empty(&info->shrinklist)) { 11678c2ecf20Sopenharmony_ci list_del_init(&info->shrinklist); 11688c2ecf20Sopenharmony_ci sbinfo->shrinklist_len--; 
11698c2ecf20Sopenharmony_ci } 11708c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 11718c2ecf20Sopenharmony_ci } 11728c2ecf20Sopenharmony_ci while (!list_empty(&info->swaplist)) { 11738c2ecf20Sopenharmony_ci /* Wait while shmem_unuse() is scanning this inode... */ 11748c2ecf20Sopenharmony_ci wait_var_event(&info->stop_eviction, 11758c2ecf20Sopenharmony_ci !atomic_read(&info->stop_eviction)); 11768c2ecf20Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 11778c2ecf20Sopenharmony_ci /* ...but beware of the race if we peeked too early */ 11788c2ecf20Sopenharmony_ci if (!atomic_read(&info->stop_eviction)) 11798c2ecf20Sopenharmony_ci list_del_init(&info->swaplist); 11808c2ecf20Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 11818c2ecf20Sopenharmony_ci } 11828c2ecf20Sopenharmony_ci } 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci simple_xattrs_free(&info->xattrs); 11858c2ecf20Sopenharmony_ci WARN_ON(inode->i_blocks); 11868c2ecf20Sopenharmony_ci shmem_free_inode(inode->i_sb); 11878c2ecf20Sopenharmony_ci clear_inode(inode); 11888c2ecf20Sopenharmony_ci} 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ciextern struct swap_info_struct *swap_info[]; 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_cistatic int shmem_find_swap_entries(struct address_space *mapping, 11938c2ecf20Sopenharmony_ci pgoff_t start, unsigned int nr_entries, 11948c2ecf20Sopenharmony_ci struct page **entries, pgoff_t *indices, 11958c2ecf20Sopenharmony_ci unsigned int type, bool frontswap) 11968c2ecf20Sopenharmony_ci{ 11978c2ecf20Sopenharmony_ci XA_STATE(xas, &mapping->i_pages, start); 11988c2ecf20Sopenharmony_ci struct page *page; 11998c2ecf20Sopenharmony_ci swp_entry_t entry; 12008c2ecf20Sopenharmony_ci unsigned int ret = 0; 12018c2ecf20Sopenharmony_ci 12028c2ecf20Sopenharmony_ci if (!nr_entries) 12038c2ecf20Sopenharmony_ci return 0; 12048c2ecf20Sopenharmony_ci 12058c2ecf20Sopenharmony_ci rcu_read_lock(); 12068c2ecf20Sopenharmony_ci xas_for_each(&xas, page, ULONG_MAX) 
{ 12078c2ecf20Sopenharmony_ci if (xas_retry(&xas, page)) 12088c2ecf20Sopenharmony_ci continue; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci if (!xa_is_value(page)) 12118c2ecf20Sopenharmony_ci continue; 12128c2ecf20Sopenharmony_ci 12138c2ecf20Sopenharmony_ci entry = radix_to_swp_entry(page); 12148c2ecf20Sopenharmony_ci if (swp_type(entry) != type) 12158c2ecf20Sopenharmony_ci continue; 12168c2ecf20Sopenharmony_ci if (frontswap && 12178c2ecf20Sopenharmony_ci !frontswap_test(swap_info[type], swp_offset(entry))) 12188c2ecf20Sopenharmony_ci continue; 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_ci indices[ret] = xas.xa_index; 12218c2ecf20Sopenharmony_ci entries[ret] = page; 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci if (need_resched()) { 12248c2ecf20Sopenharmony_ci xas_pause(&xas); 12258c2ecf20Sopenharmony_ci cond_resched_rcu(); 12268c2ecf20Sopenharmony_ci } 12278c2ecf20Sopenharmony_ci if (++ret == nr_entries) 12288c2ecf20Sopenharmony_ci break; 12298c2ecf20Sopenharmony_ci } 12308c2ecf20Sopenharmony_ci rcu_read_unlock(); 12318c2ecf20Sopenharmony_ci 12328c2ecf20Sopenharmony_ci return ret; 12338c2ecf20Sopenharmony_ci} 12348c2ecf20Sopenharmony_ci 12358c2ecf20Sopenharmony_ci/* 12368c2ecf20Sopenharmony_ci * Move the swapped pages for an inode to page cache. Returns the count 12378c2ecf20Sopenharmony_ci * of pages swapped in, or the error in case of failure. 
12388c2ecf20Sopenharmony_ci */ 12398c2ecf20Sopenharmony_cistatic int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec, 12408c2ecf20Sopenharmony_ci pgoff_t *indices) 12418c2ecf20Sopenharmony_ci{ 12428c2ecf20Sopenharmony_ci int i = 0; 12438c2ecf20Sopenharmony_ci int ret = 0; 12448c2ecf20Sopenharmony_ci int error = 0; 12458c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_ci for (i = 0; i < pvec.nr; i++) { 12488c2ecf20Sopenharmony_ci struct page *page = pvec.pages[i]; 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci if (!xa_is_value(page)) 12518c2ecf20Sopenharmony_ci continue; 12528c2ecf20Sopenharmony_ci error = shmem_swapin_page(inode, indices[i], 12538c2ecf20Sopenharmony_ci &page, SGP_CACHE, 12548c2ecf20Sopenharmony_ci mapping_gfp_mask(mapping), 12558c2ecf20Sopenharmony_ci NULL, NULL); 12568c2ecf20Sopenharmony_ci if (error == 0) { 12578c2ecf20Sopenharmony_ci unlock_page(page); 12588c2ecf20Sopenharmony_ci put_page(page); 12598c2ecf20Sopenharmony_ci ret++; 12608c2ecf20Sopenharmony_ci } 12618c2ecf20Sopenharmony_ci if (error == -ENOMEM) 12628c2ecf20Sopenharmony_ci break; 12638c2ecf20Sopenharmony_ci error = 0; 12648c2ecf20Sopenharmony_ci } 12658c2ecf20Sopenharmony_ci return error ? error : ret; 12668c2ecf20Sopenharmony_ci} 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_ci/* 12698c2ecf20Sopenharmony_ci * If swap found in inode, free it and move page from swapcache to filecache. 
12708c2ecf20Sopenharmony_ci */ 12718c2ecf20Sopenharmony_cistatic int shmem_unuse_inode(struct inode *inode, unsigned int type, 12728c2ecf20Sopenharmony_ci bool frontswap, unsigned long *fs_pages_to_unuse) 12738c2ecf20Sopenharmony_ci{ 12748c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 12758c2ecf20Sopenharmony_ci pgoff_t start = 0; 12768c2ecf20Sopenharmony_ci struct pagevec pvec; 12778c2ecf20Sopenharmony_ci pgoff_t indices[PAGEVEC_SIZE]; 12788c2ecf20Sopenharmony_ci bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0); 12798c2ecf20Sopenharmony_ci int ret = 0; 12808c2ecf20Sopenharmony_ci 12818c2ecf20Sopenharmony_ci pagevec_init(&pvec); 12828c2ecf20Sopenharmony_ci do { 12838c2ecf20Sopenharmony_ci unsigned int nr_entries = PAGEVEC_SIZE; 12848c2ecf20Sopenharmony_ci 12858c2ecf20Sopenharmony_ci if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE) 12868c2ecf20Sopenharmony_ci nr_entries = *fs_pages_to_unuse; 12878c2ecf20Sopenharmony_ci 12888c2ecf20Sopenharmony_ci pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, 12898c2ecf20Sopenharmony_ci pvec.pages, indices, 12908c2ecf20Sopenharmony_ci type, frontswap); 12918c2ecf20Sopenharmony_ci if (pvec.nr == 0) { 12928c2ecf20Sopenharmony_ci ret = 0; 12938c2ecf20Sopenharmony_ci break; 12948c2ecf20Sopenharmony_ci } 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_ci ret = shmem_unuse_swap_entries(inode, pvec, indices); 12978c2ecf20Sopenharmony_ci if (ret < 0) 12988c2ecf20Sopenharmony_ci break; 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci if (frontswap_partial) { 13018c2ecf20Sopenharmony_ci *fs_pages_to_unuse -= ret; 13028c2ecf20Sopenharmony_ci if (*fs_pages_to_unuse == 0) { 13038c2ecf20Sopenharmony_ci ret = FRONTSWAP_PAGES_UNUSED; 13048c2ecf20Sopenharmony_ci break; 13058c2ecf20Sopenharmony_ci } 13068c2ecf20Sopenharmony_ci } 13078c2ecf20Sopenharmony_ci 13088c2ecf20Sopenharmony_ci start = indices[pvec.nr - 1]; 13098c2ecf20Sopenharmony_ci } while (true); 
13108c2ecf20Sopenharmony_ci 13118c2ecf20Sopenharmony_ci return ret; 13128c2ecf20Sopenharmony_ci} 13138c2ecf20Sopenharmony_ci 13148c2ecf20Sopenharmony_ci/* 13158c2ecf20Sopenharmony_ci * Read all the shared memory data that resides in the swap 13168c2ecf20Sopenharmony_ci * device 'type' back into memory, so the swap device can be 13178c2ecf20Sopenharmony_ci * unused. 13188c2ecf20Sopenharmony_ci */ 13198c2ecf20Sopenharmony_ciint shmem_unuse(unsigned int type, bool frontswap, 13208c2ecf20Sopenharmony_ci unsigned long *fs_pages_to_unuse) 13218c2ecf20Sopenharmony_ci{ 13228c2ecf20Sopenharmony_ci struct shmem_inode_info *info, *next; 13238c2ecf20Sopenharmony_ci int error = 0; 13248c2ecf20Sopenharmony_ci 13258c2ecf20Sopenharmony_ci if (list_empty(&shmem_swaplist)) 13268c2ecf20Sopenharmony_ci return 0; 13278c2ecf20Sopenharmony_ci 13288c2ecf20Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 13298c2ecf20Sopenharmony_ci list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { 13308c2ecf20Sopenharmony_ci if (!info->swapped) { 13318c2ecf20Sopenharmony_ci list_del_init(&info->swaplist); 13328c2ecf20Sopenharmony_ci continue; 13338c2ecf20Sopenharmony_ci } 13348c2ecf20Sopenharmony_ci /* 13358c2ecf20Sopenharmony_ci * Drop the swaplist mutex while searching the inode for swap; 13368c2ecf20Sopenharmony_ci * but before doing so, make sure shmem_evict_inode() will not 13378c2ecf20Sopenharmony_ci * remove placeholder inode from swaplist, nor let it be freed 13388c2ecf20Sopenharmony_ci * (igrab() would protect from unlink, but not from unmount). 
13398c2ecf20Sopenharmony_ci */ 13408c2ecf20Sopenharmony_ci atomic_inc(&info->stop_eviction); 13418c2ecf20Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_ci error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, 13448c2ecf20Sopenharmony_ci fs_pages_to_unuse); 13458c2ecf20Sopenharmony_ci cond_resched(); 13468c2ecf20Sopenharmony_ci 13478c2ecf20Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 13488c2ecf20Sopenharmony_ci next = list_next_entry(info, swaplist); 13498c2ecf20Sopenharmony_ci if (!info->swapped) 13508c2ecf20Sopenharmony_ci list_del_init(&info->swaplist); 13518c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&info->stop_eviction)) 13528c2ecf20Sopenharmony_ci wake_up_var(&info->stop_eviction); 13538c2ecf20Sopenharmony_ci if (error) 13548c2ecf20Sopenharmony_ci break; 13558c2ecf20Sopenharmony_ci } 13568c2ecf20Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 13578c2ecf20Sopenharmony_ci 13588c2ecf20Sopenharmony_ci return error; 13598c2ecf20Sopenharmony_ci} 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci/* 13628c2ecf20Sopenharmony_ci * Move the page from the page cache to the swap cache. 
13638c2ecf20Sopenharmony_ci */ 13648c2ecf20Sopenharmony_cistatic int shmem_writepage(struct page *page, struct writeback_control *wbc) 13658c2ecf20Sopenharmony_ci{ 13668c2ecf20Sopenharmony_ci struct shmem_inode_info *info; 13678c2ecf20Sopenharmony_ci struct address_space *mapping; 13688c2ecf20Sopenharmony_ci struct inode *inode; 13698c2ecf20Sopenharmony_ci swp_entry_t swap; 13708c2ecf20Sopenharmony_ci pgoff_t index; 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageCompound(page), page); 13738c2ecf20Sopenharmony_ci BUG_ON(!PageLocked(page)); 13748c2ecf20Sopenharmony_ci mapping = page->mapping; 13758c2ecf20Sopenharmony_ci index = page->index; 13768c2ecf20Sopenharmony_ci inode = mapping->host; 13778c2ecf20Sopenharmony_ci info = SHMEM_I(inode); 13788c2ecf20Sopenharmony_ci if (info->flags & VM_LOCKED) 13798c2ecf20Sopenharmony_ci goto redirty; 13808c2ecf20Sopenharmony_ci if (!total_swap_pages) 13818c2ecf20Sopenharmony_ci goto redirty; 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci /* 13848c2ecf20Sopenharmony_ci * Our capabilities prevent regular writeback or sync from ever calling 13858c2ecf20Sopenharmony_ci * shmem_writepage; but a stacking filesystem might use ->writepage of 13868c2ecf20Sopenharmony_ci * its underlying filesystem, in which case tmpfs should write out to 13878c2ecf20Sopenharmony_ci * swap only in response to memory pressure, and not for the writeback 13888c2ecf20Sopenharmony_ci * threads or sync. 13898c2ecf20Sopenharmony_ci */ 13908c2ecf20Sopenharmony_ci if (!wbc->for_reclaim) { 13918c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); /* Still happens? Tell us about it! 
*/ 13928c2ecf20Sopenharmony_ci goto redirty; 13938c2ecf20Sopenharmony_ci } 13948c2ecf20Sopenharmony_ci 13958c2ecf20Sopenharmony_ci /* 13968c2ecf20Sopenharmony_ci * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC 13978c2ecf20Sopenharmony_ci * value into swapfile.c, the only way we can correctly account for a 13988c2ecf20Sopenharmony_ci * fallocated page arriving here is now to initialize it and write it. 13998c2ecf20Sopenharmony_ci * 14008c2ecf20Sopenharmony_ci * That's okay for a page already fallocated earlier, but if we have 14018c2ecf20Sopenharmony_ci * not yet completed the fallocation, then (a) we want to keep track 14028c2ecf20Sopenharmony_ci * of this page in case we have to undo it, and (b) it may not be a 14038c2ecf20Sopenharmony_ci * good idea to continue anyway, once we're pushing into swap. So 14048c2ecf20Sopenharmony_ci * reactivate the page, and let shmem_fallocate() quit when too many. 14058c2ecf20Sopenharmony_ci */ 14068c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 14078c2ecf20Sopenharmony_ci if (inode->i_private) { 14088c2ecf20Sopenharmony_ci struct shmem_falloc *shmem_falloc; 14098c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 14108c2ecf20Sopenharmony_ci shmem_falloc = inode->i_private; 14118c2ecf20Sopenharmony_ci if (shmem_falloc && 14128c2ecf20Sopenharmony_ci !shmem_falloc->waitq && 14138c2ecf20Sopenharmony_ci index >= shmem_falloc->start && 14148c2ecf20Sopenharmony_ci index < shmem_falloc->next) 14158c2ecf20Sopenharmony_ci shmem_falloc->nr_unswapped++; 14168c2ecf20Sopenharmony_ci else 14178c2ecf20Sopenharmony_ci shmem_falloc = NULL; 14188c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 14198c2ecf20Sopenharmony_ci if (shmem_falloc) 14208c2ecf20Sopenharmony_ci goto redirty; 14218c2ecf20Sopenharmony_ci } 14228c2ecf20Sopenharmony_ci clear_highpage(page); 14238c2ecf20Sopenharmony_ci flush_dcache_page(page); 14248c2ecf20Sopenharmony_ci SetPageUptodate(page); 14258c2ecf20Sopenharmony_ci } 14268c2ecf20Sopenharmony_ci 
14278c2ecf20Sopenharmony_ci swap = get_swap_page(page); 14288c2ecf20Sopenharmony_ci if (!swap.val) 14298c2ecf20Sopenharmony_ci goto redirty; 14308c2ecf20Sopenharmony_ci 14318c2ecf20Sopenharmony_ci /* 14328c2ecf20Sopenharmony_ci * Add inode to shmem_unuse()'s list of swapped-out inodes, 14338c2ecf20Sopenharmony_ci * if it's not already there. Do it now before the page is 14348c2ecf20Sopenharmony_ci * moved to swap cache, when its pagelock no longer protects 14358c2ecf20Sopenharmony_ci * the inode from eviction. But don't unlock the mutex until 14368c2ecf20Sopenharmony_ci * we've incremented swapped, because shmem_unuse_inode() will 14378c2ecf20Sopenharmony_ci * prune a !swapped inode from the swaplist under this mutex. 14388c2ecf20Sopenharmony_ci */ 14398c2ecf20Sopenharmony_ci mutex_lock(&shmem_swaplist_mutex); 14408c2ecf20Sopenharmony_ci if (list_empty(&info->swaplist)) 14418c2ecf20Sopenharmony_ci list_add(&info->swaplist, &shmem_swaplist); 14428c2ecf20Sopenharmony_ci 14438c2ecf20Sopenharmony_ci if (add_to_swap_cache(page, swap, 14448c2ecf20Sopenharmony_ci __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, 14458c2ecf20Sopenharmony_ci NULL) == 0) { 14468c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 14478c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 14488c2ecf20Sopenharmony_ci info->swapped++; 14498c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_ci swap_shmem_alloc(swap); 14528c2ecf20Sopenharmony_ci shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); 14538c2ecf20Sopenharmony_ci 14548c2ecf20Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 14558c2ecf20Sopenharmony_ci BUG_ON(page_mapped(page)); 14568c2ecf20Sopenharmony_ci swap_writepage(page, wbc); 14578c2ecf20Sopenharmony_ci return 0; 14588c2ecf20Sopenharmony_ci } 14598c2ecf20Sopenharmony_ci 14608c2ecf20Sopenharmony_ci mutex_unlock(&shmem_swaplist_mutex); 14618c2ecf20Sopenharmony_ci put_swap_page(page, swap); 14628c2ecf20Sopenharmony_ciredirty: 
14638c2ecf20Sopenharmony_ci set_page_dirty(page); 14648c2ecf20Sopenharmony_ci if (wbc->for_reclaim) 14658c2ecf20Sopenharmony_ci return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */ 14668c2ecf20Sopenharmony_ci unlock_page(page); 14678c2ecf20Sopenharmony_ci return 0; 14688c2ecf20Sopenharmony_ci} 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS) 14718c2ecf20Sopenharmony_cistatic void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) 14728c2ecf20Sopenharmony_ci{ 14738c2ecf20Sopenharmony_ci char buffer[64]; 14748c2ecf20Sopenharmony_ci 14758c2ecf20Sopenharmony_ci if (!mpol || mpol->mode == MPOL_DEFAULT) 14768c2ecf20Sopenharmony_ci return; /* show nothing */ 14778c2ecf20Sopenharmony_ci 14788c2ecf20Sopenharmony_ci mpol_to_str(buffer, sizeof(buffer), mpol); 14798c2ecf20Sopenharmony_ci 14808c2ecf20Sopenharmony_ci seq_printf(seq, ",mpol=%s", buffer); 14818c2ecf20Sopenharmony_ci} 14828c2ecf20Sopenharmony_ci 14838c2ecf20Sopenharmony_cistatic struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) 14848c2ecf20Sopenharmony_ci{ 14858c2ecf20Sopenharmony_ci struct mempolicy *mpol = NULL; 14868c2ecf20Sopenharmony_ci if (sbinfo->mpol) { 14878c2ecf20Sopenharmony_ci spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ 14888c2ecf20Sopenharmony_ci mpol = sbinfo->mpol; 14898c2ecf20Sopenharmony_ci mpol_get(mpol); 14908c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->stat_lock); 14918c2ecf20Sopenharmony_ci } 14928c2ecf20Sopenharmony_ci return mpol; 14938c2ecf20Sopenharmony_ci} 14948c2ecf20Sopenharmony_ci#else /* !CONFIG_NUMA || !CONFIG_TMPFS */ 14958c2ecf20Sopenharmony_cistatic inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) 14968c2ecf20Sopenharmony_ci{ 14978c2ecf20Sopenharmony_ci} 14988c2ecf20Sopenharmony_cistatic inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) 14998c2ecf20Sopenharmony_ci{ 15008c2ecf20Sopenharmony_ci return NULL; 
15018c2ecf20Sopenharmony_ci} 15028c2ecf20Sopenharmony_ci#endif /* CONFIG_NUMA && CONFIG_TMPFS */ 15038c2ecf20Sopenharmony_ci#ifndef CONFIG_NUMA 15048c2ecf20Sopenharmony_ci#define vm_policy vm_private_data 15058c2ecf20Sopenharmony_ci#endif 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_cistatic void shmem_pseudo_vma_init(struct vm_area_struct *vma, 15088c2ecf20Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 15098c2ecf20Sopenharmony_ci{ 15108c2ecf20Sopenharmony_ci /* Create a pseudo vma that just contains the policy */ 15118c2ecf20Sopenharmony_ci vma_init(vma, NULL); 15128c2ecf20Sopenharmony_ci /* Bias interleave by inode number to distribute better across nodes */ 15138c2ecf20Sopenharmony_ci vma->vm_pgoff = index + info->vfs_inode.i_ino; 15148c2ecf20Sopenharmony_ci vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); 15158c2ecf20Sopenharmony_ci} 15168c2ecf20Sopenharmony_ci 15178c2ecf20Sopenharmony_cistatic void shmem_pseudo_vma_destroy(struct vm_area_struct *vma) 15188c2ecf20Sopenharmony_ci{ 15198c2ecf20Sopenharmony_ci /* Drop reference taken by mpol_shared_policy_lookup() */ 15208c2ecf20Sopenharmony_ci mpol_cond_put(vma->vm_policy); 15218c2ecf20Sopenharmony_ci} 15228c2ecf20Sopenharmony_ci 15238c2ecf20Sopenharmony_cistatic struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, 15248c2ecf20Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 15258c2ecf20Sopenharmony_ci{ 15268c2ecf20Sopenharmony_ci struct vm_area_struct pvma; 15278c2ecf20Sopenharmony_ci struct page *page; 15288c2ecf20Sopenharmony_ci struct vm_fault vmf; 15298c2ecf20Sopenharmony_ci 15308c2ecf20Sopenharmony_ci shmem_pseudo_vma_init(&pvma, info, index); 15318c2ecf20Sopenharmony_ci vmf.vma = &pvma; 15328c2ecf20Sopenharmony_ci vmf.address = 0; 15338c2ecf20Sopenharmony_ci page = swap_cluster_readahead(swap, gfp, &vmf); 15348c2ecf20Sopenharmony_ci shmem_pseudo_vma_destroy(&pvma); 15358c2ecf20Sopenharmony_ci 15368c2ecf20Sopenharmony_ci return page; 
15378c2ecf20Sopenharmony_ci} 15388c2ecf20Sopenharmony_ci 15398c2ecf20Sopenharmony_cistatic struct page *shmem_alloc_hugepage(gfp_t gfp, 15408c2ecf20Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 15418c2ecf20Sopenharmony_ci{ 15428c2ecf20Sopenharmony_ci struct vm_area_struct pvma; 15438c2ecf20Sopenharmony_ci struct address_space *mapping = info->vfs_inode.i_mapping; 15448c2ecf20Sopenharmony_ci pgoff_t hindex; 15458c2ecf20Sopenharmony_ci struct page *page; 15468c2ecf20Sopenharmony_ci 15478c2ecf20Sopenharmony_ci hindex = round_down(index, HPAGE_PMD_NR); 15488c2ecf20Sopenharmony_ci if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, 15498c2ecf20Sopenharmony_ci XA_PRESENT)) 15508c2ecf20Sopenharmony_ci return NULL; 15518c2ecf20Sopenharmony_ci 15528c2ecf20Sopenharmony_ci shmem_pseudo_vma_init(&pvma, info, hindex); 15538c2ecf20Sopenharmony_ci page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, 15548c2ecf20Sopenharmony_ci HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true); 15558c2ecf20Sopenharmony_ci shmem_pseudo_vma_destroy(&pvma); 15568c2ecf20Sopenharmony_ci if (page) 15578c2ecf20Sopenharmony_ci prep_transhuge_page(page); 15588c2ecf20Sopenharmony_ci else 15598c2ecf20Sopenharmony_ci count_vm_event(THP_FILE_FALLBACK); 15608c2ecf20Sopenharmony_ci return page; 15618c2ecf20Sopenharmony_ci} 15628c2ecf20Sopenharmony_ci 15638c2ecf20Sopenharmony_cistatic struct page *shmem_alloc_page(gfp_t gfp, 15648c2ecf20Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 15658c2ecf20Sopenharmony_ci{ 15668c2ecf20Sopenharmony_ci struct vm_area_struct pvma; 15678c2ecf20Sopenharmony_ci struct page *page; 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci shmem_pseudo_vma_init(&pvma, info, index); 15708c2ecf20Sopenharmony_ci page = alloc_page_vma(gfp, &pvma, 0); 15718c2ecf20Sopenharmony_ci shmem_pseudo_vma_destroy(&pvma); 15728c2ecf20Sopenharmony_ci 15738c2ecf20Sopenharmony_ci return page; 15748c2ecf20Sopenharmony_ci} 
15758c2ecf20Sopenharmony_ci 15768c2ecf20Sopenharmony_cistatic struct page *shmem_alloc_and_acct_page(gfp_t gfp, 15778c2ecf20Sopenharmony_ci struct inode *inode, 15788c2ecf20Sopenharmony_ci pgoff_t index, bool huge) 15798c2ecf20Sopenharmony_ci{ 15808c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 15818c2ecf20Sopenharmony_ci struct page *page; 15828c2ecf20Sopenharmony_ci int nr; 15838c2ecf20Sopenharmony_ci int err = -ENOSPC; 15848c2ecf20Sopenharmony_ci 15858c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 15868c2ecf20Sopenharmony_ci huge = false; 15878c2ecf20Sopenharmony_ci nr = huge ? HPAGE_PMD_NR : 1; 15888c2ecf20Sopenharmony_ci 15898c2ecf20Sopenharmony_ci if (!shmem_inode_acct_block(inode, nr)) 15908c2ecf20Sopenharmony_ci goto failed; 15918c2ecf20Sopenharmony_ci 15928c2ecf20Sopenharmony_ci if (huge) 15938c2ecf20Sopenharmony_ci page = shmem_alloc_hugepage(gfp, info, index); 15948c2ecf20Sopenharmony_ci else 15958c2ecf20Sopenharmony_ci page = shmem_alloc_page(gfp, info, index); 15968c2ecf20Sopenharmony_ci if (page) { 15978c2ecf20Sopenharmony_ci __SetPageLocked(page); 15988c2ecf20Sopenharmony_ci __SetPageSwapBacked(page); 15998c2ecf20Sopenharmony_ci return page; 16008c2ecf20Sopenharmony_ci } 16018c2ecf20Sopenharmony_ci 16028c2ecf20Sopenharmony_ci err = -ENOMEM; 16038c2ecf20Sopenharmony_ci shmem_inode_unacct_blocks(inode, nr); 16048c2ecf20Sopenharmony_cifailed: 16058c2ecf20Sopenharmony_ci return ERR_PTR(err); 16068c2ecf20Sopenharmony_ci} 16078c2ecf20Sopenharmony_ci 16088c2ecf20Sopenharmony_ci/* 16098c2ecf20Sopenharmony_ci * When a page is moved from swapcache to shmem filecache (either by the 16108c2ecf20Sopenharmony_ci * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of 16118c2ecf20Sopenharmony_ci * shmem_unuse_inode()), it may have been read in earlier from swap, in 16128c2ecf20Sopenharmony_ci * ignorance of the mapping it belongs to. 
If that mapping has special 16138c2ecf20Sopenharmony_ci * constraints (like the gma500 GEM driver, which requires RAM below 4GB), 16148c2ecf20Sopenharmony_ci * we may need to copy to a suitable page before moving to filecache. 16158c2ecf20Sopenharmony_ci * 16168c2ecf20Sopenharmony_ci * In a future release, this may well be extended to respect cpuset and 16178c2ecf20Sopenharmony_ci * NUMA mempolicy, and applied also to anonymous pages in do_swap_page(); 16188c2ecf20Sopenharmony_ci * but for now it is a simple matter of zone. 16198c2ecf20Sopenharmony_ci */ 16208c2ecf20Sopenharmony_cistatic bool shmem_should_replace_page(struct page *page, gfp_t gfp) 16218c2ecf20Sopenharmony_ci{ 16228c2ecf20Sopenharmony_ci return page_zonenum(page) > gfp_zone(gfp); 16238c2ecf20Sopenharmony_ci} 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_cistatic int shmem_replace_page(struct page **pagep, gfp_t gfp, 16268c2ecf20Sopenharmony_ci struct shmem_inode_info *info, pgoff_t index) 16278c2ecf20Sopenharmony_ci{ 16288c2ecf20Sopenharmony_ci struct page *oldpage, *newpage; 16298c2ecf20Sopenharmony_ci struct address_space *swap_mapping; 16308c2ecf20Sopenharmony_ci swp_entry_t entry; 16318c2ecf20Sopenharmony_ci pgoff_t swap_index; 16328c2ecf20Sopenharmony_ci int error; 16338c2ecf20Sopenharmony_ci 16348c2ecf20Sopenharmony_ci oldpage = *pagep; 16358c2ecf20Sopenharmony_ci entry.val = page_private(oldpage); 16368c2ecf20Sopenharmony_ci swap_index = swp_offset(entry); 16378c2ecf20Sopenharmony_ci swap_mapping = page_mapping(oldpage); 16388c2ecf20Sopenharmony_ci 16398c2ecf20Sopenharmony_ci /* 16408c2ecf20Sopenharmony_ci * We have arrived here because our zones are constrained, so don't 16418c2ecf20Sopenharmony_ci * limit chance of success by further cpuset and node constraints. 
16428c2ecf20Sopenharmony_ci */ 16438c2ecf20Sopenharmony_ci gfp &= ~GFP_CONSTRAINT_MASK; 16448c2ecf20Sopenharmony_ci newpage = shmem_alloc_page(gfp, info, index); 16458c2ecf20Sopenharmony_ci if (!newpage) 16468c2ecf20Sopenharmony_ci return -ENOMEM; 16478c2ecf20Sopenharmony_ci 16488c2ecf20Sopenharmony_ci get_page(newpage); 16498c2ecf20Sopenharmony_ci copy_highpage(newpage, oldpage); 16508c2ecf20Sopenharmony_ci flush_dcache_page(newpage); 16518c2ecf20Sopenharmony_ci 16528c2ecf20Sopenharmony_ci __SetPageLocked(newpage); 16538c2ecf20Sopenharmony_ci __SetPageSwapBacked(newpage); 16548c2ecf20Sopenharmony_ci SetPageUptodate(newpage); 16558c2ecf20Sopenharmony_ci set_page_private(newpage, entry.val); 16568c2ecf20Sopenharmony_ci SetPageSwapCache(newpage); 16578c2ecf20Sopenharmony_ci 16588c2ecf20Sopenharmony_ci /* 16598c2ecf20Sopenharmony_ci * Our caller will very soon move newpage out of swapcache, but it's 16608c2ecf20Sopenharmony_ci * a nice clean interface for us to replace oldpage by newpage there. 16618c2ecf20Sopenharmony_ci */ 16628c2ecf20Sopenharmony_ci xa_lock_irq(&swap_mapping->i_pages); 16638c2ecf20Sopenharmony_ci error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); 16648c2ecf20Sopenharmony_ci if (!error) { 16658c2ecf20Sopenharmony_ci mem_cgroup_migrate(oldpage, newpage); 16668c2ecf20Sopenharmony_ci __inc_lruvec_page_state(newpage, NR_FILE_PAGES); 16678c2ecf20Sopenharmony_ci __dec_lruvec_page_state(oldpage, NR_FILE_PAGES); 16688c2ecf20Sopenharmony_ci } 16698c2ecf20Sopenharmony_ci xa_unlock_irq(&swap_mapping->i_pages); 16708c2ecf20Sopenharmony_ci 16718c2ecf20Sopenharmony_ci if (unlikely(error)) { 16728c2ecf20Sopenharmony_ci /* 16738c2ecf20Sopenharmony_ci * Is this possible? I think not, now that our callers check 16748c2ecf20Sopenharmony_ci * both PageSwapCache and page_private after getting page lock; 16758c2ecf20Sopenharmony_ci * but be defensive. Reverse old to newpage for clear and free. 
16768c2ecf20Sopenharmony_ci */ 16778c2ecf20Sopenharmony_ci oldpage = newpage; 16788c2ecf20Sopenharmony_ci } else { 16798c2ecf20Sopenharmony_ci lru_cache_add(newpage); 16808c2ecf20Sopenharmony_ci *pagep = newpage; 16818c2ecf20Sopenharmony_ci } 16828c2ecf20Sopenharmony_ci 16838c2ecf20Sopenharmony_ci ClearPageSwapCache(oldpage); 16848c2ecf20Sopenharmony_ci set_page_private(oldpage, 0); 16858c2ecf20Sopenharmony_ci 16868c2ecf20Sopenharmony_ci unlock_page(oldpage); 16878c2ecf20Sopenharmony_ci put_page(oldpage); 16888c2ecf20Sopenharmony_ci put_page(oldpage); 16898c2ecf20Sopenharmony_ci return error; 16908c2ecf20Sopenharmony_ci} 16918c2ecf20Sopenharmony_ci 16928c2ecf20Sopenharmony_ci/* 16938c2ecf20Sopenharmony_ci * Swap in the page pointed to by *pagep. 16948c2ecf20Sopenharmony_ci * Caller has to make sure that *pagep contains a valid swapped page. 16958c2ecf20Sopenharmony_ci * Returns 0 and the page in pagep if success. On failure, returns the 16968c2ecf20Sopenharmony_ci * error code and NULL in *pagep. 16978c2ecf20Sopenharmony_ci */ 16988c2ecf20Sopenharmony_cistatic int shmem_swapin_page(struct inode *inode, pgoff_t index, 16998c2ecf20Sopenharmony_ci struct page **pagep, enum sgp_type sgp, 17008c2ecf20Sopenharmony_ci gfp_t gfp, struct vm_area_struct *vma, 17018c2ecf20Sopenharmony_ci vm_fault_t *fault_type) 17028c2ecf20Sopenharmony_ci{ 17038c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 17048c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 17058c2ecf20Sopenharmony_ci struct mm_struct *charge_mm = vma ? 
vma->vm_mm : current->mm; 17068c2ecf20Sopenharmony_ci struct page *page; 17078c2ecf20Sopenharmony_ci swp_entry_t swap; 17088c2ecf20Sopenharmony_ci int error; 17098c2ecf20Sopenharmony_ci 17108c2ecf20Sopenharmony_ci VM_BUG_ON(!*pagep || !xa_is_value(*pagep)); 17118c2ecf20Sopenharmony_ci swap = radix_to_swp_entry(*pagep); 17128c2ecf20Sopenharmony_ci *pagep = NULL; 17138c2ecf20Sopenharmony_ci 17148c2ecf20Sopenharmony_ci /* Look it up and read it in.. */ 17158c2ecf20Sopenharmony_ci page = lookup_swap_cache(swap, NULL, 0); 17168c2ecf20Sopenharmony_ci if (!page) { 17178c2ecf20Sopenharmony_ci /* Or update major stats only when swapin succeeds?? */ 17188c2ecf20Sopenharmony_ci if (fault_type) { 17198c2ecf20Sopenharmony_ci *fault_type |= VM_FAULT_MAJOR; 17208c2ecf20Sopenharmony_ci count_vm_event(PGMAJFAULT); 17218c2ecf20Sopenharmony_ci count_memcg_event_mm(charge_mm, PGMAJFAULT); 17228c2ecf20Sopenharmony_ci } 17238c2ecf20Sopenharmony_ci /* Here we actually start the io */ 17248c2ecf20Sopenharmony_ci page = shmem_swapin(swap, gfp, info, index); 17258c2ecf20Sopenharmony_ci if (!page) { 17268c2ecf20Sopenharmony_ci error = -ENOMEM; 17278c2ecf20Sopenharmony_ci goto failed; 17288c2ecf20Sopenharmony_ci } 17298c2ecf20Sopenharmony_ci } 17308c2ecf20Sopenharmony_ci 17318c2ecf20Sopenharmony_ci /* We have to do this with page locked to prevent races */ 17328c2ecf20Sopenharmony_ci lock_page(page); 17338c2ecf20Sopenharmony_ci if (!PageSwapCache(page) || page_private(page) != swap.val || 17348c2ecf20Sopenharmony_ci !shmem_confirm_swap(mapping, index, swap)) { 17358c2ecf20Sopenharmony_ci error = -EEXIST; 17368c2ecf20Sopenharmony_ci goto unlock; 17378c2ecf20Sopenharmony_ci } 17388c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 17398c2ecf20Sopenharmony_ci error = -EIO; 17408c2ecf20Sopenharmony_ci goto failed; 17418c2ecf20Sopenharmony_ci } 17428c2ecf20Sopenharmony_ci wait_on_page_writeback(page); 17438c2ecf20Sopenharmony_ci 17448c2ecf20Sopenharmony_ci /* 17458c2ecf20Sopenharmony_ci * Some 
architectures may have to restore extra metadata to the 17468c2ecf20Sopenharmony_ci * physical page after reading from swap. 17478c2ecf20Sopenharmony_ci */ 17488c2ecf20Sopenharmony_ci arch_swap_restore(swap, page); 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_ci if (shmem_should_replace_page(page, gfp)) { 17518c2ecf20Sopenharmony_ci error = shmem_replace_page(&page, gfp, info, index); 17528c2ecf20Sopenharmony_ci if (error) 17538c2ecf20Sopenharmony_ci goto failed; 17548c2ecf20Sopenharmony_ci } 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ci error = shmem_add_to_page_cache(page, mapping, index, 17578c2ecf20Sopenharmony_ci swp_to_radix_entry(swap), gfp, 17588c2ecf20Sopenharmony_ci charge_mm); 17598c2ecf20Sopenharmony_ci if (error) 17608c2ecf20Sopenharmony_ci goto failed; 17618c2ecf20Sopenharmony_ci 17628c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 17638c2ecf20Sopenharmony_ci info->swapped--; 17648c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 17658c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 17668c2ecf20Sopenharmony_ci 17678c2ecf20Sopenharmony_ci if (sgp == SGP_WRITE) 17688c2ecf20Sopenharmony_ci mark_page_accessed(page); 17698c2ecf20Sopenharmony_ci 17708c2ecf20Sopenharmony_ci delete_from_swap_cache(page); 17718c2ecf20Sopenharmony_ci set_page_dirty(page); 17728c2ecf20Sopenharmony_ci swap_free(swap); 17738c2ecf20Sopenharmony_ci 17748c2ecf20Sopenharmony_ci *pagep = page; 17758c2ecf20Sopenharmony_ci return 0; 17768c2ecf20Sopenharmony_cifailed: 17778c2ecf20Sopenharmony_ci if (!shmem_confirm_swap(mapping, index, swap)) 17788c2ecf20Sopenharmony_ci error = -EEXIST; 17798c2ecf20Sopenharmony_ciunlock: 17808c2ecf20Sopenharmony_ci if (page) { 17818c2ecf20Sopenharmony_ci unlock_page(page); 17828c2ecf20Sopenharmony_ci put_page(page); 17838c2ecf20Sopenharmony_ci } 17848c2ecf20Sopenharmony_ci 17858c2ecf20Sopenharmony_ci return error; 17868c2ecf20Sopenharmony_ci} 17878c2ecf20Sopenharmony_ci 17888c2ecf20Sopenharmony_ci/* 17898c2ecf20Sopenharmony_ci * 
shmem_getpage_gfp - find page in cache, or get from swap, or allocate 17908c2ecf20Sopenharmony_ci * 17918c2ecf20Sopenharmony_ci * If we allocate a new one we do not mark it dirty. That's up to the 17928c2ecf20Sopenharmony_ci * vm. If we swap it in we mark it dirty since we also free the swap 17938c2ecf20Sopenharmony_ci * entry since a page cannot live in both the swap and page cache. 17948c2ecf20Sopenharmony_ci * 17958c2ecf20Sopenharmony_ci * vmf and fault_type are only supplied by shmem_fault: 17968c2ecf20Sopenharmony_ci * otherwise they are NULL. 17978c2ecf20Sopenharmony_ci */ 17988c2ecf20Sopenharmony_cistatic int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 17998c2ecf20Sopenharmony_ci struct page **pagep, enum sgp_type sgp, gfp_t gfp, 18008c2ecf20Sopenharmony_ci struct vm_area_struct *vma, struct vm_fault *vmf, 18018c2ecf20Sopenharmony_ci vm_fault_t *fault_type) 18028c2ecf20Sopenharmony_ci{ 18038c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 18048c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 18058c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo; 18068c2ecf20Sopenharmony_ci struct mm_struct *charge_mm; 18078c2ecf20Sopenharmony_ci struct page *page; 18088c2ecf20Sopenharmony_ci enum sgp_type sgp_huge = sgp; 18098c2ecf20Sopenharmony_ci pgoff_t hindex = index; 18108c2ecf20Sopenharmony_ci int error; 18118c2ecf20Sopenharmony_ci int once = 0; 18128c2ecf20Sopenharmony_ci int alloced = 0; 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT)) 18158c2ecf20Sopenharmony_ci return -EFBIG; 18168c2ecf20Sopenharmony_ci if (sgp == SGP_NOHUGE || sgp == SGP_HUGE) 18178c2ecf20Sopenharmony_ci sgp = SGP_CACHE; 18188c2ecf20Sopenharmony_cirepeat: 18198c2ecf20Sopenharmony_ci if (sgp <= SGP_CACHE && 18208c2ecf20Sopenharmony_ci ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 18218c2ecf20Sopenharmony_ci return -EINVAL; 18228c2ecf20Sopenharmony_ci } 18238c2ecf20Sopenharmony_ci 
18248c2ecf20Sopenharmony_ci sbinfo = SHMEM_SB(inode->i_sb); 18258c2ecf20Sopenharmony_ci charge_mm = vma ? vma->vm_mm : current->mm; 18268c2ecf20Sopenharmony_ci 18278c2ecf20Sopenharmony_ci page = find_lock_entry(mapping, index); 18288c2ecf20Sopenharmony_ci if (xa_is_value(page)) { 18298c2ecf20Sopenharmony_ci error = shmem_swapin_page(inode, index, &page, 18308c2ecf20Sopenharmony_ci sgp, gfp, vma, fault_type); 18318c2ecf20Sopenharmony_ci if (error == -EEXIST) 18328c2ecf20Sopenharmony_ci goto repeat; 18338c2ecf20Sopenharmony_ci 18348c2ecf20Sopenharmony_ci *pagep = page; 18358c2ecf20Sopenharmony_ci return error; 18368c2ecf20Sopenharmony_ci } 18378c2ecf20Sopenharmony_ci 18388c2ecf20Sopenharmony_ci if (page) 18398c2ecf20Sopenharmony_ci hindex = page->index; 18408c2ecf20Sopenharmony_ci if (page && sgp == SGP_WRITE) 18418c2ecf20Sopenharmony_ci mark_page_accessed(page); 18428c2ecf20Sopenharmony_ci 18438c2ecf20Sopenharmony_ci /* fallocated page? */ 18448c2ecf20Sopenharmony_ci if (page && !PageUptodate(page)) { 18458c2ecf20Sopenharmony_ci if (sgp != SGP_READ) 18468c2ecf20Sopenharmony_ci goto clear; 18478c2ecf20Sopenharmony_ci unlock_page(page); 18488c2ecf20Sopenharmony_ci put_page(page); 18498c2ecf20Sopenharmony_ci page = NULL; 18508c2ecf20Sopenharmony_ci hindex = index; 18518c2ecf20Sopenharmony_ci } 18528c2ecf20Sopenharmony_ci if (page || sgp == SGP_READ) 18538c2ecf20Sopenharmony_ci goto out; 18548c2ecf20Sopenharmony_ci 18558c2ecf20Sopenharmony_ci /* 18568c2ecf20Sopenharmony_ci * Fast cache lookup did not find it: 18578c2ecf20Sopenharmony_ci * bring it back from swap or allocate. 
18588c2ecf20Sopenharmony_ci */ 18598c2ecf20Sopenharmony_ci 18608c2ecf20Sopenharmony_ci if (vma && userfaultfd_missing(vma)) { 18618c2ecf20Sopenharmony_ci *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); 18628c2ecf20Sopenharmony_ci return 0; 18638c2ecf20Sopenharmony_ci } 18648c2ecf20Sopenharmony_ci 18658c2ecf20Sopenharmony_ci /* shmem_symlink() */ 18668c2ecf20Sopenharmony_ci if (mapping->a_ops != &shmem_aops) 18678c2ecf20Sopenharmony_ci goto alloc_nohuge; 18688c2ecf20Sopenharmony_ci if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE) 18698c2ecf20Sopenharmony_ci goto alloc_nohuge; 18708c2ecf20Sopenharmony_ci if (shmem_huge == SHMEM_HUGE_FORCE) 18718c2ecf20Sopenharmony_ci goto alloc_huge; 18728c2ecf20Sopenharmony_ci switch (sbinfo->huge) { 18738c2ecf20Sopenharmony_ci case SHMEM_HUGE_NEVER: 18748c2ecf20Sopenharmony_ci goto alloc_nohuge; 18758c2ecf20Sopenharmony_ci case SHMEM_HUGE_WITHIN_SIZE: { 18768c2ecf20Sopenharmony_ci loff_t i_size; 18778c2ecf20Sopenharmony_ci pgoff_t off; 18788c2ecf20Sopenharmony_ci 18798c2ecf20Sopenharmony_ci off = round_up(index, HPAGE_PMD_NR); 18808c2ecf20Sopenharmony_ci i_size = round_up(i_size_read(inode), PAGE_SIZE); 18818c2ecf20Sopenharmony_ci if (i_size >= HPAGE_PMD_SIZE && 18828c2ecf20Sopenharmony_ci i_size >> PAGE_SHIFT >= off) 18838c2ecf20Sopenharmony_ci goto alloc_huge; 18848c2ecf20Sopenharmony_ci 18858c2ecf20Sopenharmony_ci fallthrough; 18868c2ecf20Sopenharmony_ci } 18878c2ecf20Sopenharmony_ci case SHMEM_HUGE_ADVISE: 18888c2ecf20Sopenharmony_ci if (sgp_huge == SGP_HUGE) 18898c2ecf20Sopenharmony_ci goto alloc_huge; 18908c2ecf20Sopenharmony_ci /* TODO: implement fadvise() hints */ 18918c2ecf20Sopenharmony_ci goto alloc_nohuge; 18928c2ecf20Sopenharmony_ci } 18938c2ecf20Sopenharmony_ci 18948c2ecf20Sopenharmony_cialloc_huge: 18958c2ecf20Sopenharmony_ci page = shmem_alloc_and_acct_page(gfp, inode, index, true); 18968c2ecf20Sopenharmony_ci if (IS_ERR(page)) { 18978c2ecf20Sopenharmony_cialloc_nohuge: 18988c2ecf20Sopenharmony_ci 
page = shmem_alloc_and_acct_page(gfp, inode, 18998c2ecf20Sopenharmony_ci index, false); 19008c2ecf20Sopenharmony_ci } 19018c2ecf20Sopenharmony_ci if (IS_ERR(page)) { 19028c2ecf20Sopenharmony_ci int retry = 5; 19038c2ecf20Sopenharmony_ci 19048c2ecf20Sopenharmony_ci error = PTR_ERR(page); 19058c2ecf20Sopenharmony_ci page = NULL; 19068c2ecf20Sopenharmony_ci if (error != -ENOSPC) 19078c2ecf20Sopenharmony_ci goto unlock; 19088c2ecf20Sopenharmony_ci /* 19098c2ecf20Sopenharmony_ci * Try to reclaim some space by splitting a huge page 19108c2ecf20Sopenharmony_ci * beyond i_size on the filesystem. 19118c2ecf20Sopenharmony_ci */ 19128c2ecf20Sopenharmony_ci while (retry--) { 19138c2ecf20Sopenharmony_ci int ret; 19148c2ecf20Sopenharmony_ci 19158c2ecf20Sopenharmony_ci ret = shmem_unused_huge_shrink(sbinfo, NULL, 1); 19168c2ecf20Sopenharmony_ci if (ret == SHRINK_STOP) 19178c2ecf20Sopenharmony_ci break; 19188c2ecf20Sopenharmony_ci if (ret) 19198c2ecf20Sopenharmony_ci goto alloc_nohuge; 19208c2ecf20Sopenharmony_ci } 19218c2ecf20Sopenharmony_ci goto unlock; 19228c2ecf20Sopenharmony_ci } 19238c2ecf20Sopenharmony_ci 19248c2ecf20Sopenharmony_ci if (PageTransHuge(page)) 19258c2ecf20Sopenharmony_ci hindex = round_down(index, HPAGE_PMD_NR); 19268c2ecf20Sopenharmony_ci else 19278c2ecf20Sopenharmony_ci hindex = index; 19288c2ecf20Sopenharmony_ci 19298c2ecf20Sopenharmony_ci if (sgp == SGP_WRITE) 19308c2ecf20Sopenharmony_ci __SetPageReferenced(page); 19318c2ecf20Sopenharmony_ci 19328c2ecf20Sopenharmony_ci error = shmem_add_to_page_cache(page, mapping, hindex, 19338c2ecf20Sopenharmony_ci NULL, gfp & GFP_RECLAIM_MASK, 19348c2ecf20Sopenharmony_ci charge_mm); 19358c2ecf20Sopenharmony_ci if (error) 19368c2ecf20Sopenharmony_ci goto unacct; 19378c2ecf20Sopenharmony_ci lru_cache_add(page); 19388c2ecf20Sopenharmony_ci 19398c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 19408c2ecf20Sopenharmony_ci info->alloced += compound_nr(page); 19418c2ecf20Sopenharmony_ci inode->i_blocks += BLOCKS_PER_PAGE << 
compound_order(page); 19428c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 19438c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 19448c2ecf20Sopenharmony_ci alloced = true; 19458c2ecf20Sopenharmony_ci 19468c2ecf20Sopenharmony_ci if (PageTransHuge(page) && 19478c2ecf20Sopenharmony_ci DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < 19488c2ecf20Sopenharmony_ci hindex + HPAGE_PMD_NR - 1) { 19498c2ecf20Sopenharmony_ci /* 19508c2ecf20Sopenharmony_ci * Part of the huge page is beyond i_size: subject 19518c2ecf20Sopenharmony_ci * to shrink under memory pressure. 19528c2ecf20Sopenharmony_ci */ 19538c2ecf20Sopenharmony_ci spin_lock(&sbinfo->shrinklist_lock); 19548c2ecf20Sopenharmony_ci /* 19558c2ecf20Sopenharmony_ci * _careful to defend against unlocked access to 19568c2ecf20Sopenharmony_ci * ->shrink_list in shmem_unused_huge_shrink() 19578c2ecf20Sopenharmony_ci */ 19588c2ecf20Sopenharmony_ci if (list_empty_careful(&info->shrinklist)) { 19598c2ecf20Sopenharmony_ci list_add_tail(&info->shrinklist, 19608c2ecf20Sopenharmony_ci &sbinfo->shrinklist); 19618c2ecf20Sopenharmony_ci sbinfo->shrinklist_len++; 19628c2ecf20Sopenharmony_ci } 19638c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->shrinklist_lock); 19648c2ecf20Sopenharmony_ci } 19658c2ecf20Sopenharmony_ci 19668c2ecf20Sopenharmony_ci /* 19678c2ecf20Sopenharmony_ci * Let SGP_FALLOC use the SGP_WRITE optimization on a new page. 19688c2ecf20Sopenharmony_ci */ 19698c2ecf20Sopenharmony_ci if (sgp == SGP_FALLOC) 19708c2ecf20Sopenharmony_ci sgp = SGP_WRITE; 19718c2ecf20Sopenharmony_ciclear: 19728c2ecf20Sopenharmony_ci /* 19738c2ecf20Sopenharmony_ci * Let SGP_WRITE caller clear ends if write does not fill page; 19748c2ecf20Sopenharmony_ci * but SGP_FALLOC on a page fallocated earlier must initialize 19758c2ecf20Sopenharmony_ci * it now, lest undo on failure cancel our earlier guarantee. 
19768c2ecf20Sopenharmony_ci */ 19778c2ecf20Sopenharmony_ci if (sgp != SGP_WRITE && !PageUptodate(page)) { 19788c2ecf20Sopenharmony_ci int i; 19798c2ecf20Sopenharmony_ci 19808c2ecf20Sopenharmony_ci for (i = 0; i < compound_nr(page); i++) { 19818c2ecf20Sopenharmony_ci clear_highpage(page + i); 19828c2ecf20Sopenharmony_ci flush_dcache_page(page + i); 19838c2ecf20Sopenharmony_ci } 19848c2ecf20Sopenharmony_ci SetPageUptodate(page); 19858c2ecf20Sopenharmony_ci } 19868c2ecf20Sopenharmony_ci 19878c2ecf20Sopenharmony_ci /* Perhaps the file has been truncated since we checked */ 19888c2ecf20Sopenharmony_ci if (sgp <= SGP_CACHE && 19898c2ecf20Sopenharmony_ci ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { 19908c2ecf20Sopenharmony_ci if (alloced) { 19918c2ecf20Sopenharmony_ci ClearPageDirty(page); 19928c2ecf20Sopenharmony_ci delete_from_page_cache(page); 19938c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 19948c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 19958c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 19968c2ecf20Sopenharmony_ci } 19978c2ecf20Sopenharmony_ci error = -EINVAL; 19988c2ecf20Sopenharmony_ci goto unlock; 19998c2ecf20Sopenharmony_ci } 20008c2ecf20Sopenharmony_ciout: 20018c2ecf20Sopenharmony_ci *pagep = page + index - hindex; 20028c2ecf20Sopenharmony_ci return 0; 20038c2ecf20Sopenharmony_ci 20048c2ecf20Sopenharmony_ci /* 20058c2ecf20Sopenharmony_ci * Error recovery. 
20068c2ecf20Sopenharmony_ci */ 20078c2ecf20Sopenharmony_ciunacct: 20088c2ecf20Sopenharmony_ci shmem_inode_unacct_blocks(inode, compound_nr(page)); 20098c2ecf20Sopenharmony_ci 20108c2ecf20Sopenharmony_ci if (PageTransHuge(page)) { 20118c2ecf20Sopenharmony_ci unlock_page(page); 20128c2ecf20Sopenharmony_ci put_page(page); 20138c2ecf20Sopenharmony_ci goto alloc_nohuge; 20148c2ecf20Sopenharmony_ci } 20158c2ecf20Sopenharmony_ciunlock: 20168c2ecf20Sopenharmony_ci if (page) { 20178c2ecf20Sopenharmony_ci unlock_page(page); 20188c2ecf20Sopenharmony_ci put_page(page); 20198c2ecf20Sopenharmony_ci } 20208c2ecf20Sopenharmony_ci if (error == -ENOSPC && !once++) { 20218c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 20228c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 20238c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 20248c2ecf20Sopenharmony_ci goto repeat; 20258c2ecf20Sopenharmony_ci } 20268c2ecf20Sopenharmony_ci if (error == -EEXIST) 20278c2ecf20Sopenharmony_ci goto repeat; 20288c2ecf20Sopenharmony_ci return error; 20298c2ecf20Sopenharmony_ci} 20308c2ecf20Sopenharmony_ci 20318c2ecf20Sopenharmony_ci/* 20328c2ecf20Sopenharmony_ci * This is like autoremove_wake_function, but it removes the wait queue 20338c2ecf20Sopenharmony_ci * entry unconditionally - even if something else had already woken the 20348c2ecf20Sopenharmony_ci * target. 
20358c2ecf20Sopenharmony_ci */ 20368c2ecf20Sopenharmony_cistatic int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) 20378c2ecf20Sopenharmony_ci{ 20388c2ecf20Sopenharmony_ci int ret = default_wake_function(wait, mode, sync, key); 20398c2ecf20Sopenharmony_ci list_del_init(&wait->entry); 20408c2ecf20Sopenharmony_ci return ret; 20418c2ecf20Sopenharmony_ci} 20428c2ecf20Sopenharmony_ci 20438c2ecf20Sopenharmony_cistatic vm_fault_t shmem_fault(struct vm_fault *vmf) 20448c2ecf20Sopenharmony_ci{ 20458c2ecf20Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 20468c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 20478c2ecf20Sopenharmony_ci gfp_t gfp = mapping_gfp_mask(inode->i_mapping); 20488c2ecf20Sopenharmony_ci enum sgp_type sgp; 20498c2ecf20Sopenharmony_ci int err; 20508c2ecf20Sopenharmony_ci vm_fault_t ret = VM_FAULT_LOCKED; 20518c2ecf20Sopenharmony_ci 20528c2ecf20Sopenharmony_ci /* 20538c2ecf20Sopenharmony_ci * Trinity finds that probing a hole which tmpfs is punching can 20548c2ecf20Sopenharmony_ci * prevent the hole-punch from ever completing: which in turn 20558c2ecf20Sopenharmony_ci * locks writers out with its hold on i_mutex. So refrain from 20568c2ecf20Sopenharmony_ci * faulting pages into the hole while it's being punched. Although 20578c2ecf20Sopenharmony_ci * shmem_undo_range() does remove the additions, it may be unable to 20588c2ecf20Sopenharmony_ci * keep up, as each new page needs its own unmap_mapping_range() call, 20598c2ecf20Sopenharmony_ci * and the i_mmap tree grows ever slower to scan if new vmas are added. 20608c2ecf20Sopenharmony_ci * 20618c2ecf20Sopenharmony_ci * It does not matter if we sometimes reach this check just before the 20628c2ecf20Sopenharmony_ci * hole-punch begins, so that one fault then races with the punch: 20638c2ecf20Sopenharmony_ci * we just need to make racing faults a rare case. 
20648c2ecf20Sopenharmony_ci * 20658c2ecf20Sopenharmony_ci * The implementation below would be much simpler if we just used a 20668c2ecf20Sopenharmony_ci * standard mutex or completion: but we cannot take i_mutex in fault, 20678c2ecf20Sopenharmony_ci * and bloating every shmem inode for this unlikely case would be sad. 20688c2ecf20Sopenharmony_ci */ 20698c2ecf20Sopenharmony_ci if (unlikely(inode->i_private)) { 20708c2ecf20Sopenharmony_ci struct shmem_falloc *shmem_falloc; 20718c2ecf20Sopenharmony_ci 20728c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 20738c2ecf20Sopenharmony_ci shmem_falloc = inode->i_private; 20748c2ecf20Sopenharmony_ci if (shmem_falloc && 20758c2ecf20Sopenharmony_ci shmem_falloc->waitq && 20768c2ecf20Sopenharmony_ci vmf->pgoff >= shmem_falloc->start && 20778c2ecf20Sopenharmony_ci vmf->pgoff < shmem_falloc->next) { 20788c2ecf20Sopenharmony_ci struct file *fpin; 20798c2ecf20Sopenharmony_ci wait_queue_head_t *shmem_falloc_waitq; 20808c2ecf20Sopenharmony_ci DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); 20818c2ecf20Sopenharmony_ci 20828c2ecf20Sopenharmony_ci ret = VM_FAULT_NOPAGE; 20838c2ecf20Sopenharmony_ci fpin = maybe_unlock_mmap_for_io(vmf, NULL); 20848c2ecf20Sopenharmony_ci if (fpin) 20858c2ecf20Sopenharmony_ci ret = VM_FAULT_RETRY; 20868c2ecf20Sopenharmony_ci 20878c2ecf20Sopenharmony_ci shmem_falloc_waitq = shmem_falloc->waitq; 20888c2ecf20Sopenharmony_ci prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, 20898c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 20908c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 20918c2ecf20Sopenharmony_ci schedule(); 20928c2ecf20Sopenharmony_ci 20938c2ecf20Sopenharmony_ci /* 20948c2ecf20Sopenharmony_ci * shmem_falloc_waitq points into the shmem_fallocate() 20958c2ecf20Sopenharmony_ci * stack of the hole-punching task: shmem_falloc_waitq 20968c2ecf20Sopenharmony_ci * is usually invalid by the time we reach here, but 20978c2ecf20Sopenharmony_ci * finish_wait() does not dereference it in that 
case; 20988c2ecf20Sopenharmony_ci * though i_lock needed lest racing with wake_up_all(). 20998c2ecf20Sopenharmony_ci */ 21008c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 21018c2ecf20Sopenharmony_ci finish_wait(shmem_falloc_waitq, &shmem_fault_wait); 21028c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 21038c2ecf20Sopenharmony_ci 21048c2ecf20Sopenharmony_ci if (fpin) 21058c2ecf20Sopenharmony_ci fput(fpin); 21068c2ecf20Sopenharmony_ci return ret; 21078c2ecf20Sopenharmony_ci } 21088c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 21098c2ecf20Sopenharmony_ci } 21108c2ecf20Sopenharmony_ci 21118c2ecf20Sopenharmony_ci sgp = SGP_CACHE; 21128c2ecf20Sopenharmony_ci 21138c2ecf20Sopenharmony_ci if ((vma->vm_flags & VM_NOHUGEPAGE) || 21148c2ecf20Sopenharmony_ci test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) 21158c2ecf20Sopenharmony_ci sgp = SGP_NOHUGE; 21168c2ecf20Sopenharmony_ci else if (vma->vm_flags & VM_HUGEPAGE) 21178c2ecf20Sopenharmony_ci sgp = SGP_HUGE; 21188c2ecf20Sopenharmony_ci 21198c2ecf20Sopenharmony_ci err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp, 21208c2ecf20Sopenharmony_ci gfp, vma, vmf, &ret); 21218c2ecf20Sopenharmony_ci if (err) 21228c2ecf20Sopenharmony_ci return vmf_error(err); 21238c2ecf20Sopenharmony_ci return ret; 21248c2ecf20Sopenharmony_ci} 21258c2ecf20Sopenharmony_ci 21268c2ecf20Sopenharmony_ciunsigned long shmem_get_unmapped_area(struct file *file, 21278c2ecf20Sopenharmony_ci unsigned long uaddr, unsigned long len, 21288c2ecf20Sopenharmony_ci unsigned long pgoff, unsigned long flags) 21298c2ecf20Sopenharmony_ci{ 21308c2ecf20Sopenharmony_ci unsigned long (*get_area)(struct file *, 21318c2ecf20Sopenharmony_ci unsigned long, unsigned long, unsigned long, unsigned long); 21328c2ecf20Sopenharmony_ci unsigned long addr; 21338c2ecf20Sopenharmony_ci unsigned long offset; 21348c2ecf20Sopenharmony_ci unsigned long inflated_len; 21358c2ecf20Sopenharmony_ci unsigned long inflated_addr; 21368c2ecf20Sopenharmony_ci unsigned long 
inflated_offset; 21378c2ecf20Sopenharmony_ci 21388c2ecf20Sopenharmony_ci if (len > TASK_SIZE) 21398c2ecf20Sopenharmony_ci return -ENOMEM; 21408c2ecf20Sopenharmony_ci 21418c2ecf20Sopenharmony_ci get_area = current->mm->get_unmapped_area; 21428c2ecf20Sopenharmony_ci addr = get_area(file, uaddr, len, pgoff, flags); 21438c2ecf20Sopenharmony_ci 21448c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 21458c2ecf20Sopenharmony_ci return addr; 21468c2ecf20Sopenharmony_ci if (IS_ERR_VALUE(addr)) 21478c2ecf20Sopenharmony_ci return addr; 21488c2ecf20Sopenharmony_ci if (addr & ~PAGE_MASK) 21498c2ecf20Sopenharmony_ci return addr; 21508c2ecf20Sopenharmony_ci if (addr > TASK_SIZE - len) 21518c2ecf20Sopenharmony_ci return addr; 21528c2ecf20Sopenharmony_ci 21538c2ecf20Sopenharmony_ci if (shmem_huge == SHMEM_HUGE_DENY) 21548c2ecf20Sopenharmony_ci return addr; 21558c2ecf20Sopenharmony_ci if (len < HPAGE_PMD_SIZE) 21568c2ecf20Sopenharmony_ci return addr; 21578c2ecf20Sopenharmony_ci if (flags & MAP_FIXED) 21588c2ecf20Sopenharmony_ci return addr; 21598c2ecf20Sopenharmony_ci /* 21608c2ecf20Sopenharmony_ci * Our priority is to support MAP_SHARED mapped hugely; 21618c2ecf20Sopenharmony_ci * and support MAP_PRIVATE mapped hugely too, until it is COWed. 21628c2ecf20Sopenharmony_ci * But if caller specified an address hint and we allocated area there 21638c2ecf20Sopenharmony_ci * successfully, respect that as before. 
21648c2ecf20Sopenharmony_ci */ 21658c2ecf20Sopenharmony_ci if (uaddr == addr) 21668c2ecf20Sopenharmony_ci return addr; 21678c2ecf20Sopenharmony_ci 21688c2ecf20Sopenharmony_ci if (shmem_huge != SHMEM_HUGE_FORCE) { 21698c2ecf20Sopenharmony_ci struct super_block *sb; 21708c2ecf20Sopenharmony_ci 21718c2ecf20Sopenharmony_ci if (file) { 21728c2ecf20Sopenharmony_ci VM_BUG_ON(file->f_op != &shmem_file_operations); 21738c2ecf20Sopenharmony_ci sb = file_inode(file)->i_sb; 21748c2ecf20Sopenharmony_ci } else { 21758c2ecf20Sopenharmony_ci /* 21768c2ecf20Sopenharmony_ci * Called directly from mm/mmap.c, or drivers/char/mem.c 21778c2ecf20Sopenharmony_ci * for "/dev/zero", to create a shared anonymous object. 21788c2ecf20Sopenharmony_ci */ 21798c2ecf20Sopenharmony_ci if (IS_ERR(shm_mnt)) 21808c2ecf20Sopenharmony_ci return addr; 21818c2ecf20Sopenharmony_ci sb = shm_mnt->mnt_sb; 21828c2ecf20Sopenharmony_ci } 21838c2ecf20Sopenharmony_ci if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER) 21848c2ecf20Sopenharmony_ci return addr; 21858c2ecf20Sopenharmony_ci } 21868c2ecf20Sopenharmony_ci 21878c2ecf20Sopenharmony_ci offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1); 21888c2ecf20Sopenharmony_ci if (offset && offset + len < 2 * HPAGE_PMD_SIZE) 21898c2ecf20Sopenharmony_ci return addr; 21908c2ecf20Sopenharmony_ci if ((addr & (HPAGE_PMD_SIZE-1)) == offset) 21918c2ecf20Sopenharmony_ci return addr; 21928c2ecf20Sopenharmony_ci 21938c2ecf20Sopenharmony_ci inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE; 21948c2ecf20Sopenharmony_ci if (inflated_len > TASK_SIZE) 21958c2ecf20Sopenharmony_ci return addr; 21968c2ecf20Sopenharmony_ci if (inflated_len < len) 21978c2ecf20Sopenharmony_ci return addr; 21988c2ecf20Sopenharmony_ci 21998c2ecf20Sopenharmony_ci inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); 22008c2ecf20Sopenharmony_ci if (IS_ERR_VALUE(inflated_addr)) 22018c2ecf20Sopenharmony_ci return addr; 22028c2ecf20Sopenharmony_ci if (inflated_addr & ~PAGE_MASK) 22038c2ecf20Sopenharmony_ci return 
addr; 22048c2ecf20Sopenharmony_ci 22058c2ecf20Sopenharmony_ci inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1); 22068c2ecf20Sopenharmony_ci inflated_addr += offset - inflated_offset; 22078c2ecf20Sopenharmony_ci if (inflated_offset > offset) 22088c2ecf20Sopenharmony_ci inflated_addr += HPAGE_PMD_SIZE; 22098c2ecf20Sopenharmony_ci 22108c2ecf20Sopenharmony_ci if (inflated_addr > TASK_SIZE - len) 22118c2ecf20Sopenharmony_ci return addr; 22128c2ecf20Sopenharmony_ci return inflated_addr; 22138c2ecf20Sopenharmony_ci} 22148c2ecf20Sopenharmony_ci 22158c2ecf20Sopenharmony_ci#ifdef CONFIG_NUMA 22168c2ecf20Sopenharmony_cistatic int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) 22178c2ecf20Sopenharmony_ci{ 22188c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 22198c2ecf20Sopenharmony_ci return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); 22208c2ecf20Sopenharmony_ci} 22218c2ecf20Sopenharmony_ci 22228c2ecf20Sopenharmony_cistatic struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 22238c2ecf20Sopenharmony_ci unsigned long addr) 22248c2ecf20Sopenharmony_ci{ 22258c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 22268c2ecf20Sopenharmony_ci pgoff_t index; 22278c2ecf20Sopenharmony_ci 22288c2ecf20Sopenharmony_ci index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 22298c2ecf20Sopenharmony_ci return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); 22308c2ecf20Sopenharmony_ci} 22318c2ecf20Sopenharmony_ci#endif 22328c2ecf20Sopenharmony_ci 22338c2ecf20Sopenharmony_ciint shmem_lock(struct file *file, int lock, struct user_struct *user) 22348c2ecf20Sopenharmony_ci{ 22358c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 22368c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 22378c2ecf20Sopenharmony_ci int retval = -ENOMEM; 22388c2ecf20Sopenharmony_ci 22398c2ecf20Sopenharmony_ci /* 22408c2ecf20Sopenharmony_ci * What serializes the accesses to 
info->flags? 22418c2ecf20Sopenharmony_ci * ipc_lock_object() when called from shmctl_do_lock(), 22428c2ecf20Sopenharmony_ci * no serialization needed when called from shm_destroy(). 22438c2ecf20Sopenharmony_ci */ 22448c2ecf20Sopenharmony_ci if (lock && !(info->flags & VM_LOCKED)) { 22458c2ecf20Sopenharmony_ci if (!user_shm_lock(inode->i_size, user)) 22468c2ecf20Sopenharmony_ci goto out_nomem; 22478c2ecf20Sopenharmony_ci info->flags |= VM_LOCKED; 22488c2ecf20Sopenharmony_ci mapping_set_unevictable(file->f_mapping); 22498c2ecf20Sopenharmony_ci } 22508c2ecf20Sopenharmony_ci if (!lock && (info->flags & VM_LOCKED) && user) { 22518c2ecf20Sopenharmony_ci user_shm_unlock(inode->i_size, user); 22528c2ecf20Sopenharmony_ci info->flags &= ~VM_LOCKED; 22538c2ecf20Sopenharmony_ci mapping_clear_unevictable(file->f_mapping); 22548c2ecf20Sopenharmony_ci } 22558c2ecf20Sopenharmony_ci retval = 0; 22568c2ecf20Sopenharmony_ci 22578c2ecf20Sopenharmony_ciout_nomem: 22588c2ecf20Sopenharmony_ci return retval; 22598c2ecf20Sopenharmony_ci} 22608c2ecf20Sopenharmony_ci 22618c2ecf20Sopenharmony_cistatic int shmem_mmap(struct file *file, struct vm_area_struct *vma) 22628c2ecf20Sopenharmony_ci{ 22638c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(file_inode(file)); 22648c2ecf20Sopenharmony_ci int ret; 22658c2ecf20Sopenharmony_ci 22668c2ecf20Sopenharmony_ci ret = seal_check_future_write(info->seals, vma); 22678c2ecf20Sopenharmony_ci if (ret) 22688c2ecf20Sopenharmony_ci return ret; 22698c2ecf20Sopenharmony_ci 22708c2ecf20Sopenharmony_ci /* arm64 - allow memory tagging on RAM-based files */ 22718c2ecf20Sopenharmony_ci vma->vm_flags |= VM_MTE_ALLOWED; 22728c2ecf20Sopenharmony_ci 22738c2ecf20Sopenharmony_ci file_accessed(file); 22748c2ecf20Sopenharmony_ci vma->vm_ops = &shmem_vm_ops; 22758c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && 22768c2ecf20Sopenharmony_ci ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < 22778c2ecf20Sopenharmony_ci (vma->vm_end & 
HPAGE_PMD_MASK)) { 22788c2ecf20Sopenharmony_ci khugepaged_enter(vma, vma->vm_flags); 22798c2ecf20Sopenharmony_ci } 22808c2ecf20Sopenharmony_ci return 0; 22818c2ecf20Sopenharmony_ci} 22828c2ecf20Sopenharmony_ci 22838c2ecf20Sopenharmony_cistatic struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir, 22848c2ecf20Sopenharmony_ci umode_t mode, dev_t dev, unsigned long flags) 22858c2ecf20Sopenharmony_ci{ 22868c2ecf20Sopenharmony_ci struct inode *inode; 22878c2ecf20Sopenharmony_ci struct shmem_inode_info *info; 22888c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 22898c2ecf20Sopenharmony_ci ino_t ino; 22908c2ecf20Sopenharmony_ci 22918c2ecf20Sopenharmony_ci if (shmem_reserve_inode(sb, &ino)) 22928c2ecf20Sopenharmony_ci return NULL; 22938c2ecf20Sopenharmony_ci 22948c2ecf20Sopenharmony_ci inode = new_inode(sb); 22958c2ecf20Sopenharmony_ci if (inode) { 22968c2ecf20Sopenharmony_ci inode->i_ino = ino; 22978c2ecf20Sopenharmony_ci inode_init_owner(inode, dir, mode); 22988c2ecf20Sopenharmony_ci inode->i_blocks = 0; 22998c2ecf20Sopenharmony_ci inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 23008c2ecf20Sopenharmony_ci inode->i_generation = prandom_u32(); 23018c2ecf20Sopenharmony_ci info = SHMEM_I(inode); 23028c2ecf20Sopenharmony_ci memset(info, 0, (char *)inode - (char *)info); 23038c2ecf20Sopenharmony_ci spin_lock_init(&info->lock); 23048c2ecf20Sopenharmony_ci atomic_set(&info->stop_eviction, 0); 23058c2ecf20Sopenharmony_ci info->seals = F_SEAL_SEAL; 23068c2ecf20Sopenharmony_ci info->flags = flags & VM_NORESERVE; 23078c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&info->shrinklist); 23088c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&info->swaplist); 23098c2ecf20Sopenharmony_ci simple_xattrs_init(&info->xattrs); 23108c2ecf20Sopenharmony_ci cache_no_acl(inode); 23118c2ecf20Sopenharmony_ci 23128c2ecf20Sopenharmony_ci switch (mode & S_IFMT) { 23138c2ecf20Sopenharmony_ci default: 23148c2ecf20Sopenharmony_ci inode->i_op = 
&shmem_special_inode_operations; 23158c2ecf20Sopenharmony_ci init_special_inode(inode, mode, dev); 23168c2ecf20Sopenharmony_ci break; 23178c2ecf20Sopenharmony_ci case S_IFREG: 23188c2ecf20Sopenharmony_ci inode->i_mapping->a_ops = &shmem_aops; 23198c2ecf20Sopenharmony_ci inode->i_op = &shmem_inode_operations; 23208c2ecf20Sopenharmony_ci inode->i_fop = &shmem_file_operations; 23218c2ecf20Sopenharmony_ci mpol_shared_policy_init(&info->policy, 23228c2ecf20Sopenharmony_ci shmem_get_sbmpol(sbinfo)); 23238c2ecf20Sopenharmony_ci break; 23248c2ecf20Sopenharmony_ci case S_IFDIR: 23258c2ecf20Sopenharmony_ci inc_nlink(inode); 23268c2ecf20Sopenharmony_ci /* Some things misbehave if size == 0 on a directory */ 23278c2ecf20Sopenharmony_ci inode->i_size = 2 * BOGO_DIRENT_SIZE; 23288c2ecf20Sopenharmony_ci inode->i_op = &shmem_dir_inode_operations; 23298c2ecf20Sopenharmony_ci inode->i_fop = &simple_dir_operations; 23308c2ecf20Sopenharmony_ci break; 23318c2ecf20Sopenharmony_ci case S_IFLNK: 23328c2ecf20Sopenharmony_ci /* 23338c2ecf20Sopenharmony_ci * Must not load anything in the rbtree, 23348c2ecf20Sopenharmony_ci * mpol_free_shared_policy will not be called. 
23358c2ecf20Sopenharmony_ci */ 23368c2ecf20Sopenharmony_ci mpol_shared_policy_init(&info->policy, NULL); 23378c2ecf20Sopenharmony_ci break; 23388c2ecf20Sopenharmony_ci } 23398c2ecf20Sopenharmony_ci 23408c2ecf20Sopenharmony_ci lockdep_annotate_inode_mutex_key(inode); 23418c2ecf20Sopenharmony_ci } else 23428c2ecf20Sopenharmony_ci shmem_free_inode(sb); 23438c2ecf20Sopenharmony_ci return inode; 23448c2ecf20Sopenharmony_ci} 23458c2ecf20Sopenharmony_ci 23468c2ecf20Sopenharmony_cibool shmem_mapping(struct address_space *mapping) 23478c2ecf20Sopenharmony_ci{ 23488c2ecf20Sopenharmony_ci return mapping->a_ops == &shmem_aops; 23498c2ecf20Sopenharmony_ci} 23508c2ecf20Sopenharmony_ci 23518c2ecf20Sopenharmony_cistatic int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, 23528c2ecf20Sopenharmony_ci pmd_t *dst_pmd, 23538c2ecf20Sopenharmony_ci struct vm_area_struct *dst_vma, 23548c2ecf20Sopenharmony_ci unsigned long dst_addr, 23558c2ecf20Sopenharmony_ci unsigned long src_addr, 23568c2ecf20Sopenharmony_ci bool zeropage, 23578c2ecf20Sopenharmony_ci struct page **pagep) 23588c2ecf20Sopenharmony_ci{ 23598c2ecf20Sopenharmony_ci struct inode *inode = file_inode(dst_vma->vm_file); 23608c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 23618c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 23628c2ecf20Sopenharmony_ci gfp_t gfp = mapping_gfp_mask(mapping); 23638c2ecf20Sopenharmony_ci pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); 23648c2ecf20Sopenharmony_ci spinlock_t *ptl; 23658c2ecf20Sopenharmony_ci void *page_kaddr; 23668c2ecf20Sopenharmony_ci struct page *page; 23678c2ecf20Sopenharmony_ci pte_t _dst_pte, *dst_pte; 23688c2ecf20Sopenharmony_ci int ret; 23698c2ecf20Sopenharmony_ci pgoff_t offset, max_off; 23708c2ecf20Sopenharmony_ci 23718c2ecf20Sopenharmony_ci ret = -ENOMEM; 23728c2ecf20Sopenharmony_ci if (!shmem_inode_acct_block(inode, 1)) { 23738c2ecf20Sopenharmony_ci /* 23748c2ecf20Sopenharmony_ci * We may have got a page, returned 
-ENOENT triggering a retry, 23758c2ecf20Sopenharmony_ci * and now we find ourselves with -ENOMEM. Release the page, to 23768c2ecf20Sopenharmony_ci * avoid a BUG_ON in our caller. 23778c2ecf20Sopenharmony_ci */ 23788c2ecf20Sopenharmony_ci if (unlikely(*pagep)) { 23798c2ecf20Sopenharmony_ci put_page(*pagep); 23808c2ecf20Sopenharmony_ci *pagep = NULL; 23818c2ecf20Sopenharmony_ci } 23828c2ecf20Sopenharmony_ci goto out; 23838c2ecf20Sopenharmony_ci } 23848c2ecf20Sopenharmony_ci 23858c2ecf20Sopenharmony_ci if (!*pagep) { 23868c2ecf20Sopenharmony_ci page = shmem_alloc_page(gfp, info, pgoff); 23878c2ecf20Sopenharmony_ci if (!page) 23888c2ecf20Sopenharmony_ci goto out_unacct_blocks; 23898c2ecf20Sopenharmony_ci 23908c2ecf20Sopenharmony_ci if (!zeropage) { /* mcopy_atomic */ 23918c2ecf20Sopenharmony_ci page_kaddr = kmap_atomic(page); 23928c2ecf20Sopenharmony_ci ret = copy_from_user(page_kaddr, 23938c2ecf20Sopenharmony_ci (const void __user *)src_addr, 23948c2ecf20Sopenharmony_ci PAGE_SIZE); 23958c2ecf20Sopenharmony_ci kunmap_atomic(page_kaddr); 23968c2ecf20Sopenharmony_ci 23978c2ecf20Sopenharmony_ci /* fallback to copy_from_user outside mmap_lock */ 23988c2ecf20Sopenharmony_ci if (unlikely(ret)) { 23998c2ecf20Sopenharmony_ci *pagep = page; 24008c2ecf20Sopenharmony_ci shmem_inode_unacct_blocks(inode, 1); 24018c2ecf20Sopenharmony_ci /* don't free the page */ 24028c2ecf20Sopenharmony_ci return -ENOENT; 24038c2ecf20Sopenharmony_ci } 24048c2ecf20Sopenharmony_ci } else { /* mfill_zeropage_atomic */ 24058c2ecf20Sopenharmony_ci clear_highpage(page); 24068c2ecf20Sopenharmony_ci } 24078c2ecf20Sopenharmony_ci } else { 24088c2ecf20Sopenharmony_ci page = *pagep; 24098c2ecf20Sopenharmony_ci *pagep = NULL; 24108c2ecf20Sopenharmony_ci } 24118c2ecf20Sopenharmony_ci 24128c2ecf20Sopenharmony_ci VM_BUG_ON(PageLocked(page) || PageSwapBacked(page)); 24138c2ecf20Sopenharmony_ci __SetPageLocked(page); 24148c2ecf20Sopenharmony_ci __SetPageSwapBacked(page); 24158c2ecf20Sopenharmony_ci 
__SetPageUptodate(page); 24168c2ecf20Sopenharmony_ci 24178c2ecf20Sopenharmony_ci ret = -EFAULT; 24188c2ecf20Sopenharmony_ci offset = linear_page_index(dst_vma, dst_addr); 24198c2ecf20Sopenharmony_ci max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 24208c2ecf20Sopenharmony_ci if (unlikely(offset >= max_off)) 24218c2ecf20Sopenharmony_ci goto out_release; 24228c2ecf20Sopenharmony_ci 24238c2ecf20Sopenharmony_ci ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, 24248c2ecf20Sopenharmony_ci gfp & GFP_RECLAIM_MASK, dst_mm); 24258c2ecf20Sopenharmony_ci if (ret) 24268c2ecf20Sopenharmony_ci goto out_release; 24278c2ecf20Sopenharmony_ci 24288c2ecf20Sopenharmony_ci _dst_pte = mk_pte(page, dst_vma->vm_page_prot); 24298c2ecf20Sopenharmony_ci if (dst_vma->vm_flags & VM_WRITE) 24308c2ecf20Sopenharmony_ci _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); 24318c2ecf20Sopenharmony_ci else { 24328c2ecf20Sopenharmony_ci /* 24338c2ecf20Sopenharmony_ci * We don't set the pte dirty if the vma has no 24348c2ecf20Sopenharmony_ci * VM_WRITE permission, so mark the page dirty or it 24358c2ecf20Sopenharmony_ci * could be freed from under us. We could do it 24368c2ecf20Sopenharmony_ci * unconditionally before unlock_page(), but doing it 24378c2ecf20Sopenharmony_ci * only if VM_WRITE is not set is faster. 
24388c2ecf20Sopenharmony_ci */ 24398c2ecf20Sopenharmony_ci set_page_dirty(page); 24408c2ecf20Sopenharmony_ci } 24418c2ecf20Sopenharmony_ci 24428c2ecf20Sopenharmony_ci dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); 24438c2ecf20Sopenharmony_ci 24448c2ecf20Sopenharmony_ci ret = -EFAULT; 24458c2ecf20Sopenharmony_ci max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 24468c2ecf20Sopenharmony_ci if (unlikely(offset >= max_off)) 24478c2ecf20Sopenharmony_ci goto out_release_unlock; 24488c2ecf20Sopenharmony_ci 24498c2ecf20Sopenharmony_ci ret = -EEXIST; 24508c2ecf20Sopenharmony_ci if (!pte_none(*dst_pte)) 24518c2ecf20Sopenharmony_ci goto out_release_unlock; 24528c2ecf20Sopenharmony_ci 24538c2ecf20Sopenharmony_ci lru_cache_add(page); 24548c2ecf20Sopenharmony_ci 24558c2ecf20Sopenharmony_ci spin_lock_irq(&info->lock); 24568c2ecf20Sopenharmony_ci info->alloced++; 24578c2ecf20Sopenharmony_ci inode->i_blocks += BLOCKS_PER_PAGE; 24588c2ecf20Sopenharmony_ci shmem_recalc_inode(inode); 24598c2ecf20Sopenharmony_ci spin_unlock_irq(&info->lock); 24608c2ecf20Sopenharmony_ci 24618c2ecf20Sopenharmony_ci inc_mm_counter(dst_mm, mm_counter_file(page)); 24628c2ecf20Sopenharmony_ci page_add_file_rmap(page, false); 24638c2ecf20Sopenharmony_ci set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); 24648c2ecf20Sopenharmony_ci 24658c2ecf20Sopenharmony_ci /* No need to invalidate - it was non-present before */ 24668c2ecf20Sopenharmony_ci update_mmu_cache(dst_vma, dst_addr, dst_pte); 24678c2ecf20Sopenharmony_ci pte_unmap_unlock(dst_pte, ptl); 24688c2ecf20Sopenharmony_ci unlock_page(page); 24698c2ecf20Sopenharmony_ci ret = 0; 24708c2ecf20Sopenharmony_ciout: 24718c2ecf20Sopenharmony_ci return ret; 24728c2ecf20Sopenharmony_ciout_release_unlock: 24738c2ecf20Sopenharmony_ci pte_unmap_unlock(dst_pte, ptl); 24748c2ecf20Sopenharmony_ci ClearPageDirty(page); 24758c2ecf20Sopenharmony_ci delete_from_page_cache(page); 24768c2ecf20Sopenharmony_ciout_release: 24778c2ecf20Sopenharmony_ci 
unlock_page(page); 24788c2ecf20Sopenharmony_ci put_page(page); 24798c2ecf20Sopenharmony_ciout_unacct_blocks: 24808c2ecf20Sopenharmony_ci shmem_inode_unacct_blocks(inode, 1); 24818c2ecf20Sopenharmony_ci goto out; 24828c2ecf20Sopenharmony_ci} 24838c2ecf20Sopenharmony_ci 24848c2ecf20Sopenharmony_ciint shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, 24858c2ecf20Sopenharmony_ci pmd_t *dst_pmd, 24868c2ecf20Sopenharmony_ci struct vm_area_struct *dst_vma, 24878c2ecf20Sopenharmony_ci unsigned long dst_addr, 24888c2ecf20Sopenharmony_ci unsigned long src_addr, 24898c2ecf20Sopenharmony_ci struct page **pagep) 24908c2ecf20Sopenharmony_ci{ 24918c2ecf20Sopenharmony_ci return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, 24928c2ecf20Sopenharmony_ci dst_addr, src_addr, false, pagep); 24938c2ecf20Sopenharmony_ci} 24948c2ecf20Sopenharmony_ci 24958c2ecf20Sopenharmony_ciint shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, 24968c2ecf20Sopenharmony_ci pmd_t *dst_pmd, 24978c2ecf20Sopenharmony_ci struct vm_area_struct *dst_vma, 24988c2ecf20Sopenharmony_ci unsigned long dst_addr) 24998c2ecf20Sopenharmony_ci{ 25008c2ecf20Sopenharmony_ci struct page *page = NULL; 25018c2ecf20Sopenharmony_ci 25028c2ecf20Sopenharmony_ci return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, 25038c2ecf20Sopenharmony_ci dst_addr, 0, true, &page); 25048c2ecf20Sopenharmony_ci} 25058c2ecf20Sopenharmony_ci 25068c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 25078c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_symlink_inode_operations; 25088c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_short_symlink_operations; 25098c2ecf20Sopenharmony_ci 25108c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 25118c2ecf20Sopenharmony_cistatic int shmem_initxattrs(struct inode *, const struct xattr *, void *); 25128c2ecf20Sopenharmony_ci#else 25138c2ecf20Sopenharmony_ci#define shmem_initxattrs NULL 25148c2ecf20Sopenharmony_ci#endif 25158c2ecf20Sopenharmony_ci 25168c2ecf20Sopenharmony_cistatic int 
25178c2ecf20Sopenharmony_cishmem_write_begin(struct file *file, struct address_space *mapping, 25188c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned flags, 25198c2ecf20Sopenharmony_ci struct page **pagep, void **fsdata) 25208c2ecf20Sopenharmony_ci{ 25218c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 25228c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 25238c2ecf20Sopenharmony_ci pgoff_t index = pos >> PAGE_SHIFT; 25248c2ecf20Sopenharmony_ci 25258c2ecf20Sopenharmony_ci /* i_mutex is held by caller */ 25268c2ecf20Sopenharmony_ci if (unlikely(info->seals & (F_SEAL_GROW | 25278c2ecf20Sopenharmony_ci F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { 25288c2ecf20Sopenharmony_ci if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) 25298c2ecf20Sopenharmony_ci return -EPERM; 25308c2ecf20Sopenharmony_ci if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) 25318c2ecf20Sopenharmony_ci return -EPERM; 25328c2ecf20Sopenharmony_ci } 25338c2ecf20Sopenharmony_ci 25348c2ecf20Sopenharmony_ci return shmem_getpage(inode, index, pagep, SGP_WRITE); 25358c2ecf20Sopenharmony_ci} 25368c2ecf20Sopenharmony_ci 25378c2ecf20Sopenharmony_cistatic int 25388c2ecf20Sopenharmony_cishmem_write_end(struct file *file, struct address_space *mapping, 25398c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 25408c2ecf20Sopenharmony_ci struct page *page, void *fsdata) 25418c2ecf20Sopenharmony_ci{ 25428c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 25438c2ecf20Sopenharmony_ci 25448c2ecf20Sopenharmony_ci if (pos + copied > inode->i_size) 25458c2ecf20Sopenharmony_ci i_size_write(inode, pos + copied); 25468c2ecf20Sopenharmony_ci 25478c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 25488c2ecf20Sopenharmony_ci struct page *head = compound_head(page); 25498c2ecf20Sopenharmony_ci if (PageTransCompound(page)) { 25508c2ecf20Sopenharmony_ci int i; 25518c2ecf20Sopenharmony_ci 25528c2ecf20Sopenharmony_ci for (i = 0; i < HPAGE_PMD_NR; i++) { 
25538c2ecf20Sopenharmony_ci if (head + i == page) 25548c2ecf20Sopenharmony_ci continue; 25558c2ecf20Sopenharmony_ci clear_highpage(head + i); 25568c2ecf20Sopenharmony_ci flush_dcache_page(head + i); 25578c2ecf20Sopenharmony_ci } 25588c2ecf20Sopenharmony_ci } 25598c2ecf20Sopenharmony_ci if (copied < PAGE_SIZE) { 25608c2ecf20Sopenharmony_ci unsigned from = pos & (PAGE_SIZE - 1); 25618c2ecf20Sopenharmony_ci zero_user_segments(page, 0, from, 25628c2ecf20Sopenharmony_ci from + copied, PAGE_SIZE); 25638c2ecf20Sopenharmony_ci } 25648c2ecf20Sopenharmony_ci SetPageUptodate(head); 25658c2ecf20Sopenharmony_ci } 25668c2ecf20Sopenharmony_ci set_page_dirty(page); 25678c2ecf20Sopenharmony_ci unlock_page(page); 25688c2ecf20Sopenharmony_ci put_page(page); 25698c2ecf20Sopenharmony_ci 25708c2ecf20Sopenharmony_ci return copied; 25718c2ecf20Sopenharmony_ci} 25728c2ecf20Sopenharmony_ci 25738c2ecf20Sopenharmony_cistatic ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 25748c2ecf20Sopenharmony_ci{ 25758c2ecf20Sopenharmony_ci struct file *file = iocb->ki_filp; 25768c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 25778c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 25788c2ecf20Sopenharmony_ci pgoff_t index; 25798c2ecf20Sopenharmony_ci unsigned long offset; 25808c2ecf20Sopenharmony_ci enum sgp_type sgp = SGP_READ; 25818c2ecf20Sopenharmony_ci int error = 0; 25828c2ecf20Sopenharmony_ci ssize_t retval = 0; 25838c2ecf20Sopenharmony_ci loff_t *ppos = &iocb->ki_pos; 25848c2ecf20Sopenharmony_ci 25858c2ecf20Sopenharmony_ci /* 25868c2ecf20Sopenharmony_ci * Might this read be for a stacking filesystem? Then when reading 25878c2ecf20Sopenharmony_ci * holes of a sparse file, we actually need to allocate those pages, 25888c2ecf20Sopenharmony_ci * and even mark them dirty, so it cannot exceed the max_blocks limit. 
25898c2ecf20Sopenharmony_ci */ 25908c2ecf20Sopenharmony_ci if (!iter_is_iovec(to)) 25918c2ecf20Sopenharmony_ci sgp = SGP_CACHE; 25928c2ecf20Sopenharmony_ci 25938c2ecf20Sopenharmony_ci index = *ppos >> PAGE_SHIFT; 25948c2ecf20Sopenharmony_ci offset = *ppos & ~PAGE_MASK; 25958c2ecf20Sopenharmony_ci 25968c2ecf20Sopenharmony_ci for (;;) { 25978c2ecf20Sopenharmony_ci struct page *page = NULL; 25988c2ecf20Sopenharmony_ci pgoff_t end_index; 25998c2ecf20Sopenharmony_ci unsigned long nr, ret; 26008c2ecf20Sopenharmony_ci loff_t i_size = i_size_read(inode); 26018c2ecf20Sopenharmony_ci 26028c2ecf20Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 26038c2ecf20Sopenharmony_ci if (index > end_index) 26048c2ecf20Sopenharmony_ci break; 26058c2ecf20Sopenharmony_ci if (index == end_index) { 26068c2ecf20Sopenharmony_ci nr = i_size & ~PAGE_MASK; 26078c2ecf20Sopenharmony_ci if (nr <= offset) 26088c2ecf20Sopenharmony_ci break; 26098c2ecf20Sopenharmony_ci } 26108c2ecf20Sopenharmony_ci 26118c2ecf20Sopenharmony_ci error = shmem_getpage(inode, index, &page, sgp); 26128c2ecf20Sopenharmony_ci if (error) { 26138c2ecf20Sopenharmony_ci if (error == -EINVAL) 26148c2ecf20Sopenharmony_ci error = 0; 26158c2ecf20Sopenharmony_ci break; 26168c2ecf20Sopenharmony_ci } 26178c2ecf20Sopenharmony_ci if (page) { 26188c2ecf20Sopenharmony_ci if (sgp == SGP_CACHE) 26198c2ecf20Sopenharmony_ci set_page_dirty(page); 26208c2ecf20Sopenharmony_ci unlock_page(page); 26218c2ecf20Sopenharmony_ci } 26228c2ecf20Sopenharmony_ci 26238c2ecf20Sopenharmony_ci /* 26248c2ecf20Sopenharmony_ci * We must evaluate after, since reads (unlike writes) 26258c2ecf20Sopenharmony_ci * are called without i_mutex protection against truncate 26268c2ecf20Sopenharmony_ci */ 26278c2ecf20Sopenharmony_ci nr = PAGE_SIZE; 26288c2ecf20Sopenharmony_ci i_size = i_size_read(inode); 26298c2ecf20Sopenharmony_ci end_index = i_size >> PAGE_SHIFT; 26308c2ecf20Sopenharmony_ci if (index == end_index) { 26318c2ecf20Sopenharmony_ci nr = i_size & ~PAGE_MASK; 
26328c2ecf20Sopenharmony_ci if (nr <= offset) { 26338c2ecf20Sopenharmony_ci if (page) 26348c2ecf20Sopenharmony_ci put_page(page); 26358c2ecf20Sopenharmony_ci break; 26368c2ecf20Sopenharmony_ci } 26378c2ecf20Sopenharmony_ci } 26388c2ecf20Sopenharmony_ci nr -= offset; 26398c2ecf20Sopenharmony_ci 26408c2ecf20Sopenharmony_ci if (page) { 26418c2ecf20Sopenharmony_ci /* 26428c2ecf20Sopenharmony_ci * If users can be writing to this page using arbitrary 26438c2ecf20Sopenharmony_ci * virtual addresses, take care about potential aliasing 26448c2ecf20Sopenharmony_ci * before reading the page on the kernel side. 26458c2ecf20Sopenharmony_ci */ 26468c2ecf20Sopenharmony_ci if (mapping_writably_mapped(mapping)) 26478c2ecf20Sopenharmony_ci flush_dcache_page(page); 26488c2ecf20Sopenharmony_ci /* 26498c2ecf20Sopenharmony_ci * Mark the page accessed if we read the beginning. 26508c2ecf20Sopenharmony_ci */ 26518c2ecf20Sopenharmony_ci if (!offset) 26528c2ecf20Sopenharmony_ci mark_page_accessed(page); 26538c2ecf20Sopenharmony_ci } else { 26548c2ecf20Sopenharmony_ci page = ZERO_PAGE(0); 26558c2ecf20Sopenharmony_ci get_page(page); 26568c2ecf20Sopenharmony_ci } 26578c2ecf20Sopenharmony_ci 26588c2ecf20Sopenharmony_ci /* 26598c2ecf20Sopenharmony_ci * Ok, we have the page, and it's up-to-date, so 26608c2ecf20Sopenharmony_ci * now we can copy it to user space... 
26618c2ecf20Sopenharmony_ci */ 26628c2ecf20Sopenharmony_ci ret = copy_page_to_iter(page, offset, nr, to); 26638c2ecf20Sopenharmony_ci retval += ret; 26648c2ecf20Sopenharmony_ci offset += ret; 26658c2ecf20Sopenharmony_ci index += offset >> PAGE_SHIFT; 26668c2ecf20Sopenharmony_ci offset &= ~PAGE_MASK; 26678c2ecf20Sopenharmony_ci 26688c2ecf20Sopenharmony_ci put_page(page); 26698c2ecf20Sopenharmony_ci if (!iov_iter_count(to)) 26708c2ecf20Sopenharmony_ci break; 26718c2ecf20Sopenharmony_ci if (ret < nr) { 26728c2ecf20Sopenharmony_ci error = -EFAULT; 26738c2ecf20Sopenharmony_ci break; 26748c2ecf20Sopenharmony_ci } 26758c2ecf20Sopenharmony_ci cond_resched(); 26768c2ecf20Sopenharmony_ci } 26778c2ecf20Sopenharmony_ci 26788c2ecf20Sopenharmony_ci *ppos = ((loff_t) index << PAGE_SHIFT) + offset; 26798c2ecf20Sopenharmony_ci file_accessed(file); 26808c2ecf20Sopenharmony_ci return retval ? retval : error; 26818c2ecf20Sopenharmony_ci} 26828c2ecf20Sopenharmony_ci 26838c2ecf20Sopenharmony_ci/* 26848c2ecf20Sopenharmony_ci * llseek SEEK_DATA or SEEK_HOLE through the page cache. 
26858c2ecf20Sopenharmony_ci */ 26868c2ecf20Sopenharmony_cistatic pgoff_t shmem_seek_hole_data(struct address_space *mapping, 26878c2ecf20Sopenharmony_ci pgoff_t index, pgoff_t end, int whence) 26888c2ecf20Sopenharmony_ci{ 26898c2ecf20Sopenharmony_ci struct page *page; 26908c2ecf20Sopenharmony_ci struct pagevec pvec; 26918c2ecf20Sopenharmony_ci pgoff_t indices[PAGEVEC_SIZE]; 26928c2ecf20Sopenharmony_ci bool done = false; 26938c2ecf20Sopenharmony_ci int i; 26948c2ecf20Sopenharmony_ci 26958c2ecf20Sopenharmony_ci pagevec_init(&pvec); 26968c2ecf20Sopenharmony_ci pvec.nr = 1; /* start small: we may be there already */ 26978c2ecf20Sopenharmony_ci while (!done) { 26988c2ecf20Sopenharmony_ci pvec.nr = find_get_entries(mapping, index, 26998c2ecf20Sopenharmony_ci pvec.nr, pvec.pages, indices); 27008c2ecf20Sopenharmony_ci if (!pvec.nr) { 27018c2ecf20Sopenharmony_ci if (whence == SEEK_DATA) 27028c2ecf20Sopenharmony_ci index = end; 27038c2ecf20Sopenharmony_ci break; 27048c2ecf20Sopenharmony_ci } 27058c2ecf20Sopenharmony_ci for (i = 0; i < pvec.nr; i++, index++) { 27068c2ecf20Sopenharmony_ci if (index < indices[i]) { 27078c2ecf20Sopenharmony_ci if (whence == SEEK_HOLE) { 27088c2ecf20Sopenharmony_ci done = true; 27098c2ecf20Sopenharmony_ci break; 27108c2ecf20Sopenharmony_ci } 27118c2ecf20Sopenharmony_ci index = indices[i]; 27128c2ecf20Sopenharmony_ci } 27138c2ecf20Sopenharmony_ci page = pvec.pages[i]; 27148c2ecf20Sopenharmony_ci if (page && !xa_is_value(page)) { 27158c2ecf20Sopenharmony_ci if (!PageUptodate(page)) 27168c2ecf20Sopenharmony_ci page = NULL; 27178c2ecf20Sopenharmony_ci } 27188c2ecf20Sopenharmony_ci if (index >= end || 27198c2ecf20Sopenharmony_ci (page && whence == SEEK_DATA) || 27208c2ecf20Sopenharmony_ci (!page && whence == SEEK_HOLE)) { 27218c2ecf20Sopenharmony_ci done = true; 27228c2ecf20Sopenharmony_ci break; 27238c2ecf20Sopenharmony_ci } 27248c2ecf20Sopenharmony_ci } 27258c2ecf20Sopenharmony_ci pagevec_remove_exceptionals(&pvec); 27268c2ecf20Sopenharmony_ci 
pagevec_release(&pvec); 27278c2ecf20Sopenharmony_ci pvec.nr = PAGEVEC_SIZE; 27288c2ecf20Sopenharmony_ci cond_resched(); 27298c2ecf20Sopenharmony_ci } 27308c2ecf20Sopenharmony_ci return index; 27318c2ecf20Sopenharmony_ci} 27328c2ecf20Sopenharmony_ci 27338c2ecf20Sopenharmony_cistatic loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) 27348c2ecf20Sopenharmony_ci{ 27358c2ecf20Sopenharmony_ci struct address_space *mapping = file->f_mapping; 27368c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 27378c2ecf20Sopenharmony_ci pgoff_t start, end; 27388c2ecf20Sopenharmony_ci loff_t new_offset; 27398c2ecf20Sopenharmony_ci 27408c2ecf20Sopenharmony_ci if (whence != SEEK_DATA && whence != SEEK_HOLE) 27418c2ecf20Sopenharmony_ci return generic_file_llseek_size(file, offset, whence, 27428c2ecf20Sopenharmony_ci MAX_LFS_FILESIZE, i_size_read(inode)); 27438c2ecf20Sopenharmony_ci inode_lock(inode); 27448c2ecf20Sopenharmony_ci /* We're holding i_mutex so we can access i_size directly */ 27458c2ecf20Sopenharmony_ci 27468c2ecf20Sopenharmony_ci if (offset < 0 || offset >= inode->i_size) 27478c2ecf20Sopenharmony_ci offset = -ENXIO; 27488c2ecf20Sopenharmony_ci else { 27498c2ecf20Sopenharmony_ci start = offset >> PAGE_SHIFT; 27508c2ecf20Sopenharmony_ci end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; 27518c2ecf20Sopenharmony_ci new_offset = shmem_seek_hole_data(mapping, start, end, whence); 27528c2ecf20Sopenharmony_ci new_offset <<= PAGE_SHIFT; 27538c2ecf20Sopenharmony_ci if (new_offset > offset) { 27548c2ecf20Sopenharmony_ci if (new_offset < inode->i_size) 27558c2ecf20Sopenharmony_ci offset = new_offset; 27568c2ecf20Sopenharmony_ci else if (whence == SEEK_DATA) 27578c2ecf20Sopenharmony_ci offset = -ENXIO; 27588c2ecf20Sopenharmony_ci else 27598c2ecf20Sopenharmony_ci offset = inode->i_size; 27608c2ecf20Sopenharmony_ci } 27618c2ecf20Sopenharmony_ci } 27628c2ecf20Sopenharmony_ci 27638c2ecf20Sopenharmony_ci if (offset >= 0) 27648c2ecf20Sopenharmony_ci offset = 
vfs_setpos(file, offset, MAX_LFS_FILESIZE); 27658c2ecf20Sopenharmony_ci inode_unlock(inode); 27668c2ecf20Sopenharmony_ci return offset; 27678c2ecf20Sopenharmony_ci} 27688c2ecf20Sopenharmony_ci 27698c2ecf20Sopenharmony_cistatic long shmem_fallocate(struct file *file, int mode, loff_t offset, 27708c2ecf20Sopenharmony_ci loff_t len) 27718c2ecf20Sopenharmony_ci{ 27728c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 27738c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 27748c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 27758c2ecf20Sopenharmony_ci struct shmem_falloc shmem_falloc; 27768c2ecf20Sopenharmony_ci pgoff_t start, index, end; 27778c2ecf20Sopenharmony_ci int error; 27788c2ecf20Sopenharmony_ci 27798c2ecf20Sopenharmony_ci if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 27808c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 27818c2ecf20Sopenharmony_ci 27828c2ecf20Sopenharmony_ci inode_lock(inode); 27838c2ecf20Sopenharmony_ci 27848c2ecf20Sopenharmony_ci if (mode & FALLOC_FL_PUNCH_HOLE) { 27858c2ecf20Sopenharmony_ci struct address_space *mapping = file->f_mapping; 27868c2ecf20Sopenharmony_ci loff_t unmap_start = round_up(offset, PAGE_SIZE); 27878c2ecf20Sopenharmony_ci loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; 27888c2ecf20Sopenharmony_ci DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); 27898c2ecf20Sopenharmony_ci 27908c2ecf20Sopenharmony_ci /* protected by i_mutex */ 27918c2ecf20Sopenharmony_ci if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { 27928c2ecf20Sopenharmony_ci error = -EPERM; 27938c2ecf20Sopenharmony_ci goto out; 27948c2ecf20Sopenharmony_ci } 27958c2ecf20Sopenharmony_ci 27968c2ecf20Sopenharmony_ci shmem_falloc.waitq = &shmem_falloc_waitq; 27978c2ecf20Sopenharmony_ci shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT; 27988c2ecf20Sopenharmony_ci shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; 27998c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 
28008c2ecf20Sopenharmony_ci inode->i_private = &shmem_falloc; 28018c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 28028c2ecf20Sopenharmony_ci 28038c2ecf20Sopenharmony_ci if ((u64)unmap_end > (u64)unmap_start) 28048c2ecf20Sopenharmony_ci unmap_mapping_range(mapping, unmap_start, 28058c2ecf20Sopenharmony_ci 1 + unmap_end - unmap_start, 0); 28068c2ecf20Sopenharmony_ci shmem_truncate_range(inode, offset, offset + len - 1); 28078c2ecf20Sopenharmony_ci /* No need to unmap again: hole-punching leaves COWed pages */ 28088c2ecf20Sopenharmony_ci 28098c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 28108c2ecf20Sopenharmony_ci inode->i_private = NULL; 28118c2ecf20Sopenharmony_ci wake_up_all(&shmem_falloc_waitq); 28128c2ecf20Sopenharmony_ci WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head)); 28138c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 28148c2ecf20Sopenharmony_ci error = 0; 28158c2ecf20Sopenharmony_ci goto out; 28168c2ecf20Sopenharmony_ci } 28178c2ecf20Sopenharmony_ci 28188c2ecf20Sopenharmony_ci /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ 28198c2ecf20Sopenharmony_ci error = inode_newsize_ok(inode, offset + len); 28208c2ecf20Sopenharmony_ci if (error) 28218c2ecf20Sopenharmony_ci goto out; 28228c2ecf20Sopenharmony_ci 28238c2ecf20Sopenharmony_ci if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { 28248c2ecf20Sopenharmony_ci error = -EPERM; 28258c2ecf20Sopenharmony_ci goto out; 28268c2ecf20Sopenharmony_ci } 28278c2ecf20Sopenharmony_ci 28288c2ecf20Sopenharmony_ci start = offset >> PAGE_SHIFT; 28298c2ecf20Sopenharmony_ci end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 28308c2ecf20Sopenharmony_ci /* Try to avoid a swapstorm if len is impossible to satisfy */ 28318c2ecf20Sopenharmony_ci if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { 28328c2ecf20Sopenharmony_ci error = -ENOSPC; 28338c2ecf20Sopenharmony_ci goto out; 28348c2ecf20Sopenharmony_ci } 28358c2ecf20Sopenharmony_ci 28368c2ecf20Sopenharmony_ci shmem_falloc.waitq 
= NULL; 28378c2ecf20Sopenharmony_ci shmem_falloc.start = start; 28388c2ecf20Sopenharmony_ci shmem_falloc.next = start; 28398c2ecf20Sopenharmony_ci shmem_falloc.nr_falloced = 0; 28408c2ecf20Sopenharmony_ci shmem_falloc.nr_unswapped = 0; 28418c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 28428c2ecf20Sopenharmony_ci inode->i_private = &shmem_falloc; 28438c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 28448c2ecf20Sopenharmony_ci 28458c2ecf20Sopenharmony_ci for (index = start; index < end; index++) { 28468c2ecf20Sopenharmony_ci struct page *page; 28478c2ecf20Sopenharmony_ci 28488c2ecf20Sopenharmony_ci /* 28498c2ecf20Sopenharmony_ci * Good, the fallocate(2) manpage permits EINTR: we may have 28508c2ecf20Sopenharmony_ci * been interrupted because we are using up too much memory. 28518c2ecf20Sopenharmony_ci */ 28528c2ecf20Sopenharmony_ci if (signal_pending(current)) 28538c2ecf20Sopenharmony_ci error = -EINTR; 28548c2ecf20Sopenharmony_ci else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) 28558c2ecf20Sopenharmony_ci error = -ENOMEM; 28568c2ecf20Sopenharmony_ci else 28578c2ecf20Sopenharmony_ci error = shmem_getpage(inode, index, &page, SGP_FALLOC); 28588c2ecf20Sopenharmony_ci if (error) { 28598c2ecf20Sopenharmony_ci /* Remove the !PageUptodate pages we added */ 28608c2ecf20Sopenharmony_ci if (index > start) { 28618c2ecf20Sopenharmony_ci shmem_undo_range(inode, 28628c2ecf20Sopenharmony_ci (loff_t)start << PAGE_SHIFT, 28638c2ecf20Sopenharmony_ci ((loff_t)index << PAGE_SHIFT) - 1, true); 28648c2ecf20Sopenharmony_ci } 28658c2ecf20Sopenharmony_ci goto undone; 28668c2ecf20Sopenharmony_ci } 28678c2ecf20Sopenharmony_ci 28688c2ecf20Sopenharmony_ci /* 28698c2ecf20Sopenharmony_ci * Inform shmem_writepage() how far we have reached. 28708c2ecf20Sopenharmony_ci * No need for lock or barrier: we have the page lock. 
28718c2ecf20Sopenharmony_ci */ 28728c2ecf20Sopenharmony_ci shmem_falloc.next++; 28738c2ecf20Sopenharmony_ci if (!PageUptodate(page)) 28748c2ecf20Sopenharmony_ci shmem_falloc.nr_falloced++; 28758c2ecf20Sopenharmony_ci 28768c2ecf20Sopenharmony_ci /* 28778c2ecf20Sopenharmony_ci * If !PageUptodate, leave it that way so that freeable pages 28788c2ecf20Sopenharmony_ci * can be recognized if we need to rollback on error later. 28798c2ecf20Sopenharmony_ci * But set_page_dirty so that memory pressure will swap rather 28808c2ecf20Sopenharmony_ci * than free the pages we are allocating (and SGP_CACHE pages 28818c2ecf20Sopenharmony_ci * might still be clean: we now need to mark those dirty too). 28828c2ecf20Sopenharmony_ci */ 28838c2ecf20Sopenharmony_ci set_page_dirty(page); 28848c2ecf20Sopenharmony_ci unlock_page(page); 28858c2ecf20Sopenharmony_ci put_page(page); 28868c2ecf20Sopenharmony_ci cond_resched(); 28878c2ecf20Sopenharmony_ci } 28888c2ecf20Sopenharmony_ci 28898c2ecf20Sopenharmony_ci if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) 28908c2ecf20Sopenharmony_ci i_size_write(inode, offset + len); 28918c2ecf20Sopenharmony_ci inode->i_ctime = current_time(inode); 28928c2ecf20Sopenharmony_ciundone: 28938c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 28948c2ecf20Sopenharmony_ci inode->i_private = NULL; 28958c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 28968c2ecf20Sopenharmony_ciout: 28978c2ecf20Sopenharmony_ci inode_unlock(inode); 28988c2ecf20Sopenharmony_ci return error; 28998c2ecf20Sopenharmony_ci} 29008c2ecf20Sopenharmony_ci 29018c2ecf20Sopenharmony_cistatic int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) 29028c2ecf20Sopenharmony_ci{ 29038c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); 29048c2ecf20Sopenharmony_ci 29058c2ecf20Sopenharmony_ci buf->f_type = TMPFS_MAGIC; 29068c2ecf20Sopenharmony_ci buf->f_bsize = PAGE_SIZE; 29078c2ecf20Sopenharmony_ci buf->f_namelen = NAME_MAX; 29088c2ecf20Sopenharmony_ci 
if (sbinfo->max_blocks) { 29098c2ecf20Sopenharmony_ci buf->f_blocks = sbinfo->max_blocks; 29108c2ecf20Sopenharmony_ci buf->f_bavail = 29118c2ecf20Sopenharmony_ci buf->f_bfree = sbinfo->max_blocks - 29128c2ecf20Sopenharmony_ci percpu_counter_sum(&sbinfo->used_blocks); 29138c2ecf20Sopenharmony_ci } 29148c2ecf20Sopenharmony_ci if (sbinfo->max_inodes) { 29158c2ecf20Sopenharmony_ci buf->f_files = sbinfo->max_inodes; 29168c2ecf20Sopenharmony_ci buf->f_ffree = sbinfo->free_inodes; 29178c2ecf20Sopenharmony_ci } 29188c2ecf20Sopenharmony_ci /* else leave those fields 0 like simple_statfs */ 29198c2ecf20Sopenharmony_ci return 0; 29208c2ecf20Sopenharmony_ci} 29218c2ecf20Sopenharmony_ci 29228c2ecf20Sopenharmony_ci/* 29238c2ecf20Sopenharmony_ci * File creation. Allocate an inode, and we're done.. 29248c2ecf20Sopenharmony_ci */ 29258c2ecf20Sopenharmony_cistatic int 29268c2ecf20Sopenharmony_cishmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) 29278c2ecf20Sopenharmony_ci{ 29288c2ecf20Sopenharmony_ci struct inode *inode; 29298c2ecf20Sopenharmony_ci int error = -ENOSPC; 29308c2ecf20Sopenharmony_ci 29318c2ecf20Sopenharmony_ci inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE); 29328c2ecf20Sopenharmony_ci if (inode) { 29338c2ecf20Sopenharmony_ci error = simple_acl_create(dir, inode); 29348c2ecf20Sopenharmony_ci if (error) 29358c2ecf20Sopenharmony_ci goto out_iput; 29368c2ecf20Sopenharmony_ci error = security_inode_init_security(inode, dir, 29378c2ecf20Sopenharmony_ci &dentry->d_name, 29388c2ecf20Sopenharmony_ci shmem_initxattrs, NULL); 29398c2ecf20Sopenharmony_ci if (error && error != -EOPNOTSUPP) 29408c2ecf20Sopenharmony_ci goto out_iput; 29418c2ecf20Sopenharmony_ci 29428c2ecf20Sopenharmony_ci error = 0; 29438c2ecf20Sopenharmony_ci dir->i_size += BOGO_DIRENT_SIZE; 29448c2ecf20Sopenharmony_ci dir->i_ctime = dir->i_mtime = current_time(dir); 29458c2ecf20Sopenharmony_ci d_instantiate(dentry, inode); 29468c2ecf20Sopenharmony_ci dget(dentry); /* 
Extra count - pin the dentry in core */ 29478c2ecf20Sopenharmony_ci } 29488c2ecf20Sopenharmony_ci return error; 29498c2ecf20Sopenharmony_ciout_iput: 29508c2ecf20Sopenharmony_ci iput(inode); 29518c2ecf20Sopenharmony_ci return error; 29528c2ecf20Sopenharmony_ci} 29538c2ecf20Sopenharmony_ci 29548c2ecf20Sopenharmony_cistatic int 29558c2ecf20Sopenharmony_cishmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 29568c2ecf20Sopenharmony_ci{ 29578c2ecf20Sopenharmony_ci struct inode *inode; 29588c2ecf20Sopenharmony_ci int error = -ENOSPC; 29598c2ecf20Sopenharmony_ci 29608c2ecf20Sopenharmony_ci inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE); 29618c2ecf20Sopenharmony_ci if (inode) { 29628c2ecf20Sopenharmony_ci error = security_inode_init_security(inode, dir, 29638c2ecf20Sopenharmony_ci NULL, 29648c2ecf20Sopenharmony_ci shmem_initxattrs, NULL); 29658c2ecf20Sopenharmony_ci if (error && error != -EOPNOTSUPP) 29668c2ecf20Sopenharmony_ci goto out_iput; 29678c2ecf20Sopenharmony_ci error = simple_acl_create(dir, inode); 29688c2ecf20Sopenharmony_ci if (error) 29698c2ecf20Sopenharmony_ci goto out_iput; 29708c2ecf20Sopenharmony_ci d_tmpfile(dentry, inode); 29718c2ecf20Sopenharmony_ci } 29728c2ecf20Sopenharmony_ci return error; 29738c2ecf20Sopenharmony_ciout_iput: 29748c2ecf20Sopenharmony_ci iput(inode); 29758c2ecf20Sopenharmony_ci return error; 29768c2ecf20Sopenharmony_ci} 29778c2ecf20Sopenharmony_ci 29788c2ecf20Sopenharmony_cistatic int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 29798c2ecf20Sopenharmony_ci{ 29808c2ecf20Sopenharmony_ci int error; 29818c2ecf20Sopenharmony_ci 29828c2ecf20Sopenharmony_ci if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) 29838c2ecf20Sopenharmony_ci return error; 29848c2ecf20Sopenharmony_ci inc_nlink(dir); 29858c2ecf20Sopenharmony_ci return 0; 29868c2ecf20Sopenharmony_ci} 29878c2ecf20Sopenharmony_ci 29888c2ecf20Sopenharmony_cistatic int shmem_create(struct inode *dir, struct dentry *dentry, 
umode_t mode, 29898c2ecf20Sopenharmony_ci bool excl) 29908c2ecf20Sopenharmony_ci{ 29918c2ecf20Sopenharmony_ci return shmem_mknod(dir, dentry, mode | S_IFREG, 0); 29928c2ecf20Sopenharmony_ci} 29938c2ecf20Sopenharmony_ci 29948c2ecf20Sopenharmony_ci/* 29958c2ecf20Sopenharmony_ci * Link a file.. 29968c2ecf20Sopenharmony_ci */ 29978c2ecf20Sopenharmony_cistatic int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 29988c2ecf20Sopenharmony_ci{ 29998c2ecf20Sopenharmony_ci struct inode *inode = d_inode(old_dentry); 30008c2ecf20Sopenharmony_ci int ret = 0; 30018c2ecf20Sopenharmony_ci 30028c2ecf20Sopenharmony_ci /* 30038c2ecf20Sopenharmony_ci * No ordinary (disk based) filesystem counts links as inodes; 30048c2ecf20Sopenharmony_ci * but each new link needs a new dentry, pinning lowmem, and 30058c2ecf20Sopenharmony_ci * tmpfs dentries cannot be pruned until they are unlinked. 30068c2ecf20Sopenharmony_ci * But if an O_TMPFILE file is linked into the tmpfs, the 30078c2ecf20Sopenharmony_ci * first link must skip that, to get the accounting right. 
30088c2ecf20Sopenharmony_ci */ 30098c2ecf20Sopenharmony_ci if (inode->i_nlink) { 30108c2ecf20Sopenharmony_ci ret = shmem_reserve_inode(inode->i_sb, NULL); 30118c2ecf20Sopenharmony_ci if (ret) 30128c2ecf20Sopenharmony_ci goto out; 30138c2ecf20Sopenharmony_ci } 30148c2ecf20Sopenharmony_ci 30158c2ecf20Sopenharmony_ci dir->i_size += BOGO_DIRENT_SIZE; 30168c2ecf20Sopenharmony_ci inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 30178c2ecf20Sopenharmony_ci inc_nlink(inode); 30188c2ecf20Sopenharmony_ci ihold(inode); /* New dentry reference */ 30198c2ecf20Sopenharmony_ci dget(dentry); /* Extra pinning count for the created dentry */ 30208c2ecf20Sopenharmony_ci d_instantiate(dentry, inode); 30218c2ecf20Sopenharmony_ciout: 30228c2ecf20Sopenharmony_ci return ret; 30238c2ecf20Sopenharmony_ci} 30248c2ecf20Sopenharmony_ci 30258c2ecf20Sopenharmony_cistatic int shmem_unlink(struct inode *dir, struct dentry *dentry) 30268c2ecf20Sopenharmony_ci{ 30278c2ecf20Sopenharmony_ci struct inode *inode = d_inode(dentry); 30288c2ecf20Sopenharmony_ci 30298c2ecf20Sopenharmony_ci if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) 30308c2ecf20Sopenharmony_ci shmem_free_inode(inode->i_sb); 30318c2ecf20Sopenharmony_ci 30328c2ecf20Sopenharmony_ci dir->i_size -= BOGO_DIRENT_SIZE; 30338c2ecf20Sopenharmony_ci inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 30348c2ecf20Sopenharmony_ci drop_nlink(inode); 30358c2ecf20Sopenharmony_ci dput(dentry); /* Undo the count from "create" - this does all the work */ 30368c2ecf20Sopenharmony_ci return 0; 30378c2ecf20Sopenharmony_ci} 30388c2ecf20Sopenharmony_ci 30398c2ecf20Sopenharmony_cistatic int shmem_rmdir(struct inode *dir, struct dentry *dentry) 30408c2ecf20Sopenharmony_ci{ 30418c2ecf20Sopenharmony_ci if (!simple_empty(dentry)) 30428c2ecf20Sopenharmony_ci return -ENOTEMPTY; 30438c2ecf20Sopenharmony_ci 30448c2ecf20Sopenharmony_ci drop_nlink(d_inode(dentry)); 30458c2ecf20Sopenharmony_ci drop_nlink(dir); 
30468c2ecf20Sopenharmony_ci return shmem_unlink(dir, dentry); 30478c2ecf20Sopenharmony_ci} 30488c2ecf20Sopenharmony_ci 30498c2ecf20Sopenharmony_cistatic int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) 30508c2ecf20Sopenharmony_ci{ 30518c2ecf20Sopenharmony_ci bool old_is_dir = d_is_dir(old_dentry); 30528c2ecf20Sopenharmony_ci bool new_is_dir = d_is_dir(new_dentry); 30538c2ecf20Sopenharmony_ci 30548c2ecf20Sopenharmony_ci if (old_dir != new_dir && old_is_dir != new_is_dir) { 30558c2ecf20Sopenharmony_ci if (old_is_dir) { 30568c2ecf20Sopenharmony_ci drop_nlink(old_dir); 30578c2ecf20Sopenharmony_ci inc_nlink(new_dir); 30588c2ecf20Sopenharmony_ci } else { 30598c2ecf20Sopenharmony_ci drop_nlink(new_dir); 30608c2ecf20Sopenharmony_ci inc_nlink(old_dir); 30618c2ecf20Sopenharmony_ci } 30628c2ecf20Sopenharmony_ci } 30638c2ecf20Sopenharmony_ci old_dir->i_ctime = old_dir->i_mtime = 30648c2ecf20Sopenharmony_ci new_dir->i_ctime = new_dir->i_mtime = 30658c2ecf20Sopenharmony_ci d_inode(old_dentry)->i_ctime = 30668c2ecf20Sopenharmony_ci d_inode(new_dentry)->i_ctime = current_time(old_dir); 30678c2ecf20Sopenharmony_ci 30688c2ecf20Sopenharmony_ci return 0; 30698c2ecf20Sopenharmony_ci} 30708c2ecf20Sopenharmony_ci 30718c2ecf20Sopenharmony_cistatic int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry) 30728c2ecf20Sopenharmony_ci{ 30738c2ecf20Sopenharmony_ci struct dentry *whiteout; 30748c2ecf20Sopenharmony_ci int error; 30758c2ecf20Sopenharmony_ci 30768c2ecf20Sopenharmony_ci whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); 30778c2ecf20Sopenharmony_ci if (!whiteout) 30788c2ecf20Sopenharmony_ci return -ENOMEM; 30798c2ecf20Sopenharmony_ci 30808c2ecf20Sopenharmony_ci error = shmem_mknod(old_dir, whiteout, 30818c2ecf20Sopenharmony_ci S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 30828c2ecf20Sopenharmony_ci dput(whiteout); 30838c2ecf20Sopenharmony_ci if (error) 30848c2ecf20Sopenharmony_ci return error; 
30858c2ecf20Sopenharmony_ci 30868c2ecf20Sopenharmony_ci /* 30878c2ecf20Sopenharmony_ci * Cheat and hash the whiteout while the old dentry is still in 30888c2ecf20Sopenharmony_ci * place, instead of playing games with FS_RENAME_DOES_D_MOVE. 30898c2ecf20Sopenharmony_ci * 30908c2ecf20Sopenharmony_ci * d_lookup() will consistently find one of them at this point, 30918c2ecf20Sopenharmony_ci * not sure which one, but that isn't even important. 30928c2ecf20Sopenharmony_ci */ 30938c2ecf20Sopenharmony_ci d_rehash(whiteout); 30948c2ecf20Sopenharmony_ci return 0; 30958c2ecf20Sopenharmony_ci} 30968c2ecf20Sopenharmony_ci 30978c2ecf20Sopenharmony_ci/* 30988c2ecf20Sopenharmony_ci * The VFS layer already does all the dentry stuff for rename, 30998c2ecf20Sopenharmony_ci * we just have to decrement the usage count for the target if 31008c2ecf20Sopenharmony_ci * it exists so that the VFS layer correctly free's it when it 31018c2ecf20Sopenharmony_ci * gets overwritten. 31028c2ecf20Sopenharmony_ci */ 31038c2ecf20Sopenharmony_cistatic int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) 31048c2ecf20Sopenharmony_ci{ 31058c2ecf20Sopenharmony_ci struct inode *inode = d_inode(old_dentry); 31068c2ecf20Sopenharmony_ci int they_are_dirs = S_ISDIR(inode->i_mode); 31078c2ecf20Sopenharmony_ci 31088c2ecf20Sopenharmony_ci if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) 31098c2ecf20Sopenharmony_ci return -EINVAL; 31108c2ecf20Sopenharmony_ci 31118c2ecf20Sopenharmony_ci if (flags & RENAME_EXCHANGE) 31128c2ecf20Sopenharmony_ci return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry); 31138c2ecf20Sopenharmony_ci 31148c2ecf20Sopenharmony_ci if (!simple_empty(new_dentry)) 31158c2ecf20Sopenharmony_ci return -ENOTEMPTY; 31168c2ecf20Sopenharmony_ci 31178c2ecf20Sopenharmony_ci if (flags & RENAME_WHITEOUT) { 31188c2ecf20Sopenharmony_ci int error; 31198c2ecf20Sopenharmony_ci 
31208c2ecf20Sopenharmony_ci error = shmem_whiteout(old_dir, old_dentry); 31218c2ecf20Sopenharmony_ci if (error) 31228c2ecf20Sopenharmony_ci return error; 31238c2ecf20Sopenharmony_ci } 31248c2ecf20Sopenharmony_ci 31258c2ecf20Sopenharmony_ci if (d_really_is_positive(new_dentry)) { 31268c2ecf20Sopenharmony_ci (void) shmem_unlink(new_dir, new_dentry); 31278c2ecf20Sopenharmony_ci if (they_are_dirs) { 31288c2ecf20Sopenharmony_ci drop_nlink(d_inode(new_dentry)); 31298c2ecf20Sopenharmony_ci drop_nlink(old_dir); 31308c2ecf20Sopenharmony_ci } 31318c2ecf20Sopenharmony_ci } else if (they_are_dirs) { 31328c2ecf20Sopenharmony_ci drop_nlink(old_dir); 31338c2ecf20Sopenharmony_ci inc_nlink(new_dir); 31348c2ecf20Sopenharmony_ci } 31358c2ecf20Sopenharmony_ci 31368c2ecf20Sopenharmony_ci old_dir->i_size -= BOGO_DIRENT_SIZE; 31378c2ecf20Sopenharmony_ci new_dir->i_size += BOGO_DIRENT_SIZE; 31388c2ecf20Sopenharmony_ci old_dir->i_ctime = old_dir->i_mtime = 31398c2ecf20Sopenharmony_ci new_dir->i_ctime = new_dir->i_mtime = 31408c2ecf20Sopenharmony_ci inode->i_ctime = current_time(old_dir); 31418c2ecf20Sopenharmony_ci return 0; 31428c2ecf20Sopenharmony_ci} 31438c2ecf20Sopenharmony_ci 31448c2ecf20Sopenharmony_cistatic int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 31458c2ecf20Sopenharmony_ci{ 31468c2ecf20Sopenharmony_ci int error; 31478c2ecf20Sopenharmony_ci int len; 31488c2ecf20Sopenharmony_ci struct inode *inode; 31498c2ecf20Sopenharmony_ci struct page *page; 31508c2ecf20Sopenharmony_ci 31518c2ecf20Sopenharmony_ci len = strlen(symname) + 1; 31528c2ecf20Sopenharmony_ci if (len > PAGE_SIZE) 31538c2ecf20Sopenharmony_ci return -ENAMETOOLONG; 31548c2ecf20Sopenharmony_ci 31558c2ecf20Sopenharmony_ci inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0, 31568c2ecf20Sopenharmony_ci VM_NORESERVE); 31578c2ecf20Sopenharmony_ci if (!inode) 31588c2ecf20Sopenharmony_ci return -ENOSPC; 31598c2ecf20Sopenharmony_ci 31608c2ecf20Sopenharmony_ci error = 
security_inode_init_security(inode, dir, &dentry->d_name, 31618c2ecf20Sopenharmony_ci shmem_initxattrs, NULL); 31628c2ecf20Sopenharmony_ci if (error && error != -EOPNOTSUPP) { 31638c2ecf20Sopenharmony_ci iput(inode); 31648c2ecf20Sopenharmony_ci return error; 31658c2ecf20Sopenharmony_ci } 31668c2ecf20Sopenharmony_ci 31678c2ecf20Sopenharmony_ci inode->i_size = len-1; 31688c2ecf20Sopenharmony_ci if (len <= SHORT_SYMLINK_LEN) { 31698c2ecf20Sopenharmony_ci inode->i_link = kmemdup(symname, len, GFP_KERNEL); 31708c2ecf20Sopenharmony_ci if (!inode->i_link) { 31718c2ecf20Sopenharmony_ci iput(inode); 31728c2ecf20Sopenharmony_ci return -ENOMEM; 31738c2ecf20Sopenharmony_ci } 31748c2ecf20Sopenharmony_ci inode->i_op = &shmem_short_symlink_operations; 31758c2ecf20Sopenharmony_ci } else { 31768c2ecf20Sopenharmony_ci inode_nohighmem(inode); 31778c2ecf20Sopenharmony_ci error = shmem_getpage(inode, 0, &page, SGP_WRITE); 31788c2ecf20Sopenharmony_ci if (error) { 31798c2ecf20Sopenharmony_ci iput(inode); 31808c2ecf20Sopenharmony_ci return error; 31818c2ecf20Sopenharmony_ci } 31828c2ecf20Sopenharmony_ci inode->i_mapping->a_ops = &shmem_aops; 31838c2ecf20Sopenharmony_ci inode->i_op = &shmem_symlink_inode_operations; 31848c2ecf20Sopenharmony_ci memcpy(page_address(page), symname, len); 31858c2ecf20Sopenharmony_ci SetPageUptodate(page); 31868c2ecf20Sopenharmony_ci set_page_dirty(page); 31878c2ecf20Sopenharmony_ci unlock_page(page); 31888c2ecf20Sopenharmony_ci put_page(page); 31898c2ecf20Sopenharmony_ci } 31908c2ecf20Sopenharmony_ci dir->i_size += BOGO_DIRENT_SIZE; 31918c2ecf20Sopenharmony_ci dir->i_ctime = dir->i_mtime = current_time(dir); 31928c2ecf20Sopenharmony_ci d_instantiate(dentry, inode); 31938c2ecf20Sopenharmony_ci dget(dentry); 31948c2ecf20Sopenharmony_ci return 0; 31958c2ecf20Sopenharmony_ci} 31968c2ecf20Sopenharmony_ci 31978c2ecf20Sopenharmony_cistatic void shmem_put_link(void *arg) 31988c2ecf20Sopenharmony_ci{ 31998c2ecf20Sopenharmony_ci mark_page_accessed(arg); 
32008c2ecf20Sopenharmony_ci put_page(arg); 32018c2ecf20Sopenharmony_ci} 32028c2ecf20Sopenharmony_ci 32038c2ecf20Sopenharmony_cistatic const char *shmem_get_link(struct dentry *dentry, 32048c2ecf20Sopenharmony_ci struct inode *inode, 32058c2ecf20Sopenharmony_ci struct delayed_call *done) 32068c2ecf20Sopenharmony_ci{ 32078c2ecf20Sopenharmony_ci struct page *page = NULL; 32088c2ecf20Sopenharmony_ci int error; 32098c2ecf20Sopenharmony_ci if (!dentry) { 32108c2ecf20Sopenharmony_ci page = find_get_page(inode->i_mapping, 0); 32118c2ecf20Sopenharmony_ci if (!page) 32128c2ecf20Sopenharmony_ci return ERR_PTR(-ECHILD); 32138c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 32148c2ecf20Sopenharmony_ci put_page(page); 32158c2ecf20Sopenharmony_ci return ERR_PTR(-ECHILD); 32168c2ecf20Sopenharmony_ci } 32178c2ecf20Sopenharmony_ci } else { 32188c2ecf20Sopenharmony_ci error = shmem_getpage(inode, 0, &page, SGP_READ); 32198c2ecf20Sopenharmony_ci if (error) 32208c2ecf20Sopenharmony_ci return ERR_PTR(error); 32218c2ecf20Sopenharmony_ci unlock_page(page); 32228c2ecf20Sopenharmony_ci } 32238c2ecf20Sopenharmony_ci set_delayed_call(done, shmem_put_link, page); 32248c2ecf20Sopenharmony_ci return page_address(page); 32258c2ecf20Sopenharmony_ci} 32268c2ecf20Sopenharmony_ci 32278c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 32288c2ecf20Sopenharmony_ci/* 32298c2ecf20Sopenharmony_ci * Superblocks without xattr inode operations may get some security.* xattr 32308c2ecf20Sopenharmony_ci * support from the LSM "for free". As soon as we have any other xattrs 32318c2ecf20Sopenharmony_ci * like ACLs, we also need to implement the security.* handlers at 32328c2ecf20Sopenharmony_ci * filesystem level, though. 32338c2ecf20Sopenharmony_ci */ 32348c2ecf20Sopenharmony_ci 32358c2ecf20Sopenharmony_ci/* 32368c2ecf20Sopenharmony_ci * Callback for security_inode_init_security() for acquiring xattrs. 
32378c2ecf20Sopenharmony_ci */ 32388c2ecf20Sopenharmony_cistatic int shmem_initxattrs(struct inode *inode, 32398c2ecf20Sopenharmony_ci const struct xattr *xattr_array, 32408c2ecf20Sopenharmony_ci void *fs_info) 32418c2ecf20Sopenharmony_ci{ 32428c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 32438c2ecf20Sopenharmony_ci const struct xattr *xattr; 32448c2ecf20Sopenharmony_ci struct simple_xattr *new_xattr; 32458c2ecf20Sopenharmony_ci size_t len; 32468c2ecf20Sopenharmony_ci 32478c2ecf20Sopenharmony_ci for (xattr = xattr_array; xattr->name != NULL; xattr++) { 32488c2ecf20Sopenharmony_ci new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); 32498c2ecf20Sopenharmony_ci if (!new_xattr) 32508c2ecf20Sopenharmony_ci return -ENOMEM; 32518c2ecf20Sopenharmony_ci 32528c2ecf20Sopenharmony_ci len = strlen(xattr->name) + 1; 32538c2ecf20Sopenharmony_ci new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, 32548c2ecf20Sopenharmony_ci GFP_KERNEL); 32558c2ecf20Sopenharmony_ci if (!new_xattr->name) { 32568c2ecf20Sopenharmony_ci kvfree(new_xattr); 32578c2ecf20Sopenharmony_ci return -ENOMEM; 32588c2ecf20Sopenharmony_ci } 32598c2ecf20Sopenharmony_ci 32608c2ecf20Sopenharmony_ci memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, 32618c2ecf20Sopenharmony_ci XATTR_SECURITY_PREFIX_LEN); 32628c2ecf20Sopenharmony_ci memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, 32638c2ecf20Sopenharmony_ci xattr->name, len); 32648c2ecf20Sopenharmony_ci 32658c2ecf20Sopenharmony_ci simple_xattr_list_add(&info->xattrs, new_xattr); 32668c2ecf20Sopenharmony_ci } 32678c2ecf20Sopenharmony_ci 32688c2ecf20Sopenharmony_ci return 0; 32698c2ecf20Sopenharmony_ci} 32708c2ecf20Sopenharmony_ci 32718c2ecf20Sopenharmony_cistatic int shmem_xattr_handler_get(const struct xattr_handler *handler, 32728c2ecf20Sopenharmony_ci struct dentry *unused, struct inode *inode, 32738c2ecf20Sopenharmony_ci const char *name, void *buffer, size_t size) 32748c2ecf20Sopenharmony_ci{ 32758c2ecf20Sopenharmony_ci 
struct shmem_inode_info *info = SHMEM_I(inode); 32768c2ecf20Sopenharmony_ci 32778c2ecf20Sopenharmony_ci name = xattr_full_name(handler, name); 32788c2ecf20Sopenharmony_ci return simple_xattr_get(&info->xattrs, name, buffer, size); 32798c2ecf20Sopenharmony_ci} 32808c2ecf20Sopenharmony_ci 32818c2ecf20Sopenharmony_cistatic int shmem_xattr_handler_set(const struct xattr_handler *handler, 32828c2ecf20Sopenharmony_ci struct dentry *unused, struct inode *inode, 32838c2ecf20Sopenharmony_ci const char *name, const void *value, 32848c2ecf20Sopenharmony_ci size_t size, int flags) 32858c2ecf20Sopenharmony_ci{ 32868c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(inode); 32878c2ecf20Sopenharmony_ci 32888c2ecf20Sopenharmony_ci name = xattr_full_name(handler, name); 32898c2ecf20Sopenharmony_ci return simple_xattr_set(&info->xattrs, name, value, size, flags, NULL); 32908c2ecf20Sopenharmony_ci} 32918c2ecf20Sopenharmony_ci 32928c2ecf20Sopenharmony_cistatic const struct xattr_handler shmem_security_xattr_handler = { 32938c2ecf20Sopenharmony_ci .prefix = XATTR_SECURITY_PREFIX, 32948c2ecf20Sopenharmony_ci .get = shmem_xattr_handler_get, 32958c2ecf20Sopenharmony_ci .set = shmem_xattr_handler_set, 32968c2ecf20Sopenharmony_ci}; 32978c2ecf20Sopenharmony_ci 32988c2ecf20Sopenharmony_cistatic const struct xattr_handler shmem_trusted_xattr_handler = { 32998c2ecf20Sopenharmony_ci .prefix = XATTR_TRUSTED_PREFIX, 33008c2ecf20Sopenharmony_ci .get = shmem_xattr_handler_get, 33018c2ecf20Sopenharmony_ci .set = shmem_xattr_handler_set, 33028c2ecf20Sopenharmony_ci}; 33038c2ecf20Sopenharmony_ci 33048c2ecf20Sopenharmony_cistatic const struct xattr_handler *shmem_xattr_handlers[] = { 33058c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL 33068c2ecf20Sopenharmony_ci &posix_acl_access_xattr_handler, 33078c2ecf20Sopenharmony_ci &posix_acl_default_xattr_handler, 33088c2ecf20Sopenharmony_ci#endif 33098c2ecf20Sopenharmony_ci &shmem_security_xattr_handler, 33108c2ecf20Sopenharmony_ci 
&shmem_trusted_xattr_handler, 33118c2ecf20Sopenharmony_ci NULL 33128c2ecf20Sopenharmony_ci}; 33138c2ecf20Sopenharmony_ci 33148c2ecf20Sopenharmony_cistatic ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) 33158c2ecf20Sopenharmony_ci{ 33168c2ecf20Sopenharmony_ci struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); 33178c2ecf20Sopenharmony_ci return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); 33188c2ecf20Sopenharmony_ci} 33198c2ecf20Sopenharmony_ci#endif /* CONFIG_TMPFS_XATTR */ 33208c2ecf20Sopenharmony_ci 33218c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_short_symlink_operations = { 33228c2ecf20Sopenharmony_ci .get_link = simple_get_link, 33238c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 33248c2ecf20Sopenharmony_ci .listxattr = shmem_listxattr, 33258c2ecf20Sopenharmony_ci#endif 33268c2ecf20Sopenharmony_ci}; 33278c2ecf20Sopenharmony_ci 33288c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_symlink_inode_operations = { 33298c2ecf20Sopenharmony_ci .get_link = shmem_get_link, 33308c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 33318c2ecf20Sopenharmony_ci .listxattr = shmem_listxattr, 33328c2ecf20Sopenharmony_ci#endif 33338c2ecf20Sopenharmony_ci}; 33348c2ecf20Sopenharmony_ci 33358c2ecf20Sopenharmony_cistatic struct dentry *shmem_get_parent(struct dentry *child) 33368c2ecf20Sopenharmony_ci{ 33378c2ecf20Sopenharmony_ci return ERR_PTR(-ESTALE); 33388c2ecf20Sopenharmony_ci} 33398c2ecf20Sopenharmony_ci 33408c2ecf20Sopenharmony_cistatic int shmem_match(struct inode *ino, void *vfh) 33418c2ecf20Sopenharmony_ci{ 33428c2ecf20Sopenharmony_ci __u32 *fh = vfh; 33438c2ecf20Sopenharmony_ci __u64 inum = fh[2]; 33448c2ecf20Sopenharmony_ci inum = (inum << 32) | fh[1]; 33458c2ecf20Sopenharmony_ci return ino->i_ino == inum && fh[0] == ino->i_generation; 33468c2ecf20Sopenharmony_ci} 33478c2ecf20Sopenharmony_ci 33488c2ecf20Sopenharmony_ci/* Find any alias of inode, but prefer a hashed alias */ 
33498c2ecf20Sopenharmony_cistatic struct dentry *shmem_find_alias(struct inode *inode) 33508c2ecf20Sopenharmony_ci{ 33518c2ecf20Sopenharmony_ci struct dentry *alias = d_find_alias(inode); 33528c2ecf20Sopenharmony_ci 33538c2ecf20Sopenharmony_ci return alias ?: d_find_any_alias(inode); 33548c2ecf20Sopenharmony_ci} 33558c2ecf20Sopenharmony_ci 33568c2ecf20Sopenharmony_ci 33578c2ecf20Sopenharmony_cistatic struct dentry *shmem_fh_to_dentry(struct super_block *sb, 33588c2ecf20Sopenharmony_ci struct fid *fid, int fh_len, int fh_type) 33598c2ecf20Sopenharmony_ci{ 33608c2ecf20Sopenharmony_ci struct inode *inode; 33618c2ecf20Sopenharmony_ci struct dentry *dentry = NULL; 33628c2ecf20Sopenharmony_ci u64 inum; 33638c2ecf20Sopenharmony_ci 33648c2ecf20Sopenharmony_ci if (fh_len < 3) 33658c2ecf20Sopenharmony_ci return NULL; 33668c2ecf20Sopenharmony_ci 33678c2ecf20Sopenharmony_ci inum = fid->raw[2]; 33688c2ecf20Sopenharmony_ci inum = (inum << 32) | fid->raw[1]; 33698c2ecf20Sopenharmony_ci 33708c2ecf20Sopenharmony_ci inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), 33718c2ecf20Sopenharmony_ci shmem_match, fid->raw); 33728c2ecf20Sopenharmony_ci if (inode) { 33738c2ecf20Sopenharmony_ci dentry = shmem_find_alias(inode); 33748c2ecf20Sopenharmony_ci iput(inode); 33758c2ecf20Sopenharmony_ci } 33768c2ecf20Sopenharmony_ci 33778c2ecf20Sopenharmony_ci return dentry; 33788c2ecf20Sopenharmony_ci} 33798c2ecf20Sopenharmony_ci 33808c2ecf20Sopenharmony_cistatic int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, 33818c2ecf20Sopenharmony_ci struct inode *parent) 33828c2ecf20Sopenharmony_ci{ 33838c2ecf20Sopenharmony_ci if (*len < 3) { 33848c2ecf20Sopenharmony_ci *len = 3; 33858c2ecf20Sopenharmony_ci return FILEID_INVALID; 33868c2ecf20Sopenharmony_ci } 33878c2ecf20Sopenharmony_ci 33888c2ecf20Sopenharmony_ci if (inode_unhashed(inode)) { 33898c2ecf20Sopenharmony_ci /* Unfortunately insert_inode_hash is not idempotent, 33908c2ecf20Sopenharmony_ci * so as we hash inodes here rather than 
at creation 33918c2ecf20Sopenharmony_ci * time, we need a lock to ensure we only try 33928c2ecf20Sopenharmony_ci * to do it once 33938c2ecf20Sopenharmony_ci */ 33948c2ecf20Sopenharmony_ci static DEFINE_SPINLOCK(lock); 33958c2ecf20Sopenharmony_ci spin_lock(&lock); 33968c2ecf20Sopenharmony_ci if (inode_unhashed(inode)) 33978c2ecf20Sopenharmony_ci __insert_inode_hash(inode, 33988c2ecf20Sopenharmony_ci inode->i_ino + inode->i_generation); 33998c2ecf20Sopenharmony_ci spin_unlock(&lock); 34008c2ecf20Sopenharmony_ci } 34018c2ecf20Sopenharmony_ci 34028c2ecf20Sopenharmony_ci fh[0] = inode->i_generation; 34038c2ecf20Sopenharmony_ci fh[1] = inode->i_ino; 34048c2ecf20Sopenharmony_ci fh[2] = ((__u64)inode->i_ino) >> 32; 34058c2ecf20Sopenharmony_ci 34068c2ecf20Sopenharmony_ci *len = 3; 34078c2ecf20Sopenharmony_ci return 1; 34088c2ecf20Sopenharmony_ci} 34098c2ecf20Sopenharmony_ci 34108c2ecf20Sopenharmony_cistatic const struct export_operations shmem_export_ops = { 34118c2ecf20Sopenharmony_ci .get_parent = shmem_get_parent, 34128c2ecf20Sopenharmony_ci .encode_fh = shmem_encode_fh, 34138c2ecf20Sopenharmony_ci .fh_to_dentry = shmem_fh_to_dentry, 34148c2ecf20Sopenharmony_ci}; 34158c2ecf20Sopenharmony_ci 34168c2ecf20Sopenharmony_cienum shmem_param { 34178c2ecf20Sopenharmony_ci Opt_gid, 34188c2ecf20Sopenharmony_ci Opt_huge, 34198c2ecf20Sopenharmony_ci Opt_mode, 34208c2ecf20Sopenharmony_ci Opt_mpol, 34218c2ecf20Sopenharmony_ci Opt_nr_blocks, 34228c2ecf20Sopenharmony_ci Opt_nr_inodes, 34238c2ecf20Sopenharmony_ci Opt_size, 34248c2ecf20Sopenharmony_ci Opt_uid, 34258c2ecf20Sopenharmony_ci Opt_inode32, 34268c2ecf20Sopenharmony_ci Opt_inode64, 34278c2ecf20Sopenharmony_ci}; 34288c2ecf20Sopenharmony_ci 34298c2ecf20Sopenharmony_cistatic const struct constant_table shmem_param_enums_huge[] = { 34308c2ecf20Sopenharmony_ci {"never", SHMEM_HUGE_NEVER }, 34318c2ecf20Sopenharmony_ci {"always", SHMEM_HUGE_ALWAYS }, 34328c2ecf20Sopenharmony_ci {"within_size", SHMEM_HUGE_WITHIN_SIZE }, 
34338c2ecf20Sopenharmony_ci {"advise", SHMEM_HUGE_ADVISE }, 34348c2ecf20Sopenharmony_ci {} 34358c2ecf20Sopenharmony_ci}; 34368c2ecf20Sopenharmony_ci 34378c2ecf20Sopenharmony_ciconst struct fs_parameter_spec shmem_fs_parameters[] = { 34388c2ecf20Sopenharmony_ci fsparam_u32 ("gid", Opt_gid), 34398c2ecf20Sopenharmony_ci fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), 34408c2ecf20Sopenharmony_ci fsparam_u32oct("mode", Opt_mode), 34418c2ecf20Sopenharmony_ci fsparam_string("mpol", Opt_mpol), 34428c2ecf20Sopenharmony_ci fsparam_string("nr_blocks", Opt_nr_blocks), 34438c2ecf20Sopenharmony_ci fsparam_string("nr_inodes", Opt_nr_inodes), 34448c2ecf20Sopenharmony_ci fsparam_string("size", Opt_size), 34458c2ecf20Sopenharmony_ci fsparam_u32 ("uid", Opt_uid), 34468c2ecf20Sopenharmony_ci fsparam_flag ("inode32", Opt_inode32), 34478c2ecf20Sopenharmony_ci fsparam_flag ("inode64", Opt_inode64), 34488c2ecf20Sopenharmony_ci {} 34498c2ecf20Sopenharmony_ci}; 34508c2ecf20Sopenharmony_ci 34518c2ecf20Sopenharmony_cistatic int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) 34528c2ecf20Sopenharmony_ci{ 34538c2ecf20Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 34548c2ecf20Sopenharmony_ci struct fs_parse_result result; 34558c2ecf20Sopenharmony_ci unsigned long long size; 34568c2ecf20Sopenharmony_ci char *rest; 34578c2ecf20Sopenharmony_ci int opt; 34588c2ecf20Sopenharmony_ci kuid_t kuid; 34598c2ecf20Sopenharmony_ci kgid_t kgid; 34608c2ecf20Sopenharmony_ci 34618c2ecf20Sopenharmony_ci opt = fs_parse(fc, shmem_fs_parameters, param, &result); 34628c2ecf20Sopenharmony_ci if (opt < 0) 34638c2ecf20Sopenharmony_ci return opt; 34648c2ecf20Sopenharmony_ci 34658c2ecf20Sopenharmony_ci switch (opt) { 34668c2ecf20Sopenharmony_ci case Opt_size: 34678c2ecf20Sopenharmony_ci size = memparse(param->string, &rest); 34688c2ecf20Sopenharmony_ci if (*rest == '%') { 34698c2ecf20Sopenharmony_ci size <<= PAGE_SHIFT; 34708c2ecf20Sopenharmony_ci size *= totalram_pages(); 
34718c2ecf20Sopenharmony_ci do_div(size, 100); 34728c2ecf20Sopenharmony_ci rest++; 34738c2ecf20Sopenharmony_ci } 34748c2ecf20Sopenharmony_ci if (*rest) 34758c2ecf20Sopenharmony_ci goto bad_value; 34768c2ecf20Sopenharmony_ci ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE); 34778c2ecf20Sopenharmony_ci ctx->seen |= SHMEM_SEEN_BLOCKS; 34788c2ecf20Sopenharmony_ci break; 34798c2ecf20Sopenharmony_ci case Opt_nr_blocks: 34808c2ecf20Sopenharmony_ci ctx->blocks = memparse(param->string, &rest); 34818c2ecf20Sopenharmony_ci if (*rest) 34828c2ecf20Sopenharmony_ci goto bad_value; 34838c2ecf20Sopenharmony_ci ctx->seen |= SHMEM_SEEN_BLOCKS; 34848c2ecf20Sopenharmony_ci break; 34858c2ecf20Sopenharmony_ci case Opt_nr_inodes: 34868c2ecf20Sopenharmony_ci ctx->inodes = memparse(param->string, &rest); 34878c2ecf20Sopenharmony_ci if (*rest) 34888c2ecf20Sopenharmony_ci goto bad_value; 34898c2ecf20Sopenharmony_ci ctx->seen |= SHMEM_SEEN_INODES; 34908c2ecf20Sopenharmony_ci break; 34918c2ecf20Sopenharmony_ci case Opt_mode: 34928c2ecf20Sopenharmony_ci ctx->mode = result.uint_32 & 07777; 34938c2ecf20Sopenharmony_ci break; 34948c2ecf20Sopenharmony_ci case Opt_uid: 34958c2ecf20Sopenharmony_ci kuid = make_kuid(current_user_ns(), result.uint_32); 34968c2ecf20Sopenharmony_ci if (!uid_valid(kuid)) 34978c2ecf20Sopenharmony_ci goto bad_value; 34988c2ecf20Sopenharmony_ci 34998c2ecf20Sopenharmony_ci /* 35008c2ecf20Sopenharmony_ci * The requested uid must be representable in the 35018c2ecf20Sopenharmony_ci * filesystem's idmapping. 
35028c2ecf20Sopenharmony_ci */ 35038c2ecf20Sopenharmony_ci if (!kuid_has_mapping(fc->user_ns, kuid)) 35048c2ecf20Sopenharmony_ci goto bad_value; 35058c2ecf20Sopenharmony_ci 35068c2ecf20Sopenharmony_ci ctx->uid = kuid; 35078c2ecf20Sopenharmony_ci break; 35088c2ecf20Sopenharmony_ci case Opt_gid: 35098c2ecf20Sopenharmony_ci kgid = make_kgid(current_user_ns(), result.uint_32); 35108c2ecf20Sopenharmony_ci if (!gid_valid(kgid)) 35118c2ecf20Sopenharmony_ci goto bad_value; 35128c2ecf20Sopenharmony_ci 35138c2ecf20Sopenharmony_ci /* 35148c2ecf20Sopenharmony_ci * The requested gid must be representable in the 35158c2ecf20Sopenharmony_ci * filesystem's idmapping. 35168c2ecf20Sopenharmony_ci */ 35178c2ecf20Sopenharmony_ci if (!kgid_has_mapping(fc->user_ns, kgid)) 35188c2ecf20Sopenharmony_ci goto bad_value; 35198c2ecf20Sopenharmony_ci 35208c2ecf20Sopenharmony_ci ctx->gid = kgid; 35218c2ecf20Sopenharmony_ci break; 35228c2ecf20Sopenharmony_ci case Opt_huge: 35238c2ecf20Sopenharmony_ci ctx->huge = result.uint_32; 35248c2ecf20Sopenharmony_ci if (ctx->huge != SHMEM_HUGE_NEVER && 35258c2ecf20Sopenharmony_ci !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && 35268c2ecf20Sopenharmony_ci has_transparent_hugepage())) 35278c2ecf20Sopenharmony_ci goto unsupported_parameter; 35288c2ecf20Sopenharmony_ci ctx->seen |= SHMEM_SEEN_HUGE; 35298c2ecf20Sopenharmony_ci break; 35308c2ecf20Sopenharmony_ci case Opt_mpol: 35318c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_NUMA)) { 35328c2ecf20Sopenharmony_ci mpol_put(ctx->mpol); 35338c2ecf20Sopenharmony_ci ctx->mpol = NULL; 35348c2ecf20Sopenharmony_ci if (mpol_parse_str(param->string, &ctx->mpol)) 35358c2ecf20Sopenharmony_ci goto bad_value; 35368c2ecf20Sopenharmony_ci break; 35378c2ecf20Sopenharmony_ci } 35388c2ecf20Sopenharmony_ci goto unsupported_parameter; 35398c2ecf20Sopenharmony_ci case Opt_inode32: 35408c2ecf20Sopenharmony_ci ctx->full_inums = false; 35418c2ecf20Sopenharmony_ci ctx->seen |= SHMEM_SEEN_INUMS; 35428c2ecf20Sopenharmony_ci break; 
35438c2ecf20Sopenharmony_ci case Opt_inode64: 35448c2ecf20Sopenharmony_ci if (sizeof(ino_t) < 8) { 35458c2ecf20Sopenharmony_ci return invalfc(fc, 35468c2ecf20Sopenharmony_ci "Cannot use inode64 with <64bit inums in kernel\n"); 35478c2ecf20Sopenharmony_ci } 35488c2ecf20Sopenharmony_ci ctx->full_inums = true; 35498c2ecf20Sopenharmony_ci ctx->seen |= SHMEM_SEEN_INUMS; 35508c2ecf20Sopenharmony_ci break; 35518c2ecf20Sopenharmony_ci } 35528c2ecf20Sopenharmony_ci return 0; 35538c2ecf20Sopenharmony_ci 35548c2ecf20Sopenharmony_ciunsupported_parameter: 35558c2ecf20Sopenharmony_ci return invalfc(fc, "Unsupported parameter '%s'", param->key); 35568c2ecf20Sopenharmony_cibad_value: 35578c2ecf20Sopenharmony_ci return invalfc(fc, "Bad value for '%s'", param->key); 35588c2ecf20Sopenharmony_ci} 35598c2ecf20Sopenharmony_ci 35608c2ecf20Sopenharmony_cistatic int shmem_parse_options(struct fs_context *fc, void *data) 35618c2ecf20Sopenharmony_ci{ 35628c2ecf20Sopenharmony_ci char *options = data; 35638c2ecf20Sopenharmony_ci 35648c2ecf20Sopenharmony_ci if (options) { 35658c2ecf20Sopenharmony_ci int err = security_sb_eat_lsm_opts(options, &fc->security); 35668c2ecf20Sopenharmony_ci if (err) 35678c2ecf20Sopenharmony_ci return err; 35688c2ecf20Sopenharmony_ci } 35698c2ecf20Sopenharmony_ci 35708c2ecf20Sopenharmony_ci while (options != NULL) { 35718c2ecf20Sopenharmony_ci char *this_char = options; 35728c2ecf20Sopenharmony_ci for (;;) { 35738c2ecf20Sopenharmony_ci /* 35748c2ecf20Sopenharmony_ci * NUL-terminate this option: unfortunately, 35758c2ecf20Sopenharmony_ci * mount options form a comma-separated list, 35768c2ecf20Sopenharmony_ci * but mpol's nodelist may also contain commas. 
35778c2ecf20Sopenharmony_ci */ 35788c2ecf20Sopenharmony_ci options = strchr(options, ','); 35798c2ecf20Sopenharmony_ci if (options == NULL) 35808c2ecf20Sopenharmony_ci break; 35818c2ecf20Sopenharmony_ci options++; 35828c2ecf20Sopenharmony_ci if (!isdigit(*options)) { 35838c2ecf20Sopenharmony_ci options[-1] = '\0'; 35848c2ecf20Sopenharmony_ci break; 35858c2ecf20Sopenharmony_ci } 35868c2ecf20Sopenharmony_ci } 35878c2ecf20Sopenharmony_ci if (*this_char) { 35888c2ecf20Sopenharmony_ci char *value = strchr(this_char,'='); 35898c2ecf20Sopenharmony_ci size_t len = 0; 35908c2ecf20Sopenharmony_ci int err; 35918c2ecf20Sopenharmony_ci 35928c2ecf20Sopenharmony_ci if (value) { 35938c2ecf20Sopenharmony_ci *value++ = '\0'; 35948c2ecf20Sopenharmony_ci len = strlen(value); 35958c2ecf20Sopenharmony_ci } 35968c2ecf20Sopenharmony_ci err = vfs_parse_fs_string(fc, this_char, value, len); 35978c2ecf20Sopenharmony_ci if (err < 0) 35988c2ecf20Sopenharmony_ci return err; 35998c2ecf20Sopenharmony_ci } 36008c2ecf20Sopenharmony_ci } 36018c2ecf20Sopenharmony_ci return 0; 36028c2ecf20Sopenharmony_ci} 36038c2ecf20Sopenharmony_ci 36048c2ecf20Sopenharmony_ci/* 36058c2ecf20Sopenharmony_ci * Reconfigure a shmem filesystem. 36068c2ecf20Sopenharmony_ci * 36078c2ecf20Sopenharmony_ci * Note that we disallow change from limited->unlimited blocks/inodes while any 36088c2ecf20Sopenharmony_ci * are in use; but we must separately disallow unlimited->limited, because in 36098c2ecf20Sopenharmony_ci * that case we have no record of how much is already in use. 
36108c2ecf20Sopenharmony_ci */ 36118c2ecf20Sopenharmony_cistatic int shmem_reconfigure(struct fs_context *fc) 36128c2ecf20Sopenharmony_ci{ 36138c2ecf20Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 36148c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); 36158c2ecf20Sopenharmony_ci unsigned long inodes; 36168c2ecf20Sopenharmony_ci const char *err; 36178c2ecf20Sopenharmony_ci 36188c2ecf20Sopenharmony_ci spin_lock(&sbinfo->stat_lock); 36198c2ecf20Sopenharmony_ci inodes = sbinfo->max_inodes - sbinfo->free_inodes; 36208c2ecf20Sopenharmony_ci if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { 36218c2ecf20Sopenharmony_ci if (!sbinfo->max_blocks) { 36228c2ecf20Sopenharmony_ci err = "Cannot retroactively limit size"; 36238c2ecf20Sopenharmony_ci goto out; 36248c2ecf20Sopenharmony_ci } 36258c2ecf20Sopenharmony_ci if (percpu_counter_compare(&sbinfo->used_blocks, 36268c2ecf20Sopenharmony_ci ctx->blocks) > 0) { 36278c2ecf20Sopenharmony_ci err = "Too small a size for current use"; 36288c2ecf20Sopenharmony_ci goto out; 36298c2ecf20Sopenharmony_ci } 36308c2ecf20Sopenharmony_ci } 36318c2ecf20Sopenharmony_ci if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) { 36328c2ecf20Sopenharmony_ci if (!sbinfo->max_inodes) { 36338c2ecf20Sopenharmony_ci err = "Cannot retroactively limit inodes"; 36348c2ecf20Sopenharmony_ci goto out; 36358c2ecf20Sopenharmony_ci } 36368c2ecf20Sopenharmony_ci if (ctx->inodes < inodes) { 36378c2ecf20Sopenharmony_ci err = "Too few inodes for current use"; 36388c2ecf20Sopenharmony_ci goto out; 36398c2ecf20Sopenharmony_ci } 36408c2ecf20Sopenharmony_ci } 36418c2ecf20Sopenharmony_ci 36428c2ecf20Sopenharmony_ci if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums && 36438c2ecf20Sopenharmony_ci sbinfo->next_ino > UINT_MAX) { 36448c2ecf20Sopenharmony_ci err = "Current inum too high to switch to 32-bit inums"; 36458c2ecf20Sopenharmony_ci goto out; 36468c2ecf20Sopenharmony_ci } 36478c2ecf20Sopenharmony_ci 
36488c2ecf20Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_HUGE) 36498c2ecf20Sopenharmony_ci sbinfo->huge = ctx->huge; 36508c2ecf20Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_INUMS) 36518c2ecf20Sopenharmony_ci sbinfo->full_inums = ctx->full_inums; 36528c2ecf20Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_BLOCKS) 36538c2ecf20Sopenharmony_ci sbinfo->max_blocks = ctx->blocks; 36548c2ecf20Sopenharmony_ci if (ctx->seen & SHMEM_SEEN_INODES) { 36558c2ecf20Sopenharmony_ci sbinfo->max_inodes = ctx->inodes; 36568c2ecf20Sopenharmony_ci sbinfo->free_inodes = ctx->inodes - inodes; 36578c2ecf20Sopenharmony_ci } 36588c2ecf20Sopenharmony_ci 36598c2ecf20Sopenharmony_ci /* 36608c2ecf20Sopenharmony_ci * Preserve previous mempolicy unless mpol remount option was specified. 36618c2ecf20Sopenharmony_ci */ 36628c2ecf20Sopenharmony_ci if (ctx->mpol) { 36638c2ecf20Sopenharmony_ci mpol_put(sbinfo->mpol); 36648c2ecf20Sopenharmony_ci sbinfo->mpol = ctx->mpol; /* transfers initial ref */ 36658c2ecf20Sopenharmony_ci ctx->mpol = NULL; 36668c2ecf20Sopenharmony_ci } 36678c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->stat_lock); 36688c2ecf20Sopenharmony_ci return 0; 36698c2ecf20Sopenharmony_ciout: 36708c2ecf20Sopenharmony_ci spin_unlock(&sbinfo->stat_lock); 36718c2ecf20Sopenharmony_ci return invalfc(fc, "%s", err); 36728c2ecf20Sopenharmony_ci} 36738c2ecf20Sopenharmony_ci 36748c2ecf20Sopenharmony_cistatic int shmem_show_options(struct seq_file *seq, struct dentry *root) 36758c2ecf20Sopenharmony_ci{ 36768c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb); 36778c2ecf20Sopenharmony_ci 36788c2ecf20Sopenharmony_ci if (sbinfo->max_blocks != shmem_default_max_blocks()) 36798c2ecf20Sopenharmony_ci seq_printf(seq, ",size=%luk", 36808c2ecf20Sopenharmony_ci sbinfo->max_blocks << (PAGE_SHIFT - 10)); 36818c2ecf20Sopenharmony_ci if (sbinfo->max_inodes != shmem_default_max_inodes()) 36828c2ecf20Sopenharmony_ci seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); 36838c2ecf20Sopenharmony_ci if 
(sbinfo->mode != (0777 | S_ISVTX)) 36848c2ecf20Sopenharmony_ci seq_printf(seq, ",mode=%03ho", sbinfo->mode); 36858c2ecf20Sopenharmony_ci if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) 36868c2ecf20Sopenharmony_ci seq_printf(seq, ",uid=%u", 36878c2ecf20Sopenharmony_ci from_kuid_munged(&init_user_ns, sbinfo->uid)); 36888c2ecf20Sopenharmony_ci if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) 36898c2ecf20Sopenharmony_ci seq_printf(seq, ",gid=%u", 36908c2ecf20Sopenharmony_ci from_kgid_munged(&init_user_ns, sbinfo->gid)); 36918c2ecf20Sopenharmony_ci 36928c2ecf20Sopenharmony_ci /* 36938c2ecf20Sopenharmony_ci * Showing inode{64,32} might be useful even if it's the system default, 36948c2ecf20Sopenharmony_ci * since then people don't have to resort to checking both here and 36958c2ecf20Sopenharmony_ci * /proc/config.gz to confirm 64-bit inums were successfully applied 36968c2ecf20Sopenharmony_ci * (which may not even exist if IKCONFIG_PROC isn't enabled). 36978c2ecf20Sopenharmony_ci * 36988c2ecf20Sopenharmony_ci * We hide it when inode64 isn't the default and we are using 32-bit 36998c2ecf20Sopenharmony_ci * inodes, since that probably just means the feature isn't even under 37008c2ecf20Sopenharmony_ci * consideration. 37018c2ecf20Sopenharmony_ci * 37028c2ecf20Sopenharmony_ci * As such: 37038c2ecf20Sopenharmony_ci * 37048c2ecf20Sopenharmony_ci * +-----------------+-----------------+ 37058c2ecf20Sopenharmony_ci * | TMPFS_INODE64=y | TMPFS_INODE64=n | 37068c2ecf20Sopenharmony_ci * +------------------+-----------------+-----------------+ 37078c2ecf20Sopenharmony_ci * | full_inums=true | show | show | 37088c2ecf20Sopenharmony_ci * | full_inums=false | show | hide | 37098c2ecf20Sopenharmony_ci * +------------------+-----------------+-----------------+ 37108c2ecf20Sopenharmony_ci * 37118c2ecf20Sopenharmony_ci */ 37128c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums) 37138c2ecf20Sopenharmony_ci seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 
64 : 32)); 37148c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 37158c2ecf20Sopenharmony_ci /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ 37168c2ecf20Sopenharmony_ci if (sbinfo->huge) 37178c2ecf20Sopenharmony_ci seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); 37188c2ecf20Sopenharmony_ci#endif 37198c2ecf20Sopenharmony_ci shmem_show_mpol(seq, sbinfo->mpol); 37208c2ecf20Sopenharmony_ci return 0; 37218c2ecf20Sopenharmony_ci} 37228c2ecf20Sopenharmony_ci 37238c2ecf20Sopenharmony_ci#endif /* CONFIG_TMPFS */ 37248c2ecf20Sopenharmony_ci 37258c2ecf20Sopenharmony_cistatic void shmem_put_super(struct super_block *sb) 37268c2ecf20Sopenharmony_ci{ 37278c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 37288c2ecf20Sopenharmony_ci 37298c2ecf20Sopenharmony_ci free_percpu(sbinfo->ino_batch); 37308c2ecf20Sopenharmony_ci percpu_counter_destroy(&sbinfo->used_blocks); 37318c2ecf20Sopenharmony_ci mpol_put(sbinfo->mpol); 37328c2ecf20Sopenharmony_ci kfree(sbinfo); 37338c2ecf20Sopenharmony_ci sb->s_fs_info = NULL; 37348c2ecf20Sopenharmony_ci} 37358c2ecf20Sopenharmony_ci 37368c2ecf20Sopenharmony_cistatic int shmem_fill_super(struct super_block *sb, struct fs_context *fc) 37378c2ecf20Sopenharmony_ci{ 37388c2ecf20Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 37398c2ecf20Sopenharmony_ci struct inode *inode; 37408c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo; 37418c2ecf20Sopenharmony_ci int err = -ENOMEM; 37428c2ecf20Sopenharmony_ci 37438c2ecf20Sopenharmony_ci /* Round up to L1_CACHE_BYTES to resist false sharing */ 37448c2ecf20Sopenharmony_ci sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info), 37458c2ecf20Sopenharmony_ci L1_CACHE_BYTES), GFP_KERNEL); 37468c2ecf20Sopenharmony_ci if (!sbinfo) 37478c2ecf20Sopenharmony_ci return -ENOMEM; 37488c2ecf20Sopenharmony_ci 37498c2ecf20Sopenharmony_ci sb->s_fs_info = sbinfo; 37508c2ecf20Sopenharmony_ci 37518c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 
37528c2ecf20Sopenharmony_ci /* 37538c2ecf20Sopenharmony_ci * Per default we only allow half of the physical ram per 37548c2ecf20Sopenharmony_ci * tmpfs instance, limiting inodes to one per page of lowmem; 37558c2ecf20Sopenharmony_ci * but the internal instance is left unlimited. 37568c2ecf20Sopenharmony_ci */ 37578c2ecf20Sopenharmony_ci if (!(sb->s_flags & SB_KERNMOUNT)) { 37588c2ecf20Sopenharmony_ci if (!(ctx->seen & SHMEM_SEEN_BLOCKS)) 37598c2ecf20Sopenharmony_ci ctx->blocks = shmem_default_max_blocks(); 37608c2ecf20Sopenharmony_ci if (!(ctx->seen & SHMEM_SEEN_INODES)) 37618c2ecf20Sopenharmony_ci ctx->inodes = shmem_default_max_inodes(); 37628c2ecf20Sopenharmony_ci if (!(ctx->seen & SHMEM_SEEN_INUMS)) 37638c2ecf20Sopenharmony_ci ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); 37648c2ecf20Sopenharmony_ci } else { 37658c2ecf20Sopenharmony_ci sb->s_flags |= SB_NOUSER; 37668c2ecf20Sopenharmony_ci } 37678c2ecf20Sopenharmony_ci sb->s_export_op = &shmem_export_ops; 37688c2ecf20Sopenharmony_ci sb->s_flags |= SB_NOSEC; 37698c2ecf20Sopenharmony_ci#else 37708c2ecf20Sopenharmony_ci sb->s_flags |= SB_NOUSER; 37718c2ecf20Sopenharmony_ci#endif 37728c2ecf20Sopenharmony_ci sbinfo->max_blocks = ctx->blocks; 37738c2ecf20Sopenharmony_ci sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes; 37748c2ecf20Sopenharmony_ci if (sb->s_flags & SB_KERNMOUNT) { 37758c2ecf20Sopenharmony_ci sbinfo->ino_batch = alloc_percpu(ino_t); 37768c2ecf20Sopenharmony_ci if (!sbinfo->ino_batch) 37778c2ecf20Sopenharmony_ci goto failed; 37788c2ecf20Sopenharmony_ci } 37798c2ecf20Sopenharmony_ci sbinfo->uid = ctx->uid; 37808c2ecf20Sopenharmony_ci sbinfo->gid = ctx->gid; 37818c2ecf20Sopenharmony_ci sbinfo->full_inums = ctx->full_inums; 37828c2ecf20Sopenharmony_ci sbinfo->mode = ctx->mode; 37838c2ecf20Sopenharmony_ci sbinfo->huge = ctx->huge; 37848c2ecf20Sopenharmony_ci sbinfo->mpol = ctx->mpol; 37858c2ecf20Sopenharmony_ci ctx->mpol = NULL; 37868c2ecf20Sopenharmony_ci 37878c2ecf20Sopenharmony_ci 
spin_lock_init(&sbinfo->stat_lock); 37888c2ecf20Sopenharmony_ci if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) 37898c2ecf20Sopenharmony_ci goto failed; 37908c2ecf20Sopenharmony_ci spin_lock_init(&sbinfo->shrinklist_lock); 37918c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&sbinfo->shrinklist); 37928c2ecf20Sopenharmony_ci 37938c2ecf20Sopenharmony_ci sb->s_maxbytes = MAX_LFS_FILESIZE; 37948c2ecf20Sopenharmony_ci sb->s_blocksize = PAGE_SIZE; 37958c2ecf20Sopenharmony_ci sb->s_blocksize_bits = PAGE_SHIFT; 37968c2ecf20Sopenharmony_ci sb->s_magic = TMPFS_MAGIC; 37978c2ecf20Sopenharmony_ci sb->s_op = &shmem_ops; 37988c2ecf20Sopenharmony_ci sb->s_time_gran = 1; 37998c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 38008c2ecf20Sopenharmony_ci sb->s_xattr = shmem_xattr_handlers; 38018c2ecf20Sopenharmony_ci#endif 38028c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL 38038c2ecf20Sopenharmony_ci sb->s_flags |= SB_POSIXACL; 38048c2ecf20Sopenharmony_ci#endif 38058c2ecf20Sopenharmony_ci uuid_gen(&sb->s_uuid); 38068c2ecf20Sopenharmony_ci 38078c2ecf20Sopenharmony_ci inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); 38088c2ecf20Sopenharmony_ci if (!inode) 38098c2ecf20Sopenharmony_ci goto failed; 38108c2ecf20Sopenharmony_ci inode->i_uid = sbinfo->uid; 38118c2ecf20Sopenharmony_ci inode->i_gid = sbinfo->gid; 38128c2ecf20Sopenharmony_ci sb->s_root = d_make_root(inode); 38138c2ecf20Sopenharmony_ci if (!sb->s_root) 38148c2ecf20Sopenharmony_ci goto failed; 38158c2ecf20Sopenharmony_ci return 0; 38168c2ecf20Sopenharmony_ci 38178c2ecf20Sopenharmony_cifailed: 38188c2ecf20Sopenharmony_ci shmem_put_super(sb); 38198c2ecf20Sopenharmony_ci return err; 38208c2ecf20Sopenharmony_ci} 38218c2ecf20Sopenharmony_ci 38228c2ecf20Sopenharmony_cistatic int shmem_get_tree(struct fs_context *fc) 38238c2ecf20Sopenharmony_ci{ 38248c2ecf20Sopenharmony_ci return get_tree_nodev(fc, shmem_fill_super); 38258c2ecf20Sopenharmony_ci} 38268c2ecf20Sopenharmony_ci 
38278c2ecf20Sopenharmony_cistatic void shmem_free_fc(struct fs_context *fc) 38288c2ecf20Sopenharmony_ci{ 38298c2ecf20Sopenharmony_ci struct shmem_options *ctx = fc->fs_private; 38308c2ecf20Sopenharmony_ci 38318c2ecf20Sopenharmony_ci if (ctx) { 38328c2ecf20Sopenharmony_ci mpol_put(ctx->mpol); 38338c2ecf20Sopenharmony_ci kfree(ctx); 38348c2ecf20Sopenharmony_ci } 38358c2ecf20Sopenharmony_ci} 38368c2ecf20Sopenharmony_ci 38378c2ecf20Sopenharmony_cistatic const struct fs_context_operations shmem_fs_context_ops = { 38388c2ecf20Sopenharmony_ci .free = shmem_free_fc, 38398c2ecf20Sopenharmony_ci .get_tree = shmem_get_tree, 38408c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 38418c2ecf20Sopenharmony_ci .parse_monolithic = shmem_parse_options, 38428c2ecf20Sopenharmony_ci .parse_param = shmem_parse_one, 38438c2ecf20Sopenharmony_ci .reconfigure = shmem_reconfigure, 38448c2ecf20Sopenharmony_ci#endif 38458c2ecf20Sopenharmony_ci}; 38468c2ecf20Sopenharmony_ci 38478c2ecf20Sopenharmony_cistatic struct kmem_cache *shmem_inode_cachep; 38488c2ecf20Sopenharmony_ci 38498c2ecf20Sopenharmony_cistatic struct inode *shmem_alloc_inode(struct super_block *sb) 38508c2ecf20Sopenharmony_ci{ 38518c2ecf20Sopenharmony_ci struct shmem_inode_info *info; 38528c2ecf20Sopenharmony_ci info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); 38538c2ecf20Sopenharmony_ci if (!info) 38548c2ecf20Sopenharmony_ci return NULL; 38558c2ecf20Sopenharmony_ci return &info->vfs_inode; 38568c2ecf20Sopenharmony_ci} 38578c2ecf20Sopenharmony_ci 38588c2ecf20Sopenharmony_cistatic void shmem_free_in_core_inode(struct inode *inode) 38598c2ecf20Sopenharmony_ci{ 38608c2ecf20Sopenharmony_ci if (S_ISLNK(inode->i_mode)) 38618c2ecf20Sopenharmony_ci kfree(inode->i_link); 38628c2ecf20Sopenharmony_ci kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 38638c2ecf20Sopenharmony_ci} 38648c2ecf20Sopenharmony_ci 38658c2ecf20Sopenharmony_cistatic void shmem_destroy_inode(struct inode *inode) 38668c2ecf20Sopenharmony_ci{ 38678c2ecf20Sopenharmony_ci 
if (S_ISREG(inode->i_mode)) 38688c2ecf20Sopenharmony_ci mpol_free_shared_policy(&SHMEM_I(inode)->policy); 38698c2ecf20Sopenharmony_ci} 38708c2ecf20Sopenharmony_ci 38718c2ecf20Sopenharmony_cistatic void shmem_init_inode(void *foo) 38728c2ecf20Sopenharmony_ci{ 38738c2ecf20Sopenharmony_ci struct shmem_inode_info *info = foo; 38748c2ecf20Sopenharmony_ci inode_init_once(&info->vfs_inode); 38758c2ecf20Sopenharmony_ci} 38768c2ecf20Sopenharmony_ci 38778c2ecf20Sopenharmony_cistatic void shmem_init_inodecache(void) 38788c2ecf20Sopenharmony_ci{ 38798c2ecf20Sopenharmony_ci shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", 38808c2ecf20Sopenharmony_ci sizeof(struct shmem_inode_info), 38818c2ecf20Sopenharmony_ci 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); 38828c2ecf20Sopenharmony_ci} 38838c2ecf20Sopenharmony_ci 38848c2ecf20Sopenharmony_cistatic void shmem_destroy_inodecache(void) 38858c2ecf20Sopenharmony_ci{ 38868c2ecf20Sopenharmony_ci kmem_cache_destroy(shmem_inode_cachep); 38878c2ecf20Sopenharmony_ci} 38888c2ecf20Sopenharmony_ci 38898c2ecf20Sopenharmony_cistatic const struct address_space_operations shmem_aops = { 38908c2ecf20Sopenharmony_ci .writepage = shmem_writepage, 38918c2ecf20Sopenharmony_ci .set_page_dirty = __set_page_dirty_no_writeback, 38928c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 38938c2ecf20Sopenharmony_ci .write_begin = shmem_write_begin, 38948c2ecf20Sopenharmony_ci .write_end = shmem_write_end, 38958c2ecf20Sopenharmony_ci#endif 38968c2ecf20Sopenharmony_ci#ifdef CONFIG_MIGRATION 38978c2ecf20Sopenharmony_ci .migratepage = migrate_page, 38988c2ecf20Sopenharmony_ci#endif 38998c2ecf20Sopenharmony_ci .error_remove_page = generic_error_remove_page, 39008c2ecf20Sopenharmony_ci}; 39018c2ecf20Sopenharmony_ci 39028c2ecf20Sopenharmony_cistatic const struct file_operations shmem_file_operations = { 39038c2ecf20Sopenharmony_ci .mmap = shmem_mmap, 39048c2ecf20Sopenharmony_ci .get_unmapped_area = shmem_get_unmapped_area, 39058c2ecf20Sopenharmony_ci#ifdef 
CONFIG_TMPFS 39068c2ecf20Sopenharmony_ci .llseek = shmem_file_llseek, 39078c2ecf20Sopenharmony_ci .read_iter = shmem_file_read_iter, 39088c2ecf20Sopenharmony_ci .write_iter = generic_file_write_iter, 39098c2ecf20Sopenharmony_ci .fsync = noop_fsync, 39108c2ecf20Sopenharmony_ci .splice_read = generic_file_splice_read, 39118c2ecf20Sopenharmony_ci .splice_write = iter_file_splice_write, 39128c2ecf20Sopenharmony_ci .fallocate = shmem_fallocate, 39138c2ecf20Sopenharmony_ci#endif 39148c2ecf20Sopenharmony_ci}; 39158c2ecf20Sopenharmony_ci 39168c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_inode_operations = { 39178c2ecf20Sopenharmony_ci .getattr = shmem_getattr, 39188c2ecf20Sopenharmony_ci .setattr = shmem_setattr, 39198c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 39208c2ecf20Sopenharmony_ci .listxattr = shmem_listxattr, 39218c2ecf20Sopenharmony_ci .set_acl = simple_set_acl, 39228c2ecf20Sopenharmony_ci#endif 39238c2ecf20Sopenharmony_ci}; 39248c2ecf20Sopenharmony_ci 39258c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_dir_inode_operations = { 39268c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 39278c2ecf20Sopenharmony_ci .create = shmem_create, 39288c2ecf20Sopenharmony_ci .lookup = simple_lookup, 39298c2ecf20Sopenharmony_ci .link = shmem_link, 39308c2ecf20Sopenharmony_ci .unlink = shmem_unlink, 39318c2ecf20Sopenharmony_ci .symlink = shmem_symlink, 39328c2ecf20Sopenharmony_ci .mkdir = shmem_mkdir, 39338c2ecf20Sopenharmony_ci .rmdir = shmem_rmdir, 39348c2ecf20Sopenharmony_ci .mknod = shmem_mknod, 39358c2ecf20Sopenharmony_ci .rename = shmem_rename2, 39368c2ecf20Sopenharmony_ci .tmpfile = shmem_tmpfile, 39378c2ecf20Sopenharmony_ci#endif 39388c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 39398c2ecf20Sopenharmony_ci .listxattr = shmem_listxattr, 39408c2ecf20Sopenharmony_ci#endif 39418c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL 39428c2ecf20Sopenharmony_ci .setattr = shmem_setattr, 39438c2ecf20Sopenharmony_ci .set_acl = simple_set_acl, 
39448c2ecf20Sopenharmony_ci#endif 39458c2ecf20Sopenharmony_ci}; 39468c2ecf20Sopenharmony_ci 39478c2ecf20Sopenharmony_cistatic const struct inode_operations shmem_special_inode_operations = { 39488c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_XATTR 39498c2ecf20Sopenharmony_ci .listxattr = shmem_listxattr, 39508c2ecf20Sopenharmony_ci#endif 39518c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS_POSIX_ACL 39528c2ecf20Sopenharmony_ci .setattr = shmem_setattr, 39538c2ecf20Sopenharmony_ci .set_acl = simple_set_acl, 39548c2ecf20Sopenharmony_ci#endif 39558c2ecf20Sopenharmony_ci}; 39568c2ecf20Sopenharmony_ci 39578c2ecf20Sopenharmony_cistatic const struct super_operations shmem_ops = { 39588c2ecf20Sopenharmony_ci .alloc_inode = shmem_alloc_inode, 39598c2ecf20Sopenharmony_ci .free_inode = shmem_free_in_core_inode, 39608c2ecf20Sopenharmony_ci .destroy_inode = shmem_destroy_inode, 39618c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 39628c2ecf20Sopenharmony_ci .statfs = shmem_statfs, 39638c2ecf20Sopenharmony_ci .show_options = shmem_show_options, 39648c2ecf20Sopenharmony_ci#endif 39658c2ecf20Sopenharmony_ci .evict_inode = shmem_evict_inode, 39668c2ecf20Sopenharmony_ci .drop_inode = generic_delete_inode, 39678c2ecf20Sopenharmony_ci .put_super = shmem_put_super, 39688c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 39698c2ecf20Sopenharmony_ci .nr_cached_objects = shmem_unused_huge_count, 39708c2ecf20Sopenharmony_ci .free_cached_objects = shmem_unused_huge_scan, 39718c2ecf20Sopenharmony_ci#endif 39728c2ecf20Sopenharmony_ci}; 39738c2ecf20Sopenharmony_ci 39748c2ecf20Sopenharmony_cistatic const struct vm_operations_struct shmem_vm_ops = { 39758c2ecf20Sopenharmony_ci .fault = shmem_fault, 39768c2ecf20Sopenharmony_ci .map_pages = filemap_map_pages, 39778c2ecf20Sopenharmony_ci#ifdef CONFIG_NUMA 39788c2ecf20Sopenharmony_ci .set_policy = shmem_set_policy, 39798c2ecf20Sopenharmony_ci .get_policy = shmem_get_policy, 39808c2ecf20Sopenharmony_ci#endif 39818c2ecf20Sopenharmony_ci}; 
39828c2ecf20Sopenharmony_ci 39838c2ecf20Sopenharmony_ciint shmem_init_fs_context(struct fs_context *fc) 39848c2ecf20Sopenharmony_ci{ 39858c2ecf20Sopenharmony_ci struct shmem_options *ctx; 39868c2ecf20Sopenharmony_ci 39878c2ecf20Sopenharmony_ci ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL); 39888c2ecf20Sopenharmony_ci if (!ctx) 39898c2ecf20Sopenharmony_ci return -ENOMEM; 39908c2ecf20Sopenharmony_ci 39918c2ecf20Sopenharmony_ci ctx->mode = 0777 | S_ISVTX; 39928c2ecf20Sopenharmony_ci ctx->uid = current_fsuid(); 39938c2ecf20Sopenharmony_ci ctx->gid = current_fsgid(); 39948c2ecf20Sopenharmony_ci 39958c2ecf20Sopenharmony_ci fc->fs_private = ctx; 39968c2ecf20Sopenharmony_ci fc->ops = &shmem_fs_context_ops; 39978c2ecf20Sopenharmony_ci return 0; 39988c2ecf20Sopenharmony_ci} 39998c2ecf20Sopenharmony_ci 40008c2ecf20Sopenharmony_cistatic struct file_system_type shmem_fs_type = { 40018c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 40028c2ecf20Sopenharmony_ci .name = "tmpfs", 40038c2ecf20Sopenharmony_ci .init_fs_context = shmem_init_fs_context, 40048c2ecf20Sopenharmony_ci#ifdef CONFIG_TMPFS 40058c2ecf20Sopenharmony_ci .parameters = shmem_fs_parameters, 40068c2ecf20Sopenharmony_ci#endif 40078c2ecf20Sopenharmony_ci .kill_sb = kill_litter_super, 40088c2ecf20Sopenharmony_ci .fs_flags = FS_USERNS_MOUNT | FS_THP_SUPPORT, 40098c2ecf20Sopenharmony_ci}; 40108c2ecf20Sopenharmony_ci 40118c2ecf20Sopenharmony_ciint __init shmem_init(void) 40128c2ecf20Sopenharmony_ci{ 40138c2ecf20Sopenharmony_ci int error; 40148c2ecf20Sopenharmony_ci 40158c2ecf20Sopenharmony_ci shmem_init_inodecache(); 40168c2ecf20Sopenharmony_ci 40178c2ecf20Sopenharmony_ci error = register_filesystem(&shmem_fs_type); 40188c2ecf20Sopenharmony_ci if (error) { 40198c2ecf20Sopenharmony_ci pr_err("Could not register tmpfs\n"); 40208c2ecf20Sopenharmony_ci goto out2; 40218c2ecf20Sopenharmony_ci } 40228c2ecf20Sopenharmony_ci 40238c2ecf20Sopenharmony_ci shm_mnt = kern_mount(&shmem_fs_type); 40248c2ecf20Sopenharmony_ci if 
(IS_ERR(shm_mnt)) { 40258c2ecf20Sopenharmony_ci error = PTR_ERR(shm_mnt); 40268c2ecf20Sopenharmony_ci pr_err("Could not kern_mount tmpfs\n"); 40278c2ecf20Sopenharmony_ci goto out1; 40288c2ecf20Sopenharmony_ci } 40298c2ecf20Sopenharmony_ci 40308c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 40318c2ecf20Sopenharmony_ci if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) 40328c2ecf20Sopenharmony_ci SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; 40338c2ecf20Sopenharmony_ci else 40348c2ecf20Sopenharmony_ci shmem_huge = 0; /* just in case it was patched */ 40358c2ecf20Sopenharmony_ci#endif 40368c2ecf20Sopenharmony_ci return 0; 40378c2ecf20Sopenharmony_ci 40388c2ecf20Sopenharmony_ciout1: 40398c2ecf20Sopenharmony_ci unregister_filesystem(&shmem_fs_type); 40408c2ecf20Sopenharmony_ciout2: 40418c2ecf20Sopenharmony_ci shmem_destroy_inodecache(); 40428c2ecf20Sopenharmony_ci shm_mnt = ERR_PTR(error); 40438c2ecf20Sopenharmony_ci return error; 40448c2ecf20Sopenharmony_ci} 40458c2ecf20Sopenharmony_ci 40468c2ecf20Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) 40478c2ecf20Sopenharmony_cistatic ssize_t shmem_enabled_show(struct kobject *kobj, 40488c2ecf20Sopenharmony_ci struct kobj_attribute *attr, char *buf) 40498c2ecf20Sopenharmony_ci{ 40508c2ecf20Sopenharmony_ci static const int values[] = { 40518c2ecf20Sopenharmony_ci SHMEM_HUGE_ALWAYS, 40528c2ecf20Sopenharmony_ci SHMEM_HUGE_WITHIN_SIZE, 40538c2ecf20Sopenharmony_ci SHMEM_HUGE_ADVISE, 40548c2ecf20Sopenharmony_ci SHMEM_HUGE_NEVER, 40558c2ecf20Sopenharmony_ci SHMEM_HUGE_DENY, 40568c2ecf20Sopenharmony_ci SHMEM_HUGE_FORCE, 40578c2ecf20Sopenharmony_ci }; 40588c2ecf20Sopenharmony_ci int i, count; 40598c2ecf20Sopenharmony_ci 40608c2ecf20Sopenharmony_ci for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) { 40618c2ecf20Sopenharmony_ci const char *fmt = shmem_huge == values[i] ? 
"[%s] " : "%s "; 40628c2ecf20Sopenharmony_ci 40638c2ecf20Sopenharmony_ci count += sprintf(buf + count, fmt, 40648c2ecf20Sopenharmony_ci shmem_format_huge(values[i])); 40658c2ecf20Sopenharmony_ci } 40668c2ecf20Sopenharmony_ci buf[count - 1] = '\n'; 40678c2ecf20Sopenharmony_ci return count; 40688c2ecf20Sopenharmony_ci} 40698c2ecf20Sopenharmony_ci 40708c2ecf20Sopenharmony_cistatic ssize_t shmem_enabled_store(struct kobject *kobj, 40718c2ecf20Sopenharmony_ci struct kobj_attribute *attr, const char *buf, size_t count) 40728c2ecf20Sopenharmony_ci{ 40738c2ecf20Sopenharmony_ci char tmp[16]; 40748c2ecf20Sopenharmony_ci int huge; 40758c2ecf20Sopenharmony_ci 40768c2ecf20Sopenharmony_ci if (count + 1 > sizeof(tmp)) 40778c2ecf20Sopenharmony_ci return -EINVAL; 40788c2ecf20Sopenharmony_ci memcpy(tmp, buf, count); 40798c2ecf20Sopenharmony_ci tmp[count] = '\0'; 40808c2ecf20Sopenharmony_ci if (count && tmp[count - 1] == '\n') 40818c2ecf20Sopenharmony_ci tmp[count - 1] = '\0'; 40828c2ecf20Sopenharmony_ci 40838c2ecf20Sopenharmony_ci huge = shmem_parse_huge(tmp); 40848c2ecf20Sopenharmony_ci if (huge == -EINVAL) 40858c2ecf20Sopenharmony_ci return -EINVAL; 40868c2ecf20Sopenharmony_ci if (!has_transparent_hugepage() && 40878c2ecf20Sopenharmony_ci huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY) 40888c2ecf20Sopenharmony_ci return -EINVAL; 40898c2ecf20Sopenharmony_ci 40908c2ecf20Sopenharmony_ci shmem_huge = huge; 40918c2ecf20Sopenharmony_ci if (shmem_huge > SHMEM_HUGE_DENY) 40928c2ecf20Sopenharmony_ci SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; 40938c2ecf20Sopenharmony_ci return count; 40948c2ecf20Sopenharmony_ci} 40958c2ecf20Sopenharmony_ci 40968c2ecf20Sopenharmony_cistruct kobj_attribute shmem_enabled_attr = 40978c2ecf20Sopenharmony_ci __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store); 40988c2ecf20Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */ 40998c2ecf20Sopenharmony_ci 41008c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 
41018c2ecf20Sopenharmony_cibool shmem_huge_enabled(struct vm_area_struct *vma) 41028c2ecf20Sopenharmony_ci{ 41038c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 41048c2ecf20Sopenharmony_ci struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 41058c2ecf20Sopenharmony_ci loff_t i_size; 41068c2ecf20Sopenharmony_ci pgoff_t off; 41078c2ecf20Sopenharmony_ci 41088c2ecf20Sopenharmony_ci if (!transhuge_vma_enabled(vma, vma->vm_flags)) 41098c2ecf20Sopenharmony_ci return false; 41108c2ecf20Sopenharmony_ci if (shmem_huge == SHMEM_HUGE_FORCE) 41118c2ecf20Sopenharmony_ci return true; 41128c2ecf20Sopenharmony_ci if (shmem_huge == SHMEM_HUGE_DENY) 41138c2ecf20Sopenharmony_ci return false; 41148c2ecf20Sopenharmony_ci switch (sbinfo->huge) { 41158c2ecf20Sopenharmony_ci case SHMEM_HUGE_NEVER: 41168c2ecf20Sopenharmony_ci return false; 41178c2ecf20Sopenharmony_ci case SHMEM_HUGE_ALWAYS: 41188c2ecf20Sopenharmony_ci return true; 41198c2ecf20Sopenharmony_ci case SHMEM_HUGE_WITHIN_SIZE: 41208c2ecf20Sopenharmony_ci off = round_up(vma->vm_pgoff, HPAGE_PMD_NR); 41218c2ecf20Sopenharmony_ci i_size = round_up(i_size_read(inode), PAGE_SIZE); 41228c2ecf20Sopenharmony_ci if (i_size >= HPAGE_PMD_SIZE && 41238c2ecf20Sopenharmony_ci i_size >> PAGE_SHIFT >= off) 41248c2ecf20Sopenharmony_ci return true; 41258c2ecf20Sopenharmony_ci fallthrough; 41268c2ecf20Sopenharmony_ci case SHMEM_HUGE_ADVISE: 41278c2ecf20Sopenharmony_ci /* TODO: implement fadvise() hints */ 41288c2ecf20Sopenharmony_ci return (vma->vm_flags & VM_HUGEPAGE); 41298c2ecf20Sopenharmony_ci default: 41308c2ecf20Sopenharmony_ci VM_BUG_ON(1); 41318c2ecf20Sopenharmony_ci return false; 41328c2ecf20Sopenharmony_ci } 41338c2ecf20Sopenharmony_ci} 41348c2ecf20Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 41358c2ecf20Sopenharmony_ci 41368c2ecf20Sopenharmony_ci#else /* !CONFIG_SHMEM */ 41378c2ecf20Sopenharmony_ci 41388c2ecf20Sopenharmony_ci/* 41398c2ecf20Sopenharmony_ci * tiny-shmem: simple shmemfs and tmpfs using 
ramfs code 41408c2ecf20Sopenharmony_ci * 41418c2ecf20Sopenharmony_ci * This is intended for small system where the benefits of the full 41428c2ecf20Sopenharmony_ci * shmem code (swap-backed and resource-limited) are outweighed by 41438c2ecf20Sopenharmony_ci * their complexity. On systems without swap this code should be 41448c2ecf20Sopenharmony_ci * effectively equivalent, but much lighter weight. 41458c2ecf20Sopenharmony_ci */ 41468c2ecf20Sopenharmony_ci 41478c2ecf20Sopenharmony_cistatic struct file_system_type shmem_fs_type = { 41488c2ecf20Sopenharmony_ci .name = "tmpfs", 41498c2ecf20Sopenharmony_ci .init_fs_context = ramfs_init_fs_context, 41508c2ecf20Sopenharmony_ci .parameters = ramfs_fs_parameters, 41518c2ecf20Sopenharmony_ci .kill_sb = ramfs_kill_sb, 41528c2ecf20Sopenharmony_ci .fs_flags = FS_USERNS_MOUNT, 41538c2ecf20Sopenharmony_ci}; 41548c2ecf20Sopenharmony_ci 41558c2ecf20Sopenharmony_ciint __init shmem_init(void) 41568c2ecf20Sopenharmony_ci{ 41578c2ecf20Sopenharmony_ci BUG_ON(register_filesystem(&shmem_fs_type) != 0); 41588c2ecf20Sopenharmony_ci 41598c2ecf20Sopenharmony_ci shm_mnt = kern_mount(&shmem_fs_type); 41608c2ecf20Sopenharmony_ci BUG_ON(IS_ERR(shm_mnt)); 41618c2ecf20Sopenharmony_ci 41628c2ecf20Sopenharmony_ci return 0; 41638c2ecf20Sopenharmony_ci} 41648c2ecf20Sopenharmony_ci 41658c2ecf20Sopenharmony_ciint shmem_unuse(unsigned int type, bool frontswap, 41668c2ecf20Sopenharmony_ci unsigned long *fs_pages_to_unuse) 41678c2ecf20Sopenharmony_ci{ 41688c2ecf20Sopenharmony_ci return 0; 41698c2ecf20Sopenharmony_ci} 41708c2ecf20Sopenharmony_ci 41718c2ecf20Sopenharmony_ciint shmem_lock(struct file *file, int lock, struct user_struct *user) 41728c2ecf20Sopenharmony_ci{ 41738c2ecf20Sopenharmony_ci return 0; 41748c2ecf20Sopenharmony_ci} 41758c2ecf20Sopenharmony_ci 41768c2ecf20Sopenharmony_civoid shmem_unlock_mapping(struct address_space *mapping) 41778c2ecf20Sopenharmony_ci{ 41788c2ecf20Sopenharmony_ci} 41798c2ecf20Sopenharmony_ci 
41808c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU 41818c2ecf20Sopenharmony_ciunsigned long shmem_get_unmapped_area(struct file *file, 41828c2ecf20Sopenharmony_ci unsigned long addr, unsigned long len, 41838c2ecf20Sopenharmony_ci unsigned long pgoff, unsigned long flags) 41848c2ecf20Sopenharmony_ci{ 41858c2ecf20Sopenharmony_ci return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); 41868c2ecf20Sopenharmony_ci} 41878c2ecf20Sopenharmony_ci#endif 41888c2ecf20Sopenharmony_ci 41898c2ecf20Sopenharmony_civoid shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 41908c2ecf20Sopenharmony_ci{ 41918c2ecf20Sopenharmony_ci truncate_inode_pages_range(inode->i_mapping, lstart, lend); 41928c2ecf20Sopenharmony_ci} 41938c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_truncate_range); 41948c2ecf20Sopenharmony_ci 41958c2ecf20Sopenharmony_ci#define shmem_vm_ops generic_file_vm_ops 41968c2ecf20Sopenharmony_ci#define shmem_file_operations ramfs_file_operations 41978c2ecf20Sopenharmony_ci#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) 41988c2ecf20Sopenharmony_ci#define shmem_acct_size(flags, size) 0 41998c2ecf20Sopenharmony_ci#define shmem_unacct_size(flags, size) do {} while (0) 42008c2ecf20Sopenharmony_ci 42018c2ecf20Sopenharmony_ci#endif /* CONFIG_SHMEM */ 42028c2ecf20Sopenharmony_ci 42038c2ecf20Sopenharmony_ci/* common code */ 42048c2ecf20Sopenharmony_ci 42058c2ecf20Sopenharmony_cistatic struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, 42068c2ecf20Sopenharmony_ci unsigned long flags, unsigned int i_flags) 42078c2ecf20Sopenharmony_ci{ 42088c2ecf20Sopenharmony_ci struct inode *inode; 42098c2ecf20Sopenharmony_ci struct file *res; 42108c2ecf20Sopenharmony_ci 42118c2ecf20Sopenharmony_ci if (IS_ERR(mnt)) 42128c2ecf20Sopenharmony_ci return ERR_CAST(mnt); 42138c2ecf20Sopenharmony_ci 42148c2ecf20Sopenharmony_ci if (size < 0 || size > MAX_LFS_FILESIZE) 42158c2ecf20Sopenharmony_ci return 
ERR_PTR(-EINVAL); 42168c2ecf20Sopenharmony_ci 42178c2ecf20Sopenharmony_ci if (shmem_acct_size(flags, size)) 42188c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 42198c2ecf20Sopenharmony_ci 42208c2ecf20Sopenharmony_ci inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0, 42218c2ecf20Sopenharmony_ci flags); 42228c2ecf20Sopenharmony_ci if (unlikely(!inode)) { 42238c2ecf20Sopenharmony_ci shmem_unacct_size(flags, size); 42248c2ecf20Sopenharmony_ci return ERR_PTR(-ENOSPC); 42258c2ecf20Sopenharmony_ci } 42268c2ecf20Sopenharmony_ci inode->i_flags |= i_flags; 42278c2ecf20Sopenharmony_ci inode->i_size = size; 42288c2ecf20Sopenharmony_ci clear_nlink(inode); /* It is unlinked */ 42298c2ecf20Sopenharmony_ci res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); 42308c2ecf20Sopenharmony_ci if (!IS_ERR(res)) 42318c2ecf20Sopenharmony_ci res = alloc_file_pseudo(inode, mnt, name, O_RDWR, 42328c2ecf20Sopenharmony_ci &shmem_file_operations); 42338c2ecf20Sopenharmony_ci if (IS_ERR(res)) 42348c2ecf20Sopenharmony_ci iput(inode); 42358c2ecf20Sopenharmony_ci return res; 42368c2ecf20Sopenharmony_ci} 42378c2ecf20Sopenharmony_ci 42388c2ecf20Sopenharmony_ci/** 42398c2ecf20Sopenharmony_ci * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be 42408c2ecf20Sopenharmony_ci * kernel internal. There will be NO LSM permission checks against the 42418c2ecf20Sopenharmony_ci * underlying inode. So users of this interface must do LSM checks at a 42428c2ecf20Sopenharmony_ci * higher layer. The users are the big_key and shm implementations. LSM 42438c2ecf20Sopenharmony_ci * checks are provided at the key or shm level rather than the inode. 
42448c2ecf20Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps 42458c2ecf20Sopenharmony_ci * @size: size to be set for the file 42468c2ecf20Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 42478c2ecf20Sopenharmony_ci */ 42488c2ecf20Sopenharmony_cistruct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) 42498c2ecf20Sopenharmony_ci{ 42508c2ecf20Sopenharmony_ci return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); 42518c2ecf20Sopenharmony_ci} 42528c2ecf20Sopenharmony_ci 42538c2ecf20Sopenharmony_ci/** 42548c2ecf20Sopenharmony_ci * shmem_file_setup - get an unlinked file living in tmpfs 42558c2ecf20Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps 42568c2ecf20Sopenharmony_ci * @size: size to be set for the file 42578c2ecf20Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 42588c2ecf20Sopenharmony_ci */ 42598c2ecf20Sopenharmony_cistruct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) 42608c2ecf20Sopenharmony_ci{ 42618c2ecf20Sopenharmony_ci return __shmem_file_setup(shm_mnt, name, size, flags, 0); 42628c2ecf20Sopenharmony_ci} 42638c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_file_setup); 42648c2ecf20Sopenharmony_ci 42658c2ecf20Sopenharmony_ci/** 42668c2ecf20Sopenharmony_ci * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs 42678c2ecf20Sopenharmony_ci * @mnt: the tmpfs mount where the file will be created 42688c2ecf20Sopenharmony_ci * @name: name for dentry (to be seen in /proc/<pid>/maps 42698c2ecf20Sopenharmony_ci * @size: size to be set for the file 42708c2ecf20Sopenharmony_ci * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 42718c2ecf20Sopenharmony_ci */ 42728c2ecf20Sopenharmony_cistruct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, 42738c2ecf20Sopenharmony_ci loff_t size, unsigned long flags) 
42748c2ecf20Sopenharmony_ci{ 42758c2ecf20Sopenharmony_ci return __shmem_file_setup(mnt, name, size, flags, 0); 42768c2ecf20Sopenharmony_ci} 42778c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); 42788c2ecf20Sopenharmony_ci 42798c2ecf20Sopenharmony_ci/** 42808c2ecf20Sopenharmony_ci * shmem_zero_setup - setup a shared anonymous mapping 42818c2ecf20Sopenharmony_ci * @vma: the vma to be mmapped is prepared by do_mmap 42828c2ecf20Sopenharmony_ci */ 42838c2ecf20Sopenharmony_ciint shmem_zero_setup(struct vm_area_struct *vma) 42848c2ecf20Sopenharmony_ci{ 42858c2ecf20Sopenharmony_ci struct file *file; 42868c2ecf20Sopenharmony_ci loff_t size = vma->vm_end - vma->vm_start; 42878c2ecf20Sopenharmony_ci 42888c2ecf20Sopenharmony_ci /* 42898c2ecf20Sopenharmony_ci * Cloning a new file under mmap_lock leads to a lock ordering conflict 42908c2ecf20Sopenharmony_ci * between XFS directory reading and selinux: since this file is only 42918c2ecf20Sopenharmony_ci * accessible to the user through its mapping, use S_PRIVATE flag to 42928c2ecf20Sopenharmony_ci * bypass file security, in the same way as shmem_kernel_file_setup(). 
42938c2ecf20Sopenharmony_ci */ 42948c2ecf20Sopenharmony_ci file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); 42958c2ecf20Sopenharmony_ci if (IS_ERR(file)) 42968c2ecf20Sopenharmony_ci return PTR_ERR(file); 42978c2ecf20Sopenharmony_ci 42988c2ecf20Sopenharmony_ci if (vma->vm_file) 42998c2ecf20Sopenharmony_ci fput(vma->vm_file); 43008c2ecf20Sopenharmony_ci vma->vm_file = file; 43018c2ecf20Sopenharmony_ci vma->vm_ops = &shmem_vm_ops; 43028c2ecf20Sopenharmony_ci 43038c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && 43048c2ecf20Sopenharmony_ci ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < 43058c2ecf20Sopenharmony_ci (vma->vm_end & HPAGE_PMD_MASK)) { 43068c2ecf20Sopenharmony_ci khugepaged_enter(vma, vma->vm_flags); 43078c2ecf20Sopenharmony_ci } 43088c2ecf20Sopenharmony_ci 43098c2ecf20Sopenharmony_ci return 0; 43108c2ecf20Sopenharmony_ci} 43118c2ecf20Sopenharmony_ci 43128c2ecf20Sopenharmony_ci/** 43138c2ecf20Sopenharmony_ci * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags. 43148c2ecf20Sopenharmony_ci * @mapping: the page's address_space 43158c2ecf20Sopenharmony_ci * @index: the page index 43168c2ecf20Sopenharmony_ci * @gfp: the page allocator flags to use if allocating 43178c2ecf20Sopenharmony_ci * 43188c2ecf20Sopenharmony_ci * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", 43198c2ecf20Sopenharmony_ci * with any new page allocations done using the specified allocation flags. 43208c2ecf20Sopenharmony_ci * But read_cache_page_gfp() uses the ->readpage() method: which does not 43218c2ecf20Sopenharmony_ci * suit tmpfs, since it may have pages in swapcache, and needs to find those 43228c2ecf20Sopenharmony_ci * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. 
43238c2ecf20Sopenharmony_ci * 43248c2ecf20Sopenharmony_ci * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in 43258c2ecf20Sopenharmony_ci * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily. 43268c2ecf20Sopenharmony_ci */ 43278c2ecf20Sopenharmony_cistruct page *shmem_read_mapping_page_gfp(struct address_space *mapping, 43288c2ecf20Sopenharmony_ci pgoff_t index, gfp_t gfp) 43298c2ecf20Sopenharmony_ci{ 43308c2ecf20Sopenharmony_ci#ifdef CONFIG_SHMEM 43318c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 43328c2ecf20Sopenharmony_ci struct page *page; 43338c2ecf20Sopenharmony_ci int error; 43348c2ecf20Sopenharmony_ci 43358c2ecf20Sopenharmony_ci BUG_ON(mapping->a_ops != &shmem_aops); 43368c2ecf20Sopenharmony_ci error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, 43378c2ecf20Sopenharmony_ci gfp, NULL, NULL, NULL); 43388c2ecf20Sopenharmony_ci if (error) 43398c2ecf20Sopenharmony_ci page = ERR_PTR(error); 43408c2ecf20Sopenharmony_ci else 43418c2ecf20Sopenharmony_ci unlock_page(page); 43428c2ecf20Sopenharmony_ci return page; 43438c2ecf20Sopenharmony_ci#else 43448c2ecf20Sopenharmony_ci /* 43458c2ecf20Sopenharmony_ci * The tiny !SHMEM case uses ramfs without swap 43468c2ecf20Sopenharmony_ci */ 43478c2ecf20Sopenharmony_ci return read_cache_page_gfp(mapping, index, gfp); 43488c2ecf20Sopenharmony_ci#endif 43498c2ecf20Sopenharmony_ci} 43508c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); 4351